├── .gemtest
├── test
    ├── data
    │   ├── news
    │   │   ├── ryzom-naissance-du-projet-libre-ryzom-forge.md
    │   │   └── index.html
    │   ├── validator.nu-success.json
    │   ├── assets
    │   │   └── application-92f19110a9d47a56d2ebe744e15af301.css
    │   ├── validator.nu-failure.json
    │   ├── xhtml1-strict.html
    │   ├── html5.html
    │   ├── html5-fail.html
    │   └── html4-strict.html
    ├── core_test.rb
    ├── test_helper.rb
    ├── example
    │   └── ruby smalltalk
    │   │   └── blockcamp-paris-le-28-novembre.html
    ├── webmock_helper.rb
    ├── static_test.rb
    ├── validator_test.rb
    └── crawler_test.rb
├── validate-website.png
├── lib
    ├── validate_website.rb
    └── validate_website
    │   ├── version.rb
    │   ├── validator_class_methods.rb
    │   ├── colorful_messages.rb
    │   ├── runner.rb
    │   ├── static_link.rb
    │   ├── utils.rb
    │   ├── static.rb
    │   ├── crawl.rb
    │   ├── validator.rb
    │   ├── core.rb
    │   └── option_parser.rb
├── .gitignore
├── Gemfile
├── bin
    ├── validate-website
    └── validate-website-static
├── .rubocop.yml
├── data
    └── schemas
    │   ├── xhtml2.xsd
    │   ├── xhtml-inlstyle-1.xsd
    │   ├── frameset.dtd
    │   ├── xhtml-inputmode-1.xsd
    │   ├── xhtml-copyright-1.xsd
    │   ├── xml-events-copyright-1.xsd
    │   ├── xml-events-copyright-2.xsd
    │   ├── xhtml-base-1.xsd
    │   ├── xhtml-charent-1.xsd
    │   ├── xhtml-metaAttributes-1.xsd
    │   ├── xhtml-ssismap-1.xsd
    │   ├── xhtml-target-1.xsd
    │   ├── xhtml-nameident-1.xsd
    │   ├── xml-events-attribs-1.xsd
    │   ├── xml-events-1.xsd
    │   ├── xml-events-2.xsd
    │   ├── xml-events-attribs-2.xsd
    │   ├── xhtml-ruby-basic-1.xsd
    │   ├── xhtml-notations-1.xsd
    │   ├── xhtml-special.ent
    │   ├── xml-handlers-1.xsd
    │   ├── xhtml-events-1.xsd
    │   ├── xframes-1.xsd
    │   ├── xhtml-datatypes-1.xsd
    │   ├── xhtml-basic11.dtd
    │   ├── xml.xsd
    │   ├── xhtml-lat1.ent
    │   └── xhtml-symbol.ent
├── .github
    └── workflows
    │   └── ci.yml
├── Rakefile
├── .gitlab-ci.yml
├── LICENSE
├── validate-website.gemspec
├── doc
    ├── validate-website-static.adoc
    └── validate-website.adoc
├── man
    └── man1
    │   ├── validate-website-static.1
    │   └── validate-website.1
├── README.md
└── History.md


/.gemtest:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/test/data/news/ryzom-naissance-du-projet-libre-ryzom-forge.md:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/validate-website.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/spk/validate-website/HEAD/validate-website.png


--------------------------------------------------------------------------------
/test/data/validator.nu-success.json:
--------------------------------------------------------------------------------
1 | {"url":"https://example.org/","messages":[],"language":"fr"}
2 | 


--------------------------------------------------------------------------------
/lib/validate_website.rb:
--------------------------------------------------------------------------------
1 | # frozen_string_literal: true
2 | 
3 | require 'validate_website/core'
4 | require 'validate_website/version'
5 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | pkg
2 | tags
3 | Gemfile.lock
4 | *.gem
5 | man/man1/validate-website-static.xml
6 | man/man1/validate-website.xml
7 | coverage
8 | 


--------------------------------------------------------------------------------
/test/data/assets/application-92f19110a9d47a56d2ebe744e15af301.css:
--------------------------------------------------------------------------------
1 | .t { background-image: url(/image/42.png) }
2 | /**/ .foo {} #{bar {}
3 | 


--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | source 'https://rubygems.org'
 4 | 
 5 | gemspec
 6 | 
 7 | gem 'simplecov', require: false
 8 | 
 9 | # vim: syntax=ruby filetype=ruby
10 | 


--------------------------------------------------------------------------------
/bin/validate-website:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | # frozen_string_literal: true
3 | 
4 | require 'validate_website/runner'
5 | exit_status = ValidateWebsite::Runner.run_crawl(ARGV)
6 | exit(exit_status)
7 | 


--------------------------------------------------------------------------------
/bin/validate-website-static:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | # frozen_string_literal: true
3 | 
4 | require 'validate_website/runner'
5 | exit_status = ValidateWebsite::Runner.run_static(ARGV)
6 | exit(exit_status)
7 | 


--------------------------------------------------------------------------------
/.rubocop.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | AllCops:
 3 |   TargetRubyVersion: 2.7
 4 | Naming/FileName:
 5 |   Enabled: false
 6 | Lint/MissingCopEnableDirective:
 7 |   Enabled: false
 8 | Lint/InterpolationCheck:
 9 |   Enabled: false
10 | Lint/UriEscapeUnescape:
11 |   Enabled: false
12 | 


--------------------------------------------------------------------------------
/lib/validate_website/version.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # Version file for ValidateWebsite
 4 | module ValidateWebsite
 5 |   VERSION = '1.12.0'
 6 | 
 7 |   def self.jruby? # :nodoc:
 8 |     defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby' # nil when RUBY_ENGINE is undefined, otherwise true/false
 9 |   end
10 | end
11 | 


--------------------------------------------------------------------------------
/test/core_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.expand_path('test_helper', __dir__)
 4 | 
 5 | describe ValidateWebsite::Core do
 6 |   describe 'invalid options' do
 7 |     it 'raise ArgumentError on wrong validation_type' do
 8 |       _(proc { ValidateWebsite::Core.new({ color: false }, :fail) })
 9 |         .must_raise ArgumentError
10 |     end
11 |   end
12 | end
13 | 


--------------------------------------------------------------------------------
/test/test_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | begin
 4 |   require 'simplecov'
 5 |   SimpleCov.start
 6 | rescue LoadError
 7 |   warn 'simplecov not loaded'
 8 | end
 9 | 
10 | require 'minitest/autorun'
11 | require 'spidr'
12 | 
13 | require 'validate_website/core'
14 | 
15 | require File.expand_path('webmock_helper', __dir__)
16 | 
17 | TEST_DOMAIN = 'http://www.example.com/'
18 | ENV['LC_ALL'] = 'C.UTF-8' if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
19 | 


--------------------------------------------------------------------------------
/test/data/news/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<title>title</title>
 6 | 		<meta name="description" content="" />
 7 | 		<meta name="keywords" content="" />
 8 | 		<meta name="author" content="" />
 9 | 	</head>
10 | 	<body>
11 | 		<header>
12 | 			<nav>
13 | 				<ul>
14 | 					<li><a href="ryzom-naissance-du-projet-libre-ryzom-forge.md" title="title">my url</a></li>
15 | 				</ul>
16 | 			</nav>
17 | 		</header>
18 | 	</body>
19 | </html>
20 | 


--------------------------------------------------------------------------------
/lib/validate_website/validator_class_methods.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'tidy_ffi'
 4 | 
 5 | # Validator Class Methods
 6 | module ValidatorClassMethods
 7 |   def validator_uri
 8 |     @validator_uri ||=
 9 |       ENV['VALIDATOR_NU_URL'] || @html5_validator_service_url
10 |   end
11 | 
12 |   def tidy
13 |     return @tidy if defined?(@tidy)
14 | 
15 |     @lib_tidy = TidyFFI::LibTidy
16 |     @tidy = TidyFFI::Tidy
17 |   rescue TidyFFI::LibTidyNotInstalled
18 |     @tidy = nil
19 |   end
20 | end
21 | 


--------------------------------------------------------------------------------
/test/data/validator.nu-failure.json:
--------------------------------------------------------------------------------
1 | {"url":"https://www.rust-lang.org/en-US/","messages":[{"type":"error","lastLine":113,"lastColumn":6,"firstColumn":1,"message":"End tag “pre” seen, but there were open elements.","extract":"}\n    }\n}\n</pre>\n</div","hiliteStart":10,"hiliteLength":6},{"type":"error","lastLine":98,"lastColumn":115,"firstColumn":110,"message":"Unclosed element “span”.","extract":"ng'>\"Hola\"<span>, <spa","hiliteStart":10,"hiliteLength":6},{"type":"error","lastLine":98,"lastColumn":103,"firstColumn":83,"message":"Unclosed element “span”.","extract":"\"</span>, <span class='string'>\"Hola\"","hiliteStart":10,"hiliteLength":21}]}
2 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml2.xsd:
--------------------------------------------------------------------------------
 1 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema"
 2 |            targetNamespace="http://www.w3.org/2002/06/xhtml2/">
 3 | 
 4 |   <xs:annotation>
 5 |     <xs:documentation>
 6 |       A minimal XML Schema for XHTML 2.0
 7 |       $Id: xhtml2.xsd,v 1.4 2005/06/14 15:28:27 mimasa Exp $
 8 |     </xs:documentation>
 9 |   </xs:annotation>
10 | 
11 |   <xs:element name="html">
12 |     <xs:complexType>
13 |       <xs:sequence>
14 |         <xs:any namespace="##any" processContents="skip"
15 |           minOccurs="0" maxOccurs="unbounded"/>
16 |       </xs:sequence>
17 |       <xs:anyAttribute namespace="##any" processContents="skip"/>
18 |     </xs:complexType>
19 |   </xs:element>
20 | 
21 | </xs:schema>
22 | 


--------------------------------------------------------------------------------
/.github/workflows/ci.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | name: CI
 3 | 
 4 | on:
 5 |   push:
 6 |     branches: [master]
 7 |   pull_request:
 8 |     branches: [master]
 9 | 
10 | jobs:
11 |   test:
12 |     runs-on: ubuntu-latest
13 |     strategy:
14 |       fail-fast: false
15 |       matrix:
16 |         ruby: ['2.7', '3.0', '3.1', jruby-9.4]
17 |     steps:
18 |       - uses: actions/checkout@v2
19 |       - name: Set up Ruby
20 |         uses: ruby/setup-ruby@v1
21 |         with:
22 |           bundler-cache: true
23 |           ruby-version: ${{ matrix.ruby }}
24 |       - name: Install libtidy-dev
25 |         run: sudo apt-get install libtidy-dev
26 |       - name: Install dependencies
27 |         run: bundle install -j 3
28 |       - name: Run tests
29 |         run: bundle exec rake
30 | 


--------------------------------------------------------------------------------
/Rakefile:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'rake/testtask'
 4 | require 'rubocop/rake_task'
 5 | require 'asciidoctor'
 6 | 
 7 | default = %i[test rubocop]
 8 | task default: default
 9 | 
10 | desc 'Update manpage from asciidoc file'
11 | task :manpage do
12 |   Dir.glob('doc/*.adoc').each do |adoc|
13 |     Asciidoctor.convert_file adoc, to_file: true,
14 |                                    backend: 'manpage',
15 |                                    to_dir: 'man/man1'
16 |   end
17 | end
18 | 
19 | Rake::TestTask.new do |t|
20 |   t.pattern = 'test/**/*_test.rb'
21 | end
22 | task spec: :test
23 | 
24 | desc 'Execute rubocop'
25 | RuboCop::RakeTask.new(:rubocop) do |t|
26 |   t.options = ['--display-cop-names', '--display-style-guide']
27 |   t.fail_on_error = true
28 | end
29 | 


--------------------------------------------------------------------------------
/lib/validate_website/colorful_messages.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'paint'
 4 | 
 5 | module ValidateWebsite
 6 |   # Internal helper for colorful messages
 7 |   module ColorfulMessages
 8 |     def color(type, message, colored = true)
 9 |       return message unless colored
10 | 
11 |       send(type, message)
12 |     end
13 | 
14 |     def error(message)
15 |       Paint[message, :red]
16 |     end
17 | 
18 |     def warning(message)
19 |       Paint[message, :yellow]
20 |     end
21 | 
22 |     def success(message)
23 |       Paint[message, :green]
24 |     end
25 | 
26 |     alias message success
27 | 
28 |     def note(message)
29 |       Paint[message, :magenta]
30 |     end
31 | 
32 |     def info(message)
33 |       Paint[message, :blue]
34 |     end
35 |   end
36 | end
37 | 


--------------------------------------------------------------------------------
/lib/validate_website/runner.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'validate_website/core'
 4 | 
 5 | module ValidateWebsite
 6 |   # Runner for command line use and clean exit on ctrl-c
 7 |   class Runner
 8 |     def self.trap_interrupt
 9 |       trap('INT') do
10 |         warn "\nExiting..."
11 |         exit!(1)
12 |       end
13 |     end
14 | 
15 |     def self.run_crawl(args)
16 |       trap_interrupt
17 |       validate_website = ValidateWebsite::Crawl.new(args)
18 |       validate_website.crawl
19 |       validate_website.exit_status
20 |     end
21 | 
22 |     def self.run_static(args)
23 |       trap_interrupt
24 |       validate_website = ValidateWebsite::Static.new(args)
25 |       validate_website.crawl
26 |       validate_website.exit_status
27 |     end
28 |   end
29 | end
30 | 


--------------------------------------------------------------------------------
/.gitlab-ci.yml:
--------------------------------------------------------------------------------
 1 | ---
 2 | stages:
 3 |   - test
 4 |   - coverage
 5 | 
 6 | default:
 7 |   before_script:
 8 |     - apt-get update -qy
 9 |     - apt-get install -y libtidy-dev
10 |     - ruby -v
11 |     - which ruby
12 |     - gem install bundler --no-document
13 |     - bundle install --jobs $(nproc)  "${FLAGS[@]}"
14 | 
15 | .tests:
16 |   script:
17 |     - bundle exec rake
18 |   stage: test
19 | 
20 | test:2.7:
21 |   extends: .tests
22 |   image: 'ruby:2.7'
23 | 
24 | test:3.0:
25 |   extends: .tests
26 |   image: 'ruby:3.0'
27 | 
28 | test:3.1:
29 |   extends: .tests
30 |   image: 'ruby:3.1'
31 | 
32 | test:jruby:
33 |   extends: .tests
34 |   image: 'jruby:9.4-jre'
35 | 
36 | coverage:
37 |   image: 'ruby:3.1'
38 |   script:
39 |     - bundle exec rake test
40 |   artifacts:
41 |     paths:
42 |       - coverage/
43 |   stage: coverage
44 | 


--------------------------------------------------------------------------------
/test/data/xhtml1-strict.html:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 3 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 4 | <html xmlns="http://www.w3.org/1999/xhtml">
 5 | 	<head>
 6 | 		<title>title</title>
 7 | 	</head>
 8 | 	<body>
 9 | 		<h1>Title 1</h1>
10 | 		<p>Paragraphe.</p>
11 | 
12 | 		<h2>Title 2</h2>
13 | 		<ul>
14 | 			<li><a href="/my-url1" title="title">my url</a></li>
15 | 			<li><a href="/my-url2" title="title">my url</a></li>
16 | 			<li><a href="/my-url1" title="title">my url</a></li>
17 | 		</ul>
18 |         <p><img src="http://test.com/img.png" alt="non local img" /></p>
19 |         <p><img src="http://www.example.com/img1.png" alt="local img with absolute uri" /></p>
20 |         <p><img src="/img2.png" alt="local img with non absolute uri" /></p>
21 | 	</body>
22 | </html>
23 | 


--------------------------------------------------------------------------------
/test/example/ruby smalltalk/blockcamp-paris-le-28-novembre.html:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
 3 | "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
 4 | <html xmlns="http://www.w3.org/1999/xhtml">
 5 | 	<head>
 6 | 		<title>title</title>
 7 | 	</head>
 8 | 	<body>
 9 | 		<h1>Title 1</h1>
10 | 		<p>Paragraphe.</p>
11 | 
12 | 		<h2>Title 2</h2>
13 | 		<ul>
14 | 			<li><a href="/my-url1" title="title">my url</a></li>
15 | 			<li><a href="/my-url2" title="title">my url</a></li>
16 | 			<li><a href="/my-url1" title="title">my url</a></li>
17 | 		</ul>
18 |         <p><img src="http://test.com/img.png" alt="non local img" /></p>
19 |         <p><img src="http://www.example.com/img1.png" alt="local img with absolute uri" /></p>
20 |         <p><img src="/img2.png" alt="local img with non absolute uri" /></p>
21 | 	</body>
22 | </html>
23 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-inlstyle-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/">
 3 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" schemaLocation="xhtml-datatypes-1.xsd"/>
 4 |     <xs:annotation>
 5 |         <xs:documentation>
 6 |       Inline Style module    
 7 |       This is the XML Schema Inline Style module for XHTML
 8 |       
 9 |          * style attribute
10 | 
11 |       This module declares the 'style' attribute, used to support inline 
12 |       style markup. 
13 | 
14 |       $Id: xhtml-inlstyle-1.xsd,v 1.2 2005/09/26 22:54:53 ahby Exp $
15 |     </xs:documentation>
16 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
17 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_styleattributemodule"/>
18 |     </xs:annotation>
19 |     <xs:attributeGroup name="xhtml.style.attrib">
20 |         <xs:attribute name="style" type="xh11d:CDATA"/>
21 |     </xs:attributeGroup>
22 | </xs:schema>


--------------------------------------------------------------------------------
/test/webmock_helper.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'webmock/minitest'
 4 | 
 5 | # FakePage html helper for webmock
 6 | class FakePage
 7 |   include WebMock::API
 8 | 
 9 |   attr_accessor :links
10 |   attr_accessor :hrefs
11 |   attr_accessor :body
12 | 
13 |   def initialize(name = '', options = {})
14 |     @name = name
15 |     @links = [options[:links]].flatten if options.key?(:links)
16 |     @hrefs = [options[:hrefs]].flatten if options.key?(:hrefs)
17 |     @content_type = options[:content_type] || 'text/html'
18 |     @body = options[:body]
19 | 
20 |     create_body unless @body
21 |     add_to_webmock
22 |   end
23 | 
24 |   def url
25 |     TEST_DOMAIN + @name
26 |   end
27 | 
28 |   private
29 | 
30 |   def create_body
31 |     @body = '<html><body>'
32 |     @links&.each { |l| @body += "<a href=\"#{TEST_DOMAIN}#{l}\"></a>" }
33 |     @hrefs&.each { |h| @body += "<a href=\"#{h}\"></a>" }
34 |     @body += '</body></html>'
35 |   end
36 | 
37 |   def add_to_webmock
38 |     options = { body: @body, headers: { 'Content-Type' => @content_type } }
39 |     stub_request(:get, url).to_return(options)
40 |   end
41 | end
42 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License
 2 | 
 3 | Copyright (c) 2009-2022 Laurent Arnoud <laurent@spkdev.net>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining
 6 | a copy of this software and associated documentation files (the
 7 | 'Software'), to deal in the Software without restriction, including
 8 | without limitation the rights to use, copy, modify, merge, publish,
 9 | distribute, sublicense, and/or sell copies of the Software, and to
10 | permit persons to whom the Software is furnished to do so, subject to
11 | the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be
14 | included in all copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23 | 


--------------------------------------------------------------------------------
/data/schemas/frameset.dtd:
--------------------------------------------------------------------------------
 1 | <!--
 2 |     This is the HTML 4.01 Frameset DTD, which should be
 3 |     used for documents with frames. This DTD is identical
 4 |     to the HTML 4.01 Transitional DTD except for the
 5 |     content model of the "HTML" element: in frameset
 6 |     documents, the "FRAMESET" element replaces the "BODY"
 7 |     element.
 8 | 
 9 |           Draft: $Date: 1999/12/24 23:37:45 $
10 | 
11 |           Authors:
12 |               Dave Raggett <dsr@w3.org>
13 |               Arnaud Le Hors <lehors@w3.org>
14 |               Ian Jacobs <ij@w3.org>
15 | 
16 |     Further information about HTML 4.01 is available at:
17 | 
18 |           http://www.w3.org/TR/1999/REC-html401-19991224.
19 | -->
20 | <!ENTITY % HTML.Version "-//W3C//DTD HTML 4.01 Frameset//EN"
21 |   -- Typical usage:
22 | 
23 |     <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Frameset//EN"
24 |             "http://www.w3.org/TR/html4/frameset.dtd">
25 |     <html>
26 |     <head>
27 |     ...
28 |     </head>
29 |     <frameset>
30 |     ...
31 |     </frameset>
32 |     </html>
33 | -->
34 | 
35 | <!ENTITY % HTML.Frameset "INCLUDE">
36 | <!ENTITY % HTML4.dtd PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
37 | %HTML4.dtd;


--------------------------------------------------------------------------------
/test/data/html5.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html>
 3 | 	<head>
 4 | 		<meta charset="utf-8" />
 5 | 		<title>title</title>
 6 | 		<meta name="description" content="" />
 7 | 		<meta name="keywords" content="" />
 8 | 		<meta name="author" content="" />
 9 | 	</head>
10 | 	<body>
11 | 		<header>
12 | 			<nav>
13 | 				<ul>
14 | 					<li><a href="/my-url1" title="title">my url</a></li>
15 | 					<li><a href="/my-url2" title="title">my url</a></li>
16 | 					<li><a href="/my-url3" title="title">my url</a></li>
17 | 				</ul>
18 | 			</nav>
19 | 		</header>
20 | 
21 | 		<div>
22 | 			<article>
23 | 				<h1>article title</h1>
24 | 				<p>texte de my article</p>
25 | 				<section>
26 | 					<h2>article subtitle</h2>
27 | 					<p>text</p>
28 | 					<figure>
29 | 						<a href="/my-url4"><img src="image.png" alt="my image"></a>
30 | 					</figure>
31 | 				</section>
32 | 			</article>
33 | 		</div>
34 | 
35 | 		<footer>
36 | 			<nav>
37 | 				<ul>
38 | 					<li><a href="/my-url5" title="title">my url</a></li>
39 | 					<li><a href="/my-url6" title="title">my url</a></li>
40 | 					<li><a href="/my-url7" title="title">my url</a></li>
41 | 					<li><a href="/tags/#test">Test</a></li>
42 | 				</ul>
43 | 			</nav>
44 | 		</footer>
45 | 	</body>
46 | </html>
47 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-inputmode-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema
 3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
 4 |  elementFormDefault="qualified"
 5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
 6 | >
 7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
 8 |                schemaLocation="xhtml-datatypes-1.xsd" />
 9 |   <xs:annotation>
10 |     <xs:documentation>
11 |       This is the XML Schema inputmode module for XHTML
12 |       $Id: xhtml-inputmode-1.xsd,v 1.1 2008/05/29 19:43:21 smccarro Exp $
13 |     </xs:documentation>
14 |     <xs:documentation source="xhtml-copyright-1.xsd"/>
15 |   </xs:annotation>
16 | 
17 |   <xs:annotation>
18 |     <xs:documentation>
19 |       InputMode 
20 |       
21 |         * inputmode
22 |       
23 |       This module declares the 'inputmode' attribute used for giving hints about how to deal with input
24 |     </xs:documentation>
25 |   </xs:annotation>
26 |   
27 |   <xs:attributeGroup name="xhtml.input.inputmode.attlist">
28 |     <xs:attribute name="inputmode" type="xh11d:FrameTarget"/>
29 |   </xs:attributeGroup>
30 | 
31 |   <xs:attributeGroup name="xhtml.textarea.inputmode.attlist">
32 |     <xs:attribute name="inputmode" type="xh11d:FrameTarget"/>
33 |   </xs:attributeGroup>
34 | 
35 | </xs:schema>
36 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-copyright-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema
 3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
 4 |  elementFormDefault="qualified"
 5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
 6 | >
 7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
 8 |                schemaLocation="xhtml-datatypes-1.xsd" />
 9 |   <xs:annotation>
10 |     <xs:documentation>
11 |       This is XHTML, a reformulation of HTML as a modular XML application
12 |       The Extensible HyperText Markup Language (XHTML)
13 |       Copyright &#169;1998-2005 World Wide Web Consortium
14 |       (Massachusetts Institute of Technology, European Research Consortium
15 |        for Informatics and Mathematics, Keio University).
16 |       All Rights Reserved.
17 |     
18 |       Permission to use, copy, modify and distribute the XHTML Schema
19 |       modules and their accompanying xs:documentation for any purpose
20 |       and without fee is hereby granted in perpetuity, provided that the above
21 |       copyright notice and this paragraph appear in all copies.  
22 |       The copyright holders make no representation about the suitability of
23 |       these XML Schema modules for any purpose.
24 |     
25 |       They are provided "as is" without expressed or implied warranty.
26 |     </xs:documentation>
27 |   </xs:annotation>
28 | 
29 | </xs:schema>
30 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-copyright-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema	
 3 |     targetNamespace="http://www.w3.org/2001/xml-events"
 4 |     xmlns="http://www.w3.org/2001/xml-events"
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema"  
 6 |     elementFormDefault="unqualified" 
 7 |     blockDefault="#all" 
 8 |     finalDefault="#all" 
 9 |     attributeFormDefault="unqualified">
10 | 
11 |   <xs:annotation>
12 |     <xs:documentation>
13 |       This is XML Events, a generalized event model for XML-based
14 |       markup languages. 
15 | 
16 |         Copyright 2001-2003 World Wide Web Consortium
17 |             (Massachusetts Institute of Technology, European Research
18 |             Consortium for Informatics and Mathematics, Keio University).
19 |             All Rights Reserved.
20 | 		
21 |         Permission to use, copy, modify and distribute the 
22 |         XML Events Schema modules and their accompanying xs:documentation 
23 |         for any purpose and without fee is hereby granted in perpetuity, 
24 |         provided that the above copyright notice and this paragraph appear 
25 |         in all copies.  
26 | 
27 |         The copyright holders make no representation about the suitability of
28 |         these XML Schema modules for any purpose.
29 | 		
30 |         They are provided "as is" without expressed or implied warranty.
31 |     </xs:documentation>
32 |   </xs:annotation>
33 | 
34 | </xs:schema>
35 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-copyright-2.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema	
 3 |     targetNamespace="http://www.w3.org/2001/xml-events"
 4 |     xmlns="http://www.w3.org/2001/xml-events"
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema"  
 6 |     elementFormDefault="unqualified" 
 7 |     blockDefault="#all" 
 8 |     finalDefault="#all" 
 9 |     attributeFormDefault="unqualified">
10 | 
11 |   <xs:annotation>
12 |     <xs:documentation>
13 |       This is XML Events, a generalized event model for XML-based
14 |       markup languages. 
15 | 
16 |         Copyright 2001-2007 World Wide Web Consortium
17 |             (Massachusetts Institute of Technology, European Research
18 |             Consortium for Informatics and Mathematics, Keio University).
19 |             All Rights Reserved.
20 | 		
21 |         Permission to use, copy, modify and distribute the 
22 |         XML Events Schema modules and their accompanying xs:documentation 
23 |         for any purpose and without fee is hereby granted in perpetuity, 
24 |         provided that the above copyright notice and this paragraph appear 
25 |         in all copies.  
26 | 
27 |         The copyright holders make no representation about the suitability of
28 |         these XML Schema modules for any purpose.
29 | 		
30 |         They are provided "as is" without expressed or implied warranty.
31 |     </xs:documentation>
32 |   </xs:annotation>
33 | 
34 | </xs:schema>
35 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-base-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/">
 3 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" schemaLocation="xhtml-datatypes-1.xsd"/>
 4 | 
 5 |     <xs:annotation>
 6 |         <xs:documentation>
 7 |         Base element
 8 |         This is the XML Schema Base Element module for XHTML      
 9 |                 
10 |           * base
11 | 
12 |         This module declares the base element type and its attributes,        
13 |         used to define a base URI against which relative URIs in the
14 |         document will be resolved.
15 | 
16 |         $Id: xhtml-base-1.xsd,v 1.2 2005/09/26 22:54:53 ahby Exp $
17 |       </xs:documentation>
18 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
19 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_basemodule"/>
20 |     </xs:annotation>
21 |     <xs:attributeGroup name="xhtml.base.attlist">
22 |         <xs:attribute name="href" type="xh11d:URI" use="required"/>
23 |     </xs:attributeGroup>
24 |     <xs:group name="xhtml.base.content">
25 |         <xs:sequence/>
26 |     </xs:group>
27 |     <xs:complexType name="xhtml.base.type">
28 |         <xs:group ref="xhtml.base.content"/>
29 |         <xs:attributeGroup ref="xhtml.base.attlist"/>
30 |     </xs:complexType>
31 | </xs:schema>


--------------------------------------------------------------------------------
/lib/validate_website/static_link.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'uri'
 4 | require 'validate_website/utils'
 5 | require 'validate_website/static'
 6 | require 'spidr'
 7 | 
 8 | # rubocop:disable Metrics/BlockLength
 9 | StaticLink = Struct.new(:link, :site) do
10 |   def link_uri
11 |     @link_uri = URI.parse(WEBrick::HTTPUtils.escape(link))
12 |     @link_uri = URI.join(site, @link_uri) if @link_uri.host.nil?
13 |     @link_uri
14 |   end
15 | 
16 |   def in_static_domain?
17 |     URI.parse(site).host == link_uri.host
18 |   end
19 | 
20 |   def content_types
21 |     if css?
22 |       ['text/css']
23 |     else
24 |       ValidateWebsite::Static::CONTENT_TYPES
25 |     end
26 |   end
27 | 
28 |   def body
29 |     if File.exist?(link)
30 |       File.open(link).read
31 |     else
32 |       File.open(file_path).read
33 |     end
34 |   end
35 | 
36 |   def response
37 |     @response ||= ValidateWebsite::Static.fake_httpresponse(
38 |       body,
39 |       content_types
40 |     )
41 |   end
42 | 
43 |   def page
44 |     @page ||= Spidr::Page.new(link_uri, response)
45 |   end
46 | 
47 |   def extract_urls_from_fake_css_response
48 |     ValidateWebsite::Utils.extract_urls_from_css(page)
49 |   end
50 | 
51 |   def file_path
52 |     @file_path ||= URI.parse(
53 |       File.join(Dir.getwd, link_uri.path || '/')
54 |     ).path
55 |   end
56 | 
57 |   def extname
58 |     @extname ||= File.extname(file_path)
59 |   end
60 | 
61 |   def css?
62 |     extname == '.css'
63 |   end
64 | 
65 |   def check?
66 |     !link.include?('#') && in_static_domain?
67 |   end
68 | end
69 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-charent-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <!--
 3 |   This schema module includes three named character entity files.
 4 | -->
 5 | <!DOCTYPE xs:schema [
 6 |     <!-- These are the entity sets for ISO Latin 1 characters for the XHTML -->
 7 |     <!ENTITY % HTMLlat1 PUBLIC
 8 |    "-//W3C//ENTITIES Latin 1 for XHTML//EN"
 9 |    "http://www.w3.org/MarkUp/DTD/xhtml-lat1.ent">
10 |     %HTMLlat1;
11 |     <!-- These are the entity sets for symbol characters for the XHTML -->
12 |     <!ENTITY % HTMLsymbol PUBLIC
13 |    "-//W3C//ENTITIES Symbols for XHTML//EN"
14 |    "http://www.w3.org/MarkUp/DTD/xhtml-symbol.ent">
15 |     %HTMLsymbol;
16 |     <!-- These are the entity sets for special characters for the XHTML -->
17 |     <!ENTITY % HTMLspecial PUBLIC
18 |    "-//W3C//ENTITIES Special for XHTML//EN"
19 |    "http://www.w3.org/MarkUp/DTD/xhtml-special.ent">
20 |     %HTMLspecial;
21 | ]>
22 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" 
23 |            elementFormDefault="qualified">
24 |     <xs:annotation>
25 |         <xs:documentation>
26 |       Character Entities for XHTML    
27 |       This is the XML Schema Character Entities module for XHTML
28 | 
29 |       This module declares the set of character entities for XHTML,
30 |       including the Latin 1, Symbol and Special character collections.
31 |       XML Schema does not support Entities, hence Entities are enabled
32 |       through an Internal DTD Subset.
33 |       
34 |       $Id: xhtml-charent-1.xsd,v 1.3 2005/09/26 22:54:53 ahby Exp $
35 |     </xs:documentation>
36 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
37 |     </xs:annotation>
38 | </xs:schema>
39 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-metaAttributes-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/" elementFormDefault="qualified">
 3 |   <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" schemaLocation="xhtml-datatypes-1.xsd"/>
 4 |   <xs:annotation>
 5 |     <xs:documentation>
 6 |       This is the XML Schema Metainformation Attributes module for XHTML
 7 | 
 8 |       $Id: xhtml-metaAttributes-1.xsd,v 1.6 2008/07/05 04:15:30 smccarro Exp $
 9 |     </xs:documentation>
10 |     <xs:documentation source="xhtml-rdfa-copyright-1.xsd"/>
11 |   </xs:annotation>
12 | 
13 |   <xs:annotation>
14 |     <xs:documentation>
15 |       XHTML Metainformation Attributes
16 |     </xs:documentation>
17 |   </xs:annotation>
18 | 
19 |   <xs:attribute name="about" type="xh11d:URIorSafeCURIE"/>
20 |   <xs:attribute name="content" type="xh11d:CDATA"/>
21 |   <xs:attribute name="datatype" type="xh11d:CURIE"/>
22 |   <xs:attribute name="typeof" type="xh11d:CURIEs"/>
23 |   <xs:attribute name="property" type="xh11d:CURIEs"/>
24 |   <xs:attribute name="rel" type="xh11d:CURIEs"/>
25 |   <xs:attribute name="resource" type="xh11d:URIorSafeCURIE"/>
26 |   <xs:attribute name="rev" type="xh11d:CURIEs"/>
27 | 
28 |   <xs:attributeGroup name="xhtml.metaAttributes.attrib">
29 |     <xs:attribute name="about"/>
30 |     <xs:attribute name="content"/>
31 |     <xs:attribute name="datatype"/>
32 |     <xs:attribute name="typeof"/>
33 |     <xs:attribute name="property"/>
34 |     <xs:attribute name="rel"/>
35 |     <xs:attribute name="resource"/>
36 |     <xs:attribute name="rev"/>
37 |   </xs:attributeGroup>
38 | 
39 | </xs:schema>


--------------------------------------------------------------------------------
/lib/validate_website/utils.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | # Base module ValidateWebsite
 4 | module ValidateWebsite
 5 |   # Utils class for CSS helpers
 6 |   class Utils
 7 |     # Extract urls from CSS page
 8 |     #
 9 |     # @param [Spidr::Page] a Spidr::Page object
10 |     # @return [Set] Lists of urls
11 |     #
12 |     def self.extract_urls_from_css(page)
13 |       return Set[] unless page
14 |       return Set[] if page.body.nil?
15 | 
16 |       nodes = Crass::Parser.parse_stylesheet(page.body)
17 |       extract_urls_from_nodes nodes, page
18 |     end
19 | 
20 |     # Return urls as absolute from Crass nodes
21 |     #
22 |     # @param [Hash] node from Crass
23 |     # @param [Spidr::Page] a Spidr::Page object
24 |     # @return [Set] list of obsolute urls
25 |     def self.urls_to_absolute(node, page)
26 |       if node[:node] == :function && node[:name] == 'url' || node[:node] == :url
27 |         Array(node[:value]).map do |v|
28 |           url = v.is_a?(String) ? v : v[:value]
29 |           page.to_absolute(url).to_s
30 |         end
31 |       else
32 |         Set.new
33 |       end
34 |     end
35 | 
36 |     # Extract urls from Crass nodes
37 |     # @param [Array] Array of nodes from Crass
38 |     # @param [Spidr::Page] a Spidr::Page object
39 |     # @param [Set] memo for recursivity
40 |     # @return [Set] list of urls
41 |     def self.extract_urls_from_nodes(nodes, page, memo = Set[])
42 |       nodes.each_with_object(memo) do |node, result|
43 |         result.merge urls_to_absolute(node, page)
44 |         if node[:children]
45 |           extract_urls_from_nodes node.delete(:children), page, result
46 |         end
47 |         result
48 |       end
49 |     end
50 |   end
51 | end
52 | 


--------------------------------------------------------------------------------
/validate-website.gemspec:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.expand_path('lib/validate_website/version', __dir__)
 4 | 
 5 | Gem::Specification.new do |s|
 6 |   s.author = 'Laurent Arnoud'
 7 |   s.email = 'laurent@spkdev.net'
 8 |   s.homepage = 'http://github.com/spk/validate-website'
 9 |   s.platform = Gem::Platform::RUBY
10 |   s.summary = 'Web crawler for checking the validity of your documents'
11 |   s.name = 'validate-website'
12 |   s.version = ValidateWebsite::VERSION
13 |   s.license = 'MIT'
14 |   s.required_ruby_version = '>= 2.7'
15 |   s.add_dependency 'crass', '~> 1'
16 |   s.add_dependency 'nokogiri', '~> 1.15'
17 |   s.add_dependency 'paint', '~> 2'
18 |   s.add_dependency 'slop', '~> 4.6'
19 |   s.add_dependency 'spidr', '~> 0.7'
20 |   s.add_dependency 'tidy_ffi', '~> 1.0'
21 |   s.add_dependency 'w3c_validators', '~> 1.3'
22 |   s.add_dependency 'webrick', '~> 1'
23 |   s.add_development_dependency 'asciidoctor', '~> 1.5'
24 |   s.add_development_dependency 'minitest', '~> 5.0'
25 |   s.add_development_dependency 'rake', '~> 12'
26 |   s.add_development_dependency 'rubocop', '~> 0.76.0'
27 |   s.add_development_dependency 'webmock', '~> 3.4'
28 |   s.require_path = 'lib'
29 |   s.bindir = 'bin'
30 |   s.executables << 'validate-website'
31 |   s.executables << 'validate-website-static'
32 |   s.files = Dir['README.md', 'Rakefile', 'LICENSE', 'History.md',
33 |                 'bin',
34 |                 'lib/**/*.rb',
35 |                 'man/**/*',
36 |                 'test/**/*',
37 |                 'data/**/*']
38 |   s.description = %(validate-website is a web crawler for checking the markup \
39 | validity with XML Schema / DTD and not found urls.)
40 |   s.test_files = Dir.glob('test/**/*_test.rb')
41 | end
42 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-ssismap-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/">
 3 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" schemaLocation="xhtml-datatypes-1.xsd"/>
 4 |     <xs:annotation>
 5 |         <xs:documentation>
 6 |       This is the XML Schema Server-side Image Maps module for XHTML
 7 |       $Id: xhtml-ssismap-1.xsd,v 1.3 2005/09/26 22:54:53 ahby Exp $
 8 |     </xs:documentation>
 9 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
10 |     </xs:annotation>
11 |     <xs:annotation>
12 |         <xs:documentation>
13 |       Server-side Image Maps
14 |       
15 |       This adds the 'ismap' attribute to the img element to 
16 |       support server-side processing of a user selection.
17 |     </xs:documentation>
18 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_servermapmodule"/>
19 |     </xs:annotation>
20 |     <xs:attributeGroup name="xhtml.img.ssimap.attlist">
21 |         <xs:attribute name="ismap">
22 |             <xs:simpleType>
23 |                 <xs:restriction base="xs:NMTOKEN">
24 |                     <xs:enumeration value="ismap"/>
25 |                 </xs:restriction>
26 |             </xs:simpleType>
27 |         </xs:attribute>
28 |     </xs:attributeGroup>
29 |     <xs:attributeGroup name="xhtml.input.ssimap.attlist">
30 |         <xs:attribute name="ismap">
31 |             <xs:simpleType>
32 |                 <xs:restriction base="xs:NMTOKEN">
33 |                     <xs:enumeration value="ismap"/>
34 |                 </xs:restriction>
35 |             </xs:simpleType>
36 |         </xs:attribute>
37 |     </xs:attributeGroup>
38 | </xs:schema>


--------------------------------------------------------------------------------
/data/schemas/xhtml-target-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema
 3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
 4 |  elementFormDefault="qualified"
 5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
 6 | >
 7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
 8 |                schemaLocation="xhtml-datatypes-1.xsd" />
 9 |   <xs:annotation>
10 |     <xs:documentation>
11 |       This is the XML Schema Target module for XHTML
12 |       $Id: xhtml-target-1.xsd,v 1.3 2007/04/03 18:27:01 ahby Exp $
13 |     </xs:documentation>
14 |     <xs:documentation source="xhtml-copyright-1.xsd"/>
15 |   </xs:annotation>
16 | 
17 |   <xs:annotation>
18 |     <xs:documentation>
19 |       Target 
20 |       
21 |         * target
22 |       
23 |       This module declares the 'target' attribute used for opening windows
24 |     </xs:documentation>
25 |     <xs:documentation 
26 |          source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_targetmodule"/>
27 |   </xs:annotation>
28 |   
29 |   <xs:attributeGroup name="xhtml.base.target.attlist">
30 |     <xs:attribute name="target" type="xh11d:FrameTarget"/>
31 |   </xs:attributeGroup>
32 | 
33 |   <xs:attributeGroup name="xhtml.form.target.attlist">
34 |     <xs:attribute name="target" type="xh11d:FrameTarget"/>
35 |   </xs:attributeGroup>
36 | 
37 |   <xs:attributeGroup name="xhtml.link.target.attlist">
38 |     <xs:attribute name="target" type="xh11d:FrameTarget"/>
39 |   </xs:attributeGroup>
40 | 
41 |   <xs:attributeGroup name="xhtml.area.target.attlist">
42 |     <xs:attribute name="target" type="xh11d:FrameTarget"/>
43 |   </xs:attributeGroup>
44 | 
45 |   <xs:attributeGroup name="xhtml.a.target.attlist">
46 |     <xs:attribute name="target" type="xh11d:FrameTarget"/>
47 |   </xs:attributeGroup>
48 | 
49 | </xs:schema>
50 | 


--------------------------------------------------------------------------------
/doc/validate-website-static.adoc:
--------------------------------------------------------------------------------
 1 | validate-website-static(1)
 2 | ==========================
 3 | 
 4 | NAME
 5 | ----
 6 | validate-website-static - check the validity of your documents
 7 | 
 8 | SYNOPSIS
 9 | --------
10 | *validate-website-static* ['OPTIONS']
11 | 
12 | DESCRIPTION
13 | -----------
14 | validate-website-static checks the markup validity of your local documents with
15 | XML Schema / DTD.
16 | HTML5 support with Validator.nu Web Service.
17 | 
18 | OPTIONS
19 | -------
20 | *-s*, *--site* 'SITE'::
21 |   Where static files will be hosted (Default: http://www.example.com/)
22 | *-p*, *--pattern* 'PATTERN'::
23 |   Change filenames pattern (Default: \*\*/*.html)
24 | *-e*, *--exclude* 'EXCLUDE'::
25 |   Url to exclude (ex: 'redirect|news')
26 | *-i*, *--ignore* 'IGNORE'::
27 |   Ignore certain validation errors (ex: 'autocorrect')
28 | *-m*, *--[no-]markup*::
29 |   Markup validation (Default: true)
30 | *--css-syntax*::
31 |   Css validation (Default: false)
32 | *-n*, *--not-found*::
33 |   Log files not on filesystem, pwd considered as root « / » (Default: false)
34 | *--[no-]color*::
35 |   Show colored output (Default: true)
36 | *-x*, *--html5-validator* 'VALIDATOR'::
37 |   Change default html5 validator engine (tidy/nu/nokogiri)
38 | *-5*, *--html5-validator-service-url* 'URL'::
39 |   Change default html5 validator service URL for "nu" engine
40 | *-v*, *--verbose*::
41 |   Show detail of validator errors (Default: false).
42 | *-h*, *--help*::
43 |   Show help message and exit.
44 | 
45 | EXIT STATUS
46 | -----------
47 | 0::
48 |   Markup is valid.
49 | 64::
50 |   Invalid markup found.
51 | 65::
52 |   There are pages not found.
53 | 66::
54 |   Both invalid markup and pages not found were detected.
55 | 
56 | AUTHOR
57 | ------
58 | Laurent Arnoud mailto:laurent@spkdev.net[Laurent Arnoud]
59 | 
60 | LICENSE
61 | -------
62 | The MIT License
63 | 
64 | Copyright (c) 2009-2019 mailto:laurent@spkdev.net[Laurent Arnoud]
65 | 
66 | // vim: set syntax=asciidoc:
67 | 


--------------------------------------------------------------------------------
/doc/validate-website.adoc:
--------------------------------------------------------------------------------
 1 | validate-website(1)
 2 | ===================
 3 | 
 4 | NAME
 5 | ----
 6 | validate-website - Web crawler for checking the validity of your documents
 7 | 
 8 | SYNOPSIS
 9 | --------
10 | *validate-website* ['OPTIONS']
11 | 
12 | DESCRIPTION
13 | -----------
14 | validate-website is a web crawler for checking the markup validity with XML
15 | Schema / DTD and not found urls.
16 | HTML5 support with Validator.nu Web Service.
17 | 
18 | OPTIONS
19 | -------
20 | *-s*, *--site* 'SITE'::
21 |   Website to crawl (Default: http://localhost:3000/)
22 | *-u*, *--user-agent* 'USERAGENT'::
23 |   Change user agent (Default: Spidr.user_agent)
24 | *-e*, *--exclude* 'EXCLUDE'::
25 |   Url to exclude (ex: 'redirect|news')
26 | *-i*, *--ignore* 'IGNORE'::
27 |   Ignore certain validation errors (ex: 'autocorrect')
28 | *-c*, *--cookies* 'COOKIES'::
29 |   Set default cookies
30 | *-m*, *--[no-]markup*::
31 |   Markup validation (Default: true)
32 | *--css-syntax*::
33 |   Css validation (Default: false)
34 | *-n*, *--not-found*::
35 |   Log not found url (Default: false)
36 | *--[no-]color*::
37 |   Show colored output (Default: true)
38 | *-x*, *--html5-validator* 'VALIDATOR'::
39 |   Change default html5 validator engine (tidy/nu/nokogiri)
40 | *-5*, *--html5-validator-service-url* 'URL'::
41 |   Change default html5 validator service URL for "nu" engine
42 | *-v*, *--verbose*::
43 |   Show detail of validator errors (Default: false).
44 | *-h*, *--help*::
45 |   Show help message and exit.
46 | 
47 | EXIT STATUS
48 | -----------
49 | 0::
50 |   Markup is valid and no 404 found.
51 | 64::
52 |   Invalid markup found.
53 | 65::
54 |   There are pages not found.
55 | 66::
56 |   Both invalid markup and pages not found were detected.
57 | 
58 | AUTHOR
59 | ------
60 | Laurent Arnoud mailto:laurent@spkdev.net[Laurent Arnoud]
61 | 
62 | LICENSE
63 | -------
64 | The MIT License
65 | 
66 | Copyright (c) 2009-2019 mailto:laurent@spkdev.net[Laurent Arnoud]
67 | 
68 | // vim: set syntax=asciidoc:
69 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-nameident-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema
 3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
 4 |  elementFormDefault="qualified"
 5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
 6 | >
 7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
 8 |                schemaLocation="xhtml-datatypes-1.xsd" />
 9 |   <xs:annotation>
10 |     <xs:documentation>
11 |       This is the XML Schema Name Identifier module for XHTML
12 |       $Id: xhtml-nameident-1.xsd,v 1.2 2005/09/26 22:54:53 ahby Exp $
13 |     </xs:documentation>
14 |     <xs:documentation source="xhtml-copyright-1.xsd"/>
15 |   </xs:annotation>
16 | 
17 |   <xs:annotation>
18 |     <xs:documentation>
19 |       Name Identifier
20 |     
21 |         * 'name' attribute on form, img, a, map, applet, frame, iframe
22 |     
23 |       This module declares the 'name' attribute on element types when 
24 |       it is used as a node identifier for legacy linking and scripting 
25 |       support. This does not include those instances when 'name' is used 
26 |       as a container for form control, property or metainformation names.
27 |          
28 |       This module should be instantiated following all modules it modifies.
29 |     </xs:documentation>
30 |     <xs:documentation 
31 |         source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_nameidentmodule"/>
32 |   </xs:annotation>
33 | 
34 | 
35 |   <xs:attributeGroup name="xhtml.form.name.attlist">
36 |     <xs:attribute name="name" type="xh11d:CDATA"/>
37 |   </xs:attributeGroup>
38 | 
39 |   <xs:attributeGroup name="xhtml.img.name.attlist">
40 |     <xs:attribute name="name" type="xh11d:CDATA"/>
41 |   </xs:attributeGroup>
42 | 
43 |   <xs:attributeGroup name="xhtml.a.name.attlist">
44 |     <xs:attribute name="name" type="xh11d:CDATA"/>
45 |   </xs:attributeGroup>
46 | 
47 |   <xs:attributeGroup name="xhtml.map.name.attlist">
48 |     <xs:attribute name="name" type="xh11d:CDATA"/>
49 |   </xs:attributeGroup>
50 | 
51 |   <xs:attributeGroup name="xhtml.applet.name.attlist">
52 |     <xs:attribute name="name" type="xh11d:CDATA"/>
53 |   </xs:attributeGroup>
54 | 
55 |   <xs:attributeGroup name="xhtml.frame.name.attlist">
56 |     <xs:attribute name="name" type="xh11d:CDATA"/>
57 |   </xs:attributeGroup>
58 | 
59 |   <xs:attributeGroup name="xhtml.iframe.name.attlist">
60 |     <xs:attribute name="name" type="xh11d:CDATA"/>
61 |   </xs:attributeGroup>
62 | 
63 | </xs:schema>
64 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-attribs-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema 
 3 |     targetNamespace="http://www.w3.org/2001/xml-events" 
 4 |     xmlns:ev="http://www.w3.org/2001/xml-events" 
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema" 
 6 |     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
 7 |     xsi:schemaLocation="http://www.w3.org/2001/XMLSchema 
 8 |                         http://www.w3.org/2001/XMLSchema.xsd" 
 9 |     elementFormDefault="unqualified" 
10 |     blockDefault="#all" 
11 |     finalDefault="#all" 
12 |     attributeFormDefault="unqualified">
13 | 
14 |   <xs:annotation>
15 |     <xs:documentation>
16 |       This is the XML Schema for XML Events global attributes
17 | 
18 |       URI: http://www.w3.org/MarkUp/SCHEMA/xml-events-attribs-1.xsd
19 |       $Id: xml-events-attribs-1.xsd,v 1.7 2004/11/22 17:09:15 ahby Exp $
20 |     </xs:documentation>
21 |     <xs:documentation source="xml-events-copyright-1.xsd"/>
22 |   </xs:annotation>
23 | 
24 |   <xs:annotation>
25 |     <xs:documentation>
26 |       XML Event Attributes
27 | 		
28 |         These "global" event attributes are defined in "Attaching
29 |         Attributes Directly to the Observer Element" of the XML
30 |         Events specification.
31 |     </xs:documentation>
32 |   </xs:annotation>
33 | 
34 |   <xs:attribute name="event" type="xs:NMTOKEN"/>
35 |   <xs:attribute name="observer" type="xs:IDREF"/>
36 |   <xs:attribute name="target" type="xs:IDREF"/>
37 |   <xs:attribute name="handler" type="xs:anyURI"/>
38 |   <xs:attribute name="phase" default="default">
39 |     <xs:simpleType>
40 |       <xs:restriction base="xs:NMTOKEN">
41 |         <xs:enumeration value="capture"/>
42 |         <xs:enumeration value="default"/>
43 |       </xs:restriction>
44 |     </xs:simpleType>
45 |   </xs:attribute>
46 |   <xs:attribute name="propagate" default="continue">
47 |     <xs:simpleType>
48 |       <xs:restriction base="xs:NMTOKEN">
49 |         <xs:enumeration value="stop"/>
50 |         <xs:enumeration value="continue"/>
51 |       </xs:restriction>
52 |     </xs:simpleType>
53 |   </xs:attribute>
54 |   <xs:attribute name="defaultAction" default="perform">
55 |     <xs:simpleType>
56 |       <xs:restriction base="xs:NMTOKEN">
57 |         <xs:enumeration value="cancel"/>
58 |         <xs:enumeration value="perform"/>
59 |       </xs:restriction>
60 |     </xs:simpleType>
61 |   </xs:attribute>
62 | 
63 |   <xs:attributeGroup name="XmlEvents.attlist">
64 |     <xs:attribute ref="ev:event"/>
65 |     <xs:attribute ref="ev:observer"/>
66 |     <xs:attribute ref="ev:target"/>
67 |     <xs:attribute ref="ev:handler"/>
68 |     <xs:attribute ref="ev:phase"/>
69 |     <xs:attribute ref="ev:propagate"/>
70 |     <xs:attribute ref="ev:defaultAction"/>
71 |   </xs:attributeGroup>
72 | 
73 | </xs:schema>
74 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema 
 3 |     targetNamespace="http://www.w3.org/2001/xml-events" 
 4 |     xmlns="http://www.w3.org/2001/xml-events" 
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema" 
 6 |     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
 7 |     xsi:schemaLocation="http://www.w3.org/2001/XMLSchema 
 8 |                         http://www.w3.org/2001/XMLSchema.xsd" 
 9 |     elementFormDefault="unqualified" 
10 |     blockDefault="#all" 
11 |     finalDefault="#all" 
12 |     attributeFormDefault="unqualified">
13 | 
14 |   <xs:annotation>
15 |     <xs:documentation>
16 |       This is the XML Schema for XML Events
17 | 
18 |       URI: http://www.w3.org/MarkUp/SCHEMA/xml-events-1.xsd
19 |       $Id: xml-events-1.xsd,v 1.8 2004/11/22 17:09:15 ahby Exp $
20 |     </xs:documentation>
21 |     <xs:documentation source="xml-events-copyright-1.xsd"/>
22 |   </xs:annotation>
23 | 
24 |   <xs:annotation>
25 |     <xs:documentation>
26 |       XML Events element listener
27 | 		
28 |         This module defines the listener element for XML Events.
29 |         This element can be used to define event listeners. This
30 |         module relies upon the XmlEvents.attlist attribute group
31 |         defined in xml-events-attribs-1.xsd.
32 |     </xs:documentation>
33 |   </xs:annotation>
34 | 
35 |   <xs:attributeGroup name="listener.attlist">
36 |     <xs:attribute name="event" use="required" type="xs:NMTOKEN"/>
37 |     <xs:attribute name="observer" type="xs:IDREF"/>
38 |     <xs:attribute name="target" type="xs:IDREF"/>
39 |     <xs:attribute name="handler" type="xs:anyURI"/>
40 |     <xs:attribute name="phase" default="default">
41 |       <xs:simpleType>
42 |         <xs:restriction base="xs:NMTOKEN">
43 |           <xs:enumeration value="capture"/>
44 |           <xs:enumeration value="default"/>
45 |         </xs:restriction>
46 |       </xs:simpleType>
47 |     </xs:attribute>
48 |     <xs:attribute name="propagate" default="continue">
49 |       <xs:simpleType>
50 |         <xs:restriction base="xs:NMTOKEN">
51 |           <xs:enumeration value="stop"/>
52 |           <xs:enumeration value="continue"/>
53 |         </xs:restriction>
54 |       </xs:simpleType>
55 |     </xs:attribute>
56 |     <xs:attribute name="defaultAction" default="perform">
57 |       <xs:simpleType>
58 |         <xs:restriction base="xs:NMTOKEN">
59 |           <xs:enumeration value="cancel"/>
60 |           <xs:enumeration value="perform"/>
61 |         </xs:restriction>
62 |       </xs:simpleType>
63 |     </xs:attribute>
64 |     <xs:attribute name="id" type="xs:ID"/>
65 |   </xs:attributeGroup>
66 | 
67 |   <xs:complexType name="listener.type">
68 |     <xs:attributeGroup ref="listener.attlist"/>
69 |   </xs:complexType>
70 | 
71 |   <xs:element name="listener" type="listener.type"/>
72 | 
73 | </xs:schema>
74 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-2.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema 
 3 |     targetNamespace="http://www.w3.org/2001/xml-events" 
 4 |     xmlns="http://www.w3.org/2001/xml-events" 
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema" 
 6 |     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
 7 |     xsi:schemaLocation="http://www.w3.org/2001/XMLSchema 
 8 |                         http://www.w3.org/2001/XMLSchema.xsd" 
 9 |     elementFormDefault="unqualified" 
10 |     blockDefault="#all" 
11 |     finalDefault="#all" 
12 |     attributeFormDefault="unqualified">
13 | 
14 |   <xs:annotation>
15 |     <xs:documentation>
16 |       This is the XML Schema for XML Events
17 | 
18 |       URI: http://www.w3.org/MarkUp/SCHEMA/xml-events-2.xsd
19 |       $Id: xml-events-2.xsd,v 1.2 2008/06/25 14:36:17 smccarro Exp $
20 |     </xs:documentation>
21 |     <xs:documentation source="xml-events-copyright-2.xsd"/>
22 |   </xs:annotation>
23 | 
24 |   <xs:annotation>
25 |     <xs:documentation>
26 |       XML Events element listener
27 | 		
28 |         This module defines the listener element for XML Events.
29 |         This element can be used to define event listeners. This
30 |         module relies upon the XmlEvents.attlist attribute group
31 |         defined in xml-events-attribs-2.xsd.
32 |     </xs:documentation>
33 |   </xs:annotation>
34 | 
35 |   <xs:attributeGroup name="listener.attlist">
36 |     <xs:attribute name="event" use="required" type="xs:NMTOKEN"/>
37 |     <xs:attribute name="observer" type="xs:IDREF"/>
38 |     <xs:attribute name="target" type="xs:IDREF"/>
39 |     <xs:attribute name="handler" type="xs:anyURI"/>
40 |     <xs:attribute name="phase" default="default">
41 |       <xs:simpleType>
42 |         <xs:restriction base="xs:NMTOKEN">
43 |           <xs:enumeration value="capture"/>
44 |           <xs:enumeration value="default"/>
45 |         </xs:restriction>
46 |       </xs:simpleType>
47 |     </xs:attribute>
48 |     <xs:attribute name="propagate" default="continue">
49 |       <xs:simpleType>
50 |         <xs:restriction base="xs:NMTOKEN">
51 |           <xs:enumeration value="stop"/>
52 |           <xs:enumeration value="continue"/>
53 |         </xs:restriction>
54 |       </xs:simpleType>
55 |     </xs:attribute>
56 |     <xs:attribute name="defaultAction" default="perform">
57 |       <xs:simpleType>
58 |         <xs:restriction base="xs:NMTOKEN">
59 |           <xs:enumeration value="cancel"/>
60 |           <xs:enumeration value="perform"/>
61 |         </xs:restriction>
62 |       </xs:simpleType>
63 |     </xs:attribute>
64 |     <xs:attribute name="id" type="xs:ID"/>
65 |   </xs:attributeGroup>
66 | 
67 |   <xs:complexType name="listener.type">
68 |     <xs:attributeGroup ref="listener.attlist"/>
69 |   </xs:complexType>
70 | 
71 |   <xs:element name="listener" type="listener.type"/>
72 | 
73 | </xs:schema>
74 | 


--------------------------------------------------------------------------------
/data/schemas/xml-events-attribs-2.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema 
 3 |     targetNamespace="http://www.w3.org/2001/xml-events" 
 4 |     xmlns:ev="http://www.w3.org/2001/xml-events" 
 5 |     xmlns:xs="http://www.w3.org/2001/XMLSchema" 
 6 |     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
 7 |     xsi:schemaLocation="http://www.w3.org/2001/XMLSchema 
 8 |                         http://www.w3.org/2001/XMLSchema.xsd" 
 9 |     elementFormDefault="unqualified" 
10 |     blockDefault="#all" 
11 |     finalDefault="#all" 
12 |     attributeFormDefault="unqualified">
13 | 
14 |   <xs:annotation>
15 |     <xs:documentation>
16 |       This is the XML Schema for XML Events global attributes
17 | 
18 |       URI: http://www.w3.org/MarkUp/SCHEMA/xml-events-attribs-2.xsd
19 |       $Id: xml-events-attribs-2.xsd,v 1.2 2008/06/25 14:36:21 smccarro Exp $
20 |     </xs:documentation>
21 |     <xs:documentation source="xml-events-copyright-2.xsd"/>
22 |   </xs:annotation>
23 | 
24 |   <xs:annotation>
25 |     <xs:documentation>
26 |       XML Event Attributes
27 | 		
28 |         These "global" event attributes are defined in "Attaching
29 |         Attributes Directly to the Observer Element" of the XML
30 |         Events specification.
31 |     </xs:documentation>
32 |   </xs:annotation>
33 | 
34 |   <xs:attribute name="event" type="xs:NMTOKEN"/>
35 |   <xs:attribute name="observer" type="xs:IDREF"/>
36 |   <xs:attribute name="targetid" type="xs:IDREF"/>
37 |   <xs:attribute name="handler" type="xs:anyURI"/>
38 |   <xs:attribute name="phase" default="default">
39 |     <xs:simpleType>
40 |       <xs:restriction base="xs:NMTOKEN">
41 |         <xs:enumeration value="bubble"/>
42 |         <xs:enumeration value="capture"/>
43 |         <xs:enumeration value="default"/>
44 |         <xs:enumeration value="target"/>
45 |       </xs:restriction>
46 |     </xs:simpleType>
47 |   </xs:attribute>
48 |   <xs:attribute name="propagate" default="continue">
49 |     <xs:simpleType>
50 |       <xs:restriction base="xs:NMTOKEN">
51 |         <xs:enumeration value="stop"/>
52 |         <xs:enumeration value="continue"/>
53 |       </xs:restriction>
54 |     </xs:simpleType>
55 |   </xs:attribute>
56 |   <xs:attribute name="defaultAction" default="perform">
57 |     <xs:simpleType>
58 |       <xs:restriction base="xs:NMTOKEN">
59 |         <xs:enumeration value="cancel"/>
60 |         <xs:enumeration value="perform"/>
61 |       </xs:restriction>
62 |     </xs:simpleType>
63 |   </xs:attribute>
64 | 
65 |   <xs:attributeGroup name="XmlEvents.attlist">
66 |     <xs:attribute ref="ev:event"/>
67 |     <xs:attribute ref="ev:observer"/>
68 |     <xs:attribute ref="ev:targetid"/>
69 |     <xs:attribute ref="ev:handler"/>
70 |     <xs:attribute ref="ev:phase"/>
71 |     <xs:attribute ref="ev:propagate"/>
72 |     <xs:attribute ref="ev:defaultAction"/>
73 |   </xs:attributeGroup>
74 | 
75 | </xs:schema>
76 | 


--------------------------------------------------------------------------------
/lib/validate_website/static.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'validate_website/core'
 4 | require 'validate_website/utils'
 5 | 
 6 | module ValidateWebsite
 7 |   # Class for validation Static website
 8 |   class Static < Core
 9 |     CONTENT_TYPES = ['text/html', 'text/xhtml+xml'].freeze
10 |     START_MESSAGE_TYPE = 'files'
11 | 
12 |     attr_reader :history_count
13 | 
14 |     def initialize(options = {}, validation_type = :static)
15 |       @history_count = 0
16 |       super
17 |       start_message("#{START_MESSAGE_TYPE} in #{Dir.pwd} (#{@site} as site)")
18 |     end
19 | 
20 |     # @param [Hash] options
21 |     #
22 |     def crawl(options = {})
23 |       @options = @options.merge(options)
24 |       @site = @options[:site]
25 | 
26 |       files = Dir.glob(@options[:pattern])
27 |       files.each do |file|
28 |         next unless File.file?(file)
29 |         next if @options[:exclude]&.match(file)
30 | 
31 |         @history_count += 1
32 |         check_static_file(file)
33 |       end
34 |       print_status_line(files.size, 0, @not_founds_count, @errors_count)
35 |     end
36 | 
37 |     # Fake http response for Spidr static crawling
38 |     # see https://github.com/ruby/ruby/blob/trunk/lib/net/http/response.rb
39 |     #
40 |     # @param [String] response body
41 |     # @param [Array] content types
42 |     # @return [Net::HTTPResponse] fake http response
43 |     def self.fake_httpresponse(body, content_types = CONTENT_TYPES)
44 |       response = Net::HTTPResponse.new '1.1', 200, 'OK'
45 |       response.instance_variable_set(:@read, true)
46 |       response.body = body
47 |       content_types.each do |c|
48 |         response.add_field('content-type', c)
49 |       end
50 |       response
51 |     end
52 | 
53 |     private
54 | 
55 |     def check_static_file(file)
56 |       page = StaticLink.new(file, @site).page
57 |       check_page(file, page)
58 |       check_css_syntax(page) if page.css? && options[:css_syntax]
59 |     end
60 | 
61 |     def check_page(file, page)
62 |       if page.html? && options[:markup]
63 |         keys = %i[ignore html5_validator]
64 |         slice = options.slice(*keys)
65 |         validate(page.doc, page.body, file, slice)
66 |       end
67 |       check_static_not_found(page.links, page.url.to_s) if options[:not_found]
68 |     end
69 | 
70 |     # check files linked on static document
71 |     # see lib/validate_website/runner.rb
72 |     def check_static_not_found(links, site = @site)
73 |       static_links = links.map { |l| StaticLink.new(l, site) }
74 |       static_links.each do |static_link|
75 |         next unless static_link.check?
76 | 
77 |         unless File.exist?(static_link.file_path)
78 |           not_found_error(static_link.file_path)
79 |           next
80 |         end
81 |         next unless static_link.css?
82 | 
83 |         check_static_not_found static_link.extract_urls_from_fake_css_response
84 |       end
85 |     end
86 |   end
87 | end
88 | 


--------------------------------------------------------------------------------
/lib/validate_website/crawl.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require 'validate_website/core'
 4 | require 'validate_website/utils'
 5 | 
 6 | module ValidateWebsite
 7 |   # Class for http website validation
 8 |   class Crawl < Core
 9 |     attr_reader :crawler
10 | 
11 |     def initialize(options = {}, validation_type = :crawl)
12 |       super
13 |       start_message(@site)
14 |     end
15 | 
16 |     def history_count
17 |       crawler.history.size
18 |     end
19 | 
20 |     # @param [Hash] options
21 |     #   :color [Boolean] color output (true, false)
22 |     #   :exclude [String] a String used by Regexp.new
23 |     #   :markup [Boolean] Check the markup validity
24 |     #   :not_found [Boolean] Check for not found page (404)
25 |     #
26 |     def crawl(options = {})
27 |       @options = @options.merge(options)
28 |       @options[:ignore_links] = @options[:exclude] if @options[:exclude]
29 | 
30 |       @crawler = spidr_crawler(@site, @options)
31 |       print_status_line(@crawler.history.size,
32 |                         @crawler.failures.size,
33 |                         @not_founds_count,
34 |                         @errors_count)
35 |     end
36 | 
37 |     private
38 | 
39 |     # Extract imgs urls from page
40 |     #
41 |     # @param [Spidr::Page] an Spidr::Page object
42 |     # @return [Array] Lists of urls
43 |     #
44 |     def extract_imgs_from_page(page)
45 |       return Set[] if page.is_redirect?
46 | 
47 |       page.doc.search('//img[@src]').reduce(Set[]) do |result, elem|
48 |         u = elem.attributes['src'].content
49 |         result << page.to_absolute(URI.parse(WEBrick::HTTPUtils.escape(u)))
50 |       end
51 |     end
52 | 
53 |     def spidr_crawler(site, options)
54 |       @host = URI(site).host
55 |       Spidr.site(site, **options.slice(:user_agent, :ignore_links)) do |crawler|
56 |         crawler.cookies[@host] = default_cookies if options[:cookies]
57 |         on_every_css_page(crawler)
58 |         on_every_html_page(crawler)
59 |         on_every_failed_url(crawler) if options[:not_found]
60 |       end
61 |     end
62 | 
63 |     def on_every_css_page(crawler)
64 |       crawler.every_css_page do |page|
65 |         check_css_syntax(page) if options[:css_syntax]
66 |         ValidateWebsite::Utils.extract_urls_from_css(page).each do |u|
67 |           crawler.enqueue(u)
68 |         end
69 |       end
70 |     end
71 | 
72 |     def validate?(page)
73 |       options[:markup] && page.html? && !page.is_redirect?
74 |     end
75 | 
76 |     def on_every_html_page(crawler)
77 |       crawler.every_html_page do |page|
78 |         extract_imgs_from_page(page).each do |i|
79 |           crawler.enqueue(i)
80 |         end
81 | 
82 |         if validate?(page)
83 |           keys = %i[ignore html5_validator]
84 |           validate(page.doc, page.body, page.url, options.slice(keys))
85 |         end
86 |       end
87 |     end
88 | 
89 |     def on_every_failed_url(crawler)
90 |       crawler.every_failed_url do |url|
91 |         not_found_error(url)
92 |       end
93 |     end
94 |   end
95 | end
96 | 


--------------------------------------------------------------------------------
/man/man1/validate-website-static.1:
--------------------------------------------------------------------------------
  1 | '\" t
  2 | .\"     Title: validate-website-static
  3 | .\"    Author: [see the "AUTHOR(S)" section]
  4 | .\" Generator: Asciidoctor 2.0.18
  5 | .\"      Date: 2022-05-02
  6 | .\"    Manual: \ \&
  7 | .\"    Source: \ \&
  8 | .\"  Language: English
  9 | .\"
 10 | .TH "VALIDATE\-WEBSITE\-STATIC" "1" "2022-05-02" "\ \&" "\ \&"
 11 | .ie \n(.g .ds Aq \(aq
 12 | .el       .ds Aq '
 13 | .ss \n[.ss] 0
 14 | .nh
 15 | .ad l
 16 | .de URL
 17 | \fI\\$2\fP <\\$1>\\$3
 18 | ..
 19 | .als MTO URL
 20 | .if \n[.g] \{\
 21 | .  mso www.tmac
 22 | .  am URL
 23 | .    ad l
 24 | .  .
 25 | .  am MTO
 26 | .    ad l
 27 | .  .
 28 | .  LINKSTYLE blue R < >
 29 | .\}
 30 | .SH "NAME"
 31 | validate-website-static \- check the validity of your documents
 32 | .SH "SYNOPSIS"
 33 | .sp
 34 | \fBvalidate\-website\-static\fP [\fIOPTIONS\fP]
 35 | .SH "DESCRIPTION"
 36 | .sp
 37 | validate\-website\-static checks the markup validity of your local documents with
 38 | XML Schema / DTD.
 39 | HTML5 support with Validator.nu Web Service.
 40 | .SH "OPTIONS"
 41 | .sp
 42 | \fB\-s\fP, \fB\-\-site\fP \fISITE\fP
 43 | .RS 4
 44 | Where static files will be hosted (Default: \c
 45 | .URL "http://www.example.com/" "" ")"
 46 | .RE
 47 | .sp
 48 | \fB\-p\fP, \fB\-\-pattern\fP \fIPATTERN\fP
 49 | .RS 4
 50 | Change filenames pattern (Default: **/*.html)
 51 | .RE
 52 | .sp
 53 | \fB\-e\fP, \fB\-\-exclude\fP \fIEXCLUDE\fP
 54 | .RS 4
 55 | Pattern of file paths to exclude (ex: \fIredirect|news\fP)
 56 | .RE
 57 | .sp
 58 | \fB\-i\fP, \fB\-\-ignore\fP \fIIGNORE\fP
 59 | .RS 4
 60 | Ignore certain validation errors (ex: \fIautocorrect\fP)
 61 | .RE
 62 | .sp
 63 | \fB\-m\fP, \fB\-\-[no\-]markup\fP
 64 | .RS 4
 65 | Markup validation (Default: true)
 66 | .RE
 67 | .sp
 68 | \fB\-\-css\-syntax\fP
 69 | .RS 4
 70 | Css validation (Default: false)
 71 | .RE
 72 | .sp
 73 | \fB\-n\fP, \fB\-\-not\-found\fP
 74 | .RS 4
 75 | Log files not on filesystem, pwd considered as root « / » (Default: false)
 76 | .RE
 77 | .sp
 78 | \fB\-\-[no\-]color\fP
 79 | .RS 4
 80 | Show colored output (Default: true)
 81 | .RE
 82 | .sp
 83 | \fB\-x\fP, \fB\-\-html5\-validator\fP \fIVALIDATOR\fP
 84 | .RS 4
 85 | Change default html5 validator engine (tidy/nu/nokogiri)
 86 | .RE
 87 | .sp
 88 | \fB\-5\fP, \fB\-\-html5\-validator\-service\-url\fP \fIURL\fP
 89 | .RS 4
 90 | Change default html5 validator service URL for "nu" engine
 91 | .RE
 92 | .sp
 93 | \fB\-v\fP, \fB\-\-verbose\fP
 94 | .RS 4
 95 | Show detail of validator errors (Default: false).
 96 | .RE
 97 | .sp
 98 | \fB\-h\fP, \fB\-\-help\fP
 99 | .RS 4
100 | Show help message and exit.
101 | .RE
102 | .SH "EXIT STATUS"
103 | .sp
104 | 0
105 | .RS 4
106 | Markup is valid.
107 | .RE
108 | .sp
109 | 64
110 | .RS 4
111 | Invalid markup found.
112 | .RE
113 | .sp
114 | 65
115 | .RS 4
116 | There are pages not found.
117 | .RE
118 | .sp
119 | 66
120 | .RS 4
121 | Both invalid markup and not found pages were detected.
122 | .RE
123 | .SH "AUTHOR"
124 | .sp
125 | Laurent Arnoud \c
126 | .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""
127 | .SH "LICENSE"
128 | .sp
129 | The MIT License
130 | .sp
131 | Copyright (c) 2009\-2019 \c
132 | .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""


--------------------------------------------------------------------------------
/man/man1/validate-website.1:
--------------------------------------------------------------------------------
  1 | '\" t
  2 | .\"     Title: validate-website
  3 | .\"    Author: [see the "AUTHOR(S)" section]
  4 | .\" Generator: Asciidoctor 2.0.18
  5 | .\"      Date: 2022-05-02
  6 | .\"    Manual: \ \&
  7 | .\"    Source: \ \&
  8 | .\"  Language: English
  9 | .\"
 10 | .TH "VALIDATE\-WEBSITE" "1" "2022-05-02" "\ \&" "\ \&"
 11 | .ie \n(.g .ds Aq \(aq
 12 | .el       .ds Aq '
 13 | .ss \n[.ss] 0
 14 | .nh
 15 | .ad l
 16 | .de URL
 17 | \fI\\$2\fP <\\$1>\\$3
 18 | ..
 19 | .als MTO URL
 20 | .if \n[.g] \{\
 21 | .  mso www.tmac
 22 | .  am URL
 23 | .    ad l
 24 | .  .
 25 | .  am MTO
 26 | .    ad l
 27 | .  .
 28 | .  LINKSTYLE blue R < >
 29 | .\}
 30 | .SH "NAME"
 31 | validate-website \- Web crawler for checking the validity of your documents
 32 | .SH "SYNOPSIS"
 33 | .sp
 34 | \fBvalidate\-website\fP [\fIOPTIONS\fP]
 35 | .SH "DESCRIPTION"
 36 | .sp
 37 | validate\-website is a web crawler that checks markup validity against XML
 38 | Schema / DTD and reports not found URLs.
 39 | HTML5 support with Validator.nu Web Service.
 40 | .SH "OPTIONS"
 41 | .sp
 42 | \fB\-s\fP, \fB\-\-site\fP \fISITE\fP
 43 | .RS 4
 44 | Website to crawl (Default: \c
 45 | .URL "http://localhost:3000/" "" ")"
 46 | .RE
 47 | .sp
 48 | \fB\-u\fP, \fB\-\-user\-agent\fP \fIUSERAGENT\fP
 49 | .RS 4
 50 | Change user agent (Default: Spidr.user_agent)
 51 | .RE
 52 | .sp
 53 | \fB\-e\fP, \fB\-\-exclude\fP \fIEXCLUDE\fP
 54 | .RS 4
 55 | Url to exclude (ex: \fIredirect|news\fP)
 56 | .RE
 57 | .sp
 58 | \fB\-i\fP, \fB\-\-ignore\fP \fIIGNORE\fP
 59 | .RS 4
 60 | Ignore certain validation errors (ex: \fIautocorrect\fP)
 61 | .RE
 62 | .sp
 63 | \fB\-c\fP, \fB\-\-cookies\fP \fICOOKIES\fP
 64 | .RS 4
 65 | Set default cookies
 66 | .RE
 67 | .sp
 68 | \fB\-m\fP, \fB\-\-[no\-]markup\fP
 69 | .RS 4
 70 | Markup validation (Default: true)
 71 | .RE
 72 | .sp
 73 | \fB\-\-css\-syntax\fP
 74 | .RS 4
 75 | Css validation (Default: false)
 76 | .RE
 77 | .sp
 78 | \fB\-n\fP, \fB\-\-not\-found\fP
 79 | .RS 4
 80 | Log not found URLs (Default: false)
 81 | .RE
 82 | .sp
 83 | \fB\-\-[no\-]color\fP
 84 | .RS 4
 85 | Show colored output (Default: true)
 86 | .RE
 87 | .sp
 88 | \fB\-x\fP, \fB\-\-html5\-validator\fP \fIVALIDATOR\fP
 89 | .RS 4
 90 | Change default html5 validator engine (tidy/nu/nokogiri)
 91 | .RE
 92 | .sp
 93 | \fB\-5\fP, \fB\-\-html5\-validator\-service\-url\fP \fIURL\fP
 94 | .RS 4
 95 | Change default html5 validator service URL for "nu" engine
 96 | .RE
 97 | .sp
 98 | \fB\-v\fP, \fB\-\-verbose\fP
 99 | .RS 4
100 | Show detail of validator errors (Default: false).
101 | .RE
102 | .sp
103 | \fB\-h\fP, \fB\-\-help\fP
104 | .RS 4
105 | Show help message and exit.
106 | .RE
107 | .SH "EXIT STATUS"
108 | .sp
109 | 0
110 | .RS 4
111 | Markup is valid and no 404 found.
112 | .RE
113 | .sp
114 | 64
115 | .RS 4
116 | Invalid markup found.
117 | .RE
118 | .sp
119 | 65
120 | .RS 4
121 | There are pages not found.
122 | .RE
123 | .sp
124 | 66
125 | .RS 4
126 | Both invalid markup and not found pages were detected.
127 | .RE
128 | .SH "AUTHOR"
129 | .sp
130 | Laurent Arnoud \c
131 | .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""
132 | .SH "LICENSE"
133 | .sp
134 | The MIT License
135 | .sp
136 | Copyright (c) 2009\-2019 \c
137 | .MTO "laurent\(atspkdev.net" "Laurent Arnoud" ""


--------------------------------------------------------------------------------
/test/static_test.rb:
--------------------------------------------------------------------------------
 1 | # frozen_string_literal: true
 2 | 
 3 | require File.expand_path('test_helper', __dir__)
 4 | 
 5 | # rubocop:disable Metrics/BlockLength
 6 | describe ValidateWebsite::Static do
 7 |   before do
 8 |     _out, _err = capture_io do
 9 |       @validate_website = ValidateWebsite::Static.new(color: false)
10 |     end
11 |   end
12 | 
13 |   it 'exclude directories' do
14 |     pattern = File.join(File.dirname(__FILE__), '**/*.html')
15 |     _out, _err = capture_io do
16 |       @validate_website.crawl(pattern: pattern,
17 |                               site: 'http://spkdev.net/',
18 |                               markup: false,
19 |                               not_found: false,
20 |                               exclude: /data|example/)
21 |     end
22 |     _(@validate_website.history_count).must_equal 0
23 |   end
24 | 
25 |   it 'no space in directory name' do
26 |     pattern = File.join(File.dirname(__FILE__), 'example/**/*.html')
27 |     _out, _err = capture_io do
28 |       @validate_website.crawl(pattern: pattern,
29 |                               site: 'http://dev.af83.com/',
30 |                               markup: false,
31 |                               not_found: false)
32 |     end
33 |     _(@validate_website.not_founds_count).must_equal 0
34 |   end
35 | 
36 |   it 'not found' do
37 |     Dir.chdir('test/data') do
38 |       _out, _err = capture_io do
39 |         @validate_website.crawl(pattern: '**/*.html',
40 |                                 site: 'https://linuxfr.org/',
41 |                                 markup: false,
42 |                                 not_found: true)
43 |       end
44 |       _(@validate_website.not_founds_count).must_equal 210
45 |     end
46 |   end
47 | 
48 |   it 'can change validator' do
49 |     validator_res = File.join('test', 'data', 'validator.nu-failure.json')
50 |     stub_request(:any,
51 |                  /#{ValidateWebsite::Validator.html5_validator_service_url}/)
52 |       .to_return(body: File.open(validator_res).read)
53 |     pattern = File.join(File.dirname(__FILE__), 'data',
54 |                         'html5-fail.html')
55 |     Dir.chdir('test/data') do
56 |       _out, _err = capture_io do
57 |         @validate_website.crawl(pattern: pattern,
58 |                                 site: 'http://w3.org/',
59 |                                 ignore: /Warning/,
60 |                                 html5_validator: :nu)
61 |       end
62 |       _(@validate_website.errors_count).must_equal 1
63 |     end
64 |   end
65 | 
66 |   it 'ignore' do
67 |     pattern = File.join(File.dirname(__FILE__), 'data',
68 |                         'w3.org-xhtml1-strict-errors.html')
69 |     Dir.chdir('test/data') do
70 |       _out, _err = capture_io do
71 |         @validate_website.crawl(pattern: pattern,
72 |                                 site: 'http://w3.org/',
73 |                                 ignore: /height|width|Length/)
74 |       end
75 |       _(@validate_website.errors_count).must_equal 0
76 |     end
77 |   end
78 | 
79 |   describe 'css' do
80 |     it 'validate' do
81 |       pattern = File.join(File.dirname(__FILE__), '**/*.{html,css}')
82 |       Dir.chdir('test/data') do
83 |         _out, _err = capture_io do
84 |           @validate_website.crawl(pattern: pattern,
85 |                                   site: 'https://linuxfr.org/',
86 |                                   markup: false,
87 |                                   css_syntax: true)
88 |         end
89 |         _(@validate_website.errors_count).must_equal 1
90 |       end
91 |     end
92 |   end
93 | end
94 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-ruby-basic-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema" elementFormDefault="qualified" xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/">
 3 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" schemaLocation="xhtml-datatypes-1.xsd"/>
 4 |     <xs:annotation>
 5 |         <xs:documentation>
 6 |       This is the XML Schema module for Ruby Basic.
 7 |       $Id: xhtml-ruby-basic-1.xsd,v 1.6 2005/09/26 22:54:53 ahby Exp $
 8 |     </xs:documentation>
 9 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
10 |     </xs:annotation>
11 |     <xs:annotation>
12 |         <xs:documentation>
13 |       "Ruby" are short runs of text alongside the base text, typically
14 |       used in East Asian documents to indicate pronunciation or to
15 |       provide a short annotation. The full specification for Ruby is here:
16 |       
17 |         http://www.w3.org/TR/2001/REC-ruby-20010531/
18 | 
19 |       This module defines "Ruby Basic" or "simple Ruby" as described
20 |       in the specification:
21 |       
22 |         http://www.w3.org/TR/ruby/#simple-ruby1
23 |         
24 |       This module declares the elements and their attributes used to
25 |       support simple ruby annotation markup. Elements defined here are
26 |           * ruby, rb, rt, rp
27 |       Ruby Basic does not use the rbc or rtc elements.
28 |       The content of the ruby element for Ruby Basic
29 |       uses the rp element for fallback purposes.        
30 |     </xs:documentation>
31 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-ruby-20010531/#simple-ruby1"/>
32 |     </xs:annotation>
33 |     <xs:group name="xhtml.ruby.content.simple">
34 |         <xs:sequence>
35 |             <xs:element name="rb" type="xhtml.rb.type"/>
36 |             <xs:choice>
37 |                 <xs:element name="rt" type="xhtml.rt.type"/>
38 |                 <xs:sequence>
39 |                     <xs:element name="rp" type="xhtml.rp.type"/>
40 |                     <xs:element name="rt" type="xhtml.rt.type"/>
41 |                     <xs:element name="rp" type="xhtml.rp.type"/>
42 |                 </xs:sequence>
43 |             </xs:choice>
44 |         </xs:sequence>
45 |     </xs:group>
46 |     <!-- 
47 |    add to this group any common attributes for all Ruby elements 
48 |   -->
49 |     <xs:attributeGroup name="xhtml.ruby.common.attrib"/>
50 |     <xs:complexType name="xhtml.ruby.type">
51 |         <xs:group ref="xhtml.ruby.content.simple"/>
52 |         <xs:attributeGroup ref="xhtml.ruby.common.attrib"/>
53 |     </xs:complexType>
54 |     <!-- 
55 |    rb (ruby base) element 
56 |   -->
57 |     <xs:attributeGroup name="xhtml.rb.attrib">
58 |         <xs:attributeGroup ref="xhtml.ruby.common.attrib"/>
59 |     </xs:attributeGroup>
60 |     <xs:complexType name="xhtml.rb.type" mixed="true">
61 |         <xs:attributeGroup ref="xhtml.rb.attrib"/>
62 |     </xs:complexType>
63 |     <!--
64 |    rt (ruby text) element 
65 |   -->
66 |     <xs:attributeGroup name="xhtml.rt.attrib">
67 |         <xs:attributeGroup ref="xhtml.ruby.common.attrib"/>
68 |     </xs:attributeGroup>
69 |     <!--
70 |    Note that the rbspan element is not used in Ruby Basic
71 |   -->
72 |     <xs:complexType name="xhtml.rt.type" mixed="true">
73 |         <xs:attributeGroup ref="xhtml.rt.attrib"/>
74 |     </xs:complexType>
75 |     <!-- 
76 |    rp (ruby parenthesis) element 
77 |   -->
78 |     <xs:attributeGroup name="xhtml.rp.attrib">
79 |         <xs:attributeGroup ref="xhtml.ruby.common.attrib"/>
80 |     </xs:attributeGroup>
81 |     <xs:complexType name="xhtml.rp.type" mixed="true">
82 |         <xs:attributeGroup ref="xhtml.rp.attrib"/>
83 |     </xs:complexType>
84 | </xs:schema>


--------------------------------------------------------------------------------
/data/schemas/xhtml-notations-1.xsd:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <xs:schema
 3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
 4 |  elementFormDefault="qualified"
 5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
 6 | >
 7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
 8 |                schemaLocation="xhtml-datatypes-1.xsd" />
 9 |     <xs:annotation>
10 |         <xs:documentation>
11 |       Notations module
12 |       This is the XML Schema module for data type notations for XHTML
13 |       $Id: xhtml-notations-1.xsd,v 1.5 2005/09/26 22:54:53 ahby Exp $
14 |     </xs:documentation>
15 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
16 |     </xs:annotation>
17 |     <xs:annotation>
18 |         <xs:documentation>
19 |       Notations module
20 |       Defines the XHTML notations, many of these imported from 
21 |       other specifications and standards. When an existing FPI is
22 |       known, it is incorporated here.            
23 |     </xs:documentation>
24 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstraction.html#s_common_attrtypes"/>
25 |     </xs:annotation>
26 |     <!-- W3C XML 1.0 Recommendation -->
27 |     <xs:notation name="w3c-xml" public="ISO 8879//NOTATION Extensible Markup Language (XML) 1.0//EN"/>
28 |     <!-- XML 1.0 CDATA -->
29 |     <xs:notation name="cdata" public="-//W3C//NOTATION XML 1.0: CDATA//EN"/>
30 |     <!-- SGML Formal Public Identifiers -->
31 |     <xs:notation name="fpi" public="ISO 8879:1986//NOTATION Formal Public Identifier//EN"/>
32 |     <!-- XHTML Notations ... -->
33 |     <!-- Length defined for cellpadding/cellspacing -->
34 |     <!-- nn for pixels or nn% for percentage length -->
35 |     <!-- a single character, as per section 2.2 of [XML] -->
36 |     <xs:notation name="character" public="-//W3C//NOTATION XHTML Datatype: Character//EN"/>
37 |     <!-- a character encoding, as per [RFC2045] -->
38 |     <xs:notation name="charset" public="-//W3C//NOTATION XHTML Datatype: Charset//EN"/>
39 |     <!-- a space separated list of character encodings, as per [RFC2045] -->
40 |     <xs:notation name="charsets" public="-//W3C//NOTATION XHTML Datatype: Charsets//EN"/>
41 |     <!-- media type, as per [RFC2045] -->
42 |     <xs:notation name="contentType" public="-//W3C//NOTATION XHTML Datatype: ContentType//EN"/>
43 |     <!-- comma-separated list of media types, as per [RFC2045] -->
44 |     <xs:notation name="contentTypes" public="-//W3C//NOTATION XHTML Datatype: ContentTypes//EN"/>
45 |     <!-- date and time information. ISO date format -->
46 |     <xs:notation name="datetime" public="-//W3C//NOTATION XHTML Datatype: Datetime//EN"/>
47 |     <!-- a language code, as per [RFC3066] -->
48 |     <xs:notation name="languageCode" public="-//W3C//NOTATION XHTML Datatype: LanguageCode//EN"/>
49 |     <!-- nn for pixels or nn% for percentage length -->
50 |     <xs:notation name="length" public="-//W3C//NOTATION XHTML Datatype: Length//EN"/>
51 |     <!-- space-separated list of link types -->
52 |     <xs:notation name="linkTypes" public="-//W3C//NOTATION XHTML Datatype: LinkTypes//EN"/>
53 |     <!-- single or comma-separated list of media descriptors -->
54 |     <xs:notation name="mediaDesc" public="-//W3C//NOTATION XHTML Datatype: MediaDesc//EN"/>
55 |     <!-- pixel, percentage, or relative -->
56 |     <xs:notation name="multiLength" public="-//W3C//NOTATION XHTML Datatype: MultiLength//EN"/>
57 |     <!-- one or more digits (NUMBER) -->
58 |     <xs:notation name="number" public="-//W3C//NOTATION XHTML Datatype: Number//EN"/>
59 |     <!-- one or more digits (NUMBER) -->
60 |     <xs:notation name="pixels" public="-//W3C//NOTATION XHTML Datatype: Pixels//EN"/>
61 |     <!-- script expression -->
62 |     <xs:notation name="script" public="-//W3C//NOTATION XHTML Datatype: Script//EN"/>
63 |     <!-- textual content -->
64 |     <xs:notation name="text" public="-//W3C//NOTATION XHTML Datatype: Text//EN"/>
65 |     <!-- a Uniform Resource Identifier, see [URI] -->
66 |     <xs:notation name="uri" public="-//W3C//NOTATION XHTML Datatype: URI//EN"/>
67 |     <!-- a space-separated list of Uniform Resource Identifiers, see [URI] -->
68 |     <xs:notation name="uris" public="-//W3C//NOTATION XHTML Datatype: URIs//EN"/>
69 | </xs:schema>
70 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-special.ent:
--------------------------------------------------------------------------------
 1 | <!-- Special characters for XHTML -->
 2 | 
 3 | <!-- Character entity set. Typical invocation:
 4 |      <!ENTITY % HTMLspecial PUBLIC
 5 |         "-//W3C//ENTITIES Special for XHTML//EN"
 6 |         "http://www.w3.org/TR/xhtml1/DTD/xhtml-special.ent">
 7 |      %HTMLspecial;
 8 | -->
 9 | 
10 | <!-- Portions (C) International Organization for Standardization 1986:
11 |      Permission to copy in any form is granted for use with
12 |      conforming SGML systems and applications as defined in
13 |      ISO 8879, provided this notice is included in all copies.
14 | -->
15 | 
16 | <!-- Relevant ISO entity set is given unless names are newly introduced.
17 |      New names (i.e., not in ISO 8879 list) do not clash with any
18 |      existing ISO 8879 entity names. ISO 10646 character numbers
19 |      are given for each character, in hex. values are decimal
20 |      conversions of the ISO 10646 values and refer to the document
21 |      character set. Names are Unicode names.
22 | -->
23 | 
24 | <!-- C0 Controls and Basic Latin -->
25 | <!ENTITY quot    "&#34;"> <!--  quotation mark, U+0022 ISOnum -->
26 | <!ENTITY amp     "&#38;#38;"> <!--  ampersand, U+0026 ISOnum -->
27 | <!ENTITY lt      "&#38;#60;"> <!--  less-than sign, U+003C ISOnum -->
28 | <!ENTITY gt      "&#62;"> <!--  greater-than sign, U+003E ISOnum -->
29 | <!ENTITY apos	 "&#39;"> <!--  apostrophe = APL quote, U+0027 ISOnum -->
30 | 
31 | <!-- Latin Extended-A -->
32 | <!ENTITY OElig   "&#338;"> <!--  latin capital ligature OE,
33 |                                     U+0152 ISOlat2 -->
34 | <!ENTITY oelig   "&#339;"> <!--  latin small ligature oe, U+0153 ISOlat2 -->
35 | <!-- ligature is a misnomer, this is a separate character in some languages -->
36 | <!ENTITY Scaron  "&#352;"> <!--  latin capital letter S with caron,
37 |                                     U+0160 ISOlat2 -->
38 | <!ENTITY scaron  "&#353;"> <!--  latin small letter s with caron,
39 |                                     U+0161 ISOlat2 -->
40 | <!ENTITY Yuml    "&#376;"> <!--  latin capital letter Y with diaeresis,
41 |                                     U+0178 ISOlat2 -->
42 | 
43 | <!-- Spacing Modifier Letters -->
44 | <!ENTITY circ    "&#710;"> <!--  modifier letter circumflex accent,
45 |                                     U+02C6 ISOpub -->
46 | <!ENTITY tilde   "&#732;"> <!--  small tilde, U+02DC ISOdia -->
47 | 
48 | <!-- General Punctuation -->
49 | <!ENTITY ensp    "&#8194;"> <!-- en space, U+2002 ISOpub -->
50 | <!ENTITY emsp    "&#8195;"> <!-- em space, U+2003 ISOpub -->
51 | <!ENTITY thinsp  "&#8201;"> <!-- thin space, U+2009 ISOpub -->
52 | <!ENTITY zwnj    "&#8204;"> <!-- zero width non-joiner,
53 |                                     U+200C NEW RFC 2070 -->
54 | <!ENTITY zwj     "&#8205;"> <!-- zero width joiner, U+200D NEW RFC 2070 -->
55 | <!ENTITY lrm     "&#8206;"> <!-- left-to-right mark, U+200E NEW RFC 2070 -->
56 | <!ENTITY rlm     "&#8207;"> <!-- right-to-left mark, U+200F NEW RFC 2070 -->
57 | <!ENTITY ndash   "&#8211;"> <!-- en dash, U+2013 ISOpub -->
58 | <!ENTITY mdash   "&#8212;"> <!-- em dash, U+2014 ISOpub -->
59 | <!ENTITY lsquo   "&#8216;"> <!-- left single quotation mark,
60 |                                     U+2018 ISOnum -->
61 | <!ENTITY rsquo   "&#8217;"> <!-- right single quotation mark,
62 |                                     U+2019 ISOnum -->
63 | <!ENTITY sbquo   "&#8218;"> <!-- single low-9 quotation mark, U+201A NEW -->
64 | <!ENTITY ldquo   "&#8220;"> <!-- left double quotation mark,
65 |                                     U+201C ISOnum -->
66 | <!ENTITY rdquo   "&#8221;"> <!-- right double quotation mark,
67 |                                     U+201D ISOnum -->
68 | <!ENTITY bdquo   "&#8222;"> <!-- double low-9 quotation mark, U+201E NEW -->
69 | <!ENTITY dagger  "&#8224;"> <!-- dagger, U+2020 ISOpub -->
70 | <!ENTITY Dagger  "&#8225;"> <!-- double dagger, U+2021 ISOpub -->
71 | <!ENTITY permil  "&#8240;"> <!-- per mille sign, U+2030 ISOtech -->
72 | <!ENTITY lsaquo  "&#8249;"> <!-- single left-pointing angle quotation mark,
73 |                                     U+2039 ISO proposed -->
74 | <!-- lsaquo is proposed but not yet ISO standardized -->
75 | <!ENTITY rsaquo  "&#8250;"> <!-- single right-pointing angle quotation mark,
76 |                                     U+203A ISO proposed -->
77 | <!-- rsaquo is proposed but not yet ISO standardized -->
78 | 
79 | <!-- Currency Symbols -->
80 | <!ENTITY euro   "&#8364;"> <!--  euro sign, U+20AC NEW -->
81 | 


--------------------------------------------------------------------------------
/lib/validate_website/validator.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'uri'
  4 | 
  5 | require 'nokogiri' unless ValidateWebsite.jruby?
  6 | require 'w3c_validators'
  7 | 
  8 | require 'validate_website/validator_class_methods'
  9 | 
 10 | module ValidateWebsite
 11 |   # Document validation from DTD or XSD (webservice for html5)
 12 |   class Validator
 13 |     extend ValidatorClassMethods
 14 | 
 15 |     @html5_validator_service_url = 'https://validator.nu/'
 16 |     XHTML_SCHEMA_PATH = File.expand_path('../../data/schemas', __dir__)
 17 |     @mutex = Mutex.new
 18 | 
 19 |     class << self
 20 |       attr_accessor :html5_validator_service_url
 21 | 
 22 |       # http://www.w3.org/TR/xhtml1-schema/
 23 |       def schema(namespace)
 24 |         @mutex.synchronize do
 25 |           Dir.chdir(XHTML_SCHEMA_PATH) do
 26 |             if File.exist?("#{namespace}.xsd")
 27 |               Nokogiri::XML::Schema(File.read("#{namespace}.xsd"))
 28 |             end
 29 |           end
 30 |         end
 31 |       end
 32 | 
 33 |       alias xsd schema
 34 |     end
 35 | 
 36 |     attr_reader :original_doc, :body, :dtd, :doc, :namespace, :html5_validator
 37 | 
 38 |     ##
 39 |     # @param [Nokogiri::HTML::Document] original_doc
 40 |     # @param [String] The raw HTTP response body of the page
 41 |     # @param [Regexp] Errors to ignore
 42 |     # @param [Symbol] html5_validator default offline :tidy
 43 |     #                                 fallback webservice :nu
 44 |     def initialize(original_doc, body, ignore: nil, html5_validator: :tidy)
 45 |       @errors = []
 46 |       @document, @dtd_uri = nil
 47 |       @original_doc = original_doc
 48 |       @body = body
 49 |       @ignore = ignore
 50 |       @html5_validator = html5_validator
 51 |       @dtd = @original_doc.internal_subset
 52 |       @namespace = find_namespace(@dtd)
 53 |     end
 54 | 
 55 |     ##
 56 |     # @return [Boolean]
 57 |     def valid?
 58 |       find_errors
 59 |       errors.empty?
 60 |     end
 61 | 
 62 |     # @return [Array] of errors
 63 |     def errors
 64 |       @errors.map!(&:to_s)
 65 |       @ignore ? @errors.reject { |e| @ignore =~ e } : @errors
 66 |     end
 67 | 
 68 |     private
 69 | 
 70 |     # http://www.w3.org/TR/xhtml1/#dtds
 71 |     def find_namespace(dtd)
 72 |       return unless dtd.system_id
 73 | 
 74 |       dtd_uri = URI.parse(dtd.system_id)
 75 |       return unless dtd_uri.path
 76 | 
 77 |       @dtd_uri = dtd_uri
 78 |       File.basename(@dtd_uri.path, '.dtd')
 79 |     end
 80 | 
 81 |     def document
 82 |       return @document if @document
 83 | 
 84 |       @document = if @dtd_uri && @body.match(@dtd_uri.to_s)
 85 |                     @body.sub(@dtd_uri.to_s, @namespace + '.dtd')
 86 |                   else
 87 |                     @body
 88 |                   end
 89 |     end
 90 | 
 91 |     # @return [Array] contain result errors
 92 |     def validate
 93 |       if document =~ /^\<!DOCTYPE html\>/i
 94 |         html5_validate
 95 |       elsif self.class.schema(@namespace)
 96 |         self.class.schema(@namespace).validate(xhtml_doc)
 97 |       else
 98 |         # dont have xsd fall back to dtd
 99 |         Dir.chdir(XHTML_SCHEMA_PATH) do
100 |           Nokogiri::HTML.parse(document)
101 |         end.errors
102 |       end
103 |     end
104 | 
105 |     # http://nokogiri.org/tutorials/ensuring_well_formed_markup.html
106 |     def find_errors
107 |       @errors = validate
108 |     rescue Nokogiri::XML::SyntaxError => e
109 |       @errors << e
110 |     end
111 | 
112 |     def html5_validate
113 |       if html5_validator.to_sym == :tidy && self.class.tidy
114 |         tidy_validate
115 |       elsif html5_validator.to_sym == :nu
116 |         nu_validate
117 |       else
118 |         Nokogiri::HTML5(document, max_errors: -1).errors
119 |       end
120 |     end
121 | 
122 |     def tidy_validate
123 |       results = self.class.tidy.new(document)
124 |       if results.errors
125 |         errors.concat(results.errors.split("\n"))
126 |       else
127 |         []
128 |       end
129 |     end
130 | 
131 |     def nu_validate
132 |       validator = W3CValidators::NuValidator.new(
133 |         validator_uri: self.class.validator_uri
134 |       )
135 |       results = validator.validate_text(document)
136 |       errors.concat(results.errors)
137 |     end
138 | 
139 |     def xhtml_doc
140 |       Dir.chdir(XHTML_SCHEMA_PATH) do
141 |         Nokogiri::XML(document) { |cfg| cfg.nonoent.dtdload.dtdvalid.nonet }
142 |       end
143 |     end
144 |   end
145 | end
146 | 


--------------------------------------------------------------------------------
/lib/validate_website/core.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'set'
  4 | require 'open-uri'
  5 | require 'webrick/cookie'
  6 | 
  7 | require 'validate_website/option_parser'
  8 | require 'validate_website/validator'
  9 | require 'validate_website/colorful_messages'
 10 | 
 11 | require 'spidr'
 12 | require 'crass'
 13 | 
 14 | # Base module ValidateWebsite
 15 | module ValidateWebsite
 16 |   autoload :Crawl, 'validate_website/crawl'
 17 |   autoload :Static, 'validate_website/static'
 18 |   autoload :StaticLink, 'validate_website/static_link'
 19 | 
 20 |   # Core class for static or website validation
 21 |   class Core
 22 |     attr_accessor :site
 23 |     attr_reader :options, :host, :errors_count, :not_founds_count
 24 | 
 25 |     include ColorfulMessages
 26 | 
 27 |     EXIT_SUCCESS = 0
 28 |     EXIT_FAILURE_MARKUP = 64
 29 |     EXIT_FAILURE_NOT_FOUND = 65
 30 |     EXIT_FAILURE_MARKUP_NOT_FOUND = 66
 31 |     START_MESSAGE = 'Validating'
 32 | 
 33 |     # Initialize core ValidateWebsite class
 34 |     # @example
 35 |     #   new({ site: "https://example.com/" }, :crawl)
 36 |     # @param [Hash] options
 37 |     # @param [Symbol] validation_type `crawl` for web or `static` for local
 38 |     # @return [NilClass]
 39 |     def initialize(options, validation_type)
 40 |       @not_founds_count = 0
 41 |       @errors_count = 0
 42 |       @options = Parser.parse(options, validation_type).to_h
 43 |       @site = @options[:site]
 44 |       @service_url = @options[:html5_validator_service_url]
 45 |       Validator.html5_validator_service_url = @service_url if @service_url
 46 |     end
 47 | 
 48 |     def errors?
 49 |       @errors_count.positive?
 50 |     end
 51 | 
 52 |     def not_founds?
 53 |       @not_founds_count.positive?
 54 |     end
 55 | 
 56 |     def exit_status
 57 |       if errors? && not_founds?
 58 |         EXIT_FAILURE_MARKUP_NOT_FOUND
 59 |       elsif errors?
 60 |         EXIT_FAILURE_MARKUP
 61 |       elsif not_founds?
 62 |         EXIT_FAILURE_NOT_FOUND
 63 |       else
 64 |         EXIT_SUCCESS
 65 |       end
 66 |     end
 67 | 
 68 |     def default_cookies
 69 |       WEBrick::Cookie.parse(@options[:cookies]).each_with_object({}) do |c, h|
 70 |         h[c.name] = c.value
 71 |         h
 72 |       end
 73 |     end
 74 | 
 75 |     private
 76 | 
 77 |     def start_message(type)
 78 |       puts color(:note, "#{START_MESSAGE} #{type}\n", @options[:color])
 79 |     end
 80 | 
 81 |     def check_css_syntax(page)
 82 |       nodes = Crass::Parser.parse_stylesheet(page.body)
 83 |       return unless any_css_errors?(nodes)
 84 | 
 85 |       handle_validation_error(page.url)
 86 |     end
 87 | 
 88 |     def any_css_errors?(nodes)
 89 |       nodes.any? do |node|
 90 |         if node[:children]
 91 |           any_css_errors? node.delete(:children)
 92 |         elsif node[:tokens]
 93 |           any_css_errors? node.delete(:tokens)
 94 |         else
 95 |           node[:node] == :error || node[:error] == true
 96 |         end
 97 |       end
 98 |     end
 99 | 
100 |     def print_status_line(total, failures, not_founds, errors)
101 |       puts "\n\n"
102 |       puts color(:info, ["#{total} visited",
103 |                          "#{failures} failures",
104 |                          "#{not_founds} not founds",
105 |                          "#{errors} errors"].join(', '), options[:color])
106 |     end
107 | 
108 |     def not_found_error(location)
109 |       puts "\n"
110 |       puts color(:error, "#{location} linked but not exist", options[:color])
111 |       @not_founds_count += 1
112 |     end
113 | 
114 |     ##
115 |     # @param [Nokogiri::HTML::Document] original_doc
116 |     # @param [String] The raw HTTP response body of the page
117 |     # @param [String] url
118 |     # @param [Hash] Validator options
119 |     #
120 |     def validate(doc, body, url, options)
121 |       validator = Validator.new(doc, body, **options)
122 |       if validator.valid?
123 |         print color(:success, '.', options[:color]) # rspec style
124 |       else
125 |         handle_html_validation_error(validator, url)
126 |       end
127 |     end
128 | 
129 |     def handle_html_validation_error(validator, url)
130 |       handle_validation_error(url)
131 |       return unless options[:verbose]
132 | 
133 |       puts color(:error, validator.errors.join(', '), options[:color])
134 |     end
135 | 
136 |     def handle_validation_error(url)
137 |       @errors_count += 1
138 |       puts "\n"
139 |       puts color(:error, "* #{url}", options[:color])
140 |     end
141 |   end
142 | end
143 | 


--------------------------------------------------------------------------------
/lib/validate_website/option_parser.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require 'slop'
  4 | require File.expand_path('version', __dir__)
  5 | 
  6 | module ValidateWebsite
  7 |   # Internal class for parse command line args
  8 |   class Parser
  9 |     VALID_TYPES = %i[crawl static].freeze
 10 | 
 11 |     DEFAULT_OPTIONS = {
 12 |       site: 'http://localhost/',
 13 |       pattern: '**/*.html',
 14 |       exclude: nil,
 15 |       user_agent: nil,
 16 |       markup: true,
 17 |       css_syntax: false,
 18 |       # crawler: log not found url (404 status code)
 19 |       # static: log not found url (not on filesystem, `pwd` considered
 20 |       # as root " / ")
 21 |       not_found: false,
 22 |       file: nil,
 23 |       # regex to ignore certain validation errors
 24 |       ignore: nil,
 25 |       color: true,
 26 |       html5_validator: 'tidy',
 27 |       # internal verbose for ValidateWebsite
 28 |       verbose: false
 29 |     }.freeze
 30 | 
 31 |     # Generic parse method for crawl or static options
 32 |     def self.parse(options, type)
 33 |       raise ArgumentError unless VALID_TYPES.include?(type)
 34 | 
 35 |       # We are in command line (ARGV)
 36 |       if options.is_a?(Array)
 37 |         send("command_line_parse_#{type}", options)
 38 |       else
 39 |         # for testing or Ruby usage with a Hash
 40 |         DEFAULT_OPTIONS.merge(options)
 41 |       end
 42 |     end
 43 | 
 44 |     def self.default_args
 45 |       Slop.parse do |opt|
 46 |         yield opt if block_given?
 47 |         markup_syntax(opt)
 48 |         boolean_options(opt)
 49 |         ignore_html5_options(opt)
 50 |         verbose_option(opt)
 51 |         version_help(opt)
 52 |       end
 53 |     end
 54 | 
 55 |     def self.ignore_html5_options(opt)
 56 |       opt.regexp('-i', '--ignore',
 57 |                  'Validation errors to ignore (ex: "valign|autocorrect")')
 58 |       opt.string('-x', '--html5-validator',
 59 |                  'Change default html5 validator engine (tidy/nu/nokogiri)',
 60 |                  default: DEFAULT_OPTIONS[:html5_validator])
 61 |       opt.string('-5', '--html5-validator-service-url',
 62 |                  'Change default html5 validator service URL for "nu" engine')
 63 |     end
 64 | 
 65 |     def self.markup_syntax(opt)
 66 |       opt.bool('-m', '--markup',
 67 |                "Markup validation (default: #{DEFAULT_OPTIONS[:markup]})",
 68 |                default: DEFAULT_OPTIONS[:markup])
 69 |       opt.bool('--css-syntax',
 70 |                "Css validation (default: #{DEFAULT_OPTIONS[:css_syntax]})",
 71 |                default: DEFAULT_OPTIONS[:css_syntax])
 72 |     end
 73 | 
 74 |     def self.boolean_options(opt)
 75 |       opt.bool('-n', '--not-found',
 76 |                "Log not found url (default: #{DEFAULT_OPTIONS[:not_found]})",
 77 |                default: DEFAULT_OPTIONS[:not_found])
 78 |       opt.bool('--color',
 79 |                "Show colored output (default: #{DEFAULT_OPTIONS[:color]})",
 80 |                default: DEFAULT_OPTIONS[:color])
 81 |     end
 82 | 
 83 |     def self.verbose_option(opt)
 84 |       opt.bool('-v', '--verbose',
 85 |                "Show validator errors (default: #{DEFAULT_OPTIONS[:verbose]})",
 86 |                default: DEFAULT_OPTIONS[:verbose])
 87 |     end
 88 | 
 89 |     def self.version_help(opt)
 90 |       opt.on('--version', 'Display version.') do
 91 |         puts ValidateWebsite::VERSION
 92 |         exit
 93 |       end
 94 |       opt.on('-h', '--help', 'Display this help message.') do
 95 |         puts opt
 96 |         exit
 97 |       end
 98 |     end
 99 | 
100 |     # Parse command line for validate-website bin
101 |     # @param [Array] _args unused; default_args calls Slop.parse without it
102 |     # @return result of Slop.parse (Core#initialize calls to_h on it)
103 |     def self.command_line_parse_crawl(_args)
104 |       default_args do |opt|
105 |         opt.string('-s', '--site',
106 |                    "Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
107 |                    default: DEFAULT_OPTIONS[:site])
108 |         opt.string('-u', '--user-agent',
109 |                    'Change user agent',
110 |                    default: DEFAULT_OPTIONS[:user_agent])
111 |         opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
112 |         opt.string('-c', '--cookies', 'Set defaults cookies')
113 |       end
114 |     end
115 | 
116 |     # Parse command line for validate-website-static bin
117 |     # @param [Array] _args unused; default_args calls Slop.parse without it
118 |     # @return result of Slop.parse (Core#initialize calls to_h on it)
119 |     def self.command_line_parse_static(_args)
120 |       default_args do |opt|
121 |         opt.string('-s', '--site',
122 |                    "Website to crawl (default: #{DEFAULT_OPTIONS[:site]})",
123 |                    default: DEFAULT_OPTIONS[:site])
124 |         opt.string('-p', '--pattern',
125 |                    "Filename pattern (default: #{DEFAULT_OPTIONS[:pattern]})",
126 |                    default: DEFAULT_OPTIONS[:pattern])
127 |         opt.regexp('-e', '--exclude', 'Url to exclude (ex: "redirect|news")')
128 |       end
129 |     end
130 |   end
131 | end
132 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # validate-website
  2 | 
  3 | ## Description
  4 | 
  5 | Web crawler for checking the validity of your documents
  6 | 
  7 | ![validate website](https://raw.github.com/spk/validate-website/master/validate-website.png)
  8 | 
  9 | ## Installation
 10 | 
 11 | ### Debian
 12 | 
 13 | ```
 14 | apt install ruby-dev libxslt1-dev libxml2-dev
 15 | ```
 16 | 
 17 | If you want fully local validation, see the [tidy
 18 | packages](https://binaries.html-tidy.org/).
 19 | 
 20 | ### RubyGems
 21 | 
 22 | ```
 23 | gem install validate-website
 24 | ```
 25 | 
 26 | ## Synopsis
 27 | 
 28 | ```
 29 | validate-website [OPTIONS]
 30 | validate-website-static [OPTIONS]
 31 | ```
 32 | 
 33 | ## Examples
 34 | 
 35 | ```
 36 | validate-website -v -s https://www.ruby-lang.org/
 37 | validate-website -v -x tidy -s https://www.ruby-lang.org/
 38 | validate-website -v -x nu -s https://www.ruby-lang.org/
 39 | validate-website -h
 40 | ```
 41 | 
 42 | ## Description
 43 | 
 44 | validate-website is a web crawler for checking the markup validity with XML
 45 | Schema / DTD and not found urls (more info [doc/validate-website.adoc](https://github.com/spk/validate-website/blob/master/doc/validate-website.adoc)).
 46 | 
 47 | validate-website-static checks the markup validity of your local documents with
 48 | XML Schema / DTD (more info [doc/validate-website-static.adoc](https://github.com/spk/validate-website/blob/master/doc/validate-website-static.adoc)).
 49 | 
 50 | HTML5 support with [libtidy5](http://www.html-tidy.org/) or [Validator.nu Web
 51 | Service](https://checker.html5.org/).
 52 | 
 53 | ## Exit status
 54 | 
 55 | * 0: Markup is valid and no 404 found.
 56 | * 64: Invalid markup found.
 57 | * 65: Some pages were not found.
 58 | * 66: Invalid markup found and some pages were not found.
 59 | 
 60 | ## On your application
 61 | 
 62 | ``` ruby
 63 | require 'validate_website/validator'
 64 | body = '<!DOCTYPE html><html></html>'
 65 | v = ValidateWebsite::Validator.new(Nokogiri::HTML(body), body)
 66 | v.valid? # => false
 67 | ```
 68 | 
 69 | ## Jekyll static site validation
 70 | 
 71 | You can add this Rake task to validate a
 72 | [jekyll](https://github.com/jekyll/jekyll) site:
 73 | 
 74 | ``` ruby
 75 | desc 'validate _site with validate website'
 76 | task validate: :build do
 77 |   Dir.chdir("_site") do
 78 |     system("validate-website-static",
 79 |            "--verbose",
 80 |            "--exclude", "examples",
 81 |            "--site", HTTP_URL)
 82 |     # propagate the validator's exit status so the rake task fails on errors
 83 |     exit($?.exitstatus)
 84 |   end
 85 | end
 86 | ```
 87 | 
 88 | ## More info
 89 | 
 90 | ### HTML5
 91 | 
 92 | #### Tidy5
 93 | 
 94 | If libtidy5 is found on your system, it is used by default to validate your
 95 | HTML5 documents. This does not depend on a third-party service; everything is
 96 | done locally.
 97 | 
 98 | #### nokogiri
 99 | 
100 | nokogiri can validate HTML5 documents without a third-party service, but it
101 | reports fewer errors than tidy.
102 | 
103 | #### Validator.nu web service
104 | 
105 | When the `--html5-validator nu` option is used, HTML5 validation is done by the
106 | Validator.nu Web Service, so the content of your webpage is logged by a third
107 | party. This is not the case for the other validations, because validate-website
108 | uses the XML Schema or DTD files stored in the data/ directory.
109 | 
110 | Please read <http://about.validator.nu/#tos> for more info on the HTML5
111 | validation service.
112 | 
113 | ##### Use validator standalone web server locally
114 | 
115 | You can download [validator](https://github.com/validator/validator) jar and
116 | start it with:
117 | 
118 | ```
119 | java -cp PATH_TO/vnu.jar nu.validator.servlet.Main 8888
120 | ```
121 | 
122 | Then you can use validate-website option:
123 | 
124 | ```
125 | --html5-validator-service-url http://localhost:8888/
126 | # or
127 | export VALIDATOR_NU_URL="http://localhost:8888/"
128 | ```
129 | 
130 | This will prevent you from being blacklisted by the validator web service.
131 | 
132 | ## Tests
133 | 
134 | With standard environment:
135 | 
136 | ```
137 | bundle exec rake
138 | ```
139 | 
140 | ## Credits
141 | 
142 | * Thanks to tenderlove for Nokogiri; this tool is inspired by markup_validity.
143 | * And Chris Kite for Anemone web-spider framework and postmodern for Spidr.
144 | 
145 | ## Contributors
146 | 
147 | See [GitHub](https://github.com/spk/validate-website/graphs/contributors).
148 | 
149 | ## License
150 | 
151 | The MIT License
152 | 
153 | Copyright (c) 2009-2022 Laurent Arnoud <laurent@spkdev.net>
154 | 
155 | ---
156 | [![Build](https://img.shields.io/gitlab/pipeline/spkdev/validate-website/master)](https://gitlab.com/spkdev/validate-website/-/commits/master)
157 | [![Coverage](https://gitlab.com/spkdev/validate-website/badges/master/coverage.svg)](https://gitlab.com/spkdev/validate-website/-/commits/master)
158 | [![Version](https://img.shields.io/gem/v/validate-website.svg)](https://rubygems.org/gems/validate-website)
159 | [![Documentation](https://img.shields.io/badge/doc-rubydoc-blue.svg)](http://www.rubydoc.info/gems/validate-website)
160 | [![License](https://img.shields.io/badge/license-MIT-blue.svg)](http://opensource.org/licenses/MIT "MIT")
161 | [![Inline docs](https://inch-ci.org/github/spk/validate-website.svg?branch=master)](http://inch-ci.org/github/spk/validate-website)
162 | 


--------------------------------------------------------------------------------
/data/schemas/xml-handlers-1.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <xs:schema 
  3 |     targetNamespace="http://www.w3.org/2001/xml-events" 
  4 |     xmlns="http://www.w3.org/2001/xml-events" 
  5 |     xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
  6 |     xmlns:xs="http://www.w3.org/2001/XMLSchema" 
  7 |     xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
  8 |     xsi:schemaLocation="http://www.w3.org/2001/XMLSchema 
  9 |                         http://www.w3.org/2001/XMLSchema.xsd" 
 10 |     elementFormDefault="unqualified" 
 11 |     blockDefault="#all" 
 12 |     finalDefault="#all" 
 13 |     attributeFormDefault="unqualified">
 14 | 
 15 |   <xs:annotation>
 16 |     <xs:documentation>
 17 |       This is the XML Schema for XML Handlers
 18 | 
 19 |       URI: http://www.w3.org/MarkUp/SCHEMA/xml-handlers-1.xsd
 20 |       $Id: xml-handlers-1.xsd,v 1.1 2008/06/25 14:36:29 smccarro Exp $
 21 |     </xs:documentation>
 22 |     <xs:documentation source="xml-events-copyright-2.xsd"/>
 23 |   </xs:annotation>
 24 | 
 25 |   <xs:attributeGroup name="action.attlist">
 26 |     <xs:attribute name="event" use="required" type="xs:QName"/>
 27 |     <xs:attribute name="targetid" type="xs:IDREF"/>
 28 |     <xs:attribute name="declare">
 29 |         <xs:simpleType>
 30 |             <xs:restriction base="xs:NMTOKEN">
 31 |                 <xs:enumeration value="declare"/>
 32 |             </xs:restriction>
 33 |         </xs:simpleType>
 34 |     </xs:attribute>
 35 |     <xs:attribute name="if" type="xs:normalizedString"/>
 36 |     <xs:attribute name="while" type="xs:normalizedString"/>
 37 |     <xs:attribute name="id" type="xs:ID"/>
 38 |   </xs:attributeGroup>
 39 | 
 40 |   <xs:complexType name="action.type">
 41 |     <xs:attributeGroup ref="action.attlist"/>
 42 |   </xs:complexType>
 43 | 
 44 |   <xs:element name="action" type="action.type"/>
 45 | 
 46 |   <xs:attributeGroup name="dispatchEvent.attlist">
 47 |     <xs:attribute name="raise" type="xs:QName"/>
 48 |     <xs:attribute name="to" type="xs:IDREF"/>
 49 |     <xs:attribute name="bubbles">
 50 |         <xs:simpleType>
 51 |             <xs:restriction base="xs:NMTOKEN">
 52 |                 <xs:enumeration value="bubbles"/>
 53 |             </xs:restriction>
 54 |         </xs:simpleType>
 55 |     </xs:attribute>
 56 |     <xs:attribute name="cancelable">
 57 |         <xs:simpleType>
 58 |             <xs:restriction base="xs:NMTOKEN">
 59 |                 <xs:enumeration value="cancelable"/>
 60 |             </xs:restriction>
 61 |         </xs:simpleType>
 62 |     </xs:attribute>
 63 |     <xs:attribute name="id" type="xs:ID"/>
 64 |   </xs:attributeGroup>
 65 | 
 66 |   <xs:complexType name="dispatchEvent.type">
 67 |     <xs:attributeGroup ref="dispatchEvent.attlist"/>
 68 |   </xs:complexType>
 69 | 
 70 |   <xs:element name="dispatchEvent" type="dispatchEvent.type"/>
 71 | 
 72 |   <xs:attributeGroup name="addEventListener.attlist">
 73 |     <xs:attribute name="event" use="required" type="xs:QName"/>
 74 |     <xs:attribute name="handler" use="required" type="xs:IDREF"/>
 75 |     <xs:attribute name="phase" default="default">
 76 |         <xs:simpleType>
 77 |             <xs:restriction base="xs:NMTOKEN">
 78 |                 <xs:enumeration value="bubble"/>
 79 |                 <xs:enumeration value="capture"/>
 80 |                 <xs:enumeration value="default"/>
 81 |                 <xs:enumeration value="target"/>
 82 |             </xs:restriction>
 83 |         </xs:simpleType>
 84 |     </xs:attribute>
 85 |     <xs:attribute name="id" type="xs:ID"/>
 86 |   </xs:attributeGroup>
 87 | 
 88 |   <xs:complexType name="addEventListener.type">
 89 |     <xs:attributeGroup ref="addEventListener.attlist"/>
 90 |   </xs:complexType>
 91 | 
 92 |   <xs:element name="addEventListener" type="addEventListener.type"/>
 93 | 
 94 |   <xs:attributeGroup name="removeEventListener.attlist">
 95 |     <xs:attribute name="event" use="required" type="xs:QName"/>
 96 |     <xs:attribute name="handler" use="required" type="xs:IDREF"/>
 97 |     <xs:attribute name="phase" default="default">
 98 |         <xs:simpleType>
 99 |             <xs:restriction base="xs:NMTOKEN">
100 |                 <xs:enumeration value="bubble"/>
101 |                 <xs:enumeration value="capture"/>
102 |                 <xs:enumeration value="default"/>
103 |                 <xs:enumeration value="target"/>
104 |             </xs:restriction>
105 |         </xs:simpleType>
106 |     </xs:attribute>
107 |     <xs:attribute name="id" type="xs:ID"/>
108 |   </xs:attributeGroup>
109 |   <xs:complexType name="removeEventListener.type">
110 |     <xs:attributeGroup ref="removeEventListener.attlist"/>
111 |   </xs:complexType>
112 | 
113 |   <xs:element name="removeEventListener" type="removeEventListener.type"/>
114 | 
115 |   <xs:attributeGroup name="stopPropagation.attlist">
116 |     <xs:attribute name="event" use="required" type="xs:QName"/>
117 |     <xs:attribute name="id" type="xs:ID"/>
118 |   </xs:attributeGroup>
119 | 
120 |   <xs:complexType name="stopPropagation.type">
121 |     <xs:attributeGroup ref="stopPropagation.attlist"/>
122 |   </xs:complexType>
123 | 
124 |   <xs:element name="stopPropagation" type="stopPropagation.type"/>
125 | 
126 |   <xs:attributeGroup name="preventDefault.attlist">
127 |     <xs:attribute name="event" use="required" type="xs:QName"/>
128 |     <xs:attribute name="id" type="xs:ID"/>
129 |   </xs:attributeGroup>
130 | 
131 |   <xs:complexType name="preventDefault.type">
132 |     <xs:attributeGroup ref="preventDefault.attlist"/>
133 |   </xs:complexType>
134 | 
135 |   <xs:element name="preventDefault" type="preventDefault.type"/>
136 | </xs:schema>
137 | 


--------------------------------------------------------------------------------
/test/data/html5-fail.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE html>
  2 | <!-- Page last generated 2017-04-08 21:20:41 +0000 -->
  3 | <html lang="en">
  4 |   <head>
  5 |     <meta charset="utf-8">
  6 |     <meta http-equiv="X-UA-Compatible" content="IE=edge">
  7 |     <meta name="viewport" content="width=device-width, initial-scale=1">
  8 |     <title>The Rust Programming Language</title>
  9 |     <meta name="keywords" content="Rust, Rust programming language, rustlang, rust-lang, Mozilla Rust">
 10 |     <meta name="description" content="A systems programming language that runs blazingly fast, prevents segfaults, and guarantees thread safety.">
 11 | 
 12 |     <link rel="stylesheet" href="/css/bootstrap.css">
 13 |     <link rel="stylesheet" href="/css/style.css">
 14 |     
 15 |   </head>
 16 | 
 17 |   <body class="container">
 18 |     <a href="https://github.com/rust-lang/rust">
 19 |       <img class="ribbon" style="display: none" src="/logos/forkme.png" alt="Fork me on GitHub" width="298" height="298">
 20 |     </a>
 21 | 
 22 |     <header>
 23 |       <ul class="row menu">
 24 | 	<li class="col-xs-12 col-md-2">
 25 |           <a href="/en-US/index.html">
 26 |             <img class="img-responsive" src="/logos/rust-logo-blk.svg" onerror="this.src='/logos/rust-logo-256x256-blk.png'" height="128" width="128" alt="Rust logo" />
 27 |           </a>
 28 | 	</li>
 29 | 	<li class="col-xs-12 col-md-10 menu">
 30 | 	  <h2><a href="/en-US/documentation.html">Documentation</a></h2>
 31 | 	  <h2><a href="/en-US/install.html">Install</a></h2>
 32 | 	  <h2><a href="/en-US/community.html">Community</a></h2>
 33 | 	  <h2><a href="/en-US/contribute.html">Contribute</a></h2>
 34 | 	</li>
 35 |       </ul>
 36 |     </header>
 37 | 
 38 |         <div class="row pitch-row">
 39 |       <div class="col-md-8">
 40 |         <p class="pitch">
 41 |           <b>Rust</b> is a systems programming language
 42 |           that runs blazingly fast,
 43 |           prevents segfaults,
 44 |           and guarantees thread safety.
 45 |           <br/>
 46 |           <b><a href="friends.html">See who's using Rust.</a></b>
 47 |         </p>
 48 |       </div>
 49 |       <div class="col-md-4">
 50 |         <a class="release-button" href="install.html">
 51 |           <div class="release-version">Install Rust <span>1.16.0</span></div>
 52 |         </a>
 53 |         <div class="release-date">March 16, 2017</div>
 54 |       </div>
 55 |     </div>
 56 | 
 57 |     <div class="row code-row">
 58 |       <div class="col-md-4">
 59 |       <h2>Featuring</h2>
 60 |       <ul class="laundry-list">
 61 |         <li>zero-cost abstractions</li>
 62 |         <li>move semantics</li>
 63 |         <li>guaranteed memory safety</li>
 64 |         <li>threads without data races</li>
 65 |         <li>trait-based generics</li>
 66 |         <li>pattern matching</li>
 67 |         <li>type inference</li>
 68 |         <li>minimal runtime</li>
 69 |         <li>efficient C bindings</li>
 70 |       </ul>
 71 |       </div>
 72 |       <div class="col-md-8">
 73 |         <div id="active-code">
 74 |           <button type="button" class="btn btn-primary btn-sm" id="run-code">Run</button>
 75 |           <div id="editor">fn main() { 
 76 |     let greetings = ["Hello", "Hola", "Bonjour",
 77 |                      "こんにちは", "您好"];
 78 | 
 79 |     for (num, greeting) in greetings.iter().enumerate() {
 80 |         println!("{}", greeting);
 81 |         match num {
 82 |             0 =>  println!("This code is editable and runnable!"),
 83 |             1 =>  println!("Este código es editable y ejecutable!"),
 84 |             2 =>  println!("Ce code est modifiable et exécutable!"),
 85 |             3 =>  println!("このコードは編集して実行出来ます！"),
 86 |             4 =>  println!("这个代码是可以编辑并且能够运行的！"),
 87 |             _ =>  {},
 88 |         }
 89 |     }
 90 | }
 91 | </div>
 92 |           <div id="result" data-msg-running="Running...">
 93 |             <a id="playlink"><i class="icon-link-ext"></i></a>
 94 |           </div>
 95 |         </div>
 96 |         <div id="static-code"><pre class='rust'>
 97 | <span class='kw'>fn</span> main() {
 98 |     <span class='kw'>let</span> greetings = [<span class='string'>"Hello"</span>, <span class='string'>"Hola"<span>, <span class='string'>"Bonjour"</span>,
 99 |                      <span class='string'>"こんにちは"</span>, <span class='string'>"您好"</span>];
100 | 
101 |     <span class='kw'>for</span> (num, greeting) in greetings.iter().enumerate() {
102 |         <span class='prelude-val'>println!</span>(<span class='string'>"{}"</span>, greeting);
103 |         <span class='kw'>match</span> num {
104 |             0 =>  <span class='prelude-val'>println!</span>(<span class='string'>"This code is editable and runnable!"</span>),
105 |             1 =>  <span class='prelude-val'>println!</span>(<span class='string'>"Este código es editable y ejecutable!"</span>),
106 |             2 =>  <span class='prelude-val'>println!</span>(<span class='string'>"Ce code est modifiable et exécutable!"</span>),
107 |             3 =>  <span class='prelude-val'>println!</span>(<span class='string'>"このコードは編集して実行出来ます！"</span>),
108 |             4 =>  <span class='prelude-val'>println!</span>(<span class='string'>"这个代码是可以编辑并且能够运行的！"</span>),
109 |             _ =>  {},
110 |         }
111 |     }
112 | }
113 | </pre>
114 | </div>
115 |         <div class="more-examples">
116 |           <a href="http://rustbyexample.com/">More examples</a>
117 |         </div>
118 |       </div>
119 |     </div>
120 | 
121 |     <footer>
122 |       <p>Our site in other languages:
123 |         <a href="/en-US/">English</a>,
124 | <a href="/es-ES/">Español</a>,
125 | <a href="/fr-FR/">Français</a>,
126 | <a href="/it-IT/">Italiano</a>,
127 | <a href="/ja-JP/">日本語</a>,
128 | <a href="/ko-KR/">한국어</a>,
129 | <a href="/pl-PL/">Polski</a>,
130 | <a href="/pt-BR/">Português</a>,
131 | <a href="/ru-RU/">Русский</a>,
132 | <a href="/vi-VN/">Tiếng việt</a>,
133 | <a href="/zh-CN/">简体中文</a>
134 | 
135 |       </p>
136 |     </footer>
137 | 
138 |   </body>
139 | </html>
140 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-events-1.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <xs:schema
  3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  4 |  elementFormDefault="qualified"
  5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
  6 | >
  7 |     <xs:import namespace="http://www.w3.org/1999/xhtml/datatypes/" 
  8 |                schemaLocation="xhtml-datatypes-1.xsd" />
  9 | 
 10 |     <xs:annotation>
 11 |         <xs:documentation>
 12 |       This is the XML Schema Intrinsic Events module for XHTML
 13 |       $Id: xhtml-events-1.xsd,v 1.4 2005/09/26 22:54:53 ahby Exp $
 14 |     </xs:documentation>
 15 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
 16 |     </xs:annotation>
 17 |     <xs:annotation>
 18 |         <xs:documentation>
 19 |       Intrinsic Event Attributes
 20 |       These are the event attributes defined in HTML 4,
 21 |       Section 18.2.3 "Intrinsic Events".
 22 |     </xs:documentation>
 23 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstract_modules.html#s_intrinsiceventsmodule"/>
 24 |     </xs:annotation>
 25 |     <xs:attributeGroup name="xhtml.Events.attrib">
 26 |         <xs:attribute name="onclick" type="xh11d:Script"/>
 27 |         <xs:attribute name="ondblclick" type="xh11d:Script"/>
 28 |         <xs:attribute name="onmousedown" type="xh11d:Script"/>
 29 |         <xs:attribute name="onmouseup" type="xh11d:Script"/>
 30 |         <xs:attribute name="onmouseover" type="xh11d:Script"/>
 31 |         <xs:attribute name="onmousemove" type="xh11d:Script"/>
 32 |         <xs:attribute name="onmouseout" type="xh11d:Script"/>
 33 |         <xs:attribute name="onkeypress" type="xh11d:Script"/>
 34 |         <xs:attribute name="onkeydown" type="xh11d:Script"/>
 35 |         <xs:attribute name="onkeyup" type="xh11d:Script"/>
 36 |     </xs:attributeGroup>
 37 |     <!--
 38 |     anchor (a) element: adds the onfocus and onblur script attributes
 39 | -->
 40 |     <xs:attributeGroup name="xhtml.a.events.attlist">
 41 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 42 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 43 |     </xs:attributeGroup>
 44 |     <!--
 45 |     form element: adds the onsubmit and onreset script attributes
 46 | -->
 47 |     <xs:attributeGroup name="xhtml.form.events.attlist">
 48 |         <xs:attribute name="onsubmit" type="xh11d:Script"/>
 49 |         <xs:attribute name="onreset" type="xh11d:Script"/>
 50 |     </xs:attributeGroup>
 51 |     <!--
 52 |     label element: adds the onfocus and onblur script attributes
 53 | -->
 54 |     <xs:attributeGroup name="xhtml.label.events.attlist">
 55 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 56 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 57 |     </xs:attributeGroup>
 58 |     <!--
 59 |     input element: adds onfocus, onblur, onselect and onchange
 60 | -->
 61 |     <xs:attributeGroup name="xhtml.input.events.attlist">
 62 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 63 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 64 |         <xs:attribute name="onselect" type="xh11d:Script"/>
 65 |         <xs:attribute name="onchange" type="xh11d:Script"/>
 66 |     </xs:attributeGroup>
 67 |     <!--
 68 |     select element: adds onfocus, onblur and onchange (no onselect)
 69 | -->
 70 |     <xs:attributeGroup name="xhtml.select.events.attlist">
 71 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 72 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 73 |         <xs:attribute name="onchange" type="xh11d:Script"/>
 74 |     </xs:attributeGroup>
 75 |     <!--
 76 |     textarea element: adds onfocus, onblur, onselect and onchange
 77 | -->
 78 |     <xs:attributeGroup name="xhtml.textarea.events.attlist">
 79 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 80 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 81 |         <xs:attribute name="onselect" type="xh11d:Script"/>
 82 |         <xs:attribute name="onchange" type="xh11d:Script"/>
 83 |     </xs:attributeGroup>
 84 |     <!--
 85 |     button element: adds the onfocus and onblur script attributes
 86 | -->
 87 |     <xs:attributeGroup name="xhtml.button.events.attlist">
 88 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
 89 |         <xs:attribute name="onblur" type="xh11d:Script"/>
 90 |     </xs:attributeGroup>
 91 |     <!--
 92 |     body element: adds the onload and onunload script attributes
 93 | -->
 94 |     <xs:attributeGroup name="xhtml.body.events.attlist">
 95 |         <xs:attribute name="onload" type="xh11d:Script"/>
 96 |         <xs:attribute name="onunload" type="xh11d:Script"/>
 97 |     </xs:attributeGroup>
 98 |     <!--
 99 |     area element: adds the onfocus and onblur script attributes
100 | -->
101 |     <xs:attributeGroup name="xhtml.area.events.attlist">
102 |         <xs:attribute name="onfocus" type="xh11d:Script"/>
103 |         <xs:attribute name="onblur" type="xh11d:Script"/>
104 |     </xs:attributeGroup>
105 |     <!--
106 |    Global Events Attributes: top-level declarations, then a group that refs them
107 | -->
108 |     <xs:attribute name="onclick" type="xh11d:Script"/>
109 |     <xs:attribute name="ondblclick" type="xh11d:Script"/>
110 |     <xs:attribute name="onmousedown" type="xh11d:Script"/>
111 |     <xs:attribute name="onmouseup" type="xh11d:Script"/>
112 |     <xs:attribute name="onmouseover" type="xh11d:Script"/>
113 |     <xs:attribute name="onmousemove" type="xh11d:Script"/>
114 |     <xs:attribute name="onmouseout" type="xh11d:Script"/>
115 |     <xs:attribute name="onkeypress" type="xh11d:Script"/>
116 |     <xs:attribute name="onkeydown" type="xh11d:Script"/>
117 |     <xs:attribute name="onkeyup" type="xh11d:Script"/>
118 |     <xs:attributeGroup name="xhtml.Global.events.attrib">
119 |         <xs:attribute ref="onclick"/>
120 |         <xs:attribute ref="ondblclick"/>
121 |         <xs:attribute ref="onmousedown"/>
122 |         <xs:attribute ref="onmouseup"/>
123 |         <xs:attribute ref="onmouseover"/>
124 |         <xs:attribute ref="onmousemove"/>
125 |         <xs:attribute ref="onmouseout"/>
126 |         <xs:attribute ref="onkeypress"/>
127 |         <xs:attribute ref="onkeydown"/>
128 |         <xs:attribute ref="onkeyup"/>
129 |     </xs:attributeGroup>
130 | </xs:schema>
131 | 


--------------------------------------------------------------------------------
/data/schemas/xframes-1.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <xs:schema
  3 |     xmlns:xs="http://www.w3.org/2001/XMLSchema"
  4 |     targetNamespace="http://www.w3.org/2002/06/xframes/"
  5 |     xmlns="http://www.w3.org/2002/06/xframes/"
  6 |     xmlns:xml="http://www.w3.org/XML/1998/namespace"
  7 |     elementFormDefault="qualified">
  8 | 
  9 |   <xs:annotation>
 10 |     <xs:documentation xml:lang="en">
 11 |       This is XFrames - an XML application for composing documents together.
 12 |       URI: http://www.w3.org/MarkUp/SCHEMA/xframes-1.xsd
 13 |      
 14 |       Copyright ©2002-2005 W3C (MIT, ERCIM, Keio), All Rights Reserved.
 15 | 
 16 |         Editor:   Masayasu Ishikawa (mimasa@w3.org)
 17 |         Revision: $Id: xframes-1.xsd,v 1.9 2005/10/05 23:56:45 mimasa Exp $
 18 | 
 19 |       Permission to use, copy, modify and distribute this XML Schema for
 20 |       XFrames and its accompanying documentation for any purpose and without
 21 |       fee is hereby granted in perpetuity, provided that the above copyright
 22 |       notice and this paragraph appear in all copies. The copyright holders
 23 |       make no representation about the suitability of this XML Schema
 24 |       for any purpose.
 25 | 
 26 |       It is provided "as is" without expressed or implied warranty.
 27 |     </xs:documentation>
 28 |   </xs:annotation>
 29 | 
 30 |   <xs:import namespace="http://www.w3.org/XML/1998/namespace"
 31 |     schemaLocation="http://www.w3.org/2001/xml.xsd">
 32 |     <xs:annotation>
 33 |       <xs:documentation xml:lang="en">
 34 |         Get access to the XML namespace
 35 |       </xs:documentation>
 36 |     </xs:annotation>
 37 |   </xs:import>
 38 | 
 39 |   <xs:annotation>
 40 |     <xs:documentation xml:lang="en">
 41 |       Datatypes
 42 |     </xs:documentation>
 43 |   </xs:annotation>
 44 | 
 45 |   <xs:simpleType name="ContentType.datatype">
 46 |     <xs:annotation>
 47 |       <xs:documentation xml:lang="en">
 48 |         media type, as per [RFC2045]
 49 |       </xs:documentation>
 50 |     </xs:annotation>
 51 |     <xs:restriction base="xs:string"/>
 52 |   </xs:simpleType>
 53 | 
 54 |   <xs:simpleType name="MediaDesc.datatype">
 55 |     <xs:annotation>
 56 |       <xs:documentation>
 57 |         A comma-separated list of media descriptors as described by [CSS2].
 58 |         The default is all.
 59 |       </xs:documentation>
 60 |     </xs:annotation>
 61 |     <xs:restriction base="xs:string">
 62 |       <xs:pattern value="[^,]+(,\s*[^,]+)*"/>
 63 |     </xs:restriction>
 64 |   </xs:simpleType>
 65 | 
 66 |   <xs:simpleType name="QName.datatype">
 67 |     <xs:annotation>
 68 |       <xs:documentation>
 69 |         An [XMLNS]-qualified name.
 70 |       </xs:documentation>
 71 |     </xs:annotation>
 72 |     <xs:restriction base="xs:QName"/>
 73 |   </xs:simpleType>
 74 | 
 75 |   <xs:simpleType name="URI.datatype">
 76 |     <xs:annotation>
 77 |       <xs:documentation xml:lang="en">
 78 |         An Internationalized Resource Identifier Reference, as defined
 79 |         by [IRI].
 80 |       </xs:documentation>
 81 |     </xs:annotation>
 82 |     <xs:restriction base="xs:anyURI"/>
 83 |   </xs:simpleType>
 84 | 
 85 |   <xs:attributeGroup name="Common.attrib">
 86 |     <xs:annotation>
 87 |       <xs:documentation xml:lang="en">
 88 |         Common attributes
 89 |       </xs:documentation>
 90 |     </xs:annotation>
 91 |     <xs:attribute name="class" type="xs:NMTOKENS"/>
 92 |     <xs:attribute ref="xml:id"/>
 93 |     <xs:attribute name="title" type="xs:string"/>
 94 |   </xs:attributeGroup>
 95 | 
 96 |   <xs:element name="frames"> <!-- optional head, then one group or 1+ frame -->
 97 |     <xs:complexType>
 98 |       <xs:sequence>
 99 |         <xs:element minOccurs="0" maxOccurs="1" ref="head"/>
100 |         <xs:choice>
101 |           <xs:element ref="group"/>
102 |           <xs:element minOccurs="1" maxOccurs="unbounded" ref="frame"/>
103 |         </xs:choice>
104 |       </xs:sequence>
105 |       <xs:attributeGroup ref="Common.attrib"/>
106 |       <xs:attribute ref="xml:base"/>
107 |     </xs:complexType>
108 |   </xs:element>
109 | 
110 |   <xs:element name="head"> <!-- exactly one title, then any number of style -->
111 |     <xs:complexType>
112 |       <xs:sequence>
113 |         <xs:element ref="title"/>
114 |         <xs:element minOccurs="0" maxOccurs="unbounded" ref="style"/>
115 |       </xs:sequence>
116 |       <xs:attributeGroup ref="Common.attrib"/>
117 |     </xs:complexType>
118 |   </xs:element>
119 | 
120 |   <xs:element name="title"> <!-- mixed content; only the Common attributes -->
121 |     <xs:complexType mixed="true">
122 |       <xs:attributeGroup ref="Common.attrib"/>
123 |     </xs:complexType>
124 |   </xs:element>
125 | 
126 |   <xs:element name="style"> <!-- mixed content; type is the only required attribute -->
127 |     <xs:complexType mixed="true">
128 |       <xs:attributeGroup ref="Common.attrib"/>
129 |       <xs:attribute name="type" type="ContentType.datatype" use="required"/>
130 |       <xs:attribute name="media" type="MediaDesc.datatype"/>
131 |       <xs:attribute name="source" type="URI.datatype"/>
132 |     </xs:complexType>
133 |   </xs:element>
134 | 
135 |   <xs:element name="group"> <!-- one or more group/frame children, nestable -->
136 |     <xs:complexType>
137 |       <xs:choice minOccurs="1" maxOccurs="unbounded">
138 |         <xs:element ref="group"/>
139 |         <xs:element ref="frame"/>
140 |       </xs:choice>
141 |       <xs:attributeGroup ref="Common.attrib"/>
142 |       <xs:attribute name="compose" default="vertical"> <!-- a QName or one of the four keywords below -->
143 |         <xs:simpleType>
144 |           <xs:union memberTypes="QName.datatype">
145 |             <xs:simpleType>
146 |              <xs:restriction base="xs:token">
147 |                <xs:enumeration value="vertical"/>
148 |                <xs:enumeration value="horizontal"/>
149 |                <xs:enumeration value="single"/>
150 |                <xs:enumeration value="free"/>
151 |              </xs:restriction>
152 |            </xs:simpleType>
153 |           </xs:union>
154 |         </xs:simpleType>
155 |       </xs:attribute>
156 |     </xs:complexType>
157 |   </xs:element>
158 | 
159 |   <xs:element name="frame"> <!-- no child elements; Common attributes plus an optional source URI -->
160 |     <xs:complexType>
161 |       <xs:attributeGroup ref="Common.attrib"/>
162 |       <xs:attribute name="source" type="URI.datatype"/>
163 |     </xs:complexType>
164 |   </xs:element>
165 | 
166 | </xs:schema>
167 | 


--------------------------------------------------------------------------------
/test/validator_test.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require File.expand_path('test_helper', __dir__)
  4 | 
  5 | # rubocop:disable Metrics/BlockLength
  6 | describe ValidateWebsite::Validator do
  7 |   let(:subject) { ValidateWebsite::Validator }
  8 | 
  9 |   before do
 10 |     WebMock.reset! # start each example from a clean WebMock state
 11 |     @http = Spidr::Agent.new # agent the examples use to fetch page URLs
 12 |   end
 13 | 
 14 |   describe('xhtml1') do
 15 |     it 'can ignore' do
 16 |       name = 'w3.org-xhtml1-strict-errors'
 17 |       file = File.join('test', 'data', "#{name}.html")
 18 |       page = FakePage.new(name,
 19 |                           body: File.open(file).read,
 20 |                           content_type: 'text/html')
 21 |       @xhtml1_page = @http.get_page(page.url)
 22 |       ignore = /width|height|Length/
 23 |       validator = subject.new(@xhtml1_page.doc,
 24 |                               @xhtml1_page.body,
 25 |                               ignore: ignore)
 26 |       _(validator.valid?).must_equal true
 27 |       _(validator.errors).must_equal []
 28 |     end
 29 | 
 30 |     it 'xhtml1-strict should be valid' do
 31 |       name = 'xhtml1-strict'
 32 |       dtd_uri = 'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'
 33 |       file = File.join('test', 'data', "#{name}.html")
 34 |       page = FakePage.new(name,
 35 |                           body: File.open(file).read,
 36 |                           content_type: 'text/html')
 37 |       @xhtml1_page = @http.get_page(page.url)
 38 |       ignore = /width|height|Length/
 39 |       validator = subject.new(@xhtml1_page.doc,
 40 |                               @xhtml1_page.body,
 41 |                               ignore: ignore)
 42 |       _(validator.dtd.system_id).must_equal dtd_uri
 43 |       _(validator.namespace).must_equal name
 44 |       _(validator.valid?).must_equal true
 45 |       _(validator.errors).must_equal []
 46 |     end
 47 |   end
 48 | 
 49 |   describe('html5') do
 50 |     describe('when valid') do
 51 |       before do
 52 |         validator_res = File.join('test', 'data', 'validator.nu-success.json')
 53 |         stub_request(:any, /#{subject.html5_validator_service_url}/)
 54 |           .to_return(body: File.open(validator_res).read)
 55 |       end
 56 |       it 'html5 should be valid' do
 57 |         name = 'html5'
 58 |         file = File.join('test', 'data', "#{name}.html")
 59 |         page = FakePage.new(name,
 60 |                             body: File.open(file).read,
 61 |                             content_type: 'text/html')
 62 |         @html5_page = @http.get_page(page.url)
 63 |         validator = subject.new(@html5_page.doc,
 64 |                                 @html5_page.body)
 65 |         _(validator.valid?).must_equal true
 66 |       end
 67 |     end
 68 | 
 69 |     describe('when not valid') do
 70 |       before do
 71 |         validator_res = File.join('test', 'data', 'validator.nu-failure.json')
 72 |         stub_request(:any, /#{subject.html5_validator_service_url}/)
 73 |           .to_return(body: File.open(validator_res).read)
 74 |         name = 'html5-fail'
 75 |         file = File.join('test', 'data', "#{name}.html")
 76 |         page = FakePage.new(name,
 77 |                             body: File.open(file).read,
 78 |                             content_type: 'text/html')
 79 |         @html5_page = @http.get_page(page.url)
 80 |       end
 81 | 
 82 |       describe('with nu') do
 83 |         it 'should have an array of errors' do
 84 |           validator = subject.new(@html5_page.doc,
 85 |                                   @html5_page.body,
 86 |                                   html5_validator: :nu)
 87 |           _(validator.valid?).must_equal false
 88 |           _(validator.errors.size).must_equal 3
 89 |         end
 90 | 
 91 |         it 'should exclude errors ignored by :ignore option' do
 92 |           ignore = /Unclosed element/
 93 |           validator = subject.new(@html5_page.doc,
 94 |                                   @html5_page.body,
 95 |                                   ignore: ignore,
 96 |                                   html5_validator: :nu)
 97 |           _(validator.valid?).must_equal false
 98 |           _(validator.errors.size).must_equal 1
 99 |         end
100 |       end
101 | 
102 |       describe('with nokogiri') do
103 |         it 'have an array of errors' do
104 |           skip('nokogiri dont support jruby') if ValidateWebsite.jruby?
105 |           validator = subject.new(@html5_page.doc,
106 |                                   @html5_page.body,
107 |                                   html5_validator: :nokogiri)
108 |           _(validator.valid?).must_equal false
109 |           _(validator.errors.size).must_equal 1
110 |         end
111 | 
112 |         it 'exclude errors ignored by :ignore option' do
113 |           skip('nokogiri dont support jruby') if ValidateWebsite.jruby?
114 |           ignore = /End tag 'pre' isn't allowed here/
115 |           validator = subject.new(@html5_page.doc,
116 |                                   @html5_page.body,
117 |                                   ignore: ignore,
118 |                                   html5_validator: :nokogiri)
119 |           _(validator.valid?).must_equal true
120 |           _(validator.errors.size).must_equal 0
121 |         end
122 |       end
123 | 
124 |       describe('with tidy') do
125 |         it 'should have an array of errors' do
126 |           skip('tidy is not installed') unless ValidateWebsite::Validator.tidy
127 |           validator = subject.new(@html5_page.doc,
128 |                                   @html5_page.body)
129 |           _(validator.valid?).must_equal false
130 |           _(validator.errors.size).must_equal 3
131 |         end
132 | 
133 |         it 'should exclude errors ignored by :ignore option' do
134 |           skip('tidy is not installed') unless ValidateWebsite::Validator.tidy
135 |           ignore = /letter not allowed here|trimming empty/
136 |           validator = subject.new(@html5_page.doc,
137 |                                   @html5_page.body,
138 |                                   ignore: ignore)
139 |           _(validator.valid?).must_equal false
140 |           _(validator.errors.size).must_equal 2
141 |         end
142 |       end
143 |     end
144 |   end
145 | 
146 |   describe('html4') do
147 |     it 'should validate html4' do
148 |       name = 'html4-strict'
149 |       file = File.join('test', 'data', "#{name}.html")
150 |       page = FakePage.new(name,
151 |                           body: File.open(file).read,
152 |                           content_type: 'text/html')
153 |       @html4_strict_page = @http.get_page(page.url)
154 |       validator = subject.new(@html4_strict_page.doc,
155 |                               @html4_strict_page.body)
156 |       validator.valid?
157 |       _(validator.errors).must_equal []
158 |     end
159 |   end
160 | end
161 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-datatypes-1.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version="1.0" encoding="UTF-8"?>
  2 | <xs:schema
  3 |  xmlns:xs="http://www.w3.org/2001/XMLSchema"
  4 |  xmlns="http://www.w3.org/1999/xhtml/datatypes/"
  5 |  xmlns:xh11d="http://www.w3.org/1999/xhtml/datatypes/"
  6 |  targetNamespace="http://www.w3.org/1999/xhtml/datatypes/"
  7 |  elementFormDefault="qualified"
  8 | >
  9 |     <xs:annotation>
 10 |         <xs:documentation>
 11 |           XHTML Datatypes
 12 |           This is the XML Schema datatypes module for XHTML
 13 |           
 14 |           Defines containers for the XHTML datatypes, many of
 15 |           these imported from other specifications and standards.
 16 |           
 17 |           $Id: xhtml-datatypes-1.xsd,v 1.11 2008/10/07 16:57:03 ahby Exp $
 18 |         </xs:documentation>
 19 |         <xs:documentation source="xhtml-copyright-1.xsd"/>
 20 |         <xs:documentation source="http://www.w3.org/TR/2001/REC-xhtml-modularization-20010410/abstraction.html#s_common_attrtypes"/>
 21 |     </xs:annotation>
 22 | 
 23 |     <!-- nn for pixels or nn% for percentage length -->
 24 |     <xs:simpleType name="Length">
 25 |         <xs:union memberTypes="xs:nonNegativeInteger">
 26 |             <xs:simpleType>
 27 |                 <xs:restriction base="xs:token">
 28 |                     <xs:pattern value="\d+[%]|\d*\.\d+[%]"/>
 29 |                 </xs:restriction>
 30 |             </xs:simpleType>
 31 |         </xs:union>
 32 |     </xs:simpleType>
 33 |     <!-- space-separated list of link types -->
 34 |     <xs:simpleType name="LinkTypes">
 35 |         <xs:list itemType="xs:NMTOKEN"/>
 36 |     </xs:simpleType>
 37 |     <!-- single or comma-separated list of media descriptors -->
 38 |     <xs:simpleType name="MediaDesc">
 39 |         <xs:restriction base="xs:string"/>
 40 |     </xs:simpleType>
 41 |     <!-- pixel, percentage, or relative -->
 42 |     <xs:simpleType name="MultiLength">
 43 |         <xs:union memberTypes="xh11d:Length">
 44 |             <xs:simpleType>
 45 |                 <xs:restriction base="xs:token">
 46 |                     <xs:pattern value="\d*\*"/>
 47 |                 </xs:restriction>
 48 |             </xs:simpleType>
 49 |         </xs:union>
 50 |     </xs:simpleType>
 51 |     <!-- one or more digits (NUMBER) -->
 52 |     <xs:simpleType name="Number">
 53 |         <xs:restriction base="xs:nonNegativeInteger"/>
 54 |     </xs:simpleType>
 55 |     <!-- integer representing length in pixels -->
 56 |     <xs:simpleType name="Pixels">
 57 |         <xs:restriction base="xs:nonNegativeInteger"/>
 58 |     </xs:simpleType>
 59 |     <!-- script expression -->
 60 |     <xs:simpleType name="Script">
 61 |         <xs:restriction base="xs:string"/>
 62 |     </xs:simpleType>
 63 |     <!-- sixteen color names or RGB color expression-->
 64 |     <xs:simpleType name="Color">
 65 |         <xs:union memberTypes="xs:NMTOKEN">
 66 |             <xs:simpleType>
 67 |                 <xs:restriction base="xs:token">
 68 |                     <xs:pattern value="#[0-9a-fA-F]{3}([0-9a-fA-F]{3})?"/>
 69 |                 </xs:restriction>
 70 |             </xs:simpleType>
 71 |         </xs:union>
 72 |     </xs:simpleType>
 73 |     <!-- textual content -->
 74 |     <xs:simpleType name="Text">
 75 |         <xs:restriction base="xs:string"/>
 76 |     </xs:simpleType>
 77 |     <!-- Imported Datatypes  -->
 78 |     <!-- a single character, as per section 2.2 of [XML] -->
 79 |     <xs:simpleType name="Character">
 80 |         <xs:restriction base="xs:string">
 81 |             <xs:length value="1" fixed="true"/>
 82 |         </xs:restriction>
 83 |     </xs:simpleType>
 84 |     <!-- a character encoding, as per [RFC2045] -->
 85 |     <xs:simpleType name="Charset">
 86 |         <xs:restriction base="xs:string"/>
 87 |     </xs:simpleType>
 88 |     <!-- a space separated list of character encodings, as per [RFC2045] -->
 89 |     <xs:simpleType name="Charsets">
 90 |         <xs:list itemType="Charset"/>
 91 |     </xs:simpleType>
 92 |     <!-- media type, as per [RFC2045] -->
 93 |     <xs:simpleType name="ContentType">
 94 |         <xs:restriction base="xs:string"/>
 95 |     </xs:simpleType>
 96 |     <!-- comma-separated list of media types, as per [RFC2045] -->
 97 |     <xs:simpleType name="ContentTypes">
 98 |         <xs:restriction base="xs:string"/>
 99 |     </xs:simpleType>
100 |     <!-- date and time information. ISO date format -->
101 |     <xs:simpleType name="Datetime">
102 |         <xs:restriction base="xs:dateTime"/>
103 |     </xs:simpleType>
104 |     <!-- formal public identifier, as per [ISO8879] -->
105 |     <xs:simpleType name="FPI">
106 |         <xs:restriction base="xs:normalizedString"/>
107 |     </xs:simpleType>
108 | 
109 |     <!-- a window name as used in the target attribute -->
110 |     <xs:simpleType name="FrameTarget">
111 |       <xs:union>
112 |         <xs:simpleType>
113 |           <xs:restriction base="xs:NMTOKEN">
114 |             <xs:enumeration value="_blank"/>
115 |             <xs:enumeration value="_self"/>
116 |             <xs:enumeration value="_parent"/>
117 |             <xs:enumeration value="_top"/>
118 |           </xs:restriction>
119 |         </xs:simpleType>
120 |         <xs:simpleType>
121 |           <xs:restriction base="xs:string">
122 |             <xs:pattern value="[a-zA-Z].*"/>
123 |           </xs:restriction>
124 |         </xs:simpleType>
125 |       </xs:union>
126 |     </xs:simpleType>
127 |     
128 |     <!-- a language code, as per [RFC3066] -->
129 |     <xs:simpleType name="LanguageCode">
130 |         <xs:restriction base="xs:language"/>
131 |     </xs:simpleType>
132 |     <!-- a comma separated list of language ranges -->
133 |     <xs:simpleType name="LanguageCodes">
134 |         <xs:restriction base="xs:string"/>
135 |     </xs:simpleType>
136 |     <!-- a Uniform Resource Identifier, see [URI] -->
137 |     <xs:simpleType name="URI">
138 |         <xs:restriction base="xs:anyURI"/>
139 |     </xs:simpleType>
140 |     <!-- a space-separated list of Uniform Resource Identifiers, see [URI] -->
141 |     <xs:simpleType name="URIs">
142 |         <xs:list itemType="xs:anyURI"/>
143 |     </xs:simpleType>
144 |     <!-- comma-separated list of MultiLength -->
145 |     <xs:simpleType name="MultiLengths">
146 |         <xs:restriction base="xs:string"/>
147 |     </xs:simpleType>
148 |     <!-- character Data -->
149 |     <xs:simpleType name="CDATA">
150 |         <xs:restriction base="xs:string"/>
151 |     </xs:simpleType>
152 |     <!-- CURIE placeholder datatypes -->
153 |     <xs:simpleType name="CURIE"> <!-- optional "prefix:" (prefix may be empty), then 1+ characters -->
154 |         <xs:restriction base="xs:string">
155 |             <xs:pattern value="(([\i-[:]][\c-[:]]*)?:)?.+" />
156 |             <xs:minLength value="1"/>
157 |         </xs:restriction>
158 |     </xs:simpleType> 
159 |     <xs:simpleType name="CURIEs"> <!-- list whose items are CURIE -->
160 |         <xs:list itemType="xh11d:CURIE"/>
161 |     </xs:simpleType>
162 |     <xs:simpleType name="SafeCURIE"> <!-- the CURIE pattern wrapped in [ ], at least 3 characters -->
163 |     <xs:restriction base="xs:string">
164 |         <xs:pattern value="\[(([\i-[:]][\c-[:]]*)?:)?.+\]" />
165 |         <xs:minLength value="3"/>
166 |     </xs:restriction>
167 |     </xs:simpleType>
168 |     <xs:simpleType name="SafeCURIEs"> <!-- list whose items are SafeCURIE -->
169 |         <xs:list itemType="xh11d:SafeCURIE"/>
170 |     </xs:simpleType>
171 |     <xs:simpleType name="URIorSafeCURIE"> <!-- either xs:anyURI or SafeCURIE -->
172 |         <xs:union memberTypes="xs:anyURI xh11d:SafeCURIE" />
173 |     </xs:simpleType>
174 |     <xs:simpleType name="URIorSafeCURIEs"> <!-- list whose items are URIorSafeCURIE -->
175 |         <xs:list itemType="xh11d:URIorSafeCURIE"/>
176 |     </xs:simpleType>
177 | </xs:schema>
178 | 


--------------------------------------------------------------------------------
/test/crawler_test.rb:
--------------------------------------------------------------------------------
  1 | # frozen_string_literal: true
  2 | 
  3 | require File.expand_path('test_helper', __dir__)
  4 | 
  5 | # rubocop:disable Metrics/BlockLength
  6 | describe ValidateWebsite::Crawl do
  7 |   before do
  8 |     WebMock.reset! # start each example from a clean WebMock state
  9 |     stub_request(:get, /#{TEST_DOMAIN}/).to_return(status: 200)
 10 |     _out, _err = capture_io do # capture and discard console output
 11 |       @validate_website = ValidateWebsite::Crawl.new(color: false)
 12 |     end
 13 |   end
 14 | 
 15 |   def validator # shorthand for the validator class used by the examples
 16 |     ValidateWebsite::Validator
 17 |   end
 18 | 
 19 |   describe 'options' do
 20 |     it 'can change user-agent' do
 21 |       ua = %{Linux / Firefox 29: Mozilla/5.0 (X11; Linux x86_64; rv:29.0) \
 22 |              Gecko/20100101 Firefox/29.0}
 23 |       _out, _err = capture_io do
 24 |         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, user_agent: ua)
 25 |         v.crawl
 26 |         _(v.crawler.user_agent).must_equal ua
 27 |       end
 28 |     end
 29 | 
 30 |     it 'can change html5 validator service url' do
 31 |       original = validator.html5_validator_service_url
 32 |       new = 'http://localhost:8888/'
 33 |       _out, _err = capture_io do
 34 |         ValidateWebsite::Crawl.new(site: TEST_DOMAIN,
 35 |                                    html5_validator_service_url: new)
 36 |         _(validator.html5_validator_service_url).must_equal new
 37 |         validator.html5_validator_service_url = original
 38 |       end
 39 |     end
 40 |   end
 41 | 
 42 |   describe('cookies') do
 43 |     it 'can set cookies' do
 44 |       cookies = 'tz=Europe%2FBerlin; guid=ZcpBshbtStgl9VjwTofq'
 45 |       _out, _err = capture_io do
 46 |         v = ValidateWebsite::Crawl.new(site: TEST_DOMAIN, cookies: cookies)
 47 |         v.crawl
 48 |         _(v.crawler.cookies.cookies_for_host(v.host))
 49 |           .must_equal v.default_cookies
 50 |       end
 51 |     end
 52 |   end
 53 | 
 54 |   describe('html') do
 55 |     it 'extract url' do
 56 |       name = 'xhtml1-strict'
 57 |       file = File.join('test', 'data', "#{name}.html")
 58 |       page = FakePage.new(name,
 59 |                           body: File.open(file).read,
 60 |                           content_type: 'text/html')
 61 |       @validate_website.site = page.url
 62 |       _out, _err = capture_io do
 63 |         @validate_website.crawl
 64 |       end
 65 |       _(@validate_website.history_count).must_equal 5
 66 |     end
 67 | 
 68 |     it 'extract link' do
 69 |       name = 'html4-strict'
 70 |       file = File.join('test', 'data', "#{name}.html")
 71 |       page = FakePage.new(name,
 72 |                           body: File.open(file).read,
 73 |                           content_type: 'text/html')
 74 |       @validate_website.site = page.url
 75 |       _out, _err = capture_io do
 76 |         @validate_website.crawl
 77 |       end
 78 |       _(@validate_website.history_count).must_equal 98
 79 |     end
 80 | 
 81 |     it 'can change validator' do
 82 |       name = 'html5-fail'
 83 |       file = File.join('test', 'data', "#{name}.html")
 84 |       page = FakePage.new(name,
 85 |                           body: File.open(file).read,
 86 |                           content_type: 'text/html')
 87 |       validator_res = File.join('test', 'data', 'validator.nu-failure.json')
 88 |       stub_request(:any, /#{validator.html5_validator_service_url}/)
 89 |         .to_return(body: File.open(validator_res).read)
 90 |       @validate_website.site = page.url
 91 |       _out, _err = capture_io do
 92 |         @validate_website.crawl(html5_validator: :nu, ignore: /Warning/)
 93 |       end
 94 |       _(@validate_website.errors_count).must_equal 1
 95 |     end
 96 | 
 97 |     it 'crawl when URLs are not ascii only' do
 98 |       name = 'cozy-community'
 99 |       file = File.join('test', 'data', "#{name}.html")
100 |       page = FakePage.new(name,
101 |                           body: File.open(file).read,
102 |                           content_type: 'text/html')
103 |       validator_res = File.join('test', 'data', 'validator.nu-failure.json')
104 |       stub_request(:any, /#{validator.html5_validator_service_url}/)
105 |         .to_return(body: File.open(validator_res).read)
106 |       @validate_website.site = page.url
107 |       _out, _err = capture_io do
108 |         @validate_website.crawl
109 |       end
110 |     end
111 | 
112 |     it 'dont try to extract imgs for redirect' do
113 |       url = 'https://wordpress.org/support/bb-login.php'
114 |       stub_request(:get, url).to_return(
115 |         status: 302,
116 |         headers: {
117 |           'Location' => 'https://login.wordpress.org/',
118 |           'Content-Type' => 'text/html; charset=UTF-8'
119 |         }
120 |       )
121 |       @validate_website.site = url
122 |       _out, _err = capture_io do
123 |         @validate_website.crawl
124 |       end
125 |     end
126 |   end
127 | 
128 |   describe('css') do
129 |     describe 'extract urls' do
130 |       it 'crawl css and extract url' do
131 |         page = FakePage.new('test.css',
132 |                             body: '.t {background-image: url(pouet);}
133 |                                  .t {background-image: url(/image/pouet.png)}
134 |                                  .t {background-image: url(/image/pouet_42.png)}
135 |                                  .t {background-image: url(/image/pouet)}',
136 |                             content_type: 'text/css')
137 |         @validate_website.site = page.url
138 |         _out, _err = capture_io do
139 |           @validate_website.crawl
140 |         end
141 |         _(@validate_website.history_count).must_equal 5
142 |       end
143 | 
144 |       it 'should extract url with single quote' do
145 |         page = FakePage.new('test.css',
146 |                             body: ".test {background-image: url('pouet');}",
147 |                             content_type: 'text/css')
148 |         @validate_website.site = page.url
149 |         _out, _err = capture_io do
150 |           @validate_website.crawl
151 |         end
152 |         _(@validate_website.history_count).must_equal 2
153 |       end
154 | 
155 |       it 'should extract url with double quote' do
156 |         page = FakePage.new('test.css',
157 |                             body: '.test {background-image: url("pouet");}',
158 |                             content_type: 'text/css')
159 |         @validate_website.site = page.url
160 |         _out, _err = capture_io do
161 |           @validate_website.crawl
162 |         end
163 |         _(@validate_website.history_count).must_equal 2
164 |       end
165 | 
166 |       it 'should extract url with params' do
167 |         page = FakePage.new('test.css',
168 |                             body: '.test {background-image: url(/t?size=s);}',
169 |                             content_type: 'text/css')
170 |         @validate_website.site = page.url
171 |         _out, _err = capture_io do
172 |           @validate_website.crawl
173 |         end
174 |         _(@validate_website.history_count).must_equal 2
175 |       end
176 | 
177 |       it 'should not extract invalid urls' do
178 |         page = FakePage.new('test.css',
179 |                             body: '.test {background-image: url(/test.png");}',
180 |                             content_type: 'text/css')
181 |         @validate_website.site = page.url
182 |         _out, _err = capture_io do
183 |           @validate_website.crawl
184 |         end
185 |         _(@validate_website.history_count).must_equal 1
186 |       end
187 |     end
188 | 
189 |     describe 'validate css syntax' do
190 |       before do
191 |         _out, _err = capture_io do
192 |           @validate_website = ValidateWebsite::Crawl.new(color: false,
193 |                                                          css_syntax: true)
194 |         end
195 |       end
196 |       it 'should be invalid with bad urls' do
197 |         page = FakePage.new('test.css',
198 |                             body: '.test {background-image: url(/test.png");}',
199 |                             content_type: 'text/css')
200 |         @validate_website.site = page.url
201 |         _out, _err = capture_io do
202 |           @validate_website.crawl
203 |         end
204 |         _(@validate_website.errors_count).must_equal 1
205 |       end
206 | 
207 |       it 'should be invalid with syntax error' do
208 |         page = FakePage.new('test.css',
209 |                             body: ' /**/ .foo {} #{bar {}',
210 |                             content_type: 'text/css')
211 |         @validate_website.site = page.url
212 |         _out, _err = capture_io do
213 |           @validate_website.crawl
214 |         end
215 |         _(@validate_website.errors_count).must_equal 1
216 |       end
217 |     end
218 |   end
219 | end
220 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-basic11.dtd:
--------------------------------------------------------------------------------
  1 | <!-- XHTML Basic 1.1 DTD  ...................................................... -->
  2 | <!-- file: xhtml-basic11.dtd -->
  3 | 
  4 | <!-- XHTML Basic 1.1 DTD
  5 | 
  6 |      This is XHTML Basic, a proper subset of XHTML.
  7 | 
  8 |      The Extensible HyperText Markup Language (XHTML)
  9 |      Copyright 1998-2007 World Wide Web Consortium
 10 |         (Massachusetts Institute of Technology, European Research Consortium
 11 |          for Informatics and Mathematics, Keio University).
 12 |          All Rights Reserved.
 13 | 
 14 |      Permission to use, copy, modify and distribute the XHTML Basic DTD
 15 |      and its accompanying documentation for any purpose and without fee is
 16 |      hereby granted in perpetuity, provided that the above copyright notice
 17 |      and this paragraph appear in all copies.  The copyright holders make
 18 |      no representation about the suitability of the DTD for any purpose.
 19 | 
 20 |      It is provided "as is" without expressed or implied warranty.
 21 | 
 22 |         Editors:    Murray M. Altheim <mailto:altheim@eng.sun.com>
 23 |                     Peter Stark       <mailto:Peter.Stark@ecs.ericsson.se>
 24 |                     Shane McCarron    <mailto:shane@aptest.com>
 25 |         Revision:   $Id: xhtml-basic11.dtd,v 1.1 2008/07/26 17:02:27 jules Exp $
 26 | 
 27 | -->
 28 | <!-- This is the driver file for version 1.1 of the XHTML Basic DTD.
 29 | 
 30 |      This DTD is identified by the PUBLIC and SYSTEM identifiers:
 31 | 
 32 |      PUBLIC: "-//W3C//DTD XHTML Basic 1.1//EN"
 33 |      SYSTEM: "http://www.w3.org/MarkUp/DTD/xhtml-basic11.dtd"
 34 | -->
 35 | <!ENTITY % XHTML.version  "-//W3C//DTD XHTML Basic 1.1//EN" >
 36 | 
 37 | <!-- Use this URI to identify the default namespace:
 38 | 
 39 |          "http://www.w3.org/1999/xhtml"
 40 | 
 41 |      See the Qualified Names module for information
 42 |      on the use of namespace prefixes in the DTD.
 43 | -->
 44 | <!ENTITY % NS.prefixed "IGNORE" >
 45 | <!ENTITY % XHTML.prefix  "" >
 46 | 
 47 | <!-- Reserved for use with the XLink namespace:
 48 | -->
 49 | <!ENTITY % XLINK.xmlns "" >
 50 | <!ENTITY % XLINK.xmlns.attrib "" >
 51 | 
 52 | <!-- For example, if you are using XHTML Basic 1.1 directly, use
 53 |      the public identifier in the DOCTYPE declaration, with the namespace
 54 |      declaration on the document element to identify the default namespace:
 55 | 
 56 |          <?xml version="1.0"?>
 57 |          <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML Basic 1.1//EN"
 58 |              "http://www.w3.org/MarkUp/DTD/xhtml-basic11.dtd" >
 59 |          <html xmlns="http://www.w3.org/1999/xhtml"
 60 |                xml:lang="en" >
 61 |          ...
 62 |          </html>
 63 | -->
 64 | 
 65 | <!-- reserved for future use with document profiles -->
 66 | <!ENTITY % XHTML.profile  "" >
 67 | 
 68 | <!-- Bidirectional Text features
 69 |      This feature-test entity is used to declare elements
 70 |      and attributes used for bidirectional text support.
 71 | -->
 72 | <!ENTITY % XHTML.bidi  "IGNORE" >
 73 | 
 74 | <?doc type="doctype" role="title" { XHTML Basic 1.1 } ?>
 75 | 
 76 | <!-- :::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -->
 77 | 
 78 | <!ENTITY % xhtml-events.module   "INCLUDE" >
 79 | <!ENTITY % xhtml-bdo.module      "%XHTML.bidi;" >
 80 | 
 81 | <!-- Inline Style Module  ........................................ -->
 82 | <!ENTITY % xhtml-inlstyle.module "INCLUDE" >
 83 | <![%xhtml-inlstyle.module;[
 84 | <!ENTITY % xhtml-inlstyle.mod
 85 |      PUBLIC "-//W3C//ELEMENTS XHTML Inline Style 1.0//EN"
 86 |             "http://www.w3.org/MarkUp/DTD/xhtml-inlstyle-1.mod" >
 87 | %xhtml-inlstyle.mod;]]>
 88 | 
 89 | 
 90 | <!ENTITY % xhtml-model.mod
 91 |      PUBLIC "-//W3C//ENTITIES XHTML Basic 1.1 Document Model 1.0//EN"
 92 |             "http://www.w3.org/MarkUp/DTD/xhtml-basic11-model-1.mod" >
 93 | 
 94 | <!ENTITY % xhtml-framework.mod
 95 |      PUBLIC "-//W3C//ENTITIES XHTML Modular Framework 1.0//EN"
 96 |             "http://www.w3.org/MarkUp/DTD/xhtml-framework-1.mod" >
 97 | %xhtml-framework.mod;
 98 | 
 99 | <!ENTITY % pre.content
100 |      "( #PCDATA
101 |       | %InlStruct.class;
102 |       %InlPhras.class;
103 |       %Anchor.class;
104 |       %Inline.extra; )*"
105 | >
106 | 
107 | <!ENTITY % xhtml-text.mod
108 |      PUBLIC "-//W3C//ELEMENTS XHTML Text 1.0//EN"
109 |             "http://www.w3.org/MarkUp/DTD/xhtml-text-1.mod" >
110 | %xhtml-text.mod;
111 | 
112 | <!ENTITY % xhtml-hypertext.mod
113 |      PUBLIC "-//W3C//ELEMENTS XHTML Hypertext 1.0//EN"
114 |             "http://www.w3.org/MarkUp/DTD/xhtml-hypertext-1.mod" >
115 | %xhtml-hypertext.mod;
116 | 
117 | <!ENTITY % xhtml-list.mod
118 |      PUBLIC "-//W3C//ELEMENTS XHTML Lists 1.0//EN"
119 |             "http://www.w3.org/MarkUp/DTD/xhtml-list-1.mod" >
120 | %xhtml-list.mod;
121 | 
122 | <!-- Add in the value attribute to the li element -->
123 | <!ATTLIST %li.qname;
124 |       value        %Number.datatype;         #IMPLIED
125 | >
126 | 
127 | <!-- ::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: -->
128 | 
129 | <!-- Scripting Module  ........................................... -->
130 | <!ENTITY % xhtml-script.module "INCLUDE" >
131 | <![%xhtml-script.module;[
132 | <!ENTITY % xhtml-script.mod
133 |      PUBLIC "-//W3C//ELEMENTS XHTML Scripting 1.0//EN"
134 |             "http://www.w3.org/MarkUp/DTD/xhtml-script-1.mod" >
135 | %xhtml-script.mod;]]>
136 | 
137 | <!-- Style Sheets Module  ......................................... -->
138 | <!ENTITY % xhtml-style.module "INCLUDE" >
139 | <![%xhtml-style.module;[
140 | <!ENTITY % xhtml-style.mod
141 |      PUBLIC "-//W3C//ELEMENTS XHTML Style Sheets 1.0//EN"
142 |             "http://www.w3.org/MarkUp/DTD/xhtml-style-1.mod" >
143 | %xhtml-style.mod;]]>
144 | 
145 | <!-- Image Module  ............................................... -->
146 | <!ENTITY % xhtml-image.module "INCLUDE" >
147 | <![%xhtml-image.module;[
148 | <!ENTITY % xhtml-image.mod
149 |      PUBLIC "-//W3C//ELEMENTS XHTML Images 1.0//EN"
150 |             "http://www.w3.org/MarkUp/DTD/xhtml-image-1.mod" >
151 | %xhtml-image.mod;]]>
152 | 
153 | <!-- Tables Module ............................................... -->
154 | <!ENTITY % xhtml-table.module "INCLUDE" >
155 | <![%xhtml-table.module;[
156 | <!ENTITY % xhtml-table.mod
157 |      PUBLIC "-//W3C//ELEMENTS XHTML Basic Tables 1.0//EN"
158 |             "http://www.w3.org/MarkUp/DTD/xhtml-basic-table-1.mod" >
159 | %xhtml-table.mod;]]>
160 | 
161 | <!-- Forms Module  ............................................... -->
162 | <!ENTITY % xhtml-form.module "INCLUDE" >
163 | <![%xhtml-form.module;[
164 | <!ENTITY % xhtml-form.mod
165 |      PUBLIC "-//W3C//ELEMENTS XHTML Forms 1.0//EN"
166 |             "http://www.w3.org/MarkUp/DTD/xhtml-form-1.mod" >
167 | %xhtml-form.mod;]]>
168 | 
169 | <!-- Presentation Module  ........................................ -->
170 | <!ENTITY % xhtml-pres.module "INCLUDE" >
171 | <![%xhtml-pres.module;[
172 | <!ENTITY % xhtml-pres.mod
173 |      PUBLIC "-//W3C//ELEMENTS XHTML Presentation 1.0//EN"
174 |             "http://www.w3.org/MarkUp/DTD/xhtml-pres-1.mod" >
175 | %xhtml-pres.mod;]]>
176 | 
177 | <!-- Link Element Module  ........................................ -->
178 | <!ENTITY % xhtml-link.module "INCLUDE" >
179 | <![%xhtml-link.module;[
180 | <!ENTITY % xhtml-link.mod
181 |      PUBLIC "-//W3C//ELEMENTS XHTML Link Element 1.0//EN"
182 |             "http://www.w3.org/MarkUp/DTD/xhtml-link-1.mod" >
183 | %xhtml-link.mod;]]>
184 | 
185 | <!-- Document Metainformation Module  ............................ -->
186 | <!ENTITY % xhtml-meta.module "INCLUDE" >
187 | <![%xhtml-meta.module;[
188 | <!ENTITY % xhtml-meta.mod
189 |      PUBLIC "-//W3C//ELEMENTS XHTML Metainformation 1.0//EN"
190 |             "http://www.w3.org/MarkUp/DTD/xhtml-meta-1.mod" >
191 | %xhtml-meta.mod;]]>
192 | 
193 | <!-- Base Element Module  ........................................ -->
194 | <!ENTITY % xhtml-base.module "INCLUDE" >
195 | <![%xhtml-base.module;[
196 | <!ENTITY % xhtml-base.mod
197 |      PUBLIC "-//W3C//ELEMENTS XHTML Base Element 1.0//EN"
198 |             "http://www.w3.org/MarkUp/DTD/xhtml-base-1.mod" >
199 | %xhtml-base.mod;]]>
200 | 
201 | <!-- Param Element Module  ....................................... -->
202 | <!ENTITY % xhtml-param.module "INCLUDE" >
203 | <![%xhtml-param.module;[
204 | <!ENTITY % xhtml-param.mod
205 |      PUBLIC "-//W3C//ELEMENTS XHTML Param Element 1.0//EN"
206 |             "http://www.w3.org/MarkUp/DTD/xhtml-param-1.mod" >
207 | %xhtml-param.mod;]]>
208 | 
209 | <!-- Embedded Object Module  ..................................... -->
210 | <!ENTITY % xhtml-object.module "INCLUDE" >
211 | <![%xhtml-object.module;[
212 | <!ENTITY % xhtml-object.mod
213 |      PUBLIC "-//W3C//ELEMENTS XHTML Embedded Object 1.0//EN"
214 |             "http://www.w3.org/MarkUp/DTD/xhtml-object-1.mod" >
215 | %xhtml-object.mod;]]>
216 | 
217 | <!-- Inputmode Attribute Module  .................................. -->
218 | <!ENTITY % xhtml-inputmode.module "INCLUDE" >
219 | <![%xhtml-inputmode.module;[
220 | <!ENTITY % xhtml-inputmode.mod
221 |      PUBLIC "-//W3C//ELEMENTS XHTML Inputmode 1.0//EN"
222 |             "http://www.w3.org/MarkUp/DTD/xhtml-inputmode-1.mod" >
223 | %xhtml-inputmode.mod;]]>
224 | 
225 | <!-- Target Attribute Module  .................................... -->
226 | <!ENTITY % xhtml-target.module "INCLUDE" >
227 | <![%xhtml-target.module;[
228 | <!ENTITY % xhtml-target.mod
229 |      PUBLIC "-//W3C//ELEMENTS XHTML Target 1.0//EN"
230 |             "http://www.w3.org/MarkUp/DTD/xhtml-target-1.mod" >
231 | %xhtml-target.mod;]]>
232 | 
233 | 
234 | <!ENTITY % xhtml-struct.mod
235 |      PUBLIC "-//W3C//ELEMENTS XHTML Document Structure 1.0//EN"
236 |             "http://www.w3.org/MarkUp/DTD/xhtml-struct-1.mod" >
237 | %xhtml-struct.mod;
238 | 
239 | <!-- end of XHTML Basic 1.1 DTD  ........................................... -->
240 | 


--------------------------------------------------------------------------------
/data/schemas/xml.xsd:
--------------------------------------------------------------------------------
  1 | <?xml version='1.0'?>
  2 | <?xml-stylesheet href="../2008/09/xsd.xsl" type="text/xsl"?>
  3 | <xs:schema targetNamespace="http://www.w3.org/XML/1998/namespace" 
  4 |   xmlns:xs="http://www.w3.org/2001/XMLSchema" 
  5 |   xmlns   ="http://www.w3.org/1999/xhtml"
  6 |   xml:lang="en">
  7 | 
  8 |  <xs:annotation>
  9 |   <xs:documentation>
 10 |    <div>
 11 |     <h1>About the XML namespace</h1>
 12 | 
 13 |     <div class="bodytext">
 14 |      <p>
 15 |       This schema document describes the XML namespace, in a form
 16 |       suitable for import by other schema documents.
 17 |      </p>
 18 |      <p>
 19 |       See <a href="http://www.w3.org/XML/1998/namespace.html">
 20 |       http://www.w3.org/XML/1998/namespace.html</a> and
 21 |       <a href="http://www.w3.org/TR/REC-xml">
 22 |       http://www.w3.org/TR/REC-xml</a> for information 
 23 |       about this namespace.
 24 |      </p>
 25 |      <p>
 26 |       Note that local names in this namespace are intended to be
 27 |       defined only by the World Wide Web Consortium or its subgroups.
 28 |       The names currently defined in this namespace are listed below.
 29 |       They should not be used with conflicting semantics by any Working
 30 |       Group, specification, or document instance.
 31 |      </p>
 32 |      <p>   
 33 |       See further below in this document for more information about <a
 34 |       href="#usage">how to refer to this schema document from your own
 35 |       XSD schema documents</a> and about <a href="#nsversioning">the
 36 |       namespace-versioning policy governing this schema document</a>.
 37 |      </p>
 38 |     </div>
 39 |    </div>
 40 |   </xs:documentation>
 41 |  </xs:annotation>
 42 | 
 43 |  <xs:attribute name="lang">
 44 |   <xs:annotation>
 45 |    <xs:documentation>
 46 |     <div>
 47 |      
 48 |       <h3>lang (as an attribute name)</h3>
 49 |       <p>
 50 |        denotes an attribute whose value
 51 |        is a language code for the natural language of the content of
 52 |        any element; its value is inherited.  This name is reserved
 53 |        by virtue of its definition in the XML specification.</p>
 54 |      
 55 |     </div>
 56 |     <div>
 57 |      <h4>Notes</h4>
 58 |      <p>
 59 |       Attempting to install the relevant ISO 2- and 3-letter
 60 |       codes as the enumerated possible values is probably never
 61 |       going to be a realistic possibility.  
 62 |      </p>
 63 |      <p>
 64 |       See BCP 47 at <a href="http://www.rfc-editor.org/rfc/bcp/bcp47.txt">
 65 |        http://www.rfc-editor.org/rfc/bcp/bcp47.txt</a>
 66 |       and the IANA language subtag registry at
 67 |       <a href="http://www.iana.org/assignments/language-subtag-registry">
 68 |        http://www.iana.org/assignments/language-subtag-registry</a>
 69 |       for further information.
 70 |      </p>
 71 |      <p>
 72 |       The union allows for the 'un-declaration' of xml:lang with
 73 |       the empty string.
 74 |      </p>
 75 |     </div>
 76 |    </xs:documentation>
 77 |   </xs:annotation>
 78 |   <xs:simpleType>
 79 |    <xs:union memberTypes="xs:language">
 80 |     <xs:simpleType>    
 81 |      <xs:restriction base="xs:string">
 82 |       <xs:enumeration value=""/>
 83 |      </xs:restriction>
 84 |     </xs:simpleType>
 85 |    </xs:union>
 86 |   </xs:simpleType>
 87 |  </xs:attribute>
 88 | 
 89 |  <xs:attribute name="space">
 90 |   <xs:annotation>
 91 |    <xs:documentation>
 92 |     <div>
 93 |      
 94 |       <h3>space (as an attribute name)</h3>
 95 |       <p>
 96 |        denotes an attribute whose
 97 |        value is a keyword indicating what whitespace processing
 98 |        discipline is intended for the content of the element; its
 99 |        value is inherited.  This name is reserved by virtue of its
100 |        definition in the XML specification.</p>
101 |      
102 |     </div>
103 |    </xs:documentation>
104 |   </xs:annotation>
105 |   <xs:simpleType>
106 |    <xs:restriction base="xs:NCName">
107 |     <xs:enumeration value="default"/>
108 |     <xs:enumeration value="preserve"/>
109 |    </xs:restriction>
110 |   </xs:simpleType>
111 |  </xs:attribute>
112 |  
113 |  <xs:attribute name="base" type="xs:anyURI"> <xs:annotation>
114 |    <xs:documentation>
115 |     <div>
116 |      
117 |       <h3>base (as an attribute name)</h3>
118 |       <p>
119 |        denotes an attribute whose value
120 |        provides a URI to be used as the base for interpreting any
121 |        relative URIs in the scope of the element on which it
122 |        appears; its value is inherited.  This name is reserved
123 |        by virtue of its definition in the XML Base specification.</p>
124 |      
125 |      <p>
126 |       See <a
127 |       href="http://www.w3.org/TR/xmlbase/">http://www.w3.org/TR/xmlbase/</a>
128 |       for information about this attribute.
129 |      </p>
130 |     </div>
131 |    </xs:documentation>
132 |   </xs:annotation>
133 |  </xs:attribute>
134 |  
135 |  <xs:attribute name="id" type="xs:ID">
136 |   <xs:annotation>
137 |    <xs:documentation>
138 |     <div>
139 |      
140 |       <h3>id (as an attribute name)</h3> 
141 |       <p>
142 |        denotes an attribute whose value
143 |        should be interpreted as if declared to be of type ID.
144 |        This name is reserved by virtue of its definition in the
145 |        xml:id specification.</p>
146 |      
147 |      <p>
148 |       See <a
149 |       href="http://www.w3.org/TR/xml-id/">http://www.w3.org/TR/xml-id/</a>
150 |       for information about this attribute.
151 |      </p>
152 |     </div>
153 |    </xs:documentation>
154 |   </xs:annotation>
155 |  </xs:attribute>
156 | 
157 |  <xs:attributeGroup name="specialAttrs">
158 |   <xs:attribute ref="xml:base"/>
159 |   <xs:attribute ref="xml:lang"/>
160 |   <xs:attribute ref="xml:space"/>
161 |   <xs:attribute ref="xml:id"/>
162 |  </xs:attributeGroup>
163 | 
164 |  <xs:annotation>
165 |   <xs:documentation>
166 |    <div>
167 |    
168 |     <h3>Father (in any context at all)</h3> 
169 | 
170 |     <div class="bodytext">
171 |      <p>
172 |       denotes Jon Bosak, the chair of 
173 |       the original XML Working Group.  This name is reserved by 
174 |       the following decision of the W3C XML Plenary and 
175 |       XML Coordination groups:
176 |      </p>
177 |      <blockquote>
178 |        <p>
179 | 	In appreciation for his vision, leadership and
180 | 	dedication the W3C XML Plenary on this 10th day of
181 | 	February, 2000, reserves for Jon Bosak in perpetuity
182 | 	the XML name "xml:Father".
183 |        </p>
184 |      </blockquote>
185 |     </div>
186 |    </div>
187 |   </xs:documentation>
188 |  </xs:annotation>
189 | 
190 |  <xs:annotation>
191 |   <xs:documentation>
192 |    <div xml:id="usage" id="usage">
193 |     <h2><a name="usage">About this schema document</a></h2>
194 | 
195 |     <div class="bodytext">
196 |      <p>
197 |       This schema defines attributes and an attribute group suitable
198 |       for use by schemas wishing to allow <code>xml:base</code>,
199 |       <code>xml:lang</code>, <code>xml:space</code> or
200 |       <code>xml:id</code> attributes on elements they define.
201 |      </p>
202 |      <p>
203 |       To enable this, such a schema must import this schema for
204 |       the XML namespace, e.g. as follows:
205 |      </p>
206 |      <pre>
207 |           &lt;schema . . .>
208 |            . . .
209 |            &lt;import namespace="http://www.w3.org/XML/1998/namespace"
210 |                       schemaLocation="http://www.w3.org/2001/xml.xsd"/>
211 |      </pre>
212 |      <p>
213 |       or
214 |      </p>
215 |      <pre>
216 |            &lt;import namespace="http://www.w3.org/XML/1998/namespace"
217 |                       schemaLocation="http://www.w3.org/2009/01/xml.xsd"/>
218 |      </pre>
219 |      <p>
220 |       Subsequently, qualified reference to any of the attributes or the
221 |       group defined below will have the desired effect, e.g.
222 |      </p>
223 |      <pre>
224 |           &lt;type . . .>
225 |            . . .
226 |            &lt;attributeGroup ref="xml:specialAttrs"/>
227 |      </pre>
228 |      <p>
229 |       will define a type which will schema-validate an instance element
230 |       with any of those attributes.
231 |      </p>
232 |     </div>
233 |    </div>
234 |   </xs:documentation>
235 |  </xs:annotation>
236 | 
237 |  <xs:annotation>
238 |   <xs:documentation>
239 |    <div id="nsversioning" xml:id="nsversioning">
240 |     <h2><a name="nsversioning">Versioning policy for this schema document</a></h2>
241 |     <div class="bodytext">
242 |      <p>
243 |       In keeping with the XML Schema WG's standard versioning
244 |       policy, this schema document will persist at
245 |       <a href="http://www.w3.org/2009/01/xml.xsd">
246 |        http://www.w3.org/2009/01/xml.xsd</a>.
247 |      </p>
248 |      <p>
249 |       At the date of issue it can also be found at
250 |       <a href="http://www.w3.org/2001/xml.xsd">
251 |        http://www.w3.org/2001/xml.xsd</a>.
252 |      </p>
253 |      <p>
254 |       The schema document at that URI may however change in the future,
255 |       in order to remain compatible with the latest version of XML
256 |       Schema itself, or with the XML namespace itself.  In other words,
257 |       if the XML Schema or XML namespaces change, the version of this
258 |       document at <a href="http://www.w3.org/2001/xml.xsd">
259 |        http://www.w3.org/2001/xml.xsd 
260 |       </a> 
261 |       will change accordingly; the version at 
262 |       <a href="http://www.w3.org/2009/01/xml.xsd">
263 |        http://www.w3.org/2009/01/xml.xsd 
264 |       </a> 
265 |       will not change.
266 |      </p>
267 |      <p>
268 |       Previous dated (and unchanging) versions of this schema 
269 |       document are at:
270 |      </p>
271 |      <ul>
272 |       <li><a href="http://www.w3.org/2009/01/xml.xsd">
273 | 	http://www.w3.org/2009/01/xml.xsd</a></li>
274 |       <li><a href="http://www.w3.org/2007/08/xml.xsd">
275 | 	http://www.w3.org/2007/08/xml.xsd</a></li>
276 |       <li><a href="http://www.w3.org/2004/10/xml.xsd">
277 | 	http://www.w3.org/2004/10/xml.xsd</a></li>
278 |       <li><a href="http://www.w3.org/2001/03/xml.xsd">
279 | 	http://www.w3.org/2001/03/xml.xsd</a></li>
280 |      </ul>
281 |     </div>
282 |    </div>
283 |   </xs:documentation>
284 |  </xs:annotation>
285 | 
286 | </xs:schema>


--------------------------------------------------------------------------------
/data/schemas/xhtml-lat1.ent:
--------------------------------------------------------------------------------
  1 | <!-- Portions (C) International Organization for Standardization 1986
  2 |      Permission to copy in any form is granted for use with
  3 |      conforming SGML systems and applications as defined in
  4 |      ISO 8879, provided this notice is included in all copies.
  5 | -->
  6 | <!-- Character entity set. Typical invocation:
  7 |     <!ENTITY % HTMLlat1 PUBLIC
  8 |        "-//W3C//ENTITIES Latin 1 for XHTML//EN"
  9 |        "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">
 10 |     %HTMLlat1;
 11 | -->
 12 | 
 13 | <!ENTITY nbsp   "&#160;"> <!-- no-break space = non-breaking space,
 14 |                                   U+00A0 ISOnum -->
 15 | <!ENTITY iexcl  "&#161;"> <!-- inverted exclamation mark, U+00A1 ISOnum -->
 16 | <!ENTITY cent   "&#162;"> <!-- cent sign, U+00A2 ISOnum -->
 17 | <!ENTITY pound  "&#163;"> <!-- pound sign, U+00A3 ISOnum -->
 18 | <!ENTITY curren "&#164;"> <!-- currency sign, U+00A4 ISOnum -->
 19 | <!ENTITY yen    "&#165;"> <!-- yen sign = yuan sign, U+00A5 ISOnum -->
 20 | <!ENTITY brvbar "&#166;"> <!-- broken bar = broken vertical bar,
 21 |                                   U+00A6 ISOnum -->
 22 | <!ENTITY sect   "&#167;"> <!-- section sign, U+00A7 ISOnum -->
 23 | <!ENTITY uml    "&#168;"> <!-- diaeresis = spacing diaeresis,
 24 |                                   U+00A8 ISOdia -->
 25 | <!ENTITY copy   "&#169;"> <!-- copyright sign, U+00A9 ISOnum -->
 26 | <!ENTITY ordf   "&#170;"> <!-- feminine ordinal indicator, U+00AA ISOnum -->
 27 | <!ENTITY laquo  "&#171;"> <!-- left-pointing double angle quotation mark
 28 |                                   = left pointing guillemet, U+00AB ISOnum -->
 29 | <!ENTITY not    "&#172;"> <!-- not sign = angled dash,
 30 |                                   U+00AC ISOnum -->
 31 | <!ENTITY shy    "&#173;"> <!-- soft hyphen = discretionary hyphen,
 32 |                                   U+00AD ISOnum -->
 33 | <!ENTITY reg    "&#174;"> <!-- registered sign = registered trade mark sign,
 34 |                                   U+00AE ISOnum -->
 35 | <!ENTITY macr   "&#175;"> <!-- macron = spacing macron = overline
 36 |                                   = APL overbar, U+00AF ISOdia -->
 37 | <!ENTITY deg    "&#176;"> <!-- degree sign, U+00B0 ISOnum -->
 38 | <!ENTITY plusmn "&#177;"> <!-- plus-minus sign = plus-or-minus sign,
 39 |                                   U+00B1 ISOnum -->
 40 | <!ENTITY sup2   "&#178;"> <!-- superscript two = superscript digit two
 41 |                                   = squared, U+00B2 ISOnum -->
 42 | <!ENTITY sup3   "&#179;"> <!-- superscript three = superscript digit three
 43 |                                   = cubed, U+00B3 ISOnum -->
 44 | <!ENTITY acute  "&#180;"> <!-- acute accent = spacing acute,
 45 |                                   U+00B4 ISOdia -->
 46 | <!ENTITY micro  "&#181;"> <!-- micro sign, U+00B5 ISOnum -->
 47 | <!ENTITY para   "&#182;"> <!-- pilcrow sign = paragraph sign,
 48 |                                   U+00B6 ISOnum -->
 49 | <!ENTITY middot "&#183;"> <!-- middle dot = Georgian comma
 50 |                                   = Greek middle dot, U+00B7 ISOnum -->
 51 | <!ENTITY cedil  "&#184;"> <!-- cedilla = spacing cedilla, U+00B8 ISOdia -->
 52 | <!ENTITY sup1   "&#185;"> <!-- superscript one = superscript digit one,
 53 |                                   U+00B9 ISOnum -->
 54 | <!ENTITY ordm   "&#186;"> <!-- masculine ordinal indicator,
 55 |                                   U+00BA ISOnum -->
 56 | <!ENTITY raquo  "&#187;"> <!-- right-pointing double angle quotation mark
 57 |                                   = right pointing guillemet, U+00BB ISOnum -->
 58 | <!ENTITY frac14 "&#188;"> <!-- vulgar fraction one quarter
 59 |                                   = fraction one quarter, U+00BC ISOnum -->
 60 | <!ENTITY frac12 "&#189;"> <!-- vulgar fraction one half
 61 |                                   = fraction one half, U+00BD ISOnum -->
 62 | <!ENTITY frac34 "&#190;"> <!-- vulgar fraction three quarters
 63 |                                   = fraction three quarters, U+00BE ISOnum -->
 64 | <!ENTITY iquest "&#191;"> <!-- inverted question mark
 65 |                                   = turned question mark, U+00BF ISOnum -->
 66 | <!ENTITY Agrave "&#192;"> <!-- latin capital letter A with grave
 67 |                                   = latin capital letter A grave,
 68 |                                   U+00C0 ISOlat1 -->
 69 | <!ENTITY Aacute "&#193;"> <!-- latin capital letter A with acute,
 70 |                                   U+00C1 ISOlat1 -->
 71 | <!ENTITY Acirc  "&#194;"> <!-- latin capital letter A with circumflex,
 72 |                                   U+00C2 ISOlat1 -->
 73 | <!ENTITY Atilde "&#195;"> <!-- latin capital letter A with tilde,
 74 |                                   U+00C3 ISOlat1 -->
 75 | <!ENTITY Auml   "&#196;"> <!-- latin capital letter A with diaeresis,
 76 |                                   U+00C4 ISOlat1 -->
 77 | <!ENTITY Aring  "&#197;"> <!-- latin capital letter A with ring above
 78 |                                   = latin capital letter A ring,
 79 |                                   U+00C5 ISOlat1 -->
 80 | <!ENTITY AElig  "&#198;"> <!-- latin capital letter AE
 81 |                                   = latin capital ligature AE,
 82 |                                   U+00C6 ISOlat1 -->
 83 | <!ENTITY Ccedil "&#199;"> <!-- latin capital letter C with cedilla,
 84 |                                   U+00C7 ISOlat1 -->
 85 | <!ENTITY Egrave "&#200;"> <!-- latin capital letter E with grave,
 86 |                                   U+00C8 ISOlat1 -->
 87 | <!ENTITY Eacute "&#201;"> <!-- latin capital letter E with acute,
 88 |                                   U+00C9 ISOlat1 -->
 89 | <!ENTITY Ecirc  "&#202;"> <!-- latin capital letter E with circumflex,
 90 |                                   U+00CA ISOlat1 -->
 91 | <!ENTITY Euml   "&#203;"> <!-- latin capital letter E with diaeresis,
 92 |                                   U+00CB ISOlat1 -->
 93 | <!ENTITY Igrave "&#204;"> <!-- latin capital letter I with grave,
 94 |                                   U+00CC ISOlat1 -->
 95 | <!ENTITY Iacute "&#205;"> <!-- latin capital letter I with acute,
 96 |                                   U+00CD ISOlat1 -->
 97 | <!ENTITY Icirc  "&#206;"> <!-- latin capital letter I with circumflex,
 98 |                                   U+00CE ISOlat1 -->
 99 | <!ENTITY Iuml   "&#207;"> <!-- latin capital letter I with diaeresis,
100 |                                   U+00CF ISOlat1 -->
101 | <!ENTITY ETH    "&#208;"> <!-- latin capital letter ETH, U+00D0 ISOlat1 -->
102 | <!ENTITY Ntilde "&#209;"> <!-- latin capital letter N with tilde,
103 |                                   U+00D1 ISOlat1 -->
104 | <!ENTITY Ograve "&#210;"> <!-- latin capital letter O with grave,
105 |                                   U+00D2 ISOlat1 -->
106 | <!ENTITY Oacute "&#211;"> <!-- latin capital letter O with acute,
107 |                                   U+00D3 ISOlat1 -->
108 | <!ENTITY Ocirc  "&#212;"> <!-- latin capital letter O with circumflex,
109 |                                   U+00D4 ISOlat1 -->
110 | <!ENTITY Otilde "&#213;"> <!-- latin capital letter O with tilde,
111 |                                   U+00D5 ISOlat1 -->
112 | <!ENTITY Ouml   "&#214;"> <!-- latin capital letter O with diaeresis,
113 |                                   U+00D6 ISOlat1 -->
114 | <!ENTITY times  "&#215;"> <!-- multiplication sign, U+00D7 ISOnum -->
115 | <!ENTITY Oslash "&#216;"> <!-- latin capital letter O with stroke
116 |                                   = latin capital letter O slash,
117 |                                   U+00D8 ISOlat1 -->
118 | <!ENTITY Ugrave "&#217;"> <!-- latin capital letter U with grave,
119 |                                   U+00D9 ISOlat1 -->
120 | <!ENTITY Uacute "&#218;"> <!-- latin capital letter U with acute,
121 |                                   U+00DA ISOlat1 -->
122 | <!ENTITY Ucirc  "&#219;"> <!-- latin capital letter U with circumflex,
123 |                                   U+00DB ISOlat1 -->
124 | <!ENTITY Uuml   "&#220;"> <!-- latin capital letter U with diaeresis,
125 |                                   U+00DC ISOlat1 -->
126 | <!ENTITY Yacute "&#221;"> <!-- latin capital letter Y with acute,
127 |                                   U+00DD ISOlat1 -->
128 | <!ENTITY THORN  "&#222;"> <!-- latin capital letter THORN,
129 |                                   U+00DE ISOlat1 -->
130 | <!ENTITY szlig  "&#223;"> <!-- latin small letter sharp s = ess-zed,
131 |                                   U+00DF ISOlat1 -->
132 | <!ENTITY agrave "&#224;"> <!-- latin small letter a with grave
133 |                                   = latin small letter a grave,
134 |                                   U+00E0 ISOlat1 -->
135 | <!ENTITY aacute "&#225;"> <!-- latin small letter a with acute,
136 |                                   U+00E1 ISOlat1 -->
137 | <!ENTITY acirc  "&#226;"> <!-- latin small letter a with circumflex,
138 |                                   U+00E2 ISOlat1 -->
139 | <!ENTITY atilde "&#227;"> <!-- latin small letter a with tilde,
140 |                                   U+00E3 ISOlat1 -->
141 | <!ENTITY auml   "&#228;"> <!-- latin small letter a with diaeresis,
142 |                                   U+00E4 ISOlat1 -->
143 | <!ENTITY aring  "&#229;"> <!-- latin small letter a with ring above
144 |                                   = latin small letter a ring,
145 |                                   U+00E5 ISOlat1 -->
146 | <!ENTITY aelig  "&#230;"> <!-- latin small letter ae
147 |                                   = latin small ligature ae, U+00E6 ISOlat1 -->
148 | <!ENTITY ccedil "&#231;"> <!-- latin small letter c with cedilla,
149 |                                   U+00E7 ISOlat1 -->
150 | <!ENTITY egrave "&#232;"> <!-- latin small letter e with grave,
151 |                                   U+00E8 ISOlat1 -->
152 | <!ENTITY eacute "&#233;"> <!-- latin small letter e with acute,
153 |                                   U+00E9 ISOlat1 -->
154 | <!ENTITY ecirc  "&#234;"> <!-- latin small letter e with circumflex,
155 |                                   U+00EA ISOlat1 -->
156 | <!ENTITY euml   "&#235;"> <!-- latin small letter e with diaeresis,
157 |                                   U+00EB ISOlat1 -->
158 | <!ENTITY igrave "&#236;"> <!-- latin small letter i with grave,
159 |                                   U+00EC ISOlat1 -->
160 | <!ENTITY iacute "&#237;"> <!-- latin small letter i with acute,
161 |                                   U+00ED ISOlat1 -->
162 | <!ENTITY icirc  "&#238;"> <!-- latin small letter i with circumflex,
163 |                                   U+00EE ISOlat1 -->
164 | <!ENTITY iuml   "&#239;"> <!-- latin small letter i with diaeresis,
165 |                                   U+00EF ISOlat1 -->
166 | <!ENTITY eth    "&#240;"> <!-- latin small letter eth, U+00F0 ISOlat1 -->
167 | <!ENTITY ntilde "&#241;"> <!-- latin small letter n with tilde,
168 |                                   U+00F1 ISOlat1 -->
169 | <!ENTITY ograve "&#242;"> <!-- latin small letter o with grave,
170 |                                   U+00F2 ISOlat1 -->
171 | <!ENTITY oacute "&#243;"> <!-- latin small letter o with acute,
172 |                                   U+00F3 ISOlat1 -->
173 | <!ENTITY ocirc  "&#244;"> <!-- latin small letter o with circumflex,
174 |                                   U+00F4 ISOlat1 -->
175 | <!ENTITY otilde "&#245;"> <!-- latin small letter o with tilde,
176 |                                   U+00F5 ISOlat1 -->
177 | <!ENTITY ouml   "&#246;"> <!-- latin small letter o with diaeresis,
178 |                                   U+00F6 ISOlat1 -->
179 | <!ENTITY divide "&#247;"> <!-- division sign, U+00F7 ISOnum -->
180 | <!ENTITY oslash "&#248;"> <!-- latin small letter o with stroke
181 |                                   = latin small letter o slash,
182 |                                   U+00F8 ISOlat1 -->
183 | <!ENTITY ugrave "&#249;"> <!-- latin small letter u with grave,
184 |                                   U+00F9 ISOlat1 -->
185 | <!ENTITY uacute "&#250;"> <!-- latin small letter u with acute,
186 |                                   U+00FA ISOlat1 -->
187 | <!ENTITY ucirc  "&#251;"> <!-- latin small letter u with circumflex,
188 |                                   U+00FB ISOlat1 -->
189 | <!ENTITY uuml   "&#252;"> <!-- latin small letter u with diaeresis,
190 |                                   U+00FC ISOlat1 -->
191 | <!ENTITY yacute "&#253;"> <!-- latin small letter y with acute,
192 |                                   U+00FD ISOlat1 -->
193 | <!ENTITY thorn  "&#254;"> <!-- latin small letter thorn,
194 |                                   U+00FE ISOlat1 -->
195 | <!ENTITY yuml   "&#255;"> <!-- latin small letter y with diaeresis,
196 |                                   U+00FF ISOlat1 -->
197 | 


--------------------------------------------------------------------------------
/data/schemas/xhtml-symbol.ent:
--------------------------------------------------------------------------------
  1 | <!-- Mathematical, Greek and Symbolic characters for XHTML -->
  2 | 
  3 | <!-- Character entity set. Typical invocation:
  4 |      <!ENTITY % HTMLsymbol PUBLIC
  5 |         "-//W3C//ENTITIES Symbols for XHTML//EN"
  6 |         "http://www.w3.org/TR/xhtml1/DTD/xhtml-symbol.ent">
  7 |      %HTMLsymbol;
  8 | -->
  9 | 
 10 | <!-- Portions (C) International Organization for Standardization 1986:
 11 |      Permission to copy in any form is granted for use with
 12 |      conforming SGML systems and applications as defined in
 13 |      ISO 8879, provided this notice is included in all copies.
 14 | -->
 15 | 
 16 | <!-- Relevant ISO entity set is given unless names are newly introduced.
 17 |      New names (i.e., not in ISO 8879 list) do not clash with any
 18 |      existing ISO 8879 entity names. ISO 10646 character numbers
 19 |      are given for each character, in hex. Values are decimal
 20 |      conversions of the ISO 10646 values and refer to the document
 21 |      character set. Names are Unicode names.
 22 | -->
 23 | 
 24 | <!-- Latin Extended-B -->
 25 | <!ENTITY fnof     "&#402;"> <!-- latin small letter f with hook = function
 26 |                                     = florin, U+0192 ISOtech -->
 27 | 
 28 | <!-- Greek -->
 29 | <!ENTITY Alpha    "&#913;"> <!-- greek capital letter alpha, U+0391 -->
 30 | <!ENTITY Beta     "&#914;"> <!-- greek capital letter beta, U+0392 -->
 31 | <!ENTITY Gamma    "&#915;"> <!-- greek capital letter gamma,
 32 |                                     U+0393 ISOgrk3 -->
 33 | <!ENTITY Delta    "&#916;"> <!-- greek capital letter delta,
 34 |                                     U+0394 ISOgrk3 -->
 35 | <!ENTITY Epsilon  "&#917;"> <!-- greek capital letter epsilon, U+0395 -->
 36 | <!ENTITY Zeta     "&#918;"> <!-- greek capital letter zeta, U+0396 -->
 37 | <!ENTITY Eta      "&#919;"> <!-- greek capital letter eta, U+0397 -->
 38 | <!ENTITY Theta    "&#920;"> <!-- greek capital letter theta,
 39 |                                     U+0398 ISOgrk3 -->
 40 | <!ENTITY Iota     "&#921;"> <!-- greek capital letter iota, U+0399 -->
 41 | <!ENTITY Kappa    "&#922;"> <!-- greek capital letter kappa, U+039A -->
 42 | <!ENTITY Lambda   "&#923;"> <!-- greek capital letter lamda,
 43 |                                     U+039B ISOgrk3 -->
 44 | <!ENTITY Mu       "&#924;"> <!-- greek capital letter mu, U+039C -->
 45 | <!ENTITY Nu       "&#925;"> <!-- greek capital letter nu, U+039D -->
 46 | <!ENTITY Xi       "&#926;"> <!-- greek capital letter xi, U+039E ISOgrk3 -->
 47 | <!ENTITY Omicron  "&#927;"> <!-- greek capital letter omicron, U+039F -->
 48 | <!ENTITY Pi       "&#928;"> <!-- greek capital letter pi, U+03A0 ISOgrk3 -->
 49 | <!ENTITY Rho      "&#929;"> <!-- greek capital letter rho, U+03A1 -->
 50 | <!-- there is no Sigmaf, and no U+03A2 character either -->
 51 | <!ENTITY Sigma    "&#931;"> <!-- greek capital letter sigma,
 52 |                                     U+03A3 ISOgrk3 -->
 53 | <!ENTITY Tau      "&#932;"> <!-- greek capital letter tau, U+03A4 -->
 54 | <!ENTITY Upsilon  "&#933;"> <!-- greek capital letter upsilon,
 55 |                                     U+03A5 ISOgrk3 -->
 56 | <!ENTITY Phi      "&#934;"> <!-- greek capital letter phi,
 57 |                                     U+03A6 ISOgrk3 -->
 58 | <!ENTITY Chi      "&#935;"> <!-- greek capital letter chi, U+03A7 -->
 59 | <!ENTITY Psi      "&#936;"> <!-- greek capital letter psi,
 60 |                                     U+03A8 ISOgrk3 -->
 61 | <!ENTITY Omega    "&#937;"> <!-- greek capital letter omega,
 62 |                                     U+03A9 ISOgrk3 -->
 63 | 
 64 | <!ENTITY alpha    "&#945;"> <!-- greek small letter alpha,
 65 |                                     U+03B1 ISOgrk3 -->
 66 | <!ENTITY beta     "&#946;"> <!-- greek small letter beta, U+03B2 ISOgrk3 -->
 67 | <!ENTITY gamma    "&#947;"> <!-- greek small letter gamma,
 68 |                                     U+03B3 ISOgrk3 -->
 69 | <!ENTITY delta    "&#948;"> <!-- greek small letter delta,
 70 |                                     U+03B4 ISOgrk3 -->
 71 | <!ENTITY epsilon  "&#949;"> <!-- greek small letter epsilon,
 72 |                                     U+03B5 ISOgrk3 -->
 73 | <!ENTITY zeta     "&#950;"> <!-- greek small letter zeta, U+03B6 ISOgrk3 -->
 74 | <!ENTITY eta      "&#951;"> <!-- greek small letter eta, U+03B7 ISOgrk3 -->
 75 | <!ENTITY theta    "&#952;"> <!-- greek small letter theta,
 76 |                                     U+03B8 ISOgrk3 -->
 77 | <!ENTITY iota     "&#953;"> <!-- greek small letter iota, U+03B9 ISOgrk3 -->
 78 | <!ENTITY kappa    "&#954;"> <!-- greek small letter kappa,
 79 |                                     U+03BA ISOgrk3 -->
 80 | <!ENTITY lambda   "&#955;"> <!-- greek small letter lamda,
 81 |                                     U+03BB ISOgrk3 -->
 82 | <!ENTITY mu       "&#956;"> <!-- greek small letter mu, U+03BC ISOgrk3 -->
 83 | <!ENTITY nu       "&#957;"> <!-- greek small letter nu, U+03BD ISOgrk3 -->
 84 | <!ENTITY xi       "&#958;"> <!-- greek small letter xi, U+03BE ISOgrk3 -->
 85 | <!ENTITY omicron  "&#959;"> <!-- greek small letter omicron, U+03BF NEW -->
 86 | <!ENTITY pi       "&#960;"> <!-- greek small letter pi, U+03C0 ISOgrk3 -->
 87 | <!ENTITY rho      "&#961;"> <!-- greek small letter rho, U+03C1 ISOgrk3 -->
 88 | <!ENTITY sigmaf   "&#962;"> <!-- greek small letter final sigma,
 89 |                                     U+03C2 ISOgrk3 -->
 90 | <!ENTITY sigma    "&#963;"> <!-- greek small letter sigma,
 91 |                                     U+03C3 ISOgrk3 -->
 92 | <!ENTITY tau      "&#964;"> <!-- greek small letter tau, U+03C4 ISOgrk3 -->
 93 | <!ENTITY upsilon  "&#965;"> <!-- greek small letter upsilon,
 94 |                                     U+03C5 ISOgrk3 -->
 95 | <!ENTITY phi      "&#966;"> <!-- greek small letter phi, U+03C6 ISOgrk3 -->
 96 | <!ENTITY chi      "&#967;"> <!-- greek small letter chi, U+03C7 ISOgrk3 -->
 97 | <!ENTITY psi      "&#968;"> <!-- greek small letter psi, U+03C8 ISOgrk3 -->
 98 | <!ENTITY omega    "&#969;"> <!-- greek small letter omega,
 99 |                                     U+03C9 ISOgrk3 -->
100 | <!ENTITY thetasym "&#977;"> <!-- greek theta symbol,
101 |                                     U+03D1 NEW -->
102 | <!ENTITY upsih    "&#978;"> <!-- greek upsilon with hook symbol,
103 |                                     U+03D2 NEW -->
104 | <!ENTITY piv      "&#982;"> <!-- greek pi symbol, U+03D6 ISOgrk3 -->
105 | 
106 | <!-- General Punctuation -->
107 | <!ENTITY bull     "&#8226;"> <!-- bullet = black small circle,
108 |                                      U+2022 ISOpub  -->
109 | <!-- bullet is NOT the same as bullet operator, U+2219 -->
110 | <!ENTITY hellip   "&#8230;"> <!-- horizontal ellipsis = three dot leader,
111 |                                      U+2026 ISOpub  -->
112 | <!ENTITY prime    "&#8242;"> <!-- prime = minutes = feet, U+2032 ISOtech -->
113 | <!ENTITY Prime    "&#8243;"> <!-- double prime = seconds = inches,
114 |                                      U+2033 ISOtech -->
115 | <!ENTITY oline    "&#8254;"> <!-- overline = spacing overscore,
116 |                                      U+203E NEW -->
117 | <!ENTITY frasl    "&#8260;"> <!-- fraction slash, U+2044 NEW -->
118 | 
119 | <!-- Letterlike Symbols -->
120 | <!ENTITY weierp   "&#8472;"> <!-- script capital P = power set
121 |                                      = Weierstrass p, U+2118 ISOamso -->
122 | <!ENTITY image    "&#8465;"> <!-- black-letter capital I = imaginary part,
123 |                                      U+2111 ISOamso -->
124 | <!ENTITY real     "&#8476;"> <!-- black-letter capital R = real part symbol,
125 |                                      U+211C ISOamso -->
126 | <!ENTITY trade    "&#8482;"> <!-- trade mark sign, U+2122 ISOnum -->
127 | <!ENTITY alefsym  "&#8501;"> <!-- alef symbol = first transfinite cardinal,
128 |                                      U+2135 NEW -->
129 | <!-- alef symbol is NOT the same as hebrew letter alef,
130 |      U+05D0 although the same glyph could be used to depict both characters -->
131 | 
132 | <!-- Arrows -->
133 | <!ENTITY larr     "&#8592;"> <!-- leftwards arrow, U+2190 ISOnum -->
134 | <!ENTITY uarr     "&#8593;"> <!-- upwards arrow, U+2191 ISOnum -->
135 | <!ENTITY rarr     "&#8594;"> <!-- rightwards arrow, U+2192 ISOnum -->
136 | <!ENTITY darr     "&#8595;"> <!-- downwards arrow, U+2193 ISOnum -->
137 | <!ENTITY harr     "&#8596;"> <!-- left right arrow, U+2194 ISOamsa -->
138 | <!ENTITY crarr    "&#8629;"> <!-- downwards arrow with corner leftwards
139 |                                      = carriage return, U+21B5 NEW -->
140 | <!ENTITY lArr     "&#8656;"> <!-- leftwards double arrow, U+21D0 ISOtech -->
141 | <!-- Unicode does not say that lArr is the same as the 'is implied by' arrow
142 |     but also does not have any other character for that function. So lArr can
143 |     be used for 'is implied by' as ISOtech suggests -->
144 | <!ENTITY uArr     "&#8657;"> <!-- upwards double arrow, U+21D1 ISOamsa -->
145 | <!ENTITY rArr     "&#8658;"> <!-- rightwards double arrow,
146 |                                      U+21D2 ISOtech -->
147 | <!-- Unicode does not say this is the 'implies' character but does not have
148 |      another character with this function so rArr can be used for 'implies'
149 |      as ISOtech suggests -->
150 | <!ENTITY dArr     "&#8659;"> <!-- downwards double arrow, U+21D3 ISOamsa -->
151 | <!ENTITY hArr     "&#8660;"> <!-- left right double arrow,
152 |                                      U+21D4 ISOamsa -->
153 | 
154 | <!-- Mathematical Operators -->
155 | <!ENTITY forall   "&#8704;"> <!-- for all, U+2200 ISOtech -->
156 | <!ENTITY part     "&#8706;"> <!-- partial differential, U+2202 ISOtech  -->
157 | <!ENTITY exist    "&#8707;"> <!-- there exists, U+2203 ISOtech -->
158 | <!ENTITY empty    "&#8709;"> <!-- empty set = null set, U+2205 ISOamso -->
159 | <!ENTITY nabla    "&#8711;"> <!-- nabla = backward difference,
160 |                                      U+2207 ISOtech -->
161 | <!ENTITY isin     "&#8712;"> <!-- element of, U+2208 ISOtech -->
162 | <!ENTITY notin    "&#8713;"> <!-- not an element of, U+2209 ISOtech -->
163 | <!ENTITY ni       "&#8715;"> <!-- contains as member, U+220B ISOtech -->
164 | <!ENTITY prod     "&#8719;"> <!-- n-ary product = product sign,
165 |                                      U+220F ISOamsb -->
166 | <!-- prod is NOT the same character as U+03A0 'greek capital letter pi' though
167 |      the same glyph might be used for both -->
168 | <!ENTITY sum      "&#8721;"> <!-- n-ary summation, U+2211 ISOamsb -->
169 | <!-- sum is NOT the same character as U+03A3 'greek capital letter sigma'
170 |      though the same glyph might be used for both -->
171 | <!ENTITY minus    "&#8722;"> <!-- minus sign, U+2212 ISOtech -->
172 | <!ENTITY lowast   "&#8727;"> <!-- asterisk operator, U+2217 ISOtech -->
173 | <!ENTITY radic    "&#8730;"> <!-- square root = radical sign,
174 |                                      U+221A ISOtech -->
175 | <!ENTITY prop     "&#8733;"> <!-- proportional to, U+221D ISOtech -->
176 | <!ENTITY infin    "&#8734;"> <!-- infinity, U+221E ISOtech -->
177 | <!ENTITY ang      "&#8736;"> <!-- angle, U+2220 ISOamso -->
178 | <!ENTITY and      "&#8743;"> <!-- logical and = wedge, U+2227 ISOtech -->
179 | <!ENTITY or       "&#8744;"> <!-- logical or = vee, U+2228 ISOtech -->
180 | <!ENTITY cap      "&#8745;"> <!-- intersection = cap, U+2229 ISOtech -->
181 | <!ENTITY cup      "&#8746;"> <!-- union = cup, U+222A ISOtech -->
182 | <!ENTITY int      "&#8747;"> <!-- integral, U+222B ISOtech -->
183 | <!ENTITY there4   "&#8756;"> <!-- therefore, U+2234 ISOtech -->
184 | <!ENTITY sim      "&#8764;"> <!-- tilde operator = varies with = similar to,
185 |                                      U+223C ISOtech -->
186 | <!-- tilde operator is NOT the same character as the tilde, U+007E,
187 |      although the same glyph might be used to represent both  -->
188 | <!ENTITY cong     "&#8773;"> <!-- approximately equal to, U+2245 ISOtech -->
189 | <!ENTITY asymp    "&#8776;"> <!-- almost equal to = asymptotic to,
190 |                                      U+2248 ISOamsr -->
191 | <!ENTITY ne       "&#8800;"> <!-- not equal to, U+2260 ISOtech -->
192 | <!ENTITY equiv    "&#8801;"> <!-- identical to, U+2261 ISOtech -->
193 | <!ENTITY le       "&#8804;"> <!-- less-than or equal to, U+2264 ISOtech -->
194 | <!ENTITY ge       "&#8805;"> <!-- greater-than or equal to,
195 |                                      U+2265 ISOtech -->
196 | <!ENTITY sub      "&#8834;"> <!-- subset of, U+2282 ISOtech -->
197 | <!ENTITY sup      "&#8835;"> <!-- superset of, U+2283 ISOtech -->
198 | <!ENTITY nsub     "&#8836;"> <!-- not a subset of, U+2284 ISOamsn -->
199 | <!ENTITY sube     "&#8838;"> <!-- subset of or equal to, U+2286 ISOtech -->
200 | <!ENTITY supe     "&#8839;"> <!-- superset of or equal to,
201 |                                      U+2287 ISOtech -->
202 | <!ENTITY oplus    "&#8853;"> <!-- circled plus = direct sum,
203 |                                      U+2295 ISOamsb -->
204 | <!ENTITY otimes   "&#8855;"> <!-- circled times = vector product,
205 |                                      U+2297 ISOamsb -->
206 | <!ENTITY perp     "&#8869;"> <!-- up tack = orthogonal to = perpendicular,
207 |                                      U+22A5 ISOtech -->
208 | <!ENTITY sdot     "&#8901;"> <!-- dot operator, U+22C5 ISOamsb -->
209 | <!-- dot operator is NOT the same character as U+00B7 middle dot -->
210 | 
211 | <!-- Miscellaneous Technical -->
212 | <!ENTITY lceil    "&#8968;"> <!-- left ceiling = APL upstile,
213 |                                      U+2308 ISOamsc  -->
214 | <!ENTITY rceil    "&#8969;"> <!-- right ceiling, U+2309 ISOamsc  -->
215 | <!ENTITY lfloor   "&#8970;"> <!-- left floor = APL downstile,
216 |                                      U+230A ISOamsc  -->
217 | <!ENTITY rfloor   "&#8971;"> <!-- right floor, U+230B ISOamsc  -->
218 | <!ENTITY lang     "&#9001;"> <!-- left-pointing angle bracket = bra,
219 |                                      U+2329 ISOtech -->
220 | <!-- lang is NOT the same character as U+003C 'less than sign'
221 |      or U+2039 'single left-pointing angle quotation mark' -->
222 | <!ENTITY rang     "&#9002;"> <!-- right-pointing angle bracket = ket,
223 |                                      U+232A ISOtech -->
224 | <!-- rang is NOT the same character as U+003E 'greater than sign'
225 |      or U+203A 'single right-pointing angle quotation mark' -->
226 | 
227 | <!-- Geometric Shapes -->
228 | <!ENTITY loz      "&#9674;"> <!-- lozenge, U+25CA ISOpub -->
229 | 
230 | <!-- Miscellaneous Symbols -->
231 | <!ENTITY spades   "&#9824;"> <!-- black spade suit, U+2660 ISOpub -->
232 | <!-- black here seems to mean filled as opposed to hollow -->
233 | <!ENTITY clubs    "&#9827;"> <!-- black club suit = shamrock,
234 |                                      U+2663 ISOpub -->
235 | <!ENTITY hearts   "&#9829;"> <!-- black heart suit = valentine,
236 |                                      U+2665 ISOpub -->
237 | <!ENTITY diams    "&#9830;"> <!-- black diamond suit, U+2666 ISOpub -->
238 | 


--------------------------------------------------------------------------------
/test/data/html4-strict.html:
--------------------------------------------------------------------------------
  1 | <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  2 | <html lang="fr">
  3 | <head>
  4 |   <meta http-equiv="Content-Type" content="text/html; charset=utf-8">
  5 |   <title>Debian -- Le système d'exploitation universel </title>
  6 |   <link rev="made" href="mailto:webmaster@debian.org">
  7 |   <link rel="shortcut icon" href="favicon.ico">
  8 |   <meta name="Keywords" content="debian, GNU, linux, unix, open source, libre, DFSG">
  9 |   <meta name="Description" content="Debian GNU/Linux est une distribution libre du système d'exploitation GNU/Linux. Elle est développée et mise à jour grâce au travail de nombreux utilisateurs qui offrent leur temps et leurs efforts.">
 10 |   <meta name="Generator" content="WML 2.0.11 (19-Aug-2006)">
 11 |   <meta name="Modified" content="2010-10-22 23:28:09">
 12 | <link rel="alternate" type="application/rss+xml"
 13 |  title="Annonces de sécurité Debian (titres seulement)" href="security/dsa">
 14 | <link rel="alternate" type="application/rss+xml"
 15 |  title="Annonces de sécurité Debian (résumés)" href="security/dsa-long">
 16 | <link href="./debian.css" rel="stylesheet" type="text/css">
 17 |   <link href="./debian-fr.css" rel="stylesheet" type="text/css" media="all">
 18 | </head>
 19 | <body>
 20 | <div id="header">
 21 |    <div id="upperheader">
 22 |    <div id="logo">
 23 |   <a href="./"><img src="./logos/openlogo-nd-50.png" width="50" height="61" alt=""></a>
 24 |   <a href="./" rel="start"><img src="Pics/debian.png" width="179" height="61" alt="Projet Debian"></a>
 25 |   </div> <!-- end logo -->
 26 | </div> <!-- end upperheader -->
 27 | <!--UdmComment-->
 28 | <div id="navbar">
 29 | <p class="hidecss"><a href="#inner">Sauter le menu</a></p>
 30 | <ul>
 31 |    <li><a href="intro/about">À propos de Debian</a></li>
 32 |    <li><a href="./News/">Actualités</a></li>
 33 |    <li><a href="distrib/">Obtenir Debian</a></li>
 34 |    <li><a href="./support">Assistance</a></li>
 35 |    <li><a href="./devel/">Le&nbsp;coin&nbsp;du&nbsp;développeur</a></li>
 36 |    <li><a href="./sitemap">Plan du site</a></li>
 37 |    <li><a href="http://search.debian.org/">Recherche</a></li>
 38 | </ul>
 39 | </div> <!-- end navbar -->
 40 | </div> <!-- end header -->
 41 | <!--/UdmComment-->
 42 | <div id="outer">
 43 | <div id="inner">
 44 | <div id="leftcol">
 45 | <!--UdmComment-->
 46 |     <ul>
 47 |     <li><a href="intro/about">À&nbsp;propos&nbsp;de&nbsp;Debian</a>
 48 |       <ul>
 49 |       <li><a href="./social_contract">Notre&nbsp;contrat&nbsp;social</a></li>
 50 |       <li><a href="./intro/free">Logiciel&nbsp;libre</a></li>
 51 |       <li><a href="./partners/">Partenaires</a></li>
 52 |       <li><a href="./donations">Dons</a></li>
 53 |       <li><a href="./contact">Nous&nbsp;contacter</a></li>
 54 |       </ul>
 55 |     </li>
 56 |     <li><a href="./News/">Actualités</a>
 57 |       <ul>
 58 |       <li><a href="./News/project/">Nouvelles du projet</a></li>
 59 |       <li><a href="./events/">Événements</a></li>
 60 |       </ul>
 61 |     </li>
 62 |     <li><a href="distrib/">Obtenir Debian</a>
 63 |       <ul>
 64 |       <li><a href="CD/vendors/">Vendeurs de CD</a></li>
 65 |       <li><a href="CD/">Images ISO de CD</a></li>
 66 |       <li><a href="distrib/netinst">Installation par le réseau</a></li>
 67 |       <li><a href="distrib/pre-installed">Préinstallée</a></li>
 68 |       </ul>
 69 |     </li>
 70 |     <li><a href="distrib/packages">Paquets&nbsp;Debian</a></li>
 71 |     <li><a href="doc/">Documentation</a>
 72 |       <ul>
 73 |       <li><a href="./releases/">Dernière&nbsp;version</a></li>
 74 |       <li><a href="./releases/stable/installmanual">Manuel&nbsp;d'installation</a></li>
 75 |       <li><a href="doc/books">Livres&nbsp;Debian</a></li>
 76 |       </ul>
 77 |     </li>
 78 |     <li><a href="./support">Assistance</a>
 79 |       <ul>
 80 |       <li><a href="./international/">Debian&nbsp;et&nbsp;l'international</a></li>
 81 |       <li><a href="./security/">Informations&nbsp;sur&nbsp;la&nbsp;sécurité</a></li>
 82 |       <li><a href="Bugs/">Rapports de bogues</a></li>
 83 |       <li><a href="MailingLists/">Listes&nbsp;de&nbsp;diffusion</a></li>
 84 |       <li><a href="http://lists.debian.org/">Archives&nbsp;des&nbsp;listes&nbsp;de&nbsp;diffusion</a></li>
 85 |       <li><a href="./ports/">Portages/Architectures</a></li>
 86 |       </ul>
 87 |     </li>
 88 |     <li><a href="misc/">Divers</a></li>
 89 |     <li><a href="./intro/help">Aider Debian</a></li>
 90 |     <li><a href="./devel/">Le&nbsp;coin&nbsp;du&nbsp;développeur</a></li>
 91 |     <li><a href="./sitemap">Plan du site</a></li>
 92 |     <li><a href="http://search.debian.org/">Recherche</a></li>
 93 |     </ul>
 94 |     <form method="get" action="http://search.debian.org/">
 95 |     <p>
 96 |     <input type="text" name="q" size="12">
 97 |     </p>
 98 |     </form>
 99 |     <p>
100 |     <a href="./sponsor.html"><img src="sponsor_img.jpg" alt="Le site de notre sponsor" width="102" height="60"></a>
101 |     </p>
102 |     <p>
103 |     <a href="http://validator.w3.org/check/referer"><img src="http://www.w3.org/Icons/valid-html401" alt="Valid HTML 4.01!" width="88" height="31"></a>
104 |     </p>
105 |     <p>
106 |     <img src="http://jigsaw.w3.org/css-validator/images/vcss"
107 |         alt="Valid CSS!" width="88" height="31">
108 |     </p>
109 | <!--/UdmComment-->
110 | </div> <!-- end leftcol -->
111 | <div id="maincol">
112 | <a href="./News/2009/20090214"><img src="Pics/lennybanner_indexed.png" alt="Debian 5.0 - Le système d'exploitation universel" width="380" height="310" style="margin-right: 10px; float: left;"></a>
113 | <h2>Qu'est-ce que Debian&nbsp;?</h2>
114 | <p><a href="http://www.debian.org/">Debian</a> est un système d'exploitation
115 | <a href="intro/free">libre</a> pour votre ordinateur. Un système d'exploitation
116 | est la suite des programmes de base et des utilitaires qui permettent à un
117 | ordinateur de fonctionner. Debian utilise le noyau
118 | <a href="http://www.kernel.org/">Linux</a> (le c&oelig;ur d'un système d'exploitation),
119 | mais la plupart des outils de base du système proviennent du
120 | <a href="http://www.gnu.org/">projet GNU</a>&nbsp;; d'où le nom GNU/Linux.</p>
121 | <p>Debian GNU/Linux est bien plus qu'un simple système d'exploitation&nbsp;:
122 | il contient plus de 25000
123 | <a href="distrib/packages">paquets</a>&nbsp;; les paquets sont des composants
124 | logiciels précompilés conçus pour s'installer facilement sur votre machine.</p>
125 | <p><a href="intro/about">Suite...</a></p>
126 | <hr>
127 | <h2>Pour commencer</h2>
128 | <p>La <a href="releases/stable/">dernière version stable de Debian</a> est
129 | la 5.0. La dernière mise à jour de cette version a été publiée
130 | le 4 septembre 2010. Vous pouvez aussi accéder aux
131 | <a href="releases/">autres versions disponibles de Debian</a>.</p>
132 | <p>Si vous souhaitez commencer à utiliser Debian, vous pouvez facilement
133 | <a href="distrib/">en obtenir une copie</a>, et ensuite suivre les
134 | <a href="releases/stable/installmanual">instructions d'installation</a>
135 | pour l'installer.</p>
136 | <p>Si vous mettez à niveau votre système depuis une ancienne version vers
137 | la dernière version stable publiée, veuillez lire les
138 | <a href="releases/stable/releasenotes">notes de publication</a>
139 | avant de commencer.</p>
140 | <p>Pour obtenir de l'aide concernant l'utilisation ou la configuration
141 | de Debian, consultez nos pages sur la <a href="doc/">documentation</a>
142 | et l'<a href="support">assistance</a>.</p>
143 | <p>Les utilisateurs qui parlent une langue autre que l'anglais peuvent
144 | consulter la section sur l'<a href="international/">international</a>.</p>
145 | <p>Les personnes ayant un autre système qu'Intel x86 peuvent
146 | consulter la section sur les <a href="ports/">portages</a>.</p>
147 | <hr>
148 | <h2>Actualités</h2>
149 | <p><tt>[19 octobre 2010]</tt> <strong><a href="News/2010/20101019">Debian sur le point d'accueillir officiellement les contributeurs non empaqueteurs</a></strong><br>
150 | <tt>[7 octobre 2010]</tt> <strong><a href="News/2010/20101007">Debian à la rencontre de la <q>Society for Neuroscience</q></a></strong><br>
151 | <tt>[8 septembre 2010]</tt> <strong><a href="News/2010/20100908">Paris Mini-DebConf 2010</a></strong><br>
152 | <tt>[5 septembre 2010]</tt> <strong><a href="News/2010/20100905">Le service de rétroportages (« backports ») devient officiel</a></strong><br>
153 | <tt>[4 septembre 2010]</tt> <strong><a href="News/2010/20100904">Publication de la mise à jour de Debian GNU/Linux 5.0.6</a></strong><br>
154 | <tt>[3 septembre 2010]</tt> <strong><a href="News/2010/20100903">Conférence 2010 de la communauté Debian italienne - du 17 au 19 septembre à Pérouse, Italie</a></strong><br>
155 | </p>
156 | <p>Pour les communiqués plus anciens, consultez la suite de la page <a href="./News/">actualités</a>.
157 | Si vous voulez recevoir un courrier (en anglais) à chaque fois qu'un communiqué paraît, abonnez-vous
158 | à la <a href="MailingLists/debian-announce">liste de diffusion debian-announce</a>.</p>
159 | <hr>
160 | <h2>Annonces de sécurité</h2>
161 | <p><tt>[22 octobre 2010]</tt> <strong><a href="security/2010/dsa-2122">DSA-2122 glibc</a></strong> - missing input sanitization <br>
162 | <tt>[19 octobre 2010]</tt> <strong><a href="security/2010/dsa-2121">DSA-2121 typo3-src</a></strong> - several vulnerabilities <br>
163 | <tt>[12 octobre 2010]</tt> <strong><a href="security/2010/dsa-2120">DSA-2120 postgresql-8.3</a></strong> - privilege escalation <br>
164 | <tt>[12 octobre 2010]</tt> <strong><a href="security/2010/dsa-2119">DSA-2119 poppler</a></strong> - several vulnerabilities <br>
165 | <tt>[8 octobre 2010]</tt> <strong><a href="security/2010/dsa-2118">DSA-2118 subversion</a></strong> - logic flaw <br>
166 | <tt>[4 octobre 2010]</tt> <strong><a href="security/2010/dsa-2117">DSA-2117 apr-util</a></strong> - denial of service <br>
167 | <tt>[4 octobre 2010]</tt> <strong><a href="security/2010/dsa-2116">DSA-2116 freetype</a></strong> - integer overflow <br>
168 | <tt>[29 septembre 2010]</tt> <strong><a href="security/2010/dsa-2115">DSA-2115 moodle</a></strong> - several vulnerabilities <br>
169 | <tt>[26 septembre 2010]</tt> <strong><a href="security/2010/dsa-2114">DSA-2114 git-core</a></strong> - buffer overflow <br>
170 | <tt>[20 septembre 2010]</tt> <strong><a href="security/2010/dsa-2113">DSA-2113 drupal6</a></strong> - several vulnerabilities <br>
171 | <tt>[20 septembre 2010]</tt> <strong><a href="security/2010/dsa-2112">DSA-2112 bzip2</a></strong> - integer overflow <br>
172 | </p>
173 | <p>Pour les annonces de sécurité, consultez la <a href="./security/">
174 | page sécurité</a>.
175 | Si vous voulez recevoir les annonces de sécurité (en anglais) dès leur parution, abonnez-vous
176 | à la <a href="http://lists.debian.org/debian-security-announce/">liste de diffusion debian-security-announce</a>.</p>
177 | </div> <!-- end maincol -->
178 | <div class="clr"></div>
179 | </div> <!-- end inner -->
180 | <div id="footer">
181 |   <hr class="hidecss">
182 |   <!--UdmComment-->
183 | <p>
184 | Cette page est aussi disponible dans les langues suivantes&nbsp;:
185 | </p><p class="navpara">
186 |  <a href="index.ar.html" title="arabe" hreflang="ar" lang="ar" rel="alternate">&#1593;&#1585;&#1576;&#1610;&#1577;&nbsp;(Arabiya)</a>
187 |  <a href="index.bg.html" title="bulgare" hreflang="bg" lang="bg" rel="alternate">&#1041;&#1098;&#1083;&#1075;&#1072;&#1088;&#1089;&#1082;&#1080;&nbsp;(B&#601;lgarski)</a>
188 |  <a href="index.ca.html" title="catalan" hreflang="ca" lang="ca" rel="alternate">catal&agrave;</a>
189 |  <a href="index.cs.html" title="tchèque" hreflang="cs" lang="cs" rel="alternate">&#269;esky</a>
190 |  <a href="index.da.html" title="danois" hreflang="da" lang="da" rel="alternate">dansk</a>
191 |  <a href="index.de.html" title="allemand" hreflang="de" lang="de" rel="alternate">Deutsch</a>
192 |  <a href="index.el.html" title="grec" hreflang="el" lang="el" rel="alternate">&#917;&#955;&#955;&#951;&#957;&#953;&#954;&#940;&nbsp;(Ellinika)</a>
193 |  <a href="index.en.html" title="anglais" hreflang="en" lang="en" rel="alternate">English</a>
194 |  <a href="index.es.html" title="espagnol" hreflang="es" lang="es" rel="alternate">espa&ntilde;ol</a>
195 |  <a href="index.eo.html" title="espéranto" hreflang="eo" lang="eo" rel="alternate">Esperanto</a>
196 |  <a href="index.ko.html" title="coréen" hreflang="ko" lang="ko" rel="alternate">&#54620;&#44397;&#50612;&nbsp;(Hangul)</a>
197 |  <a href="index.hy.html" title="arménien" hreflang="hy" lang="hy" rel="alternate">&#1344;&#1377;&#1397;&#1381;&#1408;&#1381;&#1398;&nbsp;(hayeren)</a>
198 |  <a href="index.hr.html" title="croate" hreflang="hr" lang="hr" rel="alternate">hrvatski</a>
199 |  <a href="index.it.html" title="italien" hreflang="it" lang="it" rel="alternate">Italiano</a>
200 |  <a href="index.he.html" title="hébreu" hreflang="he" lang="he" rel="alternate">&#1506;&#1489;&#1512;&#1497;&#1514;&nbsp;(ivrit)</a>
201 |  <a href="index.lt.html" title="lituanien" hreflang="lt" lang="lt" rel="alternate">Lietuvi&#371;</a>
202 |  <a href="index.hu.html" title="hongrois" hreflang="hu" lang="hu" rel="alternate">magyar</a>
203 |  <a href="index.nl.html" title="néerlandais" hreflang="nl" lang="nl" rel="alternate">Nederlands</a>
204 |  <a href="index.ja.html" title="japonais" hreflang="ja" lang="ja" rel="alternate">&#26085;&#26412;&#35486;&nbsp;(Nihongo)</a>
205 |  <a href="index.nb.html" title="norvégien" hreflang="nb" lang="nb" rel="alternate">norsk&nbsp;(bokm&aring;l)</a>
206 |  <a href="index.pl.html" title="polonais" hreflang="pl" lang="pl" rel="alternate">polski</a>
207 |  <a href="index.pt.html" title="portugais" hreflang="pt" lang="pt" rel="alternate">Portugu&ecirc;s</a>
208 |  <a href="index.ro.html" title="roumain" hreflang="ro" lang="ro" rel="alternate">rom&acirc;n&#259;</a>
209 |  <a href="index.ru.html" title="russe" hreflang="ru" lang="ru" rel="alternate">&#1056;&#1091;&#1089;&#1089;&#1082;&#1080;&#1081;&nbsp;(Russkij)</a>
210 |  <a href="index.sk.html" title="slovaque" hreflang="sk" lang="sk" rel="alternate">slovensky</a>
211 |  <a href="index.fi.html" title="finnois" hreflang="fi" lang="fi" rel="alternate">suomi</a>
212 |  <a href="index.sv.html" title="suédois" hreflang="sv" lang="sv" rel="alternate">svenska</a>
213 |  <a href="index.ta.html" title="tamoul" hreflang="ta" lang="ta" rel="alternate">&#2980;&#2990;&#3007;&#2996;&#3021;&nbsp;(Tamil)</a>
214 |  <a href="index.tr.html" title="turc" hreflang="tr" lang="tr" rel="alternate">T&uuml;rk&ccedil;e</a>
215 |  <a href="index.uk.html" title="ukrainien" hreflang="uk" lang="uk" rel="alternate">&#1091;&#1082;&#1088;&#1072;&#1111;&#1085;&#1089;&#1100;&#1082;&#1072;&nbsp;(ukrajins'ka)</a>
216 |  <a href="index.zh-cn.html" title="chinois (Chine)" hreflang="zh-CN" lang="zh-CN" rel="alternate">&#20013;&#25991;(&#31616;)</a>
217 |  <a href="index.zh-hk.html" title="chinois (Hong Kong)" hreflang="zh-HK" lang="zh-HK" rel="alternate">&#20013;&#25991;(HK)</a>
218 |  <a href="index.zh-tw.html" title="chinois (Taïwan)" hreflang="zh-TW" lang="zh-TW" rel="alternate">&#20013;&#25991;(&#32321;)</a>
219 | </p><p>
220 | Comment configurer la <a href="./intro/cn">langue par défaut du document</a>
221 | </p>
222 | <!--/UdmComment-->
223 | <hr>
224 | <!--UdmComment-->
225 | <div id="fineprint">
226 |   <p>Pour signaler un problème sur le site web, envoyez un courriel en anglais à <a href="mailto:debian-www@lists.debian.org">debian-www@lists.debian.org</a> ou en français à <a href="mailto:debian-l10n-french@lists.debian.org">debian-l10n-french@lists.debian.org</a>. Pour obtenir d'autres informations, référez-vous à la <a href="./contact">page contact</a> de Debian.</p>
227 | <p>
228 | Dernière modification&nbsp;: vendredi 22 octobre 2010 23:28:09 UTC
229 |   <br>
230 |   Copyright &copy; 1997-2010
231 |  <a href="http://www.spi-inc.org/">SPI</a>; voir <a href="./license" rel="copyright">les termes de la licence</a>.<br>
232 |   Debian est une <a href="./trademark">marque déposée</a> de Software in the Public Interest, Inc.
233 | </p>
234 | </div>
235 | <!--/UdmComment-->
236 | </div> <!-- end footer -->
237 | </div> <!-- end outer -->
238 | </body>
239 | </html>
240 | 


--------------------------------------------------------------------------------
/History.md:
--------------------------------------------------------------------------------
  1 | 
  2 | 1.12.0 / 2022-11-15
  3 | ===================
  4 | 
  5 |   * Regen manpages
  6 |   * Bump year
  7 |   * Fix lint Metrics/AbcSize on check_page
  8 |   * Fix test/static_test.rb with relative path
  9 |   * Add relative link test fail
 10 |   * Fix test/static_test.rb with relative path
 11 |   * Nokogumbo is merged into nokogiri 1.12~
 12 | 
 13 | 1.11.1 / 2021-01-10
 14 | ===================
 15 | 
 16 |   * Add nokogumbo doc
 17 |   * nokogumbo support for ruby engine
 18 |   * Add support for nokogumbo
 19 |   * Add jruby to github actions
 20 |   * Merge pull request #24 from @marocchino / patch-1
 21 |     * Some minor improvements
 22 |   * Remove travis ci
 23 |   * Init github actions ci
 24 | 
 25 | 1.11.0 / 2021-01-08
 26 | ===================
 27 | 
 28 |   * Merge pull request #23 from @marocchino / ruby-3-support
 29 |     * Use webrick's escape instead of encode_www_form_component
 30 |     * Support ruby 3
 31 |   * Fix doc for ValidateWebsite::Core initialize
 32 |   * Switch to gitlab ci and remove 2.{3,4} support
 33 |   * Update rubocop to 0.76.0
 34 | 
 35 | 1.10.0 / 2020-07-03
 36 | ===================
 37 | 
 38 |   * Fix build for Ruby 2.3 and 2.4
 39 |   * Remove rbx-3 from build
 40 |   * Remove minitest-focus and fix minitest 6 warnings
 41 |   * Fix html5_validator option and change html5_validator_service_url
 42 |   * Add Ruby 2.7 to CI and update jruby
 43 |   * Update rubocop and fix offences
 44 |   * Remove Ruby 2.2 support and update rubocop
 45 | 
 46 | 1.9.3 / 2019-04-11
 47 | ==================
 48 | 
 49 |   * Update tidy_ffi to 1.0
 50 |   * Avoid testing tidy bug with js
 51 | 
 52 | 1.9.2 / 2019-03-09
 53 | ==================
 54 | 
 55 |   * Load schema when needed instead of boot
 56 | 
 57 | 1.9.1 / 2019-03-05
 58 | ==================
 59 | 
 60 |   * Improve start message for static validator
 61 |   * Update travis config
 62 |   * Fix bundler to <2 on travis (dropped support Ruby < 2.3)
 63 | 
 64 | 1.9.0 / 2018-12-25
 65 | ==================
 66 | 
 67 |   * Update deps paint; slop; webmock
 68 |   * Remove Ruby 2.1 support
 69 |   * Force nonet and disable substitute entities on xhtml parse
 70 |   * Use coveralls for code coverage
 71 | 
 72 | v1.8.1 / 2018-03-25
 73 | ===================
 74 | 
 75 |   * Change document to local dtd only when needed
 76 |   * Switch to asciidoctor to generate manpage
 77 | 
 78 | v1.8.0 / 2017-08-24
 79 | ===================
 80 | 
 81 |   * Update manpages
 82 |   * Update rubies and jruby on travis
 83 |   * Remove encoding and other Style/SymbolArray fixes
 84 |   * Add tidy validator for html5
 85 | 
 86 | v1.7.0 / 2017-04-08
 87 | ===================
 88 | 
 89 |   * Use w3c_validators and remove multipart_body gem
 90 |   * Drop Ruby 2.0.0
 91 |   * Update webmock to 2.3
 92 |   * Load xsd on init to avoid conflicting chdir
 93 | 
 94 | v1.6.0 / 2016-09-24
 95 | ===================
 96 | 
 97 |   * Update spidr and rubocop
 98 |   * Use more up-to-date validator service and https
 99 | 
100 | v1.5.8 / 2016-06-27
101 | ===================
102 | 
103 |   * Update manpages
104 |   * Exclude urls on static crawl
105 |   * Fix warning: instance variable not initialized
106 | 
107 | v1.5.7 / 2016-06-13
108 | ===================
109 | 
110 |   * Fix JRuby CI build
111 |   * Update JRuby and other rubies on Travis
112 |   * Update rubocop to v0.40
113 |   * Add env variable VALIDATOR_NU_URL support
114 | 
115 | v1.5.6 / 2016-03-02
116 | ===================
117 | 
118 |   * Fix error with redirect and extract images (see #16)
119 |   * Display version number
120 |   * Rubocop fixes
121 |   * Updates deps
122 | 
123 | v1.5.4 / 2015-12-08
124 | ===================
125 | 
126 |   * Fix URI must be ascii only error
127 | 
128 | v1.5.3 / 2015-11-08
129 | ===================
130 | 
131 |   * Fix test_files on gemspec
132 |   * Use File.expand_path for jruby
133 |   * Update minitest and webmock
134 |   * Capture output on spec
135 |   * Move jruby-9.0.0.0 to allow failures
136 |   * Added jruby-9.0.0.0 to travis
137 |   * Options hash is mandatory on ValidateWebsite::Core
138 |   * Added rubocop on default rake task
139 |   * Remove unnecessary spacing
140 |   * Rakefile: add --display-style-guide option to rubocop
141 | 
142 | v1.5.0 / 2015-07-27
143 | ===================
144 | 
145 |   * Bump to 1.5.0
146 |   * Added license badge
147 |   * Document --css-syntax option
148 |   * Fix --pattern option only string
149 |   * Extract CSS methods to Utils class
150 |   * Added css_syntax option checking css errors
151 |   * Call method only on :not_found enabled
152 |   * Rename spec to test
153 |   * Add inch documentation badge
154 |   * Use Crass gem to extract urls
155 |   * Update README
156 |   * Only display cop on task
157 |   * Fix rubocop build and add custom task
158 | 
159 | v1.1.0 / 2015-07-07
160 | ===================
161 | 
162 |   * Bump to 1.1.0
163 |   * Enable rubocop on travis build
164 |   * Fix default_args method has too many lines
165 |   * Fix crawl#spidr_crawler ABC size
166 |   * Fix Core#validate ABC size
167 |   * Fix Static#check_static_file ABC size
168 |   * Fix Static#crawl ABC size
169 |   * Fix check_static_not_found css urls
170 |   * Refactor check_static_not_found method
171 |   * Syntax fix: use next in Enumerator
172 |   * README: typo
173 |   * Refactor: create ValidateWebsite::{Static,Crawl} classes
174 |   * Refactor Validator
175 |   * Syntax fixes
176 |   * Syntax fixes
177 |   * Update travis
178 |   * Fix markup option
179 |   * Cleanup default options
180 |   * Better args options manage between crawl and static
181 |   * Fix jruby ignore tests
182 |   * Move on: stop supporting Ruby 1.9
183 |   * Oops, forgot spec data
184 |   * Fix ignore option for static crawl and non html5
185 |   * Use slop 4.2
186 | 
187 | v1.0.5 / 2015-05-25
188 | ===================
189 | 
190 |   * Bump to 1.0.5
191 |   * Added option html5-validator-service-url
192 |   * Update paint to 1.0
193 |   * Add ruby-head to travis
194 |   * Remove docker stuff [ci skip]
195 |   * Allow customize html5 validator service url
196 | 
197 | v1.0.4 / 2015-03-10
198 | ===================
199 | 
200 |   * Bump to 1.0.4
201 |   * Fix issue #12 with excessive requests to validator.nu
202 |   * Added failing test for issue #12
203 | 
204 | v1.0.3 / 2015-02-27
205 | ===================
206 | 
207 |   * Bump to 1.0.3
208 |   * Fix static not found with anchor link (see #14)
209 |   * Added fig config
210 |   * bundle update
211 |   * travis: added 2.2.0 version
212 | 
213 | v1.0.2 / 2015-02-18
214 | ===================
215 | 
216 |   * Bump to 1.0.2
217 |   * Fix issue #13
218 |   * Added failing test for issue #13
219 |   * Bump year on LICENSE file
220 | 
221 | v1.0.1 / 2015-02-15
222 | ===================
223 | 
224 |   * Bump to 1.0.1
225 |   * Revert "Remove shebang its already handle by RubyGems"
226 |   * Fix html5 validator service url (see #11)
227 |   * Update year and manpages
228 |   * Remove shebang its already handle by RubyGems
229 |   * spec/core_spec.rb: codestyle
230 |   * Use each_with_object instead of inject
231 |   * Ignore asciidoc generated files
232 |   * Extract spidr_crawler for less complexity in crawl
233 |   * Improve jekyll sample code
234 |   * Merge pull request #10 from marocchino/improve-readme
235 |   * Improve jekyll sample code
236 | 
237 | v1.0.0 / 2014-10-18
238 | ===================
239 | 
240 |   * Bump to 1.0.0 :exclamation:
241 |   * Can set cookies from command line
242 |   * Can set cookies
243 |   * Documentation update
244 |   * Options notfound => not_found
245 |   * Can change user-agent
246 |   * Move internet connection check to private
247 |   * use next instead of return for check static links
248 |   * update screenshot
249 |   * rubocop fixes (complexity, line too long)
250 |   * remove matcher rspec (obsolete)
251 |   * fix not found on static webpage
252 |   * update linuxfr webpage and add static for tests
253 |   * Fix URI::InvalidURIError
254 |   * Fix Errno::ENOENT error
255 |   * Make tests fail for static not found
256 |   * Use slop for ARGV parsing and remove some options
257 |   * Fix not_found_error and print not founds status
258 |   * Make tests fail for check_static_not_found
259 |   * Add status line
260 | 
261 | v0.9.5 / 2014-09-23
262 | ===================
263 | 
264 |   * Bump to 0.9.5
265 |   * Change internal verbose option
266 |   * Print green dot when quiet
267 |   * Fix options parser strings
268 |   * Line is too long fix
269 |   * Coding style
270 |   * Replace class var with a class instance var
271 |   * Use next to skip iteration
272 |   * Use a guard clause instead of wrapping the code
273 |   * spec wrong validation_type
274 |   * Prefer `$ERROR_INFO` from the English library over `$!`
275 |   * Use fail instead of raise to signal exceptions
276 |   * Coding style fix
277 | 
278 | v0.9.0 / 2014-09-20
279 | ===================
280 | 
281 |   * Bump to 0.9.0
282 |   * documentation update
283 |   * README: add Jekyll static site validation task
284 |   * move crawler from anemone to spidr gem
285 | 
286 | v0.8.1 / 2014-09-18
287 | ===================
288 | 
289 |   * bump to 0.8.1
290 |   * fix require set
291 | 
292 | v0.8.0 / 2014-09-18
293 | ===================
294 | 
295 |   * gemspec: fix pessimistic dependency
296 |   * gemspec: fix open-ended deps and bump to 0.8.0
297 |   * travis: remove jruby-head
298 |   * README cleanup
299 |   * README added badges and screenshot
300 |   * spec/validator_spec.rb: cleanup
301 |   * fix jruby build use Nokogiri::HTML instead of Nokogiri::XML
302 |   * travis: cache bundler
303 |   * move http testing to webmock
304 |   * travis: added config
305 |   * use set instead of array for links
306 |   * fix: use HTML5_VALIDATOR_SERVICE
307 |   * validate_website/core: code quality crawl
308 |   * validate_website/core: code quality extract_urls_from_img_script_iframe_link
309 |   * validate_website/core: code quality internet connection
310 |   * gemspec: added pry for development
311 |   * explanatory comments for classes
312 |   * validate_website/validator: code quality
313 |   * Change color gem from rainbow to paint
314 |   * Fix html5 validator spec
315 |   * README: rubygems package doesn't exist anymore
316 |   * Added some comment
317 | 
318 | v0.7.9 / 2013-03-18
319 | ===================
320 | 
321 |   * Bump to v0.7.9
322 |   * html5: change host because having some timeout
323 |   * README: more readable
324 |   * README: use markdown
325 |   * Added info about internet_connection.
326 |   * Indent fakeweb_helper.
327 | 
328 | v0.7.7 / 2012-07-23
329 | ===================
330 | 
331 |   * Bump to v0.7.7
332 |   * Update doc: Use dependency package default Ruby version
333 |   * Add ignore_errors option on validate-website-static
334 |   * Add contributors and incr year.
335 | 
336 | v0.7.6 / 2012-04-18
337 | ===================
338 | 
339 |   * Bump version to 0.7.6
340 |   * Documentation for --ignore-errors
341 |   * Merge default opts on crawl and static validator.
342 |   * Add spec for :ignore_errors option
343 |   * Add -i option for ignoring certain validation err
344 | 
345 | v0.7.5 / 2012-02-07
346 | ===================
347 | 
348 |   * Bump version to 0.7.5
349 |   * Ignore *.gem files.
350 |   * Add rspec matcher be_w3c_valid
351 |   * Get errors from http://validator.nu for HTML5
352 |   * Add encodings.
353 |   * README fixes.
354 | 
355 | v0.7.1 / 2011-12-25
356 | ===================
357 | 
358 |   * Bump version to 0.7.1
359 |   * Make test fail for issue #4
360 |   * Merge pull request #4 from nono/patch-1
361 |   * Merge pull request #5 from GunioRobot/clean
362 |   * Remove whitespace [Gun.io WhitespaceBot]
363 |   * Update lib/validate_website/core.rb
364 |   * Move to minitest
365 |   * Requirement fixes for tests
366 |   * Quiet in tests
367 |   * [Documentation] Validator for use on other application.
368 |   * Remove rubygems hooks, use bundler.
369 | 
370 | v0.7.0 / 2011-06-06
371 | ===================
372 | 
373 |   * Bump version to 0.7.0
374 |   * Check CSS files urls for static files
375 |   * Cleanup, useless body variable and not_found check
376 |   * Same options parse for static and crawl
377 |   * Document --site option for validate-website-static.
378 |   * Move to private validate extract_urls check_static_not_found
379 |   * Move crawl static logic to Core class and extract urls from img script iframe
380 |   * Oops, exit status 64 already used for failure markup.
381 |   * Add --color, --no-color options.
382 |   * Rescue on missing arg or invalid options parse.
383 | 
384 | v0.6.5 / 2011-06-05
385 | ===================
386 | 
387 |   * Bump version to 0.6.5
388 |   * Add some todos.
389 |   * Update dependencies.
390 |   * Use gemspec for build validate-website gem.
391 |   * README updates.
392 |   * HTML5 support using Validator.nu Web Service.
393 |   * Merge branch 'master' of github.com:spk/validate-website
394 |   * add alias for task spec
395 |   * README fix space
396 | 
397 | v0.6.1 / 2011-04-11
398 | ===================
399 | 
400 |   * Bump version to 0.6.1
401 |   * update doc and README
402 |   * Add :markup_validation and :not_found to validate-website-static
403 |   * add contributors, it is never too late
404 |   * follow recommendation from rubygems-test
405 |   * share to data directory
406 |   * Add Gemfile (bundler)
407 | 
408 | v0.6.0 / 2010-12-26
409 | ===================
410 | 
411 |   * Bump version to 0.6.0
412 |   * Add Runner class for executables
413 |   * Add option parser and document validate-website-static
414 |   * Can pass Hash options to ValidateWebsite::Core
415 |   * Add ValidateWebsite module to avoid conflicts
416 |   * Update README requirements
417 | 
418 | v0.5.7 / 2010-12-10
419 | ===================
420 | 
421 |   * Add validate-website-static executable
422 |   * Cleanup: remove spk-html5 and use upstream anemone
423 |   * ValidateWebsite code improvement for options
424 |   * Change Validator initialize argument
425 |   * Add linuxfr html5 page (should be valid)
426 | 
427 | v0.5.3 / 2010-12-05
428 | ===================
429 | 
430 |   * Bump version to 0.5.3
431 |   * Add -q, --quiet option (Only report errors)
432 |   * Improve installation documentation for Debian users
433 |   * print note on validating website
434 |   * rename internal option :error_verbose to :validate_verbose
435 | 
436 | v0.5.2 / 2010-11-05
437 | ===================
438 | 
439 |   * Bump version to 0.5.2
440 |   * Using my fork of html5 Rubygem
441 |   * Show line for html5 parser errors
442 | 
443 | v0.5.1 / 2010-11-04
444 | ===================
445 | 
446 |   * Bump version to 0.5.1
447 |   * Fix issue with 1.9.2 and CSS url (use first instead of to_s)
448 |   * Move get_url to private access
449 |   * Better requirement and remove require 'rubygems' from spec/spec_helper.rb
450 | 
451 | v0.5.0 / 2010-11-01
452 | ===================
453 | 
454 |   * Bump version to 0.5.0
455 |   * Change exit status
456 |   * Fix html4 validation by falling back to dtd validation
457 |   * Add failing test on html4 strict
458 |   * Update documentation
459 |   * Sync options with anemone
460 |   * Improve documentation and add manpage
461 |   * Add experimental html5 support
462 |   * Show properly errors with verbose option
463 |   * Update RSpec to version 2.0 and add spec task
464 | 
465 | v0.4.1 / 2010-10-24
466 | ===================
467 | 
468 |   * Bump version to 0.4.1
469 |   * Move to_file to private access
470 |   * Pass missing options to crawl (see on github #2)
471 |   * Add Validator spec file, rename and add html test on validate_website_spec
472 | 
473 | v0.4.0 / 2010-09-14
474 | ===================
475 | 
476 |   * Bump version to 0.4.0
477 |   * add lib/xhtml/xhtml-basic11.dtd file
478 |   * lib/validator.rb: cleanup and rescue on Nokogiri::XML::SyntaxError
479 |   * Add --[no-]markup-validation option
480 |   * typo capitalize help
481 |   * added debug options for anemone, and verbose option for validator errors
482 |   * include ColorfulMessages on ValidateWebsite class
483 | 
484 | v0.3.5 / 2010-08-25
485 | ===================
486 | 
487 |   * Bump version to 0.3.5 and add spec directory to pkg files
488 |   * Add default for ValidateWebsite initialize and crawl opts
489 |   * added test on css
490 |   * added development dependency: rspec and fakeweb
491 |   * Refactor validate website and crawl url in css
492 |   * updated README.rdoc
493 |   * added option -c for adding cookies
494 |   * added verbose option
495 |   * lib/validate_website.rb: bug fix on bad uri case bin/validate-website: minor change, use «unless» instead of «if not»
496 |   * search 404 in img, link, script and iframe tags
497 |   * Rename README to README.rdoc
498 |   * Update readme and gem spec
499 |   * Add not_found option (thanks to François de Metz)
500 |   * exit code depend of validation result
501 |   * only try to validate html file
502 |   * fix some ruby 1.9 issue
503 |   * fix some validation issue with no dtd or xsd
504 |   * update readme
505 |   * move to anemone web-spider, and use XML Schema for validation of XHTML
506 |   * add optparse options
507 |   * create a gem
508 |   * initial commit
509 | 


--------------------------------------------------------------------------------