├── .formatter.exs
├── .github
    └── workflows
    │   ├── bugfix-reproducer.yml
    │   └── ci-workflow.yml
├── .gitignore
├── .tool-versions
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── lib
    ├── html_sanitize_ex.ex
    └── html_sanitize_ex
    │   ├── parser.ex
    │   ├── scrubber.ex
    │   ├── scrubber
    │       ├── basic_html.ex
    │       ├── css.ex
    │       ├── html5.ex
    │       ├── markdown_html.ex
    │       ├── meta.ex
    │       ├── no_scrub.ex
    │       └── strip_tags.ex
    │   └── traverser.ex
├── mix.exs
├── mix.lock
└── test
    ├── basic_html_test.exs
    ├── css_test.exs
    ├── custom_scrubber_test.exs
    ├── html5_test.exs
    ├── html5_test_data_uri
    ├── html_sanitize_ex_test.exs
    ├── markdown_html_test.exs
    ├── no_scrub_test.exs
    ├── strip_tags_test.exs
    ├── test_helper.exs
    ├── test_if_tests_fail_after_resetting_lib.sh
    └── traverser_test.exs


/.formatter.exs:
--------------------------------------------------------------------------------
 1 | # Used by "mix format" and to export configuration.
 2 | export_locals_without_parens = []
 3 | 
 4 | [
 5 |   inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"],
 6 |   locals_without_parens: export_locals_without_parens,
 7 |   export: [locals_without_parens: export_locals_without_parens],
 8 |   line_length: 80
 9 | ]
10 | 


--------------------------------------------------------------------------------
/.github/workflows/bugfix-reproducer.yml:
--------------------------------------------------------------------------------
 1 | name: "Reproducing Test-Case Detector (experimental)"
 2 | 
 3 | on:
 4 |   pull_request:
 5 |     paths:
 6 |     - '**_test.exs'
 7 | 
 8 | jobs:
 9 |   test:
10 |     runs-on: ubuntu-latest
11 |     name: "Test for lib/ changes"
12 |     strategy:
13 |       matrix:
14 |         otp: [22.2]
15 |         elixir: [1.10.4]
16 |     steps:
17 |       - uses: actions/checkout@v2.3.1
18 | 
19 |       - uses: erlef/setup-beam@v1
20 |         with:
21 |           otp-version: ${{matrix.otp}}
22 |           elixir-version: ${{matrix.elixir}}
23 | 
24 |       - run: git fetch origin master:master
25 | 
26 |       - name: Check changes to lib/
27 |         id: check_changes
28 |         run: echo "::set-output name=changes_to_lib::$(git diff --name-only master | grep "^lib")"
29 | 
30 |       - name: There are changes to lib/
31 |         if: "contains(steps.check_changes.outputs.changes_to_lib, 'lib')"
32 |         run: |
33 |           mix deps.get
34 |           sh test/test_if_tests_fail_after_resetting_lib.sh
35 | 
36 |       - name: There are no changes to lib/
37 |         if: "!contains(steps.check_changes.outputs.changes_to_lib, 'lib')"
38 |         run: echo "${{ toJSON(steps.check_changes.outputs.changes_to_lib) }}"
39 | 


--------------------------------------------------------------------------------
/.github/workflows/ci-workflow.yml:
--------------------------------------------------------------------------------
 1 | name: "CI Tests"
 2 | on:
 3 |   push:
 4 |     branches:
 5 |       - master
 6 |       - release/*
 7 |   pull_request:
 8 |     branches:
 9 |       - master
10 | 
11 | jobs:
12 |   test:
13 |     runs-on: ubuntu-18.04
14 |     name: "[${{matrix.otp}}/${{matrix.elixir}}] CI Tests on Credo [OTP/Elixir]"
15 |     strategy:
16 |       fail-fast: false
17 |       matrix:
18 |         otp: [20.3, 21.3, 22.3, 23.3, 24.0, 25.1]
19 |         elixir: [1.7.4, 1.8.2, 1.9.4, 1.10.4, 1.11.4, 1.12.2, 1.14.1]
20 |         exclude:
21 |           - otp: 25.1
22 |             elixir: 1.7.4
23 |           - otp: 25.1
24 |             elixir: 1.8.2
25 |           - otp: 25.1
26 |             elixir: 1.9.4
27 |           - otp: 25.1
28 |             elixir: 1.10.4
29 |           - otp: 25.1
30 |             elixir: 1.11.4
31 |           - otp: 25.1
32 |             elixir: 1.12.2
33 |           - otp: 24.0
34 |             elixir: 1.7.4
35 |           - otp: 24.0
36 |             elixir: 1.8.2
37 |           - otp: 24.0
38 |             elixir: 1.9.4
39 |           - otp: 24.0
40 |             elixir: 1.10.4
41 |           - otp: 23.3
42 |             elixir: 1.7.4
43 |           - otp: 23.3
44 |             elixir: 1.8.2
45 |           - otp: 23.3
46 |             elixir: 1.9.4
47 |           - otp: 22.3
48 |             elixir: 1.14.1
49 |           - otp: 21.3
50 |             elixir: 1.12.2
51 |           - otp: 21.3
52 |             elixir: 1.14.1
53 |           - otp: 20.3
54 |             elixir: 1.10.4
55 |           - otp: 20.3
56 |             elixir: 1.11.4
57 |           - otp: 20.3
58 |             elixir: 1.12.2
59 |           - otp: 20.3
60 |             elixir: 1.14.1
61 |     steps:
62 |       - uses: actions/checkout@v2.3.1
63 |         with:
64 |           fetch-depth: 0
65 |       - uses: erlef/setup-beam@v1
66 |         with:
67 |           otp-version: ${{matrix.otp}}
68 |           elixir-version: ${{matrix.elixir}}
69 |       - run: mix deps.get
70 |       - run: mix deps.compile
71 |       - run: mix compile --warnings-as-errors
72 |       - run: mix test
73 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.elixir_ls
2 | /_build
3 | /deps
4 | /docs/all.json
5 | /doc
6 | test.json
7 | erl_crash.dump
8 | *.ez
9 | 


--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
1 | erlang 24.2
2 | elixir 1.13.4-otp-24
3 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | sudo: false
 2 | language: elixir
 3 | elixir:
 4 |   - 1.5.3
 5 |   - 1.6.6
 6 |   - 1.7.2
 7 |   - 1.8.2
 8 |   - 1.9.4
 9 | otp_release:
10 |   - 19.3
11 |   - 20.3
12 |   - 21.3
13 |   - 22.0
14 | script:
15 |   - mix deps.compile
16 |   - mix compile --warnings-as-errors
17 |   - mix test
18 | matrix:
19 |   exclude:
20 |   - elixir: 1.5.3
21 |     otp_release: 21.3
22 |   - elixir: 1.5.3
23 |     otp_release: 22.0
24 |   - elixir: 1.6.6
25 |     otp_release: 22.0
26 |   - elixir: 1.8.2
27 |     otp_release: 19.3
28 |   - elixir: 1.9.4
29 |     otp_release: 19.3
30 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | 
 3 | ## 1.4.3
 4 | 
 5 | - Allow `mochiweb` dep to be `~> 2.15 or ~> 3.1`
 6 | 
 7 | ## 1.4.2
 8 | 
 9 | - Fix regression when parsing schemes from URIs
10 | - Fix compiler warnings
11 | - Add missing `<body>` tag to `HTML5` scrubber
12 | 
13 | ## 1.4.1
14 | 
15 | - Add missing `<h6>` tag to `BasicHTML` and `MarkdownHTML` scrubbers
16 | 
17 | ## 1.4.0
18 | 
19 | - Add more missing HTML5 attributes
20 | - Add "middle" to valid CSS keywords
21 | 
22 | ## 1.3.0
23 | 
24 | - Add valid scheme for links: `mailto`
25 | - Update white-space handling in order to keep more of it untouched
26 | 
27 | ## 1.2.0
28 | 
29 | - Update `mochiweb` version requirement
30 | - Fix missing elements in HTML5: div, caption
31 | 
32 | ## 1.1.1
33 | 
34 | - Fix missing element in HTML5: blockquote
35 | 
36 | ## 1.1.0
37 | 
38 | - Add new scrubber: MarkdownHTML
39 | 
40 |   It is meant to scrub HTML that resulted from converting Markdown to HTML. It
41 |   supports GitHub flavored Markdown (GFM).
42 | 
43 | ## 1.0.1
44 | 
45 | - Fix Elixir 1.3 compiler warnings
46 | 
47 | ## 1.0.0
48 | 
49 | - First release
50 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2014 René Föhring
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining
 4 | a copy of this software and associated documentation files (the
 5 | "Software"), to deal in the Software without restriction, including
 6 | without limitation the rights to use, copy, modify, merge, publish,
 7 | distribute, sublicense, and/or sell copies of the Software, and to
 8 | permit persons to whom the Software is furnished to do so, subject to
 9 | the following conditions:
10 | 
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 | 
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # HtmlSanitizeEx [![Build Status](https://travis-ci.org/rrrene/html_sanitize_ex.svg)](https://travis-ci.org/rrrene/html_sanitize_ex) [![Inline docs](http://inch-ci.org/github/rrrene/html_sanitize_ex.svg?branch=master)](http://inch-ci.org/github/rrrene/html_sanitize_ex)
  2 | 
  3 | `html_sanitize_ex` provides a fast and straightforward HTML Sanitizer written in Elixir which lets you include HTML authored by third-parties in your web application while protecting against XSS.
  4 | 
  5 | It is the first Hex package to come out of the [elixirstatus.com](http://elixirstatus.com) project, where it will be used to sanitize user announcements from the Elixir community.
  6 | 
  7 | 
  8 | 
  9 | ## What can it do?
 10 | 
 11 | `html_sanitize_ex` parses a given HTML string and, based on the used [Scrubber](https://github.com/rrrene/html_sanitize_ex/tree/master/lib/html_sanitize_ex/scrubber), either completely strips it from HTML tags or sanitizes it by only allowing certain HTML elements and attributes to be present.
 12 | 
 13 | **NOTE:** The one thing missing at this moment is ***support for styles***. To add this, we have to implement a Scrubber for CSS, to prevent nasty CSS hacks using `<style>` tags and attributes.
 14 | 
 15 | Otherwise `html_sanitize_ex` is a full-featured HTML sanitizer.
 16 | 
 17 | ## Installation
 18 | 
 19 | Add html_sanitize_ex as a dependency in your `mix.exs` file.
 20 | 
 21 | ```elixir
 22 | defp deps do
 23 |   [{:html_sanitize_ex, "~> 1.4"}]
 24 | end
 25 | ```
 26 | 
 27 | After you are done, run `mix deps.get` in your shell to fetch the new dependency.
 28 | 
 29 | The only dependency of `html_sanitize_ex` is `mochiweb` which is used to parse HTML.
 30 | 
 31 | 
 32 | ## Usage
 33 | 
 34 | Depending on the scrubber you select, it can strip all tags from the given string:
 35 | 
 36 |     text = "<a href=\"javascript:alert('XSS');\">text here</a>"
 37 |     HtmlSanitizeEx.strip_tags(text)
 38 |     # => "text here"
 39 | 
 40 | Or allow certain basic HTML elements to remain:
 41 | 
 42 |     text = "<h1>Hello <script>World!</script></h1>"
 43 |     HtmlSanitizeEx.basic_html(text)
 44 |     # => "<h1>Hello World!</h1>"
 45 | 
 46 | There are built-in scrubbers that cover common use cases, but you can also
 47 | easily define custom scrubbers (see the next section).
 48 | 
 49 | The following default scrubbing options exist:
 50 | 
 51 |     HtmlSanitizeEx.basic_html(html)
 52 |     HtmlSanitizeEx.html5(html)
 53 |     HtmlSanitizeEx.markdown_html(html)
 54 |     HtmlSanitizeEx.strip_tags(html)
 55 | 
 56 | There is also one scrubber primarily used for testing:
 57 | 
 58 |     HtmlSanitizeEx.noscrub(html)
 59 | 
 60 | Before using a built-in scrubber, you should verify that it functions in the way
 61 | you expect. The built-in scrubbers are located in
 62 | [/lib/html_sanitize_ex/scrubber](https://github.com/rrrene/html_sanitize_ex/tree/master/lib/html_sanitize_ex/scrubber)
 63 | 
 64 | ## Custom Scrubbers
 65 | 
 66 | A custom scrubber has the advantage of allowing you to support only the minimum
 67 | functionality needed for your use case.
 68 | 
 69 | With a custom scrubber, you define which tags, attributes, and uri schemes (e.g.
 70 | `https`, `mailto`, `javascript`, etc.) are allowed. Anything not allowed can
 71 | then be stripped out.
 72 | 
 73 | There are also utility functions to remove CDATA sections and comments which you
 74 | will generally include.
 75 | 
 76 | Here is an example of a custom scrubber which allows only `p`, `h1`, and
 77 | `a` tags, and restricts the `href` attribute to only the `https` and `mailto`
 78 | [URI schemes](https://en.wikipedia.org/wiki/List_of_URI_schemes). It also
 79 | removes CDATA sections and comments.
 80 | 
 81 | Note that the scrubber should include `Meta.strip_everything_not_covered()` at
 82 | the end.
 83 | 
 84 | 
 85 | ```elixir
 86 | defmodule MyProject.MyScrubber do
 87 |   require HtmlSanitizeEx.Scrubber.Meta
 88 |   alias HtmlSanitizeEx.Scrubber.Meta
 89 | 
 90 |   Meta.remove_cdata_sections_before_scrub()
 91 |   Meta.strip_comments()
 92 | 
 93 |   Meta.allow_tag_with_these_attributes("p", [])
 94 |   Meta.allow_tag_with_these_attributes("h1", [])
 95 |   Meta.allow_tag_with_uri_attributes("a", ["href"], ["https", "mailto"])
 96 | 
 97 |   Meta.strip_everything_not_covered()
 98 | end
 99 | ```
100 | 
101 | Then, you can use the scrubber in your project by giving it as the second
102 | argument to `Scrubber.scrub/2`:
103 | 
104 | ```elixir
105 | defmodule MyProject.MyModule do
106 |   alias HtmlSanitizeEx.Scrubber
107 |   alias MyProject.MyScrubber
108 | 
109 |   def sanitize_html(html) do
110 |     Scrubber.scrub(html, MyScrubber)
111 |   end
112 | end
113 | ```
114 | 
115 | A great way to make a custom scrubber is to use one of the built-in scrubbers
116 | closest to your use case as a template. The built-in scrubbers are located in
117 | [/lib/html_sanitize_ex/scrubber](https://github.com/rrrene/html_sanitize_ex/tree/master/lib/html_sanitize_ex/scrubber)
118 | 
119 | 
120 | ## Contributing
121 | 
122 | 1. [Fork it!](http://github.com/rrrene/html_sanitize_ex/fork)
123 | 2. Create your feature branch (`git checkout -b my-new-feature`)
124 | 3. Commit your changes (`git commit -am 'Add some feature'`)
125 | 4. Push to the branch (`git push origin my-new-feature`)
126 | 5. Create a new Pull Request
127 | 
128 | 
129 | 
130 | ## Author
131 | 
132 | René Föhring (@rrrene)
133 | 
134 | 
135 | 
136 | 
137 | ## License
138 | 
139 | html_sanitize_ex is released under the MIT License. See the LICENSE file for further
140 | details.
141 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx do
 2 |   @moduledoc """
 3 |   Convenience entry points that run an HTML string through one of the
 4 |   built-in scrubbers.
 5 | 
 6 |   Every function delegates to `HtmlSanitizeEx.Scrubber.scrub/2`, which
 7 |   returns `""` when given `nil` or `""`.
 8 |   """
 9 | 
10 |   alias HtmlSanitizeEx.Scrubber
11 | 
12 |   @doc "Scrubs `html` using `HtmlSanitizeEx.Scrubber.NoScrub`."
13 |   def noscrub(html), do: Scrubber.scrub(html, Scrubber.NoScrub)
14 | 
15 |   @doc "Scrubs `html` using `HtmlSanitizeEx.Scrubber.BasicHTML`."
16 |   def basic_html(html), do: Scrubber.scrub(html, Scrubber.BasicHTML)
17 | 
18 |   @doc "Scrubs `html` using `HtmlSanitizeEx.Scrubber.HTML5`."
19 |   def html5(html), do: Scrubber.scrub(html, Scrubber.HTML5)
20 | 
21 |   @doc "Scrubs `html` using `HtmlSanitizeEx.Scrubber.MarkdownHTML`."
22 |   def markdown_html(html), do: Scrubber.scrub(html, Scrubber.MarkdownHTML)
23 | 
24 |   @doc "Scrubs `html` using `HtmlSanitizeEx.Scrubber.StripTags`."
25 |   def strip_tags(html), do: Scrubber.scrub(html, Scrubber.StripTags)
26 | end
27 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/parser.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Parser do
 2 |   @moduledoc """
 3 |   Converts HTML strings into `:mochiweb_html` token trees and back.
 4 | 
 5 |   The input is wrapped in a private root element before parsing; `parse/1`
 6 |   unwraps it again and `to_html/1` strips it from the serialized output.
 7 |   """
 8 | 
 9 |   @type html_tree :: tuple | list
10 | 
11 |   @my_root_node "html_sanitize_ex"
12 | 
13 |   # Placeholders that `before_parse/1` inserts next to whitespace following a
14 |   # tag and that `after_to_html/1` removes again.
15 |   @replacement_linebreak [239, 188, 191]
16 |   @replacement_space [239, 189, 191]
17 |   @replacement_tab [239, 190, 191]
18 | 
19 |   @doc """
20 |   Parses a HTML string.
21 | 
22 |   Returns the node itself when the input has exactly one top-level node,
23 |   otherwise the list of top-level nodes.
24 | 
25 |   ## Examples
26 | 
27 |       iex> HtmlSanitizeEx.Parser.parse("<div class=js-action>hello world</div>")
28 |       {"div", [{"class", "js-action"}], ["hello world"]}
29 | 
30 |       iex> HtmlSanitizeEx.Parser.parse("<div>first</div><div>second</div>")
31 |       [{"div", [], ["first"]}, {"div", [], ["second"]}]
32 |   """
33 |   @spec parse(binary) :: html_tree
34 |   def parse(html) do
35 |     wrapped = "<#{@my_root_node}>#{before_parse(html)}</#{@my_root_node}>"
36 |     {@my_root_node, [], children} = :mochiweb_html.parse(wrapped)
37 | 
38 |     # Match on the list shape instead of walking it with `length/1`.
39 |     case children do
40 |       [only_child] -> only_child
41 |       _ -> children
42 |     end
43 |   end
44 | 
45 |   # Marks line breaks, spaces and tabs that directly follow a `>` with the
46 |   # placeholders above. NOTE(review): presumably so this whitespace survives
47 |   # the parser round trip -- confirm against `:mochiweb_html` behaviour.
48 |   defp before_parse(html) do
49 |     html
50 |     |> String.replace(~r/(>)(\r?\n)/, "\\1 #{@replacement_linebreak} \\2")
51 |     |> String.replace(~r/(>)(\ +)(<)/, "\\1 #{@replacement_space}\\2\\3")
52 |     |> String.replace(~r/(>)(\t+)(<)/, "\\1 #{@replacement_tab}\\2\\3")
53 |   end
54 | 
55 |   @doc """
56 |   Serializes a node, or a list of nodes, back into an HTML string.
57 | 
58 |   The private root element and the whitespace placeholders added by
59 |   `parse/1` are removed from the result.
60 |   """
61 |   @spec to_html(html_tree) :: String.t()
62 |   def to_html(tokens) do
63 |     {@my_root_node, [], List.wrap(tokens)}
64 |     |> :mochiweb_html.to_html()
65 |     |> Enum.join()
66 |     |> String.replace(~r/^<#{@my_root_node}>/, "")
67 |     |> String.replace(~r/<\/#{@my_root_node}>$/, "")
68 |     # An HTML-escaped copy of the closing root tag is dropped as well.
69 |     |> String.replace("&lt;/#{@my_root_node}&gt;", "")
70 |     |> after_to_html()
71 |   end
72 | 
73 |   # Reverses `before_parse/1` by removing the placeholders again.
74 |   defp after_to_html(html) do
75 |     html
76 |     |> String.replace(~r/(\ ?#{@replacement_linebreak} )(\r?\n)/, "\\2")
77 |     |> String.replace(
78 |       ~r/(\&gt\;|>)(\ +)(#{@replacement_space})(\ +)(\&lt\;|<)/,
79 |       "\\1\\4\\5"
80 |     )
81 |     |> String.replace(
82 |       ~r/(\&gt\;|>)(\ +)(#{@replacement_tab})(\t+)(\&lt\;|<)/,
83 |       "\\1\\4\\5"
84 |     )
85 |   end
86 | 
87 |   @doc false
88 |   def replacement_for_linebreak, do: @replacement_linebreak
89 | 
90 |   @doc false
91 |   def replacement_for_space, do: @replacement_space
92 | 
93 |   @doc false
94 |   def replacement_for_tab, do: @replacement_tab
95 | end
96 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Scrubber do
 2 |   @moduledoc """
 3 |   Runs an HTML string through a scrubber module.
 4 |   """
 5 | 
 6 |   alias HtmlSanitizeEx.Parser
 7 |   alias HtmlSanitizeEx.Traverser
 8 | 
 9 |   @doc """
10 |   Scrubs `html` using `scrubber_module`.
11 | 
12 |   `nil` and `""` yield `""` without invoking the scrubber. Any other input is
13 |   passed to `scrubber_module.before_scrub/1`, parsed, traversed with the
14 |   scrubber and serialized back to an HTML string.
15 |   """
16 |   def scrub(html, _scrubber_module) when html in ["", nil], do: ""
17 | 
18 |   def scrub(html, scrubber_module) do
19 |     html
20 |     |> scrubber_module.before_scrub()
21 |     |> Parser.parse()
22 |     |> Traverser.traverse(scrubber_module)
23 |     |> Parser.to_html()
24 |   end
25 | end
26 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/basic_html.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Scrubber.BasicHTML do
 2 |   @moduledoc """
 3 |   Allows basic HTML tags to support user input for writing relatively
 4 |   plain text but allowing headings, links, bold, and so on.
 5 | 
 6 |   Does not allow any styling, HTML5 tags, video embeds etc.
 7 |   """
 8 | 
 9 |   require HtmlSanitizeEx.Scrubber.Meta
10 |   alias HtmlSanitizeEx.Scrubber.Meta
11 |   # URI schemes accepted in the "href" of <a> and the "src" of <img>.
12 |   @valid_schemes ["http", "https", "mailto"]
13 | 
14 |   # Removes any CDATA tags before the traverser/scrubber runs.
15 |   Meta.remove_cdata_sections_before_scrub()
16 | 
17 |   Meta.strip_comments()
18 | 
19 |   Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
20 |   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
21 | 
22 |   Meta.allow_tag_with_these_attributes("b", [])
23 |   Meta.allow_tag_with_these_attributes("blockquote", [])
24 |   Meta.allow_tag_with_these_attributes("br", [])
25 |   Meta.allow_tag_with_these_attributes("code", [])
26 |   Meta.allow_tag_with_these_attributes("del", [])
27 |   Meta.allow_tag_with_these_attributes("em", [])
28 |   Meta.allow_tag_with_these_attributes("h1", [])
29 |   Meta.allow_tag_with_these_attributes("h2", [])
30 |   Meta.allow_tag_with_these_attributes("h3", [])
31 |   Meta.allow_tag_with_these_attributes("h4", [])
32 |   Meta.allow_tag_with_these_attributes("h5", [])
33 |   Meta.allow_tag_with_these_attributes("h6", [])
34 |   Meta.allow_tag_with_these_attributes("hr", [])
35 |   Meta.allow_tag_with_these_attributes("i", [])
36 | 
37 |   Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
38 | 
39 |   Meta.allow_tag_with_these_attributes("img", [
40 |     "width",
41 |     "height",
42 |     "title",
43 |     "alt"
44 |   ])
45 | 
46 |   Meta.allow_tag_with_these_attributes("li", [])
47 |   Meta.allow_tag_with_these_attributes("ol", [])
48 |   Meta.allow_tag_with_these_attributes("p", [])
49 |   Meta.allow_tag_with_these_attributes("pre", [])
50 |   Meta.allow_tag_with_these_attributes("span", [])
51 |   Meta.allow_tag_with_these_attributes("strong", [])
52 |   Meta.allow_tag_with_these_attributes("table", [])
53 |   Meta.allow_tag_with_these_attributes("tbody", [])
54 |   Meta.allow_tag_with_these_attributes("td", [])
55 |   Meta.allow_tag_with_these_attributes("th", [])
56 |   Meta.allow_tag_with_these_attributes("thead", [])
57 |   Meta.allow_tag_with_these_attributes("tr", [])
58 |   Meta.allow_tag_with_these_attributes("u", [])
59 |   Meta.allow_tag_with_these_attributes("ul", [])
60 |   # Keep this call last, as the README instructs for every scrubber.
61 |   Meta.strip_everything_not_covered()
62 | end
63 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/css.ex:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeEx.Scrubber.CSS do
  2 |   @moduledoc """
  3 |   Scrub CSS.
  4 | 
  5 |   `scrub/1` looks for `property: value` declarations in a string. A
  6 |   declaration is kept only if its property is allow-listed and its value is
  7 |   non-empty after scrubbing; otherwise it is replaced by an empty string.
  8 |   """
  9 | 
 10 |   # Properties accepted by their exact name.
 11 |   @allowed_properties [
 12 |     "azimuth",
 13 |     "background",
 14 |     "border",
 15 |     "clear",
 16 |     "color",
 17 |     "cursor",
 18 |     "direction",
 19 |     "display",
 20 |     "elevation",
 21 |     "float",
 22 |     "font",
 23 |     "font-family",
 24 |     "font-size",
 25 |     "font-style",
 26 |     "font-variant",
 27 |     "font-weight",
 28 |     "height",
 29 |     "letter-spacing",
 30 |     "line-height",
 31 |     "margin",
 32 |     "overflow",
 33 |     "padding",
 34 |     "pause",
 35 |     "pause-after",
 36 |     "pause-before",
 37 |     "pitch",
 38 |     "pitch-range",
 39 |     "richness",
 40 |     "speak",
 41 |     "speak-header",
 42 |     "speak-numeral",
 43 |     "speak-punctuation",
 44 |     "speech-rate",
 45 |     "stress",
 46 |     "text-align",
 47 |     "text-decoration",
 48 |     "text-indent",
 49 |     "unicode-bidi",
 50 |     "vertical-align",
 51 |     "voice-family",
 52 |     "volume",
 53 |     "white-space",
 54 |     "width"
 55 |   ]
 56 | 
 57 |   # Properties starting with one of these prefixes are accepted as well, e.g.
 58 |   # "background-color", "border-collapse" or "margin-left".
 59 |   @allowed_property_prefixes ["background-", "border-", "margin-", "padding-"]
 60 | 
 61 |   @allowed_keywords [
 62 |     "auto",
 63 |     "aqua",
 64 |     "black",
 65 |     "block",
 66 |     "blue",
 67 |     "bold",
 68 |     "both",
 69 |     "bottom",
 70 |     "brown",
 71 |     "center",
 72 |     "collapse",
 73 |     "dashed",
 74 |     "dotted",
 75 |     "fuchsia",
 76 |     "gray",
 77 |     "green",
 78 |     "!important",
 79 |     "italic",
 80 |     "left",
 81 |     "lime",
 82 |     "maroon",
 83 |     "medium",
 84 |     "middle",
 85 |     "none",
 86 |     "navy",
 87 |     "normal",
 88 |     "nowrap",
 89 |     "olive",
 90 |     "pointer",
 91 |     "purple",
 92 |     "red",
 93 |     "right",
 94 |     "solid",
 95 |     "silver",
 96 |     "teal",
 97 |     "top",
 98 |     "transparent",
 99 |     "underline",
100 |     "white",
101 |     "yellow"
102 |   ]
103 | 
104 |   @doc """
105 |   Scrubs the CSS declarations found in `text`. Returns `""` for `nil`.
106 |   """
107 |   def scrub(nil), do: ""
108 | 
109 |   def scrub(text) do
110 |     # Blank out CSS and HTML comment delimiters before looking at declarations.
111 |     without_comments = String.replace(text, ~r/(\/\*|\*\/|<!--|-->)/, " ")
112 | 
113 |     Regex.replace(
114 |       ~r/([-\w]+)\s*:\s*([^:;]*)/,
115 |       without_comments,
116 |       fn _declaration, property, raw_value ->
117 |         case scrub_css(property, raw_value) do
118 |           {kept_property, kept_value} -> "#{kept_property}: #{kept_value}"
119 |           nil -> ""
120 |         end
121 |       end
122 |     )
123 |   end
124 | 
125 |   # Returns `{property, scrubbed_value}` when the property is allowed and the
126 |   # scrubbed value is non-empty, `nil` otherwise.
127 |   defp scrub_css(property, val) do
128 |     if allowed_property?(property) do
129 |       validate({property, scrub_val(val)})
130 |     end
131 |   end
132 | 
133 |   defp allowed_property?(property) do
134 |     property in @allowed_properties or
135 |       String.starts_with?(property, @allowed_property_prefixes)
136 |   end
137 | 
138 |   defp validate({_property, ""}), do: nil
139 |   defp validate({property, val}), do: {property, val}
140 | 
141 |   # A value containing a backslash or an ampersand is discarded entirely.
142 |   # Otherwise every whitespace-separated token that is neither an allowed
143 |   # keyword nor a measured unit is removed.
144 |   defp scrub_val(val) do
145 |     if String.match?(val, ~r/(\\|&)/) do
146 |       ""
147 |     else
148 |       Regex.replace(~r/(\S+)/, val, fn _all, token ->
149 |         if allowed_keyword?(token) || measured_unit?(token) do
150 |           token
151 |         else
152 |           ""
153 |         end
154 |       end)
155 |     end
156 |   end
157 | 
158 |   @doc """
159 |   Returns `true` if `val`, compared case-insensitively, is an allowed keyword.
160 |   """
161 |   def allowed_keyword?(val) do
162 |     String.downcase(val) in @allowed_keywords
163 |   end
164 | 
165 |   # Accepts a hex colour, an `rgb(` expression, or a number with at most two
166 |   # digits before and after the decimal point plus an optional unit suffix.
167 |   defp measured_unit?(val) do
168 |     Regex.match?(
169 |       ~r/\A(#[0-9a-f]+|rgb\(\d+%?,\d*%?,?\d*%?\)?|-?\d{0,2}\.?\d{0,2}(cm|em|ex|in|mm|pc|pt|px|%|,|\))?)\z/,
170 |       val
171 |     )
172 |   end
173 | end
174 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/html5.ex:
--------------------------------------------------------------------------------
   1 | #
   2 | # This is not yet ready in any way.
   3 | #
   4 | #
   5 | # TODOS:
   6 | #
   7 | # - add these attributes allowed on all elements:
   8 | #
   9 | #     aria-* data-* style
  10 | #
  11 | # - sanitize css in style= attributes
  12 | # - sanitize css in <style> tags
  13 | # - sanitize all URL containing fields regardless of element
  14 | #
  15 | defmodule HtmlSanitizeEx.Scrubber.HTML5 do
  16 |   @moduledoc """
  17 |   Allows all HTML5 tags to support user input.
  18 | 
  19 |   Sanitizes all malicious content.
  20 |   """
  21 | 
  22 |   require HtmlSanitizeEx.Scrubber.Meta
  23 |   alias HtmlSanitizeEx.Scrubber.Meta
  24 | 
  25 |   # Removes any CDATA tags before the traverser/scrubber runs.
  26 |   Meta.remove_cdata_sections_before_scrub()
  27 | 
  28 |   Meta.strip_comments()
  29 | 
  30 |   @valid_schemes ["http", "https", "mailto"]
  31 | 
  32 |   Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
  33 | 
  34 |   Meta.allow_tag_with_these_attributes("a", [
  35 |     "accesskey",
  36 |     "class",
  37 |     "contenteditable",
  38 |     "contextmenu",
  39 |     "dir",
  40 |     "draggable",
  41 |     "dropzone",
  42 |     "hidden",
  43 |     "id",
  44 |     "inert",
  45 |     "itemid",
  46 |     "itemprop",
  47 |     "itemref",
  48 |     "itemscope",
  49 |     "itemtype",
  50 |     "lang",
  51 |     "role",
  52 |     "spellcheck",
  53 |     "tabindex",
  54 |     "title",
  55 |     "translate",
  56 |     "target",
  57 |     "ping",
  58 |     "rel",
  59 |     "media",
  60 |     "hreflang",
  61 |     "type"
  62 |   ])
  63 | 
  64 |   Meta.allow_tag_with_these_attributes("article", [
  65 |     "accesskey",
  66 |     "autocapitalize",
  67 |     "class",
  68 |     "contenteditable",
  69 |     "contextmenu",
  70 |     "dir",
  71 |     "draggable",
  72 |     "dropzone",
  73 |     "hidden",
  74 |     "id",
  75 |     "inputmode",
  76 |     "is",
  77 |     "itemid",
  78 |     "itemprop",
  79 |     "itemref",
  80 |     "itemscope",
  81 |     "itemtype",
  82 |     "lang",
  83 |     "slot",
  84 |     "spellcheck",
  85 |     "style",
  86 |     "tabindex",
  87 |     "title",
  88 |     "translate"
  89 |   ])
  90 | 
  91 |   Meta.allow_tag_with_these_attributes("aside", [
  92 |     "accesskey",
  93 |     "autocapitalize",
  94 |     "class",
  95 |     "contenteditable",
  96 |     "contextmenu",
  97 |     "dir",
  98 |     "draggable",
  99 |     "dropzone",
 100 |     "hidden",
 101 |     "id",
 102 |     "inputmode",
 103 |     "is",
 104 |     "itemid",
 105 |     "itemprop",
 106 |     "itemref",
 107 |     "itemscope",
 108 |     "itemtype",
 109 |     "lang",
 110 |     "slot",
 111 |     "spellcheck",
 112 |     "style",
 113 |     "tabindex",
 114 |     "title",
 115 |     "translate"
 116 |   ])
 117 | 
 118 |   Meta.allow_tag_with_these_attributes("b", [
 119 |     "accesskey",
 120 |     "class",
 121 |     "contenteditable",
 122 |     "contextmenu",
 123 |     "dir",
 124 |     "draggable",
 125 |     "dropzone",
 126 |     "hidden",
 127 |     "id",
 128 |     "inert",
 129 |     "itemid",
 130 |     "itemprop",
 131 |     "itemref",
 132 |     "itemscope",
 133 |     "itemtype",
 134 |     "lang",
 135 |     "role",
 136 |     "spellcheck",
 137 |     "tabindex",
 138 |     "title",
 139 |     "translate"
 140 |   ])
 141 | 
 142 |   Meta.allow_tag_with_these_attributes("blockquote", [
 143 |     "accesskey",
 144 |     "cite",
 145 |     "class",
 146 |     "contenteditable",
 147 |     "contextmenu",
 148 |     "dir",
 149 |     "draggable",
 150 |     "dropzone",
 151 |     "hidden",
 152 |     "id",
 153 |     "itemid",
 154 |     "itemprop",
 155 |     "itemref",
 156 |     "itemscope",
 157 |     "itemtype",
 158 |     "lang",
 159 |     "spellcheck",
 160 |     "tabindex",
 161 |     "title",
 162 |     "translate"
 163 |   ])
 164 | 
 165 |   Meta.allow_tag_with_these_attributes("body", [
 166 |     "accesskey",
 167 |     "class",
 168 |     "contenteditable",
 169 |     "contextmenu",
 170 |     "dir",
 171 |     "draggable",
 172 |     "dropzone",
 173 |     "hidden",
 174 |     "id",
 175 |     "inert",
 176 |     "itemid",
 177 |     "itemprop",
 178 |     "itemref",
 179 |     "itemscope",
 180 |     "itemtype",
 181 |     "lang",
 182 |     "role",
 183 |     "spellcheck",
 184 |     "tabindex",
 185 |     "title",
 186 |     "translate"
 187 |   ])
 188 | 
 189 |   Meta.allow_tag_with_these_attributes("br", [
 190 |     "accesskey",
 191 |     "class",
 192 |     "contenteditable",
 193 |     "contextmenu",
 194 |     "dir",
 195 |     "draggable",
 196 |     "dropzone",
 197 |     "hidden",
 198 |     "id",
 199 |     "itemid",
 200 |     "itemprop",
 201 |     "itemref",
 202 |     "itemscope",
 203 |     "itemtype",
 204 |     "lang",
 205 |     "spellcheck",
 206 |     "tabindex",
 207 |     "title",
 208 |     "translate"
 209 |   ])
 210 | 
 211 |   Meta.allow_tag_with_these_attributes("caption", [
 212 |     "accesskey",
 213 |     "class",
 214 |     "contenteditable",
 215 |     "contextmenu",
 216 |     "dir",
 217 |     "draggable",
 218 |     "dropzone",
 219 |     "hidden",
 220 |     "id",
 221 |     "inert",
 222 |     "itemid",
 223 |     "itemprop",
 224 |     "itemref",
 225 |     "itemscope",
 226 |     "itemtype",
 227 |     "lang",
 228 |     "role",
 229 |     "spellcheck",
 230 |     "tabindex",
 231 |     "title",
 232 |     "translate"
 233 |   ])
 234 | 
 235 |   Meta.allow_tag_with_these_attributes("code", [
 236 |     "accesskey",
 237 |     "class",
 238 |     "contenteditable",
 239 |     "contextmenu",
 240 |     "dir",
 241 |     "draggable",
 242 |     "dropzone",
 243 |     "hidden",
 244 |     "id",
 245 |     "itemid",
 246 |     "itemprop",
 247 |     "itemref",
 248 |     "itemscope",
 249 |     "itemtype",
 250 |     "lang",
 251 |     "spellcheck",
 252 |     "tabindex",
 253 |     "title",
 254 |     "translate"
 255 |   ])
 256 | 
 257 |   Meta.allow_tag_with_these_attributes("del", [
 258 |     "accesskey",
 259 |     "cite",
 260 |     "datetime",
 261 |     "class",
 262 |     "contenteditable",
 263 |     "contextmenu",
 264 |     "dir",
 265 |     "draggable",
 266 |     "dropzone",
 267 |     "hidden",
 268 |     "id",
 269 |     "itemid",
 270 |     "itemprop",
 271 |     "itemref",
 272 |     "itemscope",
 273 |     "itemtype",
 274 |     "lang",
 275 |     "spellcheck",
 276 |     "tabindex",
 277 |     "title",
 278 |     "translate"
 279 |   ])
 280 | 
 281 |   Meta.allow_tag_with_these_attributes("div", [
 282 |     "accesskey",
 283 |     "class",
 284 |     "contenteditable",
 285 |     "contextmenu",
 286 |     "dir",
 287 |     "draggable",
 288 |     "dropzone",
 289 |     "hidden",
 290 |     "id",
 291 |     "inert",
 292 |     "itemid",
 293 |     "itemprop",
 294 |     "itemref",
 295 |     "itemscope",
 296 |     "itemtype",
 297 |     "lang",
 298 |     "role",
 299 |     "spellcheck",
 300 |     "tabindex",
 301 |     "title",
 302 |     "translate"
 303 |   ])
 304 | 
 305 |   Meta.allow_tag_with_these_attributes("em", [
 306 |     "accesskey",
 307 |     "class",
 308 |     "contenteditable",
 309 |     "contextmenu",
 310 |     "dir",
 311 |     "draggable",
 312 |     "dropzone",
 313 |     "hidden",
 314 |     "id",
 315 |     "itemid",
 316 |     "itemprop",
 317 |     "itemref",
 318 |     "itemscope",
 319 |     "itemtype",
 320 |     "lang",
 321 |     "spellcheck",
 322 |     "tabindex",
 323 |     "title",
 324 |     "translate"
 325 |   ])
 326 | 
 327 |   Meta.allow_tag_with_these_attributes("footer", [
 328 |     "accesskey",
 329 |     "autocapitalize",
 330 |     "class",
 331 |     "contenteditable",
 332 |     "contextmenu",
 333 |     "dir",
 334 |     "draggable",
 335 |     "dropzone",
 336 |     "hidden",
 337 |     "id",
 338 |     "inputmode",
 339 |     "is",
 340 |     "itemid",
 341 |     "itemprop",
 342 |     "itemref",
 343 |     "itemscope",
 344 |     "itemtype",
 345 |     "lang",
 346 |     "slot",
 347 |     "spellcheck",
 348 |     "style",
 349 |     "tabindex",
 350 |     "title",
 351 |     "translate"
 352 |   ])
 353 | 
 354 |   Meta.allow_tag_with_these_attributes("h1", [
 355 |     "accesskey",
 356 |     "class",
 357 |     "contenteditable",
 358 |     "contextmenu",
 359 |     "dir",
 360 |     "draggable",
 361 |     "dropzone",
 362 |     "hidden",
 363 |     "id",
 364 |     "inert",
 365 |     "itemid",
 366 |     "itemprop",
 367 |     "itemref",
 368 |     "itemscope",
 369 |     "itemtype",
 370 |     "lang",
 371 |     "role",
 372 |     "spellcheck",
 373 |     "tabindex",
 374 |     "title",
 375 |     "translate"
 376 |   ])
 377 | 
 378 |   Meta.allow_tag_with_these_attributes("h2", [
 379 |     "accesskey",
 380 |     "class",
 381 |     "contenteditable",
 382 |     "contextmenu",
 383 |     "dir",
 384 |     "draggable",
 385 |     "dropzone",
 386 |     "hidden",
 387 |     "id",
 388 |     "inert",
 389 |     "itemid",
 390 |     "itemprop",
 391 |     "itemref",
 392 |     "itemscope",
 393 |     "itemtype",
 394 |     "lang",
 395 |     "role",
 396 |     "spellcheck",
 397 |     "tabindex",
 398 |     "title",
 399 |     "translate"
 400 |   ])
 401 | 
 402 |   Meta.allow_tag_with_these_attributes("h3", [
 403 |     "accesskey",
 404 |     "class",
 405 |     "contenteditable",
 406 |     "contextmenu",
 407 |     "dir",
 408 |     "draggable",
 409 |     "dropzone",
 410 |     "hidden",
 411 |     "id",
 412 |     "inert",
 413 |     "itemid",
 414 |     "itemprop",
 415 |     "itemref",
 416 |     "itemscope",
 417 |     "itemtype",
 418 |     "lang",
 419 |     "role",
 420 |     "spellcheck",
 421 |     "tabindex",
 422 |     "title",
 423 |     "translate"
 424 |   ])
 425 | 
 426 |   Meta.allow_tag_with_these_attributes("h4", [
 427 |     "accesskey",
 428 |     "class",
 429 |     "contenteditable",
 430 |     "contextmenu",
 431 |     "dir",
 432 |     "draggable",
 433 |     "dropzone",
 434 |     "hidden",
 435 |     "id",
 436 |     "inert",
 437 |     "itemid",
 438 |     "itemprop",
 439 |     "itemref",
 440 |     "itemscope",
 441 |     "itemtype",
 442 |     "lang",
 443 |     "role",
 444 |     "spellcheck",
 445 |     "tabindex",
 446 |     "title",
 447 |     "translate"
 448 |   ])
 449 | 
 450 |   Meta.allow_tag_with_these_attributes("h5", [
 451 |     "accesskey",
 452 |     "class",
 453 |     "contenteditable",
 454 |     "contextmenu",
 455 |     "dir",
 456 |     "draggable",
 457 |     "dropzone",
 458 |     "hidden",
 459 |     "id",
 460 |     "inert",
 461 |     "itemid",
 462 |     "itemprop",
 463 |     "itemref",
 464 |     "itemscope",
 465 |     "itemtype",
 466 |     "lang",
 467 |     "role",
 468 |     "spellcheck",
 469 |     "tabindex",
 470 |     "title",
 471 |     "translate"
 472 |   ])
 473 | 
 474 |   Meta.allow_tag_with_these_attributes("h6", [
 475 |     "accesskey",
 476 |     "class",
 477 |     "contenteditable",
 478 |     "contextmenu",
 479 |     "dir",
 480 |     "draggable",
 481 |     "dropzone",
 482 |     "hidden",
 483 |     "id",
 484 |     "inert",
 485 |     "itemid",
 486 |     "itemprop",
 487 |     "itemref",
 488 |     "itemscope",
 489 |     "itemtype",
 490 |     "lang",
 491 |     "role",
 492 |     "spellcheck",
 493 |     "tabindex",
 494 |     "title",
 495 |     "translate"
 496 |   ])
 497 | 
 498 |   Meta.allow_tag_with_these_attributes("head", [
 499 |     "accesskey",
 500 |     "class",
 501 |     "contenteditable",
 502 |     "contextmenu",
 503 |     "dir",
 504 |     "draggable",
 505 |     "dropzone",
 506 |     "hidden",
 507 |     "id",
 508 |     "inert",
 509 |     "itemid",
 510 |     "itemprop",
 511 |     "itemref",
 512 |     "itemscope",
 513 |     "itemtype",
 514 |     "lang",
 515 |     "role",
 516 |     "spellcheck",
 517 |     "tabindex",
 518 |     "title",
 519 |     "translate"
 520 |   ])
 521 | 
 522 |   Meta.allow_tag_with_these_attributes("header", [
 523 |     "accesskey",
 524 |     "class",
 525 |     "contenteditable",
 526 |     "contextmenu",
 527 |     "dir",
 528 |     "draggable",
 529 |     "dropzone",
 530 |     "hidden",
 531 |     "id",
 532 |     "inert",
 533 |     "itemid",
 534 |     "itemprop",
 535 |     "itemref",
 536 |     "itemscope",
 537 |     "itemtype",
 538 |     "lang",
 539 |     "role",
 540 |     "spellcheck",
 541 |     "tabindex",
 542 |     "title",
 543 |     "translate"
 544 |   ])
 545 | 
 546 |   Meta.allow_tag_with_these_attributes("hgroup", [
 547 |     "accesskey",
 548 |     "class",
 549 |     "contenteditable",
 550 |     "contextmenu",
 551 |     "dir",
 552 |     "draggable",
 553 |     "dropzone",
 554 |     "hidden",
 555 |     "id",
 556 |     "inert",
 557 |     "itemid",
 558 |     "itemprop",
 559 |     "itemref",
 560 |     "itemscope",
 561 |     "itemtype",
 562 |     "lang",
 563 |     "role",
 564 |     "spellcheck",
 565 |     "tabindex",
 566 |     "title",
 567 |     "translate"
 568 |   ])
 569 | 
 570 |   Meta.allow_tag_with_these_attributes("hr", [
 571 |     "accesskey",
 572 |     "class",
 573 |     "contenteditable",
 574 |     "contextmenu",
 575 |     "dir",
 576 |     "draggable",
 577 |     "dropzone",
 578 |     "hidden",
 579 |     "id",
 580 |     "inert",
 581 |     "itemid",
 582 |     "itemprop",
 583 |     "itemref",
 584 |     "itemscope",
 585 |     "itemtype",
 586 |     "lang",
 587 |     "role",
 588 |     "spellcheck",
 589 |     "tabindex",
 590 |     "title",
 591 |     "translate"
 592 |   ])
 593 | 
 594 |   Meta.allow_tag_with_these_attributes("html", [
 595 |     "accesskey",
 596 |     "class",
 597 |     "contenteditable",
 598 |     "contextmenu",
 599 |     "dir",
 600 |     "draggable",
 601 |     "dropzone",
 602 |     "hidden",
 603 |     "id",
 604 |     "inert",
 605 |     "itemid",
 606 |     "itemprop",
 607 |     "itemref",
 608 |     "itemscope",
 609 |     "itemtype",
 610 |     "lang",
 611 |     "role",
 612 |     "spellcheck",
 613 |     "tabindex",
 614 |     "title",
 615 |     "translate",
 616 |     "manifest"
 617 |   ])
 618 | 
 619 |   Meta.allow_tag_with_these_attributes("i", [
 620 |     "accesskey",
 621 |     "class",
 622 |     "contenteditable",
 623 |     "contextmenu",
 624 |     "dir",
 625 |     "draggable",
 626 |     "dropzone",
 627 |     "hidden",
 628 |     "id",
 629 |     "inert",
 630 |     "itemid",
 631 |     "itemprop",
 632 |     "itemref",
 633 |     "itemscope",
 634 |     "itemtype",
 635 |     "lang",
 636 |     "role",
 637 |     "spellcheck",
 638 |     "tabindex",
 639 |     "title",
 640 |     "translate"
 641 |   ])
 642 | 
 643 |   Meta.allow_tag_with_uri_attributes("iframe", ["src"], @valid_schemes)
 644 | 
 645 |   Meta.allow_tag_with_these_attributes("iframe", [
 646 |     "accesskey",
 647 |     "class",
 648 |     "contenteditable",
 649 |     "contextmenu",
 650 |     "dir",
 651 |     "draggable",
 652 |     "dropzone",
 653 |     "hidden",
 654 |     "id",
 655 |     "inert",
 656 |     "itemid",
 657 |     "itemprop",
 658 |     "itemref",
 659 |     "itemscope",
 660 |     "itemtype",
 661 |     "lang",
 662 |     "role",
 663 |     "spellcheck",
 664 |     "tabindex",
 665 |     "title",
 666 |     "translate",
 667 |     "name",
 668 |     "sandbox",
 669 |     "seamless",
 670 |     "width",
 671 |     "height"
 672 |   ])
 673 | 
 674 |   Meta.allow_tag_with_uri_attributes(
 675 |     "img",
 676 |     ["src", "lowsrc", "srcset"],
 677 |     @valid_schemes
 678 |   )
 679 | 
 680 |   Meta.allow_tag_with_these_attributes("img", [
 681 |     "accesskey",
 682 |     "class",
 683 |     "contenteditable",
 684 |     "contextmenu",
 685 |     "dir",
 686 |     "draggable",
 687 |     "dropzone",
 688 |     "hidden",
 689 |     "id",
 690 |     "inert",
 691 |     "itemid",
 692 |     "itemprop",
 693 |     "itemref",
 694 |     "itemscope",
 695 |     "itemtype",
 696 |     "lang",
 697 |     "role",
 698 |     "spellcheck",
 699 |     "tabindex",
 700 |     "title",
 701 |     "translate",
 702 |     "alt",
 703 |     "crossorigin",
 704 |     "usemap",
 705 |     "ismap",
 706 |     "width",
 707 |     "height",
 708 |     "sizes"
 709 |   ])
 710 | 
 711 |   Meta.allow_tag_with_uri_attributes("input", ["src"], @valid_schemes)
 712 |   # NOTE(review): "formaction" takes a URL but is not scheme-checked like "src" — confirm.
 713 |   Meta.allow_tag_with_these_attributes("input", [
 714 |     "accesskey",
 715 |     "class",
 716 |     "contenteditable",
 717 |     "contextmenu",
 718 |     "dir",
 719 |     "draggable",
 720 |     "dropzone",
 721 |     "hidden",
 722 |     "id",
 723 |     "inert",
 724 |     "itemid",
 725 |     "itemprop",
 726 |     "itemref",
 727 |     "itemscope",
 728 |     "itemtype",
 729 |     "lang",
 730 |     "role",
 731 |     "spellcheck",
 732 |     "tabindex",
 733 |     "title",
 734 |     "translate",
 735 |     "accept",
 736 |     "alt",
 737 |     "autocomplete",
 738 |     "autofocus",
 739 |     "checked",
 740 |     "dirname",
 741 |     "disabled",
 742 |     "form",
 743 |     "formaction",
 744 |     "formenctype",
 745 |     "formmethod",
 746 |     "formnovalidate",
 747 |     "formtarget",
 748 |     "height",
 749 |     "inputmode",
 750 |     "list",
 751 |     "max",
 752 |     "maxlength",
 753 |     "min",
 754 |     "multiple",
 755 |     "name",
 756 |     "pattern",
 757 |     "placeholder",
 758 |     "readonly",
 759 |     "required",
 760 |     "size",
 761 |     "step",
 762 |     "type",
 763 |     "value",
 764 |     "width"
 765 |   ])
 766 | 
 767 |   Meta.allow_tag_with_these_attributes("ins", [
 768 |     "accesskey",
 769 |     "class",
 770 |     "contenteditable",
 771 |     "contextmenu",
 772 |     "dir",
 773 |     "draggable",
 774 |     "dropzone",
 775 |     "hidden",
 776 |     "id",
 777 |     "inert",
 778 |     "itemid",
 779 |     "itemprop",
 780 |     "itemref",
 781 |     "itemscope",
 782 |     "itemtype",
 783 |     "lang",
 784 |     "role",
 785 |     "spellcheck",
 786 |     "tabindex",
 787 |     "title",
 788 |     "translate",
 789 |     "cite",
 790 |     "datetime"
 791 |   ])
 792 | 
 793 |   Meta.allow_tag_with_these_attributes("kbd", [
 794 |     "accesskey",
 795 |     "class",
 796 |     "contenteditable",
 797 |     "contextmenu",
 798 |     "dir",
 799 |     "draggable",
 800 |     "dropzone",
 801 |     "hidden",
 802 |     "id",
 803 |     "inert",
 804 |     "itemid",
 805 |     "itemprop",
 806 |     "itemref",
 807 |     "itemscope",
 808 |     "itemtype",
 809 |     "lang",
 810 |     "role",
 811 |     "spellcheck",
 812 |     "tabindex",
 813 |     "title",
 814 |     "translate"
 815 |   ])
 816 | 
 817 |   Meta.allow_tag_with_these_attributes("keygen", [
 818 |     "accesskey",
 819 |     "class",
 820 |     "contenteditable",
 821 |     "contextmenu",
 822 |     "dir",
 823 |     "draggable",
 824 |     "dropzone",
 825 |     "hidden",
 826 |     "id",
 827 |     "inert",
 828 |     "itemid",
 829 |     "itemprop",
 830 |     "itemref",
 831 |     "itemscope",
 832 |     "itemtype",
 833 |     "lang",
 834 |     "role",
 835 |     "spellcheck",
 836 |     "tabindex",
 837 |     "title",
 838 |     "translate",
 839 |     "autofocus",
 840 |     "challenge",
 841 |     "disabled",
 842 |     "form",
 843 |     "keytype",
 844 |     "name"
 845 |   ])
 846 | 
 847 |   Meta.allow_tag_with_these_attributes("label", [
 848 |     "accesskey",
 849 |     "class",
 850 |     "contenteditable",
 851 |     "contextmenu",
 852 |     "dir",
 853 |     "draggable",
 854 |     "dropzone",
 855 |     "hidden",
 856 |     "id",
 857 |     "inert",
 858 |     "itemid",
 859 |     "itemprop",
 860 |     "itemref",
 861 |     "itemscope",
 862 |     "itemtype",
 863 |     "lang",
 864 |     "role",
 865 |     "spellcheck",
 866 |     "tabindex",
 867 |     "title",
 868 |     "translate",
 869 |     "form",
 870 |     "for"
 871 |   ])
 872 | 
 873 |   Meta.allow_tag_with_these_attributes("legend", [
 874 |     "accesskey",
 875 |     "class",
 876 |     "contenteditable",
 877 |     "contextmenu",
 878 |     "dir",
 879 |     "draggable",
 880 |     "dropzone",
 881 |     "hidden",
 882 |     "id",
 883 |     "inert",
 884 |     "itemid",
 885 |     "itemprop",
 886 |     "itemref",
 887 |     "itemscope",
 888 |     "itemtype",
 889 |     "lang",
 890 |     "role",
 891 |     "spellcheck",
 892 |     "tabindex",
 893 |     "title",
 894 |     "translate"
 895 |   ])
 896 | 
 897 |   Meta.allow_tag_with_these_attributes("li", [
 898 |     "accesskey",
 899 |     "class",
 900 |     "contenteditable",
 901 |     "contextmenu",
 902 |     "dir",
 903 |     "draggable",
 904 |     "dropzone",
 905 |     "hidden",
 906 |     "id",
 907 |     "inert",
 908 |     "itemid",
 909 |     "itemprop",
 910 |     "itemref",
 911 |     "itemscope",
 912 |     "itemtype",
 913 |     "lang",
 914 |     "role",
 915 |     "spellcheck",
 916 |     "tabindex",
 917 |     "title",
 918 |     "translate",
 919 |     "value"
 920 |   ])
 921 | 
 922 |   # Meta.allow_tag_with_uri_attributes   "link", ["href"], @valid_schemes
 923 |   # Meta.allow_tag_with_these_attributes "link", ["href rel media hreflang type sizes"]
 924 | 
 925 |   Meta.allow_tag_with_these_attributes("map", [
 926 |     "accesskey",
 927 |     "class",
 928 |     "contenteditable",
 929 |     "contextmenu",
 930 |     "dir",
 931 |     "draggable",
 932 |     "dropzone",
 933 |     "hidden",
 934 |     "id",
 935 |     "inert",
 936 |     "itemid",
 937 |     "itemprop",
 938 |     "itemref",
 939 |     "itemscope",
 940 |     "itemtype",
 941 |     "lang",
 942 |     "role",
 943 |     "spellcheck",
 944 |     "tabindex",
 945 |     "title",
 946 |     "translate",
 947 |     "name"
 948 |   ])
 949 | 
 950 |   Meta.allow_tag_with_these_attributes("mark", [
 951 |     "accesskey",
 952 |     "class",
 953 |     "contenteditable",
 954 |     "contextmenu",
 955 |     "dir",
 956 |     "draggable",
 957 |     "dropzone",
 958 |     "hidden",
 959 |     "id",
 960 |     "inert",
 961 |     "itemid",
 962 |     "itemprop",
 963 |     "itemref",
 964 |     "itemscope",
 965 |     "itemtype",
 966 |     "lang",
 967 |     "role",
 968 |     "spellcheck",
 969 |     "tabindex",
 970 |     "title",
 971 |     "translate"
 972 |   ])
 973 | 
 974 |   Meta.allow_tag_with_these_attributes("menu", [
 975 |     "accesskey",
 976 |     "class",
 977 |     "contenteditable",
 978 |     "contextmenu",
 979 |     "dir",
 980 |     "draggable",
 981 |     "dropzone",
 982 |     "hidden",
 983 |     "id",
 984 |     "inert",
 985 |     "itemid",
 986 |     "itemprop",
 987 |     "itemref",
 988 |     "itemscope",
 989 |     "itemtype",
 990 |     "lang",
 991 |     "role",
 992 |     "spellcheck",
 993 |     "tabindex",
 994 |     "title",
 995 |     "translate",
 996 |     "type",
 997 |     "label"
 998 |   ])
 999 |   # NOTE(review): "http-equiv" + "content" together permit <meta> refresh pragmas — confirm intended.
1000 |   Meta.allow_tag_with_these_attributes("meta", [
1001 |     "accesskey",
1002 |     "class",
1003 |     "contenteditable",
1004 |     "contextmenu",
1005 |     "dir",
1006 |     "draggable",
1007 |     "dropzone",
1008 |     "hidden",
1009 |     "id",
1010 |     "inert",
1011 |     "itemid",
1012 |     "itemprop",
1013 |     "itemref",
1014 |     "itemscope",
1015 |     "itemtype",
1016 |     "lang",
1017 |     "role",
1018 |     "spellcheck",
1019 |     "tabindex",
1020 |     "title",
1021 |     "translate",
1022 |     "name",
1023 |     "http-equiv",
1024 |     "content",
1025 |     "charset"
1026 |   ])
1027 | 
1028 |   Meta.allow_tag_with_these_attributes("meter", [
1029 |     "accesskey",
1030 |     "class",
1031 |     "contenteditable",
1032 |     "contextmenu",
1033 |     "dir",
1034 |     "draggable",
1035 |     "dropzone",
1036 |     "hidden",
1037 |     "id",
1038 |     "inert",
1039 |     "itemid",
1040 |     "itemprop",
1041 |     "itemref",
1042 |     "itemscope",
1043 |     "itemtype",
1044 |     "lang",
1045 |     "role",
1046 |     "spellcheck",
1047 |     "tabindex",
1048 |     "title",
1049 |     "translate",
1050 |     "value",
1051 |     "min",
1052 |     "max",
1053 |     "low",
1054 |     "high",
1055 |     "optimum"
1056 |   ])
1057 | 
1058 |   Meta.allow_tag_with_these_attributes("nav", [
1059 |     "accesskey",
1060 |     "class",
1061 |     "contenteditable",
1062 |     "contextmenu",
1063 |     "dir",
1064 |     "draggable",
1065 |     "dropzone",
1066 |     "hidden",
1067 |     "id",
1068 |     "inert",
1069 |     "itemid",
1070 |     "itemprop",
1071 |     "itemref",
1072 |     "itemscope",
1073 |     "itemtype",
1074 |     "lang",
1075 |     "role",
1076 |     "spellcheck",
1077 |     "tabindex",
1078 |     "title",
1079 |     "translate"
1080 |   ])
1081 | 
1082 |   # Meta.allow_tag_with_these_attributes "noscript"
1083 |   # NOTE(review): <object> "data" takes a URL, but no @valid_schemes check for it is visible here — confirm.
1084 |   Meta.allow_tag_with_these_attributes("object", [
1085 |     "accesskey",
1086 |     "class",
1087 |     "contenteditable",
1088 |     "contextmenu",
1089 |     "dir",
1090 |     "draggable",
1091 |     "dropzone",
1092 |     "hidden",
1093 |     "id",
1094 |     "inert",
1095 |     "itemid",
1096 |     "itemprop",
1097 |     "itemref",
1098 |     "itemscope",
1099 |     "itemtype",
1100 |     "lang",
1101 |     "role",
1102 |     "spellcheck",
1103 |     "tabindex",
1104 |     "title",
1105 |     "translate",
1106 |     "data",
1107 |     "type",
1108 |     "typemustmatch",
1109 |     "name",
1110 |     "usemap",
1111 |     "form",
1112 |     "width",
1113 |     "height"
1114 |   ])
1115 | 
1116 |   Meta.allow_tag_with_these_attributes("ol", [
1117 |     "accesskey",
1118 |     "class",
1119 |     "contenteditable",
1120 |     "contextmenu",
1121 |     "dir",
1122 |     "draggable",
1123 |     "dropzone",
1124 |     "hidden",
1125 |     "id",
1126 |     "inert",
1127 |     "itemid",
1128 |     "itemprop",
1129 |     "itemref",
1130 |     "itemscope",
1131 |     "itemtype",
1132 |     "lang",
1133 |     "role",
1134 |     "spellcheck",
1135 |     "tabindex",
1136 |     "title",
1137 |     "translate",
1138 |     "reversed",
1139 |     "start"
1140 |   ])
1141 | 
1142 |   Meta.allow_tag_with_these_attributes("optgroup", [
1143 |     "accesskey",
1144 |     "class",
1145 |     "contenteditable",
1146 |     "contextmenu",
1147 |     "dir",
1148 |     "draggable",
1149 |     "dropzone",
1150 |     "hidden",
1151 |     "id",
1152 |     "inert",
1153 |     "itemid",
1154 |     "itemprop",
1155 |     "itemref",
1156 |     "itemscope",
1157 |     "itemtype",
1158 |     "lang",
1159 |     "role",
1160 |     "spellcheck",
1161 |     "tabindex",
1162 |     "title",
1163 |     "translate",
1164 |     "disabled",
1165 |     "label"
1166 |   ])
1167 | 
1168 |   Meta.allow_tag_with_these_attributes("option", [
1169 |     "accesskey",
1170 |     "class",
1171 |     "contenteditable",
1172 |     "contextmenu",
1173 |     "dir",
1174 |     "draggable",
1175 |     "dropzone",
1176 |     "hidden",
1177 |     "id",
1178 |     "inert",
1179 |     "itemid",
1180 |     "itemprop",
1181 |     "itemref",
1182 |     "itemscope",
1183 |     "itemtype",
1184 |     "lang",
1185 |     "role",
1186 |     "spellcheck",
1187 |     "tabindex",
1188 |     "title",
1189 |     "translate",
1190 |     "disabled",
1191 |     "label",
1192 |     "selected",
1193 |     "value"
1194 |   ])
1195 | 
1196 |   Meta.allow_tag_with_these_attributes("output", [
1197 |     "accesskey",
1198 |     "class",
1199 |     "contenteditable",
1200 |     "contextmenu",
1201 |     "dir",
1202 |     "draggable",
1203 |     "dropzone",
1204 |     "hidden",
1205 |     "id",
1206 |     "inert",
1207 |     "itemid",
1208 |     "itemprop",
1209 |     "itemref",
1210 |     "itemscope",
1211 |     "itemtype",
1212 |     "lang",
1213 |     "role",
1214 |     "spellcheck",
1215 |     "tabindex",
1216 |     "title",
1217 |     "translate",
1218 |     "for",
1219 |     "form",
1220 |     "name"
1221 |   ])
1222 | 
1223 |   Meta.allow_tag_with_these_attributes("p", [
1224 |     "accesskey",
1225 |     "class",
1226 |     "contenteditable",
1227 |     "contextmenu",
1228 |     "dir",
1229 |     "draggable",
1230 |     "dropzone",
1231 |     "hidden",
1232 |     "id",
1233 |     "inert",
1234 |     "itemid",
1235 |     "itemprop",
1236 |     "itemref",
1237 |     "itemscope",
1238 |     "itemtype",
1239 |     "lang",
1240 |     "role",
1241 |     "spellcheck",
1242 |     "tabindex",
1243 |     "title",
1244 |     "translate"
1245 |   ])
1246 | 
1247 |   Meta.allow_tag_with_these_attributes("param", [
1248 |     "accesskey",
1249 |     "class",
1250 |     "contenteditable",
1251 |     "contextmenu",
1252 |     "dir",
1253 |     "draggable",
1254 |     "dropzone",
1255 |     "hidden",
1256 |     "id",
1257 |     "inert",
1258 |     "itemid",
1259 |     "itemprop",
1260 |     "itemref",
1261 |     "itemscope",
1262 |     "itemtype",
1263 |     "lang",
1264 |     "role",
1265 |     "spellcheck",
1266 |     "tabindex",
1267 |     "title",
1268 |     "translate",
1269 |     "name",
1270 |     "value"
1271 |   ])
1272 | 
1273 |   Meta.allow_tag_with_these_attributes("pre", [
1274 |     "accesskey",
1275 |     "class",
1276 |     "contenteditable",
1277 |     "contextmenu",
1278 |     "dir",
1279 |     "draggable",
1280 |     "dropzone",
1281 |     "hidden",
1282 |     "id",
1283 |     "inert",
1284 |     "itemid",
1285 |     "itemprop",
1286 |     "itemref",
1287 |     "itemscope",
1288 |     "itemtype",
1289 |     "lang",
1290 |     "role",
1291 |     "spellcheck",
1292 |     "tabindex",
1293 |     "title",
1294 |     "translate"
1295 |   ])
1296 | 
1297 |   Meta.allow_tag_with_these_attributes("progress", [
1298 |     "accesskey",
1299 |     "class",
1300 |     "contenteditable",
1301 |     "contextmenu",
1302 |     "dir",
1303 |     "draggable",
1304 |     "dropzone",
1305 |     "hidden",
1306 |     "id",
1307 |     "inert",
1308 |     "itemid",
1309 |     "itemprop",
1310 |     "itemref",
1311 |     "itemscope",
1312 |     "itemtype",
1313 |     "lang",
1314 |     "role",
1315 |     "spellcheck",
1316 |     "tabindex",
1317 |     "title",
1318 |     "translate",
1319 |     "value",
1320 |     "max"
1321 |   ])
1322 | 
1323 |   Meta.allow_tag_with_these_attributes("q", [
1324 |     "accesskey",
1325 |     "class",
1326 |     "contenteditable",
1327 |     "contextmenu",
1328 |     "dir",
1329 |     "draggable",
1330 |     "dropzone",
1331 |     "hidden",
1332 |     "id",
1333 |     "inert",
1334 |     "itemid",
1335 |     "itemprop",
1336 |     "itemref",
1337 |     "itemscope",
1338 |     "itemtype",
1339 |     "lang",
1340 |     "role",
1341 |     "spellcheck",
1342 |     "tabindex",
1343 |     "title",
1344 |     "translate",
1345 |     "cite"
1346 |   ])
1347 | 
1348 |   Meta.allow_tag_with_these_attributes("rp", [
1349 |     "accesskey",
1350 |     "class",
1351 |     "contenteditable",
1352 |     "contextmenu",
1353 |     "dir",
1354 |     "draggable",
1355 |     "dropzone",
1356 |     "hidden",
1357 |     "id",
1358 |     "inert",
1359 |     "itemid",
1360 |     "itemprop",
1361 |     "itemref",
1362 |     "itemscope",
1363 |     "itemtype",
1364 |     "lang",
1365 |     "role",
1366 |     "spellcheck",
1367 |     "tabindex",
1368 |     "title",
1369 |     "translate"
1370 |   ])
1371 | 
1372 |   Meta.allow_tag_with_these_attributes("rt", [
1373 |     "accesskey",
1374 |     "class",
1375 |     "contenteditable",
1376 |     "contextmenu",
1377 |     "dir",
1378 |     "draggable",
1379 |     "dropzone",
1380 |     "hidden",
1381 |     "id",
1382 |     "inert",
1383 |     "itemid",
1384 |     "itemprop",
1385 |     "itemref",
1386 |     "itemscope",
1387 |     "itemtype",
1388 |     "lang",
1389 |     "role",
1390 |     "spellcheck",
1391 |     "tabindex",
1392 |     "title",
1393 |     "translate"
1394 |   ])
1395 | 
1396 |   Meta.allow_tag_with_these_attributes("ruby", [
1397 |     "accesskey",
1398 |     "class",
1399 |     "contenteditable",
1400 |     "contextmenu",
1401 |     "dir",
1402 |     "draggable",
1403 |     "dropzone",
1404 |     "hidden",
1405 |     "id",
1406 |     "inert",
1407 |     "itemid",
1408 |     "itemprop",
1409 |     "itemref",
1410 |     "itemscope",
1411 |     "itemtype",
1412 |     "lang",
1413 |     "role",
1414 |     "spellcheck",
1415 |     "tabindex",
1416 |     "title",
1417 |     "translate"
1418 |   ])
1419 | 
1420 |   Meta.allow_tag_with_these_attributes("s", [
1421 |     "accesskey",
1422 |     "class",
1423 |     "contenteditable",
1424 |     "contextmenu",
1425 |     "dir",
1426 |     "draggable",
1427 |     "dropzone",
1428 |     "hidden",
1429 |     "id",
1430 |     "inert",
1431 |     "itemid",
1432 |     "itemprop",
1433 |     "itemref",
1434 |     "itemscope",
1435 |     "itemtype",
1436 |     "lang",
1437 |     "role",
1438 |     "spellcheck",
1439 |     "tabindex",
1440 |     "title",
1441 |     "translate"
1442 |   ])
1443 | 
1444 |   Meta.allow_tag_with_these_attributes("samp", [
1445 |     "accesskey",
1446 |     "class",
1447 |     "contenteditable",
1448 |     "contextmenu",
1449 |     "dir",
1450 |     "draggable",
1451 |     "dropzone",
1452 |     "hidden",
1453 |     "id",
1454 |     "inert",
1455 |     "itemid",
1456 |     "itemprop",
1457 |     "itemref",
1458 |     "itemscope",
1459 |     "itemtype",
1460 |     "lang",
1461 |     "role",
1462 |     "spellcheck",
1463 |     "tabindex",
1464 |     "title",
1465 |     "translate"
1466 |   ])
1467 | 
1468 |   # Meta.allow_tag_with_these_attributes "script", ["src async defer type charset"]
1469 | 
1470 |   Meta.allow_tag_with_these_attributes("section", [
1471 |     "accesskey",
1472 |     "class",
1473 |     "contenteditable",
1474 |     "contextmenu",
1475 |     "dir",
1476 |     "draggable",
1477 |     "dropzone",
1478 |     "hidden",
1479 |     "id",
1480 |     "inert",
1481 |     "itemid",
1482 |     "itemprop",
1483 |     "itemref",
1484 |     "itemscope",
1485 |     "itemtype",
1486 |     "lang",
1487 |     "role",
1488 |     "spellcheck",
1489 |     "tabindex",
1490 |     "title",
1491 |     "translate"
1492 |   ])
1493 | 
1494 |   Meta.allow_tag_with_these_attributes("select", [
1495 |     "accesskey",
1496 |     "class",
1497 |     "contenteditable",
1498 |     "contextmenu",
1499 |     "dir",
1500 |     "draggable",
1501 |     "dropzone",
1502 |     "hidden",
1503 |     "id",
1504 |     "inert",
1505 |     "itemid",
1506 |     "itemprop",
1507 |     "itemref",
1508 |     "itemscope",
1509 |     "itemtype",
1510 |     "lang",
1511 |     "role",
1512 |     "spellcheck",
1513 |     "tabindex",
1514 |     "title",
1515 |     "translate",
1516 |     "autofocus",
1517 |     "disabled",
1518 |     "form",
1519 |     "multiple",
1520 |     "name",
1521 |     "required",
1522 |     "size"
1523 |   ])
1524 | 
1525 |   Meta.allow_tag_with_these_attributes("small", [
1526 |     "accesskey",
1527 |     "class",
1528 |     "contenteditable",
1529 |     "contextmenu",
1530 |     "dir",
1531 |     "draggable",
1532 |     "dropzone",
1533 |     "hidden",
1534 |     "id",
1535 |     "inert",
1536 |     "itemid",
1537 |     "itemprop",
1538 |     "itemref",
1539 |     "itemscope",
1540 |     "itemtype",
1541 |     "lang",
1542 |     "role",
1543 |     "spellcheck",
1544 |     "tabindex",
1545 |     "title",
1546 |     "translate"
1547 |   ])
1548 | 
1549 |   Meta.allow_tag_with_uri_attributes("source", ["src"], @valid_schemes)
1550 | 
1551 |   Meta.allow_tag_with_these_attributes("source", [
1552 |     "accesskey",
1553 |     "class",
1554 |     "contenteditable",
1555 |     "contextmenu",
1556 |     "dir",
1557 |     "draggable",
1558 |     "dropzone",
1559 |     "hidden",
1560 |     "id",
1561 |     "inert",
1562 |     "itemid",
1563 |     "itemprop",
1564 |     "itemref",
1565 |     "itemscope",
1566 |     "itemtype",
1567 |     "lang",
1568 |     "role",
1569 |     "spellcheck",
1570 |     "tabindex",
1571 |     "title",
1572 |     "translate",
1573 |     "type",
1574 |     "media"
1575 |   ])
1576 | 
1577 |   Meta.allow_tag_with_these_attributes("span", [
1578 |     "accesskey",
1579 |     "class",
1580 |     "contenteditable",
1581 |     "contextmenu",
1582 |     "dir",
1583 |     "draggable",
1584 |     "dropzone",
1585 |     "hidden",
1586 |     "id",
1587 |     "inert",
1588 |     "itemid",
1589 |     "itemprop",
1590 |     "itemref",
1591 |     "itemscope",
1592 |     "itemtype",
1593 |     "lang",
1594 |     "role",
1595 |     "spellcheck",
1596 |     "tabindex",
1597 |     "title",
1598 |     "translate"
1599 |   ])
1600 | 
1601 |   Meta.allow_tag_with_these_attributes("strong", [
1602 |     "accesskey",
1603 |     "class",
1604 |     "contenteditable",
1605 |     "contextmenu",
1606 |     "dir",
1607 |     "draggable",
1608 |     "dropzone",
1609 |     "hidden",
1610 |     "id",
1611 |     "inert",
1612 |     "itemid",
1613 |     "itemprop",
1614 |     "itemref",
1615 |     "itemscope",
1616 |     "itemtype",
1617 |     "lang",
1618 |     "role",
1619 |     "spellcheck",
1620 |     "tabindex",
1621 |     "title",
1622 |     "translate"
1623 |   ])
1624 | 
1625 |   Meta.allow_tag_with_these_attributes("sub", [
1626 |     "accesskey",
1627 |     "class",
1628 |     "contenteditable",
1629 |     "contextmenu",
1630 |     "dir",
1631 |     "draggable",
1632 |     "dropzone",
1633 |     "hidden",
1634 |     "id",
1635 |     "inert",
1636 |     "itemid",
1637 |     "itemprop",
1638 |     "itemref",
1639 |     "itemscope",
1640 |     "itemtype",
1641 |     "lang",
1642 |     "role",
1643 |     "spellcheck",
1644 |     "tabindex",
1645 |     "title",
1646 |     "translate"
1647 |   ])
1648 | 
1649 |   Meta.allow_tag_with_these_attributes("summary", [
1650 |     "accesskey",
1651 |     "class",
1652 |     "contenteditable",
1653 |     "contextmenu",
1654 |     "dir",
1655 |     "draggable",
1656 |     "dropzone",
1657 |     "hidden",
1658 |     "id",
1659 |     "inert",
1660 |     "itemid",
1661 |     "itemprop",
1662 |     "itemref",
1663 |     "itemscope",
1664 |     "itemtype",
1665 |     "lang",
1666 |     "role",
1667 |     "spellcheck",
1668 |     "tabindex",
1669 |     "title",
1670 |     "translate"
1671 |   ])
1672 | 
1673 |   Meta.allow_tag_with_these_attributes("sup", [
1674 |     "accesskey",
1675 |     "class",
1676 |     "contenteditable",
1677 |     "contextmenu",
1678 |     "dir",
1679 |     "draggable",
1680 |     "dropzone",
1681 |     "hidden",
1682 |     "id",
1683 |     "inert",
1684 |     "itemid",
1685 |     "itemprop",
1686 |     "itemref",
1687 |     "itemscope",
1688 |     "itemtype",
1689 |     "lang",
1690 |     "role",
1691 |     "spellcheck",
1692 |     "tabindex",
1693 |     "title",
1694 |     "translate"
1695 |   ])
1696 | 
1697 |   Meta.allow_tag_with_these_attributes("table", [
1698 |     "accesskey",
1699 |     "class",
1700 |     "contenteditable",
1701 |     "contextmenu",
1702 |     "dir",
1703 |     "draggable",
1704 |     "dropzone",
1705 |     "hidden",
1706 |     "id",
1707 |     "inert",
1708 |     "itemid",
1709 |     "itemprop",
1710 |     "itemref",
1711 |     "itemscope",
1712 |     "itemtype",
1713 |     "lang",
1714 |     "role",
1715 |     "spellcheck",
1716 |     "tabindex",
1717 |     "title",
1718 |     "translate"
1719 |   ])
1720 | 
1721 |   Meta.allow_tag_with_these_attributes("tbody", [
1722 |     "accesskey",
1723 |     "class",
1724 |     "contenteditable",
1725 |     "contextmenu",
1726 |     "dir",
1727 |     "draggable",
1728 |     "dropzone",
1729 |     "hidden",
1730 |     "id",
1731 |     "inert",
1732 |     "itemid",
1733 |     "itemprop",
1734 |     "itemref",
1735 |     "itemscope",
1736 |     "itemtype",
1737 |     "lang",
1738 |     "role",
1739 |     "spellcheck",
1740 |     "tabindex",
1741 |     "title",
1742 |     "translate"
1743 |   ])
1744 | 
1745 |   Meta.allow_tag_with_these_attributes("td", [
1746 |     "accesskey",
1747 |     "class",
1748 |     "contenteditable",
1749 |     "contextmenu",
1750 |     "dir",
1751 |     "draggable",
1752 |     "dropzone",
1753 |     "hidden",
1754 |     "id",
1755 |     "inert",
1756 |     "itemid",
1757 |     "itemprop",
1758 |     "itemref",
1759 |     "itemscope",
1760 |     "itemtype",
1761 |     "lang",
1762 |     "role",
1763 |     "spellcheck",
1764 |     "tabindex",
1765 |     "title",
1766 |     "translate",
1767 |     "colspan",
1768 |     "rowspan",
1769 |     "headers"
1770 |   ])
1771 | 
1772 |   Meta.allow_tag_with_these_attributes("textarea", [
1773 |     "accesskey",
1774 |     "class",
1775 |     "contenteditable",
1776 |     "contextmenu",
1777 |     "dir",
1778 |     "draggable",
1779 |     "dropzone",
1780 |     "hidden",
1781 |     "id",
1782 |     "inert",
1783 |     "itemid",
1784 |     "itemprop",
1785 |     "itemref",
1786 |     "itemscope",
1787 |     "itemtype",
1788 |     "lang",
1789 |     "role",
1790 |     "spellcheck",
1791 |     "tabindex",
1792 |     "title",
1793 |     "translate",
1794 |     "autocomplete",
1795 |     "autofocus",
1796 |     "cols",
1797 |     "dirname",
1798 |     "disabled",
1799 |     "form",
1800 |     "inputmode",
1801 |     "maxlength",
1802 |     "name",
1803 |     "placeholder",
1804 |     "readonly",
1805 |     "required",
1806 |     "rows",
1807 |     "wrap"
1808 |   ])
1809 | 
1810 |   Meta.allow_tag_with_these_attributes("tfoot", [
1811 |     "accesskey",
1812 |     "class",
1813 |     "contenteditable",
1814 |     "contextmenu",
1815 |     "dir",
1816 |     "draggable",
1817 |     "dropzone",
1818 |     "hidden",
1819 |     "id",
1820 |     "inert",
1821 |     "itemid",
1822 |     "itemprop",
1823 |     "itemref",
1824 |     "itemscope",
1825 |     "itemtype",
1826 |     "lang",
1827 |     "role",
1828 |     "spellcheck",
1829 |     "tabindex",
1830 |     "title",
1831 |     "translate"
1832 |   ])
1833 | 
1834 |   Meta.allow_tag_with_these_attributes("th", [
1835 |     "accesskey",
1836 |     "class",
1837 |     "contenteditable",
1838 |     "contextmenu",
1839 |     "dir",
1840 |     "draggable",
1841 |     "dropzone",
1842 |     "hidden",
1843 |     "id",
1844 |     "inert",
1845 |     "itemid",
1846 |     "itemprop",
1847 |     "itemref",
1848 |     "itemscope",
1849 |     "itemtype",
1850 |     "lang",
1851 |     "role",
1852 |     "spellcheck",
1853 |     "tabindex",
1854 |     "title",
1855 |     "translate",
1856 |     "colspan",
1857 |     "rowspan",
1858 |     "headers",
1859 |     "scope",
1860 |     "abbr"
1861 |   ])
1862 | 
1863 |   Meta.allow_tag_with_these_attributes("thead", [
1864 |     "accesskey",
1865 |     "class",
1866 |     "contenteditable",
1867 |     "contextmenu",
1868 |     "dir",
1869 |     "draggable",
1870 |     "dropzone",
1871 |     "hidden",
1872 |     "id",
1873 |     "inert",
1874 |     "itemid",
1875 |     "itemprop",
1876 |     "itemref",
1877 |     "itemscope",
1878 |     "itemtype",
1879 |     "lang",
1880 |     "role",
1881 |     "spellcheck",
1882 |     "tabindex",
1883 |     "title",
1884 |     "translate"
1885 |   ])
1886 | 
1887 |   Meta.allow_tag_with_these_attributes("time", [
1888 |     "accesskey",
1889 |     "class",
1890 |     "contenteditable",
1891 |     "contextmenu",
1892 |     "dir",
1893 |     "draggable",
1894 |     "dropzone",
1895 |     "hidden",
1896 |     "id",
1897 |     "inert",
1898 |     "itemid",
1899 |     "itemprop",
1900 |     "itemref",
1901 |     "itemscope",
1902 |     "itemtype",
1903 |     "lang",
1904 |     "role",
1905 |     "spellcheck",
1906 |     "tabindex",
1907 |     "title",
1908 |     "translate",
1909 |     "datetime",
1910 |     "pubdate"
1911 |   ])
1912 | 
1913 |   Meta.allow_tag_with_these_attributes("title", [
1914 |     "accesskey",
1915 |     "class",
1916 |     "contenteditable",
1917 |     "contextmenu",
1918 |     "dir",
1919 |     "draggable",
1920 |     "dropzone",
1921 |     "hidden",
1922 |     "id",
1923 |     "inert",
1924 |     "itemid",
1925 |     "itemprop",
1926 |     "itemref",
1927 |     "itemscope",
1928 |     "itemtype",
1929 |     "lang",
1930 |     "role",
1931 |     "spellcheck",
1932 |     "tabindex",
1933 |     "title",
1934 |     "translate"
1935 |   ])
1936 | 
1937 |   Meta.allow_tag_with_these_attributes("tr", [
1938 |     "accesskey",
1939 |     "class",
1940 |     "contenteditable",
1941 |     "contextmenu",
1942 |     "dir",
1943 |     "draggable",
1944 |     "dropzone",
1945 |     "hidden",
1946 |     "id",
1947 |     "inert",
1948 |     "itemid",
1949 |     "itemprop",
1950 |     "itemref",
1951 |     "itemscope",
1952 |     "itemtype",
1953 |     "lang",
1954 |     "role",
1955 |     "spellcheck",
1956 |     "tabindex",
1957 |     "title",
1958 |     "translate"
1959 |   ])
1960 | 
1961 |   Meta.allow_tag_with_uri_attributes("track", ["src"], @valid_schemes)
1962 | 
1963 |   Meta.allow_tag_with_these_attributes("track", [
1964 |     "accesskey",
1965 |     "class",
1966 |     "contenteditable",
1967 |     "contextmenu",
1968 |     "dir",
1969 |     "draggable",
1970 |     "dropzone",
1971 |     "hidden",
1972 |     "id",
1973 |     "inert",
1974 |     "itemid",
1975 |     "itemprop",
1976 |     "itemref",
1977 |     "itemscope",
1978 |     "itemtype",
1979 |     "lang",
1980 |     "role",
1981 |     "spellcheck",
1982 |     "tabindex",
1983 |     "title",
1984 |     "translate",
1985 |     "default",
1986 |     "kind",
1987 |     "label",
1988 |     "srclang"
1989 |   ])
1990 | 
1991 |   Meta.allow_tag_with_these_attributes("u", [
1992 |     "accesskey",
1993 |     "class",
1994 |     "contenteditable",
1995 |     "contextmenu",
1996 |     "dir",
1997 |     "draggable",
1998 |     "dropzone",
1999 |     "hidden",
2000 |     "id",
2001 |     "inert",
2002 |     "itemid",
2003 |     "itemprop",
2004 |     "itemref",
2005 |     "itemscope",
2006 |     "itemtype",
2007 |     "lang",
2008 |     "role",
2009 |     "spellcheck",
2010 |     "tabindex",
2011 |     "title",
2012 |     "translate"
2013 |   ])
2014 | 
2015 |   Meta.allow_tag_with_these_attributes("ul", [
2016 |     "accesskey",
2017 |     "class",
2018 |     "contenteditable",
2019 |     "contextmenu",
2020 |     "dir",
2021 |     "draggable",
2022 |     "dropzone",
2023 |     "hidden",
2024 |     "id",
2025 |     "inert",
2026 |     "itemid",
2027 |     "itemprop",
2028 |     "itemref",
2029 |     "itemscope",
2030 |     "itemtype",
2031 |     "lang",
2032 |     "role",
2033 |     "spellcheck",
2034 |     "tabindex",
2035 |     "title",
2036 |     "translate"
2037 |   ])
2038 | 
2039 |   Meta.allow_tag_with_these_attributes("var", [
2040 |     "accesskey",
2041 |     "class",
2042 |     "contenteditable",
2043 |     "contextmenu",
2044 |     "dir",
2045 |     "draggable",
2046 |     "dropzone",
2047 |     "hidden",
2048 |     "id",
2049 |     "inert",
2050 |     "itemid",
2051 |     "itemprop",
2052 |     "itemref",
2053 |     "itemscope",
2054 |     "itemtype",
2055 |     "lang",
2056 |     "role",
2057 |     "spellcheck",
2058 |     "tabindex",
2059 |     "title",
2060 |     "translate"
2061 |   ])
2062 | 
2063 |   Meta.allow_tag_with_uri_attributes("video", ["src"], @valid_schemes)
2064 | 
2065 |   Meta.allow_tag_with_these_attributes("video", [
2066 |     "accesskey",
2067 |     "class",
2068 |     "contenteditable",
2069 |     "contextmenu",
2070 |     "dir",
2071 |     "draggable",
2072 |     "dropzone",
2073 |     "hidden",
2074 |     "id",
2075 |     "inert",
2076 |     "itemid",
2077 |     "itemprop",
2078 |     "itemref",
2079 |     "itemscope",
2080 |     "itemtype",
2081 |     "lang",
2082 |     "role",
2083 |     "spellcheck",
2084 |     "tabindex",
2085 |     "title",
2086 |     "translate",
2087 |     "crossorigin",
2088 |     "poster",
2089 |     "preload",
2090 |     "autoplay",
2091 |     "mediagroup",
2092 |     "loop",
2093 |     "muted",
2094 |     "controls",
2095 |     "width",
2096 |     "height"
2097 |   ])
2098 | 
2099 |   Meta.allow_tag_with_these_attributes("wbr", [
2100 |     "accesskey",
2101 |     "class",
2102 |     "contenteditable",
2103 |     "contextmenu",
2104 |     "dir",
2105 |     "draggable",
2106 |     "dropzone",
2107 |     "hidden",
2108 |     "id",
2109 |     "inert",
2110 |     "itemid",
2111 |     "itemprop",
2112 |     "itemref",
2113 |     "itemscope",
2114 |     "itemtype",
2115 |     "lang",
2116 |     "role",
2117 |     "spellcheck",
2118 |     "tabindex",
2119 |     "title",
2120 |     "translate"
2121 |   ])
2122 | 
2123 |   Meta.allow_tags_with_style_attributes([
2124 |     "a",
2125 |     "blockquote",
2126 |     "br",
2127 |     "code",
2128 |     "del",
2129 |     "em",
2130 |     "h1",
2131 |     "h2",
2132 |     "h3",
2133 |     "h4",
2134 |     "h5",
2135 |     "h6",
2136 |     "head",
2137 |     "header",
2138 |     "hgroup",
2139 |     "hr",
2140 |     "html",
2141 |     "i",
2142 |     "iframe",
2143 |     "img",
2144 |     "input",
2145 |     "ins",
2146 |     "kbd",
2147 |     "keygen",
2148 |     "label",
2149 |     "legend",
2150 |     "li",
2151 |     "link",
2152 |     "map",
2153 |     "mark",
2154 |     "menu",
2155 |     "meta",
2156 |     "meter",
2157 |     "nav",
2158 |     "noscript",
2159 |     "object",
2160 |     "ol",
2161 |     "optgroup",
2162 |     "option",
2163 |     "output",
2164 |     "p",
2165 |     "param",
2166 |     "pre",
2167 |     "progress",
2168 |     "q",
2169 |     "rp",
2170 |     "rt",
2171 |     "ruby",
2172 |     "s",
2173 |     "samp",
2174 |     "script",
2175 |     "section",
2176 |     "select",
2177 |     "small",
2178 |     "source",
2179 |     "span",
2180 |     "strong",
2181 |     "sub",
2182 |     "summary",
2183 |     "sup",
2184 |     "table",
2185 |     "tbody",
2186 |     "td",
2187 |     "textarea",
2188 |     "tfoot",
2189 |     "th",
2190 |     "thead",
2191 |     "time",
2192 |     "title",
2193 |     "tr",
2194 |     "track",
2195 |     "u",
2196 |     "ul",
2197 |     "var",
2198 |     "video",
2199 |     "wbr"
2200 |   ])
2201 | 
2202 |   # <style> elements are kept: their attributes are filtered and the CSS
2203 |   # text goes through scrub_css/1. Matches only with exactly one child.
2204 |   def scrub({"style", attrs, [css_text]}) do
2205 |     {"style", scrub_attributes("style", attrs), [scrub_css(css_text)]}
2206 |   end
2207 | 
2208 |   # Filters <style> attributes; scrub_attribute/2 returns nil for rejects.
2209 |   defp scrub_attributes("style", attrs) do
2210 |     attrs |> Enum.map(&scrub_attribute("style", &1)) |> Enum.reject(&is_nil/1)
2211 |   end
2212 | 
2213 |   def scrub_attribute("style", {"media", _} = attr), do: attr
2214 |   def scrub_attribute("style", {"type", _} = attr), do: attr
2215 |   def scrub_attribute("style", {"scoped", _} = attr), do: attr
2216 | 
2217 |   # Any attribute whose name starts with "data-" is kept unchanged,
2218 |   # whatever tag it is found on.
2219 |   def scrub_attribute(_tag, {"data-" <> _rest, _value} = attr), do: attr
2220 | 
2221 |   # Any attribute whose name starts with "aria-" is kept unchanged,
2222 |   # whatever tag it is found on.
2223 |   def scrub_attribute(_tag, {"aria-" <> _rest, _value} = attr), do: attr
2224 | 
2225 |   # Delegates to the CSS scrubber so the text of a <style> element is
2226 |   # cleaned by HtmlSanitizeEx.Scrubber.CSS.scrub/1.
2227 |   defp scrub_css(css), do: HtmlSanitizeEx.Scrubber.CSS.scrub(css)
2228 | 
2229 |   Meta.strip_everything_not_covered()
2230 | end
2231 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/markdown_html.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Scrubber.MarkdownHTML do
 2 |   @moduledoc """
 3 |   Allows basic HTML tags to support user input for writing relatively
 4 |   plain text with Markdown (GitHub flavoured Markdown supported).
 5 | 
 6 |   Technically this is a more relaxed version of the BasicHTML scrubber.
 7 | 
 8 |   Allows mailto-links, but no styling, HTML5 tags, video embeds etc.
 9 |   """
10 | 
11 |   require HtmlSanitizeEx.Scrubber.Meta
12 |   alias HtmlSanitizeEx.Scrubber.Meta
13 | 
14 |   @valid_schemes ["http", "https", "mailto"]
15 | 
16 |   # Removes the CDATA opening markers before the traverser/scrubber runs.
17 |   Meta.remove_cdata_sections_before_scrub()
18 | 
19 |   Meta.strip_comments()
20 | 
21 |   Meta.allow_tag_with_uri_attributes("a", ["href"], @valid_schemes)
22 |   Meta.allow_tag_with_these_attributes("a", ["name", "title"])
23 | 
24 |   Meta.allow_tag_with_this_attribute_values("a", "target", ["_blank"])
25 | 
26 |   Meta.allow_tag_with_this_attribute_values("a", "rel", [
27 |     "noopener",
28 |     "noreferrer"
29 |   ])
30 | 
31 |   Meta.allow_tag_with_these_attributes("b", [])
32 |   Meta.allow_tag_with_these_attributes("blockquote", [])
33 |   Meta.allow_tag_with_these_attributes("br", [])
34 |   Meta.allow_tag_with_these_attributes("code", ["class"])
35 |   Meta.allow_tag_with_these_attributes("del", [])
36 |   Meta.allow_tag_with_these_attributes("em", [])
37 |   Meta.allow_tag_with_these_attributes("h1", [])
38 |   Meta.allow_tag_with_these_attributes("h2", [])
39 |   Meta.allow_tag_with_these_attributes("h3", [])
40 |   Meta.allow_tag_with_these_attributes("h4", [])
41 |   Meta.allow_tag_with_these_attributes("h5", [])
42 |   Meta.allow_tag_with_these_attributes("h6", [])
43 |   Meta.allow_tag_with_these_attributes("hr", [])
44 |   Meta.allow_tag_with_these_attributes("i", [])
45 | 
46 |   Meta.allow_tag_with_uri_attributes("img", ["src"], @valid_schemes)
47 | 
48 |   Meta.allow_tag_with_these_attributes("img", [
49 |     "width",
50 |     "height",
51 |     "title",
52 |     "alt"
53 |   ])
54 | 
55 |   Meta.allow_tag_with_these_attributes("li", [])
56 |   Meta.allow_tag_with_these_attributes("ol", [])
57 |   Meta.allow_tag_with_these_attributes("p", [])
58 |   Meta.allow_tag_with_these_attributes("pre", [])
59 |   Meta.allow_tag_with_these_attributes("span", [])
60 |   Meta.allow_tag_with_these_attributes("strong", [])
61 |   Meta.allow_tag_with_these_attributes("table", [])
62 |   Meta.allow_tag_with_these_attributes("tbody", [])
63 |   Meta.allow_tag_with_these_attributes("td", [])
64 |   Meta.allow_tag_with_these_attributes("th", [])
65 |   Meta.allow_tag_with_these_attributes("thead", [])
66 |   Meta.allow_tag_with_these_attributes("tr", [])
67 |   Meta.allow_tag_with_these_attributes("u", [])
68 |   Meta.allow_tag_with_these_attributes("ul", [])
69 | 
70 |   Meta.strip_everything_not_covered()
71 | end
72 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/meta.ex:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeEx.Scrubber.Meta do
  2 |   @moduledoc """
  3 |   This module contains some meta-programming magic to define your own rules
  4 |   for scrubbers.
  5 | 
  6 |   The StripTags scrubber is a good starting point:
  7 | 
  8 |       defmodule HtmlSanitizeEx.Scrubber.StripTags do
  9 |         require HtmlSanitizeEx.Scrubber.Meta
 10 |         alias HtmlSanitizeEx.Scrubber.Meta
 11 | 
 12 |         # Removes any CDATA tags before the traverser/scrubber runs.
 13 |         Meta.remove_cdata_sections_before_scrub
 14 | 
 15 |         Meta.strip_comments
 16 | 
 17 |         Meta.strip_everything_not_covered
 18 |       end
 19 | 
 20 |   You can use the `allow_tag_with_uri_attributes/3` and
 21 |   `allow_tag_with_these_attributes/2` macros to define what is allowed:
 22 | 
 23 |       defmodule HtmlSanitizeEx.Scrubber.StripTags do
 24 |         require HtmlSanitizeEx.Scrubber.Meta
 25 |         alias HtmlSanitizeEx.Scrubber.Meta
 26 | 
 27 |         # Removes any CDATA tags before the traverser/scrubber runs.
 28 |         Meta.remove_cdata_sections_before_scrub
 29 | 
 30 |         Meta.strip_comments
 31 | 
 32 |         Meta.allow_tag_with_uri_attributes   "img", ["src"], ["http", "https"]
 33 |         Meta.allow_tag_with_these_attributes "img", ["width", "height"]
 34 | 
 35 |         Meta.strip_everything_not_covered
 36 |       end
 37 | 
 38 |   You can stack these if convenient:
 39 | 
 40 |       Meta.allow_tag_with_uri_attributes   "img", ["src"], ["http", "https"]
 41 |       Meta.allow_tag_with_these_attributes "img", ["width", "height"]
 42 |       Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
 43 | 
 44 |   """
 45 | 
 46 |   @doc """
 47 |   Allows every tag in +list+; the attributes of those tags are still
 48 |   filtered through the scrubber's `scrub_attribute/2` clauses.
 49 | 
 50 |   +list+ must be a literal list, as it is walked while the macro expands.
 51 |   """
 52 |   defmacro allow_tags_and_scrub_their_attributes(list) do
 53 |     Enum.map(list, &allow_this_tag_and_scrub_its_attributes/1)
 54 |   end
 55 | 
 56 |   @doc """
 57 |   Allows the tag +tag_name+ and, on it, each attribute named in +list+.
 58 |   With an empty +list+ only the tag itself is allowed by this call.
 59 | 
 60 |       Meta.allow_tag_with_these_attributes "a", ["name", "title"]
 61 |       Meta.allow_tag_with_these_attributes "img", ["title", "alt"]
 62 |   """
 63 |   defmacro allow_tag_with_these_attributes(tag_name, list \\ []) do
 64 |     attribute_clauses =
 65 |       Enum.map(list, &allow_this_tag_with_this_attribute(tag_name, &1))
 66 | 
 67 |     # The tag clause goes last, after one attribute clause per list entry.
 68 |     attribute_clauses ++ [allow_this_tag_and_scrub_its_attributes(tag_name)]
 69 |   end
 70 | 
 71 |   @doc """
 72 |   Allow the specified +tag+ together with any attributes it carries.
 73 | 
 74 |   Attribute pairs reaching the generated clause are returned unchanged.
 75 | 
 76 |       Meta.allow_tag_with_any_attributes "a"
 77 |       Meta.allow_tag_with_any_attributes "img"
 78 |   """
 79 |   defmacro allow_tag_with_any_attributes(tag_name) do
 80 |     quote do
 81 |       # Catch-all attribute clause for this tag: the pair passes untouched.
 82 |       def scrub_attribute(unquote(tag_name), {_, _} = attribute), do: attribute
 83 | 
 84 |       unquote(allow_this_tag_and_scrub_its_attributes(tag_name))
 85 |     end
 86 |   end
 87 | 
 88 |   @doc """
 89 |   Keeps +attribute+ on the specified +tag+ when its value is one of
 90 |   the given +values+; any other value is left to later clauses.
 91 | 
 92 |       Meta.allow_tag_with_this_attribute_values "a", "target", ["_blank"]
 93 |   """
 94 |   defmacro allow_tag_with_this_attribute_values(tag_name, attribute, values) do
 95 |     quote do
 96 |       # The guard restricts the clause to the whitelisted values.
 97 |       def scrub_attribute(unquote(tag_name), {unquote(attribute), v} = pair)
 98 |           when v in unquote(values),
 99 |           do: pair
100 |     end
101 |   end
102 | 
103 |   @doc """
104 |   Allow the given +list+ of attributes to hold URIs on the specified +tag+,
105 |   restricted to the schemes listed in +valid_schemes+.
106 | 
107 |       # Only allow SSL-enabled and mailto links
108 |       Meta.allow_tag_with_uri_attributes "a", ["href"], ["https", "mailto"]
109 | 
110 |       # Only allow non-SSL images
111 |       Meta.allow_tag_with_uri_attributes "img", ["src"], ["http"]
112 | 
113 |   Only attribute clauses are generated here, so the +tag+ itself has to be
114 |   allowed separately.
115 |   """
116 |   defmacro allow_tag_with_uri_attributes(tag, list, valid_schemes) do
117 |     Enum.map(list, &allow_tag_with_uri_attribute(tag, &1, valid_schemes))
118 |   end
119 | 
120 |   @doc """
121 |   Allow the `style` attribute on each tag in +list+; its value is scrubbed
122 |   with `HtmlSanitizeEx.Scrubber.CSS.scrub/1`. The tags are not allowed here.
123 |   """
124 |   defmacro allow_tags_with_style_attributes(list) do
125 |     Enum.map(list, &allow_this_tag_with_style_attribute/1)
126 |   end
127 | 
128 |   @doc """
129 |   Defines `before_scrub/1`, stripping `<![CDATA[` markers before scrubbing.
130 |   """
131 |   defmacro remove_cdata_sections_before_scrub do
132 |     quote do
133 |       def before_scrub(markup), do: String.replace(markup, "<![CDATA[", "")
134 |     end
135 |   end
136 | 
137 |   @doc """
138 |   Defines a `scrub/1` clause that replaces each comment node with `""`.
139 |   """
140 |   defmacro strip_comments do
141 |     quote do
142 |       def scrub({:comment, _children}), do: ""
143 |     end
144 |   end
145 | 
146 |   @doc """
147 |   Ensures any tags/attributes not explicitly whitelisted until this
148 |   statement are stripped.
149 |   """
150 |   defmacro strip_everything_not_covered do
151 |     replacement_linebreak =
152 |       "#{HtmlSanitizeEx.Parser.replacement_for_linebreak()}"
153 | 
154 |     replacement_space = "#{HtmlSanitizeEx.Parser.replacement_for_space()}"
155 |     replacement_tab = "#{HtmlSanitizeEx.Parser.replacement_for_tab()}"
156 | 
157 |     quote do
158 |       # If we haven't covered the attribute until here, we just scrub it.
159 |       def scrub_attribute(_tag, _attribute), do: nil
160 | 
161 |       # Tags not covered until here are replaced by their children.
162 |       def scrub({_tag, _attributes, children}), do: children
163 | 
164 |       def scrub({_tag, children}), do: children
165 |       # Leading Parser whitespace placeholders are removed (space becomes " ").
166 |       def scrub(unquote(" " <> replacement_linebreak <> " ") <> text), do: text
167 | 
168 |       def scrub(unquote(" " <> replacement_space <> " ") <> text),
169 |         do: " " <> text
170 | 
171 |       def scrub(unquote(" " <> replacement_tab <> " ") <> text), do: text
172 | 
173 |       # All other text is left alone.
174 |       def scrub("" <> text), do: text
175 |     end
176 |   end
177 | 
178 |   # Quotes scrub/1 and scrub_attributes/2 clauses that keep `tag_name`.
179 |   defp allow_this_tag_and_scrub_its_attributes(tag_name) do
180 |     quote do
181 |       def scrub({unquote(tag_name), attributes, children}) do
182 |         kept = scrub_attributes(unquote(tag_name), attributes)
183 |         {unquote(tag_name), kept, children}
184 |       end
185 |       # Attributes for which scrub_attribute/2 returns nil are dropped.
186 |       defp scrub_attributes(unquote(tag_name), attributes) do
187 |         attributes
188 |         |> Enum.map(&scrub_attribute(unquote(tag_name), &1))
189 |         |> Enum.reject(&is_nil/1)
190 |       end
191 |     end
192 |   end
193 | 
194 |   # Emits a scrub_attribute/2 clause keeping `attr_name` on `tag_name` as is.
195 |   defp allow_this_tag_with_this_attribute(tag_name, attr_name) do
196 |     quote do
197 |       def scrub_attribute(unquote(tag_name), {unquote(attr_name), _} = kept),
198 |         do: kept
199 |     end
200 |   end
201 | 
202 |   # Emits a clause that runs `style` values through the CSS scrubber.
203 |   defp allow_this_tag_with_style_attribute(tag_name) do
204 |     quote do
205 |       def scrub_attribute(unquote(tag_name), {"style", css}),
206 |         do: {"style", HtmlSanitizeEx.Scrubber.CSS.scrub(css)}
207 |     end
208 |   end
209 | 
210 |   defp allow_tag_with_uri_attribute(tag_name, attr_name, valid_schemes) do
211 |     quote do
212 |       def scrub_attribute(unquote(tag_name), {unquote(attr_name), "&" <> value}) do
213 |         nil
214 |       end
215 |       # Separator forms. NOTE(review): "&#x70" decodes to "p", not ":".
216 |       @protocol_separator ":|(&#0*58)|(&#x70)|(&#x0*3a)|(%|&#37;)3A"
217 |       @protocol_separator_regex Regex.compile!(@protocol_separator, "mi")
218 |       # "scheme" is the text before "<separator>//"; "other_schemes" is mailto.
219 |       @http_like_scheme "(?<scheme>.+?)(#{@protocol_separator})//"
220 |       @other_schemes "(?<other_schemes>mailto)(#{@protocol_separator})"
221 | 
222 |       @scheme_capture Regex.compile!(
223 |                         "(#{@http_like_scheme})|(#{@other_schemes})",
224 |                         "mi"
225 |                       )
226 | 
227 |       @max_scheme_length 20
228 |       # NOTE(review): the 0..@max_scheme_length slice takes up to 21 graphemes.
229 |       def scrub_attribute(unquote(tag_name), {unquote(attr_name), uri}) do
230 |         valid_schema =
231 |           if uri =~ @protocol_separator_regex do
232 |             case Regex.named_captures(
233 |                    @scheme_capture,
234 |                    uri |> String.slice(0..@max_scheme_length)
235 |                  ) do
236 |               %{"scheme" => scheme, "other_schemes" => ""} ->
237 |                 scheme in unquote(valid_schemes)
238 | 
239 |               %{"other_schemes" => scheme, "scheme" => ""} ->
240 |                 scheme in unquote(valid_schemes)
241 | 
242 |               _ ->
243 |                 false
244 |             end
245 |           else
246 |             true
247 |           end
248 |         # Yields nil (attribute dropped) unless the scheme check passed.
249 |         if valid_schema, do: {unquote(attr_name), uri}
250 |       end
251 |     end
252 |   end
253 | end
254 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/no_scrub.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Scrubber.NoScrub do
 2 |   @moduledoc """
 3 |   Scrubs neither tags, nor their attributes.
 4 | 
 5 |   This is meant for testing purposes and as a template for your own scrubber.
 6 |   """
 7 | 
 8 |   @doc """
 9 |   Hook for preprocessing the given +html+ String before it is scrubbed.
10 | 
11 |   This scrubber returns the input unchanged.
12 |   """
13 |   def before_scrub(html), do: html
14 | 
15 |   @doc """
16 |   Scrubs one node of the tree. The following shapes are handled:
17 | 
18 |   * `{tag, attributes, children}`: a tag; its attributes are passed through
19 |     `scrub_attribute/2` one by one, the children stay as they are.
20 |   * `{_token, children}`: tokens like comments and doctypes.
21 |   * anything else, i.e. a text node, which is returned unchanged.
22 |   """
23 |   def scrub({tag, attributes, children}),
24 |     do: {tag, scrub_attributes(tag, attributes), children}
25 | 
26 |   # Only the children of a token survive; the token wrapper is dropped.
27 |   def scrub({_token, children}), do: children
28 | 
29 |   def scrub(text), do: text
30 | 
31 |   # Attributes for which scrub_attribute/2 returns nil are removed.
32 |   @doc false
33 |   def scrub_attributes(tag, attributes) do
34 |     scrubbed = Enum.map(attributes, &scrub_attribute(tag, &1))
35 |     Enum.reject(scrubbed, &is_nil/1)
36 |   end
37 | 
38 |   @doc """
39 |   Scrubs a single attribute for a given tag; here every attribute is kept.
40 | 
41 |   In your own scrubber you can add `scrub_attribute/2` clauses with custom
42 |   matchers to sanitize specific attributes of specific tags. Return the
43 |   attribute tuple to keep it, or `nil` to have it removed.
44 | 
45 |   As an example, if you only want to allow href attribute values that
46 |   start with "http" (which covers "https" as well), you could implement
47 |   it like this:
48 | 
49 |       def scrub_attribute("a", {"href", "http" <> target}) do
50 |         {"href", "http" <> target}
51 |       end
52 | 
53 |       def scrub_attribute("a", {"href", _}) do
54 |         nil
55 |       end
56 |   """
57 |   def scrub_attribute(_tag, attribute), do: attribute
58 | end
59 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/scrubber/strip_tags.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Scrubber.StripTags do
 2 |   @moduledoc """
 3 |   Strips all tags and comments; the text between them is kept.
 4 |   """
 5 | 
 6 |   require HtmlSanitizeEx.Scrubber.Meta
 7 |   alias HtmlSanitizeEx.Scrubber.Meta
 8 | 
 9 |   # Removes the CDATA opening markers before the traverser/scrubber runs.
10 |   Meta.remove_cdata_sections_before_scrub()
11 |   # Comment nodes are replaced by an empty string.
12 |   Meta.strip_comments()
13 |   # No tag is whitelisted, so every tag is reduced to its children.
14 |   Meta.strip_everything_not_covered()
15 | end
16 | 


--------------------------------------------------------------------------------
/lib/html_sanitize_ex/traverser.ex:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Traverser do
 2 |   @moduledoc """
 3 |   Walks a parsed html_tree and applies a scrubber module to its nodes.
 4 |   """
 5 | 
 6 |   @doc """
 7 |   Traverses an html_tree, passing its nodes to `scrubber_module.scrub/1`.
 8 | 
 9 |   Children are traversed before the node that contains them is scrubbed.
10 |   A list of nodes is returned as one flattened list. Terms that are
11 |   neither lists, binaries nor 2- or 3-element tuples are returned as is.
12 |   """
13 |   def traverse([], _scrubber_module) do
14 |     []
15 |   end
16 | 
17 |   def traverse([first | rest], scrubber_module) do
18 |     scrubbed_first =
19 |       first
20 |       |> traverse(scrubber_module)
21 |       |> collapse_list()
22 | 
23 |     scrubbed_rest = traverse(rest, scrubber_module)
24 | 
25 |     List.flatten([scrubbed_first | scrubbed_rest])
26 |   end
27 | 
28 |   # Element nodes: the children are traversed first, then the element
29 |   # (with its new children) is handed to the scrubber.
30 |   def traverse({tag, attributes, children}, scrubber_module) do
31 |     scrubbed_children = traverse(children, scrubber_module)
32 | 
33 |     scrubber_module.scrub({tag, attributes, scrubbed_children})
34 |   end
35 | 
36 |   # Text nodes are handed to the scrubber directly.
37 |   def traverse(text, scrubber_module) when is_binary(text) do
38 |     scrubber_module.scrub(text)
39 |   end
40 | 
41 |   # Matches things like {:comment, "this is a comment"} or {:doctype, "..."}.
42 |   # The traversed payload goes through collapse_list/1 before scrubbing.
43 |   def traverse({token, children}, scrubber_module) do
44 |     scrubbed_children =
45 |       children
46 |       |> traverse(scrubber_module)
47 |       |> collapse_list()
48 | 
49 |     scrubber_module.scrub({token, scrubbed_children})
50 |   end
51 | 
52 |   # Fallback: any other term is returned untouched, without consulting
53 |   # the scrubber.
54 |   def traverse(other, _scrubber_module) do
55 |     other
56 |   end
57 | 
58 |   # Unwraps a list holding exactly one element and returns that element;
59 |   # every other value is handed back unchanged.
60 |   defp collapse_list([only_element]), do: only_element
61 |   defp collapse_list(other), do: other
62 | end
63 | 


--------------------------------------------------------------------------------
/mix.exs:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeEx.Mixfile do
 2 |   use Mix.Project
 3 | 
 4 |   @source_url "https://github.com/rrrene/html_sanitize_ex"
 5 | 
 6 |   def project do
 7 |     production? = Mix.env() == :prod
 8 | 
 9 |     [
10 |       app: :html_sanitize_ex,
11 |       version: "1.4.3",
12 |       elixir: "~> 1.0",
13 |       description: "HTML sanitizer for Elixir",
14 |       source_url: @source_url,
15 |       package: package(),
16 |       build_embedded: production?,
17 |       start_permanent: production?,
18 |       deps: deps()
19 |     ]
20 |   end
21 | 
22 |   # OTP application configuration.
23 |   #
24 |   # See `mix help compile.app` for the available options.
25 |   def application do
26 |     [applications: [:logger, :mochiweb]]
27 |   end
28 | 
29 |   # Package metadata (maintainers, licenses, links) used by project/0.
30 |   defp package do
31 |     [
32 |       maintainers: ["René Föhring"],
33 |       licenses: ["MIT"],
34 |       links: %{"GitHub" => @source_url}
35 |     ]
36 |   end
37 | 
38 |   # Dependencies of the project.
39 |   # Type `mix help deps` for more examples and options.
40 |   defp deps do
41 |     [
42 |       {:mochiweb, "~> 2.15 or ~> 3.1"},
43 |       {:ex_doc, ">= 0.0.0", only: :dev, runtime: false}
44 |     ]
45 |   end
46 | end
47 | 


--------------------------------------------------------------------------------
/mix.lock:
--------------------------------------------------------------------------------
 1 | %{
 2 |   "bunt": {:hex, :bunt, "0.2.0", "951c6e801e8b1d2cbe58ebbd3e616a869061ddadcc4863d0a2182541acae9a38", [:mix], [], "hexpm"},
 3 |   "earmark_parser": {:hex, :earmark_parser, "1.4.15", "b29e8e729f4aa4a00436580dcc2c9c5c51890613457c193cc8525c388ccb2f06", [:mix], [], "hexpm", "044523d6438ea19c1b8ec877ec221b008661d3c27e3b848f4c879f500421ca5c"},
 4 |   "ex_doc": {:hex, :ex_doc, "0.25.1", "4b736fa38dc76488a937e5ef2944f5474f3eff921de771b25371345a8dc810bc", [:mix], [{:earmark_parser, "~> 1.4.0", [hex: :earmark_parser, repo: "hexpm", optional: false]}, {:makeup_elixir, "~> 0.14", [hex: :makeup_elixir, repo: "hexpm", optional: false]}, {:makeup_erlang, "~> 0.1", [hex: :makeup_erlang, repo: "hexpm", optional: false]}], "hexpm", "3200b0a69ddb2028365281fbef3753ea9e728683863d8cdaa96580925c891f67"},
 5 |   "inch_ex": {:hex, :inch_ex, "2.0.0", "24268a9284a1751f2ceda569cd978e1fa394c977c45c331bb52a405de544f4de", [:mix], [{:bunt, "~> 0.2", [hex: :bunt, repo: "hexpm", optional: false]}, {:jason, "~> 1.0", [hex: :jason, repo: "hexpm", optional: false]}], "hexpm"},
 6 |   "jason": {:hex, :jason, "1.1.2", "b03dedea67a99223a2eaf9f1264ce37154564de899fd3d8b9a21b1a6fd64afe7", [:mix], [{:decimal, "~> 1.0", [hex: :decimal, repo: "hexpm", optional: true]}], "hexpm"},
 7 |   "makeup": {:hex, :makeup, "1.0.5", "d5a830bc42c9800ce07dd97fa94669dfb93d3bf5fcf6ea7a0c67b2e0e4a7f26c", [:mix], [{:nimble_parsec, "~> 0.5 or ~> 1.0", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "cfa158c02d3f5c0c665d0af11512fed3fba0144cf1aadee0f2ce17747fba2ca9"},
 8 |   "makeup_elixir": {:hex, :makeup_elixir, "0.15.1", "b5888c880d17d1cc3e598f05cdb5b5a91b7b17ac4eaf5f297cb697663a1094dd", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}, {:nimble_parsec, "~> 1.1", [hex: :nimble_parsec, repo: "hexpm", optional: false]}], "hexpm", "db68c173234b07ab2a07f645a5acdc117b9f99d69ebf521821d89690ae6c6ec8"},
 9 |   "makeup_erlang": {:hex, :makeup_erlang, "0.1.1", "3fcb7f09eb9d98dc4d208f49cc955a34218fc41ff6b84df7c75b3e6e533cc65f", [:mix], [{:makeup, "~> 1.0", [hex: :makeup, repo: "hexpm", optional: false]}], "hexpm", "174d0809e98a4ef0b3309256cbf97101c6ec01c4ab0b23e926a9e17df2077cbb"},
10 |   "mochiweb": {:hex, :mochiweb, "2.15.0", "e1daac474df07651e5d17cc1e642c4069c7850dc4508d3db7263a0651330aacc", [:rebar3], [], "hexpm", "b960d1cbcf40a30963eeee90ab7aeae074cbfa9a238561fb4434add1afc3075c"},
11 |   "nimble_parsec": {:hex, :nimble_parsec, "1.1.0", "3a6fca1550363552e54c216debb6a9e95bd8d32348938e13de5eda962c0d7f89", [:mix], [], "hexpm", "08eb32d66b706e913ff748f11694b17981c0b04a33ef470e33e11b3d3ac8f54b"},
12 |   "poison": {:hex, :poison, "2.1.0", "f583218ced822675e484648fa26c933d621373f01c6c76bd00005d7bd4b82e27", [:mix], []},
13 | }
14 | 


--------------------------------------------------------------------------------
/test/basic_html_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeExScrubberBasicHTMLTest do
  2 |   use ExUnit.Case, async: true
  3 | 
  4 |   defp basic_html_sanitize(text) do
  5 |     HtmlSanitizeEx.basic_html(text)
  6 |   end
  7 | 
  8 |   test "strips nothing" do
  9 |     input = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
 10 |     expected = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
 11 |     assert expected == basic_html_sanitize(input)
 12 |   end
 13 | 
 14 |   test "does strip language class from code tag" do
 15 |     input = "<code class=\"ruby\">Something.new</code>"
 16 |     expected = "<code>Something.new</code>"
 17 |     assert expected == basic_html_sanitize(input)
 18 |   end
 19 | 
 20 |   test "strips everything except the allowed tags" do
 21 |     input = "<h1>hello <script>code!</script></h1>"
 22 |     expected = "<h1>hello code!</h1>"
 23 |     assert expected == basic_html_sanitize(input)
 24 |   end
 25 | 
 26 |   test "strips everything except the allowed tags (for multiple tags)" do
 27 |     input =
 28 |       "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
 29 | 
 30 |     expected = "code!<p>hello code!</p>"
 31 |     assert expected == basic_html_sanitize(input)
 32 |   end
 33 | 
 34 |   test "strips everything for faulty allowed_tags: key" do
 35 |     input = "<h1>hello<h1>"
 36 |     expected = "hello"
 37 |     assert expected != basic_html_sanitize(input)
 38 |   end
 39 | 
 40 |   test "strips invalid html" do
 41 |     input = "<<<bad html"
 42 |     expected = "&lt;&lt;"
 43 |     assert expected == basic_html_sanitize(input)
 44 |   end
 45 | 
 46 |   test "strips tags with quote" do
 47 |     input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
 48 | 
 49 |     assert "&lt;\" <img src=\"trollface.gif\" /> hi" ==
 50 |              basic_html_sanitize(input)
 51 |   end
 52 | 
 53 |   test "strips nested tags" do
 54 |     input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
 55 |     expected = "Wei&lt;<a>a onclick='alert(document.cookie);'</a>/&gt;rdos"
 56 |     assert expected == basic_html_sanitize(input)
 57 |   end
 58 | 
 59 |   test "strips certain tags in multi line strings" do
 60 |     input =
 61 |       "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
 62 | 
 63 |     expected =
 64 |       "This is <b>a <a href=\"\">test</a></b>.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"
 65 | 
 66 |     assert expected == basic_html_sanitize(input)
 67 |   end
 68 | 
 69 |   test "strips blank string" do
 70 |     assert "" == basic_html_sanitize("")
 71 |     assert "" == basic_html_sanitize("  ")
 72 |     assert "" == basic_html_sanitize(nil)
 73 |   end
 74 | 
 75 |   test "strips nothing from plain text" do
 76 |     input = "Dont touch me"
 77 |     expected = "Dont touch me"
 78 |     assert expected == basic_html_sanitize(input)
 79 |   end
 80 | 
 81 |   test "strips nothing from a sentence" do
 82 |     input = "This is a test."
 83 |     expected = "This is a test."
 84 |     assert expected == basic_html_sanitize(input)
 85 |   end
 86 | 
 87 |   test "strips tags with comment" do
 88 |     input = "This has a <!-- comment --> here."
 89 |     expected = "This has a  here."
 90 |     assert expected == basic_html_sanitize(input)
 91 |   end
 92 | 
 93 |   test "strip_tags escapes special characters" do
 94 |     assert "&amp;" == basic_html_sanitize("&")
 95 |   end
 96 | 
 97 |   # link sanitizer
 98 | 
 99 |   test "test_strip_links_with_tags_in_tags" do
100 |     input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
101 |     expected = "&lt;<a>a href='hello'&gt;all <b>day</b> long&lt;</a>/a&gt;"
102 |     assert expected == basic_html_sanitize(input)
103 |   end
104 | 
105 |   test "test_strip_links_with_unclosed_tags" do
106 |     assert "" == basic_html_sanitize("<a<a")
107 |   end
108 | 
109 |   test "test_strip_links_with_plaintext" do
110 |     assert "Dont touch me" == basic_html_sanitize("Dont touch me")
111 |   end
112 | 
113 |   @tag href_scrubbing: true
114 |   test "test_strip_links_with_line_feed_and_uppercase_tag" do
115 |     input = "<a href='almost'>on my mind</a> <A href='almost'>all day long</A>"
116 | 
117 |     assert "<a href=\"almost\">on my mind</a> <a href=\"almost\">all day long</a>" ==
118 |              basic_html_sanitize(input)
119 |   end
120 | 
121 |   @tag href_scrubbing: true
122 |   test "test_strip_links_leaves_nonlink_tags" do
123 |     assert "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>" ==
124 |              basic_html_sanitize(
125 |                "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"
126 |              )
127 |   end
128 | 
129 |   @tag href_scrubbing: true
130 |   test "strips tags with basic_html_sanitize/1" do
131 |     input =
132 |       "<p>This <u>is</u> a <a href='test.html'><strong>test</strong></a>.</p>"
133 | 
134 |     assert "<p>This <u>is</u> a <a href=\"test.html\"><strong>test</strong></a>.</p>" ==
135 |              basic_html_sanitize(input)
136 |   end
137 | 
138 |   @a_href_hacks [
139 |     "<a href=\"javascript:alert('XSS');\">text here</a>",
140 |     "<a href=javascript:alert('XSS')>text here</a>",
141 |     "<a href=JaVaScRiPt:alert('XSS')>text here</a>",
142 |     "<a href=javascript:alert(&quot;XSS&quot;)>text here</a>",
143 |     "<a href=javascript:alert(String.fromCharCode(88,83,83))>text here</a>",
144 |     "<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>text here</a>",
145 |     "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>text here</a>",
146 |     "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>text here</a>",
147 |     "<a href=\"jav\tascript:alert('XSS');\">text here</a>",
148 |     "<a href=\"jav&#x09;ascript:alert('XSS');\">text here</a>",
149 |     "<a href=\"jav&#x0A;ascript:alert('XSS');\">text here</a>",
150 |     "<a href=\"jav&#x0D;ascript:alert('XSS');\">text here</a>",
151 |     "<a href=\" &#14;  javascript:alert('XSS');\">text here</a>",
152 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
153 |     "<a href=`javascript:alert(\"RSnake says, 'XSS'\")`>text here</a>",
154 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
155 |     "<a href=\"javascript&#x003a;alert('XSS');\">text here</a>",
156 |     "<a href=\"javascript&#x3A;alert('XSS');\">text here</a>",
157 |     "<a href=\"javascript&#x003A;alert('XSS');\">text here</a>",
158 |     "<a href=\"&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;\">text here</a>",
159 |     "<a href=\"JAVASCRIPT:alert(\'foo\')\">text here</a>",
160 |     "<a href=\"java<!-- -->script:alert(\'foo\')\">text here</a>",
161 |     "<a href=\"awesome.html#this:stuff\">text here</a>",
162 |     "<a href=\"java\0&#14;\t\r\n script:alert(\'foo\')\">text here</a>",
163 |     "<a href=\"javascript:alert(\'foo\')\">text here</a>",
164 |     "<a href=\"java&#0000000script:alert(\'foo\')\">text here</a>"
165 |   ]
166 | 
167 |   @tag href_scrubbing: true
168 |   test "strips malicious protocol hacks from a href attribute" do
169 |     # Each payload must come back as a bare anchor without its href.
170 |     for hack <- @a_href_hacks do
171 |       sanitized = basic_html_sanitize(hack)
172 |       assert "<a>text here</a>" == sanitized
173 |     end
174 |   end
175 | 
176 |   @tag href_scrubbing: true
177 |   test "does not strip x03a legitimate" do
178 |     assert "<a href=\"http://legit\"></a>" ==
179 |              basic_html_sanitize("<a href=\"http&#x3a;//legit\">")
180 | 
181 |     assert "<a href=\"http://legit\"></a>" ==
182 |              basic_html_sanitize("<a href=\"http&#x3A;//legit\">")
183 |   end
184 | 
185 |   test "test_strip links with links" do
186 |     input =
187 |       "<a href='http://www.elixirstatus.com/'><a href='http://www.elixirstatus.com/' onlclick='steal()'>0wn3d</a></a>"
188 | 
189 |     assert "<a href=\"http://www.elixirstatus.com/\"><a href=\"http://www.elixirstatus.com/\">0wn3d</a></a>" ==
190 |              basic_html_sanitize(input)
191 |   end
192 | 
193 |   test "test_strip_links_with_linkception" do
194 |     assert "<a href=\"http://www.elixirstatus.com/\">Mag<a href=\"http://www.elixir-lang.org/\">ic</a></a>" ==
195 |              basic_html_sanitize(
196 |                "<a href='http://www.elixirstatus.com/'>Mag<a href='http://www.elixir-lang.org/'>ic"
197 |              )
198 |   end
199 | 
200 |   test "test_strip_links_with_a_tag_in_href" do
201 |     assert "FrrFox" ==
202 |              basic_html_sanitize("<href onlclick='steal()'>FrrFox</a></href>")
203 |   end
204 | 
205 |   test "normal scrubbing does only allow certain tags and attributes" do
206 |     input = "<plaintext><span data-foo=\"bar\">foo</span></plaintext>"
207 |     expected = "<span>foo</span>"
208 |     assert expected == basic_html_sanitize(input)
209 |   end
210 | 
211 |   test "strips not allowed attributes" do
212 |     input =
213 |       "start <a title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</a> end"
214 | 
215 |     expected = "start <a title=\"1\">foo bar baz</a> end"
216 |     assert expected == basic_html_sanitize(input)
217 |   end
218 | 
219 |   test "sanitize_script" do
220 |     assert "a b cblah blah blahd e f" ==
221 |              basic_html_sanitize(
222 |                "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
223 |              )
224 |   end
225 | 
226 |   @tag href_scrubbing: true
227 |   test "sanitize_js_handlers" do
228 |     input =
229 |       ~s(onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>)
230 | 
231 |     assert "onthis=\"do that\" <a href=\"#\" name=\"foo\">hello</a>" ==
232 |              basic_html_sanitize(input)
233 |   end
234 | 
235 |   test "sanitize_javascript_href" do
236 |     raw =
237 |       ~s(href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>)
238 | 
239 |     assert ~s(href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>) ==
240 |              basic_html_sanitize(raw)
241 |   end
242 | 
243 |   test "sanitize_image_src" do
244 |     raw =
245 |       ~s(src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>)
246 | 
247 |     assert "src=\"javascript:bang\" <img width=\"5\" />, <span>bar</span>" ==
248 |              basic_html_sanitize(raw)
249 |   end
250 | 
251 |   @tag href_scrubbing: true
252 |   test "should only allow http/https protocols" do
253 |     assert "<a href=\"foo\">baz</a>" ==
254 |              basic_html_sanitize(
255 |                ~s(<a href="foo" onclick="bar"><script>baz</script></a>)
256 |              )
257 | 
258 |     assert "<a href=\"http://example.com\">baz</a>" ==
259 |              basic_html_sanitize(
260 |                ~s(<a href="http://example.com" onclick="bar"><script>baz</script></a>)
261 |              )
262 | 
263 |     assert "<a href=\"https://example.com\">baz</a>" ==
264 |              basic_html_sanitize(
265 |                ~s(<a href="https://example.com" onclick="bar"><script>baz</script></a>)
266 |              )
267 |   end
268 | 
269 |   # test "video_poster_sanitization" do
270 |   #  assert ~s(<video src="videofile.ogg" autoplay  poster="posterimage.jpg"></video>) == ~s(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
271 |   #  assert ~s(<video src="videofile.ogg"></video>) == basic_html_sanitize("<video src=\"videofile.ogg\" poster=javascript:alert(1)></video>")
272 |   # end
273 | 
274 |   test "strips not allowed tags " do
275 |     input = "<form><u></u></form>"
276 |     expected = "<u></u>"
277 |     assert expected == basic_html_sanitize(input)
278 |   end
279 | 
280 |   test "strips not allowed attributes " do
281 |     input = "<a foo=\"hello\" bar=\"world\"></a>"
282 |     expected = "<a></a>"
283 |     assert expected == basic_html_sanitize(input)
284 |   end
285 | 
286 |   @image_src_hacks [
287 |     "<IMG SRC=\"javascript:alert('XSS');\">",
288 |     "<IMG SRC=javascript:alert('XSS')>",
289 |     "<IMG SRC=JaVaScRiPt:alert('XSS')>",
290 |     "<IMG SRC=javascript:alert(&quot;XSS&quot;)>",
291 |     "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
292 |     "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
293 |     "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
294 |     "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
295 |     "<IMG SRC=\"jav\tascript:alert('XSS');\">",
296 |     "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
297 |     "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
298 |     "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",
299 |     "<IMG SRC=\" &#14;  javascript:alert('XSS');\">",
300 |     "<IMG SRC=\"javascript&#x3a;alert('XSS');\">",
301 |     "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>"
302 |   ]
303 | 
304 |   test "strips malicious protocol hacks from img src attribute" do
305 |     # Each payload must come back as an img tag without any attributes.
306 |     for hack <- @image_src_hacks do
307 |       sanitized = basic_html_sanitize(hack)
308 |       assert "<img />" == sanitized
309 |     end
310 |   end
311 | 
312 |   test "strips script tag" do
313 |     input = "<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>"
314 |     expected = ""
315 |     assert expected == basic_html_sanitize(input)
316 |   end
317 | 
318 |   test "strips xss image hack with uppercase tags" do
319 |     input = "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"
320 |     expected = "<img />alert(\"XSS\")\"&gt;"
321 |     assert expected == basic_html_sanitize(input)
322 |   end
323 | 
324 |   test "should_sanitize_tag_broken_up_by_null" do
325 |     assert "alert(\"XSS\")" ==
326 |              basic_html_sanitize("<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>")
327 |   end
328 | 
329 |   test "should_sanitize_invalid_script_tag" do
330 |     input = "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"
331 |     assert "" == basic_html_sanitize(input)
332 |   end
333 | 
334 |   test "should_sanitize_unclosed_script" do
335 |     input = "<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>"
336 |     assert "" == basic_html_sanitize(input)
337 |   end
338 | 
339 |   test "sanitize half open scripts" do
340 |     input = "<IMG SRC=\"javascript:alert('XSS')\""
341 |     assert "<img />" == basic_html_sanitize(input)
342 |   end
343 | 
344 |   test "should_not_fall_for_ridiculous_hack" do
345 |     img_hack = """
346 |     <IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
347 |     """
348 | 
349 |     assert "<img />)\n" == basic_html_sanitize(img_hack)
350 |   end
351 | 
352 |   test "should_sanitize_within attributes" do
353 |     input =
354 |       "<span title=\"&#39;&gt;&lt;script&gt;alert()&lt;/script&gt;\">blah</span>"
355 | 
356 |     assert "<span>blah</span>" == basic_html_sanitize(input)
357 |   end
358 | 
359 |   test "should_sanitize_invalid_tag_names" do
360 |   end
361 | 
362 |   test "should_sanitize_non_alpha_and_non_digit_characters_in_tags" do
363 |     assert "<a></a>foo" ==
364 |              basic_html_sanitize(
365 |                "<a onclick!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>"
366 |              )
367 |   end
368 | 
369 |   test "should_sanitize_invalid_tag_names_in_single_tags" do
370 |     assert "<img />" ==
371 |              basic_html_sanitize("<img/src=\"http://ha.ckers.org/xss.js\"/>")
372 |   end
373 | 
374 |   test "should_sanitize_img_dynsrc_lowsrc" do
375 |     assert "<img />" ==
376 |              basic_html_sanitize("<img lowsrc=\"javascript:alert('XSS')\" />")
377 |   end
378 | 
379 |   test "should_sanitize_img_vbscript" do
380 |     assert "<img />" ==
381 |              basic_html_sanitize("<img src='vbscript:msgbox(\"XSS\")' />")
382 |   end
383 | 
384 |   @tag cdata: true
385 |   test "should_sanitize_cdata_section" do
386 |     assert "<span>section</span>]]&gt;" ==
387 |              basic_html_sanitize("<![CDATA[<span>section</span>]]>")
388 |   end
389 | 
390 |   @tag cdata: true
391 |   test "should_sanitize_cdata_section like any other" do
392 |     assert "section]]&gt;" ==
393 |              basic_html_sanitize("<![CDATA[<script>section</script>]]>")
394 |   end
395 | 
396 |   @tag cdata: true
397 |   test "should_sanitize_unterminated_cdata_section" do
398 |     assert "<span>neverending...</span>" ==
399 |              basic_html_sanitize("<![CDATA[<span>neverending...")
400 |   end
401 | 
402 |   @tag cdata: true
403 |   test "strips CDATA" do
404 |     input = "This has a <![CDATA[<section>]]> here."
405 |     expected = "This has a ]]&gt; here."
406 |     assert expected == basic_html_sanitize(input)
407 |   end
408 | 
409 |   test "should_not_mangle_urls_with_ampersand" do
410 |     input = "<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>"
411 |     assert input == basic_html_sanitize(input)
412 |   end
413 | 
414 |   test "should_not_crash_on_invalid_schema_formatting" do
415 |     input =
416 |       "<a href=\"http//www.domain.com/?encoded_param=param1%3Aparam2\">text here</a>"
417 | 
418 |     assert "<a>text here</a>" == basic_html_sanitize(input)
419 |   end
420 | 
421 |   test "should_not_crash_on_invalid_schema_formatting_2" do
422 |     input = "<a href=\"ftp://www.domain.com/http%3A//\">text here</a>"
423 |     assert "<a>text here</a>" == basic_html_sanitize(input)
424 |   end
425 | 
426 |   test "should_sanitize_neverending_attribute" do
427 |     assert "<span></span>" == basic_html_sanitize("<span class=\"\\")
428 |   end
429 | 
430 |   # test "this affects only NS4, but we're on a roll, right?" do
431 |   #  input = "<div size=\"&{alert('XSS')}\">foo</div>"
432 |   #  expected = "<div>foo</div>"
433 |   #  assert expected == basic_html_sanitize(input)
434 |   # end
435 | 
436 |   test "does not strip the mailto URI scheme" do
437 |     input = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
438 |     expected = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
439 |     assert expected == basic_html_sanitize(input)
440 |   end
441 | 
442 |   test "does not strip any header tags" do
443 |     input = """
444 |     <h1>Header 1</h1>
445 |     <h2>Header 2</h2>
446 |     <h3>Header 3</h3>
447 |     <h4>Header 4</h4>
448 |     <h5>Header 5</h5>
449 |     <h6>Header 6</h6>
450 |     """
451 | 
452 |     assert input == basic_html_sanitize(input)
453 |   end
454 | end
455 | 


--------------------------------------------------------------------------------
/test/css_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeExScrubberCSSTest do
  2 |   use ExUnit.Case, async: true
  3 | 
  4 |   def scrub_css(text) do
  5 |     HtmlSanitizeEx.Scrubber.CSS.scrub(text)
  6 |   end
  7 | 
  8 |   @good_css [
  9 |     ".test { color: red; border: 1px solid brown; }",
 10 |     "div.foo { width: 500px; height: 200px; }",
 11 |     # gibberish should work
 12 |     "GI b gkljfl kj { { { ********"
 13 |   ]
 14 | 
 15 |   test "should return valid css" do
 16 |     for css <- @good_css do
 17 |       assert css == scrub_css(css)
 18 |     end
 19 |   end
 20 | 
 21 |   # @good_css_background [
 22 |   #    "h1 { background: url(http://foobar.com/meh.jpg)}",
 23 |   #  ]
 24 | 
 25 |   # test "should return valid css 2" do
 26 |   #  Enum.each(@good_css_background, fn text ->
 27 |   #    assert text == scrub_css(text)
 28 |   #  end)
 29 |   # end
 30 | 
 31 |   @evil_css [
 32 |     "div.foo { width: 500px; behavior: url(http://foo.com); height: 200px; }",
 33 |     ".test { color: red; background-image: url('javascript:alert');  border: 1px solid brown; }",
 34 |     "div.foo { width: 500px; -moz-binding: foo; height: 200px; }",
 35 | 
 36 |     # no @import for you
 37 |     "\@import url(javascript:alert('Your cookie:'+document.cookie));",
 38 | 
 39 |     # no behavior either
 40 |     "behaviour:expression(function(element){alert(&#39;xss&#39;);}(this));'>",
 41 | 
 42 |     # case-sensitivity test
 43 |     "-Moz-binding: url(\"http://www.example.comtest.xml\");",
 44 | 
 45 |     # \\d gets parsed out on ffx and ie
 46 |     "background:url(&quot;javascri\\dpt:alert('injected js goes here')&quot;)",
 47 | 
 48 |     # http://rt.livejournal.org/Ticket/Display.html?id=436
 49 |     "-\4d oz-binding: url(\"http://localhost/test.xml#foo\");",
 50 | 
 51 |     # css comments are ignored sometimes
 52 |     "xss:expr/*XSS*/ession(alert('XSS'));",
 53 | 
 54 |     # html comments? fail
 55 |     "background:url(java<!-- -->script:alert('XSS'));",
 56 |     "a.foo { ba/* hack */r: x }",
 57 | 
 58 |     # weird comments
 59 |     "color: e/* * / */xpression(\"r\" + \"e\" + \"d\");",
 60 | 
 61 |     # weird comments to really test that regex
 62 |     "color: e/*/**/xpression(\"r\" + \"e\" + \"d\");",
 63 | 
 64 |     # we're not using a parser, but nonetheless ... if we were..
 65 |     """
 66 |     p {
 67 |     dummy: '//'; background:url(javascript:alert('XSS'));
 68 |     }
 69 |     """,
 70 |     """
 71 |     test{ width: expression(alert("sux 2 be u")); }
 72 |     a:link { color: red }
 73 |     """
 74 |   ]
 75 | 
 76 |   test "should NOT return invalid css" do
 77 |     for css <- @evil_css do
 78 |       assert css != scrub_css(css)
 79 |     end
 80 |   end
 81 | 
 82 |   @evil_css_background [
 83 |     # \uxxrl unicode
 84 |     "background:\\75rl('javascript:alert(\"\\75rl\")');",
 85 |     "background:&#x75;rl(javascript:alert('html &amp;#x75;'))",
 86 |     "b\\nackground: url(javascript:alert('line-broken background '))",
 87 |     "background:&#xff55;rl(javascript:alert('&amp;#xff55;rl(full-width u)'))",
 88 |     "background:&#117;rl(javascript:alert(&amp;#117;rl'))",
 89 |     "background:&#x75;rl(javascript:alert('&amp;#x75;rl'))",
 90 |     "background:\\75rl('javascript:alert(\"\\75rl\")')",
 91 | 
 92 |     # \uxxrl unicode
 93 |     "div { background:\\75rl('javascript:alert(\"\\75rl\")'); }",
 94 |     "div { background:&#x75;rl(javascript:alert('html &amp;#x75;')) }",
 95 |     "div { b\\nackground: url(javascript:alert('line-broken background ')) }",
 96 |     "div { background:&#xff55;rl(javascript:alert('&amp;#xff55;rl(full-width u)')) }",
 97 |     "div { background:&#117;rl(javascript:alert(&amp;#117;rl')) }",
 98 |     "div { background:&#x75;rl(javascript:alert('&amp;#x75;rl')) }",
 99 |     "div { background:\\75rl('javascript:alert(\"\\75rl\")') }"
100 |   ]
101 | 
102 |   test "should NOT return invalid css 2" do
103 |     for css <- @evil_css_background do
104 |       assert css != scrub_css(css)
105 |     end
106 |   end
107 | end
108 | 


--------------------------------------------------------------------------------
/test/custom_scrubber_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule CustomScrubberTest do
 2 |   use ExUnit.Case, async: true
 3 |   # Scrubber defined inline so the test exercises the Meta calls directly.
 4 |   defmodule Custom do
 5 |     require HtmlSanitizeEx.Scrubber.Meta
 6 |     alias HtmlSanitizeEx.Scrubber.Meta
 7 | 
 8 |     # Removes any CDATA tags before the traverser/scrubber runs.
 9 |     Meta.remove_cdata_sections_before_scrub()
10 | 
11 |     Meta.strip_comments()
12 |     # "p" is the only tag from the allow-lists that the test input uses.
13 |     Meta.allow_tag_with_any_attributes("p")
14 | 
15 |     Meta.allow_tags_with_style_attributes(["span", "html", "body"])
16 |     # NOTE(review): presumably the catch-all for anything not allowed above.
17 |     Meta.strip_everything_not_covered()
18 |   end
19 |   # Scrubs `text` with the Custom module defined above.
20 |   defp scrub(text) do
21 |     HtmlSanitizeEx.Scrubber.scrub(text, __MODULE__.Custom)
22 |   end
23 |   # Only <p> is expected to survive; the other tags go but their text stays.
24 |   test "strips everything except the allowed tags (for multiple tags)" do
25 |     input =
26 |       "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
27 | 
28 |     expected = "code!<p>hello code!</p>"
29 |     assert expected == scrub(input)
30 |   end
31 | end
32 | 


--------------------------------------------------------------------------------
/test/html5_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeExScrubberHTML5Test do
  2 |   use ExUnit.Case, async: true
  3 | 
  4 |   # All tests go through the public `HtmlSanitizeEx.html5/1` entry point.
  5 |   defp html5(markup),
  6 |     do: HtmlSanitizeEx.html5(markup)
  7 | 
  8 |   test "strips nothing" do
  9 |     html = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
 10 |     assert html5(html) == html
 11 |   end
 12 | 
 13 |   test "leaves the allowed tags alone" do
 14 |     html = ~S(<h1 class="heading" style="font-weight: bold">hello world!</h1>)
 15 |     assert html5(html) == html
 16 |   end
 17 | 
 18 |   test "leaves the allowed tags alone 2" do
 19 |     html = ~S(<a href="http://github.com" class="ext">hello world!</a>)
 20 |     assert html5(html) == html
 21 |   end
 22 | 
 23 |   test "leaves the allowed tags alone 3" do
 24 |     html =
 25 |       ~S(<h1 class="heading" data-confirm="Some confirmation text" style="font-weight: bold">hello world!</h1>)
 26 | 
 27 |     assert html5(html) == html
 28 |   end
 29 | 
 30 |   test "strips everything except the allowed tags" do
 31 |     # The <script> tags are removed while their text content is kept.
 32 |     scrubbed = html5("<h1>hello <script>code!</script></h1>")
 33 |     assert scrubbed == "<h1>hello code!</h1>"
 34 |   end
 35 | 
 36 |   test "handles css" do
 37 |     css = "<style> div.foo { width: 500px; height: 200px; } </style>"
 38 |     assert html5(css) == css
 39 |   end
 40 | 
 41 |   test "handles bad css" do
 42 |     hostile =
 43 |       "<style> \@import url(javascript:alert('Your cookie:'+document.cookie)); </style>"
 44 | 
 45 |     # The scrubbed CSS no longer contains the `javascript:alert` call.
 46 |     assert html5(hostile) == "<style> @import url(:'+document.cookie)); </style>"
 47 |   end
 48 | 
 49 |   test "handles bad css in style attribute" do
 50 |     hostile =
 51 |       ~s[<h1 style="color: red; background-image: url('javascript:alert');  border: 1px solid brown;">hello code!</h1>]
 52 | 
 53 |     cleaned =
 54 |       ~s[<h1 style="color: red; :alert');  border: 1px solid brown;">hello code!</h1>]
 55 | 
 56 |     assert html5(hostile) == cleaned
 57 |   end
 58 | 
 59 |   test "strips everything except the allowed tags (for multiple tags)" do
 60 |     nested =
 61 |       "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
 62 | 
 63 |     unwrapped = "<section><header>code!</header><p>hello code!</p></section>"
 64 |     assert html5(nested) == unwrapped
 65 |   end
 66 | 
 67 |   test "does not strip caption from tables" do
 68 |     table =
 69 |       "<table><caption>This is a table</caption><thead></thead><tbody></tbody></table>"
 70 | 
 71 |     # The markup must survive unchanged, so the input doubles as the
 72 |     # expected output.
 73 | 
 74 |     assert html5(table) == table
 75 |   end
 76 | 
 77 |   test "does not strip divs" do
 78 |     # Nested <div>s and their class attributes must pass through untouched.
 79 |     divs = ~s(<div class="a"><div class="b">Hello</div></div>)
 80 |     assert html5(divs) == divs
 81 |   end
 82 | 
 83 |   test "does not strip the mailto URI scheme" do
 84 |     # An href using `mailto:` must be kept as-is.
 85 |     link = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
 86 |     assert html5(link) == link
 87 |   end
 88 | 
 89 |   test "does encode script in textarea, but preserves white-space" do
 90 |     # The leading space inside <textarea> must be kept.
 91 |     encoded = ~s(<textarea> &lt;script&gt;&lt;/script&gt;</textarea>)
 92 |     assert html5(~s(<textarea> <script></script></textarea>)) == encoded
 93 |   end
 94 | 
 95 |   test "does not contain replacement characters in result" do
 96 |     scrubbed = html5("<script>alert()</script> <p>Hi</p>")
 97 |     # Exact match, so no stray characters may be left where <script> was.
 98 |     assert scrubbed == "alert() <p>Hi</p>"
 99 |   end
100 | 
101 |   test "does not strip valid html5 attributes from <img>" do
102 |     img =
103 |       ~s[<img src="http://abcd.com" width="100" height="100" translate="(0,0)" />]
104 | 
105 |     assert html5(img) == img
106 |   end
107 | 
108 |   test "does not strip valid html5 attributes srcset and sizes from <img>" do
109 |     img =
110 |       ~s[<img src="http://abcd.com" srcset="elva-fairy-480w.jpg 480w, elva-fairy-800w.jpg 800w" sizes="(max-width: 600px) 480px, 800px" />]
111 | 
112 |     assert html5(img) == img
113 |   end
114 | 
115 |   test "does not strip any header tags" do
116 |     headings = """
117 |     <h1>Header 1</h1>
118 |     <h2>Header 2</h2>
119 |     <h3>Header 3</h3>
120 |     <h4>Header 4</h4>
121 |     <h5>Header 5</h5>
122 |     <h6>Header 6</h6>
123 |     """
124 | 
125 |     assert html5(headings) == headings
126 |   end
127 | 
128 |   test "make sure a very long URI is truncated before capturing URI scheme" do
129 |     data_uri = File.read!(Path.join(__DIR__, "html5_test_data_uri"))
130 |     img = "<img src='#{data_uri}'>"
131 | 
132 |     assert html5(img) == "<img />"
133 |   end
134 | end
135 | 


--------------------------------------------------------------------------------
/test/html_sanitize_ex_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeExTest do
 2 |   use ExUnit.Case, async: true
 3 | 
 4 |   test "strips all the tags" do
 5 |     # Nested and sibling tags are all removed; only the text nodes remain.
 6 |     html =
 7 |       "hello! <section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
 8 |     assert HtmlSanitizeEx.strip_tags(html) == "hello! code!hello code!"
 9 |   end
10 | end
11 | 


--------------------------------------------------------------------------------
/test/markdown_html_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do
  2 |   use ExUnit.Case, async: true
  3 | 
  4 |   # Scrubs `text` with the MarkdownHTML scrubber under test.
  5 |   defp sanitize(text),
  6 |     do: HtmlSanitizeEx.Scrubber.scrub(text, HtmlSanitizeEx.Scrubber.MarkdownHTML)
  7 | 
  8 |   test "strips nothing" do
  9 |     input = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
 10 |     expected = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
 11 |     assert expected == sanitize(input)
 12 |   end
 13 | 
 14 |   test "does NOT strip language class from code tag" do
 15 |     input = "<code class=\"ruby\">Something.new</code>"
 16 |     assert input == sanitize(input)
 17 |   end
 18 | 
 19 |   test "strips everything except the allowed tags" do
 20 |     input = "<h1>hello <script>code!</script></h1>"
 21 |     expected = "<h1>hello code!</h1>"
 22 |     assert expected == sanitize(input)
 23 |   end
 24 | 
 25 |   test "allows target=_blank inside <a>" do
 26 |     input =
 27 |       ~S(<a href="test.html" target="_blank" name="testpoint">hello world</a>)
 28 | 
 29 |     expected =
 30 |       ~S(<a href="test.html" target="_blank" name="testpoint">hello world</a>)
 31 | 
 32 |     assert expected == sanitize(input)
 33 |   end
 34 | 
 35 |   test "disallows anything else for target= inside <a>" do
 36 |     input =
 37 |       ~S(<a href="test.html" target="asdf" name="testpoint">hello world</a>)
 38 | 
 39 |     expected = ~S(<a href="test.html" name="testpoint">hello world</a>)
 40 |     assert expected == sanitize(input)
 41 |   end
 42 | 
 43 |   test "allows rel=noopener inside <a>" do
 44 |     input =
 45 |       ~S(<a href="test.html" target="_blank" rel="noopener" name="testpoint">hello world</a>)
 46 | 
 47 |     expected =
 48 |       ~S(<a href="test.html" target="_blank" rel="noopener" name="testpoint">hello world</a>)
 49 | 
 50 |     assert expected == sanitize(input)
 51 |   end
 52 | 
 53 |   test "allows rel=noreferrer inside <a>" do
 54 |     input =
 55 |       ~S(<a href="test.html" target="_blank" rel="noreferrer" name="testpoint">hello world</a>)
 56 | 
 57 |     expected =
 58 |       ~S(<a href="test.html" target="_blank" rel="noreferrer" name="testpoint">hello world</a>)
 59 | 
 60 |     assert expected == sanitize(input)
 61 |   end
 62 | 
 63 |   test "disallows anything else for rel= inside <a>" do
 64 |     input =
 65 |       ~S(<a href="test.html" target="_blank" rel="asdf" name="testpoint">hello world</a>)
 66 | 
 67 |     expected =
 68 |       ~S(<a href="test.html" target="_blank" name="testpoint">hello world</a>)
 69 | 
 70 |     assert expected == sanitize(input)
 71 |   end
 72 | 
 73 |   test "strips everything except the allowed tags (for multiple tags)" do
 74 |     input =
 75 |       "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
 76 | 
 77 |     expected = "code!<p>hello code!</p>"
 78 |     assert expected == sanitize(input)
 79 |   end
 80 | 
 81 |   test "strips everything for faulty allowed_tags: key" do
 82 |     input = "<h1>hello<h1>"
 83 |     expected = "hello"
 84 |     assert expected != sanitize(input)
 85 |   end
 86 | 
 87 |   test "strips invalid html" do
 88 |     input = "<<<bad html"
 89 |     expected = "&lt;&lt;"
 90 |     assert expected == sanitize(input)
 91 |   end
 92 | 
 93 |   test "strips tags with quote" do
 94 |     input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
 95 |     assert "&lt;\" <img src=\"trollface.gif\" /> hi" == sanitize(input)
 96 |   end
 97 | 
 98 |   test "strips nested tags" do
 99 |     input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
100 |     expected = "Wei&lt;<a>a onclick='alert(document.cookie);'</a>/&gt;rdos"
101 |     assert expected == sanitize(input)
102 |   end
103 | 
104 |   test "strips certain tags in multi line strings" do
105 |     input =
106 |       "<title>This is <b>a <a href=\"\" target=\"_top\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
107 | 
108 |     expected =
109 |       "This is <b>a <a href=\"\">test</a></b>.\n\n\n\n<p>It no <b>longer <strong>contains <em>any HTML</em>.</strong></b></p>\n"
110 | 
111 |     assert expected == sanitize(input)
112 |   end
113 | 
114 |   test "strips blank string" do
115 |     assert "" == sanitize("")
116 |     assert "" == sanitize("  ")
117 |     assert "" == sanitize(nil)
118 |   end
119 | 
120 |   test "strips nothing from plain text" do
121 |     input = "Dont touch me"
122 |     expected = "Dont touch me"
123 |     assert expected == sanitize(input)
124 |   end
125 | 
126 |   test "strips nothing from a sentence" do
127 |     input = "This is a test."
128 |     expected = "This is a test."
129 |     assert expected == sanitize(input)
130 |   end
131 | 
132 |   test "strips tags with comment" do
133 |     input = "This has a <!-- comment --> here."
134 |     expected = "This has a  here."
135 |     assert expected == sanitize(input)
136 |   end
137 | 
138 |   test "strip_tags escapes special characters" do
139 |     assert "&amp;" == sanitize("&") # not `assert a, b`: b is just a message
140 |   end
141 | 
142 |   # link sanitizer
143 | 
144 |   test "test_strip_links_with_tags_in_tags" do
145 |     input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
146 |     expected = "&lt;<a>a href='hello'&gt;all <b>day</b> long&lt;</a>/a&gt;"
147 |     assert expected == sanitize(input)
148 |   end
149 | 
150 |   test "test_strip_links_with_unclosed_tags" do
151 |     assert "" == sanitize("<a<a")
152 |   end
153 | 
154 |   test "test_strip_links_with_plaintext" do
155 |     assert "Dont touch me" == sanitize("Dont touch me")
156 |   end
157 | 
158 |   @tag href_scrubbing: true
159 |   test "test_strip_links_with_line_feed_and_uppercase_tag" do
160 |     input = "<a href='almost'>on my mind</a> <A href='almost'>all day long</A>"
161 | 
162 |     assert "<a href=\"almost\">on my mind</a> <a href=\"almost\">all day long</a>" ==
163 |              sanitize(input)
164 |   end
165 | 
166 |   @tag href_scrubbing: true
167 |   test "test_strip_links_leaves_nonlink_tags" do
168 |     assert "<a href=\"almost\">My mind</a>\n<a href=\"almost\">all <b>day</b> long</a>" ==
169 |              sanitize(
170 |                "<a href='almost'>My mind</a>\n<A href='almost'>all <b>day</b> long</A>"
171 |              )
172 |   end
173 | 
174 |   @tag href_scrubbing: true
175 |   test "strips tags with sanitize/1" do
176 |     input =
177 |       "<p>This <u>is</u> a <a href='test.html'><strong>test</strong></a>.</p>"
178 | 
179 |     assert "<p>This <u>is</u> a <a href=\"test.html\"><strong>test</strong></a>.</p>" ==
180 |              sanitize(input)
181 |   end
182 | 
183 |   @a_href_hacks [
184 |     "<a href=\"javascript:alert('XSS');\">text here</a>",
185 |     "<a href=javascript:alert('XSS')>text here</a>",
186 |     "<a href=JaVaScRiPt:alert('XSS')>text here</a>",
187 |     "<a href=javascript:alert(&quot;XSS&quot;)>text here</a>",
188 |     "<a href=javascript:alert(String.fromCharCode(88,83,83))>text here</a>",
189 |     "<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>text here</a>",
190 |     "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>text here</a>",
191 |     "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>text here</a>",
192 |     "<a href=\"jav\tascript:alert('XSS');\">text here</a>",
193 |     "<a href=\"jav&#x09;ascript:alert('XSS');\">text here</a>",
194 |     "<a href=\"jav&#x0A;ascript:alert('XSS');\">text here</a>",
195 |     "<a href=\"jav&#x0D;ascript:alert('XSS');\">text here</a>",
196 |     "<a href=\" &#14;  javascript:alert('XSS');\">text here</a>",
197 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
198 |     "<a href=`javascript:alert(\"RSnake says, 'XSS'\")`>text here</a>",
199 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
200 |     "<a href=\"javascript&#x003a;alert('XSS');\">text here</a>",
201 |     "<a href=\"javascript&#x3A;alert('XSS');\">text here</a>",
202 |     "<a href=\"javascript&#x003A;alert('XSS');\">text here</a>",
203 |     "<a href=\"&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;\">text here</a>",
204 |     "<a href=\"JAVASCRIPT:alert(\'foo\')\">text here</a>",
205 |     "<a href=\"java<!-- -->script:alert(\'foo\')\">text here</a>",
206 |     "<a href=\"awesome.html#this:stuff\">text here</a>",
207 |     "<a href=\"java\0&#14;\t\r\n script:alert(\'foo\')\">text here</a>",
208 |     "<a href=\"javascript:alert(\'foo\')\">text here</a>",
209 |     "<a href=\"java&#0000000script:alert(\'foo\')\">text here</a>"
210 |   ]
211 | 
212 |   @tag href_scrubbing: true
213 |   test "strips malicious protocol hacks from a href attribute" do
214 |     expected = "<a>text here</a>"
215 |     Enum.each(@a_href_hacks, fn x -> assert expected == sanitize(x) end)
216 |   end
217 | 
218 |   @tag href_scrubbing: true
219 |   test "does not strip x03a legitimate" do
220 |     assert "<a href=\"http://legit\"></a>" ==
221 |              sanitize("<a href=\"http&#x3a;//legit\">")
222 | 
223 |     assert "<a href=\"http://legit\"></a>" ==
224 |              sanitize("<a href=\"http&#x3A;//legit\">")
225 |   end
226 | 
227 |   test "test_strip links with links" do
228 |     input =
229 |       "<a href='http://www.elixirstatus.com/'><a href='http://www.elixirstatus.com/' onlclick='steal()'>0wn3d</a></a>"
230 | 
231 |     assert "<a href=\"http://www.elixirstatus.com/\"><a href=\"http://www.elixirstatus.com/\">0wn3d</a></a>" ==
232 |              sanitize(input)
233 |   end
234 | 
235 |   test "test_strip_links_with_linkception" do
236 |     assert "<a href=\"http://www.elixirstatus.com/\">Mag<a href=\"http://www.elixir-lang.org/\">ic</a></a>" ==
237 |              sanitize(
238 |                "<a href='http://www.elixirstatus.com/'>Mag<a href='http://www.elixir-lang.org/'>ic"
239 |              )
240 |   end
241 | 
242 |   test "test_strip_links_with_a_tag_in_href" do
243 |     assert "FrrFox" == sanitize("<href onlclick='steal()'>FrrFox</a></href>")
244 |   end
245 | 
246 |   test "normal scrubbing does only allow certain tags and attributes" do
247 |     input = "<plaintext><span data-foo=\"bar\">foo</span></plaintext>"
248 |     expected = "<span>foo</span>"
249 |     assert expected == sanitize(input)
250 |   end
251 | 
252 |   test "strips not allowed attributes" do
253 |     input =
254 |       "start <a title=\"1\" onclick=\"foo\">foo <bad>bar</bad> baz</a> end"
255 | 
256 |     expected = "start <a title=\"1\">foo bar baz</a> end"
257 |     assert expected == sanitize(input)
258 |   end
259 | 
260 |   test "sanitize_script" do
261 |     assert "a b cblah blah blahd e f" ==
262 |              sanitize(
263 |                "a b c<script language=\"Javascript\">blah blah blah</script>d e f"
264 |              )
265 |   end
266 | 
267 |   @tag href_scrubbing: true
268 |   test "sanitize_js_handlers" do
269 |     input =
270 |       ~s(onthis="do that" <a href="#" onclick="hello" name="foo" onbogus="remove me">hello</a>)
271 | 
272 |     assert "onthis=\"do that\" <a href=\"#\" name=\"foo\">hello</a>" ==
273 |              sanitize(input)
274 |   end
275 | 
276 |   test "sanitize_javascript_href" do
277 |     raw =
278 |       ~s(href="javascript:bang" <a href="javascript:bang" name="hello">foo</a>, <span href="javascript:bang">bar</span>)
279 | 
280 |     assert ~s(href="javascript:bang" <a name="hello">foo</a>, <span>bar</span>) ==
281 |              sanitize(raw)
282 |   end
283 | 
284 |   test "sanitize_image_src" do
285 |     raw =
286 |       ~s(src="javascript:bang" <img src="javascript:bang" width="5">foo</img>, <span src="javascript:bang">bar</span>)
287 | 
288 |     assert "src=\"javascript:bang\" <img width=\"5\" />, <span>bar</span>" ==
289 |              sanitize(raw)
290 |   end
291 | 
292 |   @tag href_scrubbing: true
293 |   test "should only allow http/https protocols" do
294 |     assert "<a href=\"foo\">baz</a>" ==
295 |              sanitize(~s(<a href="foo" onclick="bar"><script>baz</script></a>))
296 | 
297 |     assert "<a href=\"http://example.com\">baz</a>" ==
298 |              sanitize(
299 |                ~s(<a href="http://example.com" onclick="bar"><script>baz</script></a>)
300 |              )
301 | 
302 |     assert "<a href=\"https://example.com\">baz</a>" ==
303 |              sanitize(
304 |                ~s(<a href="https://example.com" onclick="bar"><script>baz</script></a>)
305 |              )
306 |   end
307 | 
308 |   # test "video_poster_sanitization" do
309 |   #  assert ~s(<video src="videofile.ogg" autoplay  poster="posterimage.jpg"></video>) == ~s(<video src="videofile.ogg" poster="posterimage.jpg"></video>)
310 |   #  assert ~s(<video src="videofile.ogg"></video>) == sanitize("<video src=\"videofile.ogg\" poster=javascript:alert(1)></video>")
311 |   # end
312 | 
313 |   test "strips not allowed tags " do
314 |     input = "<form><u></u></form>"
315 |     expected = "<u></u>"
316 |     assert expected == sanitize(input)
317 |   end
318 | 
319 |   test "strips not allowed attributes " do
320 |     input = "<a foo=\"hello\" bar=\"world\"></a>"
321 |     expected = "<a></a>"
322 |     assert expected == sanitize(input)
323 |   end
324 | 
325 |   @image_src_hacks [
326 |     "<IMG SRC=\"javascript:alert('XSS');\">",
327 |     "<IMG SRC=javascript:alert('XSS')>",
328 |     "<IMG SRC=JaVaScRiPt:alert('XSS')>",
329 |     "<IMG SRC=javascript:alert(&quot;XSS&quot;)>",
330 |     "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
331 |     "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
332 |     "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
333 |     "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
334 |     "<IMG SRC=\"jav\tascript:alert('XSS');\">",
335 |     "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
336 |     "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
337 |     "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",
338 |     "<IMG SRC=\" &#14;  javascript:alert('XSS');\">",
339 |     "<IMG SRC=\"javascript&#x3a;alert('XSS');\">",
340 |     "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>"
341 |   ]
342 | 
343 |   test "strips malicious protocol hacks from img src attribute" do
344 |     expected = "<img />"
345 |     Enum.each(@image_src_hacks, fn x -> assert expected == sanitize(x) end)
346 |   end
347 | 
348 |   test "strips script tag" do
349 |     input = "<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>"
350 |     expected = ""
351 |     assert expected == sanitize(input)
352 |   end
353 | 
354 |   test "strips xss image hack with uppercase tags" do
355 |     input = "<IMG \"\"\"><SCRIPT>alert(\"XSS\")</SCRIPT>\">"
356 |     expected = "<img />alert(\"XSS\")\"&gt;"
357 |     assert expected == sanitize(input)
358 |   end
359 | 
360 |   test "should_sanitize_tag_broken_up_by_null" do
361 |     assert "alert(\"XSS\")" == sanitize("<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>")
362 |   end
363 | 
364 |   test "should_sanitize_invalid_script_tag" do
365 |     input = "<SCRIPT/XSS SRC=\"http://ha.ckers.org/xss.js\"></SCRIPT>"
366 |     assert "" == sanitize(input)
367 |   end
368 | 
369 |   test "should_sanitize_unclosed_script" do
370 |     input = "<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>"
371 |     assert "" == sanitize(input)
372 |   end
373 | 
374 |   test "sanitize half open scripts" do
375 |     input = "<IMG SRC=\"javascript:alert('XSS')\""
376 |     assert "<img />" == sanitize(input)
377 |   end
378 | 
379 |   test "should_not_fall_for_ridiculous_hack" do
380 |     img_hack = """
381 |     <IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
382 |     """
383 | 
384 |     assert "<img />)\n" == sanitize(img_hack)
385 |   end
386 | 
387 |   test "should_sanitize_within attributes" do
388 |     input =
389 |       "<span title=\"&#39;&gt;&lt;script&gt;alert()&lt;/script&gt;\">blah</span>"
390 | 
391 |     assert "<span>blah</span>" == sanitize(input)
392 |   end
393 | 
394 |   test "should_sanitize_invalid_tag_names" do
395 |   end
396 | 
397 |   test "should_sanitize_non_alpha_and_non_digit_characters_in_tags" do
398 |     assert "<a></a>foo" ==
399 |              sanitize(
400 |                "<a onclick!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>"
401 |              )
402 |   end
403 | 
404 |   test "should_sanitize_invalid_tag_names_in_single_tags" do
405 |     assert "<img />" == sanitize("<img/src=\"http://ha.ckers.org/xss.js\"/>")
406 |   end
407 | 
408 |   test "should_sanitize_img_dynsrc_lowsrc" do
409 |     assert "<img />" == sanitize("<img lowsrc=\"javascript:alert('XSS')\" />")
410 |   end
411 | 
412 |   test "should_sanitize_img_vbscript" do
413 |     assert "<img />" == sanitize("<img src='vbscript:msgbox(\"XSS\")' />")
414 |   end
415 | 
416 |   @tag cdata: true
417 |   test "should_sanitize_cdata_section" do
418 |     assert "<span>section</span>]]&gt;" ==
419 |              sanitize("<![CDATA[<span>section</span>]]>")
420 |   end
421 | 
422 |   @tag cdata: true
423 |   test "should_sanitize_cdata_section like any other" do
424 |     assert "section]]&gt;" == sanitize("<![CDATA[<script>section</script>]]>")
425 |   end
426 | 
427 |   @tag cdata: true
428 |   test "should_sanitize_unterminated_cdata_section" do
429 |     assert "<span>neverending...</span>" ==
430 |              sanitize("<![CDATA[<span>neverending...")
431 |   end
432 | 
433 |   @tag cdata: true
434 |   test "strips CDATA" do
435 |     input = "This has a <![CDATA[<section>]]> here."
436 |     expected = "This has a ]]&gt; here."
437 |     assert expected == sanitize(input)
438 |   end
439 | 
440 |   test "should_not_mangle_urls_with_ampersand" do
441 |     input = "<a href=\"http://www.domain.com?var1=1&amp;var2=2\">my link</a>"
442 |     assert input == sanitize(input)
443 |   end
444 | 
445 |   test "should_sanitize_neverending_attribute" do
446 |     assert "<span></span>" == sanitize("<span class=\"\\")
447 |   end
448 | 
449 |   # test "this affects only NS4, but we're on a roll, right?" do
450 |   #  input = "<div size=\"&{alert('XSS')}\">foo</div>"
451 |   #  expected = "<div>foo</div>"
452 |   #  assert expected == sanitize(input)
453 |   # end
454 | 
455 |   test "does not strip the mailto URI scheme" do
456 |     input = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
457 |     expected = ~s(<a href="mailto:someone@yoursite.com">Email Us</a>)
458 |     assert expected == sanitize(input)
459 |   end
460 | 
461 |   test "does not strip any header tags" do
462 |     input = """
463 |     <h1>Header 1</h1>
464 |     <h2>Header 2</h2>
465 |     <h3>Header 3</h3>
466 |     <h4>Header 4</h4>
467 |     <h5>Header 5</h5>
468 |     <h6>Header 6</h6>
469 |     """
470 | 
471 |     assert input == sanitize(input)
472 |   end
473 | end
474 | 


--------------------------------------------------------------------------------
/test/no_scrub_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule HtmlSanitizeExScrubberNoScrubTest do
 2 |   use ExUnit.Case, async: true
 3 | 
 4 |   # Asserts that `HtmlSanitizeEx.noscrub/1` returns `html` unchanged.
 5 |   defp assert_untouched(html),
 6 |     do: assert(HtmlSanitizeEx.noscrub(html) == html)
 7 | 
 8 |   test "strips nothing" do
 9 |     html = "This <b>is</b> <b>an</b> <i>example</i> of <u>space</u> eating."
10 |     assert_untouched(html)
11 |   end
12 | 
13 |   test "leaves white-space between nodes intact" do
14 |     # Single and double line feeds in the markup.
15 |     assert_untouched(
16 |       "This <b>is</b>\n<b>an</b> <i>example</i> of\n\n<u>space</u> eating."
17 |     )
18 |   end
19 | 
20 |   test "leaves white-space between nodes intact (CR)" do
21 |     # A line feed plus CRLF pairs in the markup.
22 |     assert_untouched(
23 |       "This <b>is</b>\n<b>an</b> <i>example</i> of\r\n\r\n<u>space</u> eating."
24 |     )
25 |   end
26 | 
27 |   test "leaves white-space between nodes intact (tabs)" do
28 |     # Single and double tabs in the markup.
29 |     assert_untouched(
30 |       "This <b>is</b>\t<b>an</b> <i>example</i> of\t\t<u>space</u> eating."
31 |     )
32 |   end
33 | end
34 | 


--------------------------------------------------------------------------------
/test/strip_tags_test.exs:
--------------------------------------------------------------------------------
  1 | defmodule HtmlSanitizeExScrubberStripTagsTest do
  2 |   use ExUnit.Case, async: true
  3 | 
  4 |   # Delegates to the public `HtmlSanitizeEx.strip_tags/1` under test.
  5 |   defp strip_tags(text),
  6 |     do: HtmlSanitizeEx.strip_tags(text)
  7 | 
  8 |   test "strips everything except the allowed tags (for multiple tags)" do
  9 |     input =
 10 |       "<section><header><script>code!</script></header><p>hello <script>code!</script></p></section>"
 11 | 
 12 |     expected = "code!hello code!"
 13 |     assert expected == strip_tags(input)
 14 |   end
 15 | 
 16 |   test "strips everything" do
 17 |     input = "<h1>hello<h1>"
 18 |     expected = "hello"
 19 |     assert expected == strip_tags(input)
 20 |   end
 21 | 
 22 |   test "strips invalid html" do
 23 |     input = "<<<bad html"
 24 |     expected = "&lt;&lt;"
 25 |     assert expected == strip_tags(input)
 26 |   end
 27 | 
 28 |   test "strips tags with quote" do
 29 |     input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
 30 |     assert "&lt;\"  hi" == strip_tags(input)
 31 |   end
 32 | 
 33 |   test "strips nested tags" do
 34 |     input = "Wei<<a>a onclick='alert(document.cookie);'</a>/>rdos"
 35 |     expected = "Wei&lt;a onclick='alert(document.cookie);'/&gt;rdos"
 36 |     assert expected == strip_tags(input)
 37 |   end
 38 | 
 39 |   test "strips tags in multi line strings" do
 40 |     input =
 41 |       "<title>This is <b>a <a href=\"\" target=\"_blank\">test</a></b>.</title>\n\n<!-- it has a comment -->\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
 42 | 
 43 |     expected = "This is a test.\n\n\n\nIt no longer contains any HTML.\n"
 44 |     assert expected == strip_tags(input)
 45 |   end
 46 | 
 47 |   test "strips comments" do
 48 |     assert "This is &lt;-- not\n a comment here." ==
 49 |              strip_tags("This is <-- not\n a comment here.")
 50 |   end
 51 | 
 52 |   test "strips blank string" do
 53 |     assert "" == strip_tags("")
 54 |     assert "" == strip_tags("  ")
 55 |     assert "" == strip_tags(nil)
 56 |   end
 57 | 
 58 |   test "strips nothing from plain text" do
 59 |     input = "Dont touch me"
 60 |     expected = "Dont touch me"
 61 |     assert expected == strip_tags(input)
 62 |   end
 63 | 
 64 |   test "strips tags with many open quotes" do
 65 |     assert "&lt;&lt;" == strip_tags("<<<bad html>")
 66 |   end
 67 | 
 68 |   test "strips nothing from a sentence" do
 69 |     input = "This is a test."
 70 |     expected = "This is a test."
 71 |     assert expected == strip_tags(input)
 72 |   end
 73 | 
 74 |   test "strips tags with comment" do
 75 |     input = "This has a <!-- comment --> here."
 76 |     expected = "This has a  here."
 77 |     assert expected == strip_tags(input)
 78 |   end
 79 | 
 80 |   test "strip_tags escapes special characters" do
 81 |     assert "&amp;" == strip_tags("&") # not `assert a, b`: b is just a message
 82 |   end
 83 | 
 84 |   # link sanitizer
 85 | 
 86 |   test "test_strip_links_with_tags_in_tags" do
 87 |     input = "<<a>a href='hello'>all <b>day</b> long<</A>/a>"
 88 |     expected = "&lt;a href='hello'&gt;all day long&lt;/a&gt;"
 89 |     assert expected == strip_tags(input)
 90 |   end
 91 | 
 92 |   test "test_strip_links_with_unclosed_tags" do
 93 |     assert "" == strip_tags("<a<a")
 94 |   end
 95 | 
 96 |   test "test_strip_links_with_plaintext" do
 97 |     assert "Dont touch me" == strip_tags("Dont touch me")
 98 |   end
 99 | 
100 |   @a_href_hacks [
101 |     "<a href=\"javascript:alert('XSS');\">text here</a>",
102 |     "<a href=javascript:alert('XSS')>text here</a>",
103 |     "<a href=JaVaScRiPt:alert('XSS')>text here</a>",
104 |     "<a href=javascript:alert(&quot;XSS&quot;)>text here</a>",
105 |     "<a href=javascript:alert(String.fromCharCode(88,83,83))>text here</a>",
106 |     "<a href=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>text here</a>",
107 |     "<a href=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>text here</a>",
108 |     "<a href=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>text here</a>",
109 |     "<a href=\"jav\tascript:alert('XSS');\">text here</a>",
110 |     "<a href=\"jav&#x09;ascript:alert('XSS');\">text here</a>",
111 |     "<a href=\"jav&#x0A;ascript:alert('XSS');\">text here</a>",
112 |     "<a href=\"jav&#x0D;ascript:alert('XSS');\">text here</a>",
113 |     "<a href=\" &#14;  javascript:alert('XSS');\">text here</a>",
114 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
115 |     "<a href=`javascript:alert(\"RSnake says, 'XSS'\")`>text here</a>",
116 |     "<a href=\"javascript&#x3a;alert('XSS');\">text here</a>",
117 |     "<a href=\"javascript&#x003a;alert('XSS');\">text here</a>",
118 |     "<a href=\"javascript&#x3A;alert('XSS');\">text here</a>",
119 |     "<a href=\"javascript&#x003A;alert('XSS');\">text here</a>",
120 |     "<a href=\"&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;\">text here</a>",
121 |     "<a href=\"JAVASCRIPT:alert(\'foo\')\">text here</a>",
122 |     "<a href=\"java<!-- -->script:alert(\'foo\')\">text here</a>",
123 |     "<a href=\"awesome.html#this:stuff\">text here</a>",
124 |     "<a href=\"java\0&#14;\t\r\n script:alert(\'foo\')\">text here</a>",
125 |     "<a href=\"javascript:alert(\'foo\')\">text here</a>",
126 |     "<a href=\"java&#0000000script:alert(\'foo\')\">text here</a>"
127 |   ]
128 | 
129 |   @tag href_scrubbing: true
130 |   test "strips malicious protocol hacks from a href attribute" do
131 |     expected = "text here"
132 |     Enum.each(@a_href_hacks, fn x -> assert expected == strip_tags(x) end)
133 |   end
134 | 
135 |   test "test_strip links with links" do
136 |     # An anchor nested inside another anchor: only the inner text remains.
137 |     nested_links =
138 |       "<a href='http://www.rubyonrails.com/'><a href='http://www.rubyonrails.com/' onlclick='steal()'>0wn3d</a></a>"
139 |     assert strip_tags(nested_links) == "0wn3d"
140 |   end
141 | 
142 |   test "test_strip_links_with_a_tag_in_href" do
143 |     assert strip_tags("<href onlclick='steal()'>FrrFox</a></href>") == "FrrFox"
144 |   end
145 | 
146 |   test "normal scrubbing does only allow certain tags and attributes" do
147 |     # Both wrapping tags are removed; only the text content is kept.
148 |     html = ~S(<plaintext><span data-foo="bar">foo</span></plaintext>)
149 |     assert strip_tags(html) == "foo"
150 |   end
151 | 
152 |   # XSS payloads hidden in an `img` `src`; each one must be stripped to "".
153 |   @image_src_hacks [
154 |     "<IMG SRC=\"javascript:alert('XSS');\">",
155 |     "<IMG SRC=javascript:alert('XSS')>",
156 |     "<IMG SRC=JaVaScRiPt:alert('XSS')>",
157 |     "<IMG SRC=javascript:alert(&quot;XSS&quot;)>",
158 |     "<IMG SRC=javascript:alert(String.fromCharCode(88,83,83))>",
159 |     "<IMG SRC=&#106;&#97;&#118;&#97;&#115;&#99;&#114;&#105;&#112;&#116;&#58;&#97;&#108;&#101;&#114;&#116;&#40;&#39;&#88;&#83;&#83;&#39;&#41;>",
160 |     "<IMG SRC=&#0000106&#0000097&#0000118&#0000097&#0000115&#0000099&#0000114&#0000105&#0000112&#0000116&#0000058&#0000097&#0000108&#0000101&#0000114&#0000116&#0000040&#0000039&#0000088&#0000083&#0000083&#0000039&#0000041>",
161 |     "<IMG SRC=&#x6A&#x61&#x76&#x61&#x73&#x63&#x72&#x69&#x70&#x74&#x3A&#x61&#x6C&#x65&#x72&#x74&#x28&#x27&#x58&#x53&#x53&#x27&#x29>",
162 |     "<IMG SRC=\"jav\tascript:alert('XSS');\">",
163 |     "<IMG SRC=\"jav&#x09;ascript:alert('XSS');\">",
164 |     "<IMG SRC=\"jav&#x0A;ascript:alert('XSS');\">",
165 |     "<IMG SRC=\"jav&#x0D;ascript:alert('XSS');\">",
166 |     "<IMG SRC=\" &#14;  javascript:alert('XSS');\">",
167 |     "<IMG SRC=\"javascript&#x3a;alert('XSS');\">",
168 |     "<IMG SRC=`javascript:alert(\"RSnake says, 'XSS'\")`>"
169 |   ]
170 | 
171 |   test "strips malicious protocol hacks from img src attribute" do
172 |     for hack <- @image_src_hacks, do: assert("" == strip_tags(hack))
173 |   end
174 | 
175 |   test "strips script tag" do
176 |     # A newline separates the tag name from its `SRC` attribute.
177 |     script = "<SCRIPT\nSRC=http://ha.ckers.org/xss.js></SCRIPT>"
178 |     assert strip_tags(script) == ""
179 |   end
180 | 
181 |   test "should_sanitize_tag_broken_up_by_null" do
182 |     assert strip_tags("<SCR\0IPT>alert(\"XSS\")</SCR\0IPT>") == "alert(\"XSS\")"
183 |   end
184 | 
185 |   test "should_sanitize_invalid_script_tag" do
186 |     malformed = ~S(<SCRIPT/XSS SRC="http://ha.ckers.org/xss.js"></SCRIPT>)
187 |     assert strip_tags(malformed) == ""
188 |   end
189 | 
190 |   test "should_sanitize_script_tag_with_multiple_open_brackets" do
191 |     doubled = "<<SCRIPT>alert(\"XSS\");//<</SCRIPT>"
192 |     assert strip_tags(doubled) == "&lt;alert(\"XSS\");//&lt;"
193 | 
194 |     unclosed = "<iframe src=http://ha.ckers.org/scriptlet.html\n<a"
195 |     assert strip_tags(unclosed) == ""
196 |   end
197 | 
198 |   test "should_sanitize_unclosed_script" do
199 |     unclosed = "<SCRIPT SRC=http://ha.ckers.org/xss.js?<B>"
200 |     assert strip_tags(unclosed) == ""
201 |   end
202 | 
203 |   test "sanitize half open scripts" do
204 |     half_open = ~S[<IMG SRC="javascript:alert('XSS')"]
205 |     assert strip_tags(half_open) == ""
206 |   end
207 | 
208 |   test "should_not_fall_for_ridiculous_hack" do
209 |     newline_riddled = """
210 |     <IMG\nSRC\n=\n"\nj\na\nv\na\ns\nc\nr\ni\np\nt\n:\na\nl\ne\nr\nt\n(\n'\nX\nS\nS\n'\n)\n"\n>)
211 |     """
212 |     # Only the text after the tag (")" plus the heredoc's newline) remains.
213 |     assert strip_tags(newline_riddled) == ")\n"
214 |   end
215 | 
216 |   test "should_sanitize_within attributes" do
217 |     # The entity-encoded script sits in `title` and goes away with the tag.
218 |     span =
219 |       "<span title=\"&#39;&gt;&lt;script&gt;alert()&lt;/script&gt;\">blah</span>"
220 |     assert strip_tags(span) == "blah"
221 |   end
222 | 
223 |   test "should_sanitize_invalid_tag_names" do
224 |     # The malformed script element sits between "c" and "d".
225 |     text =
226 |       ~s(a b c<script/XSS src="http://ha.ckers.org/xss.js"></script>d e f)
227 |     assert strip_tags(text) == "a b cd e f"
228 |   end
229 | 
230 |   test "should_sanitize_non_alpha_and_non_digit_characters_in_tags" do
231 |     # Punctuation is wedged between the attribute name and its `=`.
232 |     anchor =
233 |       "<a onclick!#$%&()*~+-_.,:;?@[/|\\]^`=alert(\"XSS\")>foo</a>"
234 |     assert strip_tags(anchor) == "foo"
235 |   end
236 | 
237 |   @tag cdata: true
238 |   test "should_sanitize_cdata_section" do
239 |     assert strip_tags("<![CDATA[<span>section</span>]]>") == "section]]&gt;"
240 |   end
241 | 
242 |   @tag cdata: true
243 |   test "should_sanitize_cdata_section like any other" do
244 |     assert strip_tags("<![CDATA[<script>section</script>]]>") == "section]]&gt;"
245 |   end
246 | 
247 |   @tag cdata: true
248 |   test "should_sanitize_unterminated_cdata_section" do
249 |     assert strip_tags("<![CDATA[<span>neverending...") == "neverending..."
250 |   end
251 | 
252 |   @tag cdata: true
253 |   test "strips CDATA" do
254 |     # The closing `]]>` is expected to survive as escaped text.
255 |     sentence = "This has a <![CDATA[<section>]]> here."
256 |     assert strip_tags(sentence) == "This has a ]]&gt; here."
257 |   end
258 | 
259 |   test "should sanitize neverending attribute" do
260 |     assert strip_tags("<span class=\"\\") == ""
261 |   end
262 | 
263 |   test "should not destroy white-space" do
264 |     assert strip_tags("some\r\ntext") == "some\r\ntext"
265 |   end
266 | 
267 |   test "should not destroy white-space /2" do
268 |     html =
269 |       "some<b>text with break between tags</b>\r\n<i>will remove break</i>"
270 |     assert strip_tags(html) ==
271 |              "sometext with break between tags\r\nwill remove break"
272 |   end
273 | 
274 |   test "should not destroy white-space /3" do
275 |     html = "some text\r\n<b>break only from one side</b>"
276 |     assert strip_tags(html) == "some text\r\nbreak only from one side"
277 |   end
278 | end
279 | 


--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | # Starts the ExUnit application so the test suite can run.
2 | ExUnit.start()
3 | 


--------------------------------------------------------------------------------
/test/test_if_tests_fail_after_resetting_lib.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Restores lib/ from master while leaving the current test/ changes in
 4 | # place, then runs the suite: exits 1 when the tests still pass (they do
 5 | # not reproduce the fixed bug) and 0 when they fail.
 6 | 
 7 | # common setup
 8 | 
 9 | set -e
10 | 
11 | DIRNAME=$( cd "$( dirname "$0" )" && pwd )
12 | PROJECT_ROOT=$( cd "$DIRNAME/.." && pwd )
13 | 
14 | # execution
15 | 
16 | # Quoted so a project path containing spaces is not word-split.
17 | cd "$PROJECT_ROOT"
18 | 
19 | git checkout master lib/
20 | 
21 | if mix test ; then
22 |   echo ""
23 |   echo "------------------------------------------------------------------"
24 |   echo ""
25 |   echo "There are changes to both lib/ and test/ which can indicate"
26 |   echo "a bugfix with a corresponding test that reproduces the fixed bug"
27 |   echo ""
28 |   echo "(if this is not a bugfix PR, please ignore the following error)"
29 |   echo ""
30 |   # bash's builtin echo prints "\e" literally without -e; printf expands
31 |   # the octal escape, and the trailing reset stops the red from leaking.
32 |   printf '\033[31m%s\033[0m\n' \
33 |     "After resetting changes in lib/, mix test should have failed"
34 |   echo ""
35 |   echo "------------------------------------------------------------------"
36 |   echo ""
37 |   exit 1
38 | else
39 |   exit 0
40 | fi
41 | 


--------------------------------------------------------------------------------
/test/traverser_test.exs:
--------------------------------------------------------------------------------
 1 | defmodule StripEverythingButB do
 2 |   # Scrubber used by the traverser tests: `b` elements are kept as they
 3 |   # are, every other 3-tuple node is replaced by its children, and any
 4 |   # remaining value is handed back untouched.
 5 | 
 6 |   def scrub({"b", _attributes, _children} = bold), do: bold
 7 | 
 8 |   def scrub({_tag, _attributes, children}), do: children
 9 | 
10 |   def scrub(other), do: other
11 | end
12 | 
13 | defmodule HtmlSanitizeExTraverserTest do
14 |   use ExUnit.Case, async: true
15 | 
16 |   # Parses `html` and traverses the result with `StripEverythingButB`.
17 |   def parse_to_tree(html) do
18 |     html
19 |     |> HtmlSanitizeEx.Parser.parse()
20 |     |> HtmlSanitizeEx.Traverser.traverse(StripEverythingButB)
21 |   end
22 | 
23 |   test "should return expected tree" do
24 |     input =
25 |       "hello! <section><b><script>code!</script></b><p>hello <script>code!</script></p></section>"
26 | 
27 |     expected = ["hello! ", {"b", [], ["code!"]}, "hello ", "code!"]
28 |     assert expected == parse_to_tree(input)
29 |   end
30 | 
31 |   test "should return expected tree 2" do
32 |     input =
33 |       "<title>This is <b>the <a href=\"http://me@example.com\" target=\"_blank\">test</a></b>.</title>\n\n\n\n<p>It no <b>longer <strong>contains <em>any <strike>HTML</strike></em>.</strong></b></p>\n"
34 | 
35 |     # U+FF3F (FULLWIDTH LOW LINE) is spelled as an escape so that the
36 |     # expectation cannot be garbled when this file is re-encoded.
37 |     expected = [
38 |       "This is ",
39 |       {"b", [], ["the ", "test"]},
40 |       ".",
41 |       " \uFF3F \n\n\n\n",
42 |       "It no ",
43 |       {"b", [], ["longer ", "contains ", "any ", "HTML", "."]},
44 |       " \uFF3F \n"
45 |     ]
46 | 
47 |     assert expected == parse_to_tree(input)
48 |   end
49 | 
50 |   test "should return expected tree 3" do
51 |     input = "This has a <!-- comment --> here."
52 |     expected = ["This has a ", {:comment, " comment "}, " here."]
53 |     assert expected == parse_to_tree(input)
54 |   end
55 | 
56 |   test "should return expected tree 4" do
57 |     input = "This has a <!-- comment here."
58 |     expected = ["This has a ", {:comment, " comment here.</html_sanitize_ex>"}]
59 |     assert expected == parse_to_tree(input)
60 |   end
61 | 
62 |   test "should return expected tree 5" do
63 |     input = "<<<bad html"
64 |     expected = ["<<"]
65 |     assert expected == parse_to_tree(input)
66 |   end
67 | 
68 |   test "should return expected tree 6" do
69 |     input = "<\" <img src=\"trollface.gif\" onload=\"alert(1)\"> hi"
70 |     expected = ["<\" ", " hi"]
71 |     assert expected == parse_to_tree(input)
72 |   end
73 | 
74 |   test "should return expected tree 7" do
75 |     input = "This has a <![CDATA[<section>]]> here."
76 |     # NOTE(review): every other case expects a list of nodes; confirm that
77 |     # a bare string is really what the traverser returns for CDATA input.
78 |     expected = "This has a <section> here."
79 |     assert expected == parse_to_tree(input)
80 |   end
81 | end
82 | 


--------------------------------------------------------------------------------