├── .formatter.exs
├── .github
└── workflows
│ ├── bugfix-reproducer.yml
│ └── ci-workflow.yml
├── .gitignore
├── .tool-versions
├── .travis.yml
├── CHANGELOG.md
├── LICENSE
├── README.md
├── lib
├── html_sanitize_ex.ex
└── html_sanitize_ex
│ ├── parser.ex
│ ├── scrubber.ex
│ ├── scrubber
│ ├── basic_html.ex
│ ├── css.ex
│ ├── html5.ex
│ ├── markdown_html.ex
│ ├── meta.ex
│ ├── no_scrub.ex
│ └── strip_tags.ex
│ └── traverser.ex
├── mix.exs
├── mix.lock
└── test
├── basic_html_test.exs
├── css_test.exs
├── custom_scrubber_test.exs
├── html5_test.exs
├── html5_test_data_uri
├── html_sanitize_ex_test.exs
├── markdown_html_test.exs
├── no_scrub_test.exs
├── strip_tags_test.exs
├── test_helper.exs
├── test_if_tests_fail_after_resetting_lib.sh
└── traverser_test.exs
/.formatter.exs:
--------------------------------------------------------------------------------
1 | # Used by "mix format" and to export configuration.
2 | export_locals_without_parens = []
3 |
4 | [
5 | inputs: ["{mix,.formatter}.exs", "{config,lib,test}/**/*.{ex,exs}"],
6 | locals_without_parens: export_locals_without_parens,
7 | export: [locals_without_parens: export_locals_without_parens],
8 | line_length: 80
9 | ]
10 |
--------------------------------------------------------------------------------
/.github/workflows/bugfix-reproducer.yml:
--------------------------------------------------------------------------------
1 | name: "Reproducing Test-Case Detector (experimental)"
2 |
3 | on:
4 | pull_request:
5 | paths:
6 | - '**_test.exs'
7 |
8 | jobs:
9 | test:
10 | runs-on: ubuntu-latest
11 | name: "Test for lib/ changes"
12 | strategy:
13 | matrix:
14 | otp: [22.2]
15 | elixir: [1.10.4]
16 | steps:
17 | - uses: actions/checkout@v2.3.1
18 |
19 | - uses: erlef/setup-beam@v1
20 | with:
21 | otp-version: ${{matrix.otp}}
22 | elixir-version: ${{matrix.elixir}}
23 |
24 | - run: git fetch origin master:master
25 |
26 | - name: Check changes to lib/
27 | id: check_changes
28 |         run: echo "changes_to_lib=$(git diff --name-only master | grep "^lib")" >> "$GITHUB_OUTPUT"
29 |
30 | - name: There are changes to lib/
31 | if: "contains(steps.check_changes.outputs.changes_to_lib, 'lib')"
32 | run: |
33 | mix deps.get
34 | sh test/test_if_tests_fail_after_resetting_lib.sh
35 |
36 | - name: There are no changes to lib/
37 | if: "!contains(steps.check_changes.outputs.changes_to_lib, 'lib')"
38 | run: echo "${{ toJSON(steps.check_changes.outputs.changes_to_lib) }}"
39 |
--------------------------------------------------------------------------------
/.github/workflows/ci-workflow.yml:
--------------------------------------------------------------------------------
1 | name: "CI Tests"
2 | on:
3 | push:
4 | branches:
5 | - master
6 | - release/*
7 | pull_request:
8 | branches:
9 | - master
10 |
11 | jobs:
12 | test:
13 |     runs-on: ubuntu-20.04  # NOTE(review): ubuntu-18.04 runners were removed by GitHub; verify setup-beam still provides builds for OTP 20.x/21.x on 20.04
14 |     name: "[${{matrix.otp}}/${{matrix.elixir}}] CI Tests on html_sanitize_ex [OTP/Elixir]"
15 | strategy:
16 | fail-fast: false
17 | matrix:
18 | otp: [20.3, 21.3, 22.3, 23.3, 24.0, 25.1]
19 | elixir: [1.7.4, 1.8.2, 1.9.4, 1.10.4, 1.11.4, 1.12.2, 1.14.1]
20 | exclude:
21 | - otp: 25.1
22 | elixir: 1.7.4
23 | - otp: 25.1
24 | elixir: 1.8.2
25 | - otp: 25.1
26 | elixir: 1.9.4
27 | - otp: 25.1
28 | elixir: 1.10.4
29 | - otp: 25.1
30 | elixir: 1.11.4
31 | - otp: 25.1
32 | elixir: 1.12.2
33 | - otp: 24.0
34 | elixir: 1.7.4
35 | - otp: 24.0
36 | elixir: 1.8.2
37 | - otp: 24.0
38 | elixir: 1.9.4
39 | - otp: 24.0
40 | elixir: 1.10.4
41 | - otp: 23.3
42 | elixir: 1.7.4
43 | - otp: 23.3
44 | elixir: 1.8.2
45 | - otp: 23.3
46 | elixir: 1.9.4
47 | - otp: 22.3
48 | elixir: 1.14.1
49 | - otp: 21.3
50 | elixir: 1.12.2
51 | - otp: 21.3
52 | elixir: 1.14.1
53 | - otp: 20.3
54 | elixir: 1.10.4
55 | - otp: 20.3
56 | elixir: 1.11.4
57 | - otp: 20.3
58 | elixir: 1.12.2
59 | - otp: 20.3
60 | elixir: 1.14.1
61 | steps:
62 | - uses: actions/checkout@v2.3.1
63 | with:
64 | fetch-depth: 0
65 | - uses: erlef/setup-beam@v1
66 | with:
67 | otp-version: ${{matrix.otp}}
68 | elixir-version: ${{matrix.elixir}}
69 | - run: mix deps.get
70 | - run: mix deps.compile
71 | - run: mix compile --warnings-as-errors
72 | - run: mix test
73 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.elixir_ls
2 | /_build
3 | /deps
4 | /docs/all.json
5 | /doc
6 | test.json
7 | erl_crash.dump
8 | *.ez
9 |
--------------------------------------------------------------------------------
/.tool-versions:
--------------------------------------------------------------------------------
1 | erlang 24.2
2 | elixir 1.13.4-otp-24
3 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | sudo: false
2 | language: elixir
3 | elixir:
4 | - 1.5.3
5 | - 1.6.6
6 | - 1.7.2
7 | - 1.8.2
8 | - 1.9.4
9 | otp_release:
10 | - 19.3
11 | - 20.3
12 | - 21.3
13 | - 22.0
14 | script:
15 | - mix deps.compile
16 | - mix compile --warnings-as-errors
17 | - mix test
18 | matrix:
19 | exclude:
20 | - elixir: 1.5.3
21 | otp_release: 21.3
22 | - elixir: 1.5.3
23 | otp_release: 22.0
24 | - elixir: 1.6.6
25 | otp_release: 22.0
26 | - elixir: 1.8.2
27 | otp_release: 19.3
28 | - elixir: 1.9.4
29 | otp_release: 19.3
30 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | ## 1.4.3
4 |
5 | - Allow `mochiweb` dep to be `~> 2.15 or ~> 3.1`
6 |
7 | ## 1.4.2
8 |
9 | - Fix regression when parsing schemes from URIs
10 | - Fix compiler warnings
11 | - Add missing `
` tag to `HTML5` scrubber
12 |
13 | ## 1.4.1
14 |
15 | - Add missing `` tag to `BasicHTML` and `MarkdownHTML` scrubbers
16 |
17 | ## 1.4.0
18 |
19 | - Add more missing HTML5 attributes
20 | - Add "middle" to valid CSS keywords
21 |
22 | ## 1.3.0
23 |
24 | - Add valid scheme for links: `mailto`
25 | - Update white-space handling in order to keep more of it untouched
26 |
27 | ## 1.2.0
28 |
29 | - Update `mochiweb` version requirement
30 | - Fix missing elements in HTML5: div, caption
31 |
32 | ## 1.1.1
33 |
34 | - Fix missing element in HTML5: blockquote
35 |
36 | ## 1.1.0
37 |
38 | - Add new scrubber: MarkdownHTML
39 |
40 | It is meant to scrub HTML that resulted from converting Markdown to HTML. It
41 | supports GitHub flavored Markdown (GFM).
42 |
43 | ## 1.0.1
44 |
45 | - Fix Elixir 1.3 compiler warnings
46 |
47 | ## 1.0.0
48 |
49 | - First release
50 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright (c) 2014 René Föhring
2 |
3 | Permission is hereby granted, free of charge, to any person obtaining
4 | a copy of this software and associated documentation files (the
5 | "Software"), to deal in the Software without restriction, including
6 | without limitation the rights to use, copy, modify, merge, publish,
7 | distribute, sublicense, and/or sell copies of the Software, and to
8 | permit persons to whom the Software is furnished to do so, subject to
9 | the following conditions:
10 |
11 | The above copyright notice and this permission notice shall be
12 | included in all copies or substantial portions of the Software.
13 |
14 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # HtmlSanitizeEx [![Build Status](https://travis-ci.org/rrrene/html_sanitize_ex.svg?branch=master)](https://travis-ci.org/rrrene/html_sanitize_ex) [![Inline docs](http://inch-ci.org/github/rrrene/html_sanitize_ex.svg?branch=master)](http://inch-ci.org/github/rrrene/html_sanitize_ex)
2 |
3 | `html_sanitize_ex` provides a fast and straightforward HTML Sanitizer written in Elixir which lets you include HTML authored by third-parties in your web application while protecting against XSS.
4 |
5 | It is the first Hex package to come out of the [elixirstatus.com](http://elixirstatus.com) project, where it will be used to sanitize user announcements from the Elixir community.
6 |
7 |
8 |
9 | ## What can it do?
10 |
11 | `html_sanitize_ex` parses a given HTML string and, based on the used [Scrubber](https://github.com/rrrene/html_sanitize_ex/tree/master/lib/html_sanitize_ex/scrubber), either completely strips it from HTML tags or sanitizes it by only allowing certain HTML elements and attributes to be present.
12 |
13 | **NOTE:** The one thing missing at this moment is ***support for styles***. To add this, we have to implement a Scrubber for CSS, to prevent nasty CSS hacks using `"
38 | assert input == full_html_sanitize(input)
39 | end
40 |
41 | test "handles bad css" do
42 | input =
43 | ""
44 |
45 | expected = ""
46 | assert expected == full_html_sanitize(input)
47 | end
48 |
49 | test "handles bad css in style attribute" do
50 | input =
51 | "hello code!
"
52 |
53 | expected =
54 | "hello code!
"
55 |
56 | assert expected == full_html_sanitize(input)
57 | end
58 |
59 | test "strips everything except the allowed tags (for multiple tags)" do
60 | input =
61 | ""
62 |
63 | expected = ""
64 | assert expected == full_html_sanitize(input)
65 | end
66 |
67 | test "does not strip caption from tables" do
68 | input =
69 | "
"
70 |
71 | expected =
72 | ""
73 |
74 | assert expected == full_html_sanitize(input)
75 | end
76 |
77 | test "does not strip divs" do
78 | input = ~s()
79 | expected = ~s()
80 | assert expected == full_html_sanitize(input)
81 | end
82 |
83 | test "does not strip the mailto URI scheme" do
84 | input = ~s(Email Us)
85 | expected = ~s(Email Us)
86 | assert expected == full_html_sanitize(input)
87 | end
88 |
89 | test "does encode script in textarea, but preserves white-space" do
90 | input = ~s()
91 | expected = ~s()
92 | assert expected == full_html_sanitize(input)
93 | end
94 |
95 | test "does not contain replacement characters in result" do
96 | input = ~s[ Hi
]
97 | expected = ~s[alert() Hi
]
98 | assert expected == full_html_sanitize(input)
99 | end
100 |
101 | test "does not strip valid html5 attributes from
" do
102 | input =
103 | ~s[
]
104 |
105 | assert input == full_html_sanitize(input)
106 | end
107 |
108 | test "does not strip valid html5 attributes srcset and sizes from
" do
109 | input =
110 | ~s[
]
111 |
112 | assert input == full_html_sanitize(input)
113 | end
114 |
115 | test "does not strip any header tags" do
116 | input = """
117 | Header 1
118 | Header 2
119 | Header 3
120 | Header 4
121 | Header 5
122 | Header 6
123 | """
124 |
125 | assert input == full_html_sanitize(input)
126 | end
127 |
128 | test "make sure a very long URI is truncated before capturing URI scheme" do
129 | input =
130 | "
"
131 |
132 | assert "
" == full_html_sanitize(input)
133 | end
134 | end
135 |
--------------------------------------------------------------------------------
/test/html_sanitize_ex_test.exs:
--------------------------------------------------------------------------------
1 | defmodule HtmlSanitizeExTest do
2 | use ExUnit.Case, async: true
3 |
4 | test "strips all the tags" do
5 | input =
6 | "hello! "
7 |
8 | assert "hello! code!hello code!" == HtmlSanitizeEx.strip_tags(input)
9 | end
10 | end
11 |
--------------------------------------------------------------------------------
/test/markdown_html_test.exs:
--------------------------------------------------------------------------------
1 | defmodule HtmlSanitizeExScrubberMarkdownHTMLTest do
2 | use ExUnit.Case, async: true
3 |
4 | defp sanitize(text) do
5 | text |> HtmlSanitizeEx.Scrubber.scrub(HtmlSanitizeEx.Scrubber.MarkdownHTML)
6 | end
7 |
8 | test "strips nothing" do
9 | input = "This is an example of space eating."
10 | expected = "This is an example of space eating."
11 | assert expected == sanitize(input)
12 | end
13 |
14 | test "does NOT strip language class from code tag" do
15 | input = "Something.new
"
16 | assert input == sanitize(input)
17 | end
18 |
19 | test "strips everything except the allowed tags" do
20 | input = "hello
"
21 | expected = "hello code!
"
22 | assert expected == sanitize(input)
23 | end
24 |
25 | test "allows target=_blank inside " do
26 | input =
27 | ~S(hello world)
28 |
29 | expected =
30 | ~S(hello world)
31 |
32 | assert expected == sanitize(input)
33 | end
34 |
35 | test "disallows anything else for target= inside " do
36 | input =
37 | ~S(hello world)
38 |
39 | expected = ~S(hello world)
40 | assert expected == sanitize(input)
41 | end
42 |
43 | test "allows rel=noopener inside " do
44 | input =
45 | ~S(hello world)
46 |
47 | expected =
48 | ~S(hello world)
49 |
50 | assert expected == sanitize(input)
51 | end
52 |
53 | test "allows rel=noreferrer inside " do
54 | input =
55 | ~S(hello world)
56 |
57 | expected =
58 | ~S(hello world)
59 |
60 | assert expected == sanitize(input)
61 | end
62 |
63 | test "disallows anything else for rel= inside " do
64 | input =
65 | ~S(hello world)
66 |
67 | expected =
68 | ~S(hello world)
69 |
70 | assert expected == sanitize(input)
71 | end
72 |
73 | test "strips everything except the allowed tags (for multiple tags)" do
74 | input =
75 | ""
76 |
77 | expected = "code!hello code!
"
78 | assert expected == sanitize(input)
79 | end
80 |
81 | test "strips everything for faulty allowed_tags: key" do
82 | input = "hello"
83 | expected = "hello"
84 | assert expected != sanitize(input)
85 | end
86 |
87 | test "strips invalid html" do
88 | input = "<< hi"
95 | assert "<\"
hi" == sanitize(input)
96 | end
97 |
98 | test "strips nested tags" do
99 | input = "Wei<a onclick='alert(document.cookie);'/>rdos"
100 | expected = "Wei<a onclick='alert(document.cookie);'/>rdos"
101 | assert expected == sanitize(input)
102 | end
103 |
104 | test "strips certain tags in multi line strings" do
105 | input =
106 | "This is a test.\n\n\n\nIt no longer contains any HTML.
\n"
107 |
108 | expected =
109 | "This is a test.\n\n\n\nIt no longer contains any HTML.
\n"
110 |
111 | assert expected == sanitize(input)
112 | end
113 |
114 | test "strips blank string" do
115 | assert "" == sanitize("")
116 | assert "" == sanitize(" ")
117 | assert "" == sanitize(nil)
118 | end
119 |
120 | test "strips nothing from plain text" do
121 | input = "Dont touch me"
122 | expected = "Dont touch me"
123 | assert expected == sanitize(input)
124 | end
125 |
126 | test "strips nothing from a sentence" do
127 | input = "This is a test."
128 | expected = "This is a test."
129 | assert expected == sanitize(input)
130 | end
131 |
132 | test "strips tags with comment" do
133 | input = "This has a here."
134 | expected = "This has a here."
135 | assert expected == sanitize(input)
136 | end
137 |
138 | test "strip_tags escapes special characters" do
139 |     assert "&", sanitize("&") # NOTE(review): second arg to assert/2 is a failure message — this assertion always passes; likely meant `assert expected == sanitize(input)` (literals garbled in this dump — confirm against the real file)
140 | end
141 |
142 | # link sanitizer
143 |
144 | test "test_strip_links_with_tags_in_tags" do
145 | input = "<a href='hello'>all day long</a>"
146 | expected = "<a href='hello'>all day long</a>"
147 | assert expected == sanitize(input)
148 | end
149 |
150 | test "test_strip_links_with_unclosed_tags" do
151 | assert "" == sanitize("on my mind all day long" ==
163 | sanitize(input)
164 | end
165 |
166 | @tag href_scrubbing: true
167 | test "test_strip_links_leaves_nonlink_tags" do
168 | assert "My mind\nall day long" ==
169 | sanitize(
170 | "My mind\nall day long"
171 | )
172 | end
173 |
174 | @tag href_scrubbing: true
175 | test "strips tags with sanitize/1" do
176 | input =
177 | "This is a test.
"
178 |
179 | assert "This is a test.
" ==
180 | sanitize(input)
181 | end
182 |
183 | @a_href_hacks [
184 | "text here",
185 | "text here",
186 | "text here",
187 | "text here",
188 | "text here",
189 | "text here",
190 | "text here",
191 | "text here",
192 | "text here",
193 | "text here",
194 | "text here",
195 | "text here",
196 | "text here",
197 | "text here",
198 | "text here",
199 | "text here",
200 | "text here",
201 | "text here",
202 | "text here",
203 | "text here",
204 | "text here",
205 | "script:alert(\'foo\')\">text here",
206 | "text here",
207 | "text here",
208 | "text here",
209 | "text here"
210 | ]
211 |
212 | @tag href_scrubbing: true
213 | test "strips malicious protocol hacks from a href attribute" do
214 | expected = "text here"
215 | Enum.each(@a_href_hacks, fn x -> assert expected == sanitize(x) end)
216 | end
217 |
218 | @tag href_scrubbing: true
219 | test "does not strip x03a legitimate" do
220 | assert "" ==
221 | sanitize("")
222 |
223 | assert "" ==
224 | sanitize("")
225 | end
226 |
227 | test "test_strip links with links" do
228 | input =
229 | "0wn3d"
230 |
231 | assert "0wn3d" ==
232 | sanitize(input)
233 | end
234 |
235 | test "test_strip_links_with_linkception" do
236 | assert "Magic" ==
237 | sanitize(
238 | "Magic"
239 | )
240 | end
241 |
242 | test "test_strip_links_with_a_tag_in_href" do
243 | assert "FrrFox" == sanitize("FrrFox")
244 | end
245 |
246 | test "normal scrubbing does only allow certain tags and attributes" do
247 | input = "foo"
248 | expected = "foo"
249 | assert expected == sanitize(input)
250 | end
251 |
252 | test "strips not allowed attributes" do
253 | input =
254 | "start foo bar baz end"
255 |
256 | expected = "start foo bar baz end"
257 | assert expected == sanitize(input)
258 | end
259 |
260 | test "sanitize_script" do
261 | assert "a b cblah blah blahd e f" ==
262 | sanitize(
263 | "a b cd e f"
264 | )
265 | end
266 |
267 | @tag href_scrubbing: true
268 | test "sanitize_js_handlers" do
269 | input =
270 | ~s(onthis="do that" hello)
271 |
272 | assert "onthis=\"do that\" hello" ==
273 | sanitize(input)
274 | end
275 |
276 | test "sanitize_javascript_href" do
277 | raw =
278 | ~s(href="javascript:bang" foo, bar)
279 |
280 | assert ~s(href="javascript:bang" foo, bar) ==
281 | sanitize(raw)
282 | end
283 |
284 | test "sanitize_image_src" do
285 | raw =
286 | ~s(src="javascript:bang"
foo, bar)
287 |
288 | assert "src=\"javascript:bang\"
, bar" ==
289 | sanitize(raw)
290 | end
291 |
292 | @tag href_scrubbing: true
293 | test "should only allow http/https protocols" do
294 | assert "baz" ==
295 | sanitize(~s())
296 |
297 | assert "baz" ==
298 | sanitize(
299 | ~s()
300 | )
301 |
302 | assert "baz" ==
303 | sanitize(
304 | ~s()
305 | )
306 | end
307 |
308 | # test "video_poster_sanitization" do
309 | # assert ~s() == ~s()
310 | # assert ~s() == sanitize("")
311 | # end
312 |
313 | test "strips not allowed tags " do
314 | input = ""
315 | expected = ""
316 | assert expected == sanitize(input)
317 | end
318 |
319 | test "strips not allowed attributes " do
320 | input = ""
321 | expected = ""
322 | assert expected == sanitize(input)
323 | end
324 |
325 | @image_src_hacks [
326 | "
",
327 | "
",
328 | "
",
329 | "
",
330 | "
",
331 | "
",
332 | "
",
333 | "
",
334 | "
",
335 | "
",
336 | "
",
337 | "
",
338 | "
",
339 | "
",
340 | "
"
341 | ]
342 |
343 | test "strips malicious protocol hacks from img src attribute" do
344 | expected = "
"
345 | Enum.each(@image_src_hacks, fn x -> assert expected == sanitize(x) end)
346 | end
347 |
348 | test "strips script tag" do
349 | input = ""
350 | expected = ""
351 | assert expected == sanitize(input)
352 | end
353 |
354 | test "strips xss image hack with uppercase tags" do
355 | input = "
\">"
356 | expected = "
alert(\"XSS\")\">"
357 | assert expected == sanitize(input)
358 | end
359 |
360 | test "should_sanitize_tag_broken_up_by_null" do
361 | assert "alert(\"XSS\")" == sanitize("alert(\"XSS\")")
362 | end
363 |
364 | test "should_sanitize_invalid_script_tag" do
365 | input = ""
366 | assert "" == sanitize(input)
367 | end
368 |
369 | test "should_sanitize_unclosed_script" do
370 | input = "]]>")
425 | end
426 |
427 | @tag cdata: true
428 | test "should_sanitize_unterminated_cdata_section" do
429 | assert "neverending..." ==
430 | sanitize("neverending...")
431 | end
432 |
433 | @tag cdata: true
434 | test "strips CDATA" do
435 | input = "This has a ]]> here."
436 | expected = "This has a ]]> here."
437 | assert expected == sanitize(input)
438 | end
439 |
440 | test "should_not_mangle_urls_with_ampersand" do
441 | input = "my link"
442 | assert input == sanitize(input)
443 | end
444 |
445 | test "should_sanitize_neverending_attribute" do
446 | assert "" == sanitize("foo"
451 | # expected = "foo
"
452 | # assert expected == sanitize(input)
453 | # end
454 |
455 | test "does not strip the mailto URI scheme" do
456 | input = ~s(Email Us)
457 | expected = ~s(Email Us)
458 | assert expected == sanitize(input)
459 | end
460 |
461 | test "does not strip any header tags" do
462 | input = """
463 | Header 1
464 | Header 2
465 | Header 3
466 | Header 4
467 | Header 5
468 | Header 6
469 | """
470 |
471 | assert input == sanitize(input)
472 | end
473 | end
474 |
--------------------------------------------------------------------------------
/test/no_scrub_test.exs:
--------------------------------------------------------------------------------
1 | defmodule HtmlSanitizeExScrubberNoScrubTest do
2 | use ExUnit.Case, async: true
3 |
4 | defp no_scrub_sanitize(text) do
5 | HtmlSanitizeEx.noscrub(text)
6 | end
7 |
8 | test "strips nothing" do
9 | input = "This is an example of space eating."
10 | assert input == no_scrub_sanitize(input)
11 | end
12 |
13 | test "leaves white-space between nodes intact" do
14 | input =
15 | "This is\nan example of\n\nspace eating."
16 |
17 | assert input == no_scrub_sanitize(input)
18 | end
19 |
20 | test "leaves white-space between nodes intact (CR)" do
21 | input =
22 | "This is\nan example of\r\n\r\nspace eating."
23 |
24 | assert input == no_scrub_sanitize(input)
25 | end
26 |
27 | test "leaves white-space between nodes intact (tabs)" do
28 | input =
29 | "This is\tan example of\t\tspace eating."
30 |
31 | assert input == no_scrub_sanitize(input)
32 | end
33 | end
34 |
--------------------------------------------------------------------------------
/test/strip_tags_test.exs:
--------------------------------------------------------------------------------
1 | defmodule HtmlSanitizeExScrubberStripTagsTest do
2 | use ExUnit.Case, async: true
3 |
4 | defp strip_tags(text) do
5 | HtmlSanitizeEx.strip_tags(text)
6 | end
7 |
8 | test "strips everything except the allowed tags (for multiple tags)" do
9 | input =
10 | ""
11 |
12 | expected = "code!hello code!"
13 | assert expected == strip_tags(input)
14 | end
15 |
16 | test "strips everything" do
17 | input = "hello"
18 | expected = "hello"
19 | assert expected == strip_tags(input)
20 | end
21 |
22 | test "strips invalid html" do
23 | input = "<< hi"
30 | assert "<\" hi" == strip_tags(input)
31 | end
32 |
33 | test "strips nested tags" do
34 | input = "Wei<a onclick='alert(document.cookie);'/>rdos"
35 | expected = "Wei<a onclick='alert(document.cookie);'/>rdos"
36 | assert expected == strip_tags(input)
37 | end
38 |
39 | test "strips tags in multi line strings" do
40 | input =
41 | "This is a test.\n\n\n\nIt no longer contains any HTML.
\n"
42 |
43 | expected = "This is a test.\n\n\n\nIt no longer contains any HTML.\n"
44 | assert expected == strip_tags(input)
45 | end
46 |
47 | test "strips comments" do
48 | assert "This is <-- not\n a comment here." ==
49 | strip_tags("This is <-- not\n a comment here.")
50 | end
51 |
52 | test "strips blank string" do
53 | assert "" == strip_tags("")
54 | assert "" == strip_tags(" ")
55 | assert "" == strip_tags(nil)
56 | end
57 |
58 | test "strips nothing from plain text" do
59 | input = "Dont touch me"
60 | expected = "Dont touch me"
61 | assert expected == strip_tags(input)
62 | end
63 |
64 | test "strips tags with many open quotes" do
65 | assert "<<" == strip_tags("<<")
66 | end
67 |
68 | test "strips nothing from a sentence" do
69 | input = "This is a test."
70 | expected = "This is a test."
71 | assert expected == strip_tags(input)
72 | end
73 |
74 | test "strips tags with comment" do
75 | input = "This has a here."
76 | expected = "This has a here."
77 | assert expected == strip_tags(input)
78 | end
79 |
80 | test "strip_tags escapes special characters" do
81 |     assert "&", strip_tags("&") # NOTE(review): second arg to assert/2 is a failure message — this assertion always passes; likely meant `assert expected == strip_tags(input)` (literals garbled in this dump — confirm against the real file)
82 | end
83 |
84 | # link sanitizer
85 |
86 | test "test_strip_links_with_tags_in_tags" do
87 | input = "<a href='hello'>all day long</a>"
88 | expected = "<a href='hello'>all day long</a>"
89 | assert expected == strip_tags(input)
90 | end
91 |
92 | test "test_strip_links_with_unclosed_tags" do
93 | assert "" == strip_tags("text here",
102 | "text here",
103 | "text here",
104 | "text here",
105 | "text here",
106 | "text here",
107 | "text here",
108 | "text here",
109 | "text here",
110 | "text here",
111 | "text here",
112 | "text here",
113 | "text here",
114 | "text here",
115 | "text here",
116 | "text here",
117 | "text here",
118 | "text here",
119 | "text here",
120 | "text here",
121 | "text here",
122 | "script:alert(\'foo\')\">text here",
123 | "text here",
124 | "text here",
125 | "text here",
126 | "text here"
127 | ]
128 |
129 | @tag href_scrubbing: true
130 | test "strips malicious protocol hacks from a href attribute" do
131 | expected = "text here"
132 | Enum.each(@a_href_hacks, fn x -> assert expected == strip_tags(x) end)
133 | end
134 |
135 | test "test_strip links with links" do
136 | input =
137 | "0wn3d"
138 |
139 | assert "0wn3d" == strip_tags(input)
140 | end
141 |
142 | test "test_strip_links_with_a_tag_in_href" do
143 | assert "FrrFox" == strip_tags("FrrFox")
144 | end
145 |
146 | test "normal scrubbing does only allow certain tags and attributes" do
147 | input = "foo"
148 | expected = "foo"
149 | assert expected == strip_tags(input)
150 | end
151 |
152 | @image_src_hacks [
153 | "
",
154 | "
",
155 | "
",
156 | "
",
157 | "
",
158 | "
",
159 | "
",
160 | "
",
161 | "
",
162 | "
",
163 | "
",
164 | "
",
165 | "
",
166 | "
",
167 | "
"
168 | ]
169 |
170 | test "strips malicious protocol hacks from img src attribute" do
171 | expected = ""
172 | Enum.each(@image_src_hacks, fn x -> assert expected == strip_tags(x) end)
173 | end
174 |
175 | test "strips script tag" do
176 | input = ""
177 | expected = ""
178 | assert expected == strip_tags(input)
179 | end
180 |
181 | test "should_sanitize_tag_broken_up_by_null" do
182 | assert "alert(\"XSS\")" == strip_tags("alert(\"XSS\")")
183 | end
184 |
185 | test "should_sanitize_invalid_script_tag" do
186 | input = ""
187 | assert "" == strip_tags(input)
188 | end
189 |
190 | test "should_sanitize_script_tag_with_multiple_open_brackets" do
191 | assert "<alert(\"XSS\");//<" ==
192 | strip_tags("<")
193 |
194 | assert "" ==
195 | strip_tags("
]]>")
240 | end
241 |
242 | @tag cdata: true
243 | test "should_sanitize_cdata_section like any other" do
244 | assert "section]]>" == strip_tags("section]]>")
245 | end
246 |
247 | @tag cdata: true
248 | test "should_sanitize_unterminated_cdata_section" do
249 | assert "neverending..." == strip_tags("neverending...")
250 | end
251 |
252 | @tag cdata: true
253 | test "strips CDATA" do
254 | input = "This has a ]]> here."
255 | expected = "This has a ]]> here."
256 | assert expected == strip_tags(input)
257 | end
258 |
259 | test "should sanitize neverending attribute" do
260 | assert "" == strip_tags("text with break between tags\r\nwill remove break"
271 | )
272 | end
273 |
274 | test "should not destroy white-space /3" do
275 | assert "some text\r\nbreak only from one side" ==
276 | strip_tags("some text\r\nbreak only from one side")
277 | end
278 | end
279 |
--------------------------------------------------------------------------------
/test/test_helper.exs:
--------------------------------------------------------------------------------
1 | ExUnit.start()
2 |
--------------------------------------------------------------------------------
/test/test_if_tests_fail_after_resetting_lib.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 |
3 | # common setup
4 |
5 | set -e
6 |
7 | DIRNAME=$( cd "$( dirname "$0" )" && pwd )
8 | PROJECT_ROOT=$( cd "$DIRNAME/.." && pwd )
9 |
10 | # execution
11 |
12 | cd $PROJECT_ROOT
13 |
14 | git checkout master lib/
15 |
16 | if mix test ; then
17 | echo ""
18 | echo "------------------------------------------------------------------"
19 | echo ""
20 | echo "There are changes to both lib/ and test/ which can indicate"
21 | echo "a bugfix with a corresponding test that reproduces the fixed bug"
22 | echo ""
23 | echo "(if this is not a bugfix PR, please ignore the following error)"
24 | echo ""
25 |   printf '\033[31m%s\033[0m\n' "After resetting changes in lib/, mix test should have failed"
26 | echo ""
27 | echo "------------------------------------------------------------------"
28 | echo ""
29 | exit 1
30 | else
31 | exit 0
32 | fi
33 |
--------------------------------------------------------------------------------
/test/traverser_test.exs:
--------------------------------------------------------------------------------
1 | defmodule StripEverythingButB do
2 | def scrub({"b", attributes, children}), do: {"b", attributes, children}
3 |
4 | def scrub({_tag, _attributes, children}) do
5 | children
6 | end
7 |
8 | def scrub(text) do
9 | text
10 | end
11 | end
12 |
13 | defmodule HtmlSanitizeExTraverserTest do
14 | use ExUnit.Case, async: true
15 |
16 | def parse_to_tree(html) do
17 | html
18 | |> HtmlSanitizeEx.Parser.parse()
19 | |> HtmlSanitizeEx.Traverser.traverse(StripEverythingButB)
20 | end
21 |
22 | test "should return expected tree" do
23 | input =
24 | "hello! "
25 |
26 | expected = ["hello! ", {"b", [], ["code!"]}, "hello ", "code!"]
27 | assert expected == parse_to_tree(input)
28 | end
29 |
30 | test "should return expected tree 2" do
31 | input =
32 | "This is the test.\n\n\n\nIt no longer contains any HTML.
\n"
33 |
34 | expected = [
35 | "This is ",
36 | {"b", [], ["the ", "test"]},
37 | ".",
38 | " _ \n\n\n\n",
39 | "It no ",
40 | {"b", [], ["longer ", "contains ", "any ", "HTML", "."]},
41 | " _ \n"
42 | ]
43 |
44 | assert expected == parse_to_tree(input)
45 | end
46 |
47 | test "should return expected tree 3" do
48 | input = "This has a here."
49 | expected = ["This has a ", {:comment, " comment "}, " here."]
50 | assert expected == parse_to_tree(input)
51 | end
52 |
53 | test "should return expected tree 4" do
54 | input = "This has a