├── CHANGELOG.md
├── HtmlSanitizer.php
├── HtmlSanitizerAction.php
├── HtmlSanitizerConfig.php
├── HtmlSanitizerInterface.php
├── LICENSE
├── Parser
│   ├── MastermindsParser.php
│   └── ParserInterface.php
├── README.md
├── Reference
│   └── W3CReference.php
├── TextSanitizer
│   ├── StringSanitizer.php
│   └── UrlSanitizer.php
├── Visitor
│   ├── AttributeSanitizer
│   │   ├── AttributeSanitizerInterface.php
│   │   └── UrlAttributeSanitizer.php
│   ├── DomVisitor.php
│   ├── Model
│   │   └── Cursor.php
│   └── Node
│       ├── BlockedNode.php
│       ├── DocumentNode.php
│       ├── Node.php
│       ├── NodeInterface.php
│       └── TextNode.php
└── composer.json


/CHANGELOG.md:
--------------------------------------------------------------------------------
CHANGELOG
=========

7.2
---

 * Add support for configuring the default action to block or allow unconfigured elements instead of dropping them

6.4
---

 * Add support for sanitizing unlimited length of HTML document

6.1
---

 * Add the component as experimental

--------------------------------------------------------------------------------
/HtmlSanitizer.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
*/

namespace Symfony\Component\HtmlSanitizer;

use Symfony\Component\HtmlSanitizer\Parser\MastermindsParser;
use Symfony\Component\HtmlSanitizer\Parser\ParserInterface;
use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\DomVisitor;

/**
 * Sanitizes untrusted HTML: the input is parsed into a DOM tree, visited with a
 * DomVisitor built from the configuration, and the visited tree is rendered back
 * to a string.
 *
 * @author Titouan Galopin
 */
final class HtmlSanitizer implements HtmlSanitizerInterface
{
    private ParserInterface $parser;

    /**
     * DOM visitors already built, keyed by sanitization context, so that each
     * context is only configured once per sanitizer instance.
     *
     * @var array<string, DomVisitor>
     */
    private array $domVisitors = [];

    /**
     * @param ParserInterface|null $parser Parser turning the input into a DOM tree;
     *                                     a MastermindsParser is used when null
     */
    public function __construct(
        private HtmlSanitizerConfig $config,
        ?ParserInterface $parser = null,
    ) {
        // $config is already assigned by constructor promotion.
        $this->parser = $parser ?? new MastermindsParser();
    }

    /**
     * Sanitizes the input for the "body" context.
     */
    public function sanitize(string $input): string
    {
        return $this->sanitizeWithContext(W3CReference::CONTEXT_BODY, $input);
    }

    /**
     * Sanitizes the input for the context associated with the given parent element.
     *
     * The element name is lower-cased and looked up in W3CReference::CONTEXTS_MAP;
     * elements without an entry are sanitized in the "body" context.
     */
    public function sanitizeFor(string $element, string $input): string
    {
        return $this->sanitizeWithContext(
            W3CReference::CONTEXTS_MAP[StringSanitizer::htmlLower($element)] ?? W3CReference::CONTEXT_BODY,
            $input
        );
    }

    /**
     * Runs the sanitization pipeline for an already resolved context.
     *
     * Returns an empty string when the (possibly truncated) input is not valid
     * UTF-8, when the parser returns a falsy value, or when the visitor yields nothing.
     */
    private function sanitizeWithContext(string $context, string $input): string
    {
        // Text context: early return with HTML encoding
        if (W3CReference::CONTEXT_TEXT === $context) {
            return StringSanitizer::encodeHtmlEntities($input);
        }

        // Other context: build a DOM visitor
        $this->domVisitors[$context] ??= $this->createDomVisitorForContext($context);

        // Prevent DOS attack induced by extremely long HTML strings
        $maxInputLength = $this->config->getMaxInputLength();
        if (-1 !== $maxInputLength && \strlen($input) > $maxInputLength) {
            $input = $this->truncateInput($input, $maxInputLength);
        }

        // Only operate on valid UTF-8 strings. This is necessary to prevent cross
        // site scripting issues on Internet Explorer 6. Idea from Drupal (filter_xss).
        if (!$this->isValidUtf8($input)) {
            return '';
        }

        // Remove NULL character
        $input = str_replace(\chr(0), '', $input);

        // Parse as HTML
        if (!$parsed = $this->parser->parse($input)) {
            return '';
        }

        // Visit the DOM tree and render the sanitized nodes
        return $this->domVisitors[$context]->visit($parsed)?->render() ?? '';
    }

    /**
     * Cuts the input down to at most $maxBytes bytes without leaving a partial
     * UTF-8 character at the end.
     *
     * substr() works on bytes, so the cut can land in the middle of a multi-byte
     * character. Without the trimming below, such an input would fail the UTF-8
     * validation and be discarded entirely instead of being truncated.
     */
    private function truncateInput(string $input, int $maxBytes): string
    {
        $truncated = substr($input, 0, $maxBytes);

        // A UTF-8 character is at most 4 bytes long, so a split character leaves
        // at most 3 dangling bytes behind. If the string is invalid for another
        // reason it stays invalid and is rejected by the caller.
        for ($i = 0; $i < 3 && !$this->isValidUtf8($truncated); ++$i) {
            $truncated = substr($truncated, 0, -1);
        }

        return $truncated;
    }

    private function isValidUtf8(string $html): bool
    {
        // preg_match() fails silently on strings containing invalid UTF-8:
        // it returns false instead of 1, hence the strict comparison.
        return '' === $html || 1 === preg_match('//u', $html);
    }

    /**
     * Builds the DOM visitor for a context by filtering the configured elements.
     *
     * In the "head" context only elements listed in W3CReference::HEAD_ELEMENTS are
     * kept; in any other context only elements that are NOT listed there are kept.
     * Allowed elements map to their allowed attributes, blocked and dropped elements
     * map to the corresponding HtmlSanitizerAction.
     */
    private function createDomVisitorForContext(string $context): DomVisitor
    {
        // Head: only a few elements are allowed
        // Body: allow any configured element that isn't in <head>
        $wantHeadElements = W3CReference::CONTEXT_HEAD === $context;
        $belongsToContext = static fn (int|string $element): bool => \array_key_exists($element, W3CReference::HEAD_ELEMENTS) === $wantHeadElements;

        $elementsConfig = [];

        foreach ($this->config->getAllowedElements() as $allowedElement => $allowedAttributes) {
            if ($belongsToContext($allowedElement)) {
                $elementsConfig[$allowedElement] = $allowedAttributes;
            }
        }

        foreach (array_keys($this->config->getBlockedElements()) as $blockedElement) {
            if ($belongsToContext($blockedElement)) {
                $elementsConfig[$blockedElement] = HtmlSanitizerAction::Block;
            }
        }

        foreach (array_keys($this->config->getDroppedElements()) as $droppedElement) {
            if ($belongsToContext($droppedElement)) {
                $elementsConfig[$droppedElement] = HtmlSanitizerAction::Drop;
            }
        }

        return new DomVisitor($this->config, $elementsConfig);
    }
}

--------------------------------------------------------------------------------
/HtmlSanitizerAction.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\HtmlSanitizer;

enum HtmlSanitizerAction: string
{
    /**
     * Dropped elements are elements the sanitizer should remove from the input, including their children.
     */
    case Drop = 'drop';

    /**
     * Blocked elements are elements the sanitizer should remove from the input, but retain their children.
     */
    case Block = 'block';

    /**
     * Allowed elements are elements the sanitizer should retain from the input.
     */
    case Allow = 'allow';
}

--------------------------------------------------------------------------------
/HtmlSanitizerConfig.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
*/

namespace Symfony\Component\HtmlSanitizer;

use Symfony\Component\HtmlSanitizer\Reference\W3CReference;
use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface;

/**
 * Configuration of the HTML sanitizer.
 *
 * Every configuration method leaves the current instance untouched and returns
 * a modified clone, so calls are meant to be chained.
 *
 * @author Titouan Galopin
 */
class HtmlSanitizerConfig
{
    /**
     * Action applied to elements that are neither allowed, blocked nor dropped explicitly.
     */
    private HtmlSanitizerAction $defaultAction = HtmlSanitizerAction::Drop;

    /**
     * Elements that should be removed.
     *
     * @var array<string, true>
     */
    private array $droppedElements = [];

    /**
     * Elements that should be removed but their children should be retained.
     *
     * @var array<string, true>
     */
    private array $blockedElements = [];

    /**
     * Elements that should be retained, with their allowed attributes.
     *
     * @var array<string, array<string, true>>
     */
    private array $allowedElements = [];

    /**
     * Attributes that should always be added to certain elements.
     *
     * @var array<string, array<string, string>>
     */
    private array $forcedAttributes = [];

    /**
     * Links schemes that should be retained, other being dropped.
     *
     * @var list<string>
     */
    private array $allowedLinkSchemes = ['http', 'https', 'mailto', 'tel'];

    /**
     * Links hosts that should be retained (by default, all hosts are allowed).
     *
     * @var list<string>|null
     */
    private ?array $allowedLinkHosts = null;

    /**
     * Should the sanitizer allow relative links (by default, they are dropped).
     */
    private bool $allowRelativeLinks = false;

    /**
     * Image/Audio/Video schemes that should be retained, other being dropped.
     *
     * @var list<string>
     */
    private array $allowedMediaSchemes = ['http', 'https', 'data'];

    /**
     * Image/Audio/Video hosts that should be retained (by default, all hosts are allowed).
     *
     * @var list<string>|null
     */
    private ?array $allowedMediaHosts = null;

    /**
     * Should the sanitizer allow relative media URL (by default, they are dropped).
     */
    private bool $allowRelativeMedias = false;

    /**
     * Should the URL in the sanitized document be transformed to HTTPS if they are using HTTP.
     */
    private bool $forceHttpsUrls = false;

    /**
     * Sanitizers that should be applied to specific attributes in addition to standard sanitization.
     *
     * @var list<AttributeSanitizerInterface>
     */
    private array $attributeSanitizers;

    /**
     * Maximum length of the input string in bytes, -1 meaning no limit.
     */
    private int $maxInputLength = 20_000;

    public function __construct()
    {
        // The URL attribute sanitizer is always registered by default.
        $this->attributeSanitizers = [
            new Visitor\AttributeSanitizer\UrlAttributeSanitizer(),
        ];
    }

    /**
     * Sets the default action for elements which are not otherwise specifically allowed or blocked.
     *
     * Note that a default action of Allow will allow all tags but they will not have any attributes.
     */
    public function defaultAction(HtmlSanitizerAction $action): static
    {
        $clone = clone $this;
        $clone->defaultAction = $action;

        return $clone;
    }

    /**
     * Allows all static elements and attributes from the W3C Sanitizer API standard.
     *
     * All scripts will be removed but the output may still contain other dangerous
     * behaviors like CSS injection (click-jacking), CSS expressions, ...
     */
    public function allowStaticElements(): static
    {
        $elements = array_merge(
            array_keys(W3CReference::HEAD_ELEMENTS),
            array_keys(W3CReference::BODY_ELEMENTS)
        );

        $clone = clone $this;
        foreach ($elements as $element) {
            $clone = $clone->allowElement($element, '*');
        }

        return $clone;
    }

    /**
     * Allows "safe" elements and attributes.
     *
     * All scripts will be removed, as well as other dangerous behaviors like CSS injection.
     */
    public function allowSafeElements(): static
    {
        // Keep only the attributes flagged as safe in the reference.
        $attributes = [];
        foreach (W3CReference::ATTRIBUTES as $attribute => $isSafe) {
            if ($isSafe) {
                $attributes[] = $attribute;
            }
        }

        $clone = clone $this;

        foreach (W3CReference::HEAD_ELEMENTS as $element => $isSafe) {
            if ($isSafe) {
                $clone = $clone->allowElement($element, $attributes);
            }
        }

        foreach (W3CReference::BODY_ELEMENTS as $element => $isSafe) {
            if ($isSafe) {
                $clone = $clone->allowElement($element, $attributes);
            }
        }

        return $clone;
    }

    /**
     * Allows only a given list of schemes to be used in links href attributes.
     *
     * All other schemes will be dropped.
     *
     * @param list<string> $allowLinkSchemes
     */
    public function allowLinkSchemes(array $allowLinkSchemes): static
    {
        $clone = clone $this;
        $clone->allowedLinkSchemes = $allowLinkSchemes;

        return $clone;
    }

    /**
     * Allows only a given list of hosts to be used in links href attributes.
     *
     * All other hosts will be dropped. By default all hosts are allowed
     * ($allowedLinkHosts = null).
     *
     * @param list<string>|null $allowLinkHosts
     */
    public function allowLinkHosts(?array $allowLinkHosts): static
    {
        $clone = clone $this;
        $clone->allowedLinkHosts = $allowLinkHosts;

        return $clone;
    }

    /**
     * Allows relative URLs to be used in links href attributes.
     */
    public function allowRelativeLinks(bool $allowRelativeLinks = true): static
    {
        $clone = clone $this;
        $clone->allowRelativeLinks = $allowRelativeLinks;

        return $clone;
    }

    /**
     * Allows only a given list of schemes to be used in media source attributes (img, audio, video, ...).
     *
     * All other schemes will be dropped.
     *
     * @param list<string> $allowMediaSchemes
     */
    public function allowMediaSchemes(array $allowMediaSchemes): static
    {
        $clone = clone $this;
        $clone->allowedMediaSchemes = $allowMediaSchemes;

        return $clone;
    }

    /**
     * Allows only a given list of hosts to be used in media source attributes (img, audio, video, ...).
     *
     * All other hosts will be dropped. By default all hosts are allowed
     * ($allowMediaHosts = null).
     *
     * @param list<string>|null $allowMediaHosts
     */
    public function allowMediaHosts(?array $allowMediaHosts): static
    {
        $clone = clone $this;
        $clone->allowedMediaHosts = $allowMediaHosts;

        return $clone;
    }

    /**
     * Allows relative URLs to be used in media source attributes (img, audio, video, ...).
     */
    public function allowRelativeMedias(bool $allowRelativeMedias = true): static
    {
        $clone = clone $this;
        $clone->allowRelativeMedias = $allowRelativeMedias;

        return $clone;
    }

    /**
     * Transforms URLs using the HTTP scheme to use the HTTPS scheme instead.
     */
    public function forceHttpsUrls(bool $forceHttpsUrls = true): static
    {
        $clone = clone $this;
        $clone->forceHttpsUrls = $forceHttpsUrls;

        return $clone;
    }

    /**
     * Configures the given element as allowed.
     *
     * Allowed elements are elements the sanitizer should retain from the input.
     *
     * A list of allowed attributes for this element can be passed as a second argument.
     * Passing "*" will allow all standard attributes on this element. By default, no
     * attributes are allowed on the element.
     *
     * Any attribute list previously configured for the element is replaced.
     *
     * @param list<string>|string $allowedAttributes
     */
    public function allowElement(string $element, array|string $allowedAttributes = []): static
    {
        $clone = clone $this;

        // Unblock/undrop the element if necessary
        unset($clone->blockedElements[$element], $clone->droppedElements[$element]);

        $clone->allowedElements[$element] = [];

        $attrs = ('*' === $allowedAttributes) ? array_keys(W3CReference::ATTRIBUTES) : (array) $allowedAttributes;
        foreach ($attrs as $allowedAttr) {
            $clone->allowedElements[$element][$allowedAttr] = true;
        }

        return $clone;
    }

    /**
     * Configures the given element as blocked.
     *
     * Blocked elements are elements the sanitizer should remove from the input, but retain
     * their children.
     */
    public function blockElement(string $element): static
    {
        $clone = clone $this;

        // Disallow/undrop the element if necessary
        unset($clone->allowedElements[$element], $clone->droppedElements[$element]);

        $clone->blockedElements[$element] = true;

        return $clone;
    }

    /**
     * Configures the given element as dropped.
     *
     * Dropped elements are elements the sanitizer should remove from the input, including
     * their children.
     *
     * Note: when using an empty configuration, all unknown elements are dropped
     * automatically. This method lets you drop elements that were allowed earlier
     * in the configuration, or explicitly drop some if you changed the default action.
     */
    public function dropElement(string $element): static
    {
        $clone = clone $this;

        // Disallow/unblock the element if necessary
        unset($clone->allowedElements[$element], $clone->blockedElements[$element]);

        $clone->droppedElements[$element] = true;

        return $clone;
    }

    /**
     * Configures the given attribute as allowed.
     *
     * Allowed attributes are attributes the sanitizer should retain from the input.
     *
     * The second argument lists the elements on which the attribute is allowed.
     * Passing "*" will allow all currently allowed elements to use this attribute.
     *
     * Only elements that are currently allowed are affected: the attribute is added
     * to the listed ones and removed from every other allowed element.
     *
     * @param list<string>|string $allowedElements
     */
    public function allowAttribute(string $attribute, array|string $allowedElements): static
    {
        $clone = clone $this;
        $allowedElements = ('*' === $allowedElements) ? array_keys($clone->allowedElements) : (array) $allowedElements;

        // For each configured element ...
        foreach ($clone->allowedElements as $element => $attrs) {
            if (\in_array($element, $allowedElements, true)) {
                // ... if the attribute should be allowed, add it
                $clone->allowedElements[$element][$attribute] = true;
            } else {
                // ... if the attribute should not be allowed, remove it
                unset($clone->allowedElements[$element][$attribute]);
            }
        }

        return $clone;
    }

    /**
     * Configures the given attribute as dropped.
     *
     * Dropped attributes are attributes the sanitizer should remove from the input.
     *
     * The second argument lists the elements on which to drop this attribute.
     * Passing "*" will drop this attribute from all currently allowed elements.
     *
     * Note: when using an empty configuration, all unknown attributes are dropped
     * automatically. This method lets you drop attributes that were allowed earlier
     * in the configuration.
     *
     * @param list<string>|string $droppedElements
     */
    public function dropAttribute(string $attribute, array|string $droppedElements): static
    {
        $clone = clone $this;
        $droppedElements = ('*' === $droppedElements) ? array_keys($clone->allowedElements) : (array) $droppedElements;

        foreach ($droppedElements as $element) {
            // unset() is a no-op when the element or the attribute is not configured.
            unset($clone->allowedElements[$element][$attribute]);
        }

        return $clone;
    }

    /**
     * Forcefully set the value of a given attribute on a given element.
     *
     * The attribute will be created on the nodes if it didn't exist.
     */
    public function forceAttribute(string $element, string $attribute, string $value): static
    {
        $clone = clone $this;
        $clone->forcedAttributes[$element][$attribute] = $value;

        return $clone;
    }

    /**
     * Registers a custom attribute sanitizer.
     */
    public function withAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static
    {
        $clone = clone $this;
        $clone->attributeSanitizers[] = $sanitizer;

        return $clone;
    }

    /**
     * Unregisters a custom attribute sanitizer.
     *
     * Sanitizers are compared by identity: only the exact instance passed here is removed.
     */
    public function withoutAttributeSanitizer(AttributeSanitizerInterface $sanitizer): static
    {
        $clone = clone $this;
        $clone->attributeSanitizers = array_values(array_filter(
            $clone->attributeSanitizers,
            static fn ($current) => $current !== $sanitizer
        ));

        return $clone;
    }

    /**
     * @param int $maxInputLength The maximum length of the input string in bytes
     *                            -1 means no limit
     *
     * @throws \InvalidArgumentException When the given length is lower than -1
     */
    public function withMaxInputLength(int $maxInputLength): static
    {
        if ($maxInputLength < -1) {
            // -1 itself is valid (no limit), so the bound is inclusive.
            throw new \InvalidArgumentException(\sprintf('The maximum input length must be greater than or equal to -1, "%d" given.', $maxInputLength));
        }

        $clone = clone $this;
        $clone->maxInputLength = $maxInputLength;

        return $clone;
    }

    public function getMaxInputLength(): int
    {
        return $this->maxInputLength;
    }

    public function getDefaultAction(): HtmlSanitizerAction
    {
        return $this->defaultAction;
    }

    /**
     * @return array<string, array<string, true>>
     */
    public function getAllowedElements(): array
    {
        return $this->allowedElements;
    }

    /**
     * @return array<string, true>
     */
    public function getBlockedElements(): array
    {
        return $this->blockedElements;
    }

    /**
     * @return array<string, true>
     */
    public function getDroppedElements(): array
    {
        return $this->droppedElements;
    }

    /**
     * @return array<string, array<string, string>>
     */
    public function getForcedAttributes(): array
    {
        return $this->forcedAttributes;
    }

    /**
     * @return list<string>
     */
    public function getAllowedLinkSchemes(): array
    {
        return $this->allowedLinkSchemes;
    }

    /**
     * @return list<string>|null
     */
    public function getAllowedLinkHosts(): ?array
    {
        return $this->allowedLinkHosts;
    }

    public function getAllowRelativeLinks(): bool
    {
        return $this->allowRelativeLinks;
    }

    /**
     * @return list<string>
     */
    public function getAllowedMediaSchemes(): array
    {
        return $this->allowedMediaSchemes;
    }

    /**
     * @return list<string>|null
     */
    public function getAllowedMediaHosts(): ?array
    {
        return $this->allowedMediaHosts;
    }

    public function getAllowRelativeMedias(): bool
    {
        return $this->allowRelativeMedias;
    }

    public function getForceHttpsUrls(): bool
    {
        return $this->forceHttpsUrls;
    }

    /**
     * @return list<AttributeSanitizerInterface>
     */
    public function getAttributeSanitizers(): array
    {
        return $this->attributeSanitizers;
    }
}

--------------------------------------------------------------------------------
/HtmlSanitizerInterface.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\HtmlSanitizer;

/**
 * Sanitizes an untrusted HTML input for safe insertion into a document's DOM.
 *
 * This interface is inspired by the W3C Standard Draft about a HTML Sanitizer API
 * ({@see https://wicg.github.io/sanitizer-api/}).
 *
 * @author Titouan Galopin
 */
interface HtmlSanitizerInterface
{
    /**
     * Sanitizes an untrusted HTML input for a <body> context.
     *
     * This method is NOT context sensitive: it assumes the returned HTML string
     * will be injected in a "body" context, and therefore will drop tags only
     * allowed in the "head" element. To sanitize a string for injection
     * in the "head" element, use {@see HtmlSanitizerInterface::sanitizeFor()}.
     */
    public function sanitize(string $input): string;

    /**
     * Sanitizes an untrusted HTML input for a given context.
     *
     * This method is context sensitive: by providing a parent element name
     * (body, head, title, ...), the sanitizer will adapt its rules to only
     * allow elements that are valid inside the given parent element.
     */
    public function sanitizeFor(string $element, string $input): string;
}

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
Copyright (c) 2021-present Fabien Potencier

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is furnished
to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
--------------------------------------------------------------------------------
/Parser/MastermindsParser.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\HtmlSanitizer\Parser;

use Masterminds\HTML5;

/**
 * Parser backed by the Masterminds HTML5 library.
 *
 * @author Titouan Galopin
 */
final class MastermindsParser implements ParserInterface
{
    /**
     * @param array $defaultOptions Options handed as-is to the HTML5 constructor on every parse
     */
    public function __construct(private array $defaultOptions = [])
    {
    }

    /**
     * Parses the given string as an HTML fragment, using a fresh HTML5 instance per call.
     */
    public function parse(string $html): ?\DOMNode
    {
        $html5 = new HTML5($this->defaultOptions);

        return $html5->loadHTMLFragment($html);
    }
}

--------------------------------------------------------------------------------
/Parser/ParserInterface.php:
--------------------------------------------------------------------------------
<?php

/*
 * This file is part of the Symfony package.
 *
 * (c) Fabien Potencier <fabien@symfony.com>
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Symfony\Component\HtmlSanitizer\Parser;

/**
 * Transforms an untrusted HTML input string into a DOM tree.
 *
 * @author Titouan Galopin
 */
interface ParserInterface
{
    /**
     * Parses a given string and returns a DOMNode tree.
     *
     * This method must return null if the string cannot be parsed as HTML.
     */
    public function parse(string $html): ?\DOMNode;
}

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
HtmlSanitizer Component
=======================

The HtmlSanitizer component provides an object-oriented API to sanitize
untrusted HTML input for safe insertion into a document's DOM.

Usage
-----

```php
use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
use Symfony\Component\HtmlSanitizer\HtmlSanitizer;

// By default, an element not added to the allowed or blocked elements
// will be dropped, including its children
$config = (new HtmlSanitizerConfig())
    // Allow "safe" elements and attributes. All scripts will be removed
    // as well as other dangerous behaviors like CSS injection
    ->allowSafeElements()

    // Allow all static elements and attributes from the W3C Sanitizer API
    // standard. All scripts will be removed but the output may still contain
    // other dangerous behaviors like CSS injection (click-jacking), CSS
    // expressions, ...
    ->allowStaticElements()

    // Allow the "div" element and no attribute can be on it
    ->allowElement('div')

    // Allow the "a" element, and the "title" attribute to be on it
    ->allowElement('a', ['title'])

    // Allow the "span" element, and any attribute from the Sanitizer API is allowed
    // (see https://wicg.github.io/sanitizer-api/#default-configuration)
    ->allowElement('span', '*')

    // Block the "section" element: this element will be removed but
    // its children will be retained
    ->blockElement('section')

    // Drop the "div" element: this element will be removed, including its children
    ->dropElement('div')

    // Allow the attribute "title" on the "div" element
    ->allowAttribute('title', ['div'])

    // Allow the attribute "data-custom-attr" on all currently allowed elements
    ->allowAttribute('data-custom-attr', '*')

    // Drop the "data-custom-attr" attribute from the "div" element:
    // this attribute will be removed
    ->dropAttribute('data-custom-attr', ['div'])

    // Drop the "data-custom-attr" attribute from all elements:
    // this attribute will be removed
    ->dropAttribute('data-custom-attr', '*')

    // Forcefully set the value of all "rel" attributes on "a"
    // elements to "noopener noreferrer"
    ->forceAttribute('a', 'rel', 'noopener noreferrer')

    // Transform all HTTP schemes to HTTPS
    ->forceHttpsUrls()

    // Configure which schemes are allowed in links (others will be dropped)
    ->allowLinkSchemes(['https', 'http', 'mailto'])

    // Configure which hosts are allowed in links (by default all are allowed)
    ->allowLinkHosts(['symfony.com', 'example.com'])

    // Allow relative URL in links (by default they are dropped)
    ->allowRelativeLinks()

    // Configure which schemes are allowed in img/audio/video/iframe (others will be dropped)
    ->allowMediaSchemes(['https', 'http'])

    // Configure which hosts are allowed in img/audio/video/iframe (by default all are allowed)
    ->allowMediaHosts(['symfony.com', 'example.com'])

    // Allow relative URL in img/audio/video/iframe (by default they are dropped)
    ->allowRelativeMedias()

    // Configure a custom attribute sanitizer to apply custom sanitization logic
    // ($attributeSanitizer instance of AttributeSanitizerInterface)
    ->withAttributeSanitizer($attributeSanitizer)

    // Unregister a previously registered attribute sanitizer
    // ($attributeSanitizer instance of AttributeSanitizerInterface)
    ->withoutAttributeSanitizer($attributeSanitizer)
;

$sanitizer = new HtmlSanitizer($config);

// Sanitize a given string, using the configuration provided and in the
// "body" context (tags only allowed in <head> will be removed)
$sanitizer->sanitize($userInput);

// Sanitize the given string for a usage in a <head> tag
$sanitizer->sanitizeFor('head', $userInput);

// Sanitize the given string for a usage in another tag
$sanitizer->sanitizeFor('title', $userInput); // Will encode as HTML entities
$sanitizer->sanitizeFor('textarea', $userInput); // Will encode as HTML entities
$sanitizer->sanitizeFor('div', $userInput); // Will sanitize as body
$sanitizer->sanitizeFor('section', $userInput); // Will sanitize as body
// ...
```

Resources
---------

 * [Contributing](https://symfony.com/doc/current/contributing/index.html)
 * [Report issues](https://github.com/symfony/symfony/issues) and
   [send Pull Requests](https://github.com/symfony/symfony/pulls)
   in the [main Symfony repository](https://github.com/symfony/symfony)

--------------------------------------------------------------------------------
/Reference/W3CReference.php:
--------------------------------------------------------------------------------
1 | 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 10 | */ 11 | 12 | namespace Symfony\Component\HtmlSanitizer\Reference; 13 | 14 | /** 15 | * Stores reference data from the W3C Sanitizer API standard. 16 | * 17 | * @see https://wicg.github.io/sanitizer-api/#default-configuration 18 | * 19 | * @author Titouan Galopin 20 | * 21 | * @internal 22 | */ 23 | final class W3CReference 24 | { 25 | /** 26 | * Sanitizer supported contexts. 27 | * 28 | * A parent element name can be passed as an argument to {@see HtmlSanitizer::sanitizeFor()}. 29 | * When doing so, depending on the given context, different elements will be allowed. 
30 | */ 31 | public const CONTEXT_HEAD = 'head'; 32 | public const CONTEXT_BODY = 'body'; 33 | public const CONTEXT_TEXT = 'text'; 34 | 35 | // Which context to apply depending on the passed parent element name 36 | public const CONTEXTS_MAP = [ 37 | 'head' => self::CONTEXT_HEAD, 38 | 'textarea' => self::CONTEXT_TEXT, 39 | 'title' => self::CONTEXT_TEXT, 40 | ]; 41 | 42 | /** 43 | * Elements allowed by the Sanitizer standard in as keys, including whether 44 | * they are safe or not as values (safe meaning no global display/audio/video impact). 45 | */ 46 | public const HEAD_ELEMENTS = [ 47 | 'head' => true, 48 | 'link' => true, 49 | 'meta' => true, 50 | 'style' => false, 51 | 'title' => true, 52 | ]; 53 | 54 | /** 55 | * Elements allowed by the Sanitizer standard in as keys, including whether 56 | * they are safe or not as values (safe meaning no global display/audio/video impact). 57 | */ 58 | public const BODY_ELEMENTS = [ 59 | 'a' => true, 60 | 'abbr' => true, 61 | 'acronym' => true, 62 | 'address' => true, 63 | 'area' => true, 64 | 'article' => true, 65 | 'aside' => true, 66 | 'audio' => true, 67 | 'b' => true, 68 | 'basefont' => true, 69 | 'bdi' => true, 70 | 'bdo' => true, 71 | 'bgsound' => false, 72 | 'big' => true, 73 | 'blockquote' => true, 74 | 'body' => true, 75 | 'br' => true, 76 | 'button' => true, 77 | 'canvas' => true, 78 | 'caption' => true, 79 | 'center' => true, 80 | 'cite' => true, 81 | 'code' => true, 82 | 'col' => true, 83 | 'colgroup' => true, 84 | 'command' => true, 85 | 'data' => true, 86 | 'datalist' => true, 87 | 'dd' => true, 88 | 'del' => true, 89 | 'details' => true, 90 | 'dfn' => true, 91 | 'dialog' => true, 92 | 'dir' => true, 93 | 'div' => true, 94 | 'dl' => true, 95 | 'dt' => true, 96 | 'em' => true, 97 | 'fieldset' => true, 98 | 'figcaption' => true, 99 | 'figure' => true, 100 | 'font' => true, 101 | 'footer' => true, 102 | 'form' => false, 103 | 'h1' => true, 104 | 'h2' => true, 105 | 'h3' => true, 106 | 'h4' => true, 107 | 'h5' => 
true, 108 | 'h6' => true, 109 | 'header' => true, 110 | 'hgroup' => true, 111 | 'hr' => true, 112 | 'html' => true, 113 | 'i' => true, 114 | 'image' => true, 115 | 'img' => true, 116 | 'input' => false, 117 | 'ins' => true, 118 | 'kbd' => true, 119 | 'keygen' => true, 120 | 'label' => true, 121 | 'layer' => true, 122 | 'legend' => true, 123 | 'li' => true, 124 | 'listing' => true, 125 | 'main' => true, 126 | 'map' => true, 127 | 'mark' => true, 128 | 'marquee' => true, 129 | 'menu' => true, 130 | 'meter' => true, 131 | 'nav' => true, 132 | 'nobr' => true, 133 | 'ol' => true, 134 | 'optgroup' => true, 135 | 'option' => true, 136 | 'output' => true, 137 | 'p' => true, 138 | 'picture' => true, 139 | 'plaintext' => true, 140 | 'popup' => true, 141 | 'portal' => true, 142 | 'pre' => true, 143 | 'progress' => true, 144 | 'q' => true, 145 | 'rb' => true, 146 | 'rp' => true, 147 | 'rt' => true, 148 | 'rtc' => true, 149 | 'ruby' => true, 150 | 's' => true, 151 | 'samp' => true, 152 | 'section' => true, 153 | 'select' => false, 154 | 'selectmenu' => false, 155 | 'slot' => true, 156 | 'small' => true, 157 | 'source' => true, 158 | 'span' => true, 159 | 'strike' => true, 160 | 'strong' => true, 161 | 'sub' => true, 162 | 'summary' => true, 163 | 'sup' => true, 164 | 'table' => true, 165 | 'tbody' => true, 166 | 'td' => true, 167 | 'template' => true, 168 | 'textarea' => false, 169 | 'tfoot' => true, 170 | 'th' => true, 171 | 'thead' => true, 172 | 'time' => true, 173 | 'tr' => true, 174 | 'track' => true, 175 | 'tt' => true, 176 | 'u' => true, 177 | 'ul' => true, 178 | 'var' => true, 179 | 'video' => true, 180 | 'wbr' => true, 181 | 'xmp' => true, 182 | ]; 183 | 184 | /** 185 | * Attributes allowed by the standard. 
186 | */ 187 | public const ATTRIBUTES = [ 188 | 'abbr' => true, 189 | 'accept' => true, 190 | 'accept-charset' => true, 191 | 'accesskey' => true, 192 | 'action' => true, 193 | 'align' => true, 194 | 'alink' => true, 195 | 'allow' => true, 196 | 'allowfullscreen' => true, 197 | 'allowpaymentrequest' => false, 198 | 'alt' => true, 199 | 'anchor' => true, 200 | 'archive' => true, 201 | 'as' => true, 202 | 'async' => false, 203 | 'autocapitalize' => false, 204 | 'autocomplete' => false, 205 | 'autocorrect' => false, 206 | 'autofocus' => false, 207 | 'autopictureinpicture' => false, 208 | 'autoplay' => false, 209 | 'axis' => true, 210 | 'background' => false, 211 | 'behavior' => true, 212 | 'bgcolor' => false, 213 | 'border' => false, 214 | 'bordercolor' => false, 215 | 'capture' => true, 216 | 'cellpadding' => true, 217 | 'cellspacing' => true, 218 | 'challenge' => true, 219 | 'char' => true, 220 | 'charoff' => true, 221 | 'charset' => true, 222 | 'checked' => false, 223 | 'cite' => true, 224 | 'class' => false, 225 | 'classid' => false, 226 | 'clear' => true, 227 | 'code' => true, 228 | 'codebase' => true, 229 | 'codetype' => true, 230 | 'color' => false, 231 | 'cols' => true, 232 | 'colspan' => true, 233 | 'compact' => true, 234 | 'content' => true, 235 | 'contenteditable' => false, 236 | 'controls' => true, 237 | 'controlslist' => true, 238 | 'conversiondestination' => true, 239 | 'coords' => true, 240 | 'crossorigin' => true, 241 | 'csp' => true, 242 | 'data' => true, 243 | 'datetime' => true, 244 | 'declare' => true, 245 | 'decoding' => true, 246 | 'default' => true, 247 | 'defer' => true, 248 | 'dir' => true, 249 | 'direction' => true, 250 | 'dirname' => true, 251 | 'disabled' => true, 252 | 'disablepictureinpicture' => true, 253 | 'disableremoteplayback' => true, 254 | 'disallowdocumentaccess' => true, 255 | 'download' => true, 256 | 'draggable' => true, 257 | 'elementtiming' => true, 258 | 'enctype' => true, 259 | 'end' => true, 260 | 'enterkeyhint' => true, 
261 | 'event' => true, 262 | 'exportparts' => true, 263 | 'face' => true, 264 | 'for' => true, 265 | 'form' => false, 266 | 'formaction' => false, 267 | 'formenctype' => false, 268 | 'formmethod' => false, 269 | 'formnovalidate' => false, 270 | 'formtarget' => false, 271 | 'frame' => false, 272 | 'frameborder' => false, 273 | 'headers' => true, 274 | 'height' => true, 275 | 'hidden' => false, 276 | 'high' => true, 277 | 'href' => true, 278 | 'hreflang' => true, 279 | 'hreftranslate' => true, 280 | 'hspace' => true, 281 | 'http-equiv' => false, 282 | 'id' => true, 283 | 'imagesizes' => true, 284 | 'imagesrcset' => true, 285 | 'importance' => true, 286 | 'impressiondata' => true, 287 | 'impressionexpiry' => true, 288 | 'incremental' => true, 289 | 'inert' => true, 290 | 'inputmode' => true, 291 | 'integrity' => true, 292 | 'invisible' => true, 293 | 'is' => true, 294 | 'ismap' => true, 295 | 'keytype' => true, 296 | 'kind' => true, 297 | 'label' => true, 298 | 'lang' => true, 299 | 'language' => true, 300 | 'latencyhint' => true, 301 | 'leftmargin' => true, 302 | 'link' => true, 303 | 'list' => true, 304 | 'loading' => true, 305 | 'longdesc' => true, 306 | 'loop' => true, 307 | 'low' => true, 308 | 'lowsrc' => true, 309 | 'manifest' => true, 310 | 'marginheight' => true, 311 | 'marginwidth' => true, 312 | 'max' => true, 313 | 'maxlength' => true, 314 | 'mayscript' => true, 315 | 'media' => true, 316 | 'method' => true, 317 | 'min' => true, 318 | 'minlength' => true, 319 | 'multiple' => true, 320 | 'muted' => true, 321 | 'name' => true, 322 | 'nohref' => true, 323 | 'nomodule' => true, 324 | 'nonce' => true, 325 | 'noresize' => true, 326 | 'noshade' => true, 327 | 'novalidate' => true, 328 | 'nowrap' => true, 329 | 'object' => true, 330 | 'open' => true, 331 | 'optimum' => true, 332 | 'part' => true, 333 | 'pattern' => true, 334 | 'ping' => false, 335 | 'placeholder' => true, 336 | 'playsinline' => true, 337 | 'policy' => true, 338 | 'poster' => true, 339 | 'preload' 
=> true, 340 | 'pseudo' => true, 341 | 'readonly' => true, 342 | 'referrerpolicy' => true, 343 | 'rel' => true, 344 | 'reportingorigin' => true, 345 | 'required' => true, 346 | 'resources' => true, 347 | 'rev' => true, 348 | 'reversed' => true, 349 | 'role' => true, 350 | 'rows' => true, 351 | 'rowspan' => true, 352 | 'rules' => true, 353 | 'sandbox' => true, 354 | 'scheme' => true, 355 | 'scope' => true, 356 | 'scopes' => true, 357 | 'scrollamount' => true, 358 | 'scrolldelay' => true, 359 | 'scrolling' => true, 360 | 'select' => false, 361 | 'selected' => false, 362 | 'shadowroot' => true, 363 | 'shadowrootdelegatesfocus' => true, 364 | 'shape' => true, 365 | 'size' => true, 366 | 'sizes' => true, 367 | 'slot' => true, 368 | 'span' => true, 369 | 'spellcheck' => true, 370 | 'src' => true, 371 | 'srcdoc' => true, 372 | 'srclang' => true, 373 | 'srcset' => true, 374 | 'standby' => true, 375 | 'start' => true, 376 | 'step' => true, 377 | 'style' => false, 378 | 'summary' => true, 379 | 'tabindex' => true, 380 | 'target' => true, 381 | 'text' => true, 382 | 'title' => true, 383 | 'topmargin' => true, 384 | 'translate' => true, 385 | 'truespeed' => true, 386 | 'trusttoken' => true, 387 | 'type' => true, 388 | 'usemap' => true, 389 | 'valign' => true, 390 | 'value' => false, 391 | 'valuetype' => true, 392 | 'version' => true, 393 | 'virtualkeyboardpolicy' => true, 394 | 'vlink' => false, 395 | 'vspace' => true, 396 | 'webkitdirectory' => true, 397 | 'width' => true, 398 | 'wrap' => true, 399 | ]; 400 | } 401 | -------------------------------------------------------------------------------- /TextSanitizer/StringSanitizer.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 
namespace Symfony\Component\HtmlSanitizer\TextSanitizer;

/**
 * String helpers shared by the sanitizer: HTML-style lowercasing and
 * entity encoding.
 *
 * @internal
 */
final class StringSanitizer
{
    // ASCII-only case mapping: [uppercase letters, lowercase letters].
    private const LOWERCASE = [
        'ABCDEFGHIJKLMNOPQRSTUVWXYZ',
        'abcdefghijklmnopqrstuvwxyz',
    ];

    // Parallel lists for str_replace(): entry N of the first list is replaced
    // by entry N of the second. Applied AFTER htmlspecialchars(), which is why
    // the double quote shows up here as "&quot;".
    private const REPLACEMENTS = [
        [
            // "&#34;" is shorter than "&quot;"
            '&quot;',

            // Fix several potential issues in how browsers interpret attributes values
            '+',
            '=',
            '@',
            '`',

            // Some DB engines will transform UTF8 full-width characters their classical version
            // if the data is saved in a non-UTF8 field
            '＜',
            '＞',
            '＋',
            '＝',
            '＠',
            '｀',
        ],
        [
            '&#34;',

            '&#43;',
            '&#61;',
            '&#64;',
            '&#96;',

            '&#xFF1C;',
            '&#xFF1E;',
            '&#xFF0B;',
            '&#xFF1D;',
            '&#xFF20;',
            '&#xFF40;',
        ],
    ];

    /**
     * Applies a transformation to lowercase following W3C HTML Standard.
     *
     * Only the 26 ASCII letters are mapped; every other byte is left untouched.
     *
     * @see https://w3c.github.io/html-reference/terminology.html#case-insensitive
     */
    public static function htmlLower(string $string): string
    {
        return strtr($string, self::LOWERCASE[0], self::LOWERCASE[1]);
    }

    /**
     * Encodes the HTML entities in the given string for safe injection in a document's DOM.
     *
     * The string first goes through htmlspecialchars() (quotes included, invalid
     * UTF-8 substituted), then the extra characters listed in REPLACEMENTS are
     * turned into numeric entities.
     */
    public static function encodeHtmlEntities(string $string): string
    {
        return str_replace(
            self::REPLACEMENTS[0],
            self::REPLACEMENTS[1],
            htmlspecialchars($string, \ENT_QUOTES | \ENT_SUBSTITUTE, 'UTF-8')
        );
    }
}

// --------------------------------------------------------------------------------
// /TextSanitizer/UrlSanitizer.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
namespace Symfony\Component\HtmlSanitizer\TextSanitizer;

use League\Uri\Exceptions\SyntaxError;
use League\Uri\UriString;

/**
 * @internal
 */
final class UrlSanitizer
{
    /**
     * Sanitizes a given URL string.
     *
     * In addition to ensuring $input is a valid URL, this sanitizer checks that:
     *   * the URL's host is allowed ;
     *   * the URL's scheme is allowed ;
     *   * the URL is allowed to be relative if it is ;
     *
     * It also transforms the URL to HTTPS if requested.
     *
     * @param string|null       $input          URL to sanitize; null or any falsy string yields null
     * @param list<string>|null $allowedSchemes Accepted schemes, or null to accept any scheme
     * @param bool              $forceHttps     Whether an "http" scheme is rewritten to "https"
     * @param list<string>|null $allowedHosts   Accepted hosts (sub-domains included), or null to accept any host
     * @param bool              $allowRelative  Whether URLs without scheme/host are accepted
     *
     * @return string|null The rebuilt URL, or null when it is rejected
     */
    public static function sanitize(?string $input, ?array $allowedSchemes = null, bool $forceHttps = false, ?array $allowedHosts = null, bool $allowRelative = false): ?string
    {
        if (!$input) {
            return null;
        }

        $url = self::parse($input);

        // Malformed URL
        if (!$url || !\is_array($url)) {
            return null;
        }

        // No scheme and relative not allowed
        if (!$allowRelative && !$url['scheme']) {
            return null;
        }

        // Forbidden scheme
        if ($url['scheme'] && null !== $allowedSchemes && !\in_array($url['scheme'], $allowedSchemes, true)) {
            return null;
        }

        // If the scheme used is not supposed to have a host, do not check the host
        if (!self::isHostlessScheme($url['scheme'])) {
            // No host and relative not allowed
            if (!$allowRelative && !$url['host']) {
                return null;
            }

            // Forbidden host
            if ($url['host'] && null !== $allowedHosts && !self::isAllowedHost($url['host'], $allowedHosts)) {
                return null;
            }
        }

        // Force HTTPS
        if ($forceHttps && 'http' === $url['scheme']) {
            $url['scheme'] = 'https';
        }

        return UriString::build($url);
    }

    /**
     * Parses a given URL and returns an array of its components.
     *
     * Null is returned for an empty string, for a URL the parser rejects with
     * a SyntaxError, for a URL containing any whitespace, and for a host that
     * contains percent-encoded unreserved characters.
     *
     * @return null|array{
     *     scheme:?string,
     *     user:?string,
     *     pass:?string,
     *     host:?string,
     *     port:?int,
     *     path:string,
     *     query:?string,
     *     fragment:?string
     * }
     */
    public static function parse(string $url): ?array
    {
        if (!$url) {
            return null;
        }

        try {
            $parsedUrl = UriString::parse($url);

            if (preg_match('/\s/', $url)) {
                return null;
            }

            // A host that changes once decoded was obfuscated with percent-encoding
            if (isset($parsedUrl['host']) && self::decodeUnreservedCharacters($parsedUrl['host']) !== $parsedUrl['host']) {
                return null;
            }

            return $parsedUrl;
        } catch (SyntaxError) {
            return null;
        }
    }

    /**
     * Tells whether the scheme belongs to the list of schemes for which the
     * host checks are skipped.
     */
    private static function isHostlessScheme(?string $scheme): bool
    {
        return \in_array($scheme, ['blob', 'chrome', 'data', 'file', 'geo', 'mailto', 'maps', 'tel', 'view-source'], true);
    }

    /**
     * Tells whether the host equals one of the allowed hosts or is a
     * sub-domain of one of them (labels are compared from the right).
     */
    private static function isAllowedHost(?string $host, array $allowedHosts): bool
    {
        if (null === $host) {
            return \in_array(null, $allowedHosts, true);
        }

        $parts = array_reverse(explode('.', $host));

        foreach ($allowedHosts as $allowedHost) {
            if (self::matchAllowedHostParts($parts, array_reverse(explode('.', $allowedHost)))) {
                return true;
            }
        }

        return false;
    }

    /**
     * Compares two reversed lists of host labels: every trusted label must be
     * present, at the same position, in the URI labels.
     */
    private static function matchAllowedHostParts(array $uriParts, array $trustedParts): bool
    {
        // Check each chunk of the domain is valid
        foreach ($trustedParts as $key => $trustedPart) {
            if (!\array_key_exists($key, $uriParts) || $uriParts[$key] !== $trustedPart) {
                return false;
            }
        }

        return true;
    }

    /**
     * Implementation borrowed from League\Uri\Encoder::decodeUnreservedCharacters().
     */
    private static function decodeUnreservedCharacters(string $host): string
    {
        return preg_replace_callback(
            ',%(2[1-9A-Fa-f]|[3-7][0-9A-Fa-f]|61|62|64|65|66|7[AB]|5F),',
            static fn (array $matches): string => rawurldecode($matches[0]),
            $host
        );
    }
}

// --------------------------------------------------------------------------------
// /Visitor/AttributeSanitizer/AttributeSanitizerInterface.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

namespace Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer;

use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;

/**
 * Implements attribute-specific sanitization logic.
 *
 * @author Titouan Galopin
 */
interface AttributeSanitizerInterface
{
    /**
     * Returns the list of element names supported, or null to support all elements.
     *
     * @return list<string>|null
     */
    public function getSupportedElements(): ?array;

    /**
     * Returns the list of attributes names supported, or null to support all attributes.
     *
     * @return list<string>|null
     */
    public function getSupportedAttributes(): ?array;

    /**
     * Returns the sanitized value of a given attribute for the given element.
     */
    public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string;
}

// --------------------------------------------------------------------------------
// /Visitor/AttributeSanitizer/UrlAttributeSanitizer.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
namespace Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer;

use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
use Symfony\Component\HtmlSanitizer\TextSanitizer\UrlSanitizer;

/**
 * Sanitizes URL-carrying attributes with the link or media rules of the
 * sanitizer configuration.
 *
 * @author Titouan Galopin
 */
final class UrlAttributeSanitizer implements AttributeSanitizerInterface
{
    public function getSupportedElements(): ?array
    {
        // Check all elements for URL attributes
        return null;
    }

    public function getSupportedAttributes(): ?array
    {
        return ['src', 'href', 'lowsrc', 'background', 'ping'];
    }

    /**
     * Applies the link rules to "a" elements and the media rules to every
     * other element.
     *
     * @return string|null The sanitized URL, or null when the URL is rejected
     */
    public function sanitizeAttribute(string $element, string $attribute, string $value, HtmlSanitizerConfig $config): ?string
    {
        if ('a' === $element) {
            return UrlSanitizer::sanitize(
                $value,
                $config->getAllowedLinkSchemes(),
                $config->getForceHttpsUrls(),
                $config->getAllowedLinkHosts(),
                $config->getAllowRelativeLinks(),
            );
        }

        return UrlSanitizer::sanitize(
            $value,
            $config->getAllowedMediaSchemes(),
            $config->getForceHttpsUrls(),
            $config->getAllowedMediaHosts(),
            $config->getAllowRelativeMedias(),
        );
    }
}

// --------------------------------------------------------------------------------
// /Visitor/DomVisitor.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
namespace Symfony\Component\HtmlSanitizer\Visitor;

use Symfony\Component\HtmlSanitizer\HtmlSanitizerAction;
use Symfony\Component\HtmlSanitizer\HtmlSanitizerConfig;
use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer;
use Symfony\Component\HtmlSanitizer\Visitor\AttributeSanitizer\AttributeSanitizerInterface;
use Symfony\Component\HtmlSanitizer\Visitor\Model\Cursor;
use Symfony\Component\HtmlSanitizer\Visitor\Node\BlockedNode;
use Symfony\Component\HtmlSanitizer\Visitor\Node\DocumentNode;
use Symfony\Component\HtmlSanitizer\Visitor\Node\Node;
use Symfony\Component\HtmlSanitizer\Visitor\Node\NodeInterface;
use Symfony\Component\HtmlSanitizer\Visitor\Node\TextNode;

/**
 * Iterates over the parsed DOM tree to build the sanitized tree.
 *
 * The DomVisitor iterates over the parsed DOM tree, visits its nodes and build
 * a sanitized tree with their attributes and content.
 *
 * @author Titouan Galopin
 *
 * @internal
 */
final class DomVisitor
{
    private HtmlSanitizerAction $defaultAction = HtmlSanitizerAction::Drop;

    /**
     * Registry of attributes to forcefully set on nodes, index by element and attribute.
     *
     * @var array<string, array<string, string>>
     */
    private array $forcedAttributes;

    /**
     * Registry of attributes sanitizers indexed by element name and attribute name for
     * faster sanitization. The key "*" stands for "any element" / "any attribute".
     *
     * @var array<string, array<string, list<AttributeSanitizerInterface>>>
     */
    private array $attributeSanitizers = [];

    /**
     * @param array<string, HtmlSanitizerAction|array<string, mixed>> $elementsConfig Registry of allowed/blocked elements:
     *                                                                                * If an element is present as a key and contains an array, the element should be allowed
     *                                                                                and the array is the list of allowed attributes (attribute names as keys).
     *                                                                                * If an element is present as a key and contains an HtmlSanitizerAction, that action applies.
     *                                                                                * If an element is not present as a key, the default action applies.
     */
    public function __construct(
        private HtmlSanitizerConfig $config,
        private array $elementsConfig,
    ) {
        $this->forcedAttributes = $config->getForcedAttributes();

        foreach ($config->getAttributeSanitizers() as $attributeSanitizer) {
            foreach ($attributeSanitizer->getSupportedElements() ?? ['*'] as $element) {
                foreach ($attributeSanitizer->getSupportedAttributes() ?? ['*'] as $attribute) {
                    $this->attributeSanitizers[$element][$attribute][] = $attributeSanitizer;
                }
            }
        }

        $this->defaultAction = $config->getDefaultAction();
    }

    /**
     * Builds the sanitized tree for the given parsed fragment and returns its root.
     */
    public function visit(\DOMDocumentFragment $domNode): ?NodeInterface
    {
        $cursor = new Cursor(new DocumentNode());
        $this->visitChildren($domNode, $cursor);

        return $cursor->node;
    }

    private function visitNode(\DOMNode $domNode, Cursor $cursor): void
    {
        $nodeName = StringSanitizer::htmlLower($domNode->nodeName);

        // Visit recursively if the node was not dropped
        if ($this->enterNode($nodeName, $domNode, $cursor)) {
            $this->visitChildren($domNode, $cursor);
            // Leave the node: move the cursor back to the parent
            $cursor->node = $cursor->node->getParent();
        }
    }

    /**
     * Applies the configured action to the DOM node and, unless the node is
     * dropped, moves the cursor onto the newly created sanitized node.
     *
     * @return bool False when the node (and its whole subtree) is dropped
     */
    private function enterNode(string $domNodeName, \DOMNode $domNode, Cursor $cursor): bool
    {
        if (!\array_key_exists($domNodeName, $this->elementsConfig)) {
            $action = $this->defaultAction;
            $allowedAttributes = [];
        } else {
            if (\is_array($this->elementsConfig[$domNodeName])) {
                $action = HtmlSanitizerAction::Allow;
                $allowedAttributes = $this->elementsConfig[$domNodeName];
            } else {
                $action = $this->elementsConfig[$domNodeName];
                $allowedAttributes = [];
            }
        }

        if (HtmlSanitizerAction::Drop === $action) {
            return false;
        }

        // Element should be blocked, retaining its children
        if (HtmlSanitizerAction::Block === $action) {
            $node = new BlockedNode($cursor->node);

            $cursor->node->addChild($node);
            $cursor->node = $node;

            return true;
        }

        // Otherwise create the node
        $node = new Node($cursor->node, $domNodeName);
        $this->setAttributes($domNodeName, $domNode, $node, $allowedAttributes);

        // Force configured attributes
        foreach ($this->forcedAttributes[$domNodeName] ?? [] as $attribute => $value) {
            $node->setAttribute($attribute, $value);
        }

        $cursor->node->addChild($node);
        $cursor->node = $node;

        return true;
    }

    private function visitChildren(\DOMNode $domNode, Cursor $cursor): void
    {
        /** @var \DOMNode $child */
        foreach ($domNode->childNodes ?? [] as $child) {
            if ('#text' === $child->nodeName) {
                // Add text directly for performance
                $cursor->node->addChild(new TextNode($cursor->node, $child->nodeValue));
            } elseif (!$child instanceof \DOMText && !$child instanceof \DOMProcessingInstruction) {
                // Otherwise continue the visit recursively
                // Ignore comments for security reasons (interpreted differently by browsers)
                // Ignore processing instructions (treated as comments)
                $this->visitNode($child, $cursor);
            }
        }
    }

    /**
     * Set attributes from a DOM node to a sanitized node.
     *
     * Only attributes whose lowercased name is a key of $allowedAttributes are
     * copied. Each value goes through the matching attribute sanitizers first.
     */
    private function setAttributes(string $domNodeName, \DOMNode $domNode, Node $node, array $allowedAttributes = []): void
    {
        /** @var iterable<\DOMAttr> $domAttributes */
        if (!$domAttributes = $domNode->attributes ? $domNode->attributes->getIterator() : []) {
            return;
        }

        foreach ($domAttributes as $attribute) {
            $name = StringSanitizer::htmlLower($attribute->name);

            if (isset($allowedAttributes[$name])) {
                $value = $attribute->value;

                // Sanitize the attribute value if there are attribute sanitizers for it
                $attributeSanitizers = array_merge(
                    $this->attributeSanitizers[$domNodeName][$name] ?? [],
                    $this->attributeSanitizers['*'][$name] ?? [],
                    $this->attributeSanitizers[$domNodeName]['*'] ?? [],
                );

                foreach ($attributeSanitizers as $sanitizer) {
                    $value = $sanitizer->sanitizeAttribute($domNodeName, $name, $value, $this->config);

                    // A sanitizer rejected the value: stop here, the next
                    // sanitizer only accepts a string and would raise a TypeError
                    if (null === $value) {
                        break;
                    }
                }

                $node->setAttribute($name, $value);
            }
        }
    }
}

// --------------------------------------------------------------------------------
// /Visitor/Model/Cursor.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.

namespace Symfony\Component\HtmlSanitizer\Visitor\Model;

use Symfony\Component\HtmlSanitizer\Visitor\Node\NodeInterface;

/**
 * Mutable pointer to the sanitized node currently being built.
 *
 * @author Titouan Galopin
 *
 * @internal
 */
final class Cursor
{
    public function __construct(public ?NodeInterface $node)
    {
    }
}

// --------------------------------------------------------------------------------
// /Visitor/Node/BlockedNode.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
namespace Symfony\Component\HtmlSanitizer\Visitor\Node;

/**
 * Sanitized node that renders only its children, without any tag of its own.
 *
 * @author Titouan Galopin
 */
final class BlockedNode implements NodeInterface
{
    private array $children = [];

    public function __construct(
        private NodeInterface $parentNode,
    ) {
    }

    public function addChild(NodeInterface $node): void
    {
        $this->children[] = $node;
    }

    public function getParent(): ?NodeInterface
    {
        return $this->parentNode;
    }

    public function render(): string
    {
        // The blocked element itself produces no markup
        $rendered = '';
        foreach ($this->children as $child) {
            $rendered .= $child->render();
        }

        return $rendered;
    }
}

// --------------------------------------------------------------------------------
// /Visitor/Node/DocumentNode.php:
// --------------------------------------------------------------------------------
// For the full copyright and license information, please view the LICENSE
// file that was distributed with this source code.
10 | */ 11 | 12 | namespace Symfony\Component\HtmlSanitizer\Visitor\Node; 13 | 14 | /** 15 | * @author Titouan Galopin 16 | */ 17 | final class DocumentNode implements NodeInterface 18 | { 19 | private array $children = []; 20 | 21 | public function addChild(NodeInterface $node): void 22 | { 23 | $this->children[] = $node; 24 | } 25 | 26 | public function getParent(): ?NodeInterface 27 | { 28 | return null; 29 | } 30 | 31 | public function render(): string 32 | { 33 | $rendered = ''; 34 | foreach ($this->children as $child) { 35 | $rendered .= $child->render(); 36 | } 37 | 38 | return $rendered; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /Visitor/Node/Node.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 10 | */ 11 | 12 | namespace Symfony\Component\HtmlSanitizer\Visitor\Node; 13 | 14 | use Symfony\Component\HtmlSanitizer\TextSanitizer\StringSanitizer; 15 | 16 | /** 17 | * @author Titouan Galopin 18 | */ 19 | final class Node implements NodeInterface 20 | { 21 | // HTML5 elements which are self-closing 22 | private const VOID_ELEMENTS = [ 23 | 'area' => true, 24 | 'base' => true, 25 | 'br' => true, 26 | 'col' => true, 27 | 'embed' => true, 28 | 'hr' => true, 29 | 'img' => true, 30 | 'input' => true, 31 | 'keygen' => true, 32 | 'link' => true, 33 | 'meta' => true, 34 | 'param' => true, 35 | 'source' => true, 36 | 'track' => true, 37 | 'wbr' => true, 38 | ]; 39 | 40 | private array $attributes = []; 41 | private array $children = []; 42 | 43 | public function __construct( 44 | private NodeInterface $parent, 45 | private string $tagName, 46 | ) { 47 | } 48 | 49 | public function getParent(): ?NodeInterface 50 | { 51 | return $this->parent; 52 | } 53 | 54 | public function getAttribute(string $name): ?string 55 | { 56 | return 
$this->attributes[$name] ?? null; 57 | } 58 | 59 | public function setAttribute(string $name, ?string $value): void 60 | { 61 | // Always use only the first declaration (ease sanitization) 62 | if (!\array_key_exists($name, $this->attributes)) { 63 | $this->attributes[$name] = $value; 64 | } 65 | } 66 | 67 | public function addChild(NodeInterface $node): void 68 | { 69 | $this->children[] = $node; 70 | } 71 | 72 | public function render(): string 73 | { 74 | if (isset(self::VOID_ELEMENTS[$this->tagName])) { 75 | return '<'.$this->tagName.$this->renderAttributes().' />'; 76 | } 77 | 78 | $rendered = '<'.$this->tagName.$this->renderAttributes().'>'; 79 | foreach ($this->children as $child) { 80 | $rendered .= $child->render(); 81 | } 82 | 83 | return $rendered.'tagName.'>'; 84 | } 85 | 86 | private function renderAttributes(): string 87 | { 88 | $rendered = []; 89 | foreach ($this->attributes as $name => $value) { 90 | if (null === $value) { 91 | // Tag should be removed as a sanitizer found suspect data inside 92 | continue; 93 | } 94 | 95 | $attr = StringSanitizer::encodeHtmlEntities($name); 96 | 97 | if ('' !== $value) { 98 | // In quirks mode, IE8 does a poor job producing innerHTML values. 99 | // If JavaScript does: 100 | // nodeA.innerHTML = nodeB.innerHTML; 101 | // and nodeB contains (or even if ` was encoded properly): 102 | //
103 | // then IE8 will produce: 104 | //
105 | // as the value of nodeB.innerHTML and assign it to nodeA. 106 | // IE8's HTML parser treats `` as a blank attribute value and foo=bar becomes a separate attribute. 107 | // Adding a space at the end of the attribute prevents this by forcing IE8 to put double 108 | // quotes around the attribute when computing nodeB.innerHTML. 109 | if (str_contains($value, '`')) { 110 | $value .= ' '; 111 | } 112 | 113 | $attr .= '="'.StringSanitizer::encodeHtmlEntities($value).'"'; 114 | } 115 | 116 | $rendered[] = $attr; 117 | } 118 | 119 | return $rendered ? ' '.implode(' ', $rendered) : ''; 120 | } 121 | } 122 | -------------------------------------------------------------------------------- /Visitor/Node/NodeInterface.php: -------------------------------------------------------------------------------- 1 | 7 | * 8 | * For the full copyright and license information, please view the LICENSE 9 | * file that was distributed with this source code. 10 | */ 11 | 12 | namespace Symfony\Component\HtmlSanitizer\Visitor\Node; 13 | 14 | /** 15 | * Represents the sanitized version of a DOM node in the sanitized tree. 16 | * 17 | * Once the sanitization is done, nodes are rendered into the final output string. 18 | * 19 | * @author Titouan Galopin 20 | */ 21 | interface NodeInterface 22 | { 23 | /** 24 | * Add a child node to this node. 25 | */ 26 | public function addChild(self $node): void; 27 | 28 | /** 29 | * Return the parent node of this node, or null if it has no parent node. 30 | */ 31 | public function getParent(): ?self; 32 | 33 | /** 34 | * Render this node as a string, recursively rendering its children as well. 
/**
 * Leaf of the sanitized tree that carries a piece of text.
 *
 * @author Titouan Galopin
 */
final class TextNode implements NodeInterface
{
    public function __construct(
        private NodeInterface $parentNode,
        private string $text,
    ) {
    }

    /**
     * Not supported for text: always throws.
     *
     * @throws \LogicException on every call
     */
    public function addChild(NodeInterface $node): void
    {
        throw new \LogicException('Text nodes cannot have children.');
    }

    /**
     * Gives the node passed to the constructor as parent.
     */
    public function getParent(): ?NodeInterface
    {
        return $this->parentNode;
    }

    /**
     * Returns the text after passing it through StringSanitizer::encodeHtmlEntities().
     */
    public function render(): string
    {
        $raw = $this->text;

        return StringSanitizer::encodeHtmlEntities($raw);
    }
}
"Symfony\\Component\\HtmlSanitizer\\": "" }, 26 | "exclude-from-classmap": [ 27 | "/Tests/" 28 | ] 29 | }, 30 | "minimum-stability": "dev" 31 | } 32 | --------------------------------------------------------------------------------