├── .gitattributes ├── .gitignore ├── README.txt └── xsshtml.class.php /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | # Custom for Visual Studio 5 | *.cs diff=csharp 6 | *.sln merge=union 7 | *.csproj merge=union 8 | *.vbproj merge=union 9 | *.fsproj merge=union 10 | *.dbproj merge=union 11 | 12 | # Standard to msysgit 13 | *.doc diff=astextplain 14 | *.DOC diff=astextplain 15 | *.docx diff=astextplain 16 | *.DOCX diff=astextplain 17 | *.dot diff=astextplain 18 | *.DOT diff=astextplain 19 | *.pdf diff=astextplain 20 | *.PDF diff=astextplain 21 | *.rtf diff=astextplain 22 | *.RTF diff=astextplain 23 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ################# 2 | ## Eclipse 3 | ################# 4 | 5 | *.pydevproject 6 | .project 7 | .metadata 8 | bin/ 9 | tmp/ 10 | *.tmp 11 | *.bak 12 | *.swp 13 | *~.nib 14 | local.properties 15 | .classpath 16 | .settings/ 17 | .loadpath 18 | 19 | # External tool builders 20 | .externalToolBuilders/ 21 | 22 | # Locally stored "Eclipse launch configurations" 23 | *.launch 24 | 25 | # CDT-specific 26 | .cproject 27 | 28 | # PDT-specific 29 | .buildpath 30 | 31 | 32 | ################# 33 | ## Visual Studio 34 | ################# 35 | 36 | ## Ignore Visual Studio temporary files, build results, and 37 | ## files generated by popular Visual Studio add-ons. 
38 | 39 | # User-specific files 40 | *.suo 41 | *.user 42 | *.sln.docstates 43 | 44 | # Build results 45 | 46 | [Dd]ebug/ 47 | [Rr]elease/ 48 | x64/ 49 | build/ 50 | [Bb]in/ 51 | [Oo]bj/ 52 | 53 | # MSTest test Results 54 | [Tt]est[Rr]esult*/ 55 | [Bb]uild[Ll]og.* 56 | 57 | *_i.c 58 | *_p.c 59 | *.ilk 60 | *.meta 61 | *.obj 62 | *.pch 63 | *.pdb 64 | *.pgc 65 | *.pgd 66 | *.rsp 67 | *.sbr 68 | *.tlb 69 | *.tli 70 | *.tlh 71 | *.tmp 72 | *.tmp_proj 73 | *.log 74 | *.vspscc 75 | *.vssscc 76 | .builds 77 | *.pidb 78 | *.log 79 | *.scc 80 | 81 | # Visual C++ cache files 82 | ipch/ 83 | *.aps 84 | *.ncb 85 | *.opensdf 86 | *.sdf 87 | *.cachefile 88 | 89 | # Visual Studio profiler 90 | *.psess 91 | *.vsp 92 | *.vspx 93 | 94 | # Guidance Automation Toolkit 95 | *.gpState 96 | 97 | # ReSharper is a .NET coding add-in 98 | _ReSharper*/ 99 | *.[Rr]e[Ss]harper 100 | 101 | # TeamCity is a build add-in 102 | _TeamCity* 103 | 104 | # DotCover is a Code Coverage Tool 105 | *.dotCover 106 | 107 | # NCrunch 108 | *.ncrunch* 109 | .*crunch*.local.xml 110 | 111 | # Installshield output folder 112 | [Ee]xpress/ 113 | 114 | # DocProject is a documentation generator add-in 115 | DocProject/buildhelp/ 116 | DocProject/Help/*.HxT 117 | DocProject/Help/*.HxC 118 | DocProject/Help/*.hhc 119 | DocProject/Help/*.hhk 120 | DocProject/Help/*.hhp 121 | DocProject/Help/Html2 122 | DocProject/Help/html 123 | 124 | # Click-Once directory 125 | publish/ 126 | 127 | # Publish Web Output 128 | *.Publish.xml 129 | *.pubxml 130 | 131 | # NuGet Packages Directory 132 | ## TODO: If you have NuGet Package Restore enabled, uncomment the next line 133 | #packages/ 134 | 135 | # Windows Azure Build Output 136 | csx 137 | *.build.csdef 138 | 139 | # Windows Store app package directory 140 | AppPackages/ 141 | 142 | # Others 143 | sql/ 144 | *.Cache 145 | ClientBin/ 146 | [Ss]tyle[Cc]op.* 147 | ~$* 148 | *~ 149 | *.dbmdl 150 | *.[Pp]ublish.xml 151 | *.pfx 152 | *.publishsettings 153 | 154 | # RIA/Silverlight 
projects 155 | Generated_Code/ 156 | 157 | # Backup & report files from converting an old project file to a newer 158 | # Visual Studio version. Backup files are not needed, because we have git ;-) 159 | _UpgradeReport_Files/ 160 | Backup*/ 161 | UpgradeLog*.XML 162 | UpgradeLog*.htm 163 | 164 | # SQL Server files 165 | App_Data/*.mdf 166 | App_Data/*.ldf 167 | 168 | ############# 169 | ## Windows detritus 170 | ############# 171 | 172 | # Windows image file caches 173 | Thumbs.db 174 | ehthumbs.db 175 | 176 | # Folder config file 177 | Desktop.ini 178 | 179 | # Recycle Bin used on file shares 180 | $RECYCLE.BIN/ 181 | 182 | # Mac crap 183 | .DS_Store 184 | 185 | 186 | ############# 187 | ## Python 188 | ############# 189 | 190 | *.py[co] 191 | 192 | # Packages 193 | *.egg 194 | *.egg-info 195 | dist/ 196 | build/ 197 | eggs/ 198 | parts/ 199 | var/ 200 | sdist/ 201 | develop-eggs/ 202 | .installed.cfg 203 | 204 | # Installer logs 205 | pip-log.txt 206 | 207 | # Unit test / coverage reports 208 | .coverage 209 | .tox 210 | 211 | #Translations 212 | *.mo 213 | 214 | #Mr Developer 215 | .mr.developer.cfg 216 | -------------------------------------------------------------------------------- /README.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/phith0n/XssHtml/11870ca3b7fcc9f2ffc767566e42eab90621be97/README.txt -------------------------------------------------------------------------------- /xsshtml.class.php: -------------------------------------------------------------------------------- 1 | in 2014 and placed in 15 | # the public domain. 
#
# Written by phithon on 2014-06-21
# From: XDSEC & Leavesongs (离别歌)
# Usage:
#   <?php
#   require('xsshtml.class.php');
#   $html = '<p>untrusted markup</p>';
#   $xss = new XssHtml($html);
#   $html = $xss->getHtml();
#   ?\>
#
# Requirements:
#   PHP Version > 5.0
#   Browsers: IE7+ or any other browser; XSS in IE6 and below cannot be prevented
#   More usage options: http://phith0n.github.io/XssHtml

/**
 * Whitelist-based HTML filter.
 *
 * The input is first reduced to the allowed tags with strip_tags(), then
 * parsed into a DOMDocument so that every remaining element can have its
 * attributes checked against an attribute whitelist and its style / link
 * values rewritten.
 */
class XssHtml {
    /** @var DOMDocument|null Parsed document; only set when there is something to filter. */
    private $m_dom;
    /** @var string Markup handed to the parser (input after the first strip_tags() pass). */
    private $m_xss;
    /** @var bool Whether the input was non-empty and loadHTML() succeeded. */
    private $m_ok;
    /** @var array Attribute names that survive filtering; everything else is removed. */
    private $m_AllowAttr = array('title', 'src', 'href', 'id', 'class', 'style', 'width', 'height', 'alt', 'target', 'align');
    /** @var array Tag names that survive filtering. */
    private $m_AllowTag = array('a', 'img', 'br', 'strong', 'b', 'code', 'pre', 'p', 'div', 'em', 'span', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'table', 'ul', 'ol', 'tr', 'th', 'td', 'hr', 'li', 'u');

    /**
     * Constructor.
     *
     * @param string $html     Text to be filtered.
     * @param string $charset  Encoding of $html, utf-8 by default.
     * @param array  $AllowTag Allowed tags. Leave empty to use the built-in list,
     *                         which covers most needs; do not add dangerous tags.
     */
    public function __construct($html, $charset = 'utf-8', $AllowTag = array()){
        if (!empty($AllowTag)) {
            $this->m_AllowTag = $AllowTag;
        }
        $this->m_xss = strip_tags((string) $html, $this->__allowed_tag_string());
        // Strict comparison: empty() would also reject the legitimate input "0".
        if ($this->m_xss === '') {
            $this->m_ok = FALSE;
            return ;
        }

        // The charset ends up inside an attribute value, so keep only
        // characters that can occur in an encoding label.
        $charset = preg_replace('/[^A-Za-z0-9_.:\-]/', '', (string) $charset);
        if ($charset === '') {
            $charset = 'utf-8';
        }
        // Declare the encoding so libxml does not fall back to its default and
        // mangle multi-byte text, and supply an explicit <body> so text outside
        // any tag is not wrapped in an implied <p>. The skeleton tags are not in
        // the default allow-list, so getHtml() strips them again.
        $this->m_xss = '<html><head><meta http-equiv="Content-Type" content="text/html; charset=' . $charset . '"></head><body>'
            . $this->m_xss
            . '</body></html>';

        $this->m_dom = new DOMDocument();
        $this->m_dom->strictErrorChecking = FALSE;
        // Collect parser warnings internally instead of silencing them with @,
        // then restore whatever setting the caller had.
        $previous = libxml_use_internal_errors(true);
        $this->m_ok = $this->m_dom->loadHTML($this->m_xss);
        libxml_clear_errors();
        libxml_use_internal_errors($previous);
    }

    /**
     * Return the filtered markup.
     *
     * @return string Filtered HTML, or '' when the input was empty or could not be parsed.
     */
    public function getHtml()
    {
        if (!$this->m_ok) {
            return '';
        }
        foreach ($this->m_dom->getElementsByTagName('*') as $node) {
            if (!in_array($node->nodeName, $this->m_AllowTag, true)) {
                continue;
            }
            // Tag-specific handler (__node_a, __node_img, ...) if one exists,
            // otherwise the generic attribute clean-up.
            $handler = "__node_{$node->nodeName}";
            if (!method_exists($this, $handler)) {
                $handler = '__node_default';
            }
            $this->$handler($node);
        }
        // saveHTML() emits a doctype and the document skeleton; the second
        // strip_tags() pass removes everything that is not in the allow-list.
        $html = strip_tags($this->m_dom->saveHTML(), $this->__allowed_tag_string());
        // Drop the single leading/trailing newline left behind by the serializer.
        $html = preg_replace('/^\n(.*)\n$/s', '$1', $html);
        return $html;
    }

    /**
     * Build the allowed-tags argument for strip_tags(), e.g. "<a><img><br>".
     */
    private function __allowed_tag_string(){
        return '<' . implode('><', $this->m_AllowTag) . '>';
    }

    /**
     * Force a URL onto the http/https scheme.
     *
     * Anything not already starting with http:// or https:// gets "http://"
     * prepended, which turns values such as "javascript:..." into a harmless
     * host name. Note that this also rewrites relative URLs and other schemes.
     */
    private function __true_url($url){
        if (preg_match('#^https?://.+#is', $url)) {
            return $url;
        }
        return 'http://' . $url;
    }

    /**
     * Return the node's style attribute with risky constructs blanked out,
     * or '' when the node has no style attribute.
     */
    private function __get_style($node){
        $attr = $node->attributes->getNamedItem('style');
        if (!$attr) {
            return '';
        }
        $style = $attr->nodeValue;
        // Backslash escapes, numeric entities and CSS comments can all be used
        // to hide keywords from the expression() check below.
        $style = str_replace('\\', ' ', $style);
        $style = str_replace(array('&#', '/*', '*/'), ' ', $style);
        // Matches "expression" even with arbitrary characters between the letters.
        $style = preg_replace('#e.*x.*p.*r.*e.*s.*s.*i.*o.*n#Uis', ' ', $style);
        return $style;
    }

    /**
     * Return the value of link attribute $att passed through __true_url(),
     * or '' when the attribute is absent.
     */
    private function __get_link($node, $att){
        $link = $node->attributes->getNamedItem($att);
        if (!$link) {
            return '';
        }
        return $this->__true_url($link->nodeValue);
    }

    /**
     * Set attribute $attr on $dom unless the value is empty.
     *
     * An explicit check is used instead of empty() so that "0" is kept.
     */
    private function __setAttr($dom, $attr, $val){
        if ($val !== '' && $val !== null) {
            $dom->setAttribute($attr, $val);
        }
    }

    /**
     * Keep the existing value of $attr, or apply $default when the attribute
     * is missing (an empty default leaves the attribute absent).
     */
    private function __set_default_attr($node, $attr, $default = '')
    {
        $existing = $node->attributes->getNamedItem($attr);
        $this->__setAttr($node, $attr, $existing ? $existing->nodeValue : $default);
    }

    /**
     * Clean-up applied to every allowed element: remove attributes that are
     * not whitelisted and replace the style attribute with its filtered value.
     */
    private function __common_attr($node)
    {
        // Collect names first; removing while iterating the live map skips entries.
        $remove = array();
        foreach ($node->attributes as $attr) {
            if (!in_array($attr->nodeName, $this->m_AllowAttr, true)) {
                $remove[] = $attr->nodeName;
            }
        }
        foreach ($remove as $name) {
            $node->removeAttribute($name);
        }
        $this->__setAttr($node, 'style', $this->__get_style($node));
        $this->__set_default_attr($node, 'title');
        $this->__set_default_attr($node, 'id');
        $this->__set_default_attr($node, 'class');
    }

    /**
     * Handler for <img>: common clean-up; image attributes are kept as given.
     */
    private function __node_img($node){
        $this->__common_attr($node);

        $this->__set_default_attr($node, 'src');
        $this->__set_default_attr($node, 'width');
        $this->__set_default_attr($node, 'height');
        $this->__set_default_attr($node, 'alt');
        $this->__set_default_attr($node, 'align');
    }

    /**
     * Handler for <a>: href is forced onto http(s) and target defaults to _blank.
     */
    private function __node_a($node){
        $this->__common_attr($node);
        $href = $this->__get_link($node, 'href');

        $this->__setAttr($node, 'href', $href);
        $this->__set_default_attr($node, 'target', '_blank');
    }

    /**
     * Handler for <embed> (only reached when the caller allows that tag):
     * src is forced onto http(s) and script access is disabled.
     */
    private function __node_embed($node){
        $this->__common_attr($node);
        $link = $this->__get_link($node, 'src');

        $this->__setAttr($node, 'src', $link);
        $this->__setAttr($node, 'allowscriptaccess', 'never');
        $this->__set_default_attr($node, 'width');
        $this->__set_default_attr($node, 'height');
    }

    /**
     * Handler for every allowed tag without a dedicated __node_* method.
     */
    private function __node_default($node){
        $this->__common_attr($node);
    }
}

// Command-line demo, kept disabled:
// if(php_sapi_name() == "cli"){
//     $html = $argv[1];
//     $xss = new XssHtml($html);
//     $html = $xss->getHtml();
//     echo "'$html'";
// }