// Repository layout:
// ├── DevTools_JSFinder
//     ├── JSFinder.js
//     └── README.md
// ├── README.md
// └── test.js
//
// /DevTools_JSFinder/JSFinder.js:
// ==UserScript==
// @name         JSFinder
// @namespace    Threezh1
// @version      0.2
// @description  Extract interfaces from html and javascript files.
// @author       Threezh1
// @match        *://*/*
// @require      https://greasyfork.org/scripts/12447-mootools-for-greasemonkey/code/MooTools%20for%20Greasemonkey.js?version=74469
// @grant        none
// ==/UserScript==

'use strict';

/**
 * File extensions that are treated as static assets and left out of the
 * reported URL list. `jpg` is listed explicitly; the previous substring-based
 * check only knew `jpeg`.
 */
const STATIC_ASSET_EXTENSIONS = new Set([
  'jpeg', 'jpg', 'png', 'gif', 'svg', 'js', 'flv', 'swf', 'css',
]);

/**
 * Matches quoted URL-like string literals. Capture group 1 is the text between
 * the quotes; the four inner alternatives cover absolute / protocol-relative
 * URLs, rooted or dot-relative paths, `dir/file.ext` paths and bare
 * `file.<known extension>` names.
 */
const URL_PATTERN = /(?:"|')(((?:[a-zA-Z]{1,10}:\/\/|\/\/)[^"'\/]{1,}\.[a-zA-Z]{2,}[^"']{0,})|((?:\/|\.\.\/|\.\/)[^"'><,;| *()(%%$^\/\\\[\]][^"'><,;|()]{1,})|([a-zA-Z0-9_\-\/]{1,}\/[a-zA-Z0-9_\-\/]{1,}\.(?:[a-zA-Z]{1,4}|action)(?:[\?|\/][^"|']{0,}|))|([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:\?[^"|']{0,}|)))(?:"|')/sg;

/**
 * Tells whether `hostname` is `domain` itself or one of its sub-domains.
 * The comparison is label-aware, so `evilexample.com` does not count as part
 * of `example.com`.
 *
 * @param {string} hostname - Host name to test (without port).
 * @param {string|undefined} domain - Domain to test against.
 * @returns {boolean} True when `hostname` belongs to `domain`.
 */
function isWithinDomain(hostname, domain) {
  if (!hostname || !domain) {
    return false;
  }
  const host = hostname.toLowerCase();
  const parent = domain.toLowerCase();
  return host === parent || host.endsWith(`.${parent}`);
}

/**
 * Tells whether a URL path points at a static asset (image, stylesheet,
 * script, flash) that should not be reported as an interface.
 *
 * The extension is looked up in a Set instead of being fed to
 * `String.prototype.search`, which interprets its argument as a regular
 * expression (partial matches such as `j`, and a SyntaxError on `(`).
 *
 * @param {string} pathname - The `pathname` part of a URL.
 * @returns {boolean} True when the last dot-separated segment is a known asset extension.
 */
function isStaticAsset(pathname) {
  const extension = pathname.split('.').pop().toLowerCase();
  return STATIC_ASSET_EXTENSIONS.has(extension);
}

/**
 * Finds the widest domain of the current page for which the browser accepts a
 * cookie, by trying `domain=` values from the shortest suffix upwards.
 * NOTE(review): this relies on browsers rejecting cookies for public suffixes
 * such as `.com`; confirm for the browsers you target.
 *
 * @returns {string} The first suffix that accepted the probe cookie, or
 *   `document.domain` when none did (single-label host, IP address, cookies
 *   disabled). The original returned `undefined` in that case, which made
 *   every later domain comparison fail.
 */
function getMainHost() {
  const key = `mh_${Math.random()}`;
  const keyR = new RegExp(`(^|;)\\s*${key}=12345`);
  // Cookie `expires` expects an HTTP date; use the UTC string form.
  const expiredTime = new Date(0).toUTCString();
  const domainList = document.domain.split('.');
  const urlItems = [domainList.pop()];
  while (domainList.length) {
    urlItems.unshift(domainList.pop());
    const mainHost = urlItems.join('.');
    const cookie = `${key}=${12345};domain=.${mainHost}`;
    document.cookie = cookie;
    if (keyR.test(document.cookie)) {
      // Remove the probe cookie again before reporting the domain.
      document.cookie = `${cookie};expires=${expiredTime}`;
      return mainHost;
    }
  }
  return document.domain;
}

/**
 * Downloads a script synchronously and returns its text.
 * NOTE(review): synchronous XHR blocks the page while it runs; kept to
 * preserve the script's simple sequential flow.
 *
 * @param {string} url - Absolute or relative URL of the resource.
 * @returns {string} The response body on HTTP 200, otherwise an empty string
 *   (including when the request itself fails, e.g. a blocked cross-origin call).
 */
function geturlContent(url) {
  const request = new XMLHttpRequest();
  try {
    request.open("GET", url, false);
    request.send();
  } catch (err) {
    console.warn("JSFinder could not fetch", url, err);
    return "";
  }
  return request.status === 200 ? request.responseText : "";
}

/**
 * Extracts URL-like string literals from JavaScript/HTML text and keeps the
 * ones that belong to `mainHost`.
 *
 * Strings starting with `http://` or `https://` are reported as written;
 * everything else is resolved against `baseUrl` and reported as an absolute
 * URL. Candidates that cannot be parsed as a URL are skipped instead of
 * aborting the whole scan.
 *
 * @param {string} js_content - Text to scan.
 * @param {string} [mainHost] - Domain the results must belong to; defaults to `getMainHost()`.
 * @param {string} [baseUrl] - Base for relative candidates; defaults to `location.origin`.
 * @returns {string[]} De-duplicated URLs in order of first appearance.
 */
function extract_url(js_content, mainHost = getMainHost(), baseUrl = location.origin) {
  const found = new Set();
  // matchAll works on a copy of the regex, so the shared /g pattern keeps no state.
  for (const hit of String(js_content ?? '').matchAll(URL_PATTERN)) {
    const candidate = hit[1];
    const isAbsoluteHttp = /^https?:\/\//.test(candidate);
    let parsed;
    try {
      parsed = isAbsoluteHttp ? new URL(candidate) : new URL(candidate, baseUrl);
    } catch (err) {
      if (!(err instanceof TypeError)) {
        throw err;
      }
      continue; // the pattern matched text that is not a valid URL
    }
    if (isWithinDomain(parsed.hostname, mainHost)) {
      found.add(isAbsoluteHttp ? candidate : parsed.href);
    }
  }
  return [...found];
}

(function () {
  // Nothing to scan outside a browser document.
  if (typeof document === 'undefined' || typeof location === 'undefined') {
    return;
  }
  console.log("JSFinder by Threezh1");

  // Resolve the main domain once; every lookup writes and deletes a cookie.
  const mainHost = getMainHost();

  const pageUrls = new Set();
  const inlineScripts = [];
  // Native DOM API, so collection does not depend on the MooTools `$$` helper
  // having loaded. NOTE(review): the @require above may now be unnecessary.
  document.querySelectorAll('*').forEach((element) => {
    for (const value of [element.src, element.href, element.url]) {
      // SVG elements expose `href` as an object, hence the type check.
      if (typeof value === 'string' && value.startsWith("http")) {
        pageUrls.add(value);
      }
    }
    if (element.tagName === "SCRIPT") {
      inlineScripts.push(element.text);
    }
  });

  const domains = new Set();
  const extracted = [];
  for (const rawUrl of pageUrls) {
    let url;
    try {
      url = new URL(rawUrl);
    } catch (err) {
      if (!(err instanceof TypeError)) {
        throw err;
      }
      continue;
    }
    if (isWithinDomain(url.hostname, mainHost)) {
      domains.add(url.host);
    }
    if (isWithinDomain(url.hostname, location.hostname) && url.pathname.endsWith(".js")) {
      // Fetch the full URL: the path alone drops the query string and would be
      // requested from the wrong origin for scripts hosted on a sub-domain.
      extracted.push(...extract_url(geturlContent(url.href), mainHost));
    }
  }
  // Join with newlines so string literals of adjacent scripts cannot merge.
  extracted.push(...extract_url(inlineScripts.join('\n'), mainHost));

  const interfaces = new Set();
  for (const found of extracted) {
    const parsed = new URL(found);
    // extract_url only returns URLs inside mainHost, so the host is in scope.
    domains.add(parsed.host);
    if (!isStaticAsset(parsed.pathname)) {
      interfaces.add(found);
    }
  }

  console.log("JSFinder get domains: ", [...domains]);
  console.log("JSFinder get urls: ", [...interfaces]);
})();
// /DevTools_JSFinder/README.md:
-------------------------------------------------------------------------------- 1 | # DevTools里的JSFinder与油猴脚本 2 | 3 | ## 前言 4 | 5 | 于前天在推特上看到Hpdoger师傅转了一个推: 6 | ![-w746](https://sanzhi-1259392731.cos.ap-chengdu.myqcloud.com/2020/12/11/16076219424271.jpg) 7 | 8 | 是一个人分享了一段javascript代码,在DevTools上执行可以直接获取到html所有标签属性里的url。代码如下: 9 | 10 | ```javascript 11 | urls = [] 12 | $$('*').forEach(element => { 13 | urls.push(element.src) 14 | urls.push(element.href) 15 | urls.push(element.url) 16 | }); console.log(...new Set(urls)) 17 | ``` 18 | 19 | 以小米官网为例: 20 | ![-w1790](https://sanzhi-1259392731.cos.ap-chengdu.myqcloud.com/2020/12/11/16076221055725.jpg) 21 | 22 | 这段js代码的作用把我吓到了,6行代码把jsfinder的功能实现的差不多了...我仔细研究了一下,发现它只是遍历的标签的属性,却没有对js文件里的url进行提取。只是简单的遍历的话没什么太大的用,所以我打算自己根据这个思路,把jsfinder上的功能往这个上面挪一挪。 23 | 24 | 下面就是三个不同的程度的利用: 25 | 26 | 1. 通过html源码里的各种连接获取子域名 27 | 2. 实现jsfinder获取所有接口 28 | 3. JSFinder油猴脚本 29 | 30 | ## 通过html源码里的各种连接获取子域名 31 | 32 | 获取根域名我参考了这篇文章:https://developer.aliyun.com/article/195912 相比于直接截断判断,文章中这种获取的方式就靠谱很多了。 33 | 直接贴源码: 34 | 35 | ```js 36 | urls = []; domains = []; 37 | $$('*').forEach(element => { 38 | urls.push(element.src);urls.push(element.href);urls.push(element.url); 39 | }); urls = new Set(urls); 40 | urls.forEach(url => { 41 | if (url != undefined && url != "" && url.startsWith("http") == true){ 42 | url = new URL(url); 43 | if (url.host.endsWith(getMainHost()) == true) { 44 | domains.push(url.host) 45 | } 46 | } 47 | }); 48 | console.log(Array.from(new Set(domains))); 49 | function getMainHost() { 50 | let key = `mh_${Math.random()}`; 51 | let keyR = new RegExp( `(^|;)\\s*${key}=12345` ); 52 | let expiredTime = new Date( 0 ); 53 | let domain = document.domain; 54 | let domainList = domain.split( '.' ); 55 | let urlItems = []; 56 | urlItems.unshift( domainList.pop() ); 57 | while( domainList.length ) { 58 | urlItems.unshift( domainList.pop() ); 59 | let mainHost = urlItems.join( '.' 
); 60 | let cookie = `${key}=${12345};domain=.${mainHost}`; 61 | document.cookie = cookie; 62 | if ( keyR.test( document.cookie ) ) { 63 | document.cookie = `${cookie};expires=${expiredTime}`; 64 | return mainHost; 65 | } 66 | } 67 | } 68 | ``` 69 | 70 | ![-w1786](https://sanzhi-1259392731.cos.ap-chengdu.myqcloud.com/2020/12/11/16076228671417.jpg) 71 | 72 | ## 实现jsfinder获取所有接口 73 | 74 | 只获取域名是不太够的,接着来获取接口(严格来说这里其实也是url,只不过把js里面的url也提取出来了)。 75 | 76 | 这里有几个简单的问题存在: 77 | 78 | 1. 接口存在于js文件里,怎么通过js获取js文件内容? 79 | 2. 不同路径的处理是不是跟jsfinder原来一样需要写很多的判断处理语句? 80 | 3. 接口到底是获取全子域的还是只获取当前域的? 81 | 82 | 这些问题对应的解决办法: 83 | 84 | 1. 可以直接用xmlHttpRequest同步方法获取文件内容 85 | 2. 不需要,可以使用`new URL()`这种方式组合url,会自动处理url中的层级关系 (这个有点好用) 86 | 3. 只获取当前域的接口太少了,我选择的方式是连接获取当前域,接口获取全子域。这样既保证了爬取的页面不会太多,接口数量和质量也有所保障。 87 | 88 | ```js 89 | urls = []; js_content=""; result_raw = []; 90 | $$('*').forEach(element => { 91 | urls.push(element.src);urls.push(element.href);urls.push(element.url); 92 | if (element.tagName == "SCRIPT") { js_content += element.text } 93 | }); urls = new Set(urls); 94 | urls.forEach(rawurl => { 95 | if (rawurl != undefined && rawurl != "" && typeof(rawurl) == "string" && rawurl.startsWith("http") == true){ 96 | url = new URL(rawurl); 97 | if (url.host.endsWith(location.host) == true && url.pathname.endsWith(".js") == true) { 98 | result_raw = result_raw.concat(extract_url(geturlContent(url.pathname))); 99 | } 100 | } 101 | }); 102 | result_raw = result_raw.concat(extract_url(js_content)); 103 | result = []; 104 | result_raw.forEach(url=>{ 105 | if ("jpeg|png|gif|svg|js|flv|swf|css".search(new URL(url).pathname.split('.').pop().toLowerCase()) == -1){ 106 | result.push(url); 107 | } 108 | }) 109 | console.log(Array.from(new Set(result))); 110 | function getMainHost() { 111 | let key = `mh_${Math.random()}`; 112 | let keyR = new RegExp( `(^|;)\\s*${key}=12345` ); 113 | let expiredTime = new Date( 0 ); 114 | let domain = document.domain; 115 | let domainList = domain.split( '.' 
); 116 | let urlItems = []; 117 | urlItems.unshift( domainList.pop() ); 118 | while( domainList.length ) { 119 | urlItems.unshift( domainList.pop() ); 120 | let mainHost = urlItems.join( '.' ); 121 | let cookie = `${key}=${12345};domain=.${mainHost}`; 122 | document.cookie = cookie; 123 | if ( keyR.test( document.cookie ) ) { 124 | document.cookie = `${cookie};expires=${expiredTime}`; 125 | return mainHost; 126 | }}} 127 | function geturlContent(pathname){ 128 | var result = "" 129 | var request = new XMLHttpRequest(); 130 | request.open("GET", pathname, false); 131 | request.send(); 132 | if(request.status === 200){ 133 | result = request.responseText; 134 | } 135 | return result 136 | } 137 | function extract_url(js_content){ 138 | let regex = /(?:"|')(((?:[a-zA-Z]{1,10}:\/\/|\/\/)[^"'\/]{1,}\.[a-zA-Z]{2,}[^"']{0,})|((?:\/|\.\.\/|\.\/)[^"'><,;| *()(%%$^\/\\\[\]][^"'><,;|()]{1,})|([a-zA-Z0-9_\-\/]{1,}\/[a-zA-Z0-9_\-\/]{1,}\.(?:[a-zA-Z]{1,4}|action)(?:[\?|\/][^"|']{0,}|))|([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:\?[^"|']{0,}|)))(?:"|')/sg; 139 | let m; result = []; 140 | while ((m = regex.exec(js_content)) !== null) { 141 | if (m.index === regex.lastIndex) { regex.lastIndex++;} 142 | m.forEach((match, groupIndex) => { 143 | if (match != undefined) { 144 | match = match.replaceAll(/('|")/g, ""); 145 | if (match.startsWith("http") == true){ 146 | suburl = new URL(match); 147 | if (suburl.host.endsWith(getMainHost()) == true){ result.push(match); } 148 | }else{ 149 | url = new URL(match, location.origin); 150 | if (url.host.endsWith(getMainHost()) == true){ result.push(url.href); } 151 | }}});} 152 | return Array.from(new Set(result)); 153 | } 154 | ``` 155 | 156 | ![-w1788](https://sanzhi-1259392731.cos.ap-chengdu.myqcloud.com/2020/12/11/16076232670876.jpg) 157 | 158 | ## JSFinder油猴脚本 159 | 160 | 如果每次去获取域名接口都要复制一段js代码的话,那就太麻烦了。所以就写了一个油猴脚本来方便使用,每次打开网站都会自动的获取域名与接口。自己在网站测试过程中也可以自行设置域名范围(在油猴脚本中修改`match`配置)。 161 | 162 | 最终源码: 163 | 164 | 
```js 165 | // ==UserScript== 166 | // @name JSFinder 167 | // @namespace Threezh1 168 | // @version 0.2 169 | // @description Extract interfaces from html and javascript files. 170 | // @author Threezh1 171 | // @match *://*/* 172 | // @require https://greasyfork.org/scripts/12447-mootools-for-greasemonkey/code/MooTools%20for%20Greasemonkey.js?version=74469 173 | // @grant none 174 | // ==/UserScript== 175 | 176 | (function() { 177 | 'use strict'; 178 | console.log("JSFinder by Threezh1"); 179 | let urls = []; let js_content=""; let result_raw = []; let domains = []; 180 | $$('*').forEach(element => { 181 | urls.push(element.src);urls.push(element.href);urls.push(element.url); 182 | if (element.tagName == "SCRIPT") { js_content += element.text } 183 | }); urls = new Set(urls); 184 | urls.forEach(rawurl => { 185 | if (rawurl != undefined && rawurl != "" && typeof(rawurl) == "string" && rawurl.startsWith("http") == true){ 186 | let url = new URL(rawurl); 187 | if (url.host.endsWith(getMainHost()) == true) { domains.push(url.host) }; 188 | if (url.host.endsWith(location.host) == true && url.pathname.endsWith(".js") == true) { 189 | result_raw = result_raw.concat(extract_url(geturlContent(url.pathname))); 190 | } 191 | } 192 | }); 193 | result_raw = result_raw.concat(extract_url(js_content)); 194 | var result = []; 195 | result_raw.forEach(url=>{ 196 | if (new URL(url).host.endsWith(getMainHost()) == true) { domains.push(new URL(url).host) }; 197 | if ("jpeg|png|gif|svg|js|flv|swf|css".search(new URL(url).pathname.split('.').pop().toLowerCase()) == -1){ 198 | result.push(url); 199 | } 200 | }) 201 | console.log("JSFinder get domains: ", Array.from(new Set(domains))); 202 | console.log("JSFinder get urls: ", Array.from(new Set(result))); 203 | function getMainHost() { 204 | let key = `mh_${Math.random()}`; 205 | let keyR = new RegExp( `(^|;)\\s*${key}=12345` ); 206 | let expiredTime = new Date( 0 ); 207 | let domain = document.domain; 208 | let domainList = 
domain.split( '.' ); 209 | let urlItems = []; 210 | urlItems.unshift( domainList.pop() ); 211 | while( domainList.length ) { 212 | urlItems.unshift( domainList.pop() ); 213 | let mainHost = urlItems.join( '.' ); 214 | let cookie = `${key}=${12345};domain=.${mainHost}`; 215 | document.cookie = cookie; 216 | if ( keyR.test( document.cookie ) ) { 217 | document.cookie = `${cookie};expires=${expiredTime}`; 218 | return mainHost; 219 | }}} 220 | function geturlContent(pathname){ 221 | var result = "" 222 | var request = new XMLHttpRequest(); 223 | request.open("GET", pathname, false); 224 | request.send(); 225 | if(request.status === 200){ 226 | result = request.responseText; 227 | } 228 | return result 229 | } 230 | function extract_url(js_content){ 231 | let regex = /(?:"|')(((?:[a-zA-Z]{1,10}:\/\/|\/\/)[^"'\/]{1,}\.[a-zA-Z]{2,}[^"']{0,})|((?:\/|\.\.\/|\.\/)[^"'><,;| *()(%%$^\/\\\[\]][^"'><,;|()]{1,})|([a-zA-Z0-9_\-\/]{1,}\/[a-zA-Z0-9_\-\/]{1,}\.(?:[a-zA-Z]{1,4}|action)(?:[\?|\/][^"|']{0,}|))|([a-zA-Z0-9_\-]{1,}\.(?:php|asp|aspx|jsp|json|action|html|js|txt|xml)(?:\?[^"|']{0,}|)))(?:"|')/sg; 232 | let m; result = []; 233 | while ((m = regex.exec(js_content)) !== null) { 234 | if (m.index === regex.lastIndex) { regex.lastIndex++;} 235 | m.forEach((match, groupIndex) => { 236 | if (match != undefined) { 237 | match = match.replaceAll(/('|")/g, ""); 238 | if (match.startsWith("http") == true){ 239 | let suburl = new URL(match); 240 | if (suburl.host.endsWith(getMainHost()) == true){ result.push(match); } 241 | }else{ 242 | let url = new URL(match, location.origin); 243 | if (url.host.endsWith(getMainHost()) == true){ result.push(url.href); } 244 | }}});} 245 | return Array.from(new Set(result)); 246 | } 247 | })(); 248 | ``` 249 | 250 | 在油猴处启动脚本之后打开小米官网,可以看到已经成功运行: 251 | 252 | ![-w1789](https://sanzhi-1259392731.cos.ap-chengdu.myqcloud.com/2020/12/11/16076235698941.jpg) 253 | -------------------------------------------------------------------------------- /README.md: 
// (root README.md content, translated)
// # Deconstruct
// A place to keep miscellaneous files.
//
// /test.js:
// ==UserScript==
// @name         JSFinder
// @namespace    Threezh1
// @version      0.1
// @description  Extract interfaces from html and javascript files.
// @author       Threezh1
// @match        *://*.cuit.edu.cn/*
// @require      https://greasyfork.org/scripts/12447-mootools-for-greasemonkey/code/MooTools%20for%20Greasemonkey.js?version=74469
// @grant        none
// ==/UserScript==

/**
 * Minimal experiment: logs the page address, then every distinct value found
 * in the `src`, `href` and `url` properties of the page's elements.
 */
(function () {
  'use strict';
  // Nothing to inspect outside a browser document.
  if (typeof document === 'undefined' || typeof location === 'undefined') {
    return;
  }
  console.log(location.href);
  // Declared explicitly: assigning to an undeclared `urls` throws a
  // ReferenceError in strict mode, which stopped the script right here.
  const urls = [];
  // Native DOM API, so the script also works when MooTools' `$$` is unavailable.
  document.querySelectorAll('*').forEach((element) => {
    urls.push(element.src);
    urls.push(element.href);
    urls.push(element.url);
  });
  console.log(...new Set(urls));
})();