├── .gitignore ├── .gitmodules ├── LICENSE ├── README.md ├── drive-by_mining_app ├── miner.py └── templates │ └── index.html ├── minesweeper_tools ├── WebMinerAnalyzer.py ├── cn_fingerprint.json ├── config.json ├── mine_crawl.js ├── minesweeper.py ├── npm_requirements_install.sh ├── package.json ├── python_requirements.sh ├── run.py ├── run_analysis.py ├── run_web.py ├── util.py └── wast_analyse.py └── wasm_samples ├── benign_samples ├── hebjemijgemist.be_72b2c42dbaadbbb8.ok.wasm ├── hs-anhalt.de_c7f9b5a45e9e6d4d.ok.wasm ├── hs-anhalt.de_f97baec0d132a482.ok.wasm └── usindiaglobalreview.com_d790731c2ef09526.ok.wasm ├── miner_samples ├── 2114.ru.wasm ├── allduniv.ac.in.wasm ├── allduniv.ac.in_dede8f7f53c494aa.ok.wasm ├── avbye.com.wasm ├── bitcoinpeople.online.wasm ├── bluegrassrivals.com.wasm ├── browsermine.com_c777d6679aaddbc6.ok.wasm ├── citytoshi.com.wasm ├── creative-solutions.net_9697d29f961415cc.ok.wasm ├── crtanko.com_4eb88de73e041458.ok.wasm ├── desirefx.me_12ee11d9fb5711b8.ok.wasm ├── diarioelnorte.com.ar_a36158ce9fadf9e2.ok.wasm ├── draftdayprospects.com_fd349fd98c7740ae.ok.wasm ├── dunyatv.az.wasm ├── gostreams.me_ccda046a84285f3b.ok.wasm ├── hevcbay.com_440f91e74c91ee83.ok.wasm ├── iglyfada.gr_7eeeca07b608b8d2.ok.wasm ├── insurance-blogs-moneys.com_df0e1c9bdba02189.ok.wasm ├── javpoo.com_a2f39e310a614d87.ok.wasm ├── justproxy.co.uk.wasm ├── maimunata.com_be7873533b3fd3da.ok.wasm ├── makataka.su_1c976f694fb782f4.ok.wasm ├── mangafap.com_bdba8ea975358ad2.ok.wasm ├── mvvfans.nl_9850eed46b0a9d35.ok.wasm ├── omskbird.tv_3d37de2b64e580f3.ok.wasm ├── pencurimovie.xyz_2de044ff7777cbf3.ok.wasm ├── pharmatips.in.wasm ├── pumpand.win_e606bd1182f278ed.ok.wasm ├── sergeydovlatov.ru_ae3d9c0f6eca08b5.ok.wasm ├── tainiesonline.top.wasm ├── ultigamerz.com_a4f030377720d386.ok.wasm ├── uzhasnik.ru.wasm ├── wastedamateurs.com_d96a60acb9c51252.ok.wasm └── watchdigimonepisodes.com_22ea68bd30c105cb.ok.wasm └── miners_data.json /.gitignore: -------------------------------------------------------------------------------- 1 | # Prerequisites 2 | *.d 3 | 4 | # Object files 5 | *.o 6 | *.ko 7 | *.obj 8 | *.elf 9 | *.pyc 10 | 11 | # Precompiled Headers 12 | *.gch 13 | *.pch 14 | 15 | # Libraries 16 | *.lib 17 | *.a 18 | *.la 19 | *.lo 20 | 21 | # Shared objects (inc. Windows DLLs) 22 | *.dll 23 | *.so 24 | *.so.* 25 | *.dylib 26 | 27 | # Executables 28 | *.exe 29 | *.out 30 | *.app 31 | *.i*86 32 | *.x86_64 33 | *.hex 34 | 35 | # Debug files 36 | *.dSYM/ 37 | *.su 38 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "wabt"] 2 | path = wabt 3 | url = https://github.com/WebAssembly/wabt.git 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 
14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 
134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 
193 | You may obtain a copy of the License at
194 | 
195 | http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
202 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Code used for the MineSweeper project and results
2 | 
3 | ## Setting up
4 | 
5 | 1. Clone the repo
6 | 2. Run python\_requirements.sh
7 | 3. Run npm\_requirements\_install.sh
8 | 4. Compile the wabt tool: https://github.com/WebAssembly/wabt (make sure it generates the binary file `wabt/bin/wasm2wat`)
9 | 5. Compile the Chromium browser with debug flags enabled so that you can use the `dump-wasm-module` JS flag to dump wasm modules, or use this prebuilt version: https://download.vusec.net/dataset/chrome-build.tar
10 | 6. Update the config.json file
11 | 
12 | ## Running the MineSweeper tool
13 | 
14 | ```python minesweeper.py -t [target-url]```
15 | 
16 | ## Testing the code locally
17 | 
18 | To test the code locally, first run a drive-by mining web application using the
19 | following steps.
20 | 
21 | ```shell
22 | cd drive-by_mining_app
23 | python miner.py
24 | ```
25 | 
26 | Then run MineSweeper to analyze the drive-by mining web application that
27 | you hosted locally. We tested this code on Ubuntu 16.04.
28 | 
29 | ```python minesweeper.py -t [target-url]```
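For example, assuming the Flask test app is reachable on its default port (5000), and assuming `-t` takes a bare `host:port` target (an assumption; check `minesweeper.py` for the authoritative argument format), the local test looks like this:

```shell
# Terminal 1: serve the test drive-by mining page
cd drive-by_mining_app
python miner.py

# Terminal 2 (from the repo root): analyze the locally hosted page
cd minesweeper_tools
python minesweeper.py -t 127.0.0.1:5000
```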
30 | 
31 | 
32 | You can download the crawled data from the drive-by mining websites here:
33 | https://download.vusec.net/dataset/cryptominers_dataset.tar
34 | 
35 | ## Warning
36 | 
37 | This code is only for testing purposes. You are responsible for protecting yourself, your property
38 | and data, and others from any risks caused by this code.
39 | 
40 | 
--------------------------------------------------------------------------------
/drive-by_mining_app/miner.py:
--------------------------------------------------------------------------------
1 | from flask import Flask, render_template
2 | app = Flask(__name__)
3 | 
4 | @app.route("/")
5 | def home():
6 |     return render_template("index.html")
7 | 
8 | 
9 | if __name__ == "__main__":
10 |     app.run(debug=True)
11 | 
--------------------------------------------------------------------------------
/drive-by_mining_app/templates/index.html:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | 
4 | 
5 | A coinhive-based miner
6 | 
7 | 
8 | 
9 | 
10 | 
30 | 
31 | 
32 | Cryptocurrency miner is running in the browser

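<!--
  The inline miner bootstrap script (original lines 11 to 29) did not survive
  this capture; only the page text remains. For illustration, a representative
  CoinHive-style embed, with a purely hypothetical placeholder site key, would
  look like the following (new CoinHive.Anonymous is the API matched by the
  fingerprints in minesweeper_tools/WebMinerAnalyzer.py):

  <script src="https://coinhive.com/lib/coinhive.min.js"></script>
  <script>
    var miner = new CoinHive.Anonymous('YOUR_SITE_KEY');
    miner.start();
  </script>
-->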
33 | 34 | 35 | -------------------------------------------------------------------------------- /minesweeper_tools/WebMinerAnalyzer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # python WebMinerAnalyzer.py www.mejortorrent.org ./data 10 4 | 5 | import requests 6 | import json 7 | import sys 8 | import os 9 | import subprocess 10 | import time 11 | import json 12 | import re 13 | import time as t 14 | import shutil 15 | import datetime 16 | import magic 17 | import run_web 18 | import util 19 | import pdb 20 | 21 | fil = ['chatbro','tidio.co', 'getcourse.ru', 'bitcoin.com', 'streamlegends.com', 'thankyou.ru', 'rtschannel.com', 'ekranet.com', 'betgamestv.eu', 'camlinq', 'sciaga.pl', 'lockerdome', 'drivezy.com', 'realtime', 'wattbike.com', 'chatwee-api', 'mixer.combotframework.com', 'pusherapp.com', 'kf5.com', 'upscope.io', 'netpeak.cloud', 'curachat.com', 'channel.io', 'livecall.io', 'help.com', 'csgotower.com', 'siteheart.com', 'flyzoo.co', 'synerise.com', 'retain.ir', 'comagic.ru', 'sbtech.com', 'revechat.com', 'truconversion.com', 'forexpros.com', 'byside.com', 'raychat.io', 'deezer.com', 'velaro.com', 'freshrelevance.com', 'inside-graph.com', 'thelivechatsoftware.com', 'onicon.ru', 'czater.pl', 'mycertifiedservice.com', 'rafflecopter.com', '5p4rk13.com', 'active.com', 'alloka.ru', 'aml.ink', 'angelthump.com', 'bgrndi.com', 'cadlearning.com', 'cdnmedia.tv', 'clipinteractive.com', 'convertfox.com', 'csgotrinity.com', 'dealerfire.com', 'destiny.gg', 'episodecalendar.com', 'fifa55u.com', 'footyroom.com', 'hdkinoshka.net', 'lentainform.com', 'livehouse.in', 'lokspeedarma.com', 'lori.ru', 'luxadv.com', 'mixcloud.com', 'native.ai', 'ogaming.tv', 'overrustle.com', 'pancernik.info', 'poooo.ml', 'recreativ.ru', 'repl.it', 'scholastic.com', 'skinhub.com', 'skinup.gg', 'smashcast.tv', 'sochi.camera', 'stocktwits.com', 'straitstimes.com', 'tass.ru', 'tf.rs', 'tvrain.ru', 'viafoura.io', 'vtsmedia.com', 'whatsthescore.com', 'wigzopush.com', 'bugaboo.tv', 'codio.com', 'comode.kz', 'gocdn.ru', 'goshow.tv', 'iflychat.com', 'kuchebraska.com', 'luxup.ru', 'maxizone.win', 'megacdn.ru', 'poorchat.net', 'pregame.com', 'rambler.ru', 'spriteclub.tv', 'suptv.org', 'swarmcdn.com', 'teenslang.su', 'vbrick.com', 'vs3.com', 'bncapp.net', 'jmvstream.com', 'reg.place', 'streamable.com', 'universenetwork.tv', 'wotcase.ru', 'zohopublic.eu', 'vwd-webtech.com', 'geckodev.eu', 'fsdatacentre.com', 'ustream.tv', 'skalhuset.se', 'cleversite.ru', 'naiadsystems.com', 'bcrncdn.com', 'sketchboard.me', 'hotjar.com', 'jivosite.com', 'zohopublic.com', 'visitors.live', 'drift.com', 'crisp.chat', 'mieru-ca', 'hypercomments', 'herokuapp.com', 'giosg.com', 'easybroadcast.fr', 'cloudup.com', 'engine.io', 'FastcastService', '/app/', 'rltracker.pro', 'paintcollar', 'zenrus.ru', 'walls.io', 'spots.im', 'replain.cc', 'smartsupp', 'tunegenie', 'appcues', 'nexus', 'padlet.com', 'ws_client?customer', 'levelupmedia', 'twitch', 'chatango', 'p2pnow', 'banomago', 'rizotina', 'chessbase', 'goodgame', 'wsm', 'wsp', 'dditscdn', 'highwebmedia', 'realtime', 'exitgames', '33across.com', 'cbox', 'bet365affiliates', 'peer5', 'inspectlet', 'tradingview', 'chatbro', 'tawk', 'firebaseio', 'zopim', 'livestream', 'streamroot', 'cackle', 'chess24', 'foes', 'amap', 'feedjit', 'bpasyspro', 'agarioforums', 'intercom', 'webspectator', 'botframework.com'] 22 | 23 | patterns = { 24 | 'js' : 'cryptonight|WASMWrapper|crytenight|load.jsecoin.com|hash_cn', 25 | 'wasm' : b'\x00\x61\x73\x6d', 26 | 
'rwasm' : '.wasm|.wasl|.wsm', 27 | 'ppool' : ".pool.:.([A-Za-z0-9\.]+).,.login", 28 | 'login' : ".login.:.([A-Za-z0-9]+).,.password", 29 | 'ws' : '\"type\":\"job\",\"params\":|\"type\":\"auth\",\"params\":|\"type\":\"submit\",\"params\":|\"type\":\"authed\",\"params\":|\"identifier\":\"job\"|\"identifier\":\"handshake\",\"pool\"|\"command\":\"connect\"|\"command\":\"work\"|\"command\":\"share\"|\"command\":\"\"accepted\"|\"command\":\"info\"|\"command\":\"get_job\"|\"command\":\"set_cpu_load\"|\"command\":\"set_job\"', 30 | 'pool' : "\[WS-Creation\](.*)", 31 | } 32 | # 13 known services initial dataset 33 | patterns_types_ini = { 34 | 'coinhive' : "new CoinHive\.Anonymous|coinhive.com/lib/coinhive.min.js|authedmine.com/lib/", 35 | 'cryptonoter' : "minercry.pt/processor.js|\.User\(addr", 36 | 'nfwebminer' : "new NFMiner|nfwebminer.com/lib/", 37 | 'jsecoin' : "load.jsecoin.com/load", 38 | 'webmine' : "webmine.cz/miner", 39 | 'cryptoloot' : "CRLT\.anonymous|webmine.pro/lib/crlt.js|verifier.live/lib/crypta.js", 40 | 'coinimp' : "www.coinimp.com/scripts|new CoinImp.Anonymous|new Client.Anonymous|freecontent.stream|freecontent.data|freecontent.date", 41 | 'deepminer' : "new deepMiner.Anonymous|deepMiner.js", 42 | 'monerise' : "apin.monerise.com|monerise_builder", 43 | 'coinhave' :'minescripts\.info', 44 | 'cpufun' :'snipli.com/[A-Za-z]+\" data-id=', 45 | 'minr' : 'abc\.pema\.cl|metrika\.ron\.si|cdn\.rove\.cl|host\.dns\.ga|static\.hk\.rs|hallaert\.online|st\.kjli\.fi|minr\.pw|cnt\.statistic\.date|cdn\.static-cnt\.bid|ad\.g-content\.bid|cdn\.jquery-uim\.download', 46 | 'mineralt' : 'ecart\.html\?bdata=|/amo\.js\">|mepirtedic\.com', 47 | } 48 | 49 | patterns_types = { 50 | 'coinhive' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 51 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 52 | 'fp': "coinhive"}, 53 | 'cryptonoter' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 54 | 'script_key': "var addr = .([A-Za-z0-9]+)|\.Anonymous\(.([A-Za-z0-9]+)", 55 | 'fp': "cryptonoter|\.User\(addr"}, 56 | 'nfwebminer' : { 'ws_key': ".hash.:.([A-Za-z0-9]+).,.hostname", 57 | 'script_key': "NFMiner(.([A-Za-z0-9]+)", 58 | 'fp': "nfminer"}, 59 | 'jsecoin' : { 'ws_key': None, 60 | 'script_key': "https://load\.jsecoin\.com/load/([0-9]+)/", 61 | 'fp': "jsecoin"}, 62 | 'webmine' : { 'ws_key': "api.:.([A-Za-z0-9]+)", 63 | 'script_key': "https://webmine\.cz/miner\?key=([A-Za-z0-9]+)", 64 | 'fp': "webmine\.cz"}, 65 | 'cryptoloot' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 66 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 67 | 'fp': "crlt\.anonymous|cryptoloot"}, 68 | 'cryptominer' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 69 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 70 | 'fp': "mrwayne|CH.Anonymous"}, 71 | 'coinimp' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 72 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 73 | 'fp': "coinimp|freecontent.stream|freecontent.data"}, 74 | 'deepminer' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 75 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 76 | 'fp': "deepminer"}, 77 | 'monerise' : { 'ws_key': "/proxy/\?pa=([A-Za-z0-9]+)", 78 | 'script_key': "monerise_payment_address=.([A-Za-z0-9]+)", 79 | 'fp': "monerise"}, 80 | 'coincube' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 81 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 82 | 'fp': 'CoinCube'}, 83 | 
'grindcash' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 84 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 85 | 'fp': 'ulnawoyyzbljc\.ru'}, 86 | 'coinhave' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 87 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 88 | 'fp': 'minescripts\.info'}, 89 | 'kuku' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 90 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 91 | 'fp': 'KUKU\.Anonymous'}, 92 | 'cpufun' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 93 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 94 | 'fp': 'snipli.com/[A-Za-z]+\" data-id='}, 95 | 'minr' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 96 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 97 | 'fp': 'cnt\.statistic\.date|cdn\.static-cnt\.bid|ad\.g-content\.bid|cdn\.jquery-uim\.download'}, 98 | 'ricewithchicken' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 99 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 100 | 'fp': 'cricewithchicken\.js|RiseCrackerWrapper|datasecu.download|jqcdn.download'}, 101 | 'connection' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 102 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 103 | 'fp': 'interestingz\.pw|unconvulsed\.com|srvs\.stream|unrummaged\.com|artedite\.com|hhb123\.tk'}, 104 | 'mineralt' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 105 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 106 | 'fp': 'ecart\.html\?bdata=|/amo\.js\">|mepirtedic\.com'}, 107 | 'dryptonight' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 108 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 109 | 'fp': 'CTWASMWrapper|dryptonight|drivecdn.com'}, 110 | 'blakcrypto' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 111 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 112 | 'fp': 'blakcrypto\.com'}, 113 | 'generic' : { 'ws_key': ".site_key.:.([A-Za-z0-9]+).,.type", 114 | 'script_key': "\.Anonymous\(.([A-Za-z0-9]+)|\.User\(.([A-Za-z0-9]+)|var addr = .([A-Za-z0-9]+)", 115 | 'fp': None} 116 | } 117 | 118 | 119 | def start_chromium(): 120 | os.system('chromium-browser --headless --no-sandbox --remote-debugging-port=9222 &') 121 | 122 | def kill_chromium(): 123 | subprocess.check_call(["pkill", "chromium-browse"]) 124 | 125 | def init_min_detect(): 126 | 127 | min_detect = dict() 128 | min_detect = dict() 129 | min_detect['js'] = False 130 | min_detect['js_root'] = False 131 | min_detect['wasm'] = False 132 | min_detect['rwasm'] = False 133 | min_detect['cpu'] = 0 134 | min_detect['nblob'] = 0 135 | min_detect['blob_root'] = False 136 | min_detect['ws_present'] = False 137 | min_detect['ws_root'] = False 138 | min_detect['ws'] = False 139 | min_detect['html'] = [] 140 | min_detect['ws_obf'] = False 141 | min_detect['root'] = False 142 | return min_detect 143 | 144 | def extend_min(min_detect): 145 | miner_webs = dict() 146 | miner_webs['key'] = [] 147 | miner_webs['pool'] = [] 148 | miner_webs['js'] = min_detect['js'] 149 | miner_webs['wasm'] = min_detect['wasm'] 150 | miner_webs['cpu'] = min_detect['cpu'] 151 | miner_webs['nblob'] = 
min_detect['nblob'] 152 | miner_webs['type'] = list(min_detect['html']) 153 | miner_webs['ws'] = min_detect['ws'] 154 | miner_webs['ws_obf'] = min_detect['ws_obf'] 155 | miner_webs['ws_pres'] = min_detect['ws_present'] 156 | miner_webs['js_root'] = min_detect['js_root'] 157 | miner_webs['ws_root'] = min_detect['ws_root'] 158 | miner_webs['blob_root'] = min_detect['blob_root'] 159 | miner_webs['ppool'] = [] 160 | miner_webs['login'] = [] 161 | return miner_webs 162 | 163 | def load_files(url_path): 164 | files = [] 165 | for (dirpath, dirnames, filenames) in os.walk(url_path): 166 | files.extend(filenames) 167 | return files 168 | 169 | def detect_from_file(path, files, pattern): 170 | for file in files: 171 | file_path = path + '/' + file 172 | try: 173 | f = open(file_path,'r') 174 | text = f.read().replace('\n', '') 175 | if re.search(pattern, text, re.IGNORECASE) is not None: 176 | return True 177 | except Exception as e: 178 | print "[!] Couldn't read file " + file_path 179 | print str(e) 180 | return False 181 | 182 | def detect_cpu(path, file): 183 | try: 184 | cpu_path = path + '/' + file 185 | cpu_file = open(cpu_path, 'r') 186 | #Get Line with most cpu usage 187 | cpu_max = 0 188 | for line in cpu_file: 189 | tokens = line.split(); 190 | if float(tokens[3]) > cpu_max: 191 | cpu_max = float(tokens[3]) 192 | except Exception as e: 193 | print "[!][S2] No Cpu file for " + cpu_path 194 | print str(e) 195 | return -1 196 | 197 | return cpu_max 198 | 199 | def count_blob(url_path, file): 200 | try: 201 | path = url_path + '/' + file 202 | rfile = open(path, 'r') 203 | text = rfile.read() 204 | return text.count('ATTACHED') 205 | except Exception as e: 206 | print "[!][S2] (can happen) No service workers file file for " + path 207 | return -1 208 | 209 | def step_2(path, web, min_detect): 210 | global patterns 211 | global patterns_types_ini 212 | 213 | requests = 'requests' 214 | cpu_usage = 'cpuUsage' 215 | ws_dump = 'WSdump' 216 | htmlpage = 'full.html' 217 | sworkers = 'serviceworkes' 218 | 219 | web_path = os.path.join(path,web).split(":")[0] 220 | js_found = False 221 | wasm_found = False 222 | html_found = False 223 | for url in next(os.walk(web_path))[1]: 224 | 225 | url_path = os.path.join(web_path, url) 226 | files = load_files(url_path) 227 | 228 | #Detect Worker code in miners 229 | if not js_found: 230 | js_found = detect_from_file(url_path, files, patterns['js']) 231 | min_detect['js'] = js_found 232 | if js_found and url == '[esc]': 233 | min_detect['js_root'] = True 234 | 235 | #Detect wasm 236 | if not wasm_found: 237 | wasm_found = detect_from_file(url_path, files, patterns['wasm']) 238 | min_detect['wasm'] = wasm_found 239 | 240 | #Detect Orchestrator code in miners 241 | if not html_found: 242 | files = [htmlpage] 243 | for t in patterns_types_ini: 244 | html_found = detect_from_file(url_path, files, patterns_types_ini[t]) 245 | if html_found and t not in min_detect['html']: 246 | min_detect['html'].append(t) 247 | 248 | #Check if request contain wasm 249 | 250 | if not min_detect['rwasm']: 251 | files = [requests] 252 | min_detect['rwasm'] = detect_from_file(url_path, files, patterns['rwasm']) 253 | 254 | nblob = count_blob(url_path, sworkers); 255 | if nblob > min_detect['nblob']: 256 | if url == '[esc]': 257 | min_detect['blob_root'] = True 258 | min_detect['nblob'] = nblob 259 | 260 | #Detect Cpu load 261 | max_cpu = detect_cpu(url_path, cpu_usage) 262 | if min_detect['cpu'] < max_cpu: 263 | min_detect['cpu'] = max_cpu 264 | 265 | #Detect WS comunications 266 | if 
not min_detect['ws']: 267 | files = [ws_dump] 268 | if (os.path.exists(url_path + '/' + ws_dump)): 269 | min_detect['ws_present'] = True 270 | if url == '[esc]': 271 | min_detect['ws_root'] = True 272 | if(magic.from_file(url_path + '/' + ws_dump) == 'data'): 273 | min_detect['ws_obf'] = True 274 | min_detect['ws'] = detect_from_file(url_path, files, patterns['ws']) 275 | 276 | return min_detect 277 | 278 | def step3(path, miner_webs, web): 279 | global patterns_types 280 | global patterns_types_ini 281 | 282 | # Get the right web path 283 | web_path = os.path.join(path, web).split(":")[0] 284 | found_type = [] 285 | 286 | # If no type was detect try again: 287 | if miner_webs['type'] == []: 288 | # Load Files 289 | for url in next(os.walk(web_path))[1]: 290 | url_path = os.path.join(web_path, url) 291 | files = load_files(url_path) 292 | 293 | # Look for types fingerprints matches 294 | for file in files: 295 | file_path = os.path.join(url_path, file) 296 | try: 297 | f = open(file_path,'r') 298 | text = f.read() 299 | for t in patterns_types: 300 | if t not in found_type and t not in patterns_types[t]['fp'] and t not in patterns_types_ini and re.findall(patterns_types[t]['fp'], text, re.IGNORECASE): 301 | miner_webs['type'].append(t) 302 | found_type.append(t) 303 | except Exception as e: 304 | print "[!] Couldn't read file " + file_path 305 | print str(e) 306 | 307 | # If not types matches are found mark it as generic 308 | if not miner_webs['type']: 309 | miner_webs['type'].append('generic') 310 | #Dump the results in the folder in json 311 | return miner_webs 312 | 313 | def step4(path, miner_webs, web): 314 | ws_dump = 'WSdump' 315 | htmlpage = 'full.html' 316 | global patterns 317 | global patterns_types 318 | 319 | web_path = os.path.join(path, web).split(":")[0] 320 | 321 | 322 | # Load Files 323 | for url in next(os.walk(web_path))[1]: 324 | url_path = os.path.join(web_path, url) 325 | 326 | # 1. If WS try to extract key and pool 327 | try: 328 | f = open(os.path.join(url_path, ws_dump),'r') 329 | text = f.read() 330 | 331 | # Get public pools and login 332 | res = re.findall(patterns['ppool'], text, re.IGNORECASE) 333 | for r in res: 334 | if r not in miner_webs['ppool']: 335 | miner_webs['ppool'].append(r) 336 | res = [] 337 | res = re.findall(patterns['login'], text, re.IGNORECASE) 338 | for r in res: 339 | if r not in miner_webs['login']: 340 | miner_webs['login'].append(r) 341 | res = [] 342 | 343 | # Get keys 344 | for t in miner_webs['type']: 345 | if patterns_types[t]['ws_key']: 346 | res = re.findall(patterns_types[t]['ws_key'], 347 | text, re.IGNORECASE) 348 | for r in res: 349 | if r not in miner_webs['key']: 350 | miner_webs['key'].append(r) 351 | # Get pool address 352 | res = re.findall(patterns['pool'], 353 | text, re.IGNORECASE) 354 | for r in res: 355 | if r not in miner_webs['pool']: 356 | miner_webs['pool'].append(r) 357 | 358 | except Exception as e: 359 | print("key and pool detection: " + str(e)) 360 | 361 | # Special case for jsecoin 362 | if 'jsecoin' in miner_webs['type']: 363 | try: 364 | f = open(os.path.join(url_path, htmlpage),'r') 365 | text = f.read() 366 | res = re.findall(patterns_types['jsecoin']['script_key'], 367 | text, re.IGNORECASE) 368 | for r in res: 369 | if r not in miner_webs['key']: 370 | miner_webs['key'].append(r) 371 | except Exception as e: 372 | print("key and pool jse coin detection: " + str(e)) 373 | 374 | # 2. If (1.) 
failed try to extract key from js
375 |         if not miner_webs['key'] and miner_webs['js']:
376 |             try:
377 |                 f = open(os.path.join(url_path, htmlpage),'r')
378 |                 text = f.read()
379 |                 for t in miner_webs['type']:
380 |                     res = re.findall(patterns_types[t]['script_key'],
381 |                                      text, re.IGNORECASE)
382 |                     for r in res:
383 |                         if r not in miner_webs['key']:
384 |                             miner_webs['key'].append(r)
385 |             except Exception as e:
386 |                 print("key from js detection: " + str(e))
387 | 
388 | 
389 |     return miner_webs
390 | 
391 | def filter_rm_pools(data):
392 |     global fil
393 |     if data['pool']:
394 |         for p in list(data['pool']):
395 |             if any(x in p for x in fil):
396 |                 data['pool'].remove(p)
397 |     return data
398 | def print_profile(data):
399 |     '''
400 |     {
401 |         "blob_root": false,
402 |         "cpu": 127.88643533122735,
403 |         "js": false,
404 |         "js_root": false,
405 |         "key": [
406 |             "37efd635d0ec154de4d0b17dd1952aa3b5e88acd6bbe"
407 |         ],
408 |         "login": [],
409 |         "nblob": 16,
410 |         "pool": [
411 |             " wss://sea.reauthenticator.com/",
412 |             " wss://sass.reauthenticator.com/"
413 |         ],
414 |         "ppool": [],
415 |         "type": [
416 |             "cryptoloot"
417 |         ],
418 |         "wasm": true,
419 |         "ws": true,
420 |         "ws_obf": false,
421 |         "ws_pres": true,
422 |         "ws_root": false
423 |     }
424 |     '''
425 |     print "CPU: %.2f" %(data['cpu'])
426 |     print "Mining payload: " + str(data['js'])
427 |     print "Miners from root page: " + str(data["js_root"])
428 |     print "Public pool: " + str(data["ppool"]) + " with login id " + str(data["login"])
429 |     print "Pool proxy: " + str(data["pool"])
430 |     print "Matching type: " + str(data["type"])
431 |     print "Open WebSocket: " + str(data["ws_pres"]) + " - Obfuscated: " + str(data["ws_obf"])
432 |     print "Stratum communication: " + str(data["ws"])
433 |     print "Service workers: " + str(data["nblob"])
434 | 
435 | def crawl_and_profile(target, out, time, links):
436 | 
437 |     if not os.path.exists(out):
438 |         os.makedirs(out)
439 |     ###############################################################################
440 |     # Crawl phase
441 |     ###############################################################################
442 |     # We first want to crawl the website
443 | 
444 |     print "Launched the crawler! "
445 |     start_chromium()
446 |     t.sleep(2)
447 |     run_web.crawl(out, target, time, links)
448 |     kill_chromium()
449 | 
450 |     ###############################################################################
451 |     # Analysis phase
452 |     ###############################################################################
453 |     # Then we want to enrich the data
454 | 
455 | 
456 |     # Step 1: check crawl status
457 |     crawl_succ = False
458 | 
459 |     try:
460 |         status = open(os.path.join(out, status_f),'r')
461 |         lines = status.read().splitlines()
462 |         for l in lines:
463 |             if "Crawl-complete" in l:
464 |                 crawl_succ = True
465 |                 break
466 |     except Exception as e:
467 |         print "[!][S1] Can't find previous output"
468 |         print e
469 | 
470 |     if not crawl_succ:
471 |         print "[!] Couldn't reach the host " + target
472 |         print "[!] Exiting..."
473 |         sys.exit()
474 | 
475 |     # Step 2
476 |     print "Step 2"
477 |     min_detect = init_min_detect()
478 |     min_detect = step_2(out, target, min_detect)
479 | 
480 |     min_detect = extend_min(min_detect)
481 | 
482 |     # Step 3:
483 |     print "Step 3"
484 |     min_detect = step3(out, min_detect, target)
485 | 
486 |     # Step 4:
487 |     print "Step 4"
488 |     data = step4(out, min_detect, target)
489 | 
490 |     ###############################################################################
491 |     # Detection phase
492 |     ###############################################################################
493 |     # Finally we want to detect if the website is a miner
494 |     # Remove whitelisted websocket connections from pool
495 |     data = filter_rm_pools(data)
496 | 
497 | 
498 |     # Pretty print JSON
499 | 
500 |     shutil.rmtree(os.path.join(out,""))
501 | 
502 |     return data
503 | 
504 | # Names
505 | status_f = "status"
506 | 
507 | if __name__ == "__main__":
508 |     try:
509 |         target = sys.argv[1]
510 |         out = sys.argv[2]
511 |         time = int(sys.argv[3])
512 |         links = int(sys.argv[4])
513 |     except:
514 |         print "Usage: python ./WebMinerAnalyzer.py [TARGET WEB] [OUT PATH] [CRAWL TIMEOUT] [INTERNAL LINKS]\n"\
515 |               "Example: python WebMinerAnalyzer.py www.coinhive.com ./data 10 3"
516 |         sys.exit()
517 |     data = crawl_and_profile(target, out, time, links)
518 |     print json.dumps(data, indent=4, sort_keys=True)
519 | 
520 |     if (((data['js']) or data['key'] != [] or (data['type'] != [] and data['type'][0] != 'generic')) and data['ws_pres'] and data['nblob'] > 0) or data['ws'] or data['ppool'] or data['login']:
521 |         print "[*] Miner found!"
522 |     elif data['ws_pres'] and data['nblob'] > 0 and data['cpu'] > 50 and data['pool']:
523 |         print "[*] Found WS and high cpu and service workers"
524 |     elif data['ws_pres'] and data['cpu'] > 50:
525 |         print "[*] Found WS and high cpu"
526 |     else:
527 |         print "[*] No Miner found"
528 | 
529 | 
--------------------------------------------------------------------------------
/minesweeper_tools/cn_fingerprint.json:
--------------------------------------------------------------------------------
1 | {"aes1": {"i32.shr_u": 12, "i32.shl": 16, "i32.xor": 16}, "aes2": {"i32.shr_u": 120, "i32.shl": 160, "i32.xor": 160}, "keccak": {"i32.shl": 4, "i64.shl": 6, "i64.xor": 101, "i64.shr_u": 6}, "keccak2": {"i32.shl": 10, "i64.xor": 61, "i64.shl": 6, "i64.shr_u": 6, "loop": 3, "if": 2}, "skein1": {"i64.shl": 288, "i64.xor": 305, "i64.shr_u": 288}, "skein2": {"i64.shl": 144, "i64.xor": 153, "i64.shr_u": 144}, "skein3": {"i32.shl": 24, "i64.shl": 64, "i64.xor": 97, "i64.shr_u": 64}, "blake3": {"i32.shr_u": 32, "i32.shl": 128, "i32.xor": 80}, "blake4": {"loop": 2, "if": 1, "i32.xor": 80, "i32.shl": 85, "i32.shr_u": 32}, "blake5": {"loop": 2, "if": 1, "i32.xor": 80, "i32.shl": 69, "i32.shr_u": 32}, "groestl1": {"i32.shr_u": 99, "i32.shl": 286, "i32.xor": 127}, "groestl2": {"i32.shr_u": 99, "i32.shl": 286, "i32.xor": 136}}
2 | 
3 | 
4 | 
--------------------------------------------------------------------------------
/minesweeper_tools/config.json:
--------------------------------------------------------------------------------
1 | {
2 |     "chrome": "../chrome-build/chrome",
3 |     "out": "./data/",
4 |     "wabt": "../wabt/bin/wasm2wat",
5 |     "fingerprint": "cn_fingerprint.json",
6 |     "wast_analyse": "./wast_analyse.py",
7 |     "fp_function_count": {"keccak": 2, "blake": 3, "aes": 2, "groestl": 2, "skein": 3},
8 |     "fp_function_types": 5
9 | }
10 | 
--------------------------------------------------------------------------------
/minesweeper_tools/mine_crawl.js:
--------------------------------------------------------------------------------
1 | /*
2 |     To run the script:
3 |     google-chrome --remote-debugging-port=9222 &
4 |     node mine_crawl.js [target-url]
5 | */
6 | 
7 | /*
8 |     Modules import
9 | */
10 | const CDP = require('chrome-remote-interface');
11 | const { URL } = require('url');
12 | const path = require('path');
13 | const md5 = require('md5');
14 | const fs = require('fs');
15 | const os = require('os');
16 | const ps = require('ps-node');
17 | const usage = require('usage');
18 | const request = require('request');
19 | const atob = require('atob');
20 | var validUrl = require('valid-url');
21 | 
22 | 
23 | var noMedia = false;
24 | var deepCrawlFlag = true;
25 | var deepIdx = 0;
26 | var maxDepth = 0;
27 | var maxDepthFlag = false;
28 | var logErrorFlag = false;
29 | var loadTimeOut = 4000;
30 | var newDomsList;
31 | var browser = "chrome";
32 | var mainFolder = "./";
33 | var currFolder = "./";
34 | var dumpPath = "./";
35 | var errorLog = "./errorsLog";
36 | var crawlStatusLog = "./status";
37 | var popPages = "./popPages";
38 | 
39 | //Debug enabled
40 | var DB = false;
41 | var DBv = false; //verbose debugging
42 | var NOut = true;
43 | var NoScript = true;
44 | /*
45 |     Console log debugging messages wrapper
46 | */
47 | function db_out(message,select){
48 | 
49 |     if(select == 1 && DB)
50 |         console.log(message);
51 |     else if(select == 2 && DBv)
52 |         console.log(message);
53 |     else if(select == 0)
54 |         console.log(message);
55 | }
56 | 
57 | function replaceAll(str, find, replace) {
58 |     return str.replace(new RegExp(find, 'g'), replace);
59 | }
60 | 
61 | function crawlUrl(targetUrl,callback){
62 |     var folder = path.join(mainFolder, replaceAll(targetUrl.pathname,'/','[esc]') +
63 |         replaceAll(targetUrl.search,'/','[esc]'));
64 |     if(folder.length > 250){ folder = folder.substring(0,249);}
65 |     currFolder = folder;
66 |     try{
67 |         fs.mkdirSync(folder);
68 |     }catch(err){
69 |         var errorString = "Folder already present: " + folder;
70 |         if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});}
71 |         else db_out(errorString,0);
72 |         callback();
73 |         return;
74 |     }
75 |     const media = path.join(folder,"media/");
76 |     if(!noMedia) fs.mkdirSync(media);
77 |     const requests = path.join(folder,'requests');
78 |     const WS = "WSdump";
79 |     const cpuUse = path.join(folder,'cpuUsage');
80 |     const newDoms = path.join(folder,'newDomains');
81 |     const htmlPage = path.join(folder, 'full.html');
82 |     const allcookies = path.join(folder, 'cookies');
83 |     const servworkers = path.join(folder, 'serviceworkes');
84 |     var currTab;
85 | 
86 |     if(logErrorFlag){
87 |         fs.appendFile(errorLog,"\nErrors from: " + targetUrl.href, function(err){});
88 |     }
89 |     /*
90 |         Chrome Debugging Protocol instance
91 |     */
92 |     CDP.New().then( (tab) =>{
93 |         currTab = tab;
94 |         CDP((client) => {
95 | 
96 |             // extract domains
97 |             const {Network, Page,
98 |                 DOM, Runtime, Performance, Target} = client;
99 | 
100 |             var downloadFile = {}; // to download list
101 |             var downloadWFile = {}; // service workers to download files list
102 |             var targetList = [];
103 |             var targetSList = [];
104 |             var linksChecked = 0; // links checks counter
105 |             var htmlScripts = 0; // html scripts checks counter
106 |             var tid = 0; // transactions Ids for workers messages
107 | 
108 |             // Enable target discovery and auto attachment to intercept TargetAttached events
109 |             Target.setAutoAttach({autoAttach: true, waitForDebuggerOnStart: false});
110 |             Target.setDiscoverTargets({discover: true});
111 | 
112 |             /*
113 |                 Final Callback
114 |             */
115 |             function callbackClose(){
116 |                 db_out("Links finished?:" + linksChecked + " script finished?:" + htmlScripts,1);
117 |                 if (linksChecked == 'finished' && htmlScripts == 'finished'){
118 |                     //Testing the output of the performance monitoring functions
119 |                     try{
120 |                         CDP.Close({id: currTab.id});
121 |                         client.close();
122 |                         db_out("BROWSING: end... client closed",1);
123 |                         fs.appendFileSync(crawlStatusLog,targetUrl.href + ", Crawl-complete\n", function(err){});
124 |                         callback();
125 |                     }catch(err){
126 |                         var errorString = "CallBackClose: " + err;
127 |                         if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});}
128 |                         else db_out(errorString,1);
129 |                         callback();
130 |                     }
131 |                 }
132 |             }
133 | 
134 | 
135 |             /*
136 |                 Target event handlers
137 |             */
138 |             Target.targetCreated((params) => {
139 |                 db_out("Target Created: " + params.targetInfo.url + " (" + params.targetInfo.type + ")",1);
140 |                 if(params.targetInfo.type == "page" && params.targetInfo.url != "about:blank" && params.targetInfo.url != "")
141 |                     fs.appendFileSync(popPages, params.targetInfo.url + "\n", function(err){});
142 |             });
143 | 
144 |             // Enable networking events from new ServiceWorkers
145 |             Target.attachedToTarget(({sessionId, targetInfo}) => {
146 |                 targetList.push(targetInfo.targetId);
147 |                 targetSList.push(sessionId);
148 |                 if(targetInfo.type != 'page'){
149 |                     db_out("ATTACHED " + sessionId + ": " + targetInfo.url,1);
150 |                     fs.appendFileSync(servworkers, "ATTACHED " + sessionId + ": " + targetInfo.url + "\n", function(err){});
151 |                 }
152 |                 else{
153 |                     db_out("PAGE " + sessionId + ": " + targetInfo.url,1);
154 |                     fs.appendFileSync(servworkers, "PAGE " + sessionId + ": " + targetInfo.url + "\n", function(err){});
155 |                 }
156 | 
157 |                 Target.sendMessageToTarget({
158 |                     sessionId,
159 |                     message: JSON.stringify({ // <-- a JSON string!
160 |                         id: tid++,
161 |                         method: 'Network.enable'
162 |                     })
163 |                 }).catch(function(error) {
164 |                     var errorString = "Send Network.enable to worker: " + error;
165 |                     if(logErrorFlag){fs.appendFile(errorLog, errorString, function(err){});}
166 |                     else db_out(errorString,1);
167 |                 });
168 |                 Target.sendMessageToTarget({
169 |                     sessionId,
170 |                     message: JSON.stringify({ // <-- a JSON string!
171 |                         id: tid++,
172 |                         method: 'Debugger.enable'
173 |                     })
174 |                 }).catch(function(error) {
175 |                     var errorString = "Send Debugger.enable to worker: " + error;
176 |                     if(logErrorFlag){fs.appendFile(errorLog, errorString, function(err){});}
177 |                     else db_out(errorString,1);
178 |                 });
179 |             });
180 | 
181 |             // Let's deal with separate ServiceWorkers events
182 |             Target.receivedMessageFromTarget(({sessionId, message}) => {
183 |                 const {id, method, result, params} = JSON.parse(message); // <-- a JSON string!
184 | //Check if we have any reply messages received from the worker 185 | if(id){ 186 | //Dump the worker body response 187 | if(result){ 188 | if(result.base64Encoded){ 189 | fs.appendFileSync(path.join(folder,"WorkerRq-" + downloadWFile[id]), 190 | atob(result.body), function(err){}); 191 | }else{ 192 | fs.appendFileSync(path.join(folder,"WorkerRq-" + downloadWFile[id]), 193 | result.body, function(err){}); 194 | } 195 | 196 | } 197 | } 198 | //Check if we have any events from the worker 199 | switch (method) { 200 | case 'Network.requestWillBeSent':{ 201 | const {request: {url}} = params; 202 | fs.appendFile(requests, params.requestId + 203 | ',' + params.request.url + '\n', function(err){}); 204 | db_out(`WORKER REQUEST: ${sessionId}: ${url}` + " RequestId:" + params.requestId,1); 205 | break; 206 | } 207 | case "Network.responseReceived":{ 208 | db_out("WORKER RECEIVED: " + params.requestId + " " + params.type,1); 209 | downloadFile[params.requestId] = params.type; 210 | break; 211 | 212 | } 213 | case "Network.loadingFinished" :{ 214 | db_out("WORKER LOADED: " + params.requestId + " " + " id:" + tid,1); 215 | downloadWFile[tid] = tid; 216 | if (downloadFile[params.requestId]) { 217 | Target.sendMessageToTarget({ 218 | sessionId, 219 | message: JSON.stringify({ 220 | id : tid++, 221 | method: "Network.getResponseBody", 222 | params: {requestId: params.requestId} 223 | }) 224 | }).catch(function(error) { 225 | var errorString = "Send Network.getResponseBody to worker: " + error; 226 | if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});} 227 | else db_out(errorString,1); 228 | }); 229 | } 230 | break; 231 | } 232 | default: //no default 233 | } 234 | }); 235 | 236 | /* 237 | Setup Network events handlers 238 | */ 239 | 240 | //This is triggered when the client initiate a request 241 | // it dumps all the network documents requests from initiated by the client 242 | Network.requestWillBeSent((params) => { 243 | db_out("REQUEST: " + params.request.url,2); 244 | fs.appendFile(requests, params.requestId + 245 | ',' + params.request.url + '\n', function(err){}); 246 | }); 247 | 248 | 249 | //This is triggered when a http response is received 250 | Network.responseReceived((params) => { 251 | //Better to log every response 252 | if((params.type === 'Image' || 253 | params.type === "Media" || 254 | params.type === "Font" || 255 | params.type === "Stylesheet") && noMedia){} 256 | else{ 257 | downloadFile[params.requestId] = params.type; 258 | db_out("RECEIVED: " + params.requestId + " type: " + downloadFile[params.requestId],2); 259 | } 260 | }); 261 | 262 | //This is triggered when a document is loaded into the client 263 | // it dumps the loaded documents (of every kind - so even wasm and js) 264 | Network.loadingFinished((params) => { 265 | if (downloadFile[params.requestId]) { 266 | db_out("LOADED: " + params.requestId + " type: " + downloadFile[params.requestId],2); 267 | Network.getResponseBody({ 268 | requestId: params.requestId 269 | }).then(response => { 270 | var fol = (downloadFile[params.requestId] === "Media" 271 | || downloadFile[params.requestId] === "Image")? 
media:folder; 272 | if(response.base64Encoded){ 273 | fs.appendFileSync(path.join(fol, downloadFile[params.requestId] + "-" + params.requestId), 274 | atob(response.body), function(err){}); 275 | }else{ 276 | fs.appendFileSync(path.join(fol, downloadFile[params.requestId] + "-" + params.requestId), 277 | response.body, function(err){}); 278 | } 279 | 280 | }).catch(function(error) { 281 | var errorString = "Loading finished - Network.getResponseBody: " + error; 282 | if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});} 283 | else db_out(errorString,1); 284 | }); 285 | } 286 | }); 287 | 288 | // Log a WebSocket creation 289 | Network.webSocketCreated((params) => { 290 | db_out("WS CREATION:" + params.url,2); 291 | fs.appendFileSync(path.join(folder, WS), 292 | "[WS-Creation] " + params.url, function(err){}); 293 | }); 294 | 295 | // Log when a new frame is received from a WS 296 | Network.webSocketFrameReceived((params) => { 297 | db_out("WS: RECEIVED:" + params.response.payloadData,2); 298 | fs.appendFileSync(path.join(folder, WS), 299 | "\n[IN]" + params.response.payloadData, function(err){}); 300 | }); 301 | 302 | // Log when a new frame is sent to a WS 303 | Network.webSocketFrameSent((params) => { 304 | db_out("WS: SENT:" + params.response.payloadData,2); 305 | fs.appendFileSync(path.join(folder, WS), 306 | "\n[OUT]" + params.response.payloadData, function(err){}); 307 | }); 308 | 309 | /* 310 | Setup Page events handlers 311 | */ 312 | //This is triggered when all the elements in a page are loaded 313 | Page.loadEventFired(() => { 314 | setTimeout(function() { 315 | logcpu(browser); 316 | obtainPage(); 317 | obtainCookies(); 318 | obtainNew(callbackClose); 319 | }, loadTimeOut) 320 | }); 321 | 322 | /* 323 | This function dump the main html page of the target dom 324 | */ 325 | function obtainPage(){ 326 | Runtime.evaluate({ 327 | expression: 'document.documentElement.outerHTML' 328 | }).then(result => { 329 | const html = result.result.value; 330 | fs.appendFileSync(htmlPage, html, function(err){}); 331 | db_out("HTML: page dumped",1); 332 | }); 333 | } 334 | 335 | /* 336 | This function log all the active cookies 337 | */ 338 | function obtainCookies(){ 339 | Network.getAllCookies().then( 340 | cookies => { 341 | fs.appendFileSync(allcookies, JSON.stringify(cookies), function(err){}); 342 | db_out("COOKIES: dumped",1); 343 | }); 344 | } 345 | 346 | /* 347 | This function extract all the reachable domains and scripts from a page 348 | newdom is set to true if we want to log the reachable new domains 349 | */ 350 | function obtainNew(callback) { 351 | DOM.getDocument().then(doc =>{ 352 | if(NoScript) htmlScripts = 'finished'; 353 | else extractScript(doc.root.nodeId,callback); 354 | obtainDOM(callback, doc); 355 | }); 356 | } 357 | /* 358 | This function extract the new domains from the html page 359 | */ 360 | function obtainDOM(callback, doc){ 361 | DOM.querySelectorAll({nodeId : doc.root.nodeId, 362 | selector : 'a[href]'}).then(links =>{ 363 | if(Object.keys(links.nodeIds).length === 0){ 364 | linksChecked = 'finished'; 365 | callback(); 366 | } 367 | for (let link of links.nodeIds) { 368 | DOM.resolveNode({nodeId : link 369 | }).then(rObject => { 370 | Runtime.getProperties({ 371 | objectId: rObject.object.objectId 372 | }).then(properties => { 373 | var nDom = properties.result.find(function (obj) 374 | { return obj.name === 'href'; }).value.value 375 | nDom = new URL(nDom); 376 | if (nDom.host == targetUrl.host) { 377 | fs.appendFileSync(newDoms, nDom.href + 
'\n', 378 | function(err){}); 379 | } 380 | linksChecked++; 381 | if (linksChecked == links.nodeIds.length) { 382 | linksChecked = 'finished'; 383 | db_out("NEW DOMS: collected",1); 384 | callback(); 385 | } 386 | 387 | }).catch(function(error) { 388 | linksChecked++; 389 | if (linksChecked == links.nodeIds.length) { 390 | linksChecked = 'finished'; 391 | db_out("NEW DOMS: collected",1); 392 | callback(); 393 | } 394 | var errorString = "obtainDOM - getProperties: " + error; 395 | if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});} 396 | else db_out(errorString,1); 397 | }); 398 | }).catch(function(error) { 399 | var errorString = "obtainDOM - resolveNode: " + error; 400 | if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});} 401 | else db_out(errorString,1); 402 | }); 403 | } 404 | }).catch(function(error) { 405 | var errorString = "obtainDOM - querySelectorAll: " + error; 406 | if(logErrorFlag){fs.appendFile(errorLog,errorString, function(err){});} 407 | else db_out(errorString,1); 408 | }); 409 | } 410 | /* 411 | This function extract js files from