├── etc ├── revision ├── version ├── site ├── uwsgi_only ├── version_string ├── limits ├── memcached ├── database ├── elasticsearch ├── deploy.cfg ├── proxy ├── banned_words └── privoxy.config.default ├── torscraper ├── __init__.py ├── spiders │ └── __init__.py ├── pipelines.py └── items.py ├── .env ├── 3rd_party ├── .gitignore └── WhatWeb │ ├── INSTALL │ ├── plugins │ ├── country-ips.dat │ ├── airtiesrouter.rb │ ├── myzone.rb │ ├── samsung-printer.rb │ ├── backbee.rb │ ├── openid.rb │ ├── ip.rb │ ├── lightbox.rb │ ├── dwr.rb │ ├── crazyegg.rb │ ├── typekit.rb │ ├── watson.rb │ ├── clicky.rb │ ├── ecomat-cms.rb │ ├── html5.rb │ ├── dvr-webclient.rb │ ├── mint.rb │ ├── xtra-business-hosting.rb │ ├── intrasrv.rb │ ├── sillysmart.rb │ ├── wolfcms.rb │ ├── wooframework.rb │ ├── strict-transport-security.rb │ ├── bing-searchengine.rb │ ├── ez-oscommerce.rb │ ├── dublin_core.rb │ ├── incapsula-waf.rb │ ├── litespeed.rb │ ├── toshiba-printer.rb │ ├── typepad.rb │ ├── brightcove.rb │ ├── echo.rb │ ├── empirecms.rb │ ├── php-cake.rb │ ├── star-network.rb │ ├── x-vortech-php.rb │ ├── ee.rb │ ├── extjs.rb │ ├── adxstudio-cms.rb │ ├── addthis.rb │ ├── citrix-web-pn-server.rb │ ├── code_igniter_profiler.rb │ ├── hotaru-cms.rb │ ├── mobile-joomla.rb │ ├── proxy-agent.rb │ ├── aladdin-hasp-license-manager.rb │ ├── cruxpa.rb │ ├── hp-printer.rb │ ├── sharethis.rb │ ├── vimeo.rb │ ├── teamviewer.rb │ ├── wordpress-supercache.rb │ ├── symfony.rb │ ├── twistedweb.rb │ ├── utorrent.rb │ ├── deluge-web.rb │ ├── blogsmithmedia.rb │ ├── 4d.rb │ ├── frame.rb │ ├── recaptcha.rb │ ├── alcatel-lucent-omniswitch.rb │ ├── citrix-confproxy.rb │ ├── meta-geo.rb │ ├── nucleus.rb │ ├── web2py.rb │ ├── windweb.rb │ ├── akamai-global-host.rb │ ├── essentia-web-server.rb │ ├── hikvision.rb │ ├── qtranslate.rb │ ├── citrix-netscaler.rb │ ├── embedthis.rb │ ├── youtube.rb │ ├── clicktale.rb │ ├── w3mfc.rb │ ├── x-hacker.rb │ ├── maxmind-geoip.rb │ ├── siemens-router.rb │ ├── ben-ssl.rb │ ├── 
cdn-cache-server.rb │ ├── haproxy.rb │ ├── mongrel.rb │ ├── tinyproxy.rb │ ├── laserwash.rb │ ├── mapserver-4-windows.rb │ ├── trend-micro.rb │ ├── wordpress-stats.rb │ ├── edito-cms.rb │ ├── webiis.rb │ ├── intrinsyc-deviceweb.rb │ ├── mason.rb │ ├── mcafee-secure.rb │ ├── toshiba-network-camera.rb │ ├── cgi-backdoor.rb │ ├── content-language.rb │ ├── emc-networker.rb │ ├── facebook-plugin.rb │ ├── xavante.rb │ ├── amr-wincontrol.rb │ ├── cougar.rb │ ├── cscssm.rb │ ├── google-talk-chatback.rb │ ├── motorito.rb │ ├── redirect-location.rb │ ├── google-maps.rb │ ├── pantheon.rb │ ├── tinybb.rb │ ├── freenac.rb │ ├── speakker.rb │ ├── wp-slimstat.rb │ ├── commerce-builder.rb │ ├── controlstar-scada.rb │ ├── quickweb.rb │ ├── restlet-framework.rb │ ├── profense-firewall.rb │ ├── rcttools.rb │ ├── adsubtract.rb │ ├── cl-http.rb │ ├── cups.rb │ ├── extplorer.rb │ ├── falcon-web-server.rb │ ├── footprint.rb │ ├── meta-powered-by.rb │ ├── whizzy-cms.rb │ ├── xeneo-web-server.rb │ ├── comanche.rb │ ├── kampyle.rb │ ├── proxy-authenticate.rb │ ├── svn.rb │ ├── werkzeug.rb │ ├── abyss-web-server.rb │ ├── edgeprism.rb │ ├── sagem-router.rb │ ├── abo.cms.rb │ ├── chance-i-divis-dvr.rb │ ├── kandidat-cms.rb │ ├── meta-author.rb │ ├── access-control-allow-methods.rb │ ├── allow.rb │ ├── astaro-end-user-portal.rb │ └── avaya-aura-utility-server.rb │ ├── plugin-development │ ├── wikipedia-top-5.txt │ ├── charset-test-list.txt │ ├── alexa-top-10.txt │ └── wget-list │ ├── plugins-disabled │ ├── length.rb │ ├── html-comments.rb │ ├── md5.rb │ ├── meta-keywords.rb │ ├── meta-contact.rb │ ├── shortcut-icon.rb │ ├── meta-description.rb │ ├── content-type.rb │ ├── google-webmaster-verify.rb │ ├── address.rb │ ├── meta-state.rb │ ├── http-headers.rb │ ├── script-urls.rb │ ├── frame-urls.rb │ └── rss_feed.rb │ └── my-plugins │ └── plugin-tutorial-4.rb ├── web ├── templates │ ├── blank.html │ ├── layout_footer.html │ ├── ruler.macro.html │ ├── error.html │ ├── layout.html │ ├── info.html │ 
├── clones_list.html │ ├── email_list.html │ ├── bitcoin_list.html │ ├── port_list.html │ ├── language_list.html │ ├── ssh_list.html │ ├── languages.html │ ├── path_list.html │ ├── index_domains_only.html │ ├── index_fulltext.html │ └── whatweb_list.html ├── static │ ├── robots.txt │ ├── logo.png │ ├── favicon.ico │ ├── apple-icon.png │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── favicon-96x96.png │ ├── ms-icon-70x70.png │ ├── ms-icon-144x144.png │ ├── ms-icon-150x150.png │ ├── ms-icon-310x310.png │ ├── android-icon-36x36.png │ ├── android-icon-48x48.png │ ├── android-icon-72x72.png │ ├── android-icon-96x96.png │ ├── apple-icon-114x114.png │ ├── apple-icon-120x120.png │ ├── apple-icon-144x144.png │ ├── apple-icon-152x152.png │ ├── apple-icon-180x180.png │ ├── apple-icon-57x57.png │ ├── apple-icon-60x60.png │ ├── apple-icon-72x72.png │ ├── apple-icon-76x76.png │ ├── android-icon-144x144.png │ ├── android-icon-192x192.png │ ├── apple-icon-precomposed.png │ ├── test.html │ ├── a9050ec9cc7032ec6ff977dff3190fae.ico.zip │ ├── browserconfig.xml │ └── b.js ├── app.pyc ├── wsgi.pyc ├── app.wsgi ├── wsgi.py ├── docker-entrypoint.sh ├── Dockerfile ├── requirements.txt └── app.ini ├── scripts ├── sleep.sh ├── kill.sh ├── blank.sh ├── process_whatweb.py ├── python.sh ├── reaper.py ├── detect_clones.py ├── ban.sh ├── elasticsearch_migrate.py ├── stats.sh ├── web.sh ├── ban_list.sh ├── reaper.sh ├── unban.sh ├── dont_have.sh ├── build_corpus.sh ├── gen_random.sh ├── portscan_up.sh ├── domains_all_alive.sh ├── fix_page_paths.sh ├── fix_subdomains.sh ├── generate_gexf.sh ├── process_whatweb.sh ├── test_up.sh ├── build_lda_model.sh ├── clean_clone_groups.sh ├── update_clone_fakes.sh ├── update_daily_stats.sh ├── create_flask_secret.sh ├── mark_fake.sh ├── referers.sh ├── scrape.sh ├── useful_404_detection.sh ├── elasticsearch_migrate.sh ├── extract_from_url.sh ├── links2.sh ├── mark_genuine.sh ├── push_if_up.sh ├── add_domains_to_elasticsearch.sh ├── crawl_from.sh ├── 
test_up_alive.sh ├── resurrect.sh ├── push_list.sh ├── detect_clones.sh ├── test_up-downonly.sh ├── update_daily_stats.py ├── update_schema.sh ├── update_clone_fakes.py ├── clean_clone_groups.py ├── tor_extract_from_url.sh ├── import_tor2web.py ├── harvest_relay.sh ├── docker_haproxy_harvest_scrape.sh ├── push.sh ├── test_random.sh ├── checkurl.sh ├── purify.sh ├── update_fingerprints.sh ├── build_dictionary.py ├── deploy_test.sh ├── domains_all_alive.py ├── update_and_pull_schema.sh ├── ban_list.py ├── tor_rip.sh ├── check_fingerprint.sh ├── stats.py ├── dont_have.py ├── fix_subdomains.py ├── rip.sh ├── gen_random.py ├── n_relationships.sh ├── portscan_up.py ├── stronghold_paste_rip.sh ├── ban.py ├── make_genuine.py ├── unban.py ├── add_ssh_fingerprint.py ├── add_domains_to_elasticsearch.py ├── get_valid.sh ├── create_flask_secret.py ├── fix_page_paths.py ├── start.sh ├── domains_since_and_touch.py ├── mark_genuine_or_fake.py └── pastebin.sh ├── lib ├── tor_db │ ├── constants.py │ ├── db.py │ ├── models │ │ ├── ssh_fingerprint.py │ │ ├── headless_bot.py │ │ ├── open_port.py │ │ ├── email.py │ │ ├── bitcoin_address.py │ │ ├── __init__.py │ │ └── request_log.py │ └── __init__.py ├── tor_db.pyc ├── tor_paths.py ├── email_util.py ├── version.py ├── banned.py ├── tor_text.py ├── interesting_paths.py └── detect_language.py ├── init ├── down_proxy.sh ├── isup-service.sh ├── scraper-service.sh ├── isup-service-alive.sh └── isup-service_alive.sh ├── torscraper.code-workspace ├── db └── Dockerfile ├── .gitignore ├── scrapy.cfg ├── requirements.txt ├── tor-privoxy └── Dockerfile ├── tor-privoxy2 └── Dockerfile ├── tor-privoxy3 └── Dockerfile ├── tor-privoxy4 └── Dockerfile └── AUTHOR /etc/revision: -------------------------------------------------------------------------------- 1 | 209 2 | -------------------------------------------------------------------------------- /etc/version: -------------------------------------------------------------------------------- 1 | 0.2 2 | 
-------------------------------------------------------------------------------- /torscraper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.env: -------------------------------------------------------------------------------- 1 | PYTHONPATH=lib/ 2 | -------------------------------------------------------------------------------- /3rd_party/.gitignore: -------------------------------------------------------------------------------- 1 | .project -------------------------------------------------------------------------------- /web/templates/blank.html: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /etc/site: -------------------------------------------------------------------------------- 1 | SITE_DOMAIN=zlal32teyptf4tvi.onion -------------------------------------------------------------------------------- /etc/uwsgi_only: -------------------------------------------------------------------------------- 1 | BASEDIR=/opt/torscraper/ 2 | -------------------------------------------------------------------------------- /etc/version_string: -------------------------------------------------------------------------------- 1 | 0.2r209-201802192032 2 | -------------------------------------------------------------------------------- /etc/limits: -------------------------------------------------------------------------------- 1 | RESULT_LIMIT=500 2 | MAX_RESULT_LIMIT=10000 -------------------------------------------------------------------------------- /web/static/robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: / -------------------------------------------------------------------------------- /web/templates/layout_footer.html: 
-------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /scripts/sleep.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | while [ 1 ]; do echo dodo; sleep 5; done -------------------------------------------------------------------------------- /lib/tor_db/constants.py: -------------------------------------------------------------------------------- 1 | from datetime import * 2 | NEVER = datetime.fromtimestamp(0) 3 | -------------------------------------------------------------------------------- /scripts/kill.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | kill `ps aux | grep $1 | tr -s ' ' | cut -f 2 -d ' '` 3 | -------------------------------------------------------------------------------- /scripts/blank.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | -------------------------------------------------------------------------------- /web/app.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/app.pyc -------------------------------------------------------------------------------- /web/wsgi.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/wsgi.pyc -------------------------------------------------------------------------------- /etc/memcached: -------------------------------------------------------------------------------- 1 | MEMCACHED_ENABLED=true 2 | MEMCACHED_HOST=127.0.0.1 3 | MEMCACHED_PORT=11211 4 | -------------------------------------------------------------------------------- /lib/tor_db.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/lib/tor_db.pyc -------------------------------------------------------------------------------- /scripts/process_whatweb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import whatweb 3 | whatweb.process_all() 4 | -------------------------------------------------------------------------------- /scripts/python.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | python -------------------------------------------------------------------------------- /scripts/reaper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from tor_db import * 3 | 4 | Page.delete_old() 5 | -------------------------------------------------------------------------------- /scripts/detect_clones.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import clone_detection 3 | clone_detection.detect() 4 | -------------------------------------------------------------------------------- /web/static/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/logo.png -------------------------------------------------------------------------------- /etc/database: -------------------------------------------------------------------------------- 1 | DB_HOST=freshonions-torscraper-db 2 | DB_USER=torscraper 3 | DB_PASS=password 4 | DB_BASE=tor 5 | -------------------------------------------------------------------------------- /web/static/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/favicon.ico -------------------------------------------------------------------------------- /web/static/apple-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon.png -------------------------------------------------------------------------------- /scripts/ban.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | python $SCRIPTDIR/ban.py $1 5 | -------------------------------------------------------------------------------- /scripts/elasticsearch_migrate.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import tor_elasticsearch 3 | tor_elasticsearch.migrate() 4 | -------------------------------------------------------------------------------- /scripts/stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/stats.py 5 | -------------------------------------------------------------------------------- /scripts/web.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $BASEDIR/web/app.py 5 | -------------------------------------------------------------------------------- /web/static/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/favicon-16x16.png -------------------------------------------------------------------------------- /web/static/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/favicon-32x32.png -------------------------------------------------------------------------------- /web/static/favicon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/favicon-96x96.png -------------------------------------------------------------------------------- /web/static/ms-icon-70x70.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/ms-icon-70x70.png -------------------------------------------------------------------------------- /scripts/ban_list.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/ban_list.py 5 | -------------------------------------------------------------------------------- /scripts/reaper.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/reaper.py 5 | -------------------------------------------------------------------------------- /scripts/unban.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/unban.py $1 5 | -------------------------------------------------------------------------------- /web/static/ms-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/ms-icon-144x144.png -------------------------------------------------------------------------------- /web/static/ms-icon-150x150.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/ms-icon-150x150.png -------------------------------------------------------------------------------- /web/static/ms-icon-310x310.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/ms-icon-310x310.png -------------------------------------------------------------------------------- /scripts/dont_have.sh: 
-------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/dont_have.py $1 5 | -------------------------------------------------------------------------------- /web/static/android-icon-36x36.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-36x36.png -------------------------------------------------------------------------------- /web/static/android-icon-48x48.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-48x48.png -------------------------------------------------------------------------------- /web/static/android-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-72x72.png -------------------------------------------------------------------------------- /web/static/android-icon-96x96.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-96x96.png -------------------------------------------------------------------------------- /web/static/apple-icon-114x114.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-114x114.png -------------------------------------------------------------------------------- /web/static/apple-icon-120x120.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-120x120.png -------------------------------------------------------------------------------- /web/static/apple-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-144x144.png -------------------------------------------------------------------------------- /web/static/apple-icon-152x152.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-152x152.png -------------------------------------------------------------------------------- /web/static/apple-icon-180x180.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-180x180.png -------------------------------------------------------------------------------- /web/static/apple-icon-57x57.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-57x57.png -------------------------------------------------------------------------------- /web/static/apple-icon-60x60.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-60x60.png -------------------------------------------------------------------------------- /web/static/apple-icon-72x72.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-72x72.png -------------------------------------------------------------------------------- 
/web/static/apple-icon-76x76.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-76x76.png -------------------------------------------------------------------------------- /scripts/build_corpus.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/build_corpus.py 5 | -------------------------------------------------------------------------------- /scripts/gen_random.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/gen_random.py $1 5 | -------------------------------------------------------------------------------- /scripts/portscan_up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/portscan_up.py 5 | -------------------------------------------------------------------------------- /web/static/android-icon-144x144.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-144x144.png -------------------------------------------------------------------------------- /web/static/android-icon-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/android-icon-192x192.png -------------------------------------------------------------------------------- /scripts/domains_all_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | $SCRIPTDIR/domains_all_alive.py 5 | -------------------------------------------------------------------------------- /scripts/fix_page_paths.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/fix_page_paths.py 5 | -------------------------------------------------------------------------------- /scripts/fix_subdomains.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/fix_subdomains.py 5 | -------------------------------------------------------------------------------- /scripts/generate_gexf.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/generate_gexf.py $1 5 | -------------------------------------------------------------------------------- /scripts/process_whatweb.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/process_whatweb.py 5 | -------------------------------------------------------------------------------- /scripts/test_up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | cd $BASEDIR 5 | scrapy crawl tor -a test=yes 6 | -------------------------------------------------------------------------------- /web/static/apple-icon-precomposed.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/apple-icon-precomposed.png -------------------------------------------------------------------------------- /scripts/build_lda_model.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/build_lda_model.py $1 $2 5 | -------------------------------------------------------------------------------- /scripts/clean_clone_groups.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/clean_clone_groups.py 5 | -------------------------------------------------------------------------------- /scripts/update_clone_fakes.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/update_clone_fakes.py 5 | -------------------------------------------------------------------------------- /scripts/update_daily_stats.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/update_daily_stats.py 5 | -------------------------------------------------------------------------------- /web/templates/ruler.macro.html: -------------------------------------------------------------------------------- 1 | {% macro ruler() %} 2 |
3 | ¯`·.¸¸.·´¯`·.¸¸.·´¯`·.¸¸.·´¯`·><(((º> 4 |
5 | {% endmacro %} -------------------------------------------------------------------------------- /3rd_party/WhatWeb/INSTALL: -------------------------------------------------------------------------------- 1 | INSTALL 2 | 3 | Visit https://github.com/urbanadventurer/WhatWeb/wiki/Installation for installation instructions. 4 | 5 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/country-ips.dat: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/3rd_party/WhatWeb/plugins/country-ips.dat -------------------------------------------------------------------------------- /scripts/create_flask_secret.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/create_flask_secret.py 5 | -------------------------------------------------------------------------------- /scripts/mark_fake.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/mark_genuine_or_fake.py $1 fake 5 | -------------------------------------------------------------------------------- /scripts/referers.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | cat /var/log/nginx/access.log | cut -f 4 -d '"' | grep -E -v '^-' | grep -v "zlal32teyptf4tvi.onion" | sort | uniq 3 | -------------------------------------------------------------------------------- /scripts/scrape.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | ( 5 | cd $BASEDIR 6 | scrapy crawl tor -a test=no 7 | ) 8 | -------------------------------------------------------------------------------- /scripts/useful_404_detection.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/useful_404_detection.py 5 | -------------------------------------------------------------------------------- /web/static/test.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | hello 7 | 8 | 9 | -------------------------------------------------------------------------------- /scripts/elasticsearch_migrate.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | $SCRIPTDIR/elasticsearch_migrate.py 5 | 6 | 7 | -------------------------------------------------------------------------------- /scripts/extract_from_url.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | http_proxy="" https_proxy="" wget --no-check-certificate --tries=1 -T 10 -O - $1 | grep -E -o '[0-9a-zA_Z]+\.onion' -------------------------------------------------------------------------------- /scripts/links2.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | links2 --http-proxy $TOR_PROXY_HOST:$TOR_PROXY_PORT $1 5 | 6 | -------------------------------------------------------------------------------- /scripts/mark_genuine.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | python $SCRIPTDIR/mark_genuine_or_fake.py $1 genuine 5 | -------------------------------------------------------------------------------- /scripts/push_if_up.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | if $DIR/checkurl.sh http://$1/; then 4 | $DIR/push.sh http://$1/ 5 | fi 6 | -------------------------------------------------------------------------------- /etc/elasticsearch: -------------------------------------------------------------------------------- 1 | ELASTICSEARCH_ENABLED=true 2 | ELASTICSEARCH_HOST=freshonions-torscraper-elasticsearch 3 | ELASTICSEARCH_PORT=9200 4 | ELASTICSEARCH_TIMEOUT=60 -------------------------------------------------------------------------------- /scripts/add_domains_to_elasticsearch.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | $SCRIPTDIR/add_domains_to_elasticsearch.py 5 | -------------------------------------------------------------------------------- /scripts/crawl_from.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | cd $BASEDIR 5 | scrapy crawl tor -a test=no -a passed_url=$1 6 | -------------------------------------------------------------------------------- /scripts/test_up_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | cd $BASEDIR 5 | scrapy crawl tor -a test=yes -a alive=yes 6 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugin-development/wikipedia-top-5.txt: -------------------------------------------------------------------------------- 1 | www.google.com 2 | www.britannica.com 3 | www.bartleby.com 4 | www.imdb.com 5 | scienceworld.wolfram.com 6 | -------------------------------------------------------------------------------- /init/down_proxy.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | BASEDIR=$DIR/.. 4 | $BASEDIR/scripts/kill.sh haproxy 5 | $BASEDIR/scripts/kill.sh tor 6 | -------------------------------------------------------------------------------- /lib/tor_paths.py: -------------------------------------------------------------------------------- 1 | import os 2 | BASEDIR = os.environ["BASEDIR"] 3 | ETCDIR = BASEDIR + "/etc" 4 | VARDIR = BASEDIR + "/var" 5 | THIRDPARTY_DIR = BASEDIR + "/3rd_party" -------------------------------------------------------------------------------- /scripts/resurrect.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | cd $BASEDIR 5 | scrapy crawl tor -a test=yes -a load_links=resurrect 6 | -------------------------------------------------------------------------------- /etc/deploy.cfg: -------------------------------------------------------------------------------- 1 | FRONTEND_USER=root 2 | FRONTEND_HOST=127.0.0.1 3 | BACKEND_USER=scraper 4 | BACKEND_HOST=127.0.0.1 5 | SERVICE_NAME=freshonions 6 | TOP_DIR=torscraper 7 | -------------------------------------------------------------------------------- /scripts/push_list.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | ( 5 | cd $BASEDIR 6 | scrapy crawl tor -a load_links=$1 -a test=yes 7 | ) 8 | -------------------------------------------------------------------------------- /web/app.wsgi: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '/home/freshonions/torscraper/lib/') 3 | sys.path.insert(0, '/home/freshonions/torscraper/web/') 4 | from app import app 5 | -------------------------------------------------------------------------------- /web/static/a9050ec9cc7032ec6ff977dff3190fae.ico.zip: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/GoSecure/freshonions-torscraper/HEAD/web/static/a9050ec9cc7032ec6ff977dff3190fae.ico.zip -------------------------------------------------------------------------------- /web/wsgi.py: -------------------------------------------------------------------------------- 1 | import sys 2 | sys.path.insert(0, '/home/freshonions/torscraper/lib/') 3 | sys.path.insert(0, '/home/freshonions/torscraper/web/') 4 | from app import app 5 | -------------------------------------------------------------------------------- /scripts/detect_clones.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 
DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | python $SCRIPTDIR/detect_clones.py 5 | $SCRIPTDIR/clean_clone_groups.sh 6 | -------------------------------------------------------------------------------- /scripts/test_up-downonly.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | cd $BASEDIR 5 | scrapy crawl tor -a test=yes -a load_links=downonly 6 | -------------------------------------------------------------------------------- /torscraper.code-workspace: -------------------------------------------------------------------------------- 1 | { 2 | "folders": [ 3 | { 4 | "path": "." 5 | } 6 | ], 7 | "settings": { 8 | "python.pythonPath": "/usr/bin/python2" 9 | } 10 | } -------------------------------------------------------------------------------- /scripts/update_daily_stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | DailyStat.new_day() 8 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/update_schema.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | 5 | mysqldump -d -h $DB_HOST -u $DB_USER --password=$DB_PASS $DB_BASE > $BASEDIR/schema.sql 6 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugin-development/charset-test-list.txt: -------------------------------------------------------------------------------- 1 | www.amazon.co.jp 2 | www.pravda.ru 3 | www.118114.cn 4 | 360.cn 5 | www.cntv.cn 6 | fastpic.ru 7 | http://www.columbia.edu/~fdc/utf8/ 8 | 9 | -------------------------------------------------------------------------------- /scripts/update_clone_fakes.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | 8 | CloneGroup.update_fakes() 9 | sys.exit(0) -------------------------------------------------------------------------------- /lib/tor_db/db.py: -------------------------------------------------------------------------------- 1 | import os 2 | from pony.orm import * 3 | db = Database() 4 | db.bind('mysql', host=os.environ['DB_HOST'], user=os.environ['DB_USER'], passwd=os.environ['DB_PASS'], db=os.environ['DB_BASE']) -------------------------------------------------------------------------------- /scripts/clean_clone_groups.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | 8 | CloneGroup.delete_empty_groups() 9 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/tor_extract_from_url.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | curl --socks5-hostname $SOCKS_PROXY --connect-timeout 30 "$1" | grep -E -o '[0-9a-zA-Z]+\.onion' # fetch URL $1 through the SOCKS proxy; print each .onion hostname found 5 | -------------------------------------------------------------------------------- /torscraper/spiders/__init__.py: -------------------------------------------------------------------------------- 1 | # This package will contain the spiders of your Scrapy project 2 | # 3 | # Please refer to the documentation for information on how to create and manage 4 | # your spiders. 5 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugin-development/alexa-top-10.txt: -------------------------------------------------------------------------------- 1 | google.com 2 | youtube.com 3 | facebook.com 4 | baidu.com 5 | yahoo.com 6 | amazon.com 7 | wikipedia.org 8 | qq.com 9 | google.co.in 10 | twitter.com 11 | -------------------------------------------------------------------------------- /db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM mariadb:10.2 2 | ENV MYSQL_USER=torscraper 3 | ENV MYSQL_PASSWORD=password 4 | ENV MYSQL_DATABASE=tor 5 | ENV MYSQL_ROOT_PASSWORD=somethingcomplicated 6 | COPY schema.sql /docker-entrypoint-initdb.d/ 7 | -------------------------------------------------------------------------------- /scripts/import_tor2web.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import json 3 | import sys 4 | json_data=open(sys.argv[1]).read() 5 | data = json.loads(json_data) 6 | for hs in data["hidden_services"]: 7 | print("%s.onion" % hs["id"]) -------------------------------------------------------------------------------- /init/isup-service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export PATH=$PATH:/bin/:/sbin/:/usr/bin/:/usr/sbin:/usr/local/bin 3 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 4 | . 
$DIR/../scripts/env.sh 5 | while true 6 | do 7 | $SCRIPTDIR/test_up.sh 8 | done 9 | -------------------------------------------------------------------------------- /init/scraper-service.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export PATH=$PATH:/bin/:/sbin/:/usr/bin/:/usr/sbin:/usr/local/bin 3 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 4 | . $DIR/../scripts/env.sh 5 | while true 6 | do 7 | $SCRIPTDIR/scrape.sh 8 | done 9 | -------------------------------------------------------------------------------- /scripts/harvest_relay.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | LOGFILE=`mktemp` 5 | TEMP1=`mktemp` 6 | scp torlogs@growl:/var/log/tor/info.log $LOGFILE | grep -E -o '[0-7a-zA_Z]+ 7 | cat $LOGFILE | 8 | -------------------------------------------------------------------------------- /init/isup-service-alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export PATH=$PATH:/bin/:/sbin/:/usr/bin/:/usr/sbin:/usr/local/bin 3 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 4 | . $DIR/../scripts/env.sh 5 | while true 6 | do 7 | $SCRIPTDIR/test_up_alive.sh 8 | done 9 | -------------------------------------------------------------------------------- /init/isup-service_alive.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | export PATH=$PATH:/bin/:/sbin/:/usr/bin/:/usr/sbin:/usr/local/bin 3 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 4 | . 
$DIR/../scripts/env.sh 5 | while true 6 | do 7 | $SCRIPTDIR/test_up_alive.sh 8 | done 9 | -------------------------------------------------------------------------------- /scripts/docker_haproxy_harvest_scrape.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | service haproxy restart 4 | /opt/torscraper/scripts/harvest.sh 5 | /opt/torscraper/scripts/push_list.sh /opt/torscraper/onions_list/onions.txt 6 | /opt/torscraper/scripts/scrape.sh 7 | -------------------------------------------------------------------------------- /web/templates/error.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 |
4 | {{code}} 5 |
6 | {% if message %} 7 |
8 | {{message}} 9 |
10 | {% endif %} 11 | {% endblock %} -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.tar.gz 2 | *.pyc 3 | /init/data/ 4 | /var/ 5 | /venv/ 6 | etc/private 7 | 8 | # Visual Studio Code 9 | .vscode/* 10 | !.vscode/settings.json 11 | !.vscode/tasks.json 12 | !.vscode/launch.json 13 | !.vscode/extensions.json 14 | -------------------------------------------------------------------------------- /lib/email_util.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | REGEX = re.compile(r'\b[a-zA-Z0-9_.+-]{1,50}@[a-zA-Z0-9-]{1,50}\.[a-zA-Z0-9-.]{1,50}[a-zA-Z0-9]\b') 4 | REGEX_ALL = re.compile('^[a-zA-Z0-9_.+-]{1,50}@[a-zA-Z0-9-]{1,50}\.[a-zA-Z0-9-.]{1,50}[a-zA-Z0-9]$') 5 | -------------------------------------------------------------------------------- /web/templates/layout.html: -------------------------------------------------------------------------------- 1 | {% if not is_cached() %} 2 | {% include "layout_header.html" %} 3 | {% endif %} 4 | 5 | {% block body %}{% endblock %} 6 | 7 | {% if not is_cached() %} 8 | {% include "layout_footer.html" %} 9 | {% endif %} 10 | 11 | 12 | -------------------------------------------------------------------------------- /lib/tor_db/models/ssh_fingerprint.py: -------------------------------------------------------------------------------- 1 | from pony.orm import * 2 | from tor_db.db import db 3 | class SSHFingerprint(db.Entity): 4 | _table_ = "ssh_fingerprint" 5 | fingerprint = Required(str, 450, unique=True) 6 | domains = Set('Domain', reverse="ssh_fingerprint") -------------------------------------------------------------------------------- /scripts/push.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | cd $BASEDIR 5 | URL=$1 6 | if echo $1 | grep -q -v -E "^http:"; then 7 | URL=http://$1/ 8 | fi; 9 | echo "Pushing $URL" 10 | scrapy crawl tor -a passed_url=$URL -a test=yes 11 | -------------------------------------------------------------------------------- /scripts/test_random.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | cd $BASEDIR 5 | RAND_LIST=`mktemp` 6 | $SCRIPTDIR/gen_random.sh $1 > $RAND_LIST 7 | scrapy crawl tor -a test=yes -a load_links=$RAND_LIST -a only_success=yes 8 | rm $RAND_LIST 9 | -------------------------------------------------------------------------------- /scripts/checkurl.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | #if curl --socks5 localhost:9050 --silent $1 &>/dev/null; then 3 | echo "Checking $1" 4 | if curl --connect-timeout 20 --socks5-hostname localhost:9050 -I --silent $1 > /dev/null; then 5 | echo "OK $1" 6 | exit 0 7 | fi; 8 | echo "ERR $1" 9 | exit 1 10 | -------------------------------------------------------------------------------- /scripts/purify.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | TEMP1=`mktemp` 5 | TEMP2=`mktemp` 6 | cat $1 | grep -E -o '[0-9a-zA-Z]+\.onion' | sort | uniq | sort -R > $TEMP1 # unique .onion hostnames from file $1, order randomized by sort -R 7 | $SCRIPTDIR/dont_have.sh $TEMP1 > $TEMP2 8 | cat $TEMP2 9 | rm $TEMP1 $TEMP2 10 | -------------------------------------------------------------------------------- /scripts/update_fingerprints.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . 
$DIR/env.sh 4 | DOMAINS=`mktemp` 5 | TOUCHFILE="$VARDIR/last_fingerprint_check" 6 | $SCRIPTDIR/domains_since_and_touch.py $TOUCHFILE > $DOMAINS 7 | cat $DOMAINS | xargs -n 1 -P 10 $SCRIPTDIR/check_fingerprint.sh 8 | -------------------------------------------------------------------------------- /lib/tor_db/models/headless_bot.py: -------------------------------------------------------------------------------- 1 | from pony.orm import * 2 | from tor_db.db import db 3 | from datetime import * 4 | class HeadlessBot(db.Entity): 5 | _table_ = "headless_bot" 6 | uuid = PrimaryKey(str, 36) 7 | kind = Optional(str, 128) 8 | created_at = Required(datetime) 9 | 10 | -------------------------------------------------------------------------------- /web/static/browserconfig.xml: -------------------------------------------------------------------------------- 1 | 2 | #ffffff -------------------------------------------------------------------------------- /scrapy.cfg: -------------------------------------------------------------------------------- 1 | # Automatically created by: scrapy startproject 2 | # 3 | # For more information about the [deploy] section see: 4 | # https://scrapyd.readthedocs.org/en/latest/deploy.html 5 | 6 | [settings] 7 | default = torscraper.settings 8 | 9 | [deploy] 10 | #url = http://localhost:6800/ 11 | project = torscraper 12 | -------------------------------------------------------------------------------- /scripts/build_dictionary.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | import autocategorize.corpus as corpus 3 | import sys 4 | print "Building dictionary..." 5 | tokenized = corpus.tokenize_documents(corpus.FrontpageDocuments()) 6 | dictionary = corpus.build_dictionary(tokenized) 7 | dictionary.save(corpus.DICTIONARY_PATH) 8 | print "Done!" 
9 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/deploy_test.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | . $ETCDIR/deploy.cfg 5 | 6 | $SCRIPTDIR/update_and_pull_schema.sh 7 | 8 | echo "rsyncing to upstream hosts..." 9 | ( 10 | cd $BASEDIR/.. 11 | rsync -a -i --exclude=.git $TOP_DIR/ $BACKEND_USER@$BACKEND_HOST:$TOP_DIR 12 | ) 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /scripts/domains_all_alive.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | @db_session 8 | def get_domains(): 9 | domains = select(d for d in Domain if d.is_up == True) 10 | for domain in domains: 11 | print(domain.host) 12 | 13 | 14 | get_domains() 15 | sys.exit(0) -------------------------------------------------------------------------------- /web/templates/info.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% block body %} 3 |
4 |

Domain Information Search

5 |

6 |

7 | Enter an onion address to see what information is in the database: 8 | 9 |
10 |

11 | {% endblock %} -------------------------------------------------------------------------------- /torscraper/pipelines.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define your item pipelines here 4 | # 5 | # Don't forget to add your pipeline to the ITEM_PIPELINES setting 6 | # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html 7 | 8 | 9 | class TorscraperPipeline(object): 10 | def process_item(self, item, spider): 11 | return item 12 | -------------------------------------------------------------------------------- /lib/tor_db/models/open_port.py: -------------------------------------------------------------------------------- 1 | from pony.orm import * 2 | from tor_db.db import db 3 | class OpenPort(db.Entity): 4 | _table_ = "open_port" 5 | port = Required(int) 6 | domain = Required('Domain') 7 | 8 | @classmethod 9 | @db_session 10 | def count_open(klass, port): 11 | return count(p for p in OpenPort if p.port==port) -------------------------------------------------------------------------------- /torscraper/items.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Define here the models for your scraped items 4 | # 5 | # See documentation in: 6 | # http://doc.scrapy.org/en/latest/topics/items.html 7 | 8 | import scrapy 9 | 10 | 11 | class TorscraperItem(scrapy.Item): 12 | # define the fields for your item here like: 13 | # name = scrapy.Field() 14 | pass 15 | -------------------------------------------------------------------------------- /lib/tor_db/__init__.py: -------------------------------------------------------------------------------- 1 | import urlparse 2 | import re 3 | import os 4 | from pony.orm import * 5 | from datetime import * 6 | import dateutil.parser 7 | import pretty 8 | import banned 9 | from tor_elasticsearch import * 10 | 11 | from tor_db.db import db 12 | from tor_db.constants 
import * 13 | from tor_db.models import * 14 | 15 | db.generate_mapping(create_tables=True) -------------------------------------------------------------------------------- /scripts/update_and_pull_schema.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | . $ETCDIR/deploy.cfg 5 | 6 | # get version 7 | 8 | echo "Updating schema.sql..." 9 | ssh $FRONTEND_USER@$FRONTEND_HOST "cd $TOP_DIR/scripts/ && ./update_schema.sh" 10 | scp $FRONTEND_USER@$FRONTEND_HOST:$TOP_DIR/schema.sql $BASEDIR/schema.sql 11 | 12 | 13 | 14 | -------------------------------------------------------------------------------- /scripts/ban_list.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | from tabulate import tabulate 7 | 8 | @db_session 9 | def list_banned(): 10 | domains = Domain.banned() 11 | data = map(lambda d: [d.index_url(), d.title], domains) 12 | print(tabulate(data)) 13 | 14 | 15 | list_banned() 16 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/tor_rip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | LIST=`mktemp` 5 | LIST2=`mktemp` 6 | $SCRIPTDIR/tor_extract_from_url.sh $1 > $LIST 7 | $SCRIPTDIR/purify.sh $LIST > $LIST2 8 | NUMBER=`wc -l $LIST2 | tr -s ' ' | cut -f 1 -d ' '` 9 | echo "Harvested $NUMBER onion links..." 
10 | $SCRIPTDIR/push_list.sh $LIST2 11 | rm $LIST $LIST2 12 | 13 | -------------------------------------------------------------------------------- /lib/tor_db/models/email.py: -------------------------------------------------------------------------------- 1 | from pony.orm import * 2 | from tor_db.db import db 3 | import tor_db.models.domain 4 | class Email(db.Entity): 5 | address = Required(str, 100, unique=True) 6 | pages = Set('Page', reverse="emails", column="page", table="email_link") 7 | 8 | def domains(self): 9 | return select(d for d in tor_db.models.domain.Domain for p in d.pages for e in p.emails if e == self) -------------------------------------------------------------------------------- /scripts/check_fingerprint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | HOST=$1 5 | TIMEOUT=60 6 | FPRINT=`torify $SCRIPTDIR/ssh_fingerprint.py $HOST 2>/dev/null | grep ssh-rsa | cut -f 2 -d ' '` 7 | if [ -n "$FPRINT" ]; then 8 | echo "Got $FPRINT for $HOST" 9 | $SCRIPTDIR/add_ssh_fingerprint.py "$HOST" "$FPRINT" 10 | else 11 | echo "No fingerprint for $HOST" 12 | fi -------------------------------------------------------------------------------- /scripts/stats.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | from operator import itemgetter 6 | import sys 7 | from tabulate import tabulate 8 | 9 | @db_session 10 | def list_stats(): 11 | data = list(DailyStat.get_stats().iteritems()) 12 | print(tabulate( sorted(data, key=itemgetter(0)) )) 13 | 14 | 15 | list_stats() 16 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/dont_have.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm 
import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | @db_session 8 | def print_lines(): 9 | lines = [line.strip() for line in open(sys.argv[1])] 10 | for line in lines: 11 | try: 12 | d = Domain.get(host=line) 13 | if not d: 14 | print line 15 | except: 16 | continue 17 | 18 | print_lines() 19 | -------------------------------------------------------------------------------- /scripts/fix_subdomains.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import os 6 | import sys 7 | 8 | @db_session 9 | def fix_subdomains(): 10 | domains = select(d for d in Domain) 11 | for domain in domains: 12 | print(domain.host) 13 | if domain.host.count(".") > 1: 14 | domain.is_subdomain = True 15 | 16 | 17 | fix_subdomains() 18 | sys.exit(0) -------------------------------------------------------------------------------- /web/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 4 | . /opt/torscraper/scripts/env.sh 5 | 6 | echo "Waiting for $DB_HOST to be ready" 7 | while ! mysqladmin ping -h $DB_HOST --silent; do 8 | # Show some progress 9 | echo -n '.'; 10 | sleep 1; 11 | done 12 | echo "$DB_HOST is ready" 13 | # Give it another second. 14 | sleep 1; 15 | 16 | /opt/torscraper/scripts/web.sh 17 | -------------------------------------------------------------------------------- /scripts/rip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | LIST=`mktemp` 5 | LIST2=`mktemp` 6 | $SCRIPTDIR/extract_from_url.sh $1 > $LIST 7 | $SCRIPTDIR/purify.sh $LIST > $LIST2 8 | NUMBER=`wc -l $LIST2 | tr -s ' ' | cut -f 1 -d ' '` 9 | echo "Harvested $NUMBER onion links..." 
10 | ( 11 | cd $BASEDIR 12 | scrapy crawl tor -a load_links=$LIST2 -a test=yes 13 | ) 14 | rm $LIST $LIST2 15 | 16 | -------------------------------------------------------------------------------- /scripts/gen_random.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | def get_domains(): 8 | 9 | if len(sys.argv) < 2: 10 | print("Usage %s NUMBER" % sys.argv[0]) 11 | sys.exit(1) 12 | number = int(sys.argv[1]) 13 | domains = Domain.random(number) 14 | for domain in domains: 15 | print(domain) 16 | 17 | 18 | get_domains() 19 | sys.exit(0) -------------------------------------------------------------------------------- /lib/version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tor_paths 3 | VERSION_PATH = tor_paths.ETCDIR + "/version_string" 4 | REVISION_PATH = tor_paths.ETCDIR + "/revision" 5 | 6 | def version(): 7 | with open(VERSION_PATH,'r') as f: 8 | version_string = f.read().strip() 9 | return version_string 10 | 11 | def revision(): 12 | with open(REVISION_PATH,'r') as f: 13 | revision_string = f.read().strip() 14 | return int(revision_string) 15 | 16 | 17 | -------------------------------------------------------------------------------- /web/templates/clones_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

Clones

10 |

Showing possible clones for "{{onion}}" [JSON]

11 |

12 | {{ domain_table(domains)}} 13 | 14 |
15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /web/templates/email_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

Email Address

10 |

Showing domains for address "{{email}}" [JSON]

11 |

12 | {{ domain_table(domains)}} 13 | 14 |
15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /scripts/n_relationships.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | . $ETCDIR/database 5 | SQL="SELECT COUNT(DISTINCT d1.host, d2.host) FROM domain AS d1 LEFT JOIN page AS p1 ON p1.domain=d1.id LEFT JOIN page_link AS pl ON pl.link_from=p1.id LEFT JOIN page AS p2 ON pl.link_to=p2.id LEFT JOIN domain AS d2 ON d2.id=p2.domain WHERE d1.id IS NOT NULL AND d2.id IS NOT NULL" 6 | echo $SQL | mysql -u $DB_USER -h $DB_HOST --password=$DB_PASS $DB_BASE 7 | -------------------------------------------------------------------------------- /web/templates/bitcoin_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

Bitcoin Address

10 |

Showing domains for address "{{addr}}" [JSON]

11 |

12 | {{ domain_table(domains)}} 13 | 14 |
15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /web/templates/port_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

Open Port

10 |

Showing domains which have {{port_list_str}} open [JSON]

11 |

12 | {{ domain_table(domains)}} 13 | 14 |
15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scrapy 2 | pony==0.7.3 3 | PyMySQL 4 | py-pretty 5 | pycrypto 6 | elasticsearch>=5.0.0,<6.0.0 7 | elasticsearch-dsl>=5.0.0,<6.0.0 8 | python-dateutil 9 | Twisted 10 | txsocksx 11 | numpy 12 | scipy 13 | scikit-learn 14 | tabulate 15 | langdetect 16 | pycountry 17 | networkx 18 | python-memcached 19 | pillow 20 | flask 21 | app 22 | timeout_decorator 23 | pretty 24 | crypto 25 | SHA256 26 | gensim 27 | sklearn 28 | paramiko 29 | PyPyDispatcher>=2.1.0 30 | urllib3==1.21.1 -------------------------------------------------------------------------------- /web/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:2.7-alpine 2 | MAINTAINER Olivier Bilodeau 3 | 4 | RUN apk update && \ 5 | apk add python python-dev libffi-dev gcc make musl-dev py-pip mysql-client openssl-dev 6 | 7 | RUN mkdir -p /opt/torscraper/web 8 | COPY . 
/opt/torscraper/web 9 | WORKDIR /opt/torscraper/web 10 | 11 | RUN pip install -r requirements.txt 12 | 13 | RUN chmod +x /opt/torscraper/web/docker-entrypoint.sh 14 | 15 | CMD ["/opt/torscraper/web/docker-entrypoint.sh"] 16 | -------------------------------------------------------------------------------- /lib/tor_db/models/bitcoin_address.py: -------------------------------------------------------------------------------- 1 | from pony.orm import * 2 | from tor_db.db import db 3 | import tor_db.models.domain 4 | class BitcoinAddress(db.Entity): 5 | _table_ = "bitcoin_address" 6 | address = Required(str, 100, unique=True) 7 | pages = Set('Page', reverse="bitcoin_addresses", column="page", table="bitcoin_address_link") 8 | 9 | def domains(self): 10 | return select(d for d in tor_db.models.domain.Domain for p in d.pages for b in p.bitcoin_addresses if b == self) -------------------------------------------------------------------------------- /scripts/portscan_up.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import portscanner 6 | import sys 7 | 8 | @db_session 9 | def get_domains(): 10 | hostlist = [] 11 | domains = select(d for d in Domain if d.is_up == True and d.portscanned_at == NEVER) 12 | for domain in domains: 13 | hostlist.append(domain.host) 14 | return list(set(hostlist)) 15 | 16 | hostlist = get_domains() 17 | p=portscanner.PortScanner(hostlist) 18 | sys.exit(0) -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins-disabled/length.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | Plugin.define "Len" do 9 | author "Andrew Horton" 10 | version "0.1" 11 | description "The length of the HTML body" 12 | 13 | 14 | def passive 15 | [{:string=>@body.size}] 16 | end 17 | 18 | end 19 | 20 | -------------------------------------------------------------------------------- /scripts/stronghold_paste_rip.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | LIST=`mktemp` 5 | LIST2=`mktemp` 6 | TEMP=`mktemp` 7 | wget -r --level=1 --no-check-certificate -O $TEMP http://nzxj65x32vh2fkhk.onion/all 8 | cat $TEMP | grep -E -o '[0-9a-zA_Z]+\.onion' > $LIST 9 | $SCRIPTDIR/purify.sh $LIST > $LIST2 10 | NUMBER=`wc -l $LIST2 | tr -s ' ' | cut -f 1 -d ' '` 11 | echo "Harvested $NUMBER onion links..." 12 | $SCRIPTDIR/push_list.sh $LIST2 13 | rm $LIST $LIST2 $TEMP 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /lib/tor_db/models/__init__.py: -------------------------------------------------------------------------------- 1 | from tor_db.models.domain import * 2 | from tor_db.models.bitcoin_address import * 3 | from tor_db.models.email import * 4 | from tor_db.models.page import * 5 | from tor_db.models.ssh_fingerprint import * 6 | from tor_db.models.open_port import * 7 | from tor_db.models.clone_group import * 8 | from tor_db.models.request_log import * 9 | from tor_db.models.search_log import * 10 | from tor_db.models.daily_stat import * 11 | from tor_db.models.headless_bot import * 12 | from tor_db.models.web_component import * -------------------------------------------------------------------------------- /web/templates/language_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 
'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

Language

10 |

Showing domains which use {{language}} language [JSON]

11 |

You can find other languages here

12 | {{ domain_table(domains)}} 13 | 14 |
15 | {% endblock %} 16 | -------------------------------------------------------------------------------- /scripts/ban.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | from tabulate import tabulate 7 | 8 | @db_session 9 | def ban(url): 10 | if not url: 11 | print("Usage: %s http://domain.onion/" % sys.argv[0]) 12 | sys.exit(1) 13 | domain = Domain.find_by_url(url) 14 | if not domain: 15 | print("Could not find '%s'" % url) 16 | sys.exit(1) 17 | domain.is_banned = True 18 | domain.ban_exempt = False 19 | print("Banned '%s'" % url) 20 | 21 | ban(sys.argv[1]) 22 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/make_genuine.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | @db_session 8 | def make_genuine(): 9 | lines = [line.strip() for line in open(sys.argv[1])] 10 | for host in lines: 11 | domain = select(d for d in Domain if d.host==host).first() 12 | if not domain: 13 | print("Couldn't find %s" % host) 14 | continue 15 | if domain.is_genuine: 16 | continue 17 | print("Marking %s as genuine" % host) 18 | Domain.make_genuine(host) 19 | 20 | make_genuine() 21 | -------------------------------------------------------------------------------- /scripts/unban.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | from tabulate import tabulate 7 | 8 | @db_session 9 | def unban(url): 10 | if not url: 11 | print("Usage: %s http://domain.onion/" % sys.argv[0]) 12 | sys.exit(1) 13 | domain = Domain.find_by_url(url) 14 | if not domain: 15 | print("Could 
not find '%s'" % url) 16 | sys.exit(1) 17 | domain.is_banned = False 18 | domain.ban_exempt = True 19 | print("Unbanned '%s'" % url) 20 | 21 | unban(sys.argv[1]) 22 | sys.exit(0) -------------------------------------------------------------------------------- /web/templates/ssh_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {{ ruler() }} 7 | 8 |
9 |

SSH Fingerprint

10 |

Showing domains for fingerprint [JSON]

11 |
12 |
13 | {{fingerprint}} 14 |
15 |
16 |

17 | {{ domain_table(domains)}} 18 | 19 |
20 | {% endblock %} 21 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/airtiesrouter.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | Plugin.define "AirTiesRouter" do 9 | author "Andrew Horton" 10 | version "0.1" 11 | description "AirTies Wireless Router" 12 | website "http://www.airties.com/" 13 | 14 | 15 | matches [ 16 | {:version=>/Airties ([^<]+)</} 17 | ] 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/myzone.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | Plugin.define "MyZone" do 9 | author "Andrew Horton" 10 | version "0.1" 11 | description "Mobile 3G WiFi Rrouter" 12 | website "www.netcomm.com.au" 13 | 14 | 15 | matches [ 16 | {:regexp=>/<title>MyZone<\/title>.*www\.netcomm\.com\.au/m} 17 | ] 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /scripts/add_ssh_fingerprint.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | import sys 6 | 7 | @db_session 8 | def create_fingerprint(): 9 | host = sys.argv[1] 10 | fprint = sys.argv[2] 11 | domain = Domain.get(host=host) 12 | if not domain: 13 | sys.exit(1) 14 | 15 | ssh_fprint = SSHFingerprint.get(fingerprint=fprint) 16 | if not ssh_fprint: 17 | ssh_fprint = SSHFingerprint(fingerprint=fprint) 18 | 19 | domain.ssh_fingerprint = ssh_fprint 20 | return None 21 | 22 | create_fingerprint() 23 | sys.exit(0) -------------------------------------------------------------------------------- /scripts/add_domains_to_elasticsearch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | from pony.orm import * 3 | from datetime import * 4 | from tor_db import * 5 | from tor_elasticsearch import * 6 | import sys 7 | 8 | @db_session 9 | def add_domains(): 10 | domains = select(d for d in Domain) 11 | for domain in domains: 12 | dom = DomainDocType.from_obj(domain) 13 | dom.save() 14 | print(domain.host) 15 | 16 | 17 | if is_elasticsearch_enabled(): 18 | print("[+] Elastic search enabled") 19 | else: 20 | print("[!] 
Elastic search disabled") 21 | sys.exit(1) 22 | 23 | add_domains() 24 | sys.exit(0) -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugin-development/wget-list: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | AGENT="Mozilla/5.0 (X11; U; Linux i686; en-US; rv:1.9.0.12) Gecko/2009070811 Ubuntu/9.04 (jaunty) Firefox/3.0.12" 4 | 5 | if [ -z $1 ]; then 6 | echo "Usage: $0 <file with list of urls>" 7 | echo "downloads each URL's html and headers into the current directory" 8 | exit 9 | fi 10 | 11 | 12 | for i in `cat $1` ; do 13 | u=$i; f=`echo $i | sed 's/http:\/\///g' | sed 's/\//-/g'` 14 | # wget -U "$AGENT" -t 2 -O $f.html $u 15 | # curl -A "$AGENT" -I $u > $f.meta 16 | curl -A "$AGENT" -k -i $u > $f.http 17 | done 18 | 19 | -------------------------------------------------------------------------------- /web/requirements.txt: -------------------------------------------------------------------------------- 1 | # TODO: slim down the requirements 2 | pony 3 | PyMySQL 4 | py-pretty 5 | elasticsearch>=5.0.0,<6.0.0 6 | elasticsearch-dsl>=5.0.0,<6.0.0 7 | python-dateutil 8 | # removed to see if web interface works w/o this 9 | #numpy 10 | #scipy 11 | #scikit-learn 12 | #sklearn 13 | # build fails, is it really required? 
14 | #pillow 15 | #gensim 16 | #tabulate 17 | #app 18 | #networkx 19 | #paramiko 20 | crypto 21 | pycrypto 22 | SHA256 23 | langdetect 24 | pycountry 25 | python-memcached 26 | flask 27 | timeout_decorator 28 | pretty 29 | PyPyDispatcher>=2.1.0 30 | urllib3==1.21.1 31 | -------------------------------------------------------------------------------- /web/templates/languages.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% import 'input.macro.html' as input %} 5 | {% block body %} 6 | 7 | {{ ruler() }} 8 | 9 | <div class="contents"> 10 | <h2>Find Onions By Language</h2> 11 | <form> 12 | <p> 13 | Pick a language. The number in (brackets) is the current count of sites in that language. 14 | </p> 15 | <p> 16 | {{input.dropdown('lang', options, selected='')}} 17 | {{input.submit('GO >>>')}} 18 | </p> 19 | </form> 20 | </div> 21 | {% endblock %} 22 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/samsung-printer.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | Plugin.define "Samsung-Printer" do 9 | author "Andrew Horton" 10 | version "0.1" 11 | description "Samsung. 
SyncThru Web Service - Embedded Web Server" 12 | 13 | 14 | 15 | matches [ 16 | {:text=>'var debugMode = ("$$GSI_TCPIP_IP_ADDR$$".indexOf(".")' } 17 | ] 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/backbee.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "BackBee" do 8 | author "Guillaume Delacour <gui@iroqwa.org>" # 2016-04-22 9 | version "0.1" 10 | description "BackBee is an open source Content Management System (CMS)" 11 | 12 | # Matches # 13 | matches [ 14 | { :certainty=>100, :text=>'<div id="bb5-site-wrapper">' }, 15 | ] 16 | 17 | end 18 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/openid.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | 9 | Plugin.define "OpenID" do 10 | author "Caleb Anderson" 11 | version "0.1" 12 | description "openid detection" 13 | 14 | 15 | 16 | matches [ 17 | {:name=>"openid", 18 | :regexp=>/<link [^>]*rel=['"](openid\.server|openid\.delegate)['"][^>]*>/i 19 | }, 20 | 21 | ] 22 | 23 | 24 | end 25 | 26 | 27 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/ip.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | # Version 0.2 9 | # added - unless @ip.empty? 10 | 11 | Plugin.define "IP" do 12 | author "Andrew Horton" 13 | version "0.2" 14 | description "IP address of the target, if available." 15 | 16 | def passive 17 | m=[] 18 | 19 | m << {:string=>@ip } unless @ip.nil? or @ip.empty? 20 | m 21 | end 22 | 23 | end 24 | 25 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/lightbox.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | 9 | # Version 0.2 10 | # removed :probability & :name 11 | 12 | Plugin.define "Lightbox" do 13 | author "Andrew Horton" 14 | version "0.2" 15 | description "Javascript for nice image popups" 16 | 17 | # identifying strings 18 | 19 | matches [ 20 | {:regexp=>/<script [^>]*(lightbox[^>]*.js)[^>]*/}, 21 | ] 22 | 23 | 24 | end 25 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/dwr.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "dwr" do 8 | author "Aung Khant <http://yehg.net/>" # 2012-02-12 9 | version "0.1" 10 | description "Direct Web Remoting - http://directwebremoting.org" 11 | 12 | 13 | 14 | # Matches # 15 | matches [ 16 | {:name=>'HTML Body',:text=>'/dwr/engine.js\'>'}, 17 | {:name=>'HTML Body',:text=>'/dwr/engine.js">'} 18 | ] 19 | 20 | end 21 | -------------------------------------------------------------------------------- /web/templates/path_list.html: -------------------------------------------------------------------------------- 1 | {% extends "layout.html" %} 2 | {% from 'domain_table.macro.html' import domain_table %} 3 | {% from 'ruler.macro.html' import ruler %} 4 | {% block body %} 5 | 6 | {% macro path_url(domain, path) %} 7 | <a href="{{domain.construct_url(path)}}" class="title_link small">[PATH LINK]</a> 8 | {% endmacro %} 9 | 10 | {{ ruler() }} 11 | 12 | <div class="contents"> 13 | <h2>Path List</h2> 14 | <p>Showing domains for path "{{path}}" <a href="{{url_for('path_list_json', path=path[1:])}}">[JSON]</a></p> 15 | <p></p> 16 | {{ domain_table(domains, extra=path_url, extra_args=path)}} 17 | 
18 | </div> 19 | {% endblock %} 20 | -------------------------------------------------------------------------------- /etc/proxy: -------------------------------------------------------------------------------- 1 | export TOR_PROXY_PORT=8118 2 | export http_proxy=http://127.0.0.1:3128 3 | export TOR_PROXY_HOST=freshonions-torscraper-tor-privoxy 4 | export https_proxy=https://127.0.0.1:3128 5 | export SOCKS_PROXY=freshonions-torscraper-tor-privoxy:9050 6 | HIDDEN_SERVICE_PROXY_HOST=freshonions-torscraper-tor-privoxy 7 | HIDDEN_SERVICE_PROXY_PORT=9050 8 | HIDDEN_SERVICE_PROXY_HOST2=freshonions-torscraper-tor-privoxy2 9 | HIDDEN_SERVICE_PROXY_PORT2=9051 10 | HIDDEN_SERVICE_PROXY_HOST3=freshonions-torscraper-tor-privoxy3 11 | HIDDEN_SERVICE_PROXY_PORT3=9052 12 | HIDDEN_SERVICE_PROXY_HOST4=freshonions-torscraper-tor-privoxy4 13 | HIDDEN_SERVICE_PROXY_PORT4=9053 -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins-disabled/html-comments.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "HTML-Comments" do 8 | author "Brendan Coles <bcoles@gmail.com>" # 2010-10-20 9 | version "0.1" 10 | description "This plugin extracts the HTML comments from the HTML source." 
11 | 12 | 13 | 14 | # Matches # 15 | matches [ 16 | 17 | # Get HTML comments 18 | { :string=>/<!--([^>]+)-->/ }, 19 | 20 | ] 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/crazyegg.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "CrazyEgg" do 8 | author "Peter van der Laan" 9 | version "0.1" 10 | description "Visualizes where your visitors click" 11 | website "http://www.crazyegg.com/" 12 | 13 | matches [ 14 | 15 | # HTML 16 | { :text=>"cetrk.com" }, 17 | { :account=>/dnn506yrbagrg\.cloudfront\.net\/pages\/scripts\/(\d+\/\d+)/} 18 | 19 | ] 20 | 21 | end 22 | 23 | -------------------------------------------------------------------------------- /etc/banned_words: -------------------------------------------------------------------------------- 1 | pthc 2 | cp 3 | jailbait 4 | pedo 5 | child porn 6 | childporn 7 | kiddy porn 8 | kiddyporn 9 | preteen 10 | pre-teen 11 | pre teen 12 | preeteen 13 | pedobox 14 | pedofap 15 | pedoempire 16 | child 17 | childs 18 | hurtcore 19 | toddler 20 | toddlers 21 | pedophile 22 | paedophile 23 | children 24 | underage 25 | young 26 | boy 27 | boys 28 | jail bait 29 | loli 30 | lolita 31 | молоденькие 32 | детское 33 | pedomom 34 | kids 35 | kid 36 | r@ygold 37 | pt 38 | boylove 39 | boyslove 40 | girllove 41 | girlslove 42 | girlove 43 | preteens 44 | hardcandy 45 | hard candy 46 | hurt2core 47 | pedos 48 | Youngest 49 | youngest 50 | teen 51 | Teen 52 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/typekit.rb: 
-------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Typekit" do 8 | author "Brendan Coles <bcoles@gmail.com>" # 2011-04-17 9 | version "0.1" 10 | description "JavaScript font manager" 11 | website "http://typekit.com/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # JavaScript Source 19 | { :regexp=>/<script [^>]*src=["'][^>]*use\.typekit\.com/i }, 20 | 21 | ] 22 | 23 | end 24 | 25 | 26 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/watson.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | Plugin.define "Watson-Router" do 9 | author "Andrew Horton" 10 | version "0.1" 11 | description "Home Router. Default username/password is admin/admin" 12 | website "http://www.schmid-telecom.com/" 13 | 14 | 15 | matches [ 16 | {:text=>"<TITLE>Watson Management Console" }, 17 | {:text=>""} 18 | ] 19 | 20 | end 21 | 22 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/clicky.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Clicky" do 8 | author "Brendan Coles " # 2012-04-07 9 | version "0.1" 10 | description "Clicky - Real Time Web Analytics" 11 | website "https://www.getclicky.com/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # JavaScript 19 | { :regexp=>/]+src=["'](https?:)?\/\/static\.getclicky\.com/i }, 20 | 21 | ] 22 | 23 | end 24 | 25 | -------------------------------------------------------------------------------- /scripts/get_valid.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | DIR=$( cd "$(dirname "$0")" ; pwd -P ) 3 | . $DIR/env.sh 4 | . $ETCDIR/database 5 | SQL="UPDATE domain SET is_fake=0, is_genuine=0 WHERE manual_genuine=0;" 6 | LIST=`mktemp` 7 | echo "[+] Wiping current fake / genuine" 8 | echo $SQL | mysql -u $DB_USER -h $DB_HOST --password=$DB_PASS $DB_BASE 9 | echo "[+] Getting superlist" 10 | $SCRIPTDIR/extract_from_url.sh https://www.reddit.com/r/DNMSuperlist/wiki/superlist.json > $LIST 11 | echo "[?} VALID LIST:" 12 | cat $LIST 13 | echo "[+] Marking genuine ... " 14 | $SCRIPTDIR/make_genuine.py $LIST 15 | echo "[+] Updating fakes from clone groups ... 
" 16 | $SCRIPTDIR/update_clone_fakes.sh 17 | rm $LIST 18 | -------------------------------------------------------------------------------- /scripts/create_flask_secret.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | privateDirectory = os.environ['ETCDIR'] + "/private/" 4 | 5 | if not os.path.exists(privateDirectory): 6 | os.mkdir(privateDirectory) 7 | print("Directory " , privateDirectory , " Created ") 8 | else: 9 | print("Directory " , privateDirectory , " already exists") 10 | 11 | PATH = os.environ['ETCDIR'] + "/private/flask.secret" 12 | secret=os.urandom(32) 13 | file = open(PATH, "w") 14 | while('"' in secret.encode("string-escape") or '`' in secret.encode("string-escape")): 15 | secret=os.urandom(32) 16 | file.write('FLASK_SECRET="%s"\n' % secret.encode("string-escape")) 17 | print("Written flask secret to '%s'" % PATH) 18 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/ecomat-cms.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Ecomat-CMS" do 8 | author "Brendan Coles " # 2011-02-27 9 | version "0.1" 10 | description "Ecomat CMS" 11 | website "http://www.ecomat.ch/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # Version Detection # Meta Generator 19 | { :version=>// }, 20 | 21 | ] 22 | 23 | end 24 | 25 | 26 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/html5.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | # Version 0.2 8 | # removed :probability 9 | ## 10 | Plugin.define "HTML5" do 11 | author "Andrew Horton" 12 | version "0.2" 13 | description "HTML version 5, detected by the doctype declaration" 14 | 15 | 16 | 17 | 18 | # Matches # 19 | matches [ 20 | 21 | { :regexp=>//i }, 22 | { :string=>"applicationCache", :regexp=>/]* manifest=/ }, 23 | 24 | ] 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/dvr-webclient.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "DVR-WebClient" do 8 | author "Andrew Horton" 9 | version "0.1" 10 | description "DVR camera" 11 | 12 | # Dorks # 13 | dorks [ 14 | 'intitle:"DVR WebClient"' 15 | ] 16 | 17 | 18 | 19 | # Matches # 20 | matches [ 21 | 22 | {:md5=>'8cf9f140f2ec4f5d3e533b5bc2b221ea'}, 23 | 24 | # clsid 25 | {:text=>'259F9FDF-97EA-4C59-B957-5160CAB6884E'} 26 | 27 | ] 28 | 29 | 30 | end 31 | 32 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/mint.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Mint" do 8 | author "Brendan Coles " # 2011-04-15 9 | version "0.1" 10 | description "Mint is an extensible, self-hosted web site analytics program" 11 | website "http://www.haveamint.com/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # JavaScript Source 19 | { :regexp=>/", re.IGNORECASE | re.DOTALL) 3 | STYLE_TAG_REGEX = re.compile(r"", re.IGNORECASE | re.DOTALL) 4 | COMPRESS_WS_REGEX = re.compile(r"[\r\t ]*\n[\r\t\n ]*", re.IGNORECASE | re.DOTALL) 5 | def break_long_words(text): 6 | return re.sub("([^ <>\\t\\n\\r\\f\\v]{35,70})","\\1 ", text) 7 | 8 | def strip_html(text): 9 | cleaned = re.sub(SCRIPT_TAG_REGEX, '', text) 10 | cleaned = re.sub(STYLE_TAG_REGEX, '', cleaned) 11 | cleaned = re.sub('<[^<]+?>', '', cleaned) 12 | cleaned = re.sub(COMPRESS_WS_REGEX, "\n", cleaned) 13 | return cleaned 14 | 15 | 16 | def utf8_conv(s): 17 | return unicode(s, "utf-8", "replace") -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/bing-searchengine.rb: 
-------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | # Version 0.2 9 | # removed :name, :certainty=>100, and changed regexp to text 10 | 11 | Plugin.define "Bing-SearchEngine" do 12 | author "Andrew Horton" 13 | version "0.1" 14 | description "Bing.com is Microsoft's search engine" 15 | 16 | 17 | # 18 | 19 | matches [ 20 | {:text=>'var curUrl="http://www.bing.com/"' }, 21 | {:text=>'" # 2010-10-12 9 | version "0.1" 10 | description "PHP powered shopping cart" 11 | website "http://www.eptcel.com.br/" 12 | 13 | # 1 result for "powered by ez oscommerce" @ 2010-10-12 14 | 15 | 16 | matches [ 17 | 18 | { :text=>'Powered by ez oscommerce' }, 19 | 20 | ] 21 | 22 | end 23 | 24 | -------------------------------------------------------------------------------- /tor-privoxy/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dockage/alpine:3.9-openrc 2 | 3 | MAINTAINER Olivier Bilodeau 4 | # Modified from a Dockerfile by Dockage 5 | # Under the MIT license 6 | # https://github.com/dockage/tor-privoxy 7 | 8 | RUN apk --no-cache --update add tor privoxy \ 9 | && mv /etc/tor/torrc.sample /etc/tor/torrc \ 10 | && echo "forward-socks5 / 0.0.0.0:9050 ." 
>> /etc/privoxy/config \ 11 | && sed -i 's/listen-address\s*127.0.0.1:8118/listen-address 0.0.0.0:8118/g' /etc/privoxy/config \ 12 | && sed -i 's/#SOCKSPort 192.168.0.1:9100/SOCKSPort 0.0.0.0:9050/g' /etc/tor/torrc \ 13 | && rc-update add tor \ 14 | && rc-update add privoxy 15 | 16 | EXPOSE 9050/tcp 8118/tcp 17 | -------------------------------------------------------------------------------- /tor-privoxy2/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dockage/alpine:3.9-openrc 2 | 3 | MAINTAINER Olivier Bilodeau 4 | # Modified from a Dockerfile by Dockage 5 | # Under the MIT license 6 | # https://github.com/dockage/tor-privoxy 7 | 8 | RUN apk --no-cache --update add tor privoxy \ 9 | && mv /etc/tor/torrc.sample /etc/tor/torrc \ 10 | && echo "forward-socks5 / 0.0.0.0:9051 ." >> /etc/privoxy/config \ 11 | && sed -i 's/listen-address\s*127.0.0.1:8118/listen-address 0.0.0.0:8119/g' /etc/privoxy/config \ 12 | && sed -i 's/#SOCKSPort 192.168.0.1:9100/SOCKSPort 0.0.0.0:9051/g' /etc/tor/torrc \ 13 | && rc-update add tor \ 14 | && rc-update add privoxy 15 | 16 | EXPOSE 9051/tcp 8119/tcp 17 | -------------------------------------------------------------------------------- /tor-privoxy3/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dockage/alpine:3.9-openrc 2 | 3 | MAINTAINER Olivier Bilodeau 4 | # Modified from a Dockerfile by Dockage 5 | # Under the MIT license 6 | # https://github.com/dockage/tor-privoxy 7 | 8 | RUN apk --no-cache --update add tor privoxy \ 9 | && mv /etc/tor/torrc.sample /etc/tor/torrc \ 10 | && echo "forward-socks5 / 0.0.0.0:9052 ." 
>> /etc/privoxy/config \ 11 | && sed -i 's/listen-address\s*127.0.0.1:8118/listen-address 0.0.0.0:8120/g' /etc/privoxy/config \ 12 | && sed -i 's/#SOCKSPort 192.168.0.1:9100/SOCKSPort 0.0.0.0:9052/g' /etc/tor/torrc \ 13 | && rc-update add tor \ 14 | && rc-update add privoxy 15 | 16 | EXPOSE 9052/tcp 8120/tcp 17 | -------------------------------------------------------------------------------- /tor-privoxy4/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM dockage/alpine:3.9-openrc 2 | 3 | MAINTAINER Olivier Bilodeau 4 | # Modified from a Dockerfile by Dockage 5 | # Under the MIT license 6 | # https://github.com/dockage/tor-privoxy 7 | 8 | RUN apk --no-cache --update add tor privoxy \ 9 | && mv /etc/tor/torrc.sample /etc/tor/torrc \ 10 | && echo "forward-socks5 / 0.0.0.0:9053 ." >> /etc/privoxy/config \ 11 | && sed -i 's/listen-address\s*127.0.0.1:8118/listen-address 0.0.0.0:8121/g' /etc/privoxy/config \ 12 | && sed -i 's/#SOCKSPort 192.168.0.1:9100/SOCKSPort 0.0.0.0:9053/g' /etc/tor/torrc \ 13 | && rc-update add tor \ 14 | && rc-update add privoxy 15 | 16 | EXPOSE 9053/tcp 8121/tcp 17 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/dublin_core.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | 9 | Plugin.define "DublinCore" do 10 | author "Caleb Anderson" 11 | version "0.1" 12 | description "Dublin Core Metadata Initiative (DCMI) supports shared innovation in metadata design and best practices across a broad range of purposes and business models." 
13 | website "http://dublincore.org" 14 | 15 | 16 | matches [ 17 | {:name=>"dublin core", :regexp=>/]*name="DC\.title"[^>]*>/i}, 18 | ] 19 | end 20 | 21 | 22 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/incapsula-waf.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Incapsula-WAF" do 8 | author "Aung Khant " # 2012-02-10 9 | version "0.1" 10 | description "Incapsula-WAF - http://www.incapsula.com" 11 | 12 | 13 | 14 | # Matches # 15 | matches [ 16 | {:name => 'Set-cookie Header', :search=>"headers[set-cookie]", :regexp=>/incap_ses_/i}, 17 | {:name => 'Set-cookie Header', :search=>"headers[set-cookie]", :regexp=>/incap_visid_83_/i} 18 | ] 19 | 20 | end 21 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/litespeed.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "LiteSpeed" do 8 | author "Sagar Prakash Junnarkar " # 2012-11-09" 9 | version "1.0" 10 | description "LiteSpeed web server, which is able to read Apache configuration directly and used together with web hosting control panels by replacing Apache" 11 | 12 | def passive 13 | m=[] 14 | m << {:name=>"LiteSpeed server" } if @headers["server"] =~ /LiteSpeed/i 15 | m 16 | end 17 | end 18 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/toshiba-printer.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | # Version 0.2 9 | # removed :certainty=>100 & :name. convert :regexp to :text 10 | 11 | Plugin.define "ToshibaPrinter" do 12 | author "Andrew Horton" 13 | version "0.2" 14 | description "Toshiba printer Top Access" 15 | 16 | 17 | # TopAccess 18 | # Server: TOSHIBA TEC CORPORATION 19 | 20 | matches [ 21 | {:text=>'TopAccess' } 22 | ] 23 | 24 | end 25 | 26 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/typepad.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | # Version 0.2 9 | # removed :certainty=>100 & :name 10 | 11 | Plugin.define "TypePad" do 12 | author "Andrew Horton" 13 | version "0.2" 14 | description "Blogging platform http://www.typepad.com/" 15 | # identifying strings 16 | # 17 | 18 | matches [ 19 | {:text=>'/goku\.brightcove\.com|admin\.brightcove\.com\/js/}, 15 | ] 16 | 17 | end 18 | 19 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/echo.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | # Version 0.2 8 | # removed :name and :certainty=>100 9 | ## 10 | Plugin.define "Echo" do 11 | author "Andrew Horton" 12 | version "0.2" 13 | description "CMS" 14 | website "http://www.helloecho.com/" 15 | 16 | # Dorks # 17 | dorks [ 18 | '"powered by echo"' 19 | ] 20 | 21 | # Matches # 22 | matches [ 23 | {:regexp=>/powered by echo<\/a>/}, 24 | ] 25 | 26 | end 27 | 28 | 29 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/empirecms.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Empire-CMS" do 8 | author "Brendan Coles " # 2010-08-04 9 | version "0.1" 10 | description "Open source CMS" 11 | website "http://www.phome.net/" 12 | 13 | # 371 results for "powered by EmpireCMS" @ 2010-08-04 14 | 15 | # Dorks # 16 | dorks [ 17 | '"powered by EmpireCMS"' 18 | ] 19 | 20 | 21 | 22 | matches [ 23 | 24 | { :text=>' - Powered by EmpireCMS' }, 25 | 26 | ] 27 | 28 | end 29 | 30 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/php-cake.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | 8 | 9 | # Version 0.2 10 | # remove :certainty 11 | 12 | Plugin.define "PHPCake" do 13 | author "Andrew Horton" 14 | version "0.2" 15 | description "PHP MVC web framework" 16 | 17 | # Set-Cookie: CAKEPHP=f3780e7684b29ac421af120d774f1ca9; expires=Tue, 08 Aug 2034 13:47:56 GMT; path=/ 18 | 19 | def passive 20 | m=[] 21 | m << {:name=>"CAKEPHP Cookie" } if @headers["set-cookie"] =~ /CAKEPHP=.*/ 22 | m 23 | end 24 | 25 | end 26 | 27 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/star-network.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Star-Network" do 8 | author "Brendan Coles " # 2010-08-08 9 | version "0.1" 10 | description "hompage: http://www.starltd.net/" 11 | 12 | # 5 results for "Powered by Star Network" @ 2010-08-08 13 | 14 | 15 | matches [ 16 | { :regexp=>/Powered [b|B]+y Star Network[\ and\ Promotion\ LTD|\&\;\ Promotion\ LTD]*<\/a>/ }, 17 | ] 18 | 19 | end 20 | 21 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/x-vortech-php.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "X-Vortech-PHP" do 8 | author "Brendan Coles " # 2010-10-30 9 | version "0.1" 10 | description "X-Vortech-PHP HTTP header." 11 | 12 | # About 440 ShodanHQ results for "X-Vortech-PHP" @ 2010-10-30 13 | 14 | 15 | # HTTP Header 16 | def passive 17 | m=[] 18 | 19 | m << { :version=>@headers["x-vortech-php"].to_s } unless @headers["x-vortech-php"].nil? 20 | 21 | m 22 | 23 | end 24 | 25 | end 26 | 27 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/ee.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "?" 
do 8 | author "Brendan Coles" 9 | description "In celebration of our 500th plugin - 2010-10-18" 10 | version "1.0" 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | def passive 27 | m=[] 28 | m << { :version=>"When you look into an abyss, the abyss also looks into you."} if @base_uri.to_s =~ /^http:\/\/(www\.)?morningstarsecurity.com\/research\/whatweb/i 29 | m 30 | end 31 | end 32 | 33 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/extjs.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "ExtJS" do 8 | author "Brendan Coles " # 2011-04-15 9 | version "0.1" 10 | description "Ext JS is a cross-browser JavaScript library for building rich internet applications." 11 | website "http://www.sencha.com/products/extjs/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # JavaScript Source 19 | { :regexp=>/' }, 20 | 21 | # /jape/ellipsis.xml 22 | { :url=>"/jape/ellipsis.xml", :md5=>"9639763b8c7f8caef097be4f3ffe5106" }, 23 | 24 | ] 25 | 26 | end 27 | 28 | -------------------------------------------------------------------------------- /3rd_party/WhatWeb/plugins/avaya-aura-utility-server.rb: -------------------------------------------------------------------------------- 1 | ## 2 | # This file is part of WhatWeb and may be subject to 3 | # redistribution and commercial restrictions. Please see the WhatWeb 4 | # web site for more information on licensing and terms of use. 
5 | # http://www.morningstarsecurity.com/research/whatweb 6 | ## 7 | Plugin.define "Avaya-Aura-Utility-Server" do 8 | author "Brendan Coles " # 2012-08-26 9 | version "0.1" 10 | description "Avaya Aura Utility Server" 11 | website "http://www.avaya.com/" 12 | 13 | 14 | 15 | # Matches # 16 | matches [ 17 | 18 | # / # span class="vmsTitle" 19 | { :text=>'Avaya Aura™ Utility Server' }, 20 | 21 | # / # help link 22 | { :text=>'