├── .github └── workflows │ ├── gem.yml │ └── ruby.yml ├── .gitignore ├── .rspec ├── .rubocop.yml ├── Gemfile ├── LICENSE ├── README.md ├── Rakefile ├── bin ├── console └── setup ├── docker-compose.yml ├── docker └── Dockerfile ├── exe └── miteru ├── lefthook.yml ├── lib ├── miteru.rb └── miteru │ ├── cache.rb │ ├── cli │ ├── application.rb │ ├── base.rb │ └── database.rb │ ├── commands │ ├── database.rb │ ├── main.rb │ ├── sidekiq.rb │ └── web.rb │ ├── concerns │ ├── database_connectable.rb │ ├── error_unwrappable.rb │ └── url_truncatable.rb │ ├── config.rb │ ├── crawler.rb │ ├── database.rb │ ├── downloader.rb │ ├── errors.rb │ ├── feeds │ ├── ayashige.rb │ ├── base.rb │ ├── phishing_database.rb │ ├── tweetfeed.rb │ ├── urlscan.rb │ └── urlscan_pro.rb │ ├── helpers.rb │ ├── http.rb │ ├── kit.rb │ ├── mixin.rb │ ├── notifiers │ ├── base.rb │ ├── slack.rb │ └── urlscan.rb │ ├── orchestrator.rb │ ├── record.rb │ ├── service.rb │ ├── sidekiq │ ├── application.rb │ └── jobs.rb │ ├── version.rb │ ├── web │ └── application.rb │ └── website.rb ├── miteru.gemspec ├── renovate.json └── spec ├── cli_spec.rb ├── crawler_spec.rb ├── downloader_spec.rb ├── feeds └── feed_spec.rb ├── fixtures ├── index.html ├── test.tar.gz └── test.zip ├── kit_spec.rb ├── miteru_spec.rb ├── orchestrator_spec.rb ├── spec_helper.rb ├── support └── shared_contexts │ ├── fake_http_server_context.rb │ └── mocked_logger_context.rb └── website_spec.rb /.github/workflows/gem.yml: -------------------------------------------------------------------------------- 1 | name: Release gem 2 | 3 | on: 4 | workflow_dispatch: 5 | inputs: 6 | rubygems-otp-code: 7 | description: RubyGems OTP code 8 | required: true 9 | type: string 10 | 11 | jobs: 12 | release: 13 | runs-on: ubuntu-latest 14 | strategy: 15 | matrix: 16 | ruby-version: 17 | - 3.3 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v4 21 | - name: Set up Ruby 22 | uses: ruby/setup-ruby@v1 23 | with: 24 | ruby-version: ${{ 
matrix.ruby-version }} 25 | bundler: latest 26 | bundler-cache: true 27 | - name: Configure Git 28 | # Configure Git to push a tag 29 | run: | 30 | git config --global user.name "${GITHUB_ACTOR}" 31 | git config --global user.email "${GITHUB_ACTOR}@users.noreply.github.com" 32 | - name: Release gem 33 | run: bundle exec rake release 34 | env: 35 | GEM_HOST_API_KEY: ${{ secrets.GEM_HOST_API_KEY }} 36 | GEM_HOST_OTP_CODE: ${{ inputs.rubygems-otp-code }} 37 | -------------------------------------------------------------------------------- /.github/workflows/ruby.yml: -------------------------------------------------------------------------------- 1 | name: Ruby CI 2 | 3 | on: [push, pull_request] 4 | 5 | jobs: 6 | build: 7 | runs-on: ubuntu-latest 8 | services: 9 | postgres: 10 | image: postgres:16 11 | env: 12 | POSTGRES_USER: postgres 13 | POSTGRES_PASSWORD: postgres 14 | POSTGRES_DB: test 15 | options: >- 16 | --health-cmd pg_isready 17 | --health-interval 10s 18 | --health-timeout 5s 19 | --health-retries 5 20 | ports: 21 | - 5432:5432 22 | mysql: 23 | image: mysql:8.0 24 | env: 25 | MYSQL_USER: mysql 26 | MYSQL_PASSWORD: mysql 27 | MYSQL_DATABASE: test 28 | MYSQL_ROOT_PASSWORD: rootpassword 29 | ports: 30 | - 3306:3306 31 | options: >- 32 | --health-cmd="mysqladmin ping" 33 | --health-interval=10s 34 | --health-timeout=5s 35 | --health-retries=3 36 | strategy: 37 | matrix: 38 | ruby: [3.2, 3.3] 39 | steps: 40 | - uses: actions/checkout@v4 41 | - name: Set up Ruby 42 | uses: ruby/setup-ruby@v1 43 | with: 44 | ruby-version: ${{ matrix.ruby }} 45 | bundler: latest 46 | bundler-cache: true 47 | - name: Install dependencies 48 | run: sudo apt-get -yqq install libpq-dev libmysqlclient-dev 49 | - name: Test with PostgreSQL 50 | env: 51 | MITERU_DATABASE: postgresql://postgres:postgres@localhost:5432/test 52 | run: bundle exec rake 53 | - name: Test with MySQL 54 | env: 55 | MITERU_DATABASE: mysql2://mysql:mysql@127.0.0.1:3306/test 56 | run: bundle exec rake 57 | 
-------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.gem 2 | *.rbc 3 | /.config 4 | /coverage/ 5 | /InstalledFiles 6 | /pkg/ 7 | /spec/reports/ 8 | /spec/examples.txt 9 | /test/tmp/ 10 | /test/version_tmp/ 11 | /tmp/ 12 | 13 | # Used by dotenv library to load environment variables. 14 | # .env 15 | 16 | ## Specific to RubyMotion: 17 | .dat* 18 | .repl_history 19 | build/ 20 | *.bridgesupport 21 | build-iPhoneOS/ 22 | build-iPhoneSimulator/ 23 | 24 | ## Specific to RubyMotion (use of CocoaPods): 25 | # 26 | # We recommend against adding the Pods directory to your .gitignore. However 27 | # you should judge for yourself, the pros and cons are mentioned at: 28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control 29 | # 30 | # vendor/Pods/ 31 | 32 | ## Documentation cache and generated files: 33 | /.yardoc/ 34 | /_yardoc/ 35 | /doc/ 36 | /rdoc/ 37 | 38 | ## Environment normalization: 39 | /.bundle/ 40 | /vendor/bundle 41 | /lib/bundler/man/ 42 | 43 | # for a library or gem, you might want to ignore these files since the code is 44 | # intended to run in multiple environments; otherwise, check them in: 45 | Gemfile.lock 46 | .ruby-version 47 | .ruby-gemset 48 | 49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this: 50 | .rvmrc 51 | 52 | ## RSpec 53 | .rspec_status 54 | 55 | # SQLite 56 | *.db 57 | *.db-shm 58 | *.db-wal 59 | 60 | # .env 61 | .env 62 | -------------------------------------------------------------------------------- /.rspec: -------------------------------------------------------------------------------- 1 | --format Fuubar 2 | --color 3 | --require spec_helper 4 | -------------------------------------------------------------------------------- /.rubocop.yml: -------------------------------------------------------------------------------- 1 | 
Style/StringLiterals: 2 | EnforcedStyle: double_quotes 3 | -------------------------------------------------------------------------------- /Gemfile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | source "https://rubygems.org" 4 | 5 | git_source(:github) { |repo_name| "https://github.com/#{repo_name}" } 6 | 7 | # Specify your gem's dependencies in miteru.gemspec 8 | gemspec 9 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Manabu Niseki 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Miteru 2 | 3 | [![Gem Version](https://badge.fury.io/rb/miteru.svg)](https://badge.fury.io/rb/miteru) 4 | [![Ruby CI](https://github.com/ninoseki/miteru/actions/workflows/ruby.yml/badge.svg)](https://github.com/ninoseki/miteru/actions/workflows/ruby.yml) 5 | [![CodeFactor](https://www.codefactor.io/repository/github/ninoseki/miteru/badge)](https://www.codefactor.io/repository/github/ninoseki/miteru) 6 | [![Coverage Status](https://coveralls.io/repos/github/ninoseki/miteru/badge.svg?branch=master)](https://coveralls.io/github/ninoseki/miteru?branch=master) 7 | 8 | A phishing kit collector for scavengers. 9 | 10 | ## Disclaimer 11 | 12 | This tool is for research purposes only. The use of this tool is your responsibility. 13 | I take no responsibility and/or liability for how you choose to use this tool. 14 | 15 | ## How It Works 16 | 17 | - Collect phishy URLs from the following feeds: 18 | - urlscan.io's automatic submissions. (`task.method:automatic AND NOT task.source:urlscan-observe`) 19 | - urlscan.io phish feed. (available for Pro users) 20 | - [mitchellkrogza/Phishing.Database](https://github.com/mitchellkrogza/Phishing.Database)'s `phishing-links-ACTIVE-NOW.txt`. 21 | - [ninoseki/ayashige](https://github.com/ninoseki/ayashige) feed. 22 | - Check whether each phishy URL has directory listing enabled and contains phishing kits (compressed files). 23 | - Note: Supported compressed files are: `*.zip`, `*.rar`, `*.7z`, `*.tar` and `*.gz`. 
24 | 25 | ## Docs 26 | 27 | - [Requirements & Installation](https://github.com/ninoseki/miteru/wiki/Requirements-&-Installation) 28 | - [Usage](https://github.com/ninoseki/miteru/wiki/Usage) 29 | - [Configuration](https://github.com/ninoseki/miteru/wiki/Configuration) 30 | - [Alternatives](https://github.com/ninoseki/miteru/wiki/Alternatives) 31 | -------------------------------------------------------------------------------- /Rakefile: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "bundler/gem_tasks" 4 | require "rspec/core/rake_task" 5 | 6 | RSpec::Core::RakeTask.new(:spec) 7 | 8 | task default: :spec 9 | -------------------------------------------------------------------------------- /bin/console: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | require "bundler/setup" 5 | require "miteru" 6 | 7 | # You can add fixtures and/or initialization code here to make experimenting 8 | # with your gem easier. You can also use a different console, if you like. 9 | 10 | # (If you use this, don't forget to add pry to your Gemfile!) 
11 | # require "pry" 12 | # Pry.start 13 | 14 | require "irb" 15 | IRB.start(__FILE__) 16 | -------------------------------------------------------------------------------- /bin/setup: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -euo pipefail 3 | IFS=$'\n\t' 4 | set -vx 5 | 6 | bundle install 7 | 8 | # Do any other automated setup that you need to do here 9 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | services: 2 | redis: 3 | image: "redis/redis-stack:6.2.6-v10" 4 | restart: always 5 | ports: 6 | - ${REDIS_PORT:-6379}:6379 7 | - ${REDIS_INSIGHT_PORT:-8001}:8001 8 | volumes: 9 | - redis:/data 10 | volumes: 11 | redis: 12 | -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ruby:3-alpine3.13 2 | 3 | RUN apk --no-cache add git build-base ruby-dev mysql-client mysql-dev sqlite-dev postgresql-client postgresql-dev \ 4 | && cd /tmp/ \ 5 | && git clone https://github.com/ninoseki/miteru.git \ 6 | && cd miteru \ 7 | && gem build miteru.gemspec -o miteru.gem \ 8 | && gem install miteru.gem \ 9 | && gem install mysql2 \ 10 | && gem install pg \ 11 | && rm -rf /tmp/miteru \ 12 | && apk del --purge git build-base ruby-dev 13 | 14 | ENTRYPOINT ["miteru"] 15 | 16 | CMD ["--help"] 17 | -------------------------------------------------------------------------------- /exe/miteru: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | # frozen_string_literal: true 3 | 4 | $LOAD_PATH.unshift("#{__dir__}/../lib") 5 | 6 | require "miteru" 7 | 8 | ARGV.unshift(Miteru::CLI::App.default_task) unless Miteru::CLI::App.all_tasks.key?(ARGV[0]) 9 | Miteru::CLI::App.start(ARGV) 10 | 
-------------------------------------------------------------------------------- /lefthook.yml: -------------------------------------------------------------------------------- 1 | pre-commit: 2 | commands: 3 | standard: 4 | glob: "*.rb" 5 | run: bundle exec standardrb --fix {staged_files} 6 | stage_fixed: true 7 | actionlint: 8 | root: ".github/workflows" 9 | glob: "*.{yaml,yml}" 10 | run: actionlint 11 | -------------------------------------------------------------------------------- /lib/miteru.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | # Core standard libraries 4 | require "cgi" 5 | require "json" 6 | require "uri" 7 | require "uuidtools" 8 | 9 | # Core 3rd party libraries 10 | require "colorize" 11 | require "memo_wise" 12 | require "semantic_logger" 13 | require "sentry-ruby" 14 | 15 | require "dry/files" 16 | require "dry/monads" 17 | 18 | # Load .env 19 | require "dotenv/load" 20 | 21 | # Active Support & Active Record 22 | require "active_support" 23 | require "active_record" 24 | 25 | # Version 26 | require "miteru/version" 27 | # Errors 28 | require "miteru/errors" 29 | 30 | # Concerns 31 | require "miteru/concerns/database_connectable" 32 | require "miteru/concerns/error_unwrappable" 33 | require "miteru/concerns/url_truncatable" 34 | 35 | # Helpers 36 | require "miteru/helpers" 37 | 38 | # Core classes 39 | require "miteru/service" 40 | 41 | require "miteru/cache" 42 | require "miteru/config" 43 | require "miteru/http" 44 | 45 | # Database + ActiveRecord 46 | require "miteru/database" 47 | require "miteru/record" 48 | 49 | module Miteru 50 | class << self 51 | prepend MemoWise 52 | 53 | # 54 | # @return [SemanticLogger::Logger] 55 | # 56 | def logger 57 | SemanticLogger.default_level = :info 58 | SemanticLogger.add_appender(io: $stderr, formatter: :color) 59 | SemanticLogger["Miteru"] 60 | end 61 | memo_wise :logger 62 | 63 | # 64 | # @return [Array] 65 | # 66 | def feeds 67 | 
[] 68 | end 69 | memo_wise :feeds 70 | 71 | # 72 | # @return [Array] 73 | # 74 | def notifiers 75 | [] 76 | end 77 | memo_wise :notifiers 78 | 79 | # 80 | # @return [Miteru::Config] 81 | # 82 | def config 83 | @config ||= Config.new 84 | end 85 | 86 | # 87 | # @return [String, nil] 88 | # 89 | def env 90 | ENV["APP_ENV"] || ENV["RACK_ENV"] 91 | end 92 | 93 | # 94 | # @return [Boolean] 95 | # 96 | def development? 97 | env == "development" 98 | end 99 | 100 | def cache? 101 | !Miteru.config.cache_redis_url.nil? 102 | end 103 | 104 | def cache 105 | @cache ||= Cache.new(Miteru.config.cache_redis_url) 106 | end 107 | 108 | def sentry? 109 | !Miteru.config.sentry_dsn.nil? 110 | end 111 | 112 | def initialize_sentry 113 | return if Sentry.initialized? 114 | 115 | Sentry.init do |config| 116 | config.dsn = Miteru.config.sentry_dsn 117 | config.traces_sample_rate = Miteru.config.sentry_trace_sample_rate 118 | config.breadcrumbs_logger = %i[sentry_logger http_logger] 119 | end 120 | end 121 | end 122 | end 123 | 124 | # Services 125 | require "miteru/crawler" 126 | require "miteru/downloader" 127 | require "miteru/kit" 128 | require "miteru/orchestrator" 129 | require "miteru/website" 130 | 131 | # Notifiers 132 | require "miteru/notifiers/base" 133 | require "miteru/notifiers/slack" 134 | require "miteru/notifiers/urlscan" 135 | 136 | # Feeds 137 | require "miteru/feeds/base" 138 | 139 | require "miteru/feeds/ayashige" 140 | require "miteru/feeds/phishing_database" 141 | require "miteru/feeds/tweetfeed" 142 | require "miteru/feeds/urlscan_pro" 143 | require "miteru/feeds/urlscan" 144 | 145 | # CLI 146 | require "miteru/cli/application" 147 | 148 | # Sidekiq 149 | require "sidekiq" 150 | 151 | require "miteru/sidekiq/application" 152 | require "miteru/sidekiq/jobs" 153 | 154 | Miteru.initialize_sentry if Miteru.sentry? 
155 | -------------------------------------------------------------------------------- /lib/miteru/cache.rb: -------------------------------------------------------------------------------- 1 | require "redis" 2 | 3 | module Miteru 4 | class Cache < Service 5 | # @return [String] 6 | attr_reader :url 7 | 8 | # 9 | # @param [String] url 10 | # 11 | def initialize(url) 12 | super() 13 | @url = url 14 | end 15 | 16 | # 17 | # @param [String] key 18 | # @param [String] value 19 | # @param [Integer, nil] ex 20 | # 21 | def set(key, value, ex:) 22 | value = redis.set("#{cache_prefix}:#{key}", value, ex:) 23 | logger.info("Cache:#{key} is set.") if verbose? 24 | value 25 | end 26 | 27 | # 28 | # @param [String] key 29 | # 30 | def cached?(key) 31 | value = redis.exists?("#{cache_prefix}:#{key}") 32 | logger.info("Cache:#{key} found.") if verbose? 33 | value 34 | end 35 | 36 | private 37 | 38 | # 39 | # @return [Redis] 40 | # 41 | def redis 42 | @redis ||= Redis.new(url:) 43 | end 44 | end 45 | end 46 | -------------------------------------------------------------------------------- /lib/miteru/cli/application.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "thor" 4 | require "thor/hollaback" 5 | 6 | require "miteru/cli/base" 7 | require "miteru/cli/database" 8 | 9 | require "miteru/commands/main" 10 | require "miteru/commands/sidekiq" 11 | require "miteru/commands/web" 12 | 13 | module Miteru 14 | module CLI 15 | # 16 | # Main CLI class 17 | # 18 | class App < Base 19 | include Commands::Main 20 | include Commands::Sidekiq 21 | include Commands::Web 22 | 23 | desc "db", "Sub commands for DB" 24 | subcommand "db", CLI::Database 25 | end 26 | end 27 | end 28 | -------------------------------------------------------------------------------- /lib/miteru/cli/base.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | 
module Miteru 4 | module CLI 5 | # 6 | # Base class for Thor classes 7 | # 8 | class Base < Thor 9 | class << self 10 | def exit_on_failure? 11 | true 12 | end 13 | end 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /lib/miteru/cli/database.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "miteru/commands/database" 4 | 5 | module Miteru 6 | module CLI 7 | class Database < Base 8 | include Commands::Database 9 | end 10 | end 11 | end 12 | -------------------------------------------------------------------------------- /lib/miteru/commands/database.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Commands 5 | module Database 6 | class << self 7 | def included(thor) 8 | thor.class_eval do 9 | include Concerns::DatabaseConnectable 10 | 11 | desc "migrate", "Migrate DB schemas" 12 | around :with_db_connection 13 | method_option :verbose, type: :boolean, default: true 14 | def migrate(direction = "up") 15 | ActiveRecord::Migration.verbose = options["verbose"] 16 | Miteru::Database.migrate direction.to_sym 17 | end 18 | end 19 | end 20 | end 21 | end 22 | end 23 | end 24 | -------------------------------------------------------------------------------- /lib/miteru/commands/main.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Commands 5 | module Main 6 | class << self 7 | def included(thor) 8 | thor.class_eval do 9 | include Concerns::DatabaseConnectable 10 | 11 | method_option :auto_download, type: :boolean, default: false, 12 | desc: "Enable or disable auto-downloading of phishing kits" 13 | method_option :directory_traveling, type: :boolean, default: false, 14 | desc: "Enable or disable directory traveling" 15 | 
method_option :download_to, type: :string, default: "/tmp", desc: "Directory to download phishing kits" 16 | method_option :verbose, type: :boolean, default: true 17 | desc "execute", "Execute the crawler" 18 | around :with_db_connection 19 | def execute 20 | Miteru.config.tap do |config| 21 | config.auto_download = options["auto_download"] 22 | config.directory_traveling = options["directory_traveling"] 23 | config.download_to = options["download_to"] 24 | config.verbose = options["verbose"] 25 | end 26 | Orchestrator.call 27 | end 28 | default_command :execute 29 | end 30 | end 31 | end 32 | end 33 | end 34 | end 35 | -------------------------------------------------------------------------------- /lib/miteru/commands/sidekiq.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Commands 5 | # 6 | # Sidekiq sub-commands 7 | # 8 | module Sidekiq 9 | class << self 10 | def included(thor) 11 | thor.class_eval do 12 | desc "sidekiq", "Start Sidekiq" 13 | method_option :env, type: :string, default: "production", desc: "Environment", aliases: "-e" 14 | method_option :concurrency, type: :numeric, default: 5, desc: "Sidekiq concurrency", aliases: "-c" 15 | def sidekiq 16 | require "sidekiq/cli" 17 | 18 | ENV["APP_ENV"] ||= options["env"] 19 | concurrency = options["concurrency"].to_s 20 | 21 | cli = ::Sidekiq::CLI.instance 22 | cli.parse [ 23 | "-r", 24 | File.expand_path(File.join(__dir__, "..", "sidekiq", "application.rb")), 25 | "-c", 26 | concurrency 27 | ] 28 | cli.run 29 | end 30 | end 31 | end 32 | end 33 | end 34 | end 35 | end 36 | -------------------------------------------------------------------------------- /lib/miteru/commands/web.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Commands 5 | # 6 | # Web sub-commands 7 | # 8 | module Web 9 | class << self 10 | 
def included(thor) 11 | thor.class_eval do 12 | desc "web", "Start the web app" 13 | method_option :port, type: :numeric, default: 9292, desc: "Port to listen on" 14 | method_option :host, type: :string, default: "localhost", desc: "Hostname to listen on" 15 | method_option :threads, type: :string, default: "0:3", desc: "min:max threads to use" 16 | method_option :verbose, type: :boolean, default: false, desc: "Don't report each request" 17 | method_option :worker_timeout, type: :numeric, default: 60, desc: "Worker timeout value (in seconds)" 18 | method_option :env, type: :string, default: "production", desc: "Environment" 19 | def web 20 | require "miteru/web/application" 21 | 22 | ENV["APP_ENV"] ||= options["env"] 23 | 24 | Miteru::Web::App.run!( 25 | port: options["port"], 26 | host: options["host"], 27 | threads: options["threads"], 28 | verbose: options["verbose"], 29 | worker_timeout: options["worker_timeout"] 30 | ) 31 | end 32 | end 33 | end 34 | end 35 | end 36 | end 37 | end 38 | -------------------------------------------------------------------------------- /lib/miteru/concerns/database_connectable.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Concerns 5 | # 6 | # Database connectable concern 7 | # 8 | module DatabaseConnectable 9 | extend ActiveSupport::Concern 10 | 11 | def with_db_connection(&block) 12 | Database.with_db_connection(&block) 13 | end 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /lib/miteru/concerns/error_unwrappable.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | module Concerns 5 | # 6 | # Error unwrappable concern 7 | # 8 | module ErrorUnwrappable 9 | extend ActiveSupport::Concern 10 | 11 | def unwrap_error(err) 12 | return err unless 
# frozen_string_literal: true

require "anyway_config"

module Miteru
  #
  # Application settings, declared through Anyway::Config.
  #
  # Every attribute and its default value is listed in the `attr_config` call
  # below; the YARD `@!attribute` entries that follow document each one once.
  #
  class Config < Anyway::Config
    config_name :miteru
    env_prefix ""

    attr_config(
      auto_download: false,
      database_url: URI("sqlite3:miteru.db"),
      directory_traveling: false,
      download_to: "/tmp",
      file_max_size: 1024 * 1024 * 100,
      file_extensions: [".zip", ".rar", ".7z", ".tar", ".gz"],
      file_mime_types: [
        "application/zip",
        "application/vnd.rar",
        "application/x-7z-compressed",
        "application/x-tar",
        "application/gzip"
      ],
      api_timeout: 60,
      http_timeout: 60,
      download_timeout: 60,
      sentry_dsn: nil,
      sentry_trace_sample_rate: 0.25,
      sidekiq_redis_url: "redis://localhost:6379",
      sidekiq_job_retry: 0,
      sidekiq_batch_size: 50,
      sidekiq_job_timeout: 600,
      cache_redis_url: nil,
      cache_ex: nil,
      cache_prefix: "miteru:cache",
      slack_channel: "#general",
      slack_webhook_url: nil,
      urlscan_api_key: nil,
      urlscan_submit_visibility: "public",
      urlscan_date_condition: "date:>now-1h",
      urlscan_base_condition: "task.method:automatic AND NOT task.source:urlscan-observe",
      verbose: false
    )

    # @!attribute [r] sentry_dsn
    #   @return [String, nil]

    # @!attribute [r] sentry_trace_sample_rate
    #   @return [Float]

    # @!attribute [r] sidekiq_redis_url
    #   @return [String]

    # @!attribute [r] sidekiq_job_retry
    #   @return [Integer]

    # @!attribute [r] sidekiq_batch_size
    #   @return [Integer]

    # @!attribute [r] sidekiq_job_timeout
    #   @return [Integer]

    # @!attribute [r] cache_redis_url
    #   @return [String, nil]

    # @!attribute [r] cache_ex
    #   @return [Integer, nil]

    # @!attribute [r] cache_prefix
    #   @return [String]

    # @!attribute [r] http_timeout
    #   @return [Integer]

    # @!attribute [r] api_timeout
    #   @return [Integer]

    # @!attribute [r] download_timeout
    #   @return [Integer]

    # @!attribute [rw] auto_download
    #   @return [Boolean]

    # @!attribute [rw] directory_traveling
    #   @return [Boolean]

    # @!attribute [rw] download_to
    #   @return [String]

    # NOTE(review): `threads` is documented here but is not declared in the
    # attr_config call above - confirm whether the attribute still exists.
    # @!attribute [rw] threads
    #   @return [Integer]

    # @!attribute [rw] verbose
    #   @return [Boolean]

    # @!attribute [r] database_url
    #   @return [URI]

    # @!attribute [r] file_max_size
    #   @return [Integer]

    # @!attribute [r] file_extensions
    #   @return [Array<String>]

    # @!attribute [r] file_mime_types
    #   @return [Array<String>]

    # @!attribute [r] slack_webhook_url
    #   @return [String, nil]

    # @!attribute [r] slack_channel
    #   @return [String]

    # @!attribute [r] urlscan_api_key
    #   @return [String, nil]

    # @!attribute [r] urlscan_submit_visibility
    #   @return [String]

    # @!attribute [r] urlscan_date_condition
    #   @return [String]

    # @!attribute [r] urlscan_base_condition
    #   @return [String]

    #
    # Store the database URL as a URI, whatever type was assigned.
    #
    # @param [#to_s] val value converted with `URI(val.to_s)` before storing
    #
    def database_url=(val)
      super(URI(val.to_s))
    end
  end
end
20 | logger.warn("Kit:#{kit.truncated_url} failed to download - #{result.failure}.") 21 | next 22 | end 23 | destination = result.value! 24 | logger.info("Kit:#{kit.truncated_url} downloaded as #{destination}.") 25 | # Remove downloaded file if auto_download is not allowed 26 | FileUtils.rm(destination, force: true) unless auto_download? 27 | # Notify the kit 28 | notify(kit) 29 | end 30 | 31 | # Cache the website 32 | cache.set(website.url, website.source, ex: cache_ex) if cache? 33 | end.recover { nil }.value! 34 | end 35 | 36 | private 37 | 38 | # 39 | # @param [Miteru::Kit] kit 40 | # 41 | def notify(kit) 42 | notifiers.each do |notifier| 43 | result = notifier.result(kit) 44 | if result.success? 45 | logger.info("Notifier:#{notifier.name} succeeded.") 46 | else 47 | logger.warn("Notifier:#{notifier.name} failed - #{result.failure}.") 48 | end 49 | end 50 | end 51 | 52 | def notifiers 53 | @notifiers ||= Miteru.notifiers.map(&:new) 54 | end 55 | end 56 | end 57 | -------------------------------------------------------------------------------- /lib/miteru/database.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | class V2Schema < ActiveRecord::Migration[8.0] 4 | def change 5 | create_table :records, if_not_exists: true do |t| 6 | t.string :sha256, null: false, index: {unique: true} 7 | t.string :hostname, null: false 8 | t.json :headers, null: false 9 | t.text :filename, null: false 10 | t.string :downloaded_as, null: false 11 | t.integer :filesize, null: false 12 | t.string :mime_type, null: false 13 | t.text :url, null: false 14 | t.string :source, null: false 15 | 16 | t.timestamps 17 | end 18 | end 19 | end 20 | 21 | # 22 | # @return [Array] schemas 23 | # 24 | def schemas 25 | [V2Schema] 26 | end 27 | 28 | module Miteru 29 | class Database 30 | class << self 31 | # 32 | # DB migration 33 | # 34 | # @param [Symbol] direction 35 | # 36 | def migrate(direction) 37 | schemas.each { 
# frozen_string_literal: true

require "digest"
require "fileutils"

require "down/http"

module Miteru
  #
  # Downloads a kit to local disk and records it in the database.
  #
  class Downloader < Service
    prepend MemoWise

    # NOTE(review): base_dir is stored but not read in this class; #call takes
    # the destination from kit.filepath_to_download - confirm this is intended.
    # @return [String]
    attr_reader :base_dir

    # @return [Miteru::Kit]
    attr_reader :kit

    #
    # @param [Miteru::Kit] kit
    # @param [String] base_dir
    #
    def initialize(kit, base_dir: Miteru.config.download_to)
      super()
      @kit = kit
      @base_dir = base_dir
    end

    #
    # Download the kit, drop it when the same content is already recorded,
    # otherwise record it.
    #
    # @return [String] path of the downloaded file
    #
    # @raise [Miteru::UniquenessError] when a record with the same SHA256 exists
    #
    def call
      destination = kit.filepath_to_download

      downloader.download(kit.url, destination:, max_size:)

      # Hash once while the file is guaranteed to exist; the duplicate branch
      # below deletes it before the digest is needed for the error message.
      digest = sha256(destination)

      unless Record.unique_sha256?(digest)
        FileUtils.rm destination
        raise UniquenessError, "Kit:#{digest} is registered already."
      end

      # Record a kit in DB
      Record.create_by_kit_and_hash(kit, sha256: digest)
      logger.info "Download #{kit.url} as #{destination}"

      destination
    end

    private

    def timeout
      Miteru.config.download_timeout
    end

    def downloader
      Down::Http.new(ssl_context:) { |client| client.timeout(timeout) }
    end

    # TLS context with certificate verification turned off (VERIFY_NONE).
    def ssl_context
      OpenSSL::SSL::SSLContext.new.tap do |ctx|
        ctx.verify_mode = OpenSSL::SSL::VERIFY_NONE
      end
    end

    def max_size
      Miteru.config.file_max_size
    end

    #
    # @param [String] path
    #
    # @return [String] hex-encoded SHA256 of the file at path
    #
    def sha256(path)
      digest = Digest::SHA256.file(path)
      digest.hexdigest
    end
    memo_wise :sha256
  end
end
# frozen_string_literal: true

module Miteru
  class Feeds
    #
    # Shared behaviour for feeds: subclasses implement #urls, and #call turns
    # those URLs into Website objects tagged with the feed's name.
    #
    class Base < Service
      IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze

      # @return [String]
      attr_reader :base_url

      # @return [Hash]
      attr_reader :headers

      # Add each subclass to Miteru.feeds when it is defined.
      def self.inherited(child)
        super
        Miteru.feeds << child
      end

      #
      # @param [String] base_url
      #
      def initialize(base_url)
        super()

        @base_url = base_url
        @headers = {}
      end

      # @return [String] class name without its namespace
      def source
        @source ||= self.class.to_s.rpartition("::").last
      end

      #
      # Return URLs
      #
      # @return [Array<String>] URLs
      #
      def urls
        raise NotImplementedError, "You must implement #{self.class}##{__method__}"
      end

      #
      # Return decomposed URLs (http/https only, duplicates removed)
      #
      # @return [Array<String>] Decomposed URLs
      #
      def decomposed_urls
        web_urls = urls.uniq.select { |candidate| candidate.start_with?("http://", "https://") }
        web_urls.flat_map { |candidate| decompose(candidate) }.uniq
      end

      #
      # @return [Array<Miteru::Website>]
      #
      def call
        decomposed_urls.map { |target| Website.new(target, source:) }
      end

      private

      def timeout
        Miteru.config.api_timeout
      end

      def directory_traveling?
        Miteru.config.directory_traveling
      end

      #
      # Validate extension of a URL
      #
      # @param [String] url
      #
      # @return [Boolean] true when the URL ends with an ignored extension
      #
      def invalid_extension?(url)
        url.end_with?(*IGNORE_EXTENSIONS)
      end

      #
      # Decompose a URL into URLs
      #
      # Returns only scheme://host unless directory traveling is enabled; with
      # it enabled, returns one URL per path prefix. An unparsable URL gives [].
      #
      # @param [String] url
      #
      # @return [Array<String>]
      #
      def decompose(url)
        Try[URI::InvalidURIError] do
          parsed = URI.parse(url)
          root = "#{parsed.scheme}://#{parsed.hostname}"
          return [root] unless directory_traveling?

          segments = parsed.path.split("/")
          return [root] if segments.empty?

          prefixes = segments.each_index.map { |last| "#{root}#{segments[0..last].join("/")}" }
          prefixes.reject { |prefix| invalid_extension?(prefix) }
        end.recover { [] }.value!
      end

      #
      # @return [::HTTP::Client]
      #
      def http
        @http ||= HTTP::Factory.build(headers:, timeout:)
      end

      #
      # @param [String] path
      #
      # @return [URI]
      #
      def url_for(path)
        URI.join(base_url, path)
      end

      #
      # @param [String] path
      # @param [Hash, nil] params
      #
      # @return [::HTTP::Response]
      #
      def get(path, params: nil)
        http.get(url_for(path), params:)
      end

      #
      # @param [String] path
      # @param [Hash, nil] params
      #
      # @return [Hash]
      #
      def get_json(path, params: nil)
        JSON.parse(get(path, params:).body.to_s)
      end
    end
  end
end
urls 11 | text.lines.map(&:chomp) 12 | end 13 | 14 | private 15 | 16 | def text 17 | get("/mitchellkrogza/Phishing.Database/master/phishing-links-ACTIVE-NOW.txt").body.to_s 18 | end 19 | end 20 | end 21 | end 22 | -------------------------------------------------------------------------------- /lib/miteru/feeds/tweetfeed.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | class Feeds 5 | class TweetFeed < Base 6 | def initialize(base_url = "https://api.tweetfeed.live") 7 | super 8 | end 9 | 10 | def urls 11 | data = get_json("/v1/today/url") 12 | data.filter_map { |item| item["value"] } 13 | end 14 | end 15 | end 16 | end 17 | -------------------------------------------------------------------------------- /lib/miteru/feeds/urlscan.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | class Feeds 5 | class UrlScan < Base 6 | # 7 | # @param [String] base_url 8 | # 9 | def initialize(base_url = "https://urlscan.io") 10 | super 11 | 12 | @headers = {"api-key": api_key} 13 | end 14 | 15 | def urls 16 | search_with_pagination.flat_map do |json| 17 | (json["results"] || []).map { |result| result.dig("task", "url") } 18 | end.uniq 19 | end 20 | 21 | private 22 | 23 | def size 24 | 10_000 25 | end 26 | 27 | # @return [] 28 | # 29 | def api_key 30 | Miteru.config.urlscan_api_key 31 | end 32 | 33 | def q 34 | "#{base_condition} AND #{date_condition}" 35 | end 36 | 37 | # 38 | # @param [String, nil] search_after 39 | # 40 | # @return [Hash] 41 | # 42 | def search(search_after: nil) 43 | get_json("/api/v1/search/", params: {q:, size:, search_after:}.compact) 44 | end 45 | 46 | def search_with_pagination 47 | search_after = nil 48 | 49 | Enumerator.new do |y| 50 | loop do 51 | res = search(search_after:) 52 | 53 | y.yield res 54 | 55 | has_more = res["has_more"] 56 | break unless has_more 57 | 58 
| search_after = res["results"].last["sort"].join(",") 59 | end 60 | end 61 | end 62 | 63 | def base_condition 64 | Miteru.config.urlscan_base_condition 65 | end 66 | 67 | def date_condition 68 | Miteru.config.urlscan_date_condition 69 | end 70 | end 71 | end 72 | end 73 | -------------------------------------------------------------------------------- /lib/miteru/feeds/urlscan_pro.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | class Feeds 5 | class UrlScanPro < Base 6 | # 7 | # @param [String] base_url 8 | # 9 | def initialize(base_url = "https://urlscan.io") 10 | super 11 | 12 | @headers = {"api-key": api_key} 13 | end 14 | 15 | def urls 16 | (json["results"] || []).map { |result| result["page_url"] } 17 | end 18 | 19 | private 20 | 21 | def api_key 22 | Miteru.config.urlscan_api_key 23 | end 24 | 25 | def q 26 | Miteru.config.urlscan_date_condition 27 | end 28 | 29 | def format 30 | "json" 31 | end 32 | 33 | def json 34 | get_json("/api/v1/pro/phishfeed", params: {q:, format:}) 35 | end 36 | end 37 | end 38 | end 39 | -------------------------------------------------------------------------------- /lib/miteru/helpers.rb: -------------------------------------------------------------------------------- 1 | require "active_support/number_helper" 2 | 3 | module Miteru 4 | module Helpers 5 | extend ActiveSupport::NumberHelper 6 | end 7 | end 8 | -------------------------------------------------------------------------------- /lib/miteru/http.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "http" 4 | 5 | module Miteru 6 | module HTTP 7 | # 8 | # Better error handling feature 9 | # 10 | class BetterError < ::HTTP::Feature 11 | def wrap_response(response) 12 | return response if response.status.success? 
13 | 14 | raise StatusError.new( 15 | "Unsuccessful response code returned: #{response.code}", 16 | response.code, 17 | response.body.to_s 18 | ) 19 | end 20 | 21 | ::HTTP::Options.register_feature(:better_error, self) 22 | end 23 | 24 | # 25 | # HTTP client factory 26 | # 27 | class Factory 28 | class << self 29 | USER_AGENT = "miteru/#{Miteru::VERSION}".freeze 30 | 31 | # 32 | # @param [Integer, nil] timeout 33 | # @param [Hash] headers 34 | # @param [Boolean] raise_exception 35 | # 36 | # @return [::HTTP::Client] 37 | # 38 | # @param [Object] raise_exception 39 | def build(headers: {}, timeout: nil, raise_exception: true) 40 | client = raise_exception ? ::HTTP.use(:better_error) : ::HTTP 41 | 42 | headers["User-Agent"] ||= USER_AGENT 43 | 44 | client = client.headers(headers) 45 | client = client.timeout(timeout) unless timeout.nil? 46 | client 47 | end 48 | end 49 | end 50 | end 51 | end 52 | -------------------------------------------------------------------------------- /lib/miteru/kit.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | class Kit < Service 5 | include Concerns::UrlTruncatable 6 | 7 | # @return [String] 8 | attr_reader :url 9 | 10 | # @return [String] 11 | attr_reader :source 12 | 13 | # @return [Integer, nil] 14 | attr_reader :status 15 | 16 | # @return [Integer, nil] 17 | attr_reader :content_length 18 | 19 | # @return [String, nil] 20 | attr_reader :mime_type 21 | 22 | # @return [Hash, nil] 23 | attr_reader :headers 24 | 25 | # 26 | # @param [String] url 27 | # @param [String] source 28 | # 29 | def initialize(url, source:) 30 | super() 31 | 32 | @url = url 33 | @source = source 34 | 35 | @content_length = nil 36 | @mime_type = nil 37 | @status = nil 38 | @headers = nil 39 | end 40 | 41 | def valid? 42 | # make a HEAD request for the validation 43 | before_validation 44 | valid_ext? && reachable? && valid_mime_type? && valid_content_length? 
45 | end 46 | 47 | def extname 48 | return ".tar.gz" if url.end_with?("tar.gz") 49 | 50 | File.extname(url) 51 | end 52 | 53 | def basename 54 | @basename ||= File.basename(url) 55 | end 56 | 57 | def filename 58 | @filename ||= CGI.unescape(basename) 59 | end 60 | 61 | def filepath_to_download 62 | "#{base_dir}/#{filename_to_download}" 63 | end 64 | 65 | def downloaded? 66 | File.exist?(filepath_to_download) 67 | end 68 | 69 | def filesize 70 | return nil unless downloaded? 71 | 72 | File.size filepath_to_download 73 | end 74 | 75 | def filename_with_size 76 | return filename unless filesize 77 | 78 | "#{filename} (#{Helpers.number_to_human_size(filesize)})" 79 | end 80 | 81 | def id 82 | @id ||= UUIDTools::UUID.random_create.to_s 83 | end 84 | 85 | def hostname 86 | @hostname ||= URI(url).hostname 87 | end 88 | 89 | private 90 | 91 | def filename_to_download 92 | "#{id}#{extname}" 93 | end 94 | 95 | def base_dir 96 | @base_dir ||= Miteru.config.download_to 97 | end 98 | 99 | def valid_ext? 100 | Miteru.config.file_extensions.include? extname 101 | end 102 | 103 | def http 104 | HTTP::Factory.build(raise_exception: false) 105 | end 106 | 107 | def before_validation 108 | Try[StandardError] do 109 | res = http.head(url) 110 | @content_length = res.content_length 111 | @mime_type = res.content_type.mime_type.to_s 112 | @status = res.status 113 | @headers = res.headers.to_h 114 | end.recover { nil }.value! 115 | end 116 | 117 | def reachable? 118 | status&.success? 119 | end 120 | 121 | def valid_mime_type? 122 | Miteru.config.file_mime_types.include? mime_type 123 | end 124 | 125 | def valid_content_length? 126 | content_length.to_i.positive? 
# frozen_string_literal: true

module Miteru
  module Mixins
    #
    # URL helpers for classes that filter crawl targets by file extension.
    #
    module URL
      IGNORE_EXTENSIONS = %w[.htm .html .php .asp .aspx .exe .txt].freeze

      #
      # Validate extension of a URL
      #
      # @param [String] url
      #
      # @return [Boolean] true when the URL ends with an ignored extension
      #
      def invalid_extension?(url)
        url.end_with?(*IGNORE_EXTENSIONS)
      end
    end
  end
end
11 | raise NotImplementedError 12 | end 13 | 14 | def name 15 | @name ||= self.class.to_s.split("::").last 16 | end 17 | 18 | class << self 19 | def inherited(child) 20 | super 21 | Miteru.notifiers << child 22 | end 23 | end 24 | end 25 | end 26 | end 27 | -------------------------------------------------------------------------------- /lib/miteru/notifiers/slack.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | require "slack-notifier" 4 | 5 | module Miteru 6 | module Notifiers 7 | class SlackAttachment 8 | # @return [String] 9 | attr_reader :url 10 | 11 | def initialize(url) 12 | @url = url 13 | end 14 | 15 | def to_a 16 | [ 17 | { 18 | text:, 19 | fallback: "VT & urlscan.io links", 20 | actions: 21 | } 22 | ] 23 | end 24 | 25 | private 26 | 27 | def actions 28 | [vt_link, urlscan_link].compact 29 | end 30 | 31 | def vt_link 32 | return nil unless _vt_link 33 | 34 | { 35 | type: "button", 36 | text: "Lookup on VirusTotal", 37 | url: _vt_link 38 | } 39 | end 40 | 41 | def urlscan_link 42 | return nil unless _urlscan_link 43 | 44 | { 45 | type: "button", 46 | text: "Lookup on urlscan.io", 47 | url: _urlscan_link 48 | } 49 | end 50 | 51 | def domain 52 | @domain ||= [].tap do |out| 53 | out << URI(url).hostname 54 | rescue URI::Error => _e 55 | out << nil 56 | end.first 57 | end 58 | 59 | def text 60 | domain.to_s.gsub(".", "[.]") 61 | end 62 | 63 | def _urlscan_link 64 | return nil unless domain 65 | 66 | "https://urlscan.io/domain/#{domain}" 67 | end 68 | 69 | def _vt_link 70 | return nil unless domain 71 | 72 | "https://www.virustotal.com/#/domain/#{domain}" 73 | end 74 | end 75 | 76 | class Slack < Base 77 | # 78 | # Notify to Slack 79 | # 80 | # @param [Miteru::Kit] kit 81 | # 82 | def call(kit) 83 | return unless callable? 84 | 85 | attachment = SlackAttachment.new(kit.url) 86 | notifier.post(text: kit.filename_with_size, attachments: attachment.to_a) 87 | end 88 | 89 | def callable? 
# frozen_string_literal: true

module Miteru
  module Notifiers
    #
    # Submits a kit's URL to the urlscan.io scan API.
    #
    class UrlScan < Base
      #
      # Does nothing (returns nil) when no API key is configured.
      #
      # @param [Miteru::Kit] kit
      #
      def call(kit)
        submit(kit.decoded_url, source: kit.source) if callable?
      end

      # @return [Boolean] true when a urlscan API key is configured
      def callable?
        !Miteru.config.urlscan_api_key.nil?
      end

      private

      #
      # @return [::HTTP::Client]
      #
      def http
        @http ||= HTTP::Factory.build(headers:, timeout:)
      end

      def headers
        {"api-key": Miteru.config.urlscan_api_key}
      end

      def timeout
        Miteru.config.api_timeout
      end

      def tags
        %w[miteru phishkit]
      end

      def visibility
        Miteru.config.urlscan_submit_visibility
      end

      #
      # @param [String] url
      # @param [String] source appended to the tags as "source:<source>"
      #
      def submit(url, source:)
        payload = {tags: [*tags, "source:#{source}"], visibility: visibility, url: url}
        http.post("https://urlscan.io/api/v1/scan/", json: payload)
      end
    end
  end
end
7 | array_of_args = non_cached_websites.map { |website| [website.url, website.source] } 8 | Jobs::CrawleJob.perform_bulk(array_of_args, batch_size: Miteru.config.sidekiq_batch_size) 9 | end 10 | 11 | # 12 | # @return [Array] 13 | # 14 | def websites 15 | @websites ||= [].tap do |out| 16 | feeds.each do |feed| 17 | result = feed.result 18 | if result.success? 19 | websites = result.value! 20 | logger.info("Feed:#{feed.source} has #{websites.length} websites.") if verbose? 21 | out << websites 22 | else 23 | logger.warn("Feed:#{feed.source} failed - #{result.failure}") 24 | end 25 | end 26 | end.flatten.uniq(&:url) 27 | end 28 | 29 | # 30 | # @return [Array] 31 | # 32 | def non_cached_websites 33 | @non_cached_websites ||= [].tap do |out| 34 | out << if cache? 35 | websites.reject { |website| cache.cached?(website.url) } 36 | else 37 | websites 38 | end 39 | end.flatten.uniq(&:url) 40 | end 41 | 42 | # 43 | # @return [Array] 44 | # 45 | def feeds 46 | @feeds ||= Miteru.feeds.map(&:new) 47 | end 48 | end 49 | end 50 | -------------------------------------------------------------------------------- /lib/miteru/record.rb: -------------------------------------------------------------------------------- 1 | # frozen_string_literal: true 2 | 3 | module Miteru 4 | class Record < ActiveRecord::Base 5 | class << self 6 | # 7 | # @param [String] sha256 8 | # 9 | # @return [Boolean] true if it is unique. Otherwise false. 10 | # 11 | def unique_sha256?(sha256) 12 | !where(sha256:).exists? 
# frozen_string_literal: true

module Miteru
  #
  # Base class for services
  #
  # Subclasses implement #call; #result runs it and wraps the outcome, turning
  # any StandardError into a failure instead of raising.
  #
  class Service
    include Dry::Monads[:result, :try]

    # Shortcut for `new.call(...)`.
    def self.call(...)
      new.call(...)
    end

    # Shortcut for `new.result(...)`.
    def self.result(...)
      new.result(...)
    end

    def call(*args, **kwargs)
      raise NotImplementedError, "You must implement #{self.class}##{__method__}"
    end

    def result(...)
      Try[StandardError] { call(...) }.to_result
    end

    private

    def auto_download? = Miteru.config.auto_download

    #
    # @return [SemanticLogger]
    #
    def logger = Miteru.logger

    def cache? = Miteru.cache?

    #
    # @return [Miteru::Cache]
    #
    def cache = Miteru.cache

    # NOTE(review): Miteru::Config's attr_config list does not declare
    # `threads` - confirm the setting exists before relying on this reader.
    def threads = Miteru.config.threads

    def verbose? = Miteru.config.verbose

    def cache_prefix = Miteru.config.cache_prefix

    def cache_ex = Miteru.config.cache_ex
  end
end
module Miteru
  #
  # A single crawl target: the page at +url+ is fetched once and inspected for
  # links that may point at downloadable kits.
  #
  class Website < Service
    include Concerns::UrlTruncatable

    # @return [String]
    attr_reader :url

    # @return [String]
    attr_reader :source

    #
    # @param [String] url
    # @param [String] source
    #
    def initialize(url, source:)
      super()

      @url = url
      @source = source
    end

    #
    # @return [String, nil] text of the page's <title> element, if any
    #
    def title
      doc&.at_css("title")&.text
    end

    #
    # @return [Array<Miteru::Kit>] kits built from #links that report themselves as valid
    #
    def kits
      @kits ||= links.map { |link| Kit.new(link, source:) }.select(&:valid?)
    end

    #
    # @return [Boolean] true when the page title starts with "Index of"
    #
    def index?
      title.to_s.start_with? "Index of"
    end

    #
    # @return [Boolean] true when at least one valid kit was found
    #
    def kits?
      kits.any?
    end

    #
    # @return [Array<String>] de-duplicated candidate URLs (anchor hrefs plus extension guesses)
    #
    def links
      (href_links + possible_file_links).compact.uniq
    end

    #
    # @return [String] one-line, human readable summary
    #
    def info
      # NOTE(review): defanged_truncated_url is presumably provided by Concerns::UrlTruncatable - confirm.
      "#{defanged_truncated_url} has #{kits.length} kit(s) (Source: #{source})"
    end

    private

    def timeout
      Miteru.config.http_timeout
    end

    def http
      @http ||= HTTP::Factory.build(timeout:)
    end

    def get
      http.get url
    end

    # The HTTP response is fetched once and reused.
    def response
      @response ||= get
    end

    # Parsed HTML document. Memoized so that #title, #index? and #href_links
    # share one parse instead of re-parsing the same body on every call.
    def doc
      @doc ||= Oga.parse_html(response.body.to_s)
    end

    #
    # Every anchor href on the page, appended to the website URL.
    # Any of the listed fetch/parse errors results in an empty list.
    #
    # @return [Array<String>]
    #
    def href_links
      Try[Addressable::URI::InvalidURIError, Encoding::CompatibilityError, ::HTTP::Error, LL::ParserError,
        OpenSSL::SSL::SSLError, StatusError, ArgumentError] do
        # Loop-invariant: the website URL without its trailing slash.
        base = url.delete_suffix("/")

        doc.css("a").filter_map { |a| a.get("href") }.map do |href|
          base + (href.start_with?("/") ? href : "/#{href}")
        end
      end.recover { [] }.value!
    end

    def file_extensions
      Miteru.config.file_extensions
    end

    #
    # Guesses archive URLs by appending each configured extension to the last
    # path segment of the website URL (e.g. "/a/b" -> "/a/b.zip").
    #
    # Returns an empty list when the URL has no path segment, or when it cannot
    # be parsed by URI, so that a parse failure here does not discard the
    # links already collected by #href_links.
    #
    # @return [Array<String>]
    #
    def possible_file_links
      parsed = URI.parse(url)

      segments = parsed.path.split("/")
      return [] if segments.empty?

      *parents, last = segments
      file_extensions.map do |ext|
        # Work on a copy so every candidate starts from the unmodified URL.
        candidate = parsed.dup
        candidate.path = [*parents, "#{last}#{ext}"].join("/")
        candidate.to_s
      end
    rescue URI::InvalidURIError
      []
    end
  end
end
23 | spec.files = Dir.chdir(File.expand_path(__dir__)) do 24 | `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) } 25 | end 26 | spec.bindir = "exe" 27 | spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } 28 | spec.require_paths = ["lib"] 29 | 30 | spec.add_development_dependency "bundler", "~> 2.5" 31 | spec.add_development_dependency "capybara", "~> 3.40" 32 | spec.add_development_dependency "coveralls_reborn", "~> 0.28" 33 | spec.add_development_dependency "fuubar", "~> 2.5.1" 34 | spec.add_development_dependency "mysql2", "~> 0.5.6" 35 | spec.add_development_dependency "pg", "~> 1.5.9" 36 | spec.add_development_dependency "rake", "~> 13.2.1" 37 | spec.add_development_dependency "rspec", "~> 3.13" 38 | spec.add_development_dependency "simplecov-lcov", "~> 0.8" 39 | spec.add_development_dependency "standard", "~> 1.42.1" 40 | spec.add_development_dependency "test-prof", "~> 1.4.2" 41 | spec.add_development_dependency "vcr", "~> 6.3.1" 42 | spec.add_development_dependency "webmock", "~> 3.24.0" 43 | 44 | spec.add_dependency "activerecord", "8.0.0" 45 | spec.add_dependency "addressable", "2.8.7" 46 | spec.add_dependency "anyway_config", "2.6.4" 47 | spec.add_dependency "colorize", "1.1.0" 48 | spec.add_dependency "dotenv", "3.1.4" 49 | spec.add_dependency "down", "5.4.2" 50 | spec.add_dependency "dry-files", "1.1.0" 51 | spec.add_dependency "dry-monads", "1.6.0" 52 | spec.add_dependency "http", "5.2.0" 53 | spec.add_dependency "memo_wise", "1.10.0" 54 | spec.add_dependency "oga", "3.4" 55 | spec.add_dependency "puma", "6.5.0" 56 | spec.add_dependency "rack", "3.1.8" 57 | spec.add_dependency "rack-session", "2.0.0" 58 | spec.add_dependency "rackup", "2.2.1" 59 | spec.add_dependency "redis", "5.3.0" 60 | spec.add_dependency "semantic_logger", "4.16.1" 61 | spec.add_dependency "sentry-ruby", "5.22.0" 62 | spec.add_dependency "sentry-sidekiq", "5.22.0" 63 | spec.add_dependency "sidekiq", "7.3.6" 64 | spec.add_dependency 
# Smoke test: crawling the fake site that exposes a kit must not raise.
RSpec.describe Miteru::Crawler do
  include_context "with fake HTTP server"
  include_context "with mocked logger"

  subject(:crawler) { described_class.new }

  # Built eagerly so the fake server is up before the example runs.
  let!(:website) do
    Miteru::Website.new("#{server.base_url}/has_kit", source: "dummy")
  end

  describe "#call" do
    it { expect { crawler.call(website) }.not_to raise_error }
  end
end
# Checks how a feed expands its URLs into crawl targets, with and without
# the directory traveling option.
RSpec.describe Miteru::Feeds::Base do
  subject(:feed) { described_class.new("http://127.0.0.1") }

  describe "#decomposed_urls" do
    context "with a URL without path" do
      before do
        allow(feed).to receive(:urls).and_return(["http://127.0.0.1"])
      end

      it { expect(feed.decomposed_urls).to eq(["http://127.0.0.1"]) }
    end

    context "with a URL has path" do
      before do
        allow(feed).to receive(:urls).and_return(["http://127.0.0.1/test/test/index.htm"])
      end

      context "without directory traveling" do
        before do
          allow(Miteru.config).to receive(:directory_traveling).and_return(false)
        end

        it { expect(feed.decomposed_urls).to eq(["http://127.0.0.1"]) }
      end

      context "with directory traveling" do
        before do
          allow(Miteru.config).to receive(:directory_traveling).and_return(true)
        end

        it do
          expected = ["http://127.0.0.1", "http://127.0.0.1/test", "http://127.0.0.1/test/test"]
          expect(feed.decomposed_urls).to eq(expected)
        end
      end
    end
  end
end

Index of /

# Exercises Miteru::Kit against the archive served by the fake HTTP server.
RSpec.describe Miteru::Kit do
  include_context "with fake HTTP server"

  subject(:kit) { described_class.new(base_url + link, source: "dummy") }

  let!(:base_url) { server.base_url }
  let!(:extname) { ".zip" }
  let!(:filename) { "test#{extname}" }
  let!(:link) { "/has_kit/#{filename}" }

  describe "#basename" do
    it { expect(kit.basename).to eq(filename) }
  end

  describe "#filename" do
    it { expect(kit.filename).to eq(filename) }
  end

  describe "#extname" do
    it { expect(kit.extname).to eq(extname) }
  end

  describe "#url" do
    it { expect(kit.url).to eq("#{base_url}#{link}") }
  end

  describe "#valid?" do
    it { expect(kit.valid?).to eq(true) }
  end

  describe "#filepath_to_download" do
    it do
      expected_suffix = "/tmp/#{kit.id}#{kit.extname}"
      expect(kit.filepath_to_download).to include(expected_suffix)
    end
  end

  describe "#filename_with_size" do
    # Without a known file size only the bare filename is reported.
    it { expect(kit.filename_with_size).to eq(filename) }

    context "with filesize" do
      before do
        allow(kit).to receive(:filesize).and_return(1024 * 1024)
      end

      it { expect(kit.filename_with_size).to eq("#{filename} (1 MB)") }
    end
  end

  context "when given a URL encoded link" do
    subject(:kit) { described_class.new("#{base_url}/test%201.zip", source: "dummy") }

    describe "#filename" do
      it { expect(kit.filename).to eq("test 1.zip") }
    end
  end

  context "when given an index.html" do
    subject(:kit) { described_class.new("#{base_url}/index.html", source: "dummy") }

    describe "#valid?" do
      it { expect(kit.valid?).to eq(false) }
    end
  end
end
# Picks the SimpleCov formatter for the current environment:
# - neither CI nor COVERALLS_REPO_TOKEN set: the HTML formatter
# - CI/Coveralls outside GitHub Actions: the Coveralls wrapper only
# - GitHub Actions: the Coveralls wrapper combined with the LCOV formatter
def formatter
  reporting = ENV["CI"] || ENV["COVERALLS_REPO_TOKEN"]
  return SimpleCov::Formatter::HTMLFormatter unless reporting
  return InceptionFormatter unless ENV["GITHUB_ACTIONS"]

  SimpleCov::Formatter::MultiFormatter.new(
    [InceptionFormatter, SimpleCov::Formatter::LcovFormatter]
  )
end
# Minimal Rack application the specs use as a fake website.
#
# It serves the HTML index fixture at "/has_kit", two archive fixtures
# below that path, and a 404 response for every other path.
class FakeHTTP
  extend Forwardable

  # Directory holding the fixture files, resolved relative to this file.
  FIXTURES_DIR = File.expand_path("../../fixtures", __dir__)

  # @return [Rack::Request] request wrapped around the Rack env
  attr_reader :req

  # @param [Hash] env Rack environment
  def initialize(env)
    @req = Rack::Request.new(env)
  end

  def_delegators :req, :path_info

  #
  # Builds the Rack response triplet for the requested path.
  #
  # Statuses are Integers, header names are lowercase and every body responds
  # to #each, as the Rack 3 specification requires. The 404 body was
  # previously a bare String, which does not respond to #each.
  #
  # @return [Array(Integer, Hash, Array<String>)]
  #
  def call
    case path_info
    when "/has_kit"
      [200, {"content-type" => "text/html"}, [File.read(fixture("index.html"))]]
    when "/has_kit/test.tar.gz"
      [200, {"content-type" => "application/gzip"}, [File.binread(fixture("test.tar.gz"))]]
    when "/has_kit/test.zip"
      # Serve the zip fixture here; this route used to return the tar.gz bytes.
      [200, {"content-type" => "application/zip"}, [File.binread(fixture("test.zip"))]]
    else
      [404, {"content-type" => "application/text"}, ["404"]]
    end
  end

  class << self
    # Rack entry point: one FakeHTTP instance per request.
    #
    # @param [Hash] env Rack environment
    def call(env)
      new(env).call
    end
  end

  private

  # @param [String] name fixture file name
  # @return [String] absolute path of the fixture
  def fixture(name)
    File.join(FIXTURES_DIR, name)
  end
end
do 29 | it do 30 | expect(website.kits?).to be(true) 31 | end 32 | end 33 | 34 | describe "#info" do 35 | it do 36 | expect(website.info).to be_a(String) 37 | end 38 | end 39 | end 40 | --------------------------------------------------------------------------------