├── webapp
├── views
│ ├── index.erb
│ ├── console
│ │ └── index.erb
│ ├── layout.erb
│ ├── buckets
│ │ ├── index.erb
│ │ └── show.erb
│ ├── objects
│ │ └── index.erb
│ └── starred
│ │ └── index.erb
└── public
│ └── javascripts
│ └── main.js
├── config.yml.example
├── db
├── models
│ ├── bucket.rb
│ └── bucket_object.rb
└── migrations
│ ├── 003_add_starred_at_to_buckets.rb
│ ├── 004_add_starred_at_to_bucket_objects.rb
│ ├── 001_create_buckets.rb
│ └── 002_create_bucket_objects.rb
├── Gemfile
├── bootstrap.rb
├── LICENSE.txt
├── .gitignore
├── bin
├── bucket_crawler
├── webapp
└── bucket_finder
└── README.md
/webapp/views/index.erb:
--------------------------------------------------------------------------------
1 |
Index
2 |
--------------------------------------------------------------------------------
/config.yml.example:
--------------------------------------------------------------------------------
1 | ---
2 | db_host: localhost
3 | db_port: 5432
4 | db: bucketlist
5 | db_username: bucketlist
6 | db_password: bucketlist
7 |
--------------------------------------------------------------------------------
/db/models/bucket.rb:
--------------------------------------------------------------------------------
# Sequel model for a row of the buckets table.
class Bucket < Sequel::Model
  # Each bucket owns the bucket_objects rows that reference it.
  one_to_many :bucket_objects

  # Whether a starred_at timestamp is currently set on this bucket.
  #
  # @return [Boolean]
  def starred?
    !starred_at.nil?
  end
end
8 |
--------------------------------------------------------------------------------
/db/models/bucket_object.rb:
--------------------------------------------------------------------------------
# Sequel model for a row of the bucket_objects table.
class BucketObject < Sequel::Model
  # Every object row points back at the bucket it was listed in.
  many_to_one :bucket

  # Whether a starred_at timestamp is currently set on this object.
  #
  # @return [Boolean]
  def starred?
    !starred_at.nil?
  end
end
8 |
--------------------------------------------------------------------------------
/Gemfile:
--------------------------------------------------------------------------------
1 | source "https://rubygems.org"
2 |
3 | gem "ox"
4 | gem "httparty"
5 | gem "thread"
6 | gem "sequel"
7 | gem "pg"
8 | gem "sinatra"
9 | gem "thin"
10 | gem "colorize"
11 |
--------------------------------------------------------------------------------
/db/migrations/003_add_starred_at_to_buckets.rb:
--------------------------------------------------------------------------------
# Adds an indexed starred_at timestamp column to the buckets table.
Sequel.migration do
  up do
    alter_table(:buckets) do
      add_column(:starred_at, DateTime)
      add_index(:starred_at)
    end
  end

  # Rollback removes the column; no separate drop_index is issued here.
  down do
    alter_table(:buckets) do
      drop_column(:starred_at)
    end
  end
end
15 |
--------------------------------------------------------------------------------
/db/migrations/004_add_starred_at_to_bucket_objects.rb:
--------------------------------------------------------------------------------
# Adds an indexed starred_at timestamp column to the bucket_objects table.
Sequel.migration do
  up do
    alter_table(:bucket_objects) do
      add_column(:starred_at, DateTime)
      add_index(:starred_at)
    end
  end

  # Rollback removes the column; no separate drop_index is issued here.
  down do
    alter_table(:bucket_objects) do
      drop_column(:starred_at)
    end
  end
end
15 |
--------------------------------------------------------------------------------
/db/migrations/001_create_buckets.rb:
--------------------------------------------------------------------------------
# Creates the buckets table: one row per probed bucket name.
Sequel.migration do
  change do
    create_table(:buckets) do
      primary_key :id
      # The name is both indexed and declared unique.
      String :name, index: true, unique: true
      String :url
      Boolean :exists, index: true
      Boolean :public, index: true
      DateTime :crawled_at, index: true
      DateTime :updated_at
      DateTime :created_at, index: true
    end
  end
end
15 |
--------------------------------------------------------------------------------
/db/migrations/002_create_bucket_objects.rb:
--------------------------------------------------------------------------------
# Creates the bucket_objects table: one row per object listed in a bucket.
Sequel.migration do
  change do
    create_table(:bucket_objects) do
      primary_key :id
      # Deleting a bucket cascades to its object rows.
      foreign_key :bucket_id, :buckets, on_delete: :cascade, index: true
      String :key, index: true
      String :url
      String :etag, index: true
      Integer :size, index: true
      Boolean :public, index: true
      String :storage_class, index: true
      DateTime :last_modified_at, index: true
      DateTime :updated_at
      DateTime :created_at, index: true
    end
  end
end
18 |
--------------------------------------------------------------------------------
/bootstrap.rb:
--------------------------------------------------------------------------------
# Shared start-up for every bin/ script: loads the gems, reads config.yml,
# connects to PostgreSQL, applies pending migrations and loads the models.
require "rubygems"
require "sequel"
require "pg"
require "httparty"
require "thread/pool"
require "ox"
require "cgi"
require "logger"
require "timeout"
require "sinatra"
require "colorize"
require "yaml"

CONFIG_FILE_PATH = File.join(File.dirname(__FILE__), "config.yml").freeze

# File.exists? was removed in Ruby 3.2; File.exist? works on every version.
unless File.exist?(CONFIG_FILE_PATH)
  puts "Error: Configuration file has not been created!\n".red
  puts "Copy " + "config.yml.example".bold + " to " + "config.yml".bold + " and change configuration to work with your setup."
  exit 1
end

CONFIG = YAML.load_file(CONFIG_FILE_PATH)

# An options hash is used instead of an interpolated postgres:// URL so that
# credentials containing URL-reserved characters (@, :, /) reach the adapter
# untouched.
DB = Sequel.connect(
  :adapter => "postgres",
  :host => CONFIG["db_host"],
  :port => CONFIG["db_port"],
  :database => CONFIG["db"],
  :user => CONFIG["db_username"],
  :password => CONFIG["db_password"],
  :max_connections => 25
)
Sequel.extension :migration
Sequel::Model.db.extension(:pagination)
Sequel::Model.plugin :timestamps
# Migrations are applied on every start, before the models are loaded.
Sequel::Migrator.run(DB, File.join(File.dirname(__FILE__), "db", "migrations"), :use_transactions => true)

require_relative "db/models/bucket"
require_relative "db/models/bucket_object"
32 |
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 | The MIT License (MIT)
2 |
3 | Copyright (c) 2017 Michael Henriksen
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in
13 | all copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 | THE SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | *.gem
2 | *.rbc
3 | /.config
4 | /coverage/
5 | /InstalledFiles
6 | /pkg/
7 | /spec/reports/
8 | /spec/examples.txt
9 | /test/tmp/
10 | /test/version_tmp/
11 | /tmp/
12 |
13 | # Used by dotenv library to load environment variables.
14 | # .env
15 |
16 | ## Specific to RubyMotion:
17 | .dat*
18 | .repl_history
19 | build/
20 | *.bridgesupport
21 | build-iPhoneOS/
22 | build-iPhoneSimulator/
23 |
24 | ## Specific to RubyMotion (use of CocoaPods):
25 | #
26 | # We recommend against adding the Pods directory to your .gitignore. However
27 | # you should judge for yourself, the pros and cons are mentioned at:
28 | # https://guides.cocoapods.org/using/using-cocoapods.html#should-i-check-the-pods-directory-into-source-control
29 | #
30 | # vendor/Pods/
31 |
32 | ## Documentation cache and generated files:
33 | /.yardoc/
34 | /_yardoc/
35 | /doc/
36 | /rdoc/
37 |
38 | ## Environment normalization:
39 | /.bundle/
40 | /vendor/bundle
41 | /lib/bundler/man/
42 |
43 | # for a library or gem, you might want to ignore these files since the code is
44 | # intended to run in multiple environments; otherwise, check them in:
45 | Gemfile.lock
46 | .ruby-version
47 | .ruby-gemset
48 |
49 | # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50 | .rvmrc
51 |
52 | config.yml
53 | /wordlists/
54 |
--------------------------------------------------------------------------------
/webapp/views/console/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 |
13 |
14 | <% if !@query %>
15 | Enter an SQL query to execute...
16 | <% elsif @error %>
17 | <%=h @error %>
18 | <% elsif @query && @rows && @rows.count.zero? %>
19 | Nothing to show
20 | <% else %>
21 |
22 |
23 |
24 | <% @columns.each do |column| %>
25 | | <%=h column.to_s %> |
26 | <% end %>
27 |
28 |
29 |
30 | <% @rows.each do |row| %>
31 |
32 | <% row.values.each do |value| %>
33 | <% if url?(value) %>
34 | | <%=h value %> |
35 | <% else %>
36 | <%=h value %> |
37 | <% end %>
38 | <% end %>
39 |
40 | <% end %>
41 |
42 |
43 | <% end %>
44 |
--------------------------------------------------------------------------------
/webapp/public/javascripts/main.js:
--------------------------------------------------------------------------------
// Wires the star / unstar toggle buttons for buckets and objects.
//
// $(fn) replaces $(document).on("ready", fn): the latter was removed in
// jQuery 3 and silently never fires there.
$(function() {
  // Binds a click handler to every element matching `selector`.
  //   resource    - URL prefix, "buckets" or "objects"
  //   idAttribute - data attribute holding the record id
  // The button is disabled while the POST is in flight and re-enabled when it
  // completes, whether it succeeded or failed; its look only changes on
  // success.
  function bindStarToggle(selector, resource, idAttribute) {
    $(selector).on("click", function(e) {
      e.preventDefault();
      var $button = $(this);
      var wasStarred = $button.hasClass("active");
      var action = wasStarred ? "unstar" : "star";

      $button.addClass("disabled").attr("disabled", "disabled");

      $.post("/" + resource + "/" + $button.attr(idAttribute) + "/" + action)
        .done(function() {
          if (wasStarred) {
            $button.removeClass("btn-warning active").addClass("btn-default");
          } else {
            $button.addClass("btn-warning active");
          }
          // NOTE(review): the markup passed here is empty in this copy of the
          // file; confirm whether icon HTML was intended.
          $button.html('');
        })
        .always(function() {
          // Previously a failed request left the button disabled for good.
          $button.removeClass("disabled").removeAttr("disabled");
        });

      return false;
    });
  }

  bindStarToggle("a.star-bucket", "buckets", "data-bucket-id");
  bindStarToggle("a.star-object", "objects", "data-object-id");
});
48 |
--------------------------------------------------------------------------------
/bin/bucket_crawler:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require_relative "../bootstrap"
3 |
4 | THREADS = 10
5 | MUTEX = Mutex.new
6 |
# Formatting helpers for the crawler's console output.
module Util
  # Unit suffixes, largest first; the last entry is plain bytes.
  HUMAN_PREFIXES = %w(TB GB MB KB B).freeze

  # Renders a byte count as a short string such as "276B", "1.5KB" or "10MB".
  #
  # The value is divided by 1024 while it is above 512 and a larger unit is
  # still available. Values above 9, or with a fractional part below 0.1, are
  # printed as integers; everything else gets one decimal.
  #
  # @param number [#to_f] size in bytes
  # @return [String]
  def self.number_to_human_size(number)
    amount = number.to_f
    unit = HUMAN_PREFIXES.length - 1
    while amount > 512 && unit > 0
      unit -= 1
      amount /= 1024
    end
    whole = amount > 9 || amount.modulo(1) < 0.1
    format(whole ? "%d" : "%.1f", amount) + HUMAN_PREFIXES[unit]
  end
end
20 |
# Runs the given block while holding MUTEX, so that lines printed from
# different worker threads are not interleaved.
def with_mutex
  MUTEX.synchronize do
    yield
  end
end
24 |
# Time.parse lives in the stdlib "time" extension; require it explicitly
# instead of relying on another gem having loaded it.
require "time"

# Public buckets that have not been crawled yet.
buckets = Bucket.where(:public => true, :crawled_at => nil)

if buckets.count.zero?
  puts "All public buckets have been crawled. Time to find more with bucket_finder."
  exit
end

buckets.each do |bucket|
  puts "├── #{bucket.url.bold}"
  begin
    # Fetch and parse the bucket listing, then probe every listed object on a
    # worker pool.
    response = HTTParty.get(bucket.url, :verify => false, :timeout => 30)
    body = Ox.parse(response.body)
    contents = body.root.locate("Contents")
    thread_pool = Thread.pool(THREADS)
    begin
      contents.each do |content|
        thread_pool.process do
          key = nil
          begin
            key = content.Key.text
            object = BucketObject.new(
              :bucket_id => bucket.id,
              :key => key,
              :url => "#{bucket.url}#{CGI.escape(key).gsub('%2F', '/')}",
              :etag => content.ETag.text.gsub('"', ""),
              :size => content.Size.text.to_i,
              :storage_class => content.StorageClass.text,
              :last_modified_at => Time.parse(content.LastModified.text)
            )
            # Keys ending in "/" are skipped: they are neither probed nor saved.
            if !object.key.end_with?("/")
              content_response = HTTParty.head(object.url, :verify => false, :timeout => 30)
              if content_response.code == 200
                object.public = true
                with_mutex { puts "│ ├── " + "PUBLIC: #{object.url.bold} (#{Util.number_to_human_size(object.size)})".green }
              else
                object.public = false
                with_mutex { puts "│ ├── " + "PRIVATE: #{object.url.bold} (#{Util.number_to_human_size(object.size)})".yellow }
              end
              object.save
            end
          rescue => e
            with_mutex { puts "│ ├── " + " ERROR: Key: #{key}: #{e.class}: #{e.message}".red }
          end
        end
      end
    ensure
      # Always shut the pool down, even when enqueueing failed half-way, so
      # its worker threads are not leaked.
      thread_pool.shutdown
    end
    # Only mark the bucket as crawled once the pool has been shut down.
    bucket.crawled_at = Time.now
    bucket.save
  rescue => e
    # The original interpolated `key` here, which is not defined in this scope
    # and raised NameError from inside the handler, aborting the whole crawl.
    puts "│ ├── " + " ERROR: #{bucket.url}: #{e.class}: #{e.message}".red
  end
end
75 |
--------------------------------------------------------------------------------
/webapp/views/layout.erb:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 | Bucketlist Browser
8 |
9 |
13 |
14 |
15 |
43 |
44 |
45 | <%= yield %>
46 |
47 |
48 |
49 |
51 |
52 |
--------------------------------------------------------------------------------
/webapp/views/buckets/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 |
10 |
11 | <% if @buckets.count.zero? %>
12 | Nothing to show
13 | <% else %>
14 |
15 |
16 |
17 | | Name |
18 | URL |
19 | Crawled At |
20 | Discovered At |
21 | |
22 |
23 |
24 |
25 | <% @buckets.each do |bucket| %>
26 |
27 | | <%=h bucket.name %> |
28 | <%=h bucket.url %> |
29 | <%=h bucket.crawled_at ? bucket.crawled_at.strftime("%d/%m/%Y %H:%M") : "Never" %> |
30 | <%=h bucket.created_at.strftime("%d/%m/%Y %H:%M") %> |
31 | <% if bucket.starred? %>
32 | |
33 | <% else %>
34 | |
35 | <% end %>
36 |
37 | <% end %>
38 |
39 |
40 |
41 |
61 | <% end %>
62 |
--------------------------------------------------------------------------------
/webapp/views/objects/index.erb:
--------------------------------------------------------------------------------
1 |
8 |
9 |
14 |
15 | <% if @objects.count.zero? %>
16 | Nothing to show
17 | <% else %>
18 |
19 |
20 |
21 | | Key |
22 | Bucket |
23 | Size |
24 | Access |
25 | Last Modified At |
26 | |
27 |
28 |
29 |
30 | <% @objects.all.each do |object| %>
31 |
32 | | <%= format_path(object.key) %> |
33 | <%=h object.bucket.name %> |
34 | <%= number_to_human_size(object.size) %> |
35 | <%=h object.public ? "Public" : "Private" %> |
36 | <%=h object.last_modified_at.strftime("%d/%m/%Y %H:%M") %> |
37 | <% if object.starred? %>
38 | |
39 | <% else %>
40 | |
41 | <% end %>
42 |
43 | <% end %>
44 |
45 |
46 |
47 |
67 | <% end %>
68 |
--------------------------------------------------------------------------------
/webapp/views/buckets/show.erb:
--------------------------------------------------------------------------------
1 |
10 |
11 |
16 |
17 | <% if @objects.count.zero? %>
18 | Nothing to show
19 | <% else %>
20 |
21 |
22 |
23 | | Key |
24 | Size |
25 | Access |
26 | Last Modified At |
27 | |
28 |
29 |
30 |
31 | <% @objects.each do |object| %>
32 |
33 | | <%= format_path(object.key) %> |
34 | <%= number_to_human_size(object.size) %> |
35 | <%=h object.public ? "Public" : "Private" %> |
36 | <%=h object.last_modified_at.strftime("%d/%m/%Y %H:%M") %> |
37 | <% if object.starred? %>
38 | |
39 | <% else %>
40 | |
41 | <% end %>
42 |
43 | <% end %>
44 |
45 |
46 |
47 |
67 | <% end %>
68 |
--------------------------------------------------------------------------------
/webapp/views/starred/index.erb:
--------------------------------------------------------------------------------
1 |
4 |
5 | <% if @buckets.count.zero? %>
6 | You haven't starred any Buckets yet.
7 | <% else %>
8 |
9 |
10 |
11 | | Name |
12 | URL |
13 | Crawled At |
14 | Discovered At |
15 | |
16 |
17 |
18 |
19 | <% @buckets.each do |bucket| %>
20 |
21 | | <%=h bucket.name %> |
22 | <%=h bucket.url %> |
23 | <%=h bucket.crawled_at ? bucket.crawled_at.strftime("%d/%m/%Y %H:%M") : "Never" %> |
24 | <%=h bucket.created_at.strftime("%d/%m/%Y %H:%M") %> |
25 | <% if bucket.starred? %>
26 | |
27 | <% else %>
28 | |
29 | <% end %>
30 |
31 | <% end %>
32 |
33 |
34 | <% end %>
35 |
36 |
39 |
40 | <% if @objects.count.zero? %>
41 | You haven't starred any Objects yet.
42 | <% else %>
43 |
44 |
45 |
46 | | Key |
47 | Bucket |
48 | Size |
49 | Access |
50 | Last Modified At |
51 | |
52 |
53 |
54 |
55 | <% @objects.all.each do |object| %>
56 |
57 | | <%= format_path(object.key) %> |
58 | <%=h object.bucket.name %> |
59 | <%= number_to_human_size(object.size) %> |
60 | <%=h object.public ? "Public" : "Private" %> |
61 | <%=h object.last_modified_at.strftime("%d/%m/%Y %H:%M") %> |
62 | <% if object.starred? %>
63 | |
64 | <% else %>
65 | |
66 | <% end %>
67 |
68 | <% end %>
69 |
70 |
71 | <% end %>
72 |
--------------------------------------------------------------------------------
/bin/webapp:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | require_relative "../bootstrap"
4 | DB.loggers << Logger.new($stdout)
5 |
# Sinatra application for browsing the buckets and objects stored by
# bucket_finder and bucket_crawler.
#
# NOTE(review): nothing in this class authenticates requests, and POST
# /console executes caller-supplied SQL verbatim. Confirm the app is only
# reachable from trusted hosts.
class WebApp < Sinatra::Base
  set :server, :thin
  set :environment, :production
  set :logging, false
  set :sessions, true
  set :app_file, __FILE__
  set :root, File.expand_path(File.dirname(__FILE__))
  set :public_folder, proc { File.join(root, "..", "webapp", "public") }
  set :views, proc { File.join(root, "..", "webapp", "views") }

  helpers do
    # Unit suffixes, largest first; the last entry is plain bytes.
    HUMAN_PREFIXES = %w(TB GB MB KB B).freeze

    alias_method :h, :escape_html

    # Renders a byte count as a short string such as "276B" or "1.5KB".
    # Divides by 1024 while the value is above 512 and a larger unit exists.
    def number_to_human_size(number)
      s = number.to_f
      i = HUMAN_PREFIXES.length - 1
      while s > 512 && i > 0
        i -= 1
        s /= 1024
      end
      ((s > 9 || s.modulo(1) < 0.1 ? "%d" : "%.1f") % s) + "#{HUMAN_PREFIXES[i]}"
    end

    # Returns the HTML-escaped object key, with a long directory part
    # shortened by ellipsisize. Templates emit the result without escaping it
    # again.
    def format_path(path)
      dirname = File.dirname(path)
      basename = File.basename(path)
      if dirname == "."
        "#{h basename}"
      else
        "#{h ellipsisize(dirname, 60, 25)}/#{h basename}"
      end
    end

    # Shortens a string to its first and last edge_length characters joined
    # by "...". Strings shorter than minimum_length, or not longer than twice
    # edge_length, are returned unchanged.
    def ellipsisize(string, minimum_length=4, edge_length=3)
      return string if string.length < minimum_length || string.length <= edge_length * 2
      edge = "." * edge_length
      mid_length = string.length - edge_length * 2
      string.gsub(/(#{edge}).{#{mid_length},}(#{edge})/, '\1...\2')
    end

    # True when the value parses as an http or https URI.
    def url?(string)
      uri = URI.parse(string)
      %w(http https).include?(uri.scheme)
    rescue URI::BadURIError, URI::InvalidURIError
      false
    end

    # The "query" request parameter, or nil when it is missing or empty.
    def search_term
      term = params[:query].to_s
      term.empty? ? nil : term
    end

    # The requested page number, never below 1. Sequel's paginate raises for
    # page numbers below 1, which used to turn ?page=0 or ?page=abc into a
    # server error.
    def current_page
      [params[:page].to_s.to_i, 1].max
    end

    # Case-insensitive "contains" condition: LOWER(column) LIKE '%term%'.
    # Built from Sequel expressions because plain-string filters raise under
    # Sequel 5 unless the auto_literal_strings extension is loaded.
    # LIKE wildcards typed by the user are still passed through unescaped.
    def contains_ci(column, term)
      Sequel.like(Sequel.function(:lower, column), "%#{term.downcase}%")
    end

    # Shared body of the star / unstar routes: looks the record up by the :id
    # route parameter, sets or clears starred_at, and answers 200 (404 when
    # the record does not exist).
    def update_star(model, starred)
      record = model.where(:id => params[:id]).first
      halt(404) unless record
      record.starred_at = starred ? Time.now : nil
      record.save
      halt 200
    end
  end

  before do
    response.headers["X-Content-Type-Options"] = "nosniff"
    response.headers["X-XSS-Protection"] = "1; mode=block"
    response.headers["X-Frame-Options"] = "deny"
  end

  get "/" do
    redirect("/buckets")
  end

  # Crawled buckets, optionally filtered by name, 100 per page.
  get "/buckets" do
    @query = search_term
    @buckets = Bucket.exclude(:crawled_at => nil)
    @buckets = @buckets.where(contains_ci(:name, @query)) if @query
    @buckets = @buckets.order(:name).paginate(current_page, 100)
    erb :"buckets/index"
  end

  # Objects of one crawled bucket, optionally filtered by key, 250 per page.
  get "/buckets/:name" do
    @bucket = Bucket.where(:name => params[:name]).exclude(:crawled_at => nil).first
    halt(404) unless @bucket
    @query = search_term
    @objects = @bucket.bucket_objects_dataset
    @objects = @objects.where(contains_ci(:key, @query)) if @query
    @objects = @objects.order(:key).paginate(current_page, 250)
    erb :"buckets/show"
  end

  post "/buckets/:id/star" do
    update_star(Bucket, true)
  end

  post "/buckets/:id/unstar" do
    update_star(Bucket, false)
  end

  # All objects, 250 per page: by key when searching, newest first otherwise.
  get "/objects" do
    @query = search_term
    @objects =
      if @query
        BucketObject.where(contains_ci(:key, @query)).order(:key)
      else
        BucketObject.order(Sequel.desc(:created_at), :key)
      end
    @objects = @objects.paginate(current_page, 250).eager(:bucket)
    erb :"objects/index"
  end

  post "/objects/:id/star" do
    update_star(BucketObject, true)
  end

  post "/objects/:id/unstar" do
    update_star(BucketObject, false)
  end

  get "/console" do
    erb :"console/index"
  end

  # Runs the submitted SQL as-is and hands the dataset to the console view.
  # Reading the column names executes the query once, so database errors are
  # reported through @error instead of surfacing while rendering.
  post "/console" do
    @query = search_term
    if @query
      begin
        @rows = DB[@query]
        @columns = @rows.columns.map(&:to_s)
      rescue Sequel::DatabaseError => e
        @error = e.message
      end
    end
    erb :"console/index"
  end

  get "/starred" do
    @buckets = Bucket.exclude(:starred_at => nil).order(Sequel.asc(:created_at))
    @objects = BucketObject.exclude(:starred_at => nil).order(Sequel.asc(:created_at)).eager(:bucket)
    erb :"starred/index"
  end

  # Redirects to a random public, crawled bucket; 404 when there is none
  # (previously a NoMethodError on nil).
  get "/random_bucket" do
    bucket = Bucket.where(:public => true).exclude(:crawled_at => nil).order(Sequel.lit("RANDOM()")).first
    halt(404) unless bucket
    # The name is a URL path segment, so it is URL-encoded rather than
    # HTML-escaped; CGI.escape turns spaces into "+", which is rewritten to
    # "%20" for use in a path.
    redirect("/buckets/#{CGI.escape(bucket.name).gsub('+', '%20')}")
  end
end
158 |
159 | WebApp.run!(:port => 3000)
160 |
--------------------------------------------------------------------------------
/bin/bucket_finder:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 | require_relative "../bootstrap"
3 |
4 | THREADS = 10
5 | MUTEX = Mutex.new
6 |
# Reads candidate bucket names from a wordlist file and expands each word
# with common environment-style prefixes and suffixes.
class Wordlist
  # format patterns used to join a word with a permutation word.
  PERMUTATION_PATTERNS = %w(%s.%s %s-%s %s%s).freeze
  PERMUTATION_WORDS = %w(backup backups dev development prod production stage staging test testing).freeze

  # @param filepath [String] path to a readable wordlist file
  # @raise [RuntimeError] when the path is not a readable regular file
  def initialize(filepath)
    # File.readable? alone also accepts directories, which would only fail
    # later while reading.
    unless File.file?(filepath) && File.readable?(filepath)
      raise "Wordlist #{filepath} does not exist or is not readable"
    end
    @filepath = filepath
  end

  # Yields every non-blank word of the file (stripped), each followed by its
  # permutations: for every pattern and permutation word, first
  # word+permutation and then permutation+word.
  #
  # The file is streamed line by line rather than read into memory at once,
  # since wordlists can be large.
  #
  # @yieldparam name [String] candidate bucket name
  # @return [Enumerator] when called without a block
  def run
    return enum_for(:run) unless block_given?

    File.foreach(@filepath) do |line|
      word = line.strip
      next if word.empty?
      yield word
      PERMUTATION_PATTERNS.each do |permutation_pattern|
        PERMUTATION_WORDS.each do |permutation_word|
          yield format(permutation_pattern, word, permutation_word)
          yield format(permutation_pattern, permutation_word, word)
        end
      end
    end
  end
end
32 |
# Probes a single bucket name against S3 with an HTTP HEAD request and
# remembers the outcome.
class S3Bucket
  REGION = "s3.amazonaws.com".freeze

  # Browser User-Agent strings; one is picked at random per request.
  USER_AGENTS = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/603.2.4 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.4",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 6.1; WOW64; Trident/7.0; rv:11.0) like Gecko",
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.12; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36 Edge/15.15063",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.79 Safari/537.36 Edge/14.14393",
    "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_6) AppleWebKit/603.2.5 (KHTML, like Gecko) Version/10.1.1 Safari/603.2.5",
    "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/56.0.2924.87 Safari/537.36",
    "Mozilla/5.0 (Windows NT 6.3; WOW64; rv:54.0) Gecko/20100101 Firefox/54.0",
    "Mozilla/5.0 (Windows NT 10.0; WOW64; Trident/7.0; rv:11.0) like Gecko"
  ].freeze

  attr_reader :name

  # @param name [String] bucket name to probe
  def initialize(name)
    @name = name
  end

  # Any status other than 404 counts as "the bucket exists".
  def exists?
    response.code != 404
  end

  # Only a 200 response counts as publicly accessible.
  def public?
    response.code == 200
  end

  # The URL the bucket finally answered on. The URL is only known after the
  # probe, so the probe is triggered here if it has not run yet; previously
  # this returned nil unless exists? or public? had been called first.
  def url
    response
    @url
  end

  private

  # Memoized probe result: the HEAD request is sent at most once per instance.
  def response
    @response ||= request_bucket
  end

  # Tries the path-style URL first and, on a 301, retries with the
  # virtual-hosted-style URL. @url is left pointing at the last URL tried.
  def request_bucket
    @url = "https://#{REGION}/#{escape(name)}/"
    result = head(@url)
    if result.code == 301
      @url = "https://#{name}.#{REGION}/"
      result = head(@url)
    end
    result
  end

  # One HEAD request with a random User-Agent. TLS verification is disabled
  # and the timeout is 30 seconds.
  def head(target)
    HTTParty.head(target, :headers => { "User-Agent" => USER_AGENTS.sample }, :verify => false, :timeout => 30)
  end

  def escape(string)
    CGI.escape(string)
  end
end
101 |
# Runs the given block while holding MUTEX, so that lines printed from
# different worker threads are not interleaved.
def with_mutex
  MUTEX.synchronize do
    yield
  end
end
105 |
if ARGV.empty?
  # $0 is the script name; the original interpolated the integer literal 0
  # and printed "USAGE: 0 WORDLIST".
  puts "USAGE: #{$0} WORDLIST"
  exit 1
end

wordlist = Wordlist.new(ARGV.first)
thread_pool = Thread.pool(THREADS)

begin
  # Every candidate name is probed on the worker pool. Names already stored
  # in the database are skipped, which is what lets an interrupted run resume.
  wordlist.run do |bucket_name|
    thread_pool.process do
      begin
        if Bucket.where(:name => bucket_name).limit(1).count.zero?
          bucket = S3Bucket.new(bucket_name)
          if bucket.exists?
            if bucket.public?
              with_mutex { puts " + PUBLIC: #{bucket.url.bold}".green }
            else
              with_mutex { puts " - PRIVATE: #{bucket.url.bold}".yellow }
            end
          end
          # Non-existent names are stored too, so they are not probed again.
          Bucket.new(
            :name => bucket.name,
            :url => bucket.url,
            :exists => bucket.exists?,
            :public => bucket.public?
          ).save
        end
      rescue => e
        with_mutex { puts " ! ERROR: Bucket: #{bucket_name.bold}: #{e.class}: #{e.message}".red }
      end
    end
  end
ensure
  # Shut the pool down even when reading the wordlist raised.
  thread_pool.shutdown
end
139 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Bucketlist
2 |
3 | Bucketlist is a quick project I threw together to find and crawl [Amazon S3] buckets and put all the data into a PostgreSQL database for querying.
4 |
5 | ## Requirements
6 |
7 | Bucketlist requires a recent version of Ruby and the PostgreSQL database system installed.
8 |
9 | ## Setup
10 |
11 | * Check out the code to a location of your choice and navigate to it in a terminal
12 | * Install Bundler (unless you already have it) with: `gem install bundler`
13 | * Install gem dependencies with `bundle install`
14 | * Create a new PostgreSQL user with: `createuser -s bucketlist --pwprompt` (you might need to `sudo su postgres` first)
15 | * Create a new PostgreSQL database with: `createdb -O bucketlist bucketlist` (you might need to `sudo su postgres` first)
16 | * Copy the example configuration file with: `cp config.yml.example config.yml`
17 | * Edit the settings in `config.yml` to match your setup
18 | * ???
19 | * Profit!
20 |
21 | ## Finding Buckets
22 |
23 | Bucketlist finds buckets using a dictionary brute force, a bit like subdomain bruteforcing, so you will need a dictionary of words. The [SecLists] project on GitHub has a good collection of wordlists.
24 |
25 | When you have a wordlist, simply run the `bucket_finder` script in a terminal:
26 |
27 | $ bin/bucket_finder path/to/wordlist.lst
28 | - PRIVATE: https://s3.amazonaws.com/somebucket.backups/
29 | - PRIVATE: https://s3.amazonaws.com/somebucket.backup/
30 | - PRIVATE: https://s3.amazonaws.com/backups.somebucket/
31 | - PRIVATE: https://s3.amazonaws.com/backup.somebucket/
32 | + PUBLIC: https://somebucket.dev.s3.amazonaws.com/
33 | - PRIVATE: https://s3.amazonaws.com/production.somebucket/
34 | ...
35 |
36 | The script will find buckets and store information about them in the database. The script can be stopped at any time. If you run it again with the same wordlist, it will proceed where it left off.
37 |
38 | ### Bucket name permutations
39 |
40 | To maximize discovery, bucket_finder will perform simple permutations on each word in the given wordlist. As an example, if the wordlist contains the word `example`, bucket_finder will check for the existence of any of the following buckets:
41 |
42 | ```
43 | example
44 | example.backup
45 | backup.example
46 | example.backups
47 | backups.example
48 | example.dev
49 | dev.example
50 | example.development
51 | development.example
52 | example.prod
53 | prod.example
54 | example.production
55 | production.example
56 | example.stage
57 | stage.example
58 | example.staging
59 | staging.example
60 | example.test
61 | test.example
62 | example.testing
63 | testing.example
64 | example-backup
65 | backup-example
66 | example-backups
67 | backups-example
68 | example-dev
69 | dev-example
70 | example-development
71 | development-example
72 | example-prod
73 | prod-example
74 | example-production
75 | production-example
76 | example-stage
77 | stage-example
78 | example-staging
79 | staging-example
80 | example-test
81 | test-example
82 | example-testing
83 | testing-example
84 | examplebackup
85 | backupexample
86 | examplebackups
87 | backupsexample
88 | exampledev
89 | devexample
90 | exampledevelopment
91 | developmentexample
92 | exampleprod
93 | prodexample
94 | exampleproduction
95 | productionexample
96 | examplestage
97 | stageexample
98 | examplestaging
99 | stagingexample
100 | exampletest
101 | testexample
102 | exampletesting
103 | testingexample
104 | ```
105 |
106 | ## Crawling Buckets
107 |
108 | When buckets have been discovered with `bucket_finder`, the `bucket_crawler` script can be used to crawl the contents of the public buckets and save information about the files to the database:
109 |
110 | $ bin/bucket_crawler
111 | ├── https://somebucket.dev.s3.amazonaws.com/
112 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-10-11-21-44-41-0DE7B75AC6F56AB6 (276B)
113 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-10-11-22-17-33-0EF1F7575568BC41 (374B)
114 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-10-11-21-30-12-9517510CD37C9D98 (320B)
115 | ...
116 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-11-07-09-34-44-A23E12B5C822DEB0 (375B)
117 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-11-07-10-51-12-4DB562D370986482 (374B)
118 | │ ├── PRIVATE: https://somebucket.dev.s3.amazonaws.com/logs/2014-11-07-11-17-56-A58FF2F17296FB3E (375B)
119 | ├── https://s3.amazonaws.com/someotherbucket/
120 | │ ├── PUBLIC: https://s3.amazonaws.com/someotherbucket/3-DuisUtRisusCursus.mp4 (9MB)
121 | │ ├── PUBLIC: https://s3.amazonaws.com/someotherbucket/crossdomain.xml (198B)
122 | │ ├── PUBLIC: https://s3.amazonaws.com/someotherbucket/6-AeneanLobortisRutrumLoremEuFermentum.mp4 (19MB)
123 | ...
124 |
125 | The bucket_crawler script will find any public bucket in the database that hasn't been crawled yet, and can be run at any time.
126 |
127 | ## Browsing the Loot
128 |
129 | All the data collected by `bucket_finder` and `bucket_crawler` is stored in a simple database schema and can of course be queried in all kinds of interesting ways with SQL, but Bucketlist also includes a simple web application for browsing the information in a convenient way. You can start the web application with:
130 |
131 | $ bin/webapp
132 | == Sinatra (v2.0.0) has taken the stage on 3000 for production with backup from Thin
133 | Thin web server (v1.7.2 codename Bachmanity)
134 | Maximum connections set to 1024
135 | Listening on 0.0.0.0:3000, CTRL+C to stop
136 |
137 | Now you can browse to [http://localhost:3000/](http://localhost:3000/) and go Bucket spelunking!
138 |
139 | ## DISCLAIMER
140 |
141 | This code is meant for security professionals. I take **no** responsibility and assume no liability for the manner in which this code is used by you.
142 |
143 | [Amazon S3]: https://aws.amazon.com/s3/
144 | [SecLists]: https://github.com/danielmiessler/SecLists
145 |
--------------------------------------------------------------------------------