├── .gitignore ├── .gitmodules ├── .travis.yml ├── Crawl.go ├── Layout.sql ├── README.md ├── analytics ├── db.go └── main.go ├── cron ├── Crawl.go ├── README.md └── database.go ├── database.go ├── frontend ├── db.go ├── main.go └── stats.go └── list.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Compiled Object files, Static and Dynamic libs (Shared Objects) 2 | *.o 3 | *.a 4 | *.so 5 | 6 | # Folders 7 | _obj 8 | _test 9 | 10 | # Architecture specific extensions/prefixes 11 | *.[568vq] 12 | [568vq].out 13 | 14 | *.cgo1.go 15 | *.cgo2.c 16 | _cgo_defun.c 17 | _cgo_gotypes.go 18 | _cgo_export.* 19 | 20 | _testmain.go 21 | 22 | *.exe 23 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "frontend/public"] 2 | path = frontend/public 3 | url=git://github.com/benjojo/Domainiator_frontend.git -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 4 | - 1.2 5 | - 1.4 6 | - tip 7 | 8 | before_install: 9 | - go get 10 | - mysql -e 'create database Domaniator;' 11 | - cat Layout.sql | mysql 12 | 13 | 14 | script: 15 | - go build 16 | - ./Domainiator -input list.txt 17 | - mysql -e "SELECT * FROM Domaniator.Results" 18 | - cd cron 19 | - go build 20 | - ./cron 21 | -------------------------------------------------------------------------------- /Crawl.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "net" 10 | "net/http" 11 | "os" 12 | "runtime" 13 | "strings" 14 | "sync" 15 | "time" 16 | ) 17 | 18 | type LogPayload struct { 19 | Sucessful bool 20 | Headers http.Header 21 | DNSIP string 22 | 
FailReason string 23 | DomainName string 24 | RequestTime time.Duration 25 | StatusCode int 26 | } 27 | 28 | func worker(linkChan chan string, resultsChan chan LogPayload, wg *sync.WaitGroup) { 29 | // Decreasing internal counter for wait-group as soon as goroutine finishes 30 | defer wg.Done() 31 | http.DefaultTransport.(*http.Transport).ResponseHeaderTimeout = time.Second * 15 32 | http.DefaultTransport.(*http.Transport).DisableKeepAlives = true 33 | 34 | for url := range linkChan { 35 | start := time.Now() 36 | // Construct the HTTP request, I have to go this the rather complex way because I want 37 | // To add a useragent 38 | tld := "" 39 | if *presumecom { 40 | tld = ".com" 41 | } 42 | 43 | formattedurl := fmt.Sprintf("%s://%s%s/%s", *protocal, strings.TrimSpace(url), tld, *pathtoquery) 44 | req, err := http.NewRequest("GET", formattedurl, nil) 45 | 46 | if err != nil { 47 | fmt.Printf("Issue with URL provided, %s makes no sence to net/url\n", formattedurl) 48 | continue 49 | } 50 | 51 | client := &http.Client{} 52 | client.CheckRedirect = 53 | func(req *http.Request, via []*http.Request) error { 54 | 55 | if req.URL.RequestURI() == "/robots.txt" && len(via) < 3 { 56 | // fmt.Printf("PASSED %s%s\n", req.URL.Host, req.URL.RequestURI()) 57 | req.Header.Set("User-Agent", *useragent) 58 | return nil 59 | } 60 | fmt.Printf("FAILED %s%s\n", req.URL.Host, req.URL.RequestURI()) 61 | 62 | e := errors.New("Invalid redirect") 63 | return e 64 | } 65 | req.Header.Set("User-Agent", *useragent) 66 | 67 | // Avoid calling our own loopback, or calling on anything that does not have 68 | // DNS responce. 
69 | ip, err := net.LookupIP(fmt.Sprintf("%s%s", strings.TrimSpace(url), tld)) 70 | if err != nil || len(ip) < 1 || strings.HasPrefix("127.", ip[0].String()) || strings.HasPrefix("0.", ip[0].String()) { 71 | continue 72 | } 73 | urlobj, e := client.Do(req) 74 | if e == nil { 75 | elapsed := time.Since(start) 76 | if *saveoutput && urlobj.StatusCode == 200 { 77 | b, e := ioutil.ReadAll(urlobj.Body) 78 | if e == nil { 79 | os.Mkdir(fmt.Sprintf("./%s", string(strings.TrimSpace(url)[0])), 0775) 80 | ioutil.WriteFile(fmt.Sprintf("./%s/%s.%s", string(strings.TrimSpace(url)[0]), strings.TrimSpace(url), *pathtoquery), b, 0664) 81 | } 82 | } else { 83 | urlobj.Body.Close() 84 | } 85 | 86 | Payload := LogPayload{ 87 | DomainName: strings.TrimSpace(url), 88 | Headers: urlobj.Header, 89 | Sucessful: true, 90 | DNSIP: ip[0].String(), 91 | RequestTime: elapsed, 92 | StatusCode: urlobj.StatusCode, 93 | } 94 | resultsChan <- Payload 95 | } else { 96 | 97 | fakeheaders := make(http.Header) 98 | Payload := LogPayload{ 99 | DomainName: strings.TrimSpace(url), 100 | Headers: fakeheaders, 101 | Sucessful: false, 102 | DNSIP: ip[0].String(), 103 | RequestTime: 0, 104 | StatusCode: 0, 105 | } 106 | 107 | resultsChan <- Payload 108 | } 109 | } 110 | 111 | } 112 | 113 | func Logger(resultChan chan LogPayload) { 114 | Database, e := GetDB() 115 | Query, _ := Database.Prepare("INSERT INTO `Results` (`Domain`, `Data`) VALUES (?, ?)") 116 | 117 | if e != nil { 118 | panic("Logger could not connect to the database") 119 | } 120 | 121 | for results := range resultChan { 122 | b, e := json.Marshal(results) 123 | 124 | if e != nil { 125 | fmt.Println("Could not JSON encode packet") 126 | } 127 | _, e = Query.Exec(results.DomainName, string(b)) 128 | 129 | if e != nil { 130 | fmt.Printf("Could not store data for domain %s for reason: %s\n", results.DomainName, e.Error()) 131 | } 132 | 133 | } 134 | } 135 | 136 | var pathtoquery *string 137 | var saveoutput *bool 138 | var presumecom *bool 139 | var 
databasestring *string 140 | var useragent *string 141 | var protocal *string 142 | 143 | func main() { 144 | runtime.GOMAXPROCS(3) 145 | inputfile := flag.String("input", "", "The file that will be read.") 146 | pathtoquery = flag.String("querypath", "/", "The path that will be queried.") 147 | saveoutput = flag.Bool("savepage", false, "Save the file that is queried to disk") 148 | presumecom = flag.Bool("presumecom", false, "Presume that the file lines need .com adding to them") 149 | concurrencycount := flag.Int("concount", 600, "How many go routines you want to start") 150 | databasestring = flag.String("dbstring", "root:@tcp(127.0.0.1:3306)/Domaniator", "What to connect to the database with") 151 | useragent = flag.String("ua", "Mozilla/5.0 (HTTP Header Survey By Benjojo +https://github.com/benjojo/Domainiator) (Like elinks)", "What UA to send the request with") 152 | protocal = flag.String("protocal", "http", "http or https") 153 | logbuffer := flag.Int("logbuffer", 100, "How many logging entries should be buffered before blocking") 154 | 155 | flag.Parse() 156 | 157 | if *inputfile == "" { 158 | fmt.Println("No input file, put one in with -input") 159 | os.Exit(0) 160 | } 161 | 162 | b, e := ioutil.ReadFile(*inputfile) 163 | if e != nil { 164 | panic(e) 165 | } 166 | File := strings.Split(string(b), "\n") 167 | 168 | lCh := make(chan string) 169 | rCh := make(chan LogPayload, *logbuffer) 170 | wg := new(sync.WaitGroup) 171 | go Logger(rCh) 172 | // Adding routines to workgroup and running then 173 | for i := 0; i < *concurrencycount; i++ { 174 | wg.Add(1) 175 | go worker(lCh, rCh, wg) 176 | } 177 | 178 | for _, link := range File { 179 | lCh <- link 180 | } 181 | // Closing channel (waiting in goroutines won't continue any more) 182 | close(lCh) 183 | wg.Wait() 184 | } 185 | -------------------------------------------------------------------------------- /Layout.sql: -------------------------------------------------------------------------------- 1 | -- 
-------------------------------------------------------- 2 | -- Host: localhost 3 | -- Server version: 5.5.31-0+wheezy1-log - (Debian) 4 | -- Server OS: debian-linux-gnu 5 | -- HeidiSQL version: 7.0.0.4053 6 | -- Date/time: 2013-11-30 14:25:53 7 | -- -------------------------------------------------------- 8 | 9 | /*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */; 10 | /*!40101 SET NAMES utf8 */; 11 | /*!40014 SET FOREIGN_KEY_CHECKS=0 */; 12 | 13 | -- Dumping database structure for Domaniator 14 | DROP DATABASE IF EXISTS `Domaniator`; 15 | CREATE DATABASE IF NOT EXISTS `Domaniator` /*!40100 DEFAULT CHARACTER SET latin1 */; 16 | USE `Domaniator`; 17 | 18 | 19 | -- Dumping structure for table Domaniator.CachedResults 20 | DROP TABLE IF EXISTS `CachedResults`; 21 | CREATE TABLE IF NOT EXISTS `CachedResults` ( 22 | `Day` int(10) NOT NULL AUTO_INCREMENT, 23 | `RequestCount` bigint(20) DEFAULT '0', 24 | `FailedCount` int(11) DEFAULT '0', 25 | `TopHeaders` text, 26 | `AvgContentSize` float DEFAULT '0', 27 | PRIMARY KEY (`Day`) 28 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 29 | 30 | -- Data exporting was unselected. 31 | 32 | 33 | -- Dumping structure for table Domaniator.Results 34 | DROP TABLE IF EXISTS `Results`; 35 | CREATE TABLE IF NOT EXISTS `Results` ( 36 | `id` bigint(20) NOT NULL AUTO_INCREMENT, 37 | `Timestamp` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP, 38 | `Domain` varchar(65) DEFAULT NULL, 39 | `Data` text, 40 | PRIMARY KEY (`id`), 41 | KEY `Domain` (`Domain`) 42 | ) ENGINE=InnoDB DEFAULT CHARSET=latin1; 43 | 44 | -- Data exporting was unselected. 
// GetDB opens the analytics connection to the local Domaniator MySQL database,
// verifies that the server is reachable and switches the session to UTF-8.
//
// On any failure the returned handle is nil and the error describes which step
// failed; on success the caller owns the handle and should Close it.
func GetDB() (con *sql.DB, e error) {
	con, e = sql.Open("mysql", "root:@tcp(127.0.0.1:3306)/Domaniator")
	if e != nil {
		fmt.Println("[DB] An error happened in the setup of a SQL connection")
		return nil, e
	}

	// sql.Open does not necessarily connect; Ping forces a real connection so
	// an unreachable server is reported here rather than on the first query.
	if e = con.Ping(); e != nil {
		con.Close()
		return nil, e
	}

	if _, e = con.Exec("SET NAMES UTF8"); e != nil {
		con.Close()
		return nil, e
	}
	return con, nil
}
// main computes the overall request statistics and appends them as a new row
// to CachedResults. It stops with a message as soon as a database step fails,
// so that no row built from partial counts is inserted.
func main() {
	fmt.Println("Attemping to connect to DB")
	database, e := GetDB()
	if e != nil {
		fmt.Printf("Unable to connect to the database: %s\n", e.Error())
		return
	}
	defer database.Close()

	// Okay so we are gonna grab a few really basic stats here.
	start := time.Now()

	// So the first bit we are going to get is the total done today.
	// SELECT COUNT(*) FROM Results WHERE `Timestamp` > timestampadd(hour, -24, now())
	// NOTE(review): rows are counted as failed when Data is the literal 'f';
	// confirm that the crawler still writes that marker.
	var Total int
	e = database.QueryRow("SELECT COUNT(*) FROM Results WHERE `Data` != 'f'").Scan(&Total)
	if e != nil {
		fmt.Printf("Could not count the stored results: %s\n", e.Error())
		return
	}
	// This one takes approx 10 mins :eek:

	var TotalFailed int
	e = database.QueryRow("SELECT COUNT(*) FROM Results WHERE `Data` = 'f'").Scan(&TotalFailed)
	if e != nil {
		fmt.Printf("Could not count the failed results: %s\n", e.Error())
		return
	}

	// TODO: make it process the headers and get the avg content length.

	_, e = database.Exec("INSERT INTO `Domaniator`.`CachedResults` (`RequestCount`, `FailedCount`) VALUES (?, ?);", Total, TotalFailed)
	if e != nil {
		fmt.Printf("Could not store the cached results: %s\n", e.Error())
		return
	}

	elapsed := time.Since(start)
	// Printf is required here: Println would print the %s verb literally.
	fmt.Printf("Done in %s\n", elapsed)
}

// GetDB opens the cron job's connection to the local Domaniator MySQL
// database, verifies that the server is reachable and switches the session to
// UTF-8.
//
// On any failure the returned handle is nil and the error describes which step
// failed; on success the caller owns the handle and should Close it.
func GetDB() (con *sql.DB, e error) {
	con, e = sql.Open("mysql", "root:@tcp(127.0.0.1:3306)/Domaniator")
	if e != nil {
		fmt.Println("[DB] An error happened in the setup of a SQL connection")
		return nil, e
	}

	// sql.Open does not necessarily connect; Ping forces a real connection so
	// an unreachable server is reported here rather than on the first query.
	if e = con.Ping(); e != nil {
		con.Close()
		return nil, e
	}

	if _, e = con.Exec("SET NAMES UTF8"); e != nil {
		con.Close()
		return nil, e
	}
	return con, nil
}
import ( 4 | "database/sql" 5 | "fmt" 6 | _ "github.com/go-sql-driver/mysql" 7 | ) 8 | 9 | func GetDB() (con *sql.DB, e error) { 10 | con, err := sql.Open("mysql", "dom:demo@tcp(127.0.0.1:3306)/Domaniator") 11 | con.Exec("SET NAMES UTF8") 12 | if err != nil { 13 | fmt.Println("[DB] An error happened in the setup of a SQL connection") 14 | } 15 | con.Ping() 16 | return con, err 17 | } 18 | -------------------------------------------------------------------------------- /frontend/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "github.com/codegangsta/martini" 6 | "github.com/pmylund/go-cache" 7 | "net/http" 8 | "strings" 9 | "time" 10 | ) 11 | 12 | func main() { 13 | // Test if the database works. 14 | fmt.Println("Dominiator FrontEnd Server. Attempting DB connection") 15 | Database, e := GetDB() 16 | if e != nil { 17 | panic(e) 18 | } 19 | // Make a cache that all the general stats will be put in. 20 | cacheobj := cache.New(60*time.Minute, 1*time.Minute) 21 | 22 | fmt.Println("DB connection possible") 23 | Database.Exec("SHOW TABLES") 24 | // Okay so now we have a database connection. 25 | m := martini.Classic() 26 | m.Map(cacheobj) // ensure that the cache obj is delivered to each request 27 | m.Get("/api/search/:q", SearchForDomains) 28 | m.Get("/api/stats/", GetOverviewStats) 29 | m.Run() 30 | } 31 | 32 | func API2JSON(res http.ResponseWriter, req *http.Request) { 33 | if strings.HasPrefix(req.RequestURI, "/api") { // This causes anything with a /api prefix to have the content type of json. 
34 | res.Header().Set("Content-Type", "application/json") 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /frontend/stats.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | // "fmt" 6 | "github.com/codegangsta/martini" 7 | "github.com/pmylund/go-cache" 8 | "net/http" 9 | // "strings" 10 | ) 11 | 12 | func SearchForDomains(res http.ResponseWriter, req *http.Request, cache *cache.Cache, prams martini.Params) string { 13 | database, _ := GetDB() 14 | defer database.Close() 15 | if prams["q"] == "" { 16 | http.Error(res, "No search query", http.StatusBadRequest) 17 | return "" 18 | } 19 | rows, _ := database.Query("SELECT Domain FROM `Domaniator`.`Results` WHERE Domain LIKE ? AND `Data` != 'f' LIMIT 10", prams["q"]+"%") 20 | resultsArray := make([]string, 0) 21 | defer rows.Close() // Ensure we don't leak connectctions 22 | for rows.Next() { 23 | var databack string 24 | err := rows.Scan(&databack) 25 | if err != nil { 26 | http.Error(res, "Error reading from database", 500) 27 | } 28 | resultsArray = append(resultsArray, databack) 29 | } 30 | b, _ := json.Marshal(resultsArray) 31 | return string(b) 32 | } 33 | 34 | type StatsResponce struct { 35 | RequestCount int64 36 | FailedCount int64 37 | TopHeaders string 38 | AvgContentSize int64 39 | } 40 | 41 | func GetOverviewStats(res http.ResponseWriter, req *http.Request, cache *cache.Cache, prams martini.Params) string { 42 | database, _ := GetDB() 43 | defer database.Close() 44 | 45 | var RequestCount int64 46 | var FailedCount int64 47 | var TopHeaders string 48 | var AvgContentSize int64 49 | row := database.QueryRow("SELECT `RequestCount`, `FailedCount`, `TopHeaders`, `AvgContentSize` FROM `Domaniator`.`CachedResults` ORDER BY `Day` DESC LIMIT 1") 50 | row.Scan(&RequestCount, &FailedCount, &TopHeaders, &AvgContentSize) 51 | 52 | Result := StatsResponce{ 53 | RequestCount: 
RequestCount, 54 | FailedCount: FailedCount, 55 | TopHeaders: TopHeaders, 56 | AvgContentSize: AvgContentSize, 57 | } 58 | b, _ := json.Marshal(Result) 59 | return string(b) 60 | } 61 | -------------------------------------------------------------------------------- /list.txt: -------------------------------------------------------------------------------- 1 | benjojo 2 | google 3 | github 4 | --------------------------------------------------------------------------------