├── .gitignore ├── LICENSE ├── README.md ├── cmd └── mildew │ └── main.go ├── go.mod ├── go.sum └── mildew ├── crt.go ├── crt_test.go ├── dir.go ├── dir_test.go ├── mildew.go ├── out.go ├── parse.go └── parse_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/* 2 | TODO.md 3 | tmp/* 4 | 5 | # Binaries for programs and plugins 6 | *.exe 7 | *.exe~ 8 | *.dll 9 | *.so 10 | *.dylib 11 | 12 | # Test binary, built with `go test -c` 13 | *.test 14 | 15 | # Output of the go coverage tool, specifically when used with LiteIDE 16 | *.out 17 | 18 | # Dependency directories (remove the comment below to include it) 19 | # vendor/ 20 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Daehee Park 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # mildew 2 | 3 | Seed your [DoD VDP](https://hackerone.com/deptofdefense) recon with the latest list of official "dotmil" domains. mildew crawls all the DoD-maintained website directories to scrape unique `.mil` domains. Then it extracts [certificate transparency logs](https://www.certificate-transparency.org/what-is-ct) for each discovered root domain for deeper public domain discovery. 4 | 5 | Based on the work of [dotmil-domains](https://github.com/esonderegger/dotmil-domains/) a research project by [esonderegger](https://twitter.com/esonderegger) mapping out the DoD's public-facing domain listings: 6 | > There currently isn't a publicly available directory of all the domain names registered under the US military's .mil top-level domain. Such a directory would be useful for people looking to get an aggregate view of military websites and how they are hosted. 7 | 8 | ## Install 9 | ``` 10 | go get -u github.com/daehee/mildew/cmd/mildew 11 | ``` 12 | 13 | ## Usage 14 | ``` 15 | mildew 16 | ``` 17 | 18 | ## Data Sources 19 | The official DoD website directories: 20 | * [U.S. 
Department of Defense](https://www.defense.gov/Resources/Military-Departments/DOD-Websites/) 21 | * [Air Force](http://www.af.mil/AFSites.aspx) 22 | * [Army](http://www.army.mil/info/a-z/) 23 | * [Navy](https://www.navy.mil/Resources/Navy-Directory/) 24 | 25 | Certificate transparency logs: 26 | * [Crt.sh](https://crt.sh) 27 | 28 | ## Report Vulnerabilities 29 | Read the DoD Vulnerability Disclosure Policy and submit a vulnerability report at [HackerOne](https://hackerone.com/deptofdefense). 30 | -------------------------------------------------------------------------------- /cmd/mildew/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "context" 5 | "log" 6 | 7 | "github.com/daehee/mildew/mildew" 8 | ) 9 | 10 | // mildew scrapes domains from official DoD website directories 11 | // and certificate transparency logs 12 | // 13 | // 1: Scrape DoD directories for subdomains 14 | // 2: Request additional certificate transparency subdomains using root domains from 1 15 | // 3: Output to stdout and file 16 | func main() { 17 | var err error 18 | ctx, cancel := context.WithCancel(context.Background()) 19 | defer cancel() 20 | mw := mildew.NewMildew() 21 | 22 | log.Printf("scraping DoD web directories") 23 | err = mw.ScrapeDirs(ctx) 24 | if err != nil { 25 | log.Fatal(err) 26 | } 27 | 28 | log.Printf("scraping certificate transparency data") 29 | err = mw.ScrapeCrts(ctx) 30 | if err != nil { 31 | log.Fatal(err) 32 | } 33 | 34 | // mw.OutputScreen() 35 | mw.OutputFile("mildew.out") 36 | } 37 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/daehee/mildew 2 | 3 | go 1.15 4 | 5 | require ( 6 | github.com/antchfx/xmlquery v1.3.0 // indirect 7 | github.com/caffix/stringset v0.0.0-20201218054545-37e95a70826c 8 | github.com/gocolly/colly/v2 
v2.1.1-0.20201130153714-389278068768 9 | github.com/stretchr/testify v1.3.0 10 | golang.org/x/net v0.0.0-20200822124328-c89045814202 // indirect 11 | golang.org/x/text v0.3.3 // indirect 12 | google.golang.org/protobuf v1.25.0 // indirect 13 | ) 14 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw= 2 | github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= 3 | github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE= 4 | github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc= 5 | github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo= 6 | github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y= 7 | github.com/andybalholm/cascadia v1.2.0 h1:vuRCkM5Ozh/BfmsaTm26kbjm0mIOM3yS5Ek/F5h18aE= 8 | github.com/andybalholm/cascadia v1.2.0/go.mod h1:YCyR8vOZT9aZ1CHEd8ap0gMVm2aFgxBp0T0eFw1RUQY= 9 | github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M= 10 | github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0= 11 | github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM= 12 | github.com/antchfx/xmlquery v1.3.0 h1:YvWny6c+VzYrTBMw9aopGqO3BfTUW6MHRAnHW2kYoQ0= 13 | github.com/antchfx/xmlquery v1.3.0/go.mod h1:64w0Xesg2sTaawIdNqMB+7qaW/bSqkQm+ssPaCMWNnc= 14 | github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 15 | github.com/antchfx/xpath v1.1.8/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 16 | github.com/antchfx/xpath v1.1.10 h1:cJ0pOvEdN/WvYXxvRrzQH9x5QWKpzHacYO8qzCcDYAg= 17 | github.com/antchfx/xpath v1.1.10/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk= 18 | 
github.com/caffix/stringset v0.0.0-20201218054545-37e95a70826c h1:gW2jpj1YEl72H4x7BL2K1ZNkGJUVu3HZ0Hp9xU83cKE= 19 | github.com/caffix/stringset v0.0.0-20201218054545-37e95a70826c/go.mod h1:28GU9FTlJHzfjrFJ5Ep7vmXNkSSM3JF0miNt7ZM9V5w= 20 | github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= 21 | github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= 22 | github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8= 23 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 24 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 25 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 26 | github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= 27 | github.com/envoyproxy/protoc-gen-validate v0.1.0/go.mod h1:iSmxcyjqTsJpI2R4NaDN7+kN2VEUnK/pcBlmesArF7c= 28 | github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y= 29 | github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8= 30 | github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI= 31 | github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA= 32 | github.com/gocolly/colly/v2 v2.1.1-0.20201130153714-389278068768 h1:wDTvgmsveVI97zzCSeS2Yt3jf5x2jIqRaFLWFOXcmeM= 33 | github.com/gocolly/colly/v2 v2.1.1-0.20201130153714-389278068768/go.mod h1:I2MuhsLjQ+Ex+IzK3afNS8/1qP3AedHOusRPcRdC5o0= 34 | github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= 35 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY= 36 | github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod 
h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= 37 | github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= 38 | github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 39 | github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg= 40 | github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 41 | github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= 42 | github.com/golang/protobuf v1.4.0-rc.1/go.mod h1:ceaxUfeHdC40wWswd/P6IGgMaK3YpKi5j83Wpe3EHw8= 43 | github.com/golang/protobuf v1.4.0-rc.1.0.20200221234624-67d41d38c208/go.mod h1:xKAWHe0F5eneWXFV3EuXVDTCmh+JuBKY0li0aMyXATA= 44 | github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrUpVNzEA03Pprs= 45 | github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= 46 | github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= 47 | github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= 48 | github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= 49 | github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= 50 | github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= 51 | github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 52 | github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= 53 | github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 54 | github.com/google/go-cmp v0.5.0 h1:/QaMHBdZ26BB3SSst0Iwl10Epc+xhTquomWX0oZEB6w= 55 | github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 56 | github.com/jawher/mow.cli v1.1.0/go.mod h1:aNaQlc7ozF3vw6IJ2dHjp2ZFiA4ozMIYY6PyuRJwlUg= 57 | 
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= 58 | github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= 59 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 60 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 61 | github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= 62 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI= 63 | github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= 64 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 65 | github.com/stretchr/objx v0.2.0/go.mod h1:qt09Ya8vawLte6SNmTgCsAVtYtaKzEcn8ATUoHMkEqE= 66 | github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= 67 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 68 | github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA= 69 | github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo= 70 | golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= 71 | golang.org/x/crypto v0.0.0-20190605123033-f99c8df09eb5/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= 72 | golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= 73 | golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= 74 | golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= 75 | golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= 76 | golang.org/x/lint 
v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= 77 | golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 78 | golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 79 | golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 80 | golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= 81 | golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 82 | golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= 83 | golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks= 84 | golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= 85 | golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 86 | golang.org/x/net v0.0.0-20200602114024-627f9648deb9/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= 87 | golang.org/x/net v0.0.0-20200813134508-3edf25e44fcc/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 88 | golang.org/x/net v0.0.0-20200822124328-c89045814202 h1:VvcQYSHwXgi7W+TpUR6A9g6Up98WAHf3f/ulnJ62IyA= 89 | golang.org/x/net v0.0.0-20200822124328-c89045814202/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA= 90 | golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= 91 | golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 92 | golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 93 | golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= 94 | 
golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 95 | golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= 96 | golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 97 | golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= 98 | golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= 99 | golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs= 100 | golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= 101 | golang.org/x/text v0.3.3 h1:cokOdA+Jmi5PJGXLlLllQSgYigAEfHXJAERHVMaCc2k= 102 | golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= 103 | golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 104 | golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= 105 | golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= 106 | golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= 107 | golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= 108 | golang.org/x/tools v0.0.0-20190606124116-d0a3d012864b/go.mod h1:/rFqwRUd4F7ZHNgwSSTFct+R/Kf4OFW1sUzUTQQTgfc= 109 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 110 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 111 | google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= 112 | google.golang.org/appengine v1.4.0/go.mod h1:xpcJRLb0r/rnEns0DIKYYv+WjYCduHsrkT7/EB5XEv4= 113 | google.golang.org/appengine 
v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc= 114 | google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc= 115 | google.golang.org/genproto v0.0.0-20180817151627-c66870c02cf8/go.mod h1:JiN7NxoALGmiZfu7CAH4rXhgtRTLTxftemlI0sWmxmc= 116 | google.golang.org/genproto v0.0.0-20190819201941-24fa4b261c55/go.mod h1:DMBHOl98Agz4BDEuKkezgsaosCRResVns1a3J2ZsMNc= 117 | google.golang.org/genproto v0.0.0-20200526211855-cb27e3aa2013/go.mod h1:NbSheEEYHJ7i3ixzK3sjbqSGDJWnxyFXZblF3eUsNvo= 118 | google.golang.org/grpc v1.19.0/go.mod h1:mqu4LbDTu4XGKhr4mRzUsmM4RtVoemTSY81AxZiDr8c= 119 | google.golang.org/grpc v1.23.0/go.mod h1:Y5yQAOtifL1yxbo5wqy6BxZv8vAUGQwXBOALyacEbxg= 120 | google.golang.org/grpc v1.27.0/go.mod h1:qbnxyOmOxrQa7FizSgH+ReBfzJrCY1pSN7KXBS8abTk= 121 | google.golang.org/protobuf v0.0.0-20200109180630-ec00e32a8dfd/go.mod h1:DFci5gLYBciE7Vtevhsrf46CRTquxDuWsQurQQe4oz8= 122 | google.golang.org/protobuf v0.0.0-20200221191635-4d8936d0db64/go.mod h1:kwYJMbMJ01Woi6D6+Kah6886xMZcty6N08ah7+eCXa0= 123 | google.golang.org/protobuf v0.0.0-20200228230310-ab0ca4ff8a60/go.mod h1:cfTl7dwQJ+fmap5saPgwCLgHXTUD7jkjRqWcaiX5VyM= 124 | google.golang.org/protobuf v1.20.1-0.20200309200217-e05f789c0967/go.mod h1:A+miEFZTKqfCUM6K7xSMQL9OKL/b6hQv+e19PK+JZNE= 125 | google.golang.org/protobuf v1.21.0/go.mod h1:47Nbq4nVaFHyn7ilMalzfO3qCViNmqZ2kzikPIcrTAo= 126 | google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 127 | google.golang.org/protobuf v1.23.0 h1:4MY060fB1DLGMB/7MBTLnwQUY6+F09GEiz6SsrNqyzM= 128 | google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 129 | google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= 130 | google.golang.org/protobuf v1.24.0/go.mod h1:r/3tXBNzIEhYS9I1OUVjXDlt8tc493IdKGjtUeSXeh4= 131 | google.golang.org/protobuf v1.25.0 h1:Ejskq+SyPohKW+1uil0JJMtmHCgJPJ/qWTxr8qp+R4c= 132 | 
google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= 133 | honnef.co/go/tools v0.0.0-20190102054323-c2f93a96b099/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 134 | honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= 135 | -------------------------------------------------------------------------------- /mildew/crt.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "log" 8 | "strings" 9 | "time" 10 | 11 | "github.com/gocolly/colly/v2" 12 | "github.com/gocolly/colly/v2/queue" 13 | ) 14 | 15 | func (mw *Mildew) ScrapeCrts(ctx context.Context) error { 16 | c := colly.NewCollector(colly.StdlibContext(ctx)) 17 | 18 | // rate limit colly and set delay 19 | c.Limit(&colly.LimitRule{ 20 | DomainGlob: "*crt.*", 21 | Delay: 5 * time.Second, 22 | }) 23 | 24 | // create a request queue with 2 consumer threads 25 | q, _ := queue.New( 26 | 2, // number of consumer threads 27 | &queue.InMemoryQueueStorage{MaxSize: 10000}, // use default queue storage 28 | ) 29 | 30 | c.OnRequest(func(r *colly.Request) { 31 | log.Printf("%s", r.URL) 32 | }) 33 | 34 | c.OnResponse(func(r *colly.Response) { 35 | // Parse subdomains from JSON data 36 | jsonData := string(r.Body) 37 | var data []struct { 38 | CaId int `json:"issuer_ca_id"` 39 | NameValue string `json:"name_value"` 40 | } 41 | err := json.Unmarshal([]byte(jsonData), &data) 42 | if err != nil { 43 | return 44 | } 45 | 46 | for _, d := range data { 47 | // account for name_values data containing newlines 48 | split := strings.Split(d.NameValue, "\n") 49 | for _, s := range split { 50 | match := dotmilRx.FindStringSubmatch(s) 51 | s = match[0] 52 | sub := cleanDomain(s) 53 | if mw.Subs.Has(sub) { 54 | continue 55 | } 56 | mw.Subs.Insert(sub) 57 | } 58 | } 59 | 60 | }) 61 | 62 | // receive roots as crt scraper rootStream 63 
| for _, root := range mw.GetRoots() { 64 | // queue colly request to download JSON format for the root from crt.sh 65 | err := q.AddURL(fmt.Sprintf("https://crt.sh/?dNSName=%%25.%s&output=json", root)) 66 | if err != nil { 67 | return fmt.Errorf("error queueing crt scrape for %s: %v", root, err) 68 | } 69 | } 70 | // Execute colly queue 71 | err := q.Run(c) 72 | if err != nil { 73 | return fmt.Errorf("error executing crt scrape: %v", err) 74 | } 75 | 76 | // go func() { 77 | // // Wait until threads are finished; may be redundant with Queue Run, which 78 | // // blocks while queue has active requests 79 | // c.Wait() 80 | // // Done sending crt scraping results, close channel 81 | // done <- struct{}{} 82 | // }() 83 | 84 | return nil 85 | } 86 | -------------------------------------------------------------------------------- /mildew/crt_test.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestMildew_ScrapeCrts(t *testing.T) { 12 | testSubs := []string{ 13 | // "www.public.navy.mil", 14 | // "www.benning.army.mil", 15 | // "www.192wg.ang.af.mil", 16 | // "www.airforcemedicine.af.mil", 17 | // "www.af.mil", 18 | "www.dau.mil", 19 | } 20 | 21 | ctx, cancel := context.WithCancel(context.Background()) 22 | defer cancel() 23 | mw := NewMildew() 24 | for _, v := range testSubs { 25 | mw.Subs.Insert(v) 26 | } 27 | err := mw.ScrapeCrts(ctx) 28 | assert.NoError(t, err) 29 | fmt.Printf("%v\n", mw.Subs.Slice()) 30 | } 31 | -------------------------------------------------------------------------------- /mildew/dir.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "strings" 8 | 9 | "github.com/gocolly/colly/v2" 10 | ) 11 | 12 | // ScrapeDirs scrapes all DoD website directories and 
saves to Mildew object's Subs field 13 | func (mw *Mildew) ScrapeDirs(ctx context.Context) error { 14 | dirStream := make(chan string) 15 | // Launch goroutine for scraping directories 16 | go func() { 17 | defer close(dirStream) 18 | // Initialize base colly collector to be used by each directory scraper function 19 | // TODO tune colly options 20 | c := colly.NewCollector() 21 | var err error 22 | 23 | err = dirDod(c, dirStream) 24 | if err != nil { 25 | fmt.Println(err) 26 | return 27 | } 28 | 29 | err = dirAf(c, dirStream) 30 | if err != nil { 31 | fmt.Println(err) 32 | return 33 | } 34 | 35 | err = dirArmy(c, dirStream) 36 | if err != nil { 37 | fmt.Println(err) 38 | return 39 | } 40 | 41 | err = dirNavy(c, dirStream) 42 | if err != nil { 43 | fmt.Println(err) 44 | return 45 | } 46 | }() 47 | 48 | // Process incoming URLs from web directories 49 | for { 50 | select { 51 | case <-ctx.Done(): 52 | return fmt.Errorf("scrape cancelled") 53 | case v, ok := <-dirStream: 54 | // nil value signals closed channels, so return from function 55 | if ok == false { 56 | return nil 57 | } 58 | sub := urlToSub(v) 59 | if !isDotmil(sub) { 60 | continue 61 | } 62 | if mw.Subs.Has(sub) { 63 | continue 64 | } 65 | mw.Subs.Insert(sub) 66 | } 67 | } 68 | } 69 | 70 | // dirDod scrapes defense.gov directory 71 | func dirDod(c *colly.Collector, dirStream chan<- string) error { 72 | cc := c.Clone() 73 | cc.OnHTML("div.DGOVLinkBox > div", func(e *colly.HTMLElement) { 74 | dirStream <- e.ChildAttr("a[href]", "href") 75 | }) 76 | err := cc.Visit("https://www.defense.gov/Resources/Military-Departments/DOD-Websites/") 77 | if err != nil { 78 | return fmt.Errorf("error scraping defense.gov: %v", err) 79 | } 80 | return nil 81 | } 82 | 83 | // dirDod scrapes af.mil directory 84 | func dirAf(c *colly.Collector, dirStream chan<- string) error { 85 | cc := c.Clone() 86 | cc.OnHTML("a.AFSiteLink, a.AFSiteBaseLink", func(e *colly.HTMLElement) { 87 | dirStream <- e.Attr("href") 88 | }) 89 | // 
These links are duplicates of each other, go straight to srBaseList 90 | // cc.OnHTML("a.AFAlphaLink", func(e *colly.HTMLElement) { 91 | // e.Request.Visit(e.Attr("href")) 92 | // }) 93 | err := cc.Visit("http://www.af.mil/AFSites.aspx") 94 | if err != nil { 95 | return fmt.Errorf("error scraping af.mil: %v", err) 96 | } 97 | err = cc.Visit("https://www.af.mil/AF-Sites/srBaseList/A/#A") 98 | if err != nil { 99 | return fmt.Errorf("error scraping af.mil: %v", err) 100 | } 101 | return err 102 | } 103 | 104 | // dirArmy scrapes army.mil directory 105 | func dirArmy(c *colly.Collector, dirStream chan<- string) error { 106 | cc := c.Clone() 107 | cc.OnHTML("div.links-list a", func(e *colly.HTMLElement) { 108 | dirStream <- e.Attr("href") 109 | }) 110 | err := cc.Visit("http://www.army.mil/info/a-z/") 111 | if err != nil { 112 | return fmt.Errorf("error scraping army.mil: %v", err) 113 | } 114 | return nil 115 | } 116 | 117 | // dirNavy scrapes website URLs from Navy's VueJS SPA, 118 | // requires some JSON parsing 119 | func dirNavy(c *colly.Collector, dirStream chan<- string) error { 120 | cc := c.Clone() 121 | cc.OnHTML("#dnn_ctr752_ModuleContent > script:nth-of-type(2)", func(e *colly.HTMLElement) { 122 | // find JSON string feeding VueJS website directory 123 | jsonData := e.Text[strings.Index(e.Text, "[{") : strings.Index(e.Text, "}]}]")+4] 124 | 125 | // parse JSON 126 | var data []struct { 127 | SiteUrl string `json:"url"` 128 | } 129 | err := json.Unmarshal([]byte(jsonData), &data) 130 | if err != nil { 131 | return 132 | } 133 | 134 | for _, d := range data { 135 | // discard empty site URL fields, since some Navy units only list their social media profiles 136 | if len(d.SiteUrl) > 0 { 137 | dirStream <- d.SiteUrl 138 | } 139 | } 140 | }) 141 | err := cc.Visit("https://www.navy.mil/Resources/Navy-Directory/") 142 | if err != nil { 143 | return fmt.Errorf("error scraping navy.mil: %v", err) 144 | } 145 | return nil 146 | } 147 | 
-------------------------------------------------------------------------------- /mildew/dir_test.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "testing" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestMildew_ScrapeDirs(t *testing.T) { 12 | want := []string{ 13 | "www.public.navy.mil", 14 | "www.benning.army.mil", 15 | "www.192wg.ang.af.mil", 16 | "www.airforcemedicine.af.mil", 17 | "www.af.mil", 18 | "www.dau.mil", 19 | } 20 | notWant := []string{ 21 | "www.facebook.com", 22 | "www.linkedin.com", 23 | "nationalguard.com", 24 | "www.armymwr.com", 25 | "diversity.defense.gov", 26 | "www.afneurope.net", 27 | } 28 | 29 | ctx, cancel := context.WithCancel(context.Background()) 30 | defer cancel() 31 | mw := NewMildew() 32 | err := mw.ScrapeDirs(ctx) 33 | assert.NoError(t, err) 34 | res := mw.Subs.Slice() 35 | fmt.Printf("unique subdomains: %d\n", mw.Subs.Len()) 36 | for _, v := range want { 37 | assert.Contains(t, res, v) 38 | } 39 | for _, v := range notWant { 40 | assert.NotContains(t, res, v) 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /mildew/mildew.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "github.com/caffix/stringset" 5 | ) 6 | 7 | type Mildew struct { 8 | Subs stringset.Set 9 | } 10 | 11 | func NewMildew() *Mildew { 12 | return &Mildew{ 13 | Subs: stringset.New(), 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /mildew/out.go: -------------------------------------------------------------------------------- 1 | package mildew 2 | 3 | import ( 4 | "bufio" 5 | "fmt" 6 | "os" 7 | ) 8 | 9 | func (mw *Mildew) OutputFile(filename string) error { 10 | f, err := os.Create(filename) 11 | if err != nil { 12 | return fmt.Errorf("error creating file: %v", err) 13 | 
// urlToSub extracts the normalized hostname from the URL u. Surrounding
// whitespace is ignored. It returns "" when u cannot be parsed or carries no
// host (for example a relative link).
func urlToSub(u string) string {
	p, err := url.Parse(strings.TrimSpace(u))
	if err != nil {
		return ""
	}
	return cleanDomain(p.Hostname())
}

// cleanDomain normalizes a domain name so that equal names compare equal:
// surrounding whitespace and the trailing root dot of a fully-qualified name
// ("www.af.mil.") are dropped and the result is lower-cased.
func cleanDomain(d string) string {
	d = strings.TrimSpace(d)
	d = strings.TrimSuffix(d, ".")
	return strings.ToLower(d)
}

// isDotmil reports whether d is a name under the .mil top-level domain. The
// check is case-sensitive and requires the label separator, so "evilmil" or
// a bare "mil" do not qualify; pass names normalized by cleanDomain.
func isDotmil(d string) bool {
	return strings.HasSuffix(d, ".mil")
}

// extractRoot returns the last two dot-separated labels of d, e.g. "af.mil"
// for "www.192wg.ang.af.mil". Names with two labels or fewer, including the
// empty string, are returned unchanged.
func extractRoot(d string) string {
	labels := strings.Split(d, ".")
	if len(labels) <= 2 {
		return d
	}
	return strings.Join(labels[len(labels)-2:], ".")
}
"github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMildew_GetRoots(t *testing.T) { 10 | testSubs := []string{ 11 | "www.public.navy.mil", 12 | "www.benning.army.mil", 13 | "www.192wg.ang.af.mil", 14 | "www.airforcemedicine.af.mil", 15 | "www.af.mil", 16 | "www.dau.mil", 17 | } 18 | want := []string{ 19 | "navy.mil", 20 | "army.mil", 21 | "af.mil", 22 | "dau.mil", 23 | } 24 | 25 | mw := NewMildew() 26 | 27 | for _, v := range testSubs { 28 | mw.Subs.Insert(v) 29 | } 30 | 31 | got := mw.GetRoots() 32 | 33 | assert.Len(t, got, len(want)) 34 | 35 | for _, v := range want { 36 | assert.Contains(t, got, v) 37 | } 38 | 39 | } 40 | --------------------------------------------------------------------------------