├── .gitignore ├── LICENSE ├── README ├── fetch.go └── loader.go /.gitignore: -------------------------------------------------------------------------------- 1 | .*.swp 2 | imgs 3 | dst.png 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Time to learn the go language. 2 | We crack the recaptcha because it's Christmas. 3 | We want the Merry Christmas for all! 4 | 5 | Sorry I am golang noob and code is low quality and shit 6 | Will eventually use handcoded low quality convnet maybe 7 | 8 | Anyone have clever ideas to get ground truth? 9 | Maybe we unsupervised cluster and then write a paper 10 | Because to date that will be the #1 use of unsupervised learning 11 | 12 | I go on some dates in NYC tomorrow so maybe we finish this today? 13 | --> Probs not dawg it takes 2 hours in golang what takes you 5 minutes in Python 14 | --> Maybe breaking recaptcha will be a fun date activity? Will ask. Very cheap! 15 | 16 | Goal is breaking the demo @ https://www.google.com/recaptcha/api2/demo 17 | 18 | Also I realize clicking street signs is probably training the Google Self Driving Car 19 | Cheaters. 20 | 21 | == Project Updates == 22 | 23 | Christmas afternoon: 24 | * Lots of people in my house. It is loud. The fetcher is working. 25 | 26 | Christmas night: 27 | * Been in battle to get more data. Google loves data and hates sharing. Got blocked. 28 | * Made threads to get fast data. I now have 50842 alleged street sign pictures. 29 | * They are alleged only though. We need to train a binary classifier. 30 | 31 | Morning after christmas: 32 | * Got bagels. I love bagels 33 | 34 | == Project Bullshit == 35 | 36 | * WE ARE ON HACKER NEWS 37 | https://news.ycombinator.com/item?id=13256266 38 | * OMG THIS IS LITERALLY A BIGGER JOKE THAN SHIA LABEOUF 39 | * (jkjkjk Shia LaBeouf is way more famous than I will ever be) 40 | 41 | DyingLlama is my hero, I found his youtube last night and got inspired 42 | https://www.youtube.com/channel/UC88oKpyXNid09t1m_PZlvfQ 43 | 44 | -------------------------------------------------------------------------------- /fetch.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "crypto/md5" 6 | "encoding/hex" 7 | "fmt" 8 | "github.com/disintegration/imaging" 9 | "golang.org/x/net/html" 10 | "image" 11 | "image/jpeg" 12 | "image/png" 13 | "io" 14 | "io/ioutil" 15 | "log" 16 | "math/rand" 17 | "net/http" 18 | "net/url" 19 | "os" 20 | "strings" 21 | "time" 22 | ) 23 | 24 | const apiKey string = "6Le-wvkSAAAAAPBMRTvw0Q4Muexq9bi0DJwx_mJ-" 25 | 26 | func fetchImg(ck string) image.Image { 27 | // fetch the image 28 | u, err := url.Parse("http://google.com/recaptcha/api2/payload") 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | q := u.Query() 33 | q.Set("c", ck) 34 | q.Set("k", apiKey) 35 | u.RawQuery = q.Encode() 36 | 37 | // do fetch 38 | imgresponse, err := http.Get(u.String()) 39 | if err != nil { 40 | log.Fatal(err) 41 | } 42 | 43 | img, err := jpeg.Decode(imgresponse.Body) 44 | if err != nil { 45 | log.Fatal(err) 46 | } 47 | 48 | return img 49 | } 50 | 51 | func getChallengeKey() (string, string, image.Image) { 52 | // build the request 53 | u, err := url.Parse("http://google.com/recaptcha/api/fallback") 54 | if err != nil { 55 | log.Fatal(err) 56 | } 57 | q := u.Query() 58 | q.Set("k", apiKey) 59 | u.RawQuery = q.Encode() 60 | //fmt.Println(u) 61 | 62 | // fetch the webpage 63 | response, err := http.Get(u.String()) 64 | if err != nil { 65 | log.Fatal(err) 66 | } 67 | defer response.Body.Close() 68 | 69 | // print it 70 | bodyBytes, _ := ioutil.ReadAll(response.Body) 71 | 72 | z := html.NewTokenizer(ioutil.NopCloser(bytes.NewBuffer(bodyBytes))) 73 | tmparr := []string{} 74 | ck := "" 75 | for { 76 | tt := z.Next() 77 | switch tt { 78 | case html.ErrorToken: 79 | return ck, tmparr[3], fetchImg(ck) 80 | case html.StartTagToken, html.SelfClosingTagToken: 81 | tn, attr := z.TagName() 82 | if string(tn) == "img" && attr { 83 | for { 84 | k, v, attr := z.TagAttr() 85 | if string(k) == "src" { 86 | //fmt.Println(string(v)) 87 | u, err := url.Parse(string(v)) 88 | if err != nil { 89 | log.Fatal(err) 90 | } 91 | q := u.Query() 92 | //fmt.Println(q) 93 | if q["k"][0] != apiKey { 94 | log.Fatal("apiKey doesn't match") 95 | } 96 | ck = q["c"][0] 97 | } 98 | if !attr { 99 | break 100 | } 101 | } 102 | } 103 | case html.TextToken: 104 | //fmt.Println(z.Token()) 105 | tmparr = append(tmparr, z.Token().String()) 106 | } 107 | } 108 | } 109 | 110 | func downloader() { 111 | bigcnt := 0 112 | for { 113 | // parse it 114 | ck, typ, img := getChallengeKey() 115 | 116 | h := md5.New() 117 | io.WriteString(h, ck) 118 | hh := hex.EncodeToString(h.Sum(nil)) 119 | typ = strings.Replace(typ, " ", "_", -1) 120 | //fmt.Println(ck, typ, img.Bounds()) 121 | fmt.Println(bigcnt, hh, typ, img.Bounds()) 122 | 123 | if img.Bounds() != image.Rect(0, 0, 300, 300) { 124 | log.Fatal("IMAGE IS THE WRONG SIZE") 125 | } 126 | 127 | // write it 128 | os.MkdirAll("imgs/"+typ, 0755) 129 | 130 | cnt := 0 131 | for h := 0; h < 300; h += 100 { 132 | for w := 0; w < 300; w += 100 { 133 | lilimg := imaging.Crop(img, image.Rect(w, h, w+100, h+100)) 134 | 135 | fn := fmt.Sprintf("imgs/%s/%s_%d.png", typ, hh, cnt) 136 | f, err := os.OpenFile(fn, os.O_CREATE|os.O_WRONLY, 0644) 137 | if err != nil { 138 | log.Fatal(err) 139 | } 140 | png.Encode(f, lilimg) 141 | f.Close() 142 | 143 | cnt += 1 144 | } 145 | } 146 | bigcnt += 1 147 | time.Sleep(time.Duration(rand.Intn(2000)) * time.Millisecond) 148 | } 149 | } 150 | 151 | func main() { 152 | fmt.Println("my first golang program") 153 | 154 | /*for i := 0; i < 8; i += 1 { 155 | go downloader() 156 | }*/ 157 | downloader() 158 | 159 | // move on 160 | fmt.Println("still alive!") 161 | } 162 | -------------------------------------------------------------------------------- /loader.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | /* 4 | so like wow there's no neural networks for go, CNN anyone? 5 | idea is this and if you cheat and use python you are a big cheater 6 | give all alleged street sign images 0.4 of street sign 7 | and give all other images 0.01 chance of street sign 8 | and maybe with the magic of neural networks we will learn? 9 | 10 | TODO: don't be cheater and use python only golang pull request accepted 11 | */ 12 | 13 | import ( 14 | "fmt" 15 | "github.com/disintegration/gift" 16 | _ "github.com/disintegration/imaging" 17 | "image" 18 | "image/png" 19 | "log" 20 | "math/rand" 21 | "os" 22 | "path/filepath" 23 | "strings" 24 | //"github.com/NOX73/go-neural" 25 | //"github.com/NOX73/go-neural/learn" 26 | //"github.com/sajari/random-forest/RF" 27 | ) 28 | 29 | func randomArray(n int) []float32 { 30 | ret := make([]float32, n) 31 | for i := 0; i < n; i++ { 32 | ret[i] = (rand.Float32() - 0.5) * 5 33 | } 34 | return ret 35 | } 36 | 37 | func main() { 38 | log.Print("use log so we don't have to put an underscore before the import") 39 | 40 | type Example struct { 41 | features []float32 42 | category string 43 | } 44 | 45 | paths := make(chan string) 46 | processed := make(chan Example) 47 | 48 | // the Seed for the network is 7 49 | rand.Seed(7) 50 | 51 | g := gift.New( 52 | // edge detector 53 | gift.Convolution( 54 | []float32{ 55 | -1, -1, -1, 56 | -1, 8, -1, 57 | -1, -1, -1, 58 | }, 59 | false, false, false, 0.0), 60 | // is this max pool? 61 | gift.Maximum(2, true), 62 | gift.Resize(50, 0, gift.LinearResampling), 63 | 64 | // random 5x5 conv, hmm but like the color channels bro this is a shit neural network 65 | gift.Convolution( 66 | randomArray(25), 67 | false, false, false, 0.0), 68 | // is this max pool? 69 | gift.Maximum(2, true), 70 | gift.Resize(25, 0, gift.LinearResampling), 71 | 72 | // random 3x3 conv, hmm but like the color channels bro this is a shit neural network 73 | gift.Convolution( 74 | randomArray(9), 75 | false, false, false, 0.0), 76 | // is this max pool? 77 | gift.Maximum(2, true), 78 | gift.Resize(10, 0, gift.LinearResampling), 79 | 80 | // 300 features one for each spartan RIP 81 | ) 82 | 83 | //n := neural.NewNetwork(300, []int{100,20,1}) 84 | //n.RandomizeSynapses() 85 | 86 | // forest builder 87 | go func() { 88 | // is this a proper design pattern? 89 | // probs not it's awkward ROS node shit 90 | for { 91 | sample := <-processed 92 | fmt.Println(sample) 93 | 94 | // ugh no inline if? 95 | /*prob := []float64{0.01} 96 | if sample.yes { 97 | prob = []float64{0.4} 98 | } 99 | 100 | learn.Learn(n, sample.features, prob, 0.05) 101 | 102 | println(prob[0], learn.Evaluation(n, sample.features, prob))*/ 103 | } 104 | }() 105 | 106 | // image loader and network runner 107 | go func() { 108 | for { 109 | path := <-paths 110 | 111 | // load the image, this is 5 lines 112 | // i hate all this error handling does go have exceptions? 113 | f, err := os.Open(path) 114 | if err != nil { 115 | log.Fatal(err, path) 116 | } 117 | img, err := png.Decode(f) 118 | if err != nil { 119 | log.Fatal(err, path) 120 | } 121 | f.Close() 122 | 123 | dst := image.NewRGBA(g.Bounds(img.Bounds())) 124 | g.Draw(dst, img) 125 | 126 | // extract features 127 | // i can write much better than this wow shit 128 | ret := make([]float32, 300) 129 | cnt := 0 130 | for i := 0; i < 400; i++ { 131 | if i%4 == 3 { 132 | continue 133 | } 134 | ret[cnt] = float32(dst.Pix[i]) / 256.0 135 | cnt += 1 136 | } 137 | 138 | processed <- Example{features: ret, category: strings.Split(path, "/")[1]} 139 | 140 | //imaging.Save(dst, "dst.png") 141 | //println(dst) 142 | } 143 | }() 144 | 145 | files := []string{} 146 | 147 | filepath.Walk("imgs/", func(path string, finfo os.FileInfo, err error) error { 148 | if finfo.IsDir() { 149 | return nil 150 | } 151 | //paths <- path 152 | files = append(files, path) 153 | return nil 154 | }) 155 | fmt.Println("files list built") 156 | println(len(files)) 157 | 158 | /*perm := rand.Perm(len(files)) 159 | for _, v := range perm { 160 | paths <- files[v] 161 | }*/ 162 | } 163 | --------------------------------------------------------------------------------