├── README.md ├── coco_labels.txt ├── frozen_inference_graph.pb ├── main.go └── output.jpg /README.md: -------------------------------------------------------------------------------- 1 | # go-object-detect-from-image 2 | 3 | detect objects from image file 4 | 5 | ![](https://raw.githubusercontent.com/mattn/go-object-detect-from-image/master/output.jpg) 6 | 7 | ## Usage 8 | 9 | ``` 10 | ./go-object-detect-from-image input.jpg 11 | ``` 12 | 13 | ## Requirements 14 | 15 | tensorflow 1.10 16 | 17 | ## Installation 18 | 19 | ``` 20 | $ go get github.com/mattn/go-object-detect-from-image 21 | ``` 22 | 23 | frozen_inference_graph.pb is archived in ssd_mobilenet_v1_coco_11_06_2017.tar.gz 24 | 25 | ## License 26 | 27 | MIT 28 | 29 | ## Author 30 | 31 | Yasuhiro Matsumoto (a.k.a. mattn) 32 | -------------------------------------------------------------------------------- /coco_labels.txt: -------------------------------------------------------------------------------- 1 | 0: unlabeled 2 | 1: person 3 | 2: bicycle 4 | 3: car 5 | 4: motorcycle 6 | 5: airplane 7 | 6: bus 8 | 7: train 9 | 8: truck 10 | 9: boat 11 | 10: traffic light 12 | 11: fire hydrant 13 | 12: street sign 14 | 13: stop sign 15 | 14: parking meter 16 | 15: bench 17 | 16: bird 18 | 17: cat 19 | 18: dog 20 | 19: horse 21 | 20: sheep 22 | 21: cow 23 | 22: elephant 24 | 23: bear 25 | 24: zebra 26 | 25: giraffe 27 | 26: hat 28 | 27: backpack 29 | 28: umbrella 30 | 29: shoe 31 | 30: eye glasses 32 | 31: handbag 33 | 32: tie 34 | 33: suitcase 35 | 34: frisbee 36 | 35: skis 37 | 36: snowboard 38 | 37: sports ball 39 | 38: kite 40 | 39: baseball bat 41 | 40: baseball glove 42 | 41: skateboard 43 | 42: surfboard 44 | 43: tennis racket 45 | 44: bottle 46 | 45: plate 47 | 46: wine glass 48 | 47: cup 49 | 48: fork 50 | 49: knife 51 | 50: spoon 52 | 51: bowl 53 | 52: banana 54 | 53: apple 55 | 54: sandwich 56 | 55: orange 57 | 56: broccoli 58 | 57: carrot 59 | 58: hot dog 60 | 59: pizza 61 | 60: donut 62 | 61: cake 63 | 62: chair 64 | 63: couch 65 | 64: potted plant 66 | 65: bed 67 | 66: mirror 68 | 67: dining table 69 | 68: window 70 | 69: desk 71 | 70: toilet 72 | 71: door 73 | 72: tv 74 | 73: laptop 75 | 74: mouse 76 | 75: remote 77 | 76: keyboard 78 | 77: cell phone 79 | 78: microwave 80 | 79: oven 81 | 80: toaster 82 | 81: sink 83 | 82: refrigerator 84 | 83: blender 85 | 84: book 86 | 85: clock 87 | 86: vase 88 | 87: scissors 89 | 88: teddy bear 90 | 89: hair drier 91 | 90: toothbrush 92 | 91: hair brush 93 | 92: banner 94 | 93: blanket 95 | 94: branch 96 | 95: bridge 97 | 96: building-other 98 | 97: bush 99 | 98: cabinet 100 | 99: cage 101 | 100: cardboard 102 | 101: carpet 103 | 102: ceiling-other 104 | 103: ceiling-tile 105 | 104: cloth 106 | 105: clothes 107 | 106: clouds 108 | 107: counter 109 | 108: cupboard 110 | 109: curtain 111 | 110: desk-stuff 112 | 111: dirt 113 | 112: door-stuff 114 | 113: fence 115 | 114: floor-marble 116 | 115: floor-other 117 | 116: floor-stone 118 | 117: floor-tile 119 | 118: floor-wood 120 | 119: flower 121 | 120: fog 122 | 121: food-other 123 | 122: fruit 124 | 123: furniture-other 125 | 124: grass 126 | 125: gravel 127 | 126: ground-other 128 | 127: hill 129 | 128: house 130 | 129: leaves 131 | 130: light 132 | 131: mat 133 | 132: metal 134 | 133: mirror-stuff 135 | 134: moss 136 | 135: mountain 137 | 136: mud 138 | 137: napkin 139 | 138: net 140 | 139: paper 141 | 140: pavement 142 | 141: pillow 143 | 142: plant-other 144 | 143: plastic 145 | 144: platform 146 | 145: playingfield 147 | 146: railing 148 | 147: railroad 149 | 148: river 150 | 149: road 151 | 150: rock 152 | 151: roof 153 | 152: rug 154 | 153: salad 155 | 154: sand 156 | 155: sea 157 | 156: shelf 158 | 157: sky-other 159 | 158: skyscraper 160 | 159: snow 161 | 160: solid-other 162 | 161: stairs 163 | 162: stone 164 | 163: straw 165 | 164: structural-other 166 | 165: table 167 | 166: tent 168 | 167: textile-other 169 | 168: towel 170 | 169: tree 171 | 170: vegetable 172 | 171: wall-brick 173 | 172: wall-concrete 174 | 173: wall-other 175 | 174: wall-panel 176 | 175: wall-stone 177 | 176: wall-tile 178 | 177: wall-wood 179 | 178: water-other 180 | 179: waterdrops 181 | 180: window-blind 182 | 181: window-other 183 | 182: wood -------------------------------------------------------------------------------- /frozen_inference_graph.pb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattn/go-object-detect-from-image/fa515ca074a14107251a76adc9f6eb466b48b15c/frozen_inference_graph.pb -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "bytes" 6 | "encoding/json" 7 | "flag" 8 | "fmt" 9 | "go/build" 10 | "image" 11 | "image/color" 12 | "image/draw" 13 | "image/jpeg" 14 | "io/ioutil" 15 | "log" 16 | "os" 17 | "path/filepath" 18 | 19 | "golang.org/x/image/bmp" 20 | "golang.org/x/image/colornames" 21 | "golang.org/x/image/font" 22 | "golang.org/x/image/font/basicfont" 23 | "golang.org/x/image/math/fixed" 24 | 25 | tf "github.com/tensorflow/tensorflow/tensorflow/go" 26 | "github.com/tensorflow/tensorflow/tensorflow/go/op" 27 | ) 28 | 29 | type jsonResult struct { 30 | Name string `json:"name"` 31 | Probability float64 `json:"probability"` 32 | } 33 | 34 | func drawString(img *image.RGBA, p image.Point, c color.Color, s string) { 35 | d := &font.Drawer{ 36 | Dst: img, 37 | Src: image.NewUniform(c), 38 | Face: basicfont.Face7x13, 39 | Dot: fixed.Point26_6{fixed.Int26_6(p.X * 64), fixed.Int26_6(p.Y * 64)}, 40 | } 41 | d.DrawString(s) 42 | } 43 | 44 | func drawRect(img *image.RGBA, r image.Rectangle, c color.Color) { 45 | for x := r.Min.X; x <= r.Max.X; x++ { 46 | img.Set(x, r.Min.Y, c) 47 | img.Set(x, r.Max.Y, c) 48 | } 49 | for y := r.Min.Y; y <= r.Max.Y; y++ { 50 | img.Set(r.Min.X, y, c) 51 | img.Set(r.Max.X, y, c) 52 | } 53 | } 54 | 55 | func loadLabels(name string) ([]string, error) { 56 | f, err := os.Open(name) 57 | if err != nil { 58 | return nil, err 59 | } 60 | defer f.Close() 61 | 62 | scanner := bufio.NewScanner(f) 63 | var labels []string 64 | for scanner.Scan() { 65 | labels = append(labels, scanner.Text()) 66 | } 67 | if err := scanner.Err(); err != nil { 68 | return nil, err 69 | } 70 | return labels, nil 71 | } 72 | 73 | func decodeBitmapGraph() (*tf.Graph, tf.Output, tf.Output, error) { 74 | s := op.NewScope() 75 | input := op.Placeholder(s, tf.String) 76 | output := op.ExpandDims( 77 | s, 78 | op.DecodeBmp(s, input, op.DecodeBmpChannels(3)), 79 | op.Const(s.SubScope("make_batch"), int32(0))) 80 | graph, err := s.Finalize() 81 | return graph, input, output, err 82 | } 83 | 84 | func makeTensorFromImage(img []byte) (*tf.Tensor, image.Image, error) { 85 | tensor, err := tf.NewTensor(string(img)) 86 | if err != nil { 87 | return nil, nil, err 88 | } 89 | normalizeGraph, input, output, err := decodeBitmapGraph() 90 | if err != nil { 91 | return nil, nil, err 92 | } 93 | normalizeSession, err := tf.NewSession(normalizeGraph, nil) 94 | if err != nil { 95 | return nil, nil, err 96 | } 97 | defer normalizeSession.Close() 98 | normalized, err := normalizeSession.Run( 99 | map[tf.Output]*tf.Tensor{input: tensor}, 100 | []tf.Output{output}, 101 | nil) 102 | if err != nil { 103 | return nil, nil, err 104 | } 105 | 106 | r := bytes.NewReader(img) 107 | i, _, err := image.Decode(r) 108 | if err != nil { 109 | return nil, nil, err 110 | } 111 | return normalized[0], i, nil 112 | } 113 | 114 | func detectObjects(session *tf.Session, graph *tf.Graph, input *tf.Tensor) ([]float32, []float32, [][]float32, error) { 115 | inputop := graph.Operation("image_tensor") 116 | output, err := session.Run( 117 | map[tf.Output]*tf.Tensor{ 118 | inputop.Output(0): input, 119 | }, 120 | []tf.Output{ 121 | graph.Operation("detection_boxes").Output(0), 122 | graph.Operation("detection_scores").Output(0), 123 | graph.Operation("detection_classes").Output(0), 124 | graph.Operation("num_detections").Output(0), 125 | }, 126 | nil) 127 | if err != nil { 128 | return nil, nil, nil, fmt.Errorf("Error running session: %v", err) 129 | } 130 | probabilities := output[1].Value().([][]float32)[0] 131 | classes := output[2].Value().([][]float32)[0] 132 | boxes := output[0].Value().([][][]float32)[0] 133 | return probabilities, classes, boxes, nil 134 | } 135 | 136 | func main() { 137 | var jsoninfo bool 138 | var probability float64 139 | var dir string 140 | var output string 141 | 142 | flag.BoolVar(&jsoninfo, "json", false, "Output JSON information (instead of output image)") 143 | flag.Float64Var(&probability, "prob", 0.4, "Probability") 144 | flag.StringVar(&dir, "dir", filepath.Join(filepath.SplitList(build.Default.GOPATH)[0], "src/github.com/mattn/go-object-detect-from-image"), "Directory containing the trained model and labels files") 145 | flag.StringVar(&output, "output", "output.jpg", "Output file name") 146 | flag.Parse() 147 | 148 | model, err := ioutil.ReadFile(filepath.Join(dir, "frozen_inference_graph.pb")) 149 | if err != nil { 150 | log.Fatal(err) 151 | } 152 | 153 | labels, err := loadLabels(filepath.Join(dir, "coco_labels.txt")) 154 | if err != nil { 155 | log.Fatal(err) 156 | } 157 | 158 | graph := tf.NewGraph() 159 | if err := graph.Import(model, ""); err != nil { 160 | log.Fatal(err) 161 | } 162 | 163 | session, err := tf.NewSession(graph, nil) 164 | if err != nil { 165 | log.Fatal(err) 166 | } 167 | defer session.Close() 168 | 169 | var f *os.File 170 | if flag.NArg() == 1 { 171 | f, err = os.Open(flag.Arg(0)) 172 | if err != nil { 173 | log.Fatal(err) 174 | } 175 | defer f.Close() 176 | } else { 177 | f = os.Stdin 178 | } 179 | img, _, err := image.Decode(f) 180 | if err != nil { 181 | log.Fatal(err) 182 | } 183 | 184 | var buf bytes.Buffer 185 | err = bmp.Encode(&buf, img) 186 | if err != nil { 187 | log.Fatal(err) 188 | } 189 | 190 | tensor, img, err := makeTensorFromImage(buf.Bytes()) 191 | if err != nil { 192 | log.Fatalf("error making input tensor: %v", err) 193 | } 194 | 195 | probabilities, classes, boxes, err := detectObjects(session, graph, tensor) 196 | if err != nil { 197 | log.Fatalf("error making prediction: %v", err) 198 | } 199 | 200 | if jsoninfo { 201 | var result []jsonResult 202 | i := 0 203 | for float64(probabilities[i]) > probability { 204 | idx := int(classes[i]) 205 | result = append(result, jsonResult{ 206 | Name: labels[idx], 207 | Probability: float64(probabilities[i]), 208 | }) 209 | i++ 210 | } 211 | json.NewEncoder(os.Stdout).Encode(result) 212 | return 213 | } 214 | 215 | bounds := img.Bounds() 216 | canvas := image.NewRGBA(bounds) 217 | draw.Draw(canvas, bounds, img, image.Pt(0, 0), draw.Src) 218 | i := 0 219 | for float64(probabilities[i]) > probability { 220 | idx := int(classes[i]) 221 | y1 := int(float64(bounds.Min.Y) + float64(bounds.Dy())*float64(boxes[i][0])) 222 | x1 := int(float64(bounds.Min.X) + float64(bounds.Dx())*float64(boxes[i][1])) 223 | y2 := int(float64(bounds.Min.Y) + float64(bounds.Dy())*float64(boxes[i][2])) 224 | x2 := int(float64(bounds.Min.X) + float64(bounds.Dx())*float64(boxes[i][3])) 225 | drawRect(canvas, image.Rect(x1, y1, x2, y2), color.RGBA{255, 0, 0, 0}) 226 | drawString( 227 | canvas, 228 | image.Pt(x1, y1), 229 | colornames.Map[colornames.Names[idx]], 230 | fmt.Sprintf("%s (%3.0f%%)", labels[idx], probabilities[i]*100.0)) 231 | i++ 232 | } 233 | 234 | out, err := os.Create(output) 235 | if err != nil { 236 | log.Fatal(err) 237 | } 238 | defer out.Close() 239 | 240 | err = jpeg.Encode(out, canvas, nil) 241 | if err != nil { 242 | log.Fatal(err) 243 | } 244 | } 245 | -------------------------------------------------------------------------------- /output.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mattn/go-object-detect-from-image/fa515ca074a14107251a76adc9f6eb466b48b15c/output.jpg --------------------------------------------------------------------------------