├── .travis.yml ├── LICENSE ├── README.md ├── _output_example └── docker.html ├── go.mod ├── job ├── buildtree.go └── parse.go ├── main.go ├── printer ├── html.go ├── html_test.go ├── plumbing.go ├── printer.go └── text.go ├── suffixtree ├── dupl.go ├── dupl_test.go ├── suffixtree.go └── suffixtree_test.go └── syntax ├── golang └── golang.go ├── syntax.go └── syntax_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.14 4 | - 1.15 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michal Bohuslávek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dupl [![Build Status](https://travis-ci.org/mibk/dupl.png)](https://travis-ci.org/mibk/dupl) 2 | 3 | **dupl** is a tool written in Go for finding code clones. So far it can find clones only 4 | in Go source files. The method uses a suffix tree for serialized ASTs. It ignores the values 5 | of AST nodes and operates only on their types (e.g. `if a == 13 {}` and `if x == 100 {}` are 6 | considered the same, provided the sequence exceeds the minimum token sequence size). 7 | 8 | Because of the method used, dupl can report so-called "false positives" in its output. These are 9 | matches we would not consider clones (either because they are too small or because the values of the matched 10 | tokens are completely different). 11 | 12 | ## Installation 13 | 14 | ```bash 15 | go get -u github.com/mibk/dupl 16 | ``` 17 | 18 | ## Usage 19 | 20 | ``` 21 | Usage of dupl: 22 | dupl [flags] [paths] 23 | 24 | Paths: 25 | If the given path is a file, dupl will use it regardless of 26 | the file extension. If it is a directory it will recursively 27 | search for *.go files in that directory. 28 | 29 | If no path is given dupl will recursively search for *.go 30 | files in the current directory. 31 | 32 | Flags: 33 | -files 34 | read file names from stdin one at each line 35 | -html 36 | output the results as HTML, including duplicate code fragments 37 | -plumbing 38 | plumbing (easy-to-parse) output for consumption by scripts or tools 39 | -t, -threshold size 40 | minimum token sequence size as a clone (default 15) 41 | -vendor 42 | check files in vendor directory 43 | -v, -verbose 44 | explain what is being done 45 | 46 | Examples: 47 | dupl -t 100 48 | Search clones in the current directory of size at least 49 | 100 tokens. 50 | dupl $(find app/ -name '*_test.go') 51 | Search for clones in tests in the app directory. 
52 | find app/ -name '*_test.go' |dupl -files 53 | The same as above. 54 | ``` 55 | 56 | ## Example 57 | 58 | The reduced output of the following command, run on the [Docker](https://www.docker.com) source code, 59 | looks like [this](http://htmlpreview.github.io/?https://github.com/mibk/dupl/blob/master/_output_example/docker.html). 60 | 61 | ```bash 62 | $ dupl -t 200 -html >docker.html 63 | ``` 64 | -------------------------------------------------------------------------------- /_output_example/docker.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Duplicates 4 | 11 |

#1 found 2 clones

12 |

vendor/src/code.google.com/p/go.net/ipv6/multicast_test.go:131

13 |
for i, toggle := range []bool{true, false, true} {
  14 | 	wb, err := (&icmpMessage{
  15 | 		Type: ipv6.ICMPTypeEchoRequest, Code: 0,
  16 | 		Body: &icmpEcho{
  17 | 			ID: os.Getpid() & 0xffff, Seq: i + 1,
  18 | 			Data: []byte("HELLO-R-U-THERE"),
  19 | 		},
  20 | 	}).Marshal()
  21 | 	if err != nil {
  22 | 		t.Fatalf("icmpMessage.Marshal failed: %v", err)
  23 | 	}
  24 | 	if err := p.SetControlMessage(cf, toggle); err != nil {
  25 | 		t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err)
  26 | 	}
  27 | 	cm.HopLimit = i + 1
  28 | 	if _, err := p.WriteTo(wb, &cm, dst); err != nil {
  29 | 		t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err)
  30 | 	}
  31 | 	b := make([]byte, 128)
  32 | 	if n, cm, _, err := p.ReadFrom(b); err != nil {
  33 | 		t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err)
  34 | 	} else {
  35 | 		t.Logf("rcvd cmsg: %v", cm)
  36 | 		if m, err := parseICMPMessage(b[:n]); err != nil {
  37 | 			t.Fatalf("parseICMPMessage failed: %v", err)
  38 | 		} else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 {
  39 | 			t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0)
  40 | 		}
  41 | 	}
  42 | }
43 |

vendor/src/code.google.com/p/go.net/ipv6/unicast_test.go:173

44 |
for i, toggle := range []bool{true, false, true} {
  45 | 	wb, err := (&icmpMessage{
  46 | 		Type: ipv6.ICMPTypeEchoRequest, Code: 0,
  47 | 		Body: &icmpEcho{
  48 | 			ID: os.Getpid() & 0xffff, Seq: i + 1,
  49 | 			Data: []byte("HELLO-R-U-THERE"),
  50 | 		},
  51 | 	}).Marshal()
  52 | 	if err != nil {
  53 | 		t.Fatalf("icmpMessage.Marshal failed: %v", err)
  54 | 	}
  55 | 	if err := p.SetControlMessage(cf, toggle); err != nil {
  56 | 		t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err)
  57 | 	}
  58 | 	cm.HopLimit = i + 1
  59 | 	if _, err := p.WriteTo(wb, &cm, dst); err != nil {
  60 | 		t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err)
  61 | 	}
  62 | 	b := make([]byte, 128)
  63 | 	if n, cm, _, err := p.ReadFrom(b); err != nil {
  64 | 		t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err)
  65 | 	} else {
  66 | 		t.Logf("rcvd cmsg: %v", cm)
  67 | 		if m, err := parseICMPMessage(b[:n]); err != nil {
  68 | 			t.Fatalf("parseICMPMessage failed: %v", err)
  69 | 		} else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 {
  70 | 			t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0)
  71 | 		}
  72 | 	}
  73 | }
74 |

#2 found 2 clones

75 |

vendor/src/code.google.com/p/go.net/websocket/hybi_test.go:33

76 |
func TestHybiClientHandshake(t *testing.T) {
  77 | 	b := bytes.NewBuffer([]byte{})
  78 | 	bw := bufio.NewWriter(b)
  79 | 	br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols
  80 | Upgrade: websocket
  81 | Connection: Upgrade
  82 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo=
  83 | Sec-WebSocket-Protocol: chat
  84 | 
  85 | `))
  86 | 	var err error
  87 | 	config := new(Config)
  88 | 	config.Location, err = url.ParseRequestURI("ws://server.example.com/chat")
  89 | 	if err != nil {
  90 | 		t.Fatal("location url", err)
  91 | 	}
  92 | 	config.Origin, err = url.ParseRequestURI("http://example.com")
  93 | 	if err != nil {
  94 | 		t.Fatal("origin url", err)
  95 | 	}
  96 | 	config.Protocol = append(config.Protocol, "chat")
  97 | 	config.Protocol = append(config.Protocol, "superchat")
  98 | 	config.Version = ProtocolVersionHybi13
  99 | 
 100 | 	config.handshakeData = map[string]string{
 101 | 		"key": "dGhlIHNhbXBsZSBub25jZQ==",
 102 | 	}
 103 | 	err = hybiClientHandshake(config, br, bw)
 104 | 	if err != nil {
 105 | 		t.Errorf("handshake failed: %v", err)
 106 | 	}
 107 | 	req, err := http.ReadRequest(bufio.NewReader(b))
 108 | 	if err != nil {
 109 | 		t.Fatalf("read request: %v", err)
 110 | 	}
 111 | 	if req.Method != "GET" {
 112 | 		t.Errorf("request method expected GET, but got %q", req.Method)
 113 | 	}
 114 | 	if req.URL.Path != "/chat" {
 115 | 		t.Errorf("request path expected /chat, but got %q", req.URL.Path)
 116 | 	}
 117 | 	if req.Proto != "HTTP/1.1" {
 118 | 		t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto)
 119 | 	}
 120 | 	if req.Host != "server.example.com" {
 121 | 		t.Errorf("request Host expected server.example.com, but got %v", req.Host)
 122 | 	}
 123 | 	var expectedHeader = map[string]string{
 124 | 		"Connection":             "Upgrade",
 125 | 		"Upgrade":                "websocket",
 126 | 		"Sec-Websocket-Key":      config.handshakeData["key"],
 127 | 		"Origin":                 config.Origin.String(),
 128 | 		"Sec-Websocket-Protocol": "chat, superchat",
 129 | 		"Sec-Websocket-Version":  fmt.Sprintf("%d", ProtocolVersionHybi13),
 130 | 	}
 131 | 	for k, v := range expectedHeader {
 132 | 		if req.Header.Get(k) != v {
 133 | 			t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k)))
 134 | 		}
 135 | 	}
 136 | }
137 |

vendor/src/code.google.com/p/go.net/websocket/hybi_test.go:160

138 |
func TestHybiClientHandshakeHybi08(t *testing.T) {
 139 | 	b := bytes.NewBuffer([]byte{})
 140 | 	bw := bufio.NewWriter(b)
 141 | 	br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols
 142 | Upgrade: websocket
 143 | Connection: Upgrade
 144 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo=
 145 | Sec-WebSocket-Protocol: chat
 146 | 
 147 | `))
 148 | 	var err error
 149 | 	config := new(Config)
 150 | 	config.Location, err = url.ParseRequestURI("ws://server.example.com/chat")
 151 | 	if err != nil {
 152 | 		t.Fatal("location url", err)
 153 | 	}
 154 | 	config.Origin, err = url.ParseRequestURI("http://example.com")
 155 | 	if err != nil {
 156 | 		t.Fatal("origin url", err)
 157 | 	}
 158 | 	config.Protocol = append(config.Protocol, "chat")
 159 | 	config.Protocol = append(config.Protocol, "superchat")
 160 | 	config.Version = ProtocolVersionHybi08
 161 | 
 162 | 	config.handshakeData = map[string]string{
 163 | 		"key": "dGhlIHNhbXBsZSBub25jZQ==",
 164 | 	}
 165 | 	err = hybiClientHandshake(config, br, bw)
 166 | 	if err != nil {
 167 | 		t.Errorf("handshake failed: %v", err)
 168 | 	}
 169 | 	req, err := http.ReadRequest(bufio.NewReader(b))
 170 | 	if err != nil {
 171 | 		t.Fatalf("read request: %v", err)
 172 | 	}
 173 | 	if req.Method != "GET" {
 174 | 		t.Errorf("request method expected GET, but got %q", req.Method)
 175 | 	}
 176 | 	if req.URL.Path != "/chat" {
 177 | 		t.Errorf("request path expected /demo, but got %q", req.URL.Path)
 178 | 	}
 179 | 	if req.Proto != "HTTP/1.1" {
 180 | 		t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto)
 181 | 	}
 182 | 	if req.Host != "server.example.com" {
 183 | 		t.Errorf("request Host expected example.com, but got %v", req.Host)
 184 | 	}
 185 | 	var expectedHeader = map[string]string{
 186 | 		"Connection":             "Upgrade",
 187 | 		"Upgrade":                "websocket",
 188 | 		"Sec-Websocket-Key":      config.handshakeData["key"],
 189 | 		"Sec-Websocket-Origin":   config.Origin.String(),
 190 | 		"Sec-Websocket-Protocol": "chat, superchat",
 191 | 		"Sec-Websocket-Version":  fmt.Sprintf("%d", ProtocolVersionHybi08),
 192 | 	}
 193 | 	for k, v := range expectedHeader {
 194 | 		if req.Header.Get(k) != v {
 195 | 			t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k)))
 196 | 		}
 197 | 	}
 198 | }
199 |

#3 found 2 clones

200 |

vendor/src/code.google.com/p/go.net/ipv4/gen.go:122

201 |
func (icp *icmpv4Parameters) escape() []canonICMPv4ParamRecord {
 202 | 	id := -1
 203 | 	for i, r := range icp.Registries {
 204 | 		if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") {
 205 | 			id = i
 206 | 			break
 207 | 		}
 208 | 	}
 209 | 	if id < 0 {
 210 | 		return nil
 211 | 	}
 212 | 	prs := make([]canonICMPv4ParamRecord, len(icp.Registries[id].Records))
 213 | 	sr := strings.NewReplacer(
 214 | 		"Messages", "",
 215 | 		"Message", "",
 216 | 		"ICMP", "",
 217 | 		"+", "P",
 218 | 		"-", "",
 219 | 		"/", "",
 220 | 		".", "",
 221 | 		" ", "",
 222 | 	)
 223 | 	for i, pr := range icp.Registries[id].Records {
 224 | 		if strings.Contains(pr.Descr, "Reserved") ||
 225 | 			strings.Contains(pr.Descr, "Unassigned") ||
 226 | 			strings.Contains(pr.Descr, "Deprecated") ||
 227 | 			strings.Contains(pr.Descr, "Experiment") ||
 228 | 			strings.Contains(pr.Descr, "experiment") {
 229 | 			continue
 230 | 		}
 231 | 		ss := strings.Split(pr.Descr, "\n")
 232 | 		if len(ss) > 1 {
 233 | 			prs[i].Descr = strings.Join(ss, " ")
 234 | 		} else {
 235 | 			prs[i].Descr = ss[0]
 236 | 		}
 237 | 		s := strings.TrimSpace(prs[i].Descr)
 238 | 		prs[i].OrigDescr = s
 239 | 		prs[i].Descr = sr.Replace(s)
 240 | 		prs[i].Value, _ = strconv.Atoi(pr.Value)
 241 | 	}
 242 | 	return prs
 243 | }
244 |

vendor/src/code.google.com/p/go.net/ipv6/gen.go:122

245 |
func (icp *icmpv6Parameters) escape() []canonICMPv6ParamRecord {
 246 | 	id := -1
 247 | 	for i, r := range icp.Registries {
 248 | 		if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") {
 249 | 			id = i
 250 | 			break
 251 | 		}
 252 | 	}
 253 | 	if id < 0 {
 254 | 		return nil
 255 | 	}
 256 | 	prs := make([]canonICMPv6ParamRecord, len(icp.Registries[id].Records))
 257 | 	sr := strings.NewReplacer(
 258 | 		"Messages", "",
 259 | 		"Message", "",
 260 | 		"ICMP", "",
 261 | 		"+", "P",
 262 | 		"-", "",
 263 | 		"/", "",
 264 | 		".", "",
 265 | 		" ", "",
 266 | 	)
 267 | 	for i, pr := range icp.Registries[id].Records {
 268 | 		if strings.Contains(pr.Name, "Reserved") ||
 269 | 			strings.Contains(pr.Name, "Unassigned") ||
 270 | 			strings.Contains(pr.Name, "Deprecated") ||
 271 | 			strings.Contains(pr.Name, "Experiment") ||
 272 | 			strings.Contains(pr.Name, "experiment") {
 273 | 			continue
 274 | 		}
 275 | 		ss := strings.Split(pr.Name, "\n")
 276 | 		if len(ss) > 1 {
 277 | 			prs[i].Name = strings.Join(ss, " ")
 278 | 		} else {
 279 | 			prs[i].Name = ss[0]
 280 | 		}
 281 | 		s := strings.TrimSpace(prs[i].Name)
 282 | 		prs[i].OrigName = s
 283 | 		prs[i].Name = sr.Replace(s)
 284 | 		prs[i].Value, _ = strconv.Atoi(pr.Value)
 285 | 	}
 286 | 	return prs
 287 | }
288 |

#4 found 2 clones

289 |

pkg/term/termios_darwin.go:1

290 |
package term
 291 | 
 292 | import (
 293 | 	"syscall"
 294 | 	"unsafe"
 295 | )
 296 | 
 297 | const (
 298 | 	getTermios = syscall.TIOCGETA
 299 | 	setTermios = syscall.TIOCSETA
 300 | 
 301 | 	IGNBRK = syscall.IGNBRK
 302 | 	PARMRK = syscall.PARMRK
 303 | 	INLCR  = syscall.INLCR
 304 | 	IGNCR  = syscall.IGNCR
 305 | 	ECHONL = syscall.ECHONL
 306 | 	CSIZE  = syscall.CSIZE
 307 | 	ICRNL  = syscall.ICRNL
 308 | 	ISTRIP = syscall.ISTRIP
 309 | 	PARENB = syscall.PARENB
 310 | 	ECHO   = syscall.ECHO
 311 | 	ICANON = syscall.ICANON
 312 | 	ISIG   = syscall.ISIG
 313 | 	IXON   = syscall.IXON
 314 | 	BRKINT = syscall.BRKINT
 315 | 	INPCK  = syscall.INPCK
 316 | 	OPOST  = syscall.OPOST
 317 | 	CS8    = syscall.CS8
 318 | 	IEXTEN = syscall.IEXTEN
 319 | )
 320 | 
 321 | type Termios struct {
 322 | 	Iflag  uint64
 323 | 	Oflag  uint64
 324 | 	Cflag  uint64
 325 | 	Lflag  uint64
 326 | 	Cc     [20]byte
 327 | 	Ispeed uint64
 328 | 	Ospeed uint64
 329 | }
 330 | 
 331 | // MakeRaw put the terminal connected to the given file descriptor into raw
 332 | // mode and returns the previous state of the terminal so that it can be
 333 | // restored.
 334 | func MakeRaw(fd uintptr) (*State, error) {
 335 | 	var oldState State
 336 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 {
 337 | 		return nil, err
 338 | 	}
 339 | 
 340 | 	newState := oldState.termios
 341 | 	newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON)
 342 | 	newState.Oflag &^= OPOST
 343 | 	newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN)
 344 | 	newState.Cflag &^= (CSIZE | PARENB)
 345 | 	newState.Cflag |= CS8
 346 | 	newState.Cc[syscall.VMIN] = 1
 347 | 	newState.Cc[syscall.VTIME] = 0
 348 | 
 349 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 {
 350 | 		return nil, err
 351 | 	}
 352 | 
 353 | 	return &oldState, nil
 354 | }
355 |

pkg/term/termios_freebsd.go:1

356 |
package term
 357 | 
 358 | import (
 359 | 	"syscall"
 360 | 	"unsafe"
 361 | )
 362 | 
 363 | const (
 364 | 	getTermios = syscall.TIOCGETA
 365 | 	setTermios = syscall.TIOCSETA
 366 | 
 367 | 	IGNBRK = syscall.IGNBRK
 368 | 	PARMRK = syscall.PARMRK
 369 | 	INLCR  = syscall.INLCR
 370 | 	IGNCR  = syscall.IGNCR
 371 | 	ECHONL = syscall.ECHONL
 372 | 	CSIZE  = syscall.CSIZE
 373 | 	ICRNL  = syscall.ICRNL
 374 | 	ISTRIP = syscall.ISTRIP
 375 | 	PARENB = syscall.PARENB
 376 | 	ECHO   = syscall.ECHO
 377 | 	ICANON = syscall.ICANON
 378 | 	ISIG   = syscall.ISIG
 379 | 	IXON   = syscall.IXON
 380 | 	BRKINT = syscall.BRKINT
 381 | 	INPCK  = syscall.INPCK
 382 | 	OPOST  = syscall.OPOST
 383 | 	CS8    = syscall.CS8
 384 | 	IEXTEN = syscall.IEXTEN
 385 | )
 386 | 
 387 | type Termios struct {
 388 | 	Iflag  uint32
 389 | 	Oflag  uint32
 390 | 	Cflag  uint32
 391 | 	Lflag  uint32
 392 | 	Cc     [20]byte
 393 | 	Ispeed uint32
 394 | 	Ospeed uint32
 395 | }
 396 | 
 397 | // MakeRaw put the terminal connected to the given file descriptor into raw
 398 | // mode and returns the previous state of the terminal so that it can be
 399 | // restored.
 400 | func MakeRaw(fd uintptr) (*State, error) {
 401 | 	var oldState State
 402 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 {
 403 | 		return nil, err
 404 | 	}
 405 | 
 406 | 	newState := oldState.termios
 407 | 	newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON)
 408 | 	newState.Oflag &^= OPOST
 409 | 	newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN)
 410 | 	newState.Cflag &^= (CSIZE | PARENB)
 411 | 	newState.Cflag |= CS8
 412 | 	newState.Cc[syscall.VMIN] = 1
 413 | 	newState.Cc[syscall.VTIME] = 0
 414 | 
 415 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 {
 416 | 		return nil, err
 417 | 	}
 418 | 
 419 | 	return &oldState, nil
 420 | }
421 |

#5 found 2 clones

422 |

vendor/src/code.google.com/p/go.net/ipv4/gentest.go:12

423 |
package main
 424 | 
 425 | import (
 426 | 	"bytes"
 427 | 	"encoding/xml"
 428 | 	"fmt"
 429 | 	"go/format"
 430 | 	"io"
 431 | 	"net/http"
 432 | 	"os"
 433 | 	"strconv"
 434 | 	"strings"
 435 | )
 436 | 
 437 | var registries = []struct {
 438 | 	url   string
 439 | 	parse func(io.Writer, io.Reader) error
 440 | }{
 441 | 	{
 442 | 		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
 443 | 		parseDSCPRegistry,
 444 | 	},
 445 | 	{
 446 | 		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
 447 | 		parseTOSTCByte,
 448 | 	},
 449 | }
 450 | 
 451 | func main() {
 452 | 	var bb bytes.Buffer
 453 | 	fmt.Fprintf(&bb, "// go run gentv.go\n")
 454 | 	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
 455 | 	fmt.Fprintf(&bb, "package ipv4_test\n\n")
 456 | 	for _, r := range registries {
 457 | 		resp, err := http.Get(r.url)
 458 | 		if err != nil {
 459 | 			fmt.Fprintln(os.Stderr, err)
 460 | 			os.Exit(1)
 461 | 		}
 462 | 		defer resp.Body.Close()
 463 | 		if resp.StatusCode != http.StatusOK {
 464 | 			fmt.Fprintf(os.Stderr, "got HTTP status code %v for %v\n", resp.StatusCode, r.url)
 465 | 			os.Exit(1)
 466 | 		}
 467 | 		if err := r.parse(&bb, resp.Body); err != nil {
 468 | 			fmt.Fprintln(os.Stderr, err)
 469 | 			os.Exit(1)
 470 | 		}
 471 | 		fmt.Fprintf(&bb, "\n")
 472 | 	}
 473 | 	b, err := format.Source(bb.Bytes())
 474 | 	if err != nil {
 475 | 		fmt.Fprintln(os.Stderr, err)
 476 | 		os.Exit(1)
 477 | 	}
 478 | 	os.Stdout.Write(b)
 479 | }
 480 | 
 481 | func parseDSCPRegistry(w io.Writer, r io.Reader) error {
 482 | 	dec := xml.NewDecoder(r)
 483 | 	var dr dscpRegistry
 484 | 	if err := dec.Decode(&dr); err != nil {
 485 | 		return err
 486 | 	}
 487 | 	drs := dr.escape()
 488 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
 489 | 	fmt.Fprintf(w, "const (\n")
 490 | 	for _, dr := range drs {
 491 | 		fmt.Fprintf(w, "DiffServ%s = %#x", dr.Name, dr.Value)
 492 | 		fmt.Fprintf(w, "// %s\n", dr.OrigName)
 493 | 	}
 494 | 	fmt.Fprintf(w, ")\n")
 495 | 	return nil
 496 | }
 497 | 
 498 | type dscpRegistry struct {
 499 | 	XMLName     xml.Name     `xml:"registry"`
 500 | 	Title       string       `xml:"title"`
 501 | 	Updated     string       `xml:"updated"`
 502 | 	Note        string       `xml:"note"`
 503 | 	RegTitle    string       `xml:"registry>title"`
 504 | 	PoolRecords []dscpRecord `xml:"registry>record"`
 505 | 	Records     []dscpRecord `xml:"registry>registry>record"`
 506 | }
 507 | 
 508 | type dscpRecord struct {
 509 | 	Name  string `xml:"name"`
 510 | 	Space string `xml:"space"`
 511 | }
 512 | 
 513 | type canonDSCPRecord struct {
 514 | 	OrigName string
 515 | 	Name     string
 516 | 	Value    int
 517 | }
 518 | 
 519 | func (drr *dscpRegistry) escape() []canonDSCPRecord {
 520 | 	drs := make([]canonDSCPRecord, len(drr.Records))
 521 | 	sr := strings.NewReplacer(
 522 | 		"+", "",
 523 | 		"-", "",
 524 | 		"/", "",
 525 | 		".", "",
 526 | 		" ", "",
 527 | 	)
 528 | 	for i, dr := range drr.Records {
 529 | 		s := strings.TrimSpace(dr.Name)
 530 | 		drs[i].OrigName = s
 531 | 		drs[i].Name = sr.Replace(s)
 532 | 		n, err := strconv.ParseUint(dr.Space, 2, 8)
 533 | 		if err != nil {
 534 | 			continue
 535 | 		}
 536 | 		drs[i].Value = int(n) << 2
 537 | 	}
 538 | 	return drs
 539 | }
 540 | 
 541 | func parseTOSTCByte(w io.Writer, r io.Reader) error {
 542 | 	dec := xml.NewDecoder(r)
 543 | 	var ttb tosTCByte
 544 | 	if err := dec.Decode(&ttb); err != nil {
 545 | 		return err
 546 | 	}
 547 | 	trs := ttb.escape()
 548 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
 549 | 	fmt.Fprintf(w, "const (\n")
 550 | 	for _, tr := range trs {
 551 | 		fmt.Fprintf(w, "%s = %#x", tr.Keyword, tr.Value)
 552 | 		fmt.Fprintf(w, "// %s\n", tr.OrigKeyword)
 553 | 	}
 554 | 	fmt.Fprintf(w, ")\n")
 555 | 	return nil
 556 | }
 557 | 
 558 | type tosTCByte struct {
 559 | 	XMLName  xml.Name          `xml:"registry"`
 560 | 	Title    string            `xml:"title"`
 561 | 	Updated  string            `xml:"updated"`
 562 | 	Note     string            `xml:"note"`
 563 | 	RegTitle string            `xml:"registry>title"`
 564 | 	Records  []tosTCByteRecord `xml:"registry>record"`
 565 | }
 566 | 
 567 | type tosTCByteRecord struct {
 568 | 	Binary  string `xml:"binary"`
 569 | 	Keyword string `xml:"keyword"`
 570 | }
 571 | 
 572 | type canonTOSTCByteRecord struct {
 573 | 	OrigKeyword string
 574 | 	Keyword     string
 575 | 	Value       int
 576 | }
 577 | 
 578 | func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
 579 | 	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
 580 | 	sr := strings.NewReplacer(
 581 | 		"Capable", "",
 582 | 		"(", "",
 583 | 		")", "",
 584 | 		"+", "",
 585 | 		"-", "",
 586 | 		"/", "",
 587 | 		".", "",
 588 | 		" ", "",
 589 | 	)
 590 | 	for i, tr := range ttb.Records {
 591 | 		s := strings.TrimSpace(tr.Keyword)
 592 | 		trs[i].OrigKeyword = s
 593 | 		ss := strings.Split(s, " ")
 594 | 		if len(ss) > 1 {
 595 | 			trs[i].Keyword = strings.Join(ss[1:], " ")
 596 | 		} else {
 597 | 			trs[i].Keyword = ss[0]
 598 | 		}
 599 | 		trs[i].Keyword = sr.Replace(trs[i].Keyword)
 600 | 		n, err := strconv.ParseUint(tr.Binary, 2, 8)
 601 | 		if err != nil {
 602 | 			continue
 603 | 		}
 604 | 		trs[i].Value = int(n)
 605 | 	}
 606 | 	return trs
 607 | }
608 |

vendor/src/code.google.com/p/go.net/ipv6/gentest.go:12

609 |
package main
 610 | 
 611 | import (
 612 | 	"bytes"
 613 | 	"encoding/xml"
 614 | 	"fmt"
 615 | 	"go/format"
 616 | 	"io"
 617 | 	"net/http"
 618 | 	"os"
 619 | 	"strconv"
 620 | 	"strings"
 621 | )
 622 | 
 623 | var registries = []struct {
 624 | 	url   string
 625 | 	parse func(io.Writer, io.Reader) error
 626 | }{
 627 | 	{
 628 | 		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
 629 | 		parseDSCPRegistry,
 630 | 	},
 631 | 	{
 632 | 		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
 633 | 		parseTOSTCByte,
 634 | 	},
 635 | }
 636 | 
 637 | func main() {
 638 | 	var bb bytes.Buffer
 639 | 	fmt.Fprintf(&bb, "// go run gentv.go\n")
 640 | 	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
 641 | 	fmt.Fprintf(&bb, "package ipv6_test\n\n")
 642 | 	for _, r := range registries {
 643 | 		resp, err := http.Get(r.url)
 644 | 		if err != nil {
 645 | 			fmt.Fprintln(os.Stderr, err)
 646 | 			os.Exit(1)
 647 | 		}
 648 | 		defer resp.Body.Close()
 649 | 		if resp.StatusCode != http.StatusOK {
 650 | 			fmt.Fprintf(os.Stderr, "got HTTP status code %v for %v\n", resp.StatusCode, r.url)
 651 | 			os.Exit(1)
 652 | 		}
 653 | 		if err := r.parse(&bb, resp.Body); err != nil {
 654 | 			fmt.Fprintln(os.Stderr, err)
 655 | 			os.Exit(1)
 656 | 		}
 657 | 		fmt.Fprintf(&bb, "\n")
 658 | 	}
 659 | 	b, err := format.Source(bb.Bytes())
 660 | 	if err != nil {
 661 | 		fmt.Fprintln(os.Stderr, err)
 662 | 		os.Exit(1)
 663 | 	}
 664 | 	os.Stdout.Write(b)
 665 | }
 666 | 
 667 | func parseDSCPRegistry(w io.Writer, r io.Reader) error {
 668 | 	dec := xml.NewDecoder(r)
 669 | 	var dr dscpRegistry
 670 | 	if err := dec.Decode(&dr); err != nil {
 671 | 		return err
 672 | 	}
 673 | 	drs := dr.escape()
 674 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
 675 | 	fmt.Fprintf(w, "const (\n")
 676 | 	for _, dr := range drs {
 677 | 		fmt.Fprintf(w, "DiffServ%s = %#x", dr.Name, dr.Value)
 678 | 		fmt.Fprintf(w, "// %s\n", dr.OrigName)
 679 | 	}
 680 | 	fmt.Fprintf(w, ")\n")
 681 | 	return nil
 682 | }
 683 | 
 684 | type dscpRegistry struct {
 685 | 	XMLName     xml.Name     `xml:"registry"`
 686 | 	Title       string       `xml:"title"`
 687 | 	Updated     string       `xml:"updated"`
 688 | 	Note        string       `xml:"note"`
 689 | 	RegTitle    string       `xml:"registry>title"`
 690 | 	PoolRecords []dscpRecord `xml:"registry>record"`
 691 | 	Records     []dscpRecord `xml:"registry>registry>record"`
 692 | }
 693 | 
 694 | type dscpRecord struct {
 695 | 	Name  string `xml:"name"`
 696 | 	Space string `xml:"space"`
 697 | }
 698 | 
 699 | type canonDSCPRecord struct {
 700 | 	OrigName string
 701 | 	Name     string
 702 | 	Value    int
 703 | }
 704 | 
 705 | func (drr *dscpRegistry) escape() []canonDSCPRecord {
 706 | 	drs := make([]canonDSCPRecord, len(drr.Records))
 707 | 	sr := strings.NewReplacer(
 708 | 		"+", "",
 709 | 		"-", "",
 710 | 		"/", "",
 711 | 		".", "",
 712 | 		" ", "",
 713 | 	)
 714 | 	for i, dr := range drr.Records {
 715 | 		s := strings.TrimSpace(dr.Name)
 716 | 		drs[i].OrigName = s
 717 | 		drs[i].Name = sr.Replace(s)
 718 | 		n, err := strconv.ParseUint(dr.Space, 2, 8)
 719 | 		if err != nil {
 720 | 			continue
 721 | 		}
 722 | 		drs[i].Value = int(n) << 2
 723 | 	}
 724 | 	return drs
 725 | }
 726 | 
 727 | func parseTOSTCByte(w io.Writer, r io.Reader) error {
 728 | 	dec := xml.NewDecoder(r)
 729 | 	var ttb tosTCByte
 730 | 	if err := dec.Decode(&ttb); err != nil {
 731 | 		return err
 732 | 	}
 733 | 	trs := ttb.escape()
 734 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
 735 | 	fmt.Fprintf(w, "const (\n")
 736 | 	for _, tr := range trs {
 737 | 		fmt.Fprintf(w, "%s = %#x", tr.Keyword, tr.Value)
 738 | 		fmt.Fprintf(w, "// %s\n", tr.OrigKeyword)
 739 | 	}
 740 | 	fmt.Fprintf(w, ")\n")
 741 | 	return nil
 742 | }
 743 | 
 744 | type tosTCByte struct {
 745 | 	XMLName  xml.Name          `xml:"registry"`
 746 | 	Title    string            `xml:"title"`
 747 | 	Updated  string            `xml:"updated"`
 748 | 	Note     string            `xml:"note"`
 749 | 	RegTitle string            `xml:"registry>title"`
 750 | 	Records  []tosTCByteRecord `xml:"registry>record"`
 751 | }
 752 | 
 753 | type tosTCByteRecord struct {
 754 | 	Binary  string `xml:"binary"`
 755 | 	Keyword string `xml:"keyword"`
 756 | }
 757 | 
 758 | type canonTOSTCByteRecord struct {
 759 | 	OrigKeyword string
 760 | 	Keyword     string
 761 | 	Value       int
 762 | }
 763 | 
 764 | func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
 765 | 	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
 766 | 	sr := strings.NewReplacer(
 767 | 		"Capable", "",
 768 | 		"(", "",
 769 | 		")", "",
 770 | 		"+", "",
 771 | 		"-", "",
 772 | 		"/", "",
 773 | 		".", "",
 774 | 		" ", "",
 775 | 	)
 776 | 	for i, tr := range ttb.Records {
 777 | 		s := strings.TrimSpace(tr.Keyword)
 778 | 		trs[i].OrigKeyword = s
 779 | 		ss := strings.Split(s, " ")
 780 | 		if len(ss) > 1 {
 781 | 			trs[i].Keyword = strings.Join(ss[1:], " ")
 782 | 		} else {
 783 | 			trs[i].Keyword = ss[0]
 784 | 		}
 785 | 		trs[i].Keyword = sr.Replace(trs[i].Keyword)
 786 | 		n, err := strconv.ParseUint(tr.Binary, 2, 8)
 787 | 		if err != nil {
 788 | 			continue
 789 | 		}
 790 | 		trs[i].Value = int(n)
 791 | 	}
 792 | 	return trs
 793 | }
794 |

#6 found 2 clones

795 |

pkg/jsonlog/jsonlog_marshalling.go:114

796 |
                                                         {
 797 | 	const hex = "0123456789abcdef"
 798 | 
 799 | 	buf.WriteByte('"')
 800 | 	start := 0
 801 | 	for i := 0; i < len(s); {
 802 | 		if b := s[i]; b < utf8.RuneSelf {
 803 | 			if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
 804 | 				i++
 805 | 				continue
 806 | 			}
 807 | 			if start < i {
 808 | 				buf.WriteString(s[start:i])
 809 | 			}
 810 | 			switch b {
 811 | 			case '\\', '"':
 812 | 				buf.WriteByte('\\')
 813 | 				buf.WriteByte(b)
 814 | 			case '\n':
 815 | 				buf.WriteByte('\\')
 816 | 				buf.WriteByte('n')
 817 | 			case '\r':
 818 | 				buf.WriteByte('\\')
 819 | 				buf.WriteByte('r')
 820 | 			default:
 821 | 
 822 | 				buf.WriteString(`\u00`)
 823 | 				buf.WriteByte(hex[b>>4])
 824 | 				buf.WriteByte(hex[b&0xF])
 825 | 			}
 826 | 			i++
 827 | 			start = i
 828 | 			continue
 829 | 		}
 830 | 		c, size := utf8.DecodeRuneInString(s[i:])
 831 | 		if c == utf8.RuneError && size == 1 {
 832 | 			if start < i {
 833 | 				buf.WriteString(s[start:i])
 834 | 			}
 835 | 			buf.WriteString(`\ufffd`)
 836 | 			i += size
 837 | 			start = i
 838 | 			continue
 839 | 		}
 840 | 
 841 | 		if c == '\u2028' || c == '\u2029' {
 842 | 			if start < i {
 843 | 				buf.WriteString(s[start:i])
 844 | 			}
 845 | 			buf.WriteString(`\u202`)
 846 | 			buf.WriteByte(hex[c&0xF])
 847 | 			i += size
 848 | 			start = i
 849 | 			continue
 850 | 		}
 851 | 		i += size
 852 | 	}
 853 | 	if start < len(s) {
 854 | 		buf.WriteString(s[start:])
 855 | 	}
 856 | 	buf.WriteByte('"')
 857 | }
858 |

pkg/jsonlog/jsonlogbytes.go:54

859 |
                                                                {
 860 | 	const hex = "0123456789abcdef"
 861 | 
 862 | 	buf.WriteByte('"')
 863 | 	start := 0
 864 | 	for i := 0; i < len(s); {
 865 | 		if b := s[i]; b < utf8.RuneSelf {
 866 | 			if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
 867 | 				i++
 868 | 				continue
 869 | 			}
 870 | 			if start < i {
 871 | 				buf.Write(s[start:i])
 872 | 			}
 873 | 			switch b {
 874 | 			case '\\', '"':
 875 | 				buf.WriteByte('\\')
 876 | 				buf.WriteByte(b)
 877 | 			case '\n':
 878 | 				buf.WriteByte('\\')
 879 | 				buf.WriteByte('n')
 880 | 			case '\r':
 881 | 				buf.WriteByte('\\')
 882 | 				buf.WriteByte('r')
 883 | 			default:
 884 | 
 885 | 				buf.WriteString(`\u00`)
 886 | 				buf.WriteByte(hex[b>>4])
 887 | 				buf.WriteByte(hex[b&0xF])
 888 | 			}
 889 | 			i++
 890 | 			start = i
 891 | 			continue
 892 | 		}
 893 | 		c, size := utf8.DecodeRune(s[i:])
 894 | 		if c == utf8.RuneError && size == 1 {
 895 | 			if start < i {
 896 | 				buf.Write(s[start:i])
 897 | 			}
 898 | 			buf.WriteString(`\ufffd`)
 899 | 			i += size
 900 | 			start = i
 901 | 			continue
 902 | 		}
 903 | 
 904 | 		if c == '\u2028' || c == '\u2029' {
 905 | 			if start < i {
 906 | 				buf.Write(s[start:i])
 907 | 			}
 908 | 			buf.WriteString(`\u202`)
 909 | 			buf.WriteByte(hex[c&0xF])
 910 | 			i += size
 911 | 			start = i
 912 | 			continue
 913 | 		}
 914 | 		i += size
 915 | 	}
 916 | 	if start < len(s) {
 917 | 		buf.Write(s[start:])
 918 | 	}
 919 | 	buf.WriteByte('"')
 920 | }
921 |

#7 found 3 clones

922 |

daemon/container_unit_test.go:8

923 |
func TestParseNetworkOptsPrivateOnly(t *testing.T) {
 924 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::80"})
 925 | 	if err != nil {
 926 | 		t.Fatal(err)
 927 | 	}
 928 | 	if len(ports) != 1 {
 929 | 		t.Logf("Expected 1 got %d", len(ports))
 930 | 		t.FailNow()
 931 | 	}
 932 | 	if len(bindings) != 1 {
 933 | 		t.Logf("Expected 1 got %d", len(bindings))
 934 | 		t.FailNow()
 935 | 	}
 936 | 	for k := range ports {
 937 | 		if k.Proto() != "tcp" {
 938 | 			t.Logf("Expected tcp got %s", k.Proto())
 939 | 			t.Fail()
 940 | 		}
 941 | 		if k.Port() != "80" {
 942 | 			t.Logf("Expected 80 got %s", k.Port())
 943 | 			t.Fail()
 944 | 		}
 945 | 		b, exists := bindings[k]
 946 | 		if !exists {
 947 | 			t.Log("Binding does not exist")
 948 | 			t.FailNow()
 949 | 		}
 950 | 		if len(b) != 1 {
 951 | 			t.Logf("Expected 1 got %d", len(b))
 952 | 			t.FailNow()
 953 | 		}
 954 | 		s := b[0]
 955 | 		if s.HostPort != "" {
 956 | 			t.Logf("Expected \"\" got %s", s.HostPort)
 957 | 			t.Fail()
 958 | 		}
 959 | 		if s.HostIp != "192.168.1.100" {
 960 | 			t.Fail()
 961 | 		}
 962 | 	}
 963 | }
964 |

daemon/container_unit_test.go:50

965 |
func TestParseNetworkOptsPublic(t *testing.T) {
 966 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100:8080:80"})
 967 | 	if err != nil {
 968 | 		t.Fatal(err)
 969 | 	}
 970 | 	if len(ports) != 1 {
 971 | 		t.Logf("Expected 1 got %d", len(ports))
 972 | 		t.FailNow()
 973 | 	}
 974 | 	if len(bindings) != 1 {
 975 | 		t.Logf("Expected 1 got %d", len(bindings))
 976 | 		t.FailNow()
 977 | 	}
 978 | 	for k := range ports {
 979 | 		if k.Proto() != "tcp" {
 980 | 			t.Logf("Expected tcp got %s", k.Proto())
 981 | 			t.Fail()
 982 | 		}
 983 | 		if k.Port() != "80" {
 984 | 			t.Logf("Expected 80 got %s", k.Port())
 985 | 			t.Fail()
 986 | 		}
 987 | 		b, exists := bindings[k]
 988 | 		if !exists {
 989 | 			t.Log("Binding does not exist")
 990 | 			t.FailNow()
 991 | 		}
 992 | 		if len(b) != 1 {
 993 | 			t.Logf("Expected 1 got %d", len(b))
 994 | 			t.FailNow()
 995 | 		}
 996 | 		s := b[0]
 997 | 		if s.HostPort != "8080" {
 998 | 			t.Logf("Expected 8080 got %s", s.HostPort)
 999 | 			t.Fail()
1000 | 		}
1001 | 		if s.HostIp != "192.168.1.100" {
1002 | 			t.Fail()
1003 | 		}
1004 | 	}
1005 | }
1006 |

daemon/container_unit_test.go:127

1007 |
func TestParseNetworkOptsUdp(t *testing.T) {
1008 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::6000/udp"})
1009 | 	if err != nil {
1010 | 		t.Fatal(err)
1011 | 	}
1012 | 	if len(ports) != 1 {
1013 | 		t.Logf("Expected 1 got %d", len(ports))
1014 | 		t.FailNow()
1015 | 	}
1016 | 	if len(bindings) != 1 {
1017 | 		t.Logf("Expected 1 got %d", len(bindings))
1018 | 		t.FailNow()
1019 | 	}
1020 | 	for k := range ports {
1021 | 		if k.Proto() != "udp" {
1022 | 			t.Logf("Expected udp got %s", k.Proto())
1023 | 			t.Fail()
1024 | 		}
1025 | 		if k.Port() != "6000" {
1026 | 			t.Logf("Expected 6000 got %s", k.Port())
1027 | 			t.Fail()
1028 | 		}
1029 | 		b, exists := bindings[k]
1030 | 		if !exists {
1031 | 			t.Log("Binding does not exist")
1032 | 			t.FailNow()
1033 | 		}
1034 | 		if len(b) != 1 {
1035 | 			t.Logf("Expected 1 got %d", len(b))
1036 | 			t.FailNow()
1037 | 		}
1038 | 		s := b[0]
1039 | 		if s.HostPort != "" {
1040 | 			t.Logf("Expected \"\" got %s", s.HostPort)
1041 | 			t.Fail()
1042 | 		}
1043 | 		if s.HostIp != "192.168.1.100" {
1044 | 			t.Fail()
1045 | 		}
1046 | 	}
1047 | }
1048 |

#8 found 2 clones

1049 |

vendor/src/github.com/docker/libnetwork/portmapper/mapper_test.go:39

1050 |
func TestMapTCPPorts(t *testing.T) {
1051 | 	defer netutils.SetupTestNetNS(t)()
1052 | 	pm := New()
1053 | 	dstIP1 := net.ParseIP("192.168.0.1")
1054 | 	dstIP2 := net.ParseIP("192.168.0.2")
1055 | 	dstAddr1 := &net.TCPAddr{IP: dstIP1, Port: 80}
1056 | 	dstAddr2 := &net.TCPAddr{IP: dstIP2, Port: 80}
1057 | 
1058 | 	srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
1059 | 	srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
1060 | 
1061 | 	addrEqual := func(addr1, addr2 net.Addr) bool {
1062 | 		return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String())
1063 | 	}
1064 | 
1065 | 	if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil {
1066 | 		t.Fatalf("Failed to allocate port: %s", err)
1067 | 	} else if !addrEqual(dstAddr1, host) {
1068 | 		t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s",
1069 | 			dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network())
1070 | 	}
1071 | 
1072 | 	if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil {
1073 | 		t.Fatalf("Port is in use - mapping should have failed")
1074 | 	}
1075 | 
1076 | 	if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil {
1077 | 		t.Fatalf("Port is in use - mapping should have failed")
1078 | 	}
1079 | 
1080 | 	if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil {
1081 | 		t.Fatalf("Failed to allocate port: %s", err)
1082 | 	}
1083 | 
1084 | 	if pm.Unmap(dstAddr1) != nil {
1085 | 		t.Fatalf("Failed to release port")
1086 | 	}
1087 | 
1088 | 	if pm.Unmap(dstAddr2) != nil {
1089 | 		t.Fatalf("Failed to release port")
1090 | 	}
1091 | 
1092 | 	if pm.Unmap(dstAddr2) == nil {
1093 | 		t.Fatalf("Port already released, but no error reported")
1094 | 	}
1095 | }
1096 |

vendor/src/github.com/docker/libnetwork/portmapper/mapper_test.go:119

1097 |
func TestMapUDPPorts(t *testing.T) {
1098 | 	defer netutils.SetupTestNetNS(t)()
1099 | 	pm := New()
1100 | 	dstIP1 := net.ParseIP("192.168.0.1")
1101 | 	dstIP2 := net.ParseIP("192.168.0.2")
1102 | 	dstAddr1 := &net.UDPAddr{IP: dstIP1, Port: 80}
1103 | 	dstAddr2 := &net.UDPAddr{IP: dstIP2, Port: 80}
1104 | 
1105 | 	srcAddr1 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
1106 | 	srcAddr2 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
1107 | 
1108 | 	addrEqual := func(addr1, addr2 net.Addr) bool {
1109 | 		return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String())
1110 | 	}
1111 | 
1112 | 	if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil {
1113 | 		t.Fatalf("Failed to allocate port: %s", err)
1114 | 	} else if !addrEqual(dstAddr1, host) {
1115 | 		t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s",
1116 | 			dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network())
1117 | 	}
1118 | 
1119 | 	if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil {
1120 | 		t.Fatalf("Port is in use - mapping should have failed")
1121 | 	}
1122 | 
1123 | 	if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil {
1124 | 		t.Fatalf("Port is in use - mapping should have failed")
1125 | 	}
1126 | 
1127 | 	if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil {
1128 | 		t.Fatalf("Failed to allocate port: %s", err)
1129 | 	}
1130 | 
1131 | 	if pm.Unmap(dstAddr1) != nil {
1132 | 		t.Fatalf("Failed to release port")
1133 | 	}
1134 | 
1135 | 	if pm.Unmap(dstAddr2) != nil {
1136 | 		t.Fatalf("Failed to release port")
1137 | 	}
1138 | 
1139 | 	if pm.Unmap(dstAddr2) == nil {
1140 | 		t.Fatalf("Port already released, but no error reported")
1141 | 	}
1142 | }
1143 |

#9 found 2 clones

1144 |

integration-cli/docker_cli_create_test.go:104

1145 |
func (s *DockerSuite) TestCreateWithPortRange(c *check.C) {
1146 | 
1147 | 	runCmd := exec.Command(dockerBinary, "create", "-p", "3300-3303:3300-3303/tcp", "busybox", "echo")
1148 | 	out, _, _, err := runCommandWithStdoutStderr(runCmd)
1149 | 	if err != nil {
1150 | 		c.Fatal(out, err)
1151 | 	}
1152 | 
1153 | 	cleanedContainerID := strings.TrimSpace(out)
1154 | 
1155 | 	inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID)
1156 | 	out, _, err = runCommandWithOutput(inspectCmd)
1157 | 	if err != nil {
1158 | 		c.Fatalf("out should've been a container id: %s, %v", out, err)
1159 | 	}
1160 | 
1161 | 	containers := []struct {
1162 | 		HostConfig *struct {
1163 | 			PortBindings map[nat.Port][]nat.PortBinding
1164 | 		}
1165 | 	}{}
1166 | 	if err := json.Unmarshal([]byte(out), &containers); err != nil {
1167 | 		c.Fatalf("Error inspecting the container: %s", err)
1168 | 	}
1169 | 	if len(containers) != 1 {
1170 | 		c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers))
1171 | 	}
1172 | 
1173 | 	cont := containers[0]
1174 | 	if cont.HostConfig == nil {
1175 | 		c.Fatalf("Expected HostConfig, got none")
1176 | 	}
1177 | 
1178 | 	if len(cont.HostConfig.PortBindings) != 4 {
1179 | 		c.Fatalf("Expected 4 ports bindings, got %d", len(cont.HostConfig.PortBindings))
1180 | 	}
1181 | 	for k, v := range cont.HostConfig.PortBindings {
1182 | 		if len(v) != 1 {
1183 | 			c.Fatalf("Expected 1 ports binding, for the port  %s but found %s", k, v)
1184 | 		}
1185 | 		if k.Port() != v[0].HostPort {
1186 | 			c.Fatalf("Expected host port %d to match published port  %d", k.Port(), v[0].HostPort)
1187 | 		}
1188 | 	}
1189 | 
1190 | }
1191 |

integration-cli/docker_cli_create_test.go:151

1192 |
func (s *DockerSuite) TestCreateWithiLargePortRange(c *check.C) {
1193 | 
1194 | 	runCmd := exec.Command(dockerBinary, "create", "-p", "1-65535:1-65535/tcp", "busybox", "echo")
1195 | 	out, _, _, err := runCommandWithStdoutStderr(runCmd)
1196 | 	if err != nil {
1197 | 		c.Fatal(out, err)
1198 | 	}
1199 | 
1200 | 	cleanedContainerID := strings.TrimSpace(out)
1201 | 
1202 | 	inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID)
1203 | 	out, _, err = runCommandWithOutput(inspectCmd)
1204 | 	if err != nil {
1205 | 		c.Fatalf("out should've been a container id: %s, %v", out, err)
1206 | 	}
1207 | 
1208 | 	containers := []struct {
1209 | 		HostConfig *struct {
1210 | 			PortBindings map[nat.Port][]nat.PortBinding
1211 | 		}
1212 | 	}{}
1213 | 	if err := json.Unmarshal([]byte(out), &containers); err != nil {
1214 | 		c.Fatalf("Error inspecting the container: %s", err)
1215 | 	}
1216 | 	if len(containers) != 1 {
1217 | 		c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers))
1218 | 	}
1219 | 
1220 | 	cont := containers[0]
1221 | 	if cont.HostConfig == nil {
1222 | 		c.Fatalf("Expected HostConfig, got none")
1223 | 	}
1224 | 
1225 | 	if len(cont.HostConfig.PortBindings) != 65535 {
1226 | 		c.Fatalf("Expected 65535 ports bindings, got %d", len(cont.HostConfig.PortBindings))
1227 | 	}
1228 | 	for k, v := range cont.HostConfig.PortBindings {
1229 | 		if len(v) != 1 {
1230 | 			c.Fatalf("Expected 1 ports binding, for the port  %s but found %s", k, v)
1231 | 		}
1232 | 		if k.Port() != v[0].HostPort {
1233 | 			c.Fatalf("Expected host port %d to match published port  %d", k.Port(), v[0].HostPort)
1234 | 		}
1235 | 	}
1236 | 
1237 | }
1238 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mibk/dupl 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /job/buildtree.go: -------------------------------------------------------------------------------- 1 | package job 2 | 3 | import ( 4 | "github.com/mibk/dupl/suffixtree" 5 | "github.com/mibk/dupl/syntax" 6 | ) 7 | 8 | func BuildTree(schan chan []*syntax.Node) (t *suffixtree.STree, d *[]*syntax.Node, done chan bool) { 9 | t = suffixtree.New() 10 | data := make([]*syntax.Node, 0, 100) 11 | done = make(chan bool) 12 | go func() { 13 | for seq := range schan { 14 | data = append(data, seq...) 15 | for _, node := range seq { 16 | t.Update(node) 17 | } 18 | } 19 | done <- true 20 | }() 21 | return t, &data, done 22 | } 23 | -------------------------------------------------------------------------------- /job/parse.go: -------------------------------------------------------------------------------- 1 | package job 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/mibk/dupl/syntax" 7 | "github.com/mibk/dupl/syntax/golang" 8 | ) 9 | 10 | func Parse(fchan chan string) chan []*syntax.Node { 11 | 12 | // parse AST 13 | achan := make(chan *syntax.Node) 14 | go func() { 15 | for file := range fchan { 16 | ast, err := golang.Parse(file) 17 | if err != nil { 18 | log.Println(err) 19 | continue 20 | } 21 | achan <- ast 22 | } 23 | close(achan) 24 | }() 25 | 26 | // serialize 27 | schan := make(chan []*syntax.Node) 28 | go func() { 29 | for ast := range achan { 30 | seq := syntax.Serialize(ast) 31 | schan <- seq 32 | } 33 | close(schan) 34 | }() 35 | return schan 36 | } 37 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 
| "bufio" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | "sort" 12 | "strings" 13 | 14 | "github.com/mibk/dupl/job" 15 | "github.com/mibk/dupl/printer" 16 | "github.com/mibk/dupl/syntax" 17 | ) 18 | 19 | const defaultThreshold = 15 20 | 21 | var ( 22 | paths = []string{"."} 23 | vendor = flag.Bool("vendor", false, "") 24 | verbose = flag.Bool("verbose", false, "") 25 | threshold = flag.Int("threshold", defaultThreshold, "") 26 | files = flag.Bool("files", false, "") 27 | 28 | html = flag.Bool("html", false, "") 29 | plumbing = flag.Bool("plumbing", false, "") 30 | ) 31 | 32 | const ( 33 | vendorDirPrefix = "vendor" + string(filepath.Separator) 34 | vendorDirInPath = string(filepath.Separator) + vendorDirPrefix 35 | ) 36 | 37 | func init() { 38 | flag.BoolVar(verbose, "v", false, "alias for -verbose") 39 | flag.IntVar(threshold, "t", defaultThreshold, "alias for -threshold") 40 | } 41 | 42 | func main() { 43 | flag.Usage = usage 44 | flag.Parse() 45 | if *html && *plumbing { 46 | log.Fatal("you can have either plumbing or HTML output") 47 | } 48 | if flag.NArg() > 0 { 49 | paths = flag.Args() 50 | } 51 | 52 | if *verbose { 53 | log.Println("Building suffix tree") 54 | } 55 | schan := job.Parse(filesFeed()) 56 | t, data, done := job.BuildTree(schan) 57 | <-done 58 | 59 | // finish stream 60 | t.Update(&syntax.Node{Type: -1}) 61 | 62 | if *verbose { 63 | log.Println("Searching for clones") 64 | } 65 | mchan := t.FindDuplOver(*threshold) 66 | duplChan := make(chan syntax.Match) 67 | go func() { 68 | for m := range mchan { 69 | match := syntax.FindSyntaxUnits(*data, m, *threshold) 70 | if len(match.Frags) > 0 { 71 | duplChan <- match 72 | } 73 | } 74 | close(duplChan) 75 | }() 76 | 77 | newPrinter := printer.NewText 78 | if *html { 79 | newPrinter = printer.NewHTML 80 | } else if *plumbing { 81 | newPrinter = printer.NewPlumbing 82 | } 83 | p := newPrinter(os.Stdout, ioutil.ReadFile) 84 | if err := printDupls(p, duplChan); err != nil { 
85 | log.Fatal(err) 86 | } 87 | } 88 | 89 | func filesFeed() chan string { 90 | if *files { 91 | fchan := make(chan string) 92 | go func() { 93 | s := bufio.NewScanner(os.Stdin) 94 | for s.Scan() { 95 | f := s.Text() 96 | fchan <- strings.TrimPrefix(f, "./") 97 | } 98 | close(fchan) 99 | }() 100 | return fchan 101 | } 102 | return crawlPaths(paths) 103 | } 104 | 105 | func crawlPaths(paths []string) chan string { 106 | fchan := make(chan string) 107 | go func() { 108 | for _, path := range paths { 109 | info, err := os.Lstat(path) 110 | if err != nil { 111 | log.Fatal(err) 112 | } 113 | if !info.IsDir() { 114 | fchan <- path 115 | continue 116 | } 117 | err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error { 118 | if !*vendor && (strings.HasPrefix(path, vendorDirPrefix) || 119 | strings.Contains(path, vendorDirInPath)) { 120 | return nil 121 | } 122 | if !info.IsDir() && strings.HasSuffix(info.Name(), ".go") { 123 | fchan <- path 124 | } 125 | return nil 126 | }) 127 | if err != nil { 128 | log.Fatal(err) 129 | } 130 | } 131 | close(fchan) 132 | }() 133 | return fchan 134 | } 135 | 136 | func printDupls(p printer.Printer, duplChan <-chan syntax.Match) error { 137 | groups := make(map[string][][]*syntax.Node) 138 | for dupl := range duplChan { 139 | groups[dupl.Hash] = append(groups[dupl.Hash], dupl.Frags...) 
140 | } 141 | keys := make([]string, 0, len(groups)) 142 | for k := range groups { 143 | keys = append(keys, k) 144 | } 145 | sort.Strings(keys) 146 | 147 | if err := p.PrintHeader(); err != nil { 148 | return err 149 | } 150 | for _, k := range keys { 151 | uniq := unique(groups[k]) 152 | if len(uniq) > 1 { 153 | if err := p.PrintClones(uniq); err != nil { 154 | return err 155 | } 156 | } 157 | } 158 | return p.PrintFooter() 159 | } 160 | 161 | func unique(group [][]*syntax.Node) [][]*syntax.Node { 162 | fileMap := make(map[string]map[int]struct{}) 163 | 164 | var newGroup [][]*syntax.Node 165 | for _, seq := range group { 166 | node := seq[0] 167 | file, ok := fileMap[node.Filename] 168 | if !ok { 169 | file = make(map[int]struct{}) 170 | fileMap[node.Filename] = file 171 | } 172 | if _, ok := file[node.Pos]; !ok { 173 | file[node.Pos] = struct{}{} 174 | newGroup = append(newGroup, seq) 175 | } 176 | } 177 | return newGroup 178 | } 179 | 180 | func usage() { 181 | fmt.Fprintln(os.Stderr, `Usage: dupl [flags] [paths] 182 | 183 | Paths: 184 | If the given path is a file, dupl will use it regardless of 185 | the file extension. If it is a directory, it will recursively 186 | search for *.go files in that directory. 187 | 188 | If no path is given, dupl will recursively search for *.go 189 | files in the current directory. 190 | 191 | Flags: 192 | -files 193 | read file names from stdin one at each line 194 | -html 195 | output the results as HTML, including duplicate code fragments 196 | -plumbing 197 | plumbing (easy-to-parse) output for consumption by scripts or tools 198 | -t, -threshold size 199 | minimum token sequence size as a clone (default 15) 200 | -vendor 201 | check files in vendor directory 202 | -v, -verbose 203 | explain what is being done 204 | 205 | Examples: 206 | dupl -t 100 207 | Search clones in the current directory of size at least 208 | 100 tokens. 
209 | dupl $(find app/ -name '*_test.go') 210 | Search for clones in tests in the app directory. 211 | find app/ -name '*_test.go' |dupl -files 212 | The same as above.`) 213 | os.Exit(2) 214 | } 215 | -------------------------------------------------------------------------------- /printer/html.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "html" 7 | "io" 8 | "regexp" 9 | "sort" 10 | 11 | "github.com/mibk/dupl/syntax" 12 | ) 13 | 14 | type htmlprinter struct { 15 | iota int 16 | w io.Writer 17 | ReadFile 18 | } 19 | 20 | func NewHTML(w io.Writer, fread ReadFile) Printer { 21 | return &htmlprinter{w: w, ReadFile: fread} 22 | } 23 | 24 | func (p *htmlprinter) PrintHeader() error { 25 | _, err := fmt.Fprint(p.w, ` 26 | 27 | Duplicates 28 | 35 | `) 36 | return err 37 | } 38 | 39 | func (p *htmlprinter) PrintClones(dups [][]*syntax.Node) error { 40 | p.iota++ 41 | fmt.Fprintf(p.w, "

#%d found %d clones

\n", p.iota, len(dups)) 42 | 43 | clones := make([]clone, len(dups)) 44 | for i, dup := range dups { 45 | cnt := len(dup) 46 | if cnt == 0 { 47 | panic("zero length dup") 48 | } 49 | nstart := dup[0] 50 | nend := dup[cnt-1] 51 | 52 | file, err := p.ReadFile(nstart.Filename) 53 | if err != nil { 54 | return err 55 | } 56 | 57 | lineStart, _ := blockLines(file, nstart.Pos, nend.End) 58 | cl := clone{filename: nstart.Filename, lineStart: lineStart} 59 | start := findLineBeg(file, nstart.Pos) 60 | content := append(toWhitespace(file[start:nstart.Pos]), file[nstart.Pos:nend.End]...) 61 | cl.fragment = deindent(content) 62 | clones[i] = cl 63 | } 64 | 65 | sort.Sort(byNameAndLine(clones)) 66 | for _, cl := range clones { 67 | fmt.Fprintf(p.w, "

%s:%d

\n
%s
\n", cl.filename, cl.lineStart, 68 | html.EscapeString(string(cl.fragment))) 69 | } 70 | return nil 71 | } 72 | 73 | func (*htmlprinter) PrintFooter() error { return nil } 74 | 75 | func findLineBeg(file []byte, index int) int { 76 | for i := index; i >= 0; i-- { 77 | if file[i] == '\n' { 78 | return i + 1 79 | } 80 | } 81 | return 0 82 | } 83 | 84 | func toWhitespace(str []byte) []byte { 85 | var out []byte 86 | for _, c := range bytes.Runes(str) { 87 | if c == '\t' { 88 | out = append(out, '\t') 89 | } else { 90 | out = append(out, ' ') 91 | } 92 | } 93 | return out 94 | } 95 | 96 | func deindent(block []byte) []byte { 97 | const maxVal = 99 98 | min := maxVal 99 | re := regexp.MustCompile(`(^|\n)(\t*)\S`) 100 | for _, line := range re.FindAllSubmatch(block, -1) { 101 | indent := line[2] 102 | if len(indent) < min { 103 | min = len(indent) 104 | } 105 | } 106 | if min == 0 || min == maxVal { 107 | return block 108 | } 109 | block = block[min:] 110 | Loop: 111 | for i := 0; i < len(block); i++ { 112 | if block[i] == '\n' && i != len(block)-1 { 113 | for j := 0; j < min; j++ { 114 | if block[i+j+1] != '\t' { 115 | continue Loop 116 | } 117 | } 118 | block = append(block[:i+1], block[i+1+min:]...) 
119 | } 120 | } 121 | return block 122 | } 123 | -------------------------------------------------------------------------------- /printer/html_test.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import "testing" 4 | 5 | func TestToWhitespace(t *testing.T) { 6 | testCases := []struct { 7 | in string 8 | expect string 9 | }{ 10 | {"\t ", "\t "}, 11 | {"\tčřď", "\t "}, 12 | {" \ta", " \t "}, 13 | } 14 | 15 | for _, tc := range testCases { 16 | actual := toWhitespace([]byte(tc.in)) 17 | if tc.expect != string(actual) { 18 | t.Errorf("got '%s', want '%s'", actual, tc.expect) 19 | } 20 | } 21 | } 22 | 23 | func TestDeindent(t *testing.T) { 24 | testCases := []struct { 25 | in string 26 | expect string 27 | }{ 28 | {"\t$\n\t\t$\n\t$", "$\n\t$\n$"}, 29 | {"\t$\r\n\t\t$\r\n\t$", "$\r\n\t$\r\n$"}, 30 | {"\t$\n\t\t$\n", "$\n\t$\n"}, 31 | {"\t$\n\n\t\t$", "$\n\n\t$"}, 32 | } 33 | for _, tc := range testCases { 34 | actual := deindent([]byte(tc.in)) 35 | if tc.expect != string(actual) { 36 | t.Errorf("got '%s', want '%s'", actual, tc.expect) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /printer/plumbing.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sort" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | type plumbing struct { 12 | w io.Writer 13 | ReadFile 14 | } 15 | 16 | func NewPlumbing(w io.Writer, fread ReadFile) Printer { 17 | return &plumbing{w, fread} 18 | } 19 | 20 | func (p *plumbing) PrintHeader() error { return nil } 21 | 22 | func (p *plumbing) PrintClones(dups [][]*syntax.Node) error { 23 | clones, err := prepareClonesInfo(p.ReadFile, dups) 24 | if err != nil { 25 | return err 26 | } 27 | sort.Sort(byNameAndLine(clones)) 28 | for i, cl := range clones { 29 | nextCl := clones[(i+1)%len(clones)] 30 | fmt.Fprintf(p.w, "%s:%d-%d: duplicate 
of %s:%d-%d\n", cl.filename, cl.lineStart, cl.lineEnd, 31 | nextCl.filename, nextCl.lineStart, nextCl.lineEnd) 32 | } 33 | return nil 34 | } 35 | 36 | func (p *plumbing) PrintFooter() error { return nil } 37 | -------------------------------------------------------------------------------- /printer/printer.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import "github.com/mibk/dupl/syntax" 4 | 5 | type ReadFile func(filename string) ([]byte, error) 6 | 7 | type Printer interface { 8 | PrintHeader() error 9 | PrintClones(dups [][]*syntax.Node) error 10 | PrintFooter() error 11 | } 12 | -------------------------------------------------------------------------------- /printer/text.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sort" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | type text struct { 12 | cnt int 13 | w io.Writer 14 | ReadFile 15 | } 16 | 17 | func NewText(w io.Writer, fread ReadFile) Printer { 18 | return &text{w: w, ReadFile: fread} 19 | } 20 | 21 | func (p *text) PrintHeader() error { return nil } 22 | 23 | func (p *text) PrintClones(dups [][]*syntax.Node) error { 24 | p.cnt++ 25 | fmt.Fprintf(p.w, "found %d clones:\n", len(dups)) 26 | clones, err := prepareClonesInfo(p.ReadFile, dups) 27 | if err != nil { 28 | return err 29 | } 30 | sort.Sort(byNameAndLine(clones)) 31 | for _, cl := range clones { 32 | fmt.Fprintf(p.w, " %s:%d,%d\n", cl.filename, cl.lineStart, cl.lineEnd) 33 | } 34 | return nil 35 | } 36 | 37 | func (p *text) PrintFooter() error { 38 | _, err := fmt.Fprintf(p.w, "\nFound total %d clone groups.\n", p.cnt) 39 | return err 40 | } 41 | 42 | func prepareClonesInfo(fread ReadFile, dups [][]*syntax.Node) ([]clone, error) { 43 | clones := make([]clone, len(dups)) 44 | for i, dup := range dups { 45 | cnt := len(dup) 46 | if cnt == 0 { 47 | panic("zero length dup") 48 | } 49 
| nstart := dup[0] 50 | nend := dup[cnt-1] 51 | 52 | file, err := fread(nstart.Filename) 53 | if err != nil { 54 | return nil, err 55 | } 56 | 57 | cl := clone{filename: nstart.Filename} 58 | cl.lineStart, cl.lineEnd = blockLines(file, nstart.Pos, nend.End) 59 | clones[i] = cl 60 | } 61 | return clones, nil 62 | } 63 | 64 | func blockLines(file []byte, from, to int) (int, int) { 65 | line := 1 66 | lineStart, lineEnd := 0, 0 67 | for offset, b := range file { 68 | if b == '\n' { 69 | line++ 70 | } 71 | if offset == from { 72 | lineStart = line 73 | } 74 | if offset == to-1 { 75 | lineEnd = line 76 | break 77 | } 78 | } 79 | return lineStart, lineEnd 80 | } 81 | 82 | type clone struct { 83 | filename string 84 | lineStart int 85 | lineEnd int 86 | fragment []byte 87 | } 88 | 89 | type byNameAndLine []clone 90 | 91 | func (c byNameAndLine) Len() int { return len(c) } 92 | 93 | func (c byNameAndLine) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 94 | 95 | func (c byNameAndLine) Less(i, j int) bool { 96 | if c[i].filename == c[j].filename { 97 | return c[i].lineStart < c[j].lineStart 98 | } 99 | return c[i].filename < c[j].filename 100 | } 101 | -------------------------------------------------------------------------------- /suffixtree/dupl.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import "sort" 4 | 5 | type Match struct { 6 | Ps []Pos 7 | Len Pos 8 | } 9 | 10 | type posList struct { 11 | positions []Pos 12 | } 13 | 14 | func newPosList() *posList { 15 | return &posList{make([]Pos, 0)} 16 | } 17 | 18 | func (p *posList) append(p2 *posList) { 19 | p.positions = append(p.positions, p2.positions...) 
20 | } 21 | 22 | func (p *posList) add(pos Pos) { 23 | p.positions = append(p.positions, pos) 24 | } 25 | 26 | type contextList struct { 27 | lists map[int]*posList 28 | } 29 | 30 | func newContextList() *contextList { 31 | return &contextList{make(map[int]*posList)} 32 | } 33 | 34 | func (c *contextList) getAll() []Pos { 35 | keys := make([]int, 0, len(c.lists)) 36 | for k := range c.lists { 37 | keys = append(keys, k) 38 | } 39 | sort.Ints(keys) 40 | var ps []Pos 41 | for _, k := range keys { 42 | ps = append(ps, c.lists[k].positions...) 43 | } 44 | return ps 45 | } 46 | 47 | func (c *contextList) append(c2 *contextList) { 48 | for lc, pl := range c2.lists { 49 | if _, ok := c.lists[lc]; ok { 50 | c.lists[lc].append(pl) 51 | } else { 52 | c.lists[lc] = pl 53 | } 54 | } 55 | } 56 | 57 | // FindDuplOver find pairs of maximal duplicities over a threshold 58 | // length. 59 | func (t *STree) FindDuplOver(threshold int) <-chan Match { 60 | auxTran := newTran(0, 0, t.root) 61 | ch := make(chan Match) 62 | go func() { 63 | walkTrans(auxTran, 0, threshold, ch) 64 | close(ch) 65 | }() 66 | return ch 67 | } 68 | 69 | func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList { 70 | s := parent.state 71 | 72 | cl := newContextList() 73 | 74 | if len(s.trans) == 0 { 75 | pl := newPosList() 76 | start := parent.end + 1 - Pos(length) 77 | pl.add(start) 78 | ch := 0 79 | if start > 0 { 80 | ch = s.tree.data[start-1].Val() 81 | } 82 | cl.lists[ch] = pl 83 | return cl 84 | } 85 | 86 | for _, t := range s.trans { 87 | ln := length + t.len() 88 | cl2 := walkTrans(t, ln, threshold, ch) 89 | if ln >= threshold { 90 | cl.append(cl2) 91 | } 92 | } 93 | if length >= threshold && len(cl.lists) > 1 { 94 | m := Match{cl.getAll(), Pos(length)} 95 | ch <- m 96 | } 97 | return cl 98 | } 99 | -------------------------------------------------------------------------------- /suffixtree/dupl_test.go: 
-------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "testing" 7 | ) 8 | 9 | func (m Match) String() string { 10 | str := "([" 11 | for _, p := range m.Ps { 12 | str += fmt.Sprintf("%d, ", p) 13 | } 14 | return str[:len(str)-2] + fmt.Sprintf("], %d)", m.Len) 15 | } 16 | 17 | func sliceCmp(sl1, sl2 []Pos) bool { 18 | if len(sl1) != len(sl2) { 19 | return false 20 | } 21 | sort.Sort(ByPos(sl1)) 22 | sort.Sort(ByPos(sl2)) 23 | for i := range sl1 { 24 | if sl1[i] != sl2[i] { 25 | return false 26 | } 27 | } 28 | return true 29 | } 30 | 31 | type ByPos []Pos 32 | 33 | func (p ByPos) Len() int { 34 | return len(p) 35 | } 36 | 37 | func (p ByPos) Swap(i, j int) { 38 | p[i], p[j] = p[j], p[i] 39 | } 40 | 41 | func (p ByPos) Less(i, j int) bool { 42 | return p[i] < p[j] 43 | } 44 | 45 | func TestFindingDupl(t *testing.T) { 46 | testCases := []struct { 47 | s string 48 | threshold int 49 | matches []Match 50 | }{ 51 | {"abab$", 3, []Match{}}, 52 | {"abab$", 2, []Match{{[]Pos{0, 2}, 2}}}, 53 | {"abcbcabc$", 3, []Match{{[]Pos{0, 5}, 3}}}, 54 | {"abcbcabc$", 2, []Match{{[]Pos{0, 5}, 3}, {[]Pos{1, 3, 6}, 2}}}, 55 | {`All work and no play makes Jack a dull boy 56 | All work and no play makes Jack a dull boy$`, 4, []Match{{[]Pos{0, 43}, 42}}}, 57 | } 58 | 59 | for _, tc := range testCases { 60 | tree := New() 61 | tree.Update(str2tok(tc.s)...) 
62 | ch := tree.FindDuplOver(tc.threshold) 63 | for _, exp := range tc.matches { 64 | act, ok := <-ch 65 | if !ok { 66 | t.Errorf("missing match %v for '%s'", exp, tc.s) 67 | } else if exp.Len != act.Len || !sliceCmp(exp.Ps, act.Ps) { 68 | t.Errorf("got %v, want %v", act, exp) 69 | } 70 | } 71 | for act := range ch { 72 | t.Errorf("beyond expected match %v for '%s'", act, tc.s) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /suffixtree/suffixtree.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "math" 7 | "strings" 8 | ) 9 | 10 | const infinity = math.MaxInt32 11 | 12 | // Pos denotes position in data slice. 13 | type Pos int32 14 | 15 | type Token interface { 16 | Val() int 17 | } 18 | 19 | // STree is a struct representing a suffix tree. 20 | type STree struct { 21 | data []Token 22 | root *state 23 | auxState *state // auxiliary state 24 | 25 | // active point 26 | s *state 27 | start, end Pos 28 | } 29 | 30 | // New creates new suffix tree. 31 | func New() *STree { 32 | t := new(STree) 33 | t.data = make([]Token, 0, 50) 34 | t.root = newState(t) 35 | t.auxState = newState(t) 36 | t.root.linkState = t.auxState 37 | t.s = t.root 38 | return t 39 | } 40 | 41 | // Update refreshes the suffix tree to by new data. 42 | func (t *STree) Update(data ...Token) { 43 | t.data = append(t.data, data...) 44 | for range data { 45 | t.update() 46 | t.s, t.start = t.canonize(t.s, t.start, t.end) 47 | t.end++ 48 | } 49 | } 50 | 51 | // update transforms suffix tree T(n) to T(n+1). 
52 | func (t *STree) update() { 53 | oldr := t.root 54 | 55 | // (s, (start, end)) is the canonical reference pair for the active point 56 | s := t.s 57 | start, end := t.start, t.end 58 | var r *state 59 | for { 60 | var endPoint bool 61 | r, endPoint = t.testAndSplit(s, start, end-1) 62 | if endPoint { 63 | break 64 | } 65 | r.fork(end) 66 | if oldr != t.root { 67 | oldr.linkState = r 68 | } 69 | oldr = r 70 | s, start = t.canonize(s.linkState, start, end-1) 71 | } 72 | if oldr != t.root { 73 | oldr.linkState = r 74 | } 75 | 76 | // update active point 77 | t.s = s 78 | t.start = start 79 | } 80 | 81 | // testAndSplit tests whether a state with canonical ref. pair 82 | // (s, (start, end)) is the end point, that is, a state that have 83 | // a c-transition. If not, then state (exs, (start, end)) is made 84 | // explicit (if not already so). 85 | func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) { 86 | c := t.data[t.end] 87 | if start <= end { 88 | tr := s.findTran(t.data[start]) 89 | splitPoint := tr.start + end - start + 1 90 | if t.data[splitPoint].Val() == c.Val() { 91 | return s, true 92 | } 93 | // make the (s, (start, end)) state explicit 94 | newSt := newState(s.tree) 95 | newSt.addTran(splitPoint, tr.end, tr.state) 96 | tr.end = splitPoint - 1 97 | tr.state = newSt 98 | return newSt, false 99 | } 100 | if s == t.auxState || s.findTran(c) != nil { 101 | return s, true 102 | } 103 | return s, false 104 | } 105 | 106 | // canonize returns updated state and start position for ref. pair 107 | // (s, (start, end)) of state r so the new ref. pair is canonical, 108 | // that is, referenced from the closest explicit ancestor of r. 
109 | func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) { 110 | if s == t.auxState { 111 | s, start = t.root, start+1 112 | } 113 | if start > end { 114 | return s, start 115 | } 116 | 117 | var tr *tran 118 | for { 119 | if start <= end { 120 | tr = s.findTran(t.data[start]) 121 | if tr == nil { 122 | panic(fmt.Sprintf("there should be some transition for '%d' at %d", 123 | t.data[start].Val(), start)) 124 | } 125 | } 126 | if tr.end-tr.start > end-start { 127 | break 128 | } 129 | start += tr.end - tr.start + 1 130 | s = tr.state 131 | } 132 | if s == nil { 133 | panic("there should always be some suffix link resolution") 134 | } 135 | return s, start 136 | } 137 | 138 | func (t *STree) At(p Pos) Token { 139 | if p < 0 || p >= Pos(len(t.data)) { 140 | panic("position out of bounds") 141 | } 142 | return t.data[p] 143 | } 144 | 145 | func (t *STree) String() string { 146 | buf := new(bytes.Buffer) 147 | printState(buf, t.root, 0) 148 | return buf.String() 149 | } 150 | 151 | func printState(buf *bytes.Buffer, s *state, ident int) { 152 | for _, tr := range s.trans { 153 | fmt.Fprint(buf, strings.Repeat(" ", ident)) 154 | fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd()) 155 | printState(buf, tr.state, ident+1) 156 | } 157 | } 158 | 159 | // state is an explicit state of the suffix tree. 160 | type state struct { 161 | tree *STree 162 | trans []*tran 163 | linkState *state 164 | } 165 | 166 | func newState(t *STree) *state { 167 | return &state{ 168 | tree: t, 169 | trans: make([]*tran, 0), 170 | linkState: nil, 171 | } 172 | } 173 | 174 | func (s *state) addTran(start, end Pos, r *state) { 175 | s.trans = append(s.trans, newTran(start, end, r)) 176 | } 177 | 178 | // fork creates a new branch from the state s. 179 | func (s *state) fork(i Pos) *state { 180 | r := newState(s.tree) 181 | s.addTran(i, infinity, r) 182 | return r 183 | } 184 | 185 | // findTran finds c-transition. 
186 | func (s *state) findTran(c Token) *tran { 187 | for _, tran := range s.trans { 188 | if s.tree.data[tran.start].Val() == c.Val() { 189 | return tran 190 | } 191 | } 192 | return nil 193 | } 194 | 195 | // tran represents a state's transition. 196 | type tran struct { 197 | start, end Pos 198 | state *state 199 | } 200 | 201 | func newTran(start, end Pos, s *state) *tran { 202 | return &tran{start, end, s} 203 | } 204 | 205 | func (t *tran) len() int { 206 | return int(t.end - t.start + 1) 207 | } 208 | 209 | // ActEnd returns actual end position as consistent with 210 | // the actual length of the data in the STree. 211 | func (t *tran) ActEnd() Pos { 212 | if t.end == infinity { 213 | return Pos(len(t.state.tree.data)) - 1 214 | } 215 | return t.end 216 | } 217 | -------------------------------------------------------------------------------- /suffixtree/suffixtree_test.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import "testing" 4 | 5 | type char byte 6 | 7 | func (c char) Val() int { 8 | return int(c) 9 | } 10 | 11 | func str2tok(str string) []Token { 12 | toks := make([]Token, len(str)) 13 | for i, c := range str { 14 | toks[i] = char(c) 15 | } 16 | return toks 17 | } 18 | 19 | func TestConstruction(t *testing.T) { 20 | str := "cacao" 21 | _, s := genStates(8, str) 22 | // s[0] is root 23 | s[0].addTran(0, 1, s[1]) // ca 24 | s[0].addTran(1, 1, s[2]) // a 25 | s[0].addTran(4, 4, s[3]) // o 26 | 27 | s[1].addTran(2, 4, s[4]) // cao 28 | s[1].addTran(4, 4, s[5]) // o 29 | 30 | s[2].addTran(2, 4, s[4]) // cao 31 | s[2].addTran(4, 4, s[5]) // o 32 | 33 | cacao := New() 34 | cacao.Update(str2tok(str)...) 35 | compareTrees(t, s[0], cacao.root) 36 | 37 | str2 := "banana" 38 | _, r := genStates(4, str2) 39 | r[0].addTran(0, 5, r[1]) // banana 40 | r[0].addTran(1, 5, r[2]) // anana 41 | r[0].addTran(2, 5, r[3]) // nana 42 | 43 | banana := New() 44 | banana.Update(str2tok(str2)...) 
45 | compareTrees(t, r[0], banana.root) 46 | 47 | _, q := genStates(11, str2+"$") 48 | // r[0] is root 49 | q[0].addTran(0, 6, q[1]) // banana$ 50 | q[0].addTran(1, 1, q[2]) // a 51 | q[0].addTran(2, 3, q[3]) // na 52 | q[0].addTran(6, 6, q[4]) // $ 53 | 54 | q[2].addTran(2, 3, q[5]) // na 55 | q[2].addTran(6, 6, q[6]) // $ 56 | 57 | q[3].addTran(4, 6, q[7]) // na$ 58 | q[3].addTran(6, 6, q[8]) // $ 59 | 60 | q[5].addTran(4, 6, q[9]) // na$ 61 | q[5].addTran(6, 6, q[10]) // $ 62 | 63 | banana.Update(char('$')) 64 | compareTrees(t, q[0], banana.root) 65 | 66 | foo := New() 67 | foo.Update(str2tok("a b ac c ")...) 68 | } 69 | 70 | func compareTrees(t *testing.T, expected, actual *state) { 71 | ch1, ch2 := walker(expected), walker(actual) 72 | for { 73 | etran, ok1 := <-ch1 74 | atran, ok2 := <-ch2 75 | if !ok1 || !ok2 { 76 | if ok1 { 77 | t.Error("expected tree is longer") 78 | } else if ok2 { 79 | t.Error("actual tree is longer") 80 | } 81 | break 82 | } 83 | if etran.start != atran.start || etran.ActEnd() != atran.ActEnd() { 84 | t.Errorf("got transition (%d, %d) '%s', want (%d, %d) '%s'", 85 | atran.start, atran.ActEnd(), actual.tree.data[atran.start:atran.ActEnd()+1], 86 | etran.start, etran.ActEnd(), expected.tree.data[etran.start:etran.ActEnd()+1], 87 | ) 88 | } 89 | } 90 | } 91 | 92 | func walker(s *state) <-chan *tran { 93 | ch := make(chan *tran) 94 | go func() { 95 | walk(s, ch) 96 | close(ch) 97 | }() 98 | return ch 99 | } 100 | 101 | func walk(s *state, ch chan<- *tran) { 102 | for _, tr := range s.trans { 103 | ch <- tr 104 | walk(tr.state, ch) 105 | } 106 | } 107 | 108 | func genStates(count int, data string) (*STree, []*state) { 109 | t := new(STree) 110 | t.data = str2tok(data) 111 | states := make([]*state, count) 112 | for i := range states { 113 | states[i] = newState(t) 114 | } 115 | return t, states 116 | } 117 | 118 | type refPair struct { 119 | s *state 120 | start, end Pos 121 | } 122 | 123 | func TestCanonize(t *testing.T) { 124 | tree, s := 
genStates(5, "somebanana") 125 | tree.auxState, tree.root = s[4], s[0] 126 | s[0].addTran(0, 3, s[1]) 127 | s[1].addTran(4, 6, s[2]) 128 | s[2].addTran(7, infinity, s[3]) 129 | 130 | find := func(needle *state) int { 131 | for i, state := range s { 132 | if state == needle { 133 | return i 134 | } 135 | } 136 | return -1 137 | } 138 | 139 | var testCases = []struct { 140 | origin, expected refPair 141 | }{ 142 | {refPair{s[0], 0, 0}, refPair{s[0], 0, 0}}, 143 | {refPair{s[0], 0, 2}, refPair{s[0], 0, 0}}, 144 | {refPair{s[0], 0, 3}, refPair{s[1], 4, 0}}, 145 | {refPair{s[0], 0, 8}, refPair{s[2], 7, 0}}, 146 | {refPair{s[0], 0, 6}, refPair{s[2], 7, 0}}, 147 | {refPair{s[0], 0, 100}, refPair{s[2], 7, 0}}, 148 | {refPair{s[4], -1, 100}, refPair{s[2], 7, 0}}, 149 | } 150 | 151 | for _, tc := range testCases { 152 | s, start := tree.canonize(tc.origin.s, tc.origin.start, tc.origin.end) 153 | if s != tc.expected.s || start != tc.expected.start { 154 | t.Errorf("for origin ref. pair (%d, (%d, %d)) got (%d, %d), want (%d, %d)", 155 | find(tc.origin.s), tc.origin.start, tc.origin.end, 156 | find(s), start, 157 | find(tc.expected.s), tc.expected.start, 158 | ) 159 | } 160 | } 161 | } 162 | 163 | func TestSplitting(t *testing.T) { 164 | tree := new(STree) 165 | tree.data = str2tok("banana|cbao") 166 | s1 := newState(tree) 167 | s2 := newState(tree) 168 | s1.addTran(0, 3, s2) 169 | 170 | // active point is (s1, 0, -1), an explicit state 171 | tree.end = 7 // c 172 | rets, end := tree.testAndSplit(s1, 0, -1) 173 | if rets != s1 { 174 | t.Errorf("got state %p, want %p", rets, s1) 175 | } 176 | if end { 177 | t.Error("should not be an end-point") 178 | } 179 | tree.end = 8 // b 180 | _, end = tree.testAndSplit(s1, 0, -1) 181 | if !end { 182 | t.Error("should be an end-point") 183 | } 184 | 185 | // active point is (s1, 0, 2), an implicit state 186 | tree.end = 9 // a 187 | rets, end = tree.testAndSplit(s1, 0, 2) 188 | if rets != s1 { 189 | t.Error("returned state should be 
unchanged") 190 | } 191 | if !end { 192 | t.Error("should be an end-point") 193 | } 194 | 195 | // [s1]-banana->[s2] => [s1]-ban->[rets]-ana->[s2] 196 | tree.end = 10 // o 197 | rets, end = tree.testAndSplit(s1, 0, 2) 198 | tr := s1.findTran(char('b')) 199 | if tr == nil { 200 | t.Error("should have a b-transition") 201 | } else if tr.state != rets { 202 | t.Errorf("got state %p, want %p", tr.state, rets) 203 | } 204 | tr2 := rets.findTran(char('a')) 205 | if tr2 == nil { 206 | t.Error("should have an a-transition") 207 | } else if tr2.state != s2 { 208 | t.Errorf("got state %p, want %p", tr2.state, s2) 209 | } 210 | if end { 211 | t.Error("should not be an end-point") 212 | } 213 | } 214 | 215 | func TestPosMaxValue(t *testing.T) { 216 | var p Pos = infinity 217 | if p+1 > 0 { 218 | t.Error("const infinity is not max value") 219 | } 220 | } 221 | 222 | func BenchmarkConstruction(b *testing.B) { 223 | stream := str2tok(`all work and no play makes jack a dull boy 224 | all work and no play makes jack a dull boy 225 | all work and no play makes jack a dull boy`) 226 | 227 | for i := 0; i < b.N; i++ { 228 | t := New() 229 | t.Update(stream...) 
230 | } 231 | } 232 | -------------------------------------------------------------------------------- /syntax/golang/golang.go: -------------------------------------------------------------------------------- 1 | package golang 2 | 3 | import ( 4 | "go/ast" 5 | "go/parser" 6 | "go/token" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | const ( 12 | BadNode = iota 13 | File 14 | ArrayType 15 | AssignStmt 16 | BasicLit 17 | BinaryExpr 18 | BlockStmt 19 | BranchStmt 20 | CallExpr 21 | CaseClause 22 | ChanType 23 | CommClause 24 | CompositeLit 25 | DeclStmt 26 | DeferStmt 27 | Ellipsis 28 | EmptyStmt 29 | ExprStmt 30 | Field 31 | FieldList 32 | ForStmt 33 | FuncDecl 34 | FuncLit 35 | FuncType 36 | GenDecl 37 | GoStmt 38 | Ident 39 | IfStmt 40 | IncDecStmt 41 | IndexExpr 42 | InterfaceType 43 | KeyValueExpr 44 | LabeledStmt 45 | MapType 46 | ParenExpr 47 | RangeStmt 48 | ReturnStmt 49 | SelectStmt 50 | SelectorExpr 51 | SendStmt 52 | SliceExpr 53 | StarExpr 54 | StructType 55 | SwitchStmt 56 | TypeAssertExpr 57 | TypeSpec 58 | TypeSwitchStmt 59 | UnaryExpr 60 | ValueSpec 61 | ) 62 | 63 | // Parse the given file and return uniform syntax tree. 64 | func Parse(filename string) (*syntax.Node, error) { 65 | fset := token.NewFileSet() 66 | file, err := parser.ParseFile(fset, filename, nil, 0) 67 | if err != nil { 68 | return nil, err 69 | } 70 | t := &transformer{ 71 | fileset: fset, 72 | filename: filename, 73 | } 74 | return t.trans(file), nil 75 | } 76 | 77 | type transformer struct { 78 | fileset *token.FileSet 79 | filename string 80 | } 81 | 82 | // trans transforms given golang AST to uniform tree structure. 
83 | func (t *transformer) trans(node ast.Node) (o *syntax.Node) { 84 | o = syntax.NewNode() 85 | o.Filename = t.filename 86 | st, end := node.Pos(), node.End() 87 | o.Pos, o.End = t.fileset.File(st).Offset(st), t.fileset.File(end).Offset(end) 88 | 89 | switch n := node.(type) { 90 | case *ast.ArrayType: 91 | o.Type = ArrayType 92 | if n.Len != nil { 93 | o.AddChildren(t.trans(n.Len)) 94 | } 95 | o.AddChildren(t.trans(n.Elt)) 96 | 97 | case *ast.AssignStmt: 98 | o.Type = AssignStmt 99 | for _, e := range n.Rhs { 100 | o.AddChildren(t.trans(e)) 101 | } 102 | 103 | for _, e := range n.Lhs { 104 | o.AddChildren(t.trans(e)) 105 | } 106 | 107 | case *ast.BasicLit: 108 | o.Type = BasicLit 109 | 110 | case *ast.BinaryExpr: 111 | o.Type = BinaryExpr 112 | o.AddChildren(t.trans(n.X), t.trans(n.Y)) 113 | 114 | case *ast.BlockStmt: 115 | o.Type = BlockStmt 116 | for _, stmt := range n.List { 117 | o.AddChildren(t.trans(stmt)) 118 | } 119 | 120 | case *ast.BranchStmt: 121 | o.Type = BranchStmt 122 | if n.Label != nil { 123 | o.AddChildren(t.trans(n.Label)) 124 | } 125 | 126 | case *ast.CallExpr: 127 | o.Type = CallExpr 128 | o.AddChildren(t.trans(n.Fun)) 129 | for _, arg := range n.Args { 130 | o.AddChildren(t.trans(arg)) 131 | } 132 | 133 | case *ast.CaseClause: 134 | o.Type = CaseClause 135 | for _, e := range n.List { 136 | o.AddChildren(t.trans(e)) 137 | } 138 | for _, stmt := range n.Body { 139 | o.AddChildren(t.trans(stmt)) 140 | } 141 | 142 | case *ast.ChanType: 143 | o.Type = ChanType 144 | o.AddChildren(t.trans(n.Value)) 145 | 146 | case *ast.CommClause: 147 | o.Type = CommClause 148 | if n.Comm != nil { 149 | o.AddChildren(t.trans(n.Comm)) 150 | } 151 | for _, stmt := range n.Body { 152 | o.AddChildren(t.trans(stmt)) 153 | } 154 | 155 | case *ast.CompositeLit: 156 | o.Type = CompositeLit 157 | if n.Type != nil { 158 | o.AddChildren(t.trans(n.Type)) 159 | } 160 | for _, e := range n.Elts { 161 | o.AddChildren(t.trans(e)) 162 | } 163 | 164 | case *ast.DeclStmt: 165 | 
o.Type = DeclStmt 166 | o.AddChildren(t.trans(n.Decl)) 167 | 168 | case *ast.DeferStmt: 169 | o.Type = DeferStmt 170 | o.AddChildren(t.trans(n.Call)) 171 | 172 | case *ast.Ellipsis: 173 | o.Type = Ellipsis 174 | if n.Elt != nil { 175 | o.AddChildren(t.trans(n.Elt)) 176 | } 177 | 178 | case *ast.EmptyStmt: 179 | o.Type = EmptyStmt 180 | 181 | case *ast.ExprStmt: 182 | o.Type = ExprStmt 183 | o.AddChildren(t.trans(n.X)) 184 | 185 | case *ast.Field: 186 | o.Type = Field 187 | for _, name := range n.Names { 188 | o.AddChildren(t.trans(name)) 189 | } 190 | o.AddChildren(t.trans(n.Type)) 191 | 192 | case *ast.FieldList: 193 | o.Type = FieldList 194 | for _, field := range n.List { 195 | o.AddChildren(t.trans(field)) 196 | } 197 | 198 | case *ast.File: 199 | o.Type = File 200 | for _, decl := range n.Decls { 201 | if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT { 202 | // skip import declarations 203 | continue 204 | } 205 | o.AddChildren(t.trans(decl)) 206 | } 207 | 208 | case *ast.ForStmt: 209 | o.Type = ForStmt 210 | if n.Init != nil { 211 | o.AddChildren(t.trans(n.Init)) 212 | } 213 | if n.Cond != nil { 214 | o.AddChildren(t.trans(n.Cond)) 215 | } 216 | if n.Post != nil { 217 | o.AddChildren(t.trans(n.Post)) 218 | } 219 | o.AddChildren(t.trans(n.Body)) 220 | 221 | case *ast.FuncDecl: 222 | o.Type = FuncDecl 223 | if n.Recv != nil { 224 | o.AddChildren(t.trans(n.Recv)) 225 | } 226 | o.AddChildren(t.trans(n.Name), t.trans(n.Type)) 227 | if n.Body != nil { 228 | o.AddChildren(t.trans(n.Body)) 229 | } 230 | 231 | case *ast.FuncLit: 232 | o.Type = FuncLit 233 | o.AddChildren(t.trans(n.Type), t.trans(n.Body)) 234 | 235 | case *ast.FuncType: 236 | o.Type = FuncType 237 | o.AddChildren(t.trans(n.Params)) 238 | if n.Results != nil { 239 | o.AddChildren(t.trans(n.Results)) 240 | } 241 | 242 | case *ast.GenDecl: 243 | o.Type = GenDecl 244 | for _, spec := range n.Specs { 245 | o.AddChildren(t.trans(spec)) 246 | } 247 | 248 | case *ast.GoStmt: 249 | 
o.Type = GoStmt 250 | o.AddChildren(t.trans(n.Call)) 251 | 252 | case *ast.Ident: 253 | o.Type = Ident 254 | 255 | case *ast.IfStmt: 256 | o.Type = IfStmt 257 | if n.Init != nil { 258 | o.AddChildren(t.trans(n.Init)) 259 | } 260 | o.AddChildren(t.trans(n.Cond), t.trans(n.Body)) 261 | if n.Else != nil { 262 | o.AddChildren(t.trans(n.Else)) 263 | } 264 | 265 | case *ast.IncDecStmt: 266 | o.Type = IncDecStmt 267 | o.AddChildren(t.trans(n.X)) 268 | 269 | case *ast.IndexExpr: 270 | o.Type = IndexExpr 271 | o.AddChildren(t.trans(n.X), t.trans(n.Index)) 272 | 273 | case *ast.InterfaceType: 274 | o.Type = InterfaceType 275 | o.AddChildren(t.trans(n.Methods)) 276 | 277 | case *ast.KeyValueExpr: 278 | o.Type = KeyValueExpr 279 | o.AddChildren(t.trans(n.Key), t.trans(n.Value)) 280 | 281 | case *ast.LabeledStmt: 282 | o.Type = LabeledStmt 283 | o.AddChildren(t.trans(n.Label), t.trans(n.Stmt)) 284 | 285 | case *ast.MapType: 286 | o.Type = MapType 287 | o.AddChildren(t.trans(n.Key), t.trans(n.Value)) 288 | 289 | case *ast.ParenExpr: 290 | o.Type = ParenExpr 291 | o.AddChildren(t.trans(n.X)) 292 | 293 | case *ast.RangeStmt: 294 | o.Type = RangeStmt 295 | if n.Key != nil { 296 | o.AddChildren(t.trans(n.Key)) 297 | } 298 | if n.Value != nil { 299 | o.AddChildren(t.trans(n.Value)) 300 | } 301 | o.AddChildren(t.trans(n.X), t.trans(n.Body)) 302 | 303 | case *ast.ReturnStmt: 304 | o.Type = ReturnStmt 305 | for _, e := range n.Results { 306 | o.AddChildren(t.trans(e)) 307 | } 308 | 309 | case *ast.SelectStmt: 310 | o.Type = SelectStmt 311 | o.AddChildren(t.trans(n.Body)) 312 | 313 | case *ast.SelectorExpr: 314 | o.Type = SelectorExpr 315 | o.AddChildren(t.trans(n.X), t.trans(n.Sel)) 316 | 317 | case *ast.SendStmt: 318 | o.Type = SendStmt 319 | o.AddChildren(t.trans(n.Chan), t.trans(n.Value)) 320 | 321 | case *ast.SliceExpr: 322 | o.Type = SliceExpr 323 | o.AddChildren(t.trans(n.X)) 324 | if n.Low != nil { 325 | o.AddChildren(t.trans(n.Low)) 326 | } 327 | if n.High != nil { 328 | 
o.AddChildren(t.trans(n.High)) 329 | } 330 | if n.Max != nil { 331 | o.AddChildren(t.trans(n.Max)) 332 | } 333 | 334 | case *ast.StarExpr: 335 | o.Type = StarExpr 336 | o.AddChildren(t.trans(n.X)) 337 | 338 | case *ast.StructType: 339 | o.Type = StructType 340 | o.AddChildren(t.trans(n.Fields)) 341 | 342 | case *ast.SwitchStmt: 343 | o.Type = SwitchStmt 344 | if n.Init != nil { 345 | o.AddChildren(t.trans(n.Init)) 346 | } 347 | if n.Tag != nil { 348 | o.AddChildren(t.trans(n.Tag)) 349 | } 350 | o.AddChildren(t.trans(n.Body)) 351 | 352 | case *ast.TypeAssertExpr: 353 | o.Type = TypeAssertExpr 354 | o.AddChildren(t.trans(n.X)) 355 | if n.Type != nil { 356 | o.AddChildren(t.trans(n.Type)) 357 | } 358 | 359 | case *ast.TypeSpec: 360 | o.Type = TypeSpec 361 | o.AddChildren(t.trans(n.Name), t.trans(n.Type)) 362 | 363 | case *ast.TypeSwitchStmt: 364 | o.Type = TypeSwitchStmt 365 | if n.Init != nil { 366 | o.AddChildren(t.trans(n.Init)) 367 | } 368 | o.AddChildren(t.trans(n.Assign), t.trans(n.Body)) 369 | 370 | case *ast.UnaryExpr: 371 | o.Type = UnaryExpr 372 | o.AddChildren(t.trans(n.X)) 373 | 374 | case *ast.ValueSpec: 375 | o.Type = ValueSpec 376 | for _, name := range n.Names { 377 | o.AddChildren(t.trans(name)) 378 | } 379 | if n.Type != nil { 380 | o.AddChildren(t.trans(n.Type)) 381 | } 382 | for _, val := range n.Values { 383 | o.AddChildren(t.trans(val)) 384 | } 385 | 386 | default: 387 | o.Type = BadNode 388 | 389 | } 390 | 391 | return o 392 | } 393 | -------------------------------------------------------------------------------- /syntax/syntax.go: -------------------------------------------------------------------------------- 1 | package syntax 2 | 3 | import ( 4 | "crypto/sha1" 5 | 6 | "github.com/mibk/dupl/suffixtree" 7 | ) 8 | 9 | type Node struct { 10 | Type int 11 | Filename string 12 | Pos, End int 13 | Children []*Node 14 | Owns int 15 | } 16 | 17 | func NewNode() *Node { 18 | return &Node{} 19 | } 20 | 21 | func (n *Node) AddChildren(children 
...*Node) { 22 | n.Children = append(n.Children, children...) 23 | } 24 | 25 | func (n *Node) Val() int { 26 | return n.Type 27 | } 28 | 29 | type Match struct { 30 | Hash string 31 | Frags [][]*Node 32 | } 33 | 34 | func Serialize(n *Node) []*Node { 35 | stream := make([]*Node, 0, 10) 36 | serial(n, &stream) 37 | return stream 38 | } 39 | 40 | func serial(n *Node, stream *[]*Node) int { 41 | *stream = append(*stream, n) 42 | var count int 43 | for _, child := range n.Children { 44 | count += serial(child, stream) 45 | } 46 | n.Owns = count 47 | return count + 1 48 | } 49 | 50 | // FindSyntaxUnits finds all complete syntax units in the match group and returns them 51 | // with the corresponding hash. 52 | func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match { 53 | if len(m.Ps) == 0 { 54 | return Match{} 55 | } 56 | firstSeq := data[m.Ps[0] : m.Ps[0]+m.Len] 57 | indexes := getUnitsIndexes(firstSeq, threshold) 58 | 59 | // TODO: is this really working? 60 | indexCnt := len(indexes) 61 | if indexCnt > 0 { 62 | lasti := indexes[indexCnt-1] 63 | firstn := firstSeq[lasti] 64 | for i := 1; i < len(m.Ps); i++ { 65 | n := data[int(m.Ps[i])+lasti] 66 | if firstn.Owns != n.Owns { 67 | indexes = indexes[:indexCnt-1] 68 | break 69 | } 70 | } 71 | } 72 | if len(indexes) == 0 || isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) { 73 | return Match{} 74 | } 75 | 76 | match := Match{Frags: make([][]*Node, len(m.Ps))} 77 | for i, pos := range m.Ps { 78 | match.Frags[i] = make([]*Node, len(indexes)) 79 | for j, index := range indexes { 80 | match.Frags[i][j] = data[int(pos)+index] 81 | } 82 | } 83 | 84 | lastIndex := indexes[len(indexes)-1] 85 | match.Hash = hashSeq(firstSeq[indexes[0] : lastIndex+firstSeq[lastIndex].Owns]) 86 | return match 87 | } 88 | 89 | func getUnitsIndexes(nodeSeq []*Node, threshold int) []int { 90 | var indexes []int 91 | var split bool 92 | for i := 0; i < len(nodeSeq); { 93 | n := nodeSeq[i] 94 | switch { 95 | case 
n.Owns >= len(nodeSeq)-i: 96 | // not complete syntax unit 97 | i++ 98 | split = true 99 | continue 100 | case n.Owns+1 < threshold: 101 | split = true 102 | default: 103 | if split { 104 | indexes = indexes[:0] 105 | split = false 106 | } 107 | indexes = append(indexes, i) 108 | } 109 | i += n.Owns + 1 110 | } 111 | return indexes 112 | } 113 | 114 | // isCyclic finds out whether there is a repetive pattern in the found clone. If positive, 115 | // it return false to point out that the clone would be redundant. 116 | func isCyclic(indexes []int, nodes []*Node) bool { 117 | cnt := len(indexes) 118 | if cnt <= 1 { 119 | return false 120 | } 121 | 122 | alts := make(map[int]bool) 123 | for i := 1; i <= cnt/2; i++ { 124 | if cnt%i == 0 { 125 | alts[i] = true 126 | } 127 | } 128 | 129 | for i := 0; i < indexes[cnt/2]; i++ { 130 | nstart := nodes[i+indexes[0]] 131 | AltLoop: 132 | for alt := range alts { 133 | for j := alt; j < cnt; j += alt { 134 | index := i + indexes[j] 135 | if index < len(nodes) { 136 | nalt := nodes[index] 137 | if nstart.Owns == nalt.Owns && nstart.Type == nalt.Type { 138 | continue 139 | } 140 | } else if i >= indexes[alt] { 141 | return true 142 | } 143 | delete(alts, alt) 144 | continue AltLoop 145 | } 146 | } 147 | if len(alts) == 0 { 148 | return false 149 | } 150 | } 151 | return true 152 | } 153 | 154 | func spansMultipleFiles(indexes []int, nodes []*Node) bool { 155 | if len(indexes) < 2 { 156 | return false 157 | } 158 | f := nodes[indexes[0]].Filename 159 | for i := 1; i < len(indexes); i++ { 160 | if nodes[indexes[i]].Filename != f { 161 | return true 162 | } 163 | } 164 | return false 165 | } 166 | 167 | func hashSeq(nodes []*Node) string { 168 | h := sha1.New() 169 | bytes := make([]byte, len(nodes)) 170 | for i, node := range nodes { 171 | bytes[i] = byte(node.Type) 172 | } 173 | h.Write(bytes) 174 | return string(h.Sum(nil)) 175 | } 176 | -------------------------------------------------------------------------------- 
/syntax/syntax_test.go: -------------------------------------------------------------------------------- 1 | package syntax 2 | 3 | import "testing" 4 | 5 | func TestSerialization(t *testing.T) { 6 | n := genNodes(7) 7 | n[0].AddChildren(n[1], n[2], n[3]) 8 | n[1].AddChildren(n[4], n[5]) 9 | n[2].AddChildren(n[6]) 10 | m := genNodes(6) 11 | m[0].AddChildren(m[1], m[2], m[3], m[4], m[5]) 12 | testCases := []struct { 13 | t *Node 14 | expected []int 15 | }{ 16 | {n[0], []int{6, 2, 0, 0, 1, 0, 0}}, 17 | {m[0], []int{5, 0, 0, 0, 0, 0}}, 18 | } 19 | 20 | for _, tc := range testCases { 21 | compareSeries(t, Serialize(tc.t), tc.expected) 22 | } 23 | } 24 | 25 | func genNodes(cnt int) []*Node { 26 | nodes := make([]*Node, cnt) 27 | for i := range nodes { 28 | nodes[i] = NewNode() 29 | } 30 | return nodes 31 | } 32 | 33 | func compareSeries(t *testing.T, stream []*Node, owns []int) { 34 | if len(stream) != len(owns) { 35 | t.Errorf("series aren't the same length; got %d, want %d", len(stream), len(owns)) 36 | return 37 | } 38 | for i, item := range stream { 39 | if item.Owns != owns[i] { 40 | t.Errorf("got %d, want %d", item.Owns, owns[i]) 41 | } 42 | } 43 | } 44 | 45 | func TestGetUnitsIndexes(t *testing.T) { 46 | testCases := []struct { 47 | seq string 48 | threshold int 49 | expected []int 50 | }{ 51 | {"a8 a0 a2 a0 a0", 3, []int{2}}, 52 | {"a0 a8 a2 a0 a0", 1, []int{2}}, 53 | {"a3 a0 a0 a0 a1", 3, []int{0}}, 54 | {"a3 a0 a0 a0 a0", 1, []int{0, 4}}, 55 | {"a1 a0 a1 a0 a0", 2, []int{0, 2}}, 56 | } 57 | 58 | Loop: 59 | for _, tc := range testCases { 60 | nodes := str2nodes(tc.seq) 61 | indexes := getUnitsIndexes(nodes, tc.threshold) 62 | for i := range tc.expected { 63 | if i > len(indexes)-1 || tc.expected[i] != indexes[i] { 64 | t.Errorf("for seq '%s', got %v, want %v", tc.seq, indexes, tc.expected) 65 | } 66 | continue Loop 67 | } 68 | } 69 | } 70 | 71 | func TestCyclicDupl(t *testing.T) { 72 | testCases := []struct { 73 | seq string 74 | indexes []int 75 | expected 
bool 76 | }{ 77 | {"a1 b0 a2 b0", []int{0, 2}, false}, 78 | {"a1 b0 a1 b0", []int{0, 2}, true}, 79 | {"a0 a0", []int{0, 1}, true}, 80 | {"a1 b0 c1 b0 a1 b0 c1 b0", []int{0, 2, 4, 6}, true}, 81 | {"a1 b0 c1 b0 a1 b0", []int{0, 2, 4}, false}, 82 | {"a0 b0 a0 c0", []int{0, 1, 2, 3}, false}, 83 | {"a0 b0 a0 b0 a0", []int{0, 1, 2}, false}, 84 | {"a1 b0 a1 b0 c1 b0", []int{0, 2, 4}, false}, 85 | {"a1 a1 a1 a1 a1 a1", []int{0, 4}, false}, 86 | {"a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0", []int{0, 3, 6, 9, 12}, true}, 87 | } 88 | 89 | for _, tc := range testCases { 90 | nodes := str2nodes(tc.seq) 91 | if tc.expected != isCyclic(tc.indexes, nodes) { 92 | t.Errorf("for seq '%s', indexes %v, got %t, want %t", tc.seq, tc.indexes, !tc.expected, tc.expected) 93 | } 94 | } 95 | } 96 | 97 | // str2nodes converts strint to a sequence of *Node by following principle: 98 | // - node is represented by 2 characters 99 | // - first character is node type 100 | // - second character is the number for Node.Owns. 101 | func str2nodes(str string) []*Node { 102 | chars := []rune(str) 103 | nodes := make([]*Node, (len(chars)+1)/3) 104 | for i := 0; i < len(chars)-1; i += 3 { 105 | nodes[i/3] = &Node{Type: int(chars[i]), Owns: int(chars[i+1] - '0')} 106 | } 107 | return nodes 108 | } 109 | --------------------------------------------------------------------------------