├── .travis.yml
├── LICENSE
├── README.md
├── _output_example
    └── docker.html
├── go.mod
├── job
    ├── buildtree.go
    └── parse.go
├── main.go
├── printer
    ├── html.go
    ├── html_test.go
    ├── plumbing.go
    ├── printer.go
    └── text.go
├── suffixtree
    ├── dupl.go
    ├── dupl_test.go
    ├── suffixtree.go
    └── suffixtree_test.go
└── syntax
    ├── golang
        └── golang.go
    ├── syntax.go
    └── syntax_test.go


/.travis.yml:
--------------------------------------------------------------------------------
1 | language: go
2 | go:
3 |   - 1.14
4 |   - 1.15
5 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | The MIT License (MIT)
 2 | 
 3 | Copyright (c) 2015 Michal Bohuslávek
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # dupl [![Build Status](https://travis-ci.org/mibk/dupl.png)](https://travis-ci.org/mibk/dupl)
 2 | 
 3 | **dupl** is a tool written in Go for finding code clones. So far it can find clones only
 4 | in Go source files. The method uses a suffix tree for serialized ASTs. It ignores the values
 5 | of AST nodes and operates only on their types (e.g. `if a == 13 {}` and `if x == 100 {}` are
 6 | considered the same, provided they exceed the minimal token sequence size).
 7 | 
 8 | Because of the method used, dupl can report so-called "false positives" in its output. These are
 9 | matches that we do not consider clones (either because they are too small, or because the values
10 | of the matched tokens are completely different).
11 | 
12 | ## Installation
13 | 
14 | ```bash
15 | go get -u github.com/mibk/dupl
16 | ```
17 | 
18 | ## Usage
19 | 
20 | ```
21 | Usage of dupl:
22 |   dupl [flags] [paths]
23 | 
24 | Paths:
25 |   If the given path is a file, dupl will use it regardless of
26 |   the file extension. If it is a directory it will recursively
27 |   search for *.go files in that directory.
28 | 
29 |   If no path is given dupl will recursively search for *.go
30 |   files in the current directory.
31 | 
32 | Flags:
33 |   -files
34 |         read file names from stdin one at each line
35 |   -html
36 |         output the results as HTML, including duplicate code fragments
37 |   -plumbing
38 |         plumbing (easy-to-parse) output for consumption by scripts or tools
39 |   -t, -threshold size
40 |         minimum token sequence size as a clone (default 15)
41 |   -vendor
42 |         check files in vendor directory
43 |   -v, -verbose
44 |         explain what is being done
45 | 
46 | Examples:
47 |   dupl -t 100
48 |         Search clones in the current directory of size at least
49 |         100 tokens.
50 |   dupl $(find app/ -name '*_test.go')
51 |         Search for clones in tests in the app directory.
52 |   find app/ -name '*_test.go' |dupl -files
53 |         The same as above.
54 | ```
55 | 
56 | ## Example
57 | 
58 | The reduced output of the following command, run on the [Docker](https://www.docker.com) source code,
59 | looks like [this](http://htmlpreview.github.io/?https://github.com/mibk/dupl/blob/master/_output_example/docker.html).
60 | 
61 | ```bash
62 | $ dupl -t 200 -html >docker.html
63 | ```
64 | 


--------------------------------------------------------------------------------
/_output_example/docker.html:
--------------------------------------------------------------------------------
   1 | <!DOCTYPE html>
   2 | <meta charset="utf-8"/>
   3 | <title>Duplicates</title>
   4 | <style>
   5 | 	pre {
   6 | 		background-color: #FFD;
   7 | 		border: 1px solid #E2E2E2;
   8 | 		padding: 1ex;
   9 | 	}
  10 | </style>
  11 | <h1>#1 found 2 clones</h1>
  12 | <h2>vendor/src/code.google.com/p/go.net/ipv6/multicast_test.go:131</h2>
  13 | <pre>for i, toggle := range []bool{true, false, true} {
  14 | 	wb, err := (&icmpMessage{
  15 | 		Type: ipv6.ICMPTypeEchoRequest, Code: 0,
  16 | 		Body: &icmpEcho{
  17 | 			ID: os.Getpid() & 0xffff, Seq: i + 1,
  18 | 			Data: []byte("HELLO-R-U-THERE"),
  19 | 		},
  20 | 	}).Marshal()
  21 | 	if err != nil {
  22 | 		t.Fatalf("icmpMessage.Marshal failed: %v", err)
  23 | 	}
  24 | 	if err := p.SetControlMessage(cf, toggle); err != nil {
  25 | 		t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err)
  26 | 	}
  27 | 	cm.HopLimit = i + 1
  28 | 	if _, err := p.WriteTo(wb, &cm, dst); err != nil {
  29 | 		t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err)
  30 | 	}
  31 | 	b := make([]byte, 128)
  32 | 	if n, cm, _, err := p.ReadFrom(b); err != nil {
  33 | 		t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err)
  34 | 	} else {
  35 | 		t.Logf("rcvd cmsg: %v", cm)
  36 | 		if m, err := parseICMPMessage(b[:n]); err != nil {
  37 | 			t.Fatalf("parseICMPMessage failed: %v", err)
  38 | 		} else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 {
  39 | 			t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0)
  40 | 		}
  41 | 	}
  42 | }</pre>
  43 | <h2>vendor/src/code.google.com/p/go.net/ipv6/unicast_test.go:173</h2>
  44 | <pre>for i, toggle := range []bool{true, false, true} {
  45 | 	wb, err := (&icmpMessage{
  46 | 		Type: ipv6.ICMPTypeEchoRequest, Code: 0,
  47 | 		Body: &icmpEcho{
  48 | 			ID: os.Getpid() & 0xffff, Seq: i + 1,
  49 | 			Data: []byte("HELLO-R-U-THERE"),
  50 | 		},
  51 | 	}).Marshal()
  52 | 	if err != nil {
  53 | 		t.Fatalf("icmpMessage.Marshal failed: %v", err)
  54 | 	}
  55 | 	if err := p.SetControlMessage(cf, toggle); err != nil {
  56 | 		t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err)
  57 | 	}
  58 | 	cm.HopLimit = i + 1
  59 | 	if _, err := p.WriteTo(wb, &cm, dst); err != nil {
  60 | 		t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err)
  61 | 	}
  62 | 	b := make([]byte, 128)
  63 | 	if n, cm, _, err := p.ReadFrom(b); err != nil {
  64 | 		t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err)
  65 | 	} else {
  66 | 		t.Logf("rcvd cmsg: %v", cm)
  67 | 		if m, err := parseICMPMessage(b[:n]); err != nil {
  68 | 			t.Fatalf("parseICMPMessage failed: %v", err)
  69 | 		} else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 {
  70 | 			t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0)
  71 | 		}
  72 | 	}
  73 | }</pre>
  74 | <h1>#2 found 2 clones</h1>
  75 | <h2>vendor/src/code.google.com/p/go.net/websocket/hybi_test.go:33</h2>
  76 | <pre>func TestHybiClientHandshake(t *testing.T) {
  77 | 	b := bytes.NewBuffer([]byte{})
  78 | 	bw := bufio.NewWriter(b)
  79 | 	br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols
  80 | Upgrade: websocket
  81 | Connection: Upgrade
  82 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo=
  83 | Sec-WebSocket-Protocol: chat
  84 | 
  85 | `))
  86 | 	var err error
  87 | 	config := new(Config)
  88 | 	config.Location, err = url.ParseRequestURI("ws://server.example.com/chat")
  89 | 	if err != nil {
  90 | 		t.Fatal("location url", err)
  91 | 	}
  92 | 	config.Origin, err = url.ParseRequestURI("http://example.com")
  93 | 	if err != nil {
  94 | 		t.Fatal("origin url", err)
  95 | 	}
  96 | 	config.Protocol = append(config.Protocol, "chat")
  97 | 	config.Protocol = append(config.Protocol, "superchat")
  98 | 	config.Version = ProtocolVersionHybi13
  99 | 
 100 | 	config.handshakeData = map[string]string{
 101 | 		"key": "dGhlIHNhbXBsZSBub25jZQ==",
 102 | 	}
 103 | 	err = hybiClientHandshake(config, br, bw)
 104 | 	if err != nil {
 105 | 		t.Errorf("handshake failed: %v", err)
 106 | 	}
 107 | 	req, err := http.ReadRequest(bufio.NewReader(b))
 108 | 	if err != nil {
 109 | 		t.Fatalf("read request: %v", err)
 110 | 	}
 111 | 	if req.Method != "GET" {
 112 | 		t.Errorf("request method expected GET, but got %q", req.Method)
 113 | 	}
 114 | 	if req.URL.Path != "/chat" {
 115 | 		t.Errorf("request path expected /chat, but got %q", req.URL.Path)
 116 | 	}
 117 | 	if req.Proto != "HTTP/1.1" {
 118 | 		t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto)
 119 | 	}
 120 | 	if req.Host != "server.example.com" {
 121 | 		t.Errorf("request Host expected server.example.com, but got %v", req.Host)
 122 | 	}
 123 | 	var expectedHeader = map[string]string{
 124 | 		"Connection":             "Upgrade",
 125 | 		"Upgrade":                "websocket",
 126 | 		"Sec-Websocket-Key":      config.handshakeData["key"],
 127 | 		"Origin":                 config.Origin.String(),
 128 | 		"Sec-Websocket-Protocol": "chat, superchat",
 129 | 		"Sec-Websocket-Version":  fmt.Sprintf("%d", ProtocolVersionHybi13),
 130 | 	}
 131 | 	for k, v := range expectedHeader {
 132 | 		if req.Header.Get(k) != v {
 133 | 			t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k)))
 134 | 		}
 135 | 	}
 136 | }</pre>
 137 | <h2>vendor/src/code.google.com/p/go.net/websocket/hybi_test.go:160</h2>
 138 | <pre>func TestHybiClientHandshakeHybi08(t *testing.T) {
 139 | 	b := bytes.NewBuffer([]byte{})
 140 | 	bw := bufio.NewWriter(b)
 141 | 	br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols
 142 | Upgrade: websocket
 143 | Connection: Upgrade
 144 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo=
 145 | Sec-WebSocket-Protocol: chat
 146 | 
 147 | `))
 148 | 	var err error
 149 | 	config := new(Config)
 150 | 	config.Location, err = url.ParseRequestURI("ws://server.example.com/chat")
 151 | 	if err != nil {
 152 | 		t.Fatal("location url", err)
 153 | 	}
 154 | 	config.Origin, err = url.ParseRequestURI("http://example.com")
 155 | 	if err != nil {
 156 | 		t.Fatal("origin url", err)
 157 | 	}
 158 | 	config.Protocol = append(config.Protocol, "chat")
 159 | 	config.Protocol = append(config.Protocol, "superchat")
 160 | 	config.Version = ProtocolVersionHybi08
 161 | 
 162 | 	config.handshakeData = map[string]string{
 163 | 		"key": "dGhlIHNhbXBsZSBub25jZQ==",
 164 | 	}
 165 | 	err = hybiClientHandshake(config, br, bw)
 166 | 	if err != nil {
 167 | 		t.Errorf("handshake failed: %v", err)
 168 | 	}
 169 | 	req, err := http.ReadRequest(bufio.NewReader(b))
 170 | 	if err != nil {
 171 | 		t.Fatalf("read request: %v", err)
 172 | 	}
 173 | 	if req.Method != "GET" {
 174 | 		t.Errorf("request method expected GET, but got %q", req.Method)
 175 | 	}
 176 | 	if req.URL.Path != "/chat" {
 177 | 		t.Errorf("request path expected /demo, but got %q", req.URL.Path)
 178 | 	}
 179 | 	if req.Proto != "HTTP/1.1" {
 180 | 		t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto)
 181 | 	}
 182 | 	if req.Host != "server.example.com" {
 183 | 		t.Errorf("request Host expected example.com, but got %v", req.Host)
 184 | 	}
 185 | 	var expectedHeader = map[string]string{
 186 | 		"Connection":             "Upgrade",
 187 | 		"Upgrade":                "websocket",
 188 | 		"Sec-Websocket-Key":      config.handshakeData["key"],
 189 | 		"Sec-Websocket-Origin":   config.Origin.String(),
 190 | 		"Sec-Websocket-Protocol": "chat, superchat",
 191 | 		"Sec-Websocket-Version":  fmt.Sprintf("%d", ProtocolVersionHybi08),
 192 | 	}
 193 | 	for k, v := range expectedHeader {
 194 | 		if req.Header.Get(k) != v {
 195 | 			t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k)))
 196 | 		}
 197 | 	}
 198 | }</pre>
 199 | <h1>#3 found 2 clones</h1>
 200 | <h2>vendor/src/code.google.com/p/go.net/ipv4/gen.go:122</h2>
 201 | <pre>func (icp *icmpv4Parameters) escape() []canonICMPv4ParamRecord {
 202 | 	id := -1
 203 | 	for i, r := range icp.Registries {
 204 | 		if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") {
 205 | 			id = i
 206 | 			break
 207 | 		}
 208 | 	}
 209 | 	if id < 0 {
 210 | 		return nil
 211 | 	}
 212 | 	prs := make([]canonICMPv4ParamRecord, len(icp.Registries[id].Records))
 213 | 	sr := strings.NewReplacer(
 214 | 		"Messages", "",
 215 | 		"Message", "",
 216 | 		"ICMP", "",
 217 | 		"+", "P",
 218 | 		"-", "",
 219 | 		"/", "",
 220 | 		".", "",
 221 | 		" ", "",
 222 | 	)
 223 | 	for i, pr := range icp.Registries[id].Records {
 224 | 		if strings.Contains(pr.Descr, "Reserved") ||
 225 | 			strings.Contains(pr.Descr, "Unassigned") ||
 226 | 			strings.Contains(pr.Descr, "Deprecated") ||
 227 | 			strings.Contains(pr.Descr, "Experiment") ||
 228 | 			strings.Contains(pr.Descr, "experiment") {
 229 | 			continue
 230 | 		}
 231 | 		ss := strings.Split(pr.Descr, "\n")
 232 | 		if len(ss) > 1 {
 233 | 			prs[i].Descr = strings.Join(ss, " ")
 234 | 		} else {
 235 | 			prs[i].Descr = ss[0]
 236 | 		}
 237 | 		s := strings.TrimSpace(prs[i].Descr)
 238 | 		prs[i].OrigDescr = s
 239 | 		prs[i].Descr = sr.Replace(s)
 240 | 		prs[i].Value, _ = strconv.Atoi(pr.Value)
 241 | 	}
 242 | 	return prs
 243 | }</pre>
 244 | <h2>vendor/src/code.google.com/p/go.net/ipv6/gen.go:122</h2>
 245 | <pre>func (icp *icmpv6Parameters) escape() []canonICMPv6ParamRecord {
 246 | 	id := -1
 247 | 	for i, r := range icp.Registries {
 248 | 		if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") {
 249 | 			id = i
 250 | 			break
 251 | 		}
 252 | 	}
 253 | 	if id < 0 {
 254 | 		return nil
 255 | 	}
 256 | 	prs := make([]canonICMPv6ParamRecord, len(icp.Registries[id].Records))
 257 | 	sr := strings.NewReplacer(
 258 | 		"Messages", "",
 259 | 		"Message", "",
 260 | 		"ICMP", "",
 261 | 		"+", "P",
 262 | 		"-", "",
 263 | 		"/", "",
 264 | 		".", "",
 265 | 		" ", "",
 266 | 	)
 267 | 	for i, pr := range icp.Registries[id].Records {
 268 | 		if strings.Contains(pr.Name, "Reserved") ||
 269 | 			strings.Contains(pr.Name, "Unassigned") ||
 270 | 			strings.Contains(pr.Name, "Deprecated") ||
 271 | 			strings.Contains(pr.Name, "Experiment") ||
 272 | 			strings.Contains(pr.Name, "experiment") {
 273 | 			continue
 274 | 		}
 275 | 		ss := strings.Split(pr.Name, "\n")
 276 | 		if len(ss) > 1 {
 277 | 			prs[i].Name = strings.Join(ss, " ")
 278 | 		} else {
 279 | 			prs[i].Name = ss[0]
 280 | 		}
 281 | 		s := strings.TrimSpace(prs[i].Name)
 282 | 		prs[i].OrigName = s
 283 | 		prs[i].Name = sr.Replace(s)
 284 | 		prs[i].Value, _ = strconv.Atoi(pr.Value)
 285 | 	}
 286 | 	return prs
 287 | }</pre>
 288 | <h1>#4 found 2 clones</h1>
 289 | <h2>pkg/term/termios_darwin.go:1</h2>
 290 | <pre>package term
 291 | 
 292 | import (
 293 | 	"syscall"
 294 | 	"unsafe"
 295 | )
 296 | 
 297 | const (
 298 | 	getTermios = syscall.TIOCGETA
 299 | 	setTermios = syscall.TIOCSETA
 300 | 
 301 | 	IGNBRK = syscall.IGNBRK
 302 | 	PARMRK = syscall.PARMRK
 303 | 	INLCR  = syscall.INLCR
 304 | 	IGNCR  = syscall.IGNCR
 305 | 	ECHONL = syscall.ECHONL
 306 | 	CSIZE  = syscall.CSIZE
 307 | 	ICRNL  = syscall.ICRNL
 308 | 	ISTRIP = syscall.ISTRIP
 309 | 	PARENB = syscall.PARENB
 310 | 	ECHO   = syscall.ECHO
 311 | 	ICANON = syscall.ICANON
 312 | 	ISIG   = syscall.ISIG
 313 | 	IXON   = syscall.IXON
 314 | 	BRKINT = syscall.BRKINT
 315 | 	INPCK  = syscall.INPCK
 316 | 	OPOST  = syscall.OPOST
 317 | 	CS8    = syscall.CS8
 318 | 	IEXTEN = syscall.IEXTEN
 319 | )
 320 | 
 321 | type Termios struct {
 322 | 	Iflag  uint64
 323 | 	Oflag  uint64
 324 | 	Cflag  uint64
 325 | 	Lflag  uint64
 326 | 	Cc     [20]byte
 327 | 	Ispeed uint64
 328 | 	Ospeed uint64
 329 | }
 330 | 
 331 | // MakeRaw put the terminal connected to the given file descriptor into raw
 332 | // mode and returns the previous state of the terminal so that it can be
 333 | // restored.
 334 | func MakeRaw(fd uintptr) (*State, error) {
 335 | 	var oldState State
 336 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 {
 337 | 		return nil, err
 338 | 	}
 339 | 
 340 | 	newState := oldState.termios
 341 | 	newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON)
 342 | 	newState.Oflag &^= OPOST
 343 | 	newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN)
 344 | 	newState.Cflag &^= (CSIZE | PARENB)
 345 | 	newState.Cflag |= CS8
 346 | 	newState.Cc[syscall.VMIN] = 1
 347 | 	newState.Cc[syscall.VTIME] = 0
 348 | 
 349 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 {
 350 | 		return nil, err
 351 | 	}
 352 | 
 353 | 	return &oldState, nil
 354 | }</pre>
 355 | <h2>pkg/term/termios_freebsd.go:1</h2>
 356 | <pre>package term
 357 | 
 358 | import (
 359 | 	"syscall"
 360 | 	"unsafe"
 361 | )
 362 | 
 363 | const (
 364 | 	getTermios = syscall.TIOCGETA
 365 | 	setTermios = syscall.TIOCSETA
 366 | 
 367 | 	IGNBRK = syscall.IGNBRK
 368 | 	PARMRK = syscall.PARMRK
 369 | 	INLCR  = syscall.INLCR
 370 | 	IGNCR  = syscall.IGNCR
 371 | 	ECHONL = syscall.ECHONL
 372 | 	CSIZE  = syscall.CSIZE
 373 | 	ICRNL  = syscall.ICRNL
 374 | 	ISTRIP = syscall.ISTRIP
 375 | 	PARENB = syscall.PARENB
 376 | 	ECHO   = syscall.ECHO
 377 | 	ICANON = syscall.ICANON
 378 | 	ISIG   = syscall.ISIG
 379 | 	IXON   = syscall.IXON
 380 | 	BRKINT = syscall.BRKINT
 381 | 	INPCK  = syscall.INPCK
 382 | 	OPOST  = syscall.OPOST
 383 | 	CS8    = syscall.CS8
 384 | 	IEXTEN = syscall.IEXTEN
 385 | )
 386 | 
 387 | type Termios struct {
 388 | 	Iflag  uint32
 389 | 	Oflag  uint32
 390 | 	Cflag  uint32
 391 | 	Lflag  uint32
 392 | 	Cc     [20]byte
 393 | 	Ispeed uint32
 394 | 	Ospeed uint32
 395 | }
 396 | 
 397 | // MakeRaw put the terminal connected to the given file descriptor into raw
 398 | // mode and returns the previous state of the terminal so that it can be
 399 | // restored.
 400 | func MakeRaw(fd uintptr) (*State, error) {
 401 | 	var oldState State
 402 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 {
 403 | 		return nil, err
 404 | 	}
 405 | 
 406 | 	newState := oldState.termios
 407 | 	newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON)
 408 | 	newState.Oflag &^= OPOST
 409 | 	newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN)
 410 | 	newState.Cflag &^= (CSIZE | PARENB)
 411 | 	newState.Cflag |= CS8
 412 | 	newState.Cc[syscall.VMIN] = 1
 413 | 	newState.Cc[syscall.VTIME] = 0
 414 | 
 415 | 	if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 {
 416 | 		return nil, err
 417 | 	}
 418 | 
 419 | 	return &oldState, nil
 420 | }</pre>
 421 | <h1>#5 found 2 clones</h1>
 422 | <h2>vendor/src/code.google.com/p/go.net/ipv4/gentest.go:12</h2>
 423 | <pre>package main
 424 | 
 425 | import (
 426 | 	"bytes"
 427 | 	"encoding/xml"
 428 | 	"fmt"
 429 | 	"go/format"
 430 | 	"io"
 431 | 	"net/http"
 432 | 	"os"
 433 | 	"strconv"
 434 | 	"strings"
 435 | )
 436 | 
 437 | var registries = []struct {
 438 | 	url   string
 439 | 	parse func(io.Writer, io.Reader) error
 440 | }{
 441 | 	{
 442 | 		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
 443 | 		parseDSCPRegistry,
 444 | 	},
 445 | 	{
 446 | 		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
 447 | 		parseTOSTCByte,
 448 | 	},
 449 | }
 450 | 
 451 | func main() {
 452 | 	var bb bytes.Buffer
 453 | 	fmt.Fprintf(&bb, "// go run gentv.go\n")
 454 | 	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
 455 | 	fmt.Fprintf(&bb, "package ipv4_test\n\n")
 456 | 	for _, r := range registries {
 457 | 		resp, err := http.Get(r.url)
 458 | 		if err != nil {
 459 | 			fmt.Fprintln(os.Stderr, err)
 460 | 			os.Exit(1)
 461 | 		}
 462 | 		defer resp.Body.Close()
 463 | 		if resp.StatusCode != http.StatusOK {
 464 | 			fmt.Fprintf(os.Stderr, "got HTTP status code %v for %v\n", resp.StatusCode, r.url)
 465 | 			os.Exit(1)
 466 | 		}
 467 | 		if err := r.parse(&bb, resp.Body); err != nil {
 468 | 			fmt.Fprintln(os.Stderr, err)
 469 | 			os.Exit(1)
 470 | 		}
 471 | 		fmt.Fprintf(&bb, "\n")
 472 | 	}
 473 | 	b, err := format.Source(bb.Bytes())
 474 | 	if err != nil {
 475 | 		fmt.Fprintln(os.Stderr, err)
 476 | 		os.Exit(1)
 477 | 	}
 478 | 	os.Stdout.Write(b)
 479 | }
 480 | 
 481 | func parseDSCPRegistry(w io.Writer, r io.Reader) error {
 482 | 	dec := xml.NewDecoder(r)
 483 | 	var dr dscpRegistry
 484 | 	if err := dec.Decode(&dr); err != nil {
 485 | 		return err
 486 | 	}
 487 | 	drs := dr.escape()
 488 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
 489 | 	fmt.Fprintf(w, "const (\n")
 490 | 	for _, dr := range drs {
 491 | 		fmt.Fprintf(w, "DiffServ%s = %#x", dr.Name, dr.Value)
 492 | 		fmt.Fprintf(w, "// %s\n", dr.OrigName)
 493 | 	}
 494 | 	fmt.Fprintf(w, ")\n")
 495 | 	return nil
 496 | }
 497 | 
 498 | type dscpRegistry struct {
 499 | 	XMLName     xml.Name     `xml:"registry"`
 500 | 	Title       string       `xml:"title"`
 501 | 	Updated     string       `xml:"updated"`
 502 | 	Note        string       `xml:"note"`
 503 | 	RegTitle    string       `xml:"registry>title"`
 504 | 	PoolRecords []dscpRecord `xml:"registry>record"`
 505 | 	Records     []dscpRecord `xml:"registry>registry>record"`
 506 | }
 507 | 
 508 | type dscpRecord struct {
 509 | 	Name  string `xml:"name"`
 510 | 	Space string `xml:"space"`
 511 | }
 512 | 
 513 | type canonDSCPRecord struct {
 514 | 	OrigName string
 515 | 	Name     string
 516 | 	Value    int
 517 | }
 518 | 
 519 | func (drr *dscpRegistry) escape() []canonDSCPRecord {
 520 | 	drs := make([]canonDSCPRecord, len(drr.Records))
 521 | 	sr := strings.NewReplacer(
 522 | 		"+", "",
 523 | 		"-", "",
 524 | 		"/", "",
 525 | 		".", "",
 526 | 		" ", "",
 527 | 	)
 528 | 	for i, dr := range drr.Records {
 529 | 		s := strings.TrimSpace(dr.Name)
 530 | 		drs[i].OrigName = s
 531 | 		drs[i].Name = sr.Replace(s)
 532 | 		n, err := strconv.ParseUint(dr.Space, 2, 8)
 533 | 		if err != nil {
 534 | 			continue
 535 | 		}
 536 | 		drs[i].Value = int(n) << 2
 537 | 	}
 538 | 	return drs
 539 | }
 540 | 
 541 | func parseTOSTCByte(w io.Writer, r io.Reader) error {
 542 | 	dec := xml.NewDecoder(r)
 543 | 	var ttb tosTCByte
 544 | 	if err := dec.Decode(&ttb); err != nil {
 545 | 		return err
 546 | 	}
 547 | 	trs := ttb.escape()
 548 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
 549 | 	fmt.Fprintf(w, "const (\n")
 550 | 	for _, tr := range trs {
 551 | 		fmt.Fprintf(w, "%s = %#x", tr.Keyword, tr.Value)
 552 | 		fmt.Fprintf(w, "// %s\n", tr.OrigKeyword)
 553 | 	}
 554 | 	fmt.Fprintf(w, ")\n")
 555 | 	return nil
 556 | }
 557 | 
 558 | type tosTCByte struct {
 559 | 	XMLName  xml.Name          `xml:"registry"`
 560 | 	Title    string            `xml:"title"`
 561 | 	Updated  string            `xml:"updated"`
 562 | 	Note     string            `xml:"note"`
 563 | 	RegTitle string            `xml:"registry>title"`
 564 | 	Records  []tosTCByteRecord `xml:"registry>record"`
 565 | }
 566 | 
 567 | type tosTCByteRecord struct {
 568 | 	Binary  string `xml:"binary"`
 569 | 	Keyword string `xml:"keyword"`
 570 | }
 571 | 
 572 | type canonTOSTCByteRecord struct {
 573 | 	OrigKeyword string
 574 | 	Keyword     string
 575 | 	Value       int
 576 | }
 577 | 
 578 | func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
 579 | 	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
 580 | 	sr := strings.NewReplacer(
 581 | 		"Capable", "",
 582 | 		"(", "",
 583 | 		")", "",
 584 | 		"+", "",
 585 | 		"-", "",
 586 | 		"/", "",
 587 | 		".", "",
 588 | 		" ", "",
 589 | 	)
 590 | 	for i, tr := range ttb.Records {
 591 | 		s := strings.TrimSpace(tr.Keyword)
 592 | 		trs[i].OrigKeyword = s
 593 | 		ss := strings.Split(s, " ")
 594 | 		if len(ss) > 1 {
 595 | 			trs[i].Keyword = strings.Join(ss[1:], " ")
 596 | 		} else {
 597 | 			trs[i].Keyword = ss[0]
 598 | 		}
 599 | 		trs[i].Keyword = sr.Replace(trs[i].Keyword)
 600 | 		n, err := strconv.ParseUint(tr.Binary, 2, 8)
 601 | 		if err != nil {
 602 | 			continue
 603 | 		}
 604 | 		trs[i].Value = int(n)
 605 | 	}
 606 | 	return trs
 607 | }</pre>
 608 | <h2>vendor/src/code.google.com/p/go.net/ipv6/gentest.go:12</h2>
 609 | <pre>package main
 610 | 
 611 | import (
 612 | 	"bytes"
 613 | 	"encoding/xml"
 614 | 	"fmt"
 615 | 	"go/format"
 616 | 	"io"
 617 | 	"net/http"
 618 | 	"os"
 619 | 	"strconv"
 620 | 	"strings"
 621 | )
 622 | 
 623 | var registries = []struct {
 624 | 	url   string
 625 | 	parse func(io.Writer, io.Reader) error
 626 | }{
 627 | 	{
 628 | 		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
 629 | 		parseDSCPRegistry,
 630 | 	},
 631 | 	{
 632 | 		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
 633 | 		parseTOSTCByte,
 634 | 	},
 635 | }
 636 | 
 637 | func main() {
 638 | 	var bb bytes.Buffer
 639 | 	fmt.Fprintf(&bb, "// go run gentv.go\n")
 640 | 	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
 641 | 	fmt.Fprintf(&bb, "package ipv6_test\n\n")
 642 | 	for _, r := range registries {
 643 | 		resp, err := http.Get(r.url)
 644 | 		if err != nil {
 645 | 			fmt.Fprintln(os.Stderr, err)
 646 | 			os.Exit(1)
 647 | 		}
 648 | 		defer resp.Body.Close()
 649 | 		if resp.StatusCode != http.StatusOK {
 650 | 			fmt.Fprintf(os.Stderr, "got HTTP status code %v for %v\n", resp.StatusCode, r.url)
 651 | 			os.Exit(1)
 652 | 		}
 653 | 		if err := r.parse(&bb, resp.Body); err != nil {
 654 | 			fmt.Fprintln(os.Stderr, err)
 655 | 			os.Exit(1)
 656 | 		}
 657 | 		fmt.Fprintf(&bb, "\n")
 658 | 	}
 659 | 	b, err := format.Source(bb.Bytes())
 660 | 	if err != nil {
 661 | 		fmt.Fprintln(os.Stderr, err)
 662 | 		os.Exit(1)
 663 | 	}
 664 | 	os.Stdout.Write(b)
 665 | }
 666 | 
 667 | func parseDSCPRegistry(w io.Writer, r io.Reader) error {
 668 | 	dec := xml.NewDecoder(r)
 669 | 	var dr dscpRegistry
 670 | 	if err := dec.Decode(&dr); err != nil {
 671 | 		return err
 672 | 	}
 673 | 	drs := dr.escape()
 674 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
 675 | 	fmt.Fprintf(w, "const (\n")
 676 | 	for _, dr := range drs {
 677 | 		fmt.Fprintf(w, "DiffServ%s = %#x", dr.Name, dr.Value)
 678 | 		fmt.Fprintf(w, "// %s\n", dr.OrigName)
 679 | 	}
 680 | 	fmt.Fprintf(w, ")\n")
 681 | 	return nil
 682 | }
 683 | 
 684 | type dscpRegistry struct {
 685 | 	XMLName     xml.Name     `xml:"registry"`
 686 | 	Title       string       `xml:"title"`
 687 | 	Updated     string       `xml:"updated"`
 688 | 	Note        string       `xml:"note"`
 689 | 	RegTitle    string       `xml:"registry>title"`
 690 | 	PoolRecords []dscpRecord `xml:"registry>record"`
 691 | 	Records     []dscpRecord `xml:"registry>registry>record"`
 692 | }
 693 | 
 694 | type dscpRecord struct {
 695 | 	Name  string `xml:"name"`
 696 | 	Space string `xml:"space"`
 697 | }
 698 | 
 699 | type canonDSCPRecord struct {
 700 | 	OrigName string
 701 | 	Name     string
 702 | 	Value    int
 703 | }
 704 | 
 705 | func (drr *dscpRegistry) escape() []canonDSCPRecord {
 706 | 	drs := make([]canonDSCPRecord, len(drr.Records))
 707 | 	sr := strings.NewReplacer(
 708 | 		"+", "",
 709 | 		"-", "",
 710 | 		"/", "",
 711 | 		".", "",
 712 | 		" ", "",
 713 | 	)
 714 | 	for i, dr := range drr.Records {
 715 | 		s := strings.TrimSpace(dr.Name)
 716 | 		drs[i].OrigName = s
 717 | 		drs[i].Name = sr.Replace(s)
 718 | 		n, err := strconv.ParseUint(dr.Space, 2, 8)
 719 | 		if err != nil {
 720 | 			continue
 721 | 		}
 722 | 		drs[i].Value = int(n) << 2
 723 | 	}
 724 | 	return drs
 725 | }
 726 | 
 727 | func parseTOSTCByte(w io.Writer, r io.Reader) error {
 728 | 	dec := xml.NewDecoder(r)
 729 | 	var ttb tosTCByte
 730 | 	if err := dec.Decode(&ttb); err != nil {
 731 | 		return err
 732 | 	}
 733 | 	trs := ttb.escape()
 734 | 	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
 735 | 	fmt.Fprintf(w, "const (\n")
 736 | 	for _, tr := range trs {
 737 | 		fmt.Fprintf(w, "%s = %#x", tr.Keyword, tr.Value)
 738 | 		fmt.Fprintf(w, "// %s\n", tr.OrigKeyword)
 739 | 	}
 740 | 	fmt.Fprintf(w, ")\n")
 741 | 	return nil
 742 | }
 743 | 
 744 | type tosTCByte struct {
 745 | 	XMLName  xml.Name          `xml:"registry"`
 746 | 	Title    string            `xml:"title"`
 747 | 	Updated  string            `xml:"updated"`
 748 | 	Note     string            `xml:"note"`
 749 | 	RegTitle string            `xml:"registry>title"`
 750 | 	Records  []tosTCByteRecord `xml:"registry>record"`
 751 | }
 752 | 
 753 | type tosTCByteRecord struct {
 754 | 	Binary  string `xml:"binary"`
 755 | 	Keyword string `xml:"keyword"`
 756 | }
 757 | 
 758 | type canonTOSTCByteRecord struct {
 759 | 	OrigKeyword string
 760 | 	Keyword     string
 761 | 	Value       int
 762 | }
 763 | 
 764 | func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
 765 | 	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
 766 | 	sr := strings.NewReplacer(
 767 | 		"Capable", "",
 768 | 		"(", "",
 769 | 		")", "",
 770 | 		"+", "",
 771 | 		"-", "",
 772 | 		"/", "",
 773 | 		".", "",
 774 | 		" ", "",
 775 | 	)
 776 | 	for i, tr := range ttb.Records {
 777 | 		s := strings.TrimSpace(tr.Keyword)
 778 | 		trs[i].OrigKeyword = s
 779 | 		ss := strings.Split(s, " ")
 780 | 		if len(ss) > 1 {
 781 | 			trs[i].Keyword = strings.Join(ss[1:], " ")
 782 | 		} else {
 783 | 			trs[i].Keyword = ss[0]
 784 | 		}
 785 | 		trs[i].Keyword = sr.Replace(trs[i].Keyword)
 786 | 		n, err := strconv.ParseUint(tr.Binary, 2, 8)
 787 | 		if err != nil {
 788 | 			continue
 789 | 		}
 790 | 		trs[i].Value = int(n)
 791 | 	}
 792 | 	return trs
 793 | }</pre>
 794 | <h1>#6 found 2 clones</h1>
 795 | <h2>pkg/jsonlog/jsonlog_marshalling.go:114</h2>
 796 | <pre>                                                         {
 797 | 	const hex = "0123456789abcdef"
 798 | 
 799 | 	buf.WriteByte('"')
 800 | 	start := 0
 801 | 	for i := 0; i < len(s); {
 802 | 		if b := s[i]; b < utf8.RuneSelf {
 803 | 			if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
 804 | 				i++
 805 | 				continue
 806 | 			}
 807 | 			if start < i {
 808 | 				buf.WriteString(s[start:i])
 809 | 			}
 810 | 			switch b {
 811 | 			case '\\', '"':
 812 | 				buf.WriteByte('\\')
 813 | 				buf.WriteByte(b)
 814 | 			case '\n':
 815 | 				buf.WriteByte('\\')
 816 | 				buf.WriteByte('n')
 817 | 			case '\r':
 818 | 				buf.WriteByte('\\')
 819 | 				buf.WriteByte('r')
 820 | 			default:
 821 | 
 822 | 				buf.WriteString(`\u00`)
 823 | 				buf.WriteByte(hex[b>>4])
 824 | 				buf.WriteByte(hex[b&0xF])
 825 | 			}
 826 | 			i++
 827 | 			start = i
 828 | 			continue
 829 | 		}
 830 | 		c, size := utf8.DecodeRuneInString(s[i:])
 831 | 		if c == utf8.RuneError && size == 1 {
 832 | 			if start < i {
 833 | 				buf.WriteString(s[start:i])
 834 | 			}
 835 | 			buf.WriteString(`\ufffd`)
 836 | 			i += size
 837 | 			start = i
 838 | 			continue
 839 | 		}
 840 | 
 841 | 		if c == '\u2028' || c == '\u2029' {
 842 | 			if start < i {
 843 | 				buf.WriteString(s[start:i])
 844 | 			}
 845 | 			buf.WriteString(`\u202`)
 846 | 			buf.WriteByte(hex[c&0xF])
 847 | 			i += size
 848 | 			start = i
 849 | 			continue
 850 | 		}
 851 | 		i += size
 852 | 	}
 853 | 	if start < len(s) {
 854 | 		buf.WriteString(s[start:])
 855 | 	}
 856 | 	buf.WriteByte('"')
 857 | }</pre>
 858 | <h2>pkg/jsonlog/jsonlogbytes.go:54</h2>
 859 | <pre>                                                                {
 860 | 	const hex = "0123456789abcdef"
 861 | 
 862 | 	buf.WriteByte('"')
 863 | 	start := 0
 864 | 	for i := 0; i < len(s); {
 865 | 		if b := s[i]; b < utf8.RuneSelf {
 866 | 			if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' {
 867 | 				i++
 868 | 				continue
 869 | 			}
 870 | 			if start < i {
 871 | 				buf.Write(s[start:i])
 872 | 			}
 873 | 			switch b {
 874 | 			case '\\', '"':
 875 | 				buf.WriteByte('\\')
 876 | 				buf.WriteByte(b)
 877 | 			case '\n':
 878 | 				buf.WriteByte('\\')
 879 | 				buf.WriteByte('n')
 880 | 			case '\r':
 881 | 				buf.WriteByte('\\')
 882 | 				buf.WriteByte('r')
 883 | 			default:
 884 | 
 885 | 				buf.WriteString(`\u00`)
 886 | 				buf.WriteByte(hex[b>>4])
 887 | 				buf.WriteByte(hex[b&0xF])
 888 | 			}
 889 | 			i++
 890 | 			start = i
 891 | 			continue
 892 | 		}
 893 | 		c, size := utf8.DecodeRune(s[i:])
 894 | 		if c == utf8.RuneError && size == 1 {
 895 | 			if start < i {
 896 | 				buf.Write(s[start:i])
 897 | 			}
 898 | 			buf.WriteString(`\ufffd`)
 899 | 			i += size
 900 | 			start = i
 901 | 			continue
 902 | 		}
 903 | 
 904 | 		if c == '\u2028' || c == '\u2029' {
 905 | 			if start < i {
 906 | 				buf.Write(s[start:i])
 907 | 			}
 908 | 			buf.WriteString(`\u202`)
 909 | 			buf.WriteByte(hex[c&0xF])
 910 | 			i += size
 911 | 			start = i
 912 | 			continue
 913 | 		}
 914 | 		i += size
 915 | 	}
 916 | 	if start < len(s) {
 917 | 		buf.Write(s[start:])
 918 | 	}
 919 | 	buf.WriteByte('"')
 920 | }</pre>
 921 | <h1>#7 found 3 clones</h1>
 922 | <h2>daemon/container_unit_test.go:8</h2>
 923 | <pre>func TestParseNetworkOptsPrivateOnly(t *testing.T) {
 924 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::80"})
 925 | 	if err != nil {
 926 | 		t.Fatal(err)
 927 | 	}
 928 | 	if len(ports) != 1 {
 929 | 		t.Logf("Expected 1 got %d", len(ports))
 930 | 		t.FailNow()
 931 | 	}
 932 | 	if len(bindings) != 1 {
 933 | 		t.Logf("Expected 1 got %d", len(bindings))
 934 | 		t.FailNow()
 935 | 	}
 936 | 	for k := range ports {
 937 | 		if k.Proto() != "tcp" {
 938 | 			t.Logf("Expected tcp got %s", k.Proto())
 939 | 			t.Fail()
 940 | 		}
 941 | 		if k.Port() != "80" {
 942 | 			t.Logf("Expected 80 got %s", k.Port())
 943 | 			t.Fail()
 944 | 		}
 945 | 		b, exists := bindings[k]
 946 | 		if !exists {
 947 | 			t.Log("Binding does not exist")
 948 | 			t.FailNow()
 949 | 		}
 950 | 		if len(b) != 1 {
 951 | 			t.Logf("Expected 1 got %d", len(b))
 952 | 			t.FailNow()
 953 | 		}
 954 | 		s := b[0]
 955 | 		if s.HostPort != "" {
 956 | 			t.Logf("Expected \"\" got %s", s.HostPort)
 957 | 			t.Fail()
 958 | 		}
 959 | 		if s.HostIp != "192.168.1.100" {
 960 | 			t.Fail()
 961 | 		}
 962 | 	}
 963 | }</pre>
 964 | <h2>daemon/container_unit_test.go:50</h2>
 965 | <pre>func TestParseNetworkOptsPublic(t *testing.T) {
 966 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100:8080:80"})
 967 | 	if err != nil {
 968 | 		t.Fatal(err)
 969 | 	}
 970 | 	if len(ports) != 1 {
 971 | 		t.Logf("Expected 1 got %d", len(ports))
 972 | 		t.FailNow()
 973 | 	}
 974 | 	if len(bindings) != 1 {
 975 | 		t.Logf("Expected 1 got %d", len(bindings))
 976 | 		t.FailNow()
 977 | 	}
 978 | 	for k := range ports {
 979 | 		if k.Proto() != "tcp" {
 980 | 			t.Logf("Expected tcp got %s", k.Proto())
 981 | 			t.Fail()
 982 | 		}
 983 | 		if k.Port() != "80" {
 984 | 			t.Logf("Expected 80 got %s", k.Port())
 985 | 			t.Fail()
 986 | 		}
 987 | 		b, exists := bindings[k]
 988 | 		if !exists {
 989 | 			t.Log("Binding does not exist")
 990 | 			t.FailNow()
 991 | 		}
 992 | 		if len(b) != 1 {
 993 | 			t.Logf("Expected 1 got %d", len(b))
 994 | 			t.FailNow()
 995 | 		}
 996 | 		s := b[0]
 997 | 		if s.HostPort != "8080" {
 998 | 			t.Logf("Expected 8080 got %s", s.HostPort)
 999 | 			t.Fail()
1000 | 		}
1001 | 		if s.HostIp != "192.168.1.100" {
1002 | 			t.Fail()
1003 | 		}
1004 | 	}
1005 | }</pre>
1006 | <h2>daemon/container_unit_test.go:127</h2>
1007 | <pre>func TestParseNetworkOptsUdp(t *testing.T) {
1008 | 	ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::6000/udp"})
1009 | 	if err != nil {
1010 | 		t.Fatal(err)
1011 | 	}
1012 | 	if len(ports) != 1 {
1013 | 		t.Logf("Expected 1 got %d", len(ports))
1014 | 		t.FailNow()
1015 | 	}
1016 | 	if len(bindings) != 1 {
1017 | 		t.Logf("Expected 1 got %d", len(bindings))
1018 | 		t.FailNow()
1019 | 	}
1020 | 	for k := range ports {
1021 | 		if k.Proto() != "udp" {
1022 | 			t.Logf("Expected udp got %s", k.Proto())
1023 | 			t.Fail()
1024 | 		}
1025 | 		if k.Port() != "6000" {
1026 | 			t.Logf("Expected 6000 got %s", k.Port())
1027 | 			t.Fail()
1028 | 		}
1029 | 		b, exists := bindings[k]
1030 | 		if !exists {
1031 | 			t.Log("Binding does not exist")
1032 | 			t.FailNow()
1033 | 		}
1034 | 		if len(b) != 1 {
1035 | 			t.Logf("Expected 1 got %d", len(b))
1036 | 			t.FailNow()
1037 | 		}
1038 | 		s := b[0]
1039 | 		if s.HostPort != "" {
1040 | 			t.Logf("Expected \"\" got %s", s.HostPort)
1041 | 			t.Fail()
1042 | 		}
1043 | 		if s.HostIp != "192.168.1.100" {
1044 | 			t.Fail()
1045 | 		}
1046 | 	}
1047 | }</pre>
1048 | <h1>#8 found 2 clones</h1>
1049 | <h2>vendor/src/github.com/docker/libnetwork/portmapper/mapper_test.go:39</h2>
1050 | <pre>func TestMapTCPPorts(t *testing.T) {
1051 | 	defer netutils.SetupTestNetNS(t)()
1052 | 	pm := New()
1053 | 	dstIP1 := net.ParseIP("192.168.0.1")
1054 | 	dstIP2 := net.ParseIP("192.168.0.2")
1055 | 	dstAddr1 := &net.TCPAddr{IP: dstIP1, Port: 80}
1056 | 	dstAddr2 := &net.TCPAddr{IP: dstIP2, Port: 80}
1057 | 
1058 | 	srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
1059 | 	srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
1060 | 
1061 | 	addrEqual := func(addr1, addr2 net.Addr) bool {
1062 | 		return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String())
1063 | 	}
1064 | 
1065 | 	if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil {
1066 | 		t.Fatalf("Failed to allocate port: %s", err)
1067 | 	} else if !addrEqual(dstAddr1, host) {
1068 | 		t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s",
1069 | 			dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network())
1070 | 	}
1071 | 
1072 | 	if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil {
1073 | 		t.Fatalf("Port is in use - mapping should have failed")
1074 | 	}
1075 | 
1076 | 	if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil {
1077 | 		t.Fatalf("Port is in use - mapping should have failed")
1078 | 	}
1079 | 
1080 | 	if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil {
1081 | 		t.Fatalf("Failed to allocate port: %s", err)
1082 | 	}
1083 | 
1084 | 	if pm.Unmap(dstAddr1) != nil {
1085 | 		t.Fatalf("Failed to release port")
1086 | 	}
1087 | 
1088 | 	if pm.Unmap(dstAddr2) != nil {
1089 | 		t.Fatalf("Failed to release port")
1090 | 	}
1091 | 
1092 | 	if pm.Unmap(dstAddr2) == nil {
1093 | 		t.Fatalf("Port already released, but no error reported")
1094 | 	}
1095 | }</pre>
1096 | <h2>vendor/src/github.com/docker/libnetwork/portmapper/mapper_test.go:119</h2>
1097 | <pre>func TestMapUDPPorts(t *testing.T) {
1098 | 	defer netutils.SetupTestNetNS(t)()
1099 | 	pm := New()
1100 | 	dstIP1 := net.ParseIP("192.168.0.1")
1101 | 	dstIP2 := net.ParseIP("192.168.0.2")
1102 | 	dstAddr1 := &net.UDPAddr{IP: dstIP1, Port: 80}
1103 | 	dstAddr2 := &net.UDPAddr{IP: dstIP2, Port: 80}
1104 | 
1105 | 	srcAddr1 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")}
1106 | 	srcAddr2 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")}
1107 | 
1108 | 	addrEqual := func(addr1, addr2 net.Addr) bool {
1109 | 		return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String())
1110 | 	}
1111 | 
1112 | 	if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil {
1113 | 		t.Fatalf("Failed to allocate port: %s", err)
1114 | 	} else if !addrEqual(dstAddr1, host) {
1115 | 		t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s",
1116 | 			dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network())
1117 | 	}
1118 | 
1119 | 	if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil {
1120 | 		t.Fatalf("Port is in use - mapping should have failed")
1121 | 	}
1122 | 
1123 | 	if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil {
1124 | 		t.Fatalf("Port is in use - mapping should have failed")
1125 | 	}
1126 | 
1127 | 	if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil {
1128 | 		t.Fatalf("Failed to allocate port: %s", err)
1129 | 	}
1130 | 
1131 | 	if pm.Unmap(dstAddr1) != nil {
1132 | 		t.Fatalf("Failed to release port")
1133 | 	}
1134 | 
1135 | 	if pm.Unmap(dstAddr2) != nil {
1136 | 		t.Fatalf("Failed to release port")
1137 | 	}
1138 | 
1139 | 	if pm.Unmap(dstAddr2) == nil {
1140 | 		t.Fatalf("Port already released, but no error reported")
1141 | 	}
1142 | }</pre>
1143 | <h1>#9 found 2 clones</h1>
1144 | <h2>integration-cli/docker_cli_create_test.go:104</h2>
1145 | <pre>func (s *DockerSuite) TestCreateWithPortRange(c *check.C) {
1146 | 
1147 | 	runCmd := exec.Command(dockerBinary, "create", "-p", "3300-3303:3300-3303/tcp", "busybox", "echo")
1148 | 	out, _, _, err := runCommandWithStdoutStderr(runCmd)
1149 | 	if err != nil {
1150 | 		c.Fatal(out, err)
1151 | 	}
1152 | 
1153 | 	cleanedContainerID := strings.TrimSpace(out)
1154 | 
1155 | 	inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID)
1156 | 	out, _, err = runCommandWithOutput(inspectCmd)
1157 | 	if err != nil {
1158 | 		c.Fatalf("out should've been a container id: %s, %v", out, err)
1159 | 	}
1160 | 
1161 | 	containers := []struct {
1162 | 		HostConfig *struct {
1163 | 			PortBindings map[nat.Port][]nat.PortBinding
1164 | 		}
1165 | 	}{}
1166 | 	if err := json.Unmarshal([]byte(out), &containers); err != nil {
1167 | 		c.Fatalf("Error inspecting the container: %s", err)
1168 | 	}
1169 | 	if len(containers) != 1 {
1170 | 		c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers))
1171 | 	}
1172 | 
1173 | 	cont := containers[0]
1174 | 	if cont.HostConfig == nil {
1175 | 		c.Fatalf("Expected HostConfig, got none")
1176 | 	}
1177 | 
1178 | 	if len(cont.HostConfig.PortBindings) != 4 {
1179 | 		c.Fatalf("Expected 4 ports bindings, got %d", len(cont.HostConfig.PortBindings))
1180 | 	}
1181 | 	for k, v := range cont.HostConfig.PortBindings {
1182 | 		if len(v) != 1 {
1183 | 			c.Fatalf("Expected 1 ports binding, for the port  %s but found %s", k, v)
1184 | 		}
1185 | 		if k.Port() != v[0].HostPort {
1186 | 			c.Fatalf("Expected host port %d to match published port  %d", k.Port(), v[0].HostPort)
1187 | 		}
1188 | 	}
1189 | 
1190 | }</pre>
1191 | <h2>integration-cli/docker_cli_create_test.go:151</h2>
1192 | <pre>func (s *DockerSuite) TestCreateWithiLargePortRange(c *check.C) {
1193 | 
1194 | 	runCmd := exec.Command(dockerBinary, "create", "-p", "1-65535:1-65535/tcp", "busybox", "echo")
1195 | 	out, _, _, err := runCommandWithStdoutStderr(runCmd)
1196 | 	if err != nil {
1197 | 		c.Fatal(out, err)
1198 | 	}
1199 | 
1200 | 	cleanedContainerID := strings.TrimSpace(out)
1201 | 
1202 | 	inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID)
1203 | 	out, _, err = runCommandWithOutput(inspectCmd)
1204 | 	if err != nil {
1205 | 		c.Fatalf("out should've been a container id: %s, %v", out, err)
1206 | 	}
1207 | 
1208 | 	containers := []struct {
1209 | 		HostConfig *struct {
1210 | 			PortBindings map[nat.Port][]nat.PortBinding
1211 | 		}
1212 | 	}{}
1213 | 	if err := json.Unmarshal([]byte(out), &containers); err != nil {
1214 | 		c.Fatalf("Error inspecting the container: %s", err)
1215 | 	}
1216 | 	if len(containers) != 1 {
1217 | 		c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers))
1218 | 	}
1219 | 
1220 | 	cont := containers[0]
1221 | 	if cont.HostConfig == nil {
1222 | 		c.Fatalf("Expected HostConfig, got none")
1223 | 	}
1224 | 
1225 | 	if len(cont.HostConfig.PortBindings) != 65535 {
1226 | 		c.Fatalf("Expected 65535 ports bindings, got %d", len(cont.HostConfig.PortBindings))
1227 | 	}
1228 | 	for k, v := range cont.HostConfig.PortBindings {
1229 | 		if len(v) != 1 {
1230 | 			c.Fatalf("Expected 1 ports binding, for the port  %s but found %s", k, v)
1231 | 		}
1232 | 		if k.Port() != v[0].HostPort {
1233 | 			c.Fatalf("Expected host port %d to match published port  %d", k.Port(), v[0].HostPort)
1234 | 		}
1235 | 	}
1236 | 
1237 | }</pre>
1238 | 


--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/mibk/dupl
2 | 
3 | go 1.14
4 | 


--------------------------------------------------------------------------------
/job/buildtree.go:
--------------------------------------------------------------------------------
 1 | package job
 2 | 
 3 | import (
 4 | 	"github.com/mibk/dupl/suffixtree"
 5 | 	"github.com/mibk/dupl/syntax"
 6 | )
 7 | 
 8 | func BuildTree(schan chan []*syntax.Node) (t *suffixtree.STree, d *[]*syntax.Node, done chan bool) {
 9 | 	t = suffixtree.New()
10 | 	data := make([]*syntax.Node, 0, 100)
11 | 	done = make(chan bool)
12 | 	go func() {
13 | 		for seq := range schan {
14 | 			data = append(data, seq...)
15 | 			for _, node := range seq {
16 | 				t.Update(node)
17 | 			}
18 | 		}
19 | 		done <- true
20 | 	}()
21 | 	return t, &data, done
22 | }
23 | 


--------------------------------------------------------------------------------
/job/parse.go:
--------------------------------------------------------------------------------
 1 | package job
 2 | 
 3 | import (
 4 | 	"log"
 5 | 
 6 | 	"github.com/mibk/dupl/syntax"
 7 | 	"github.com/mibk/dupl/syntax/golang"
 8 | )
 9 | 
10 | func Parse(fchan chan string) chan []*syntax.Node {
11 | 
12 | 	// parse AST
13 | 	achan := make(chan *syntax.Node)
14 | 	go func() {
15 | 		for file := range fchan {
16 | 			ast, err := golang.Parse(file)
17 | 			if err != nil {
18 | 				log.Println(err)
19 | 				continue
20 | 			}
21 | 			achan <- ast
22 | 		}
23 | 		close(achan)
24 | 	}()
25 | 
26 | 	// serialize
27 | 	schan := make(chan []*syntax.Node)
28 | 	go func() {
29 | 		for ast := range achan {
30 | 			seq := syntax.Serialize(ast)
31 | 			schan <- seq
32 | 		}
33 | 		close(schan)
34 | 	}()
35 | 	return schan
36 | }
37 | 


--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
  1 | package main
  2 | 
  3 | import (
  4 | 	"bufio"
  5 | 	"flag"
  6 | 	"fmt"
  7 | 	"io/ioutil"
  8 | 	"log"
  9 | 	"os"
 10 | 	"path/filepath"
 11 | 	"sort"
 12 | 	"strings"
 13 | 
 14 | 	"github.com/mibk/dupl/job"
 15 | 	"github.com/mibk/dupl/printer"
 16 | 	"github.com/mibk/dupl/syntax"
 17 | )
 18 | 
      // defaultThreshold is the default value of the -threshold (-t) flag.
 19 | const defaultThreshold = 15
 20 | 
      // Command-line configuration shared by the functions in this file.
 21 | var (
      	// paths lists the files and directories to scan; main replaces it
      	// with the positional arguments when any are given.
 22 | 	paths     = []string{"."}
      	// vendor disables the filter in crawlPaths that skips vendor paths.
 23 | 	vendor    = flag.Bool("vendor", false, "")
 24 | 	verbose   = flag.Bool("verbose", false, "")
 25 | 	threshold = flag.Int("threshold", defaultThreshold, "")
      	// files makes filesFeed read file names from standard input.
 26 | 	files     = flag.Bool("files", false, "")
 27 | 
      	// html and plumbing select the output printer; main rejects the
      	// combination of both.
 28 | 	html     = flag.Bool("html", false, "")
 29 | 	plumbing = flag.Bool("plumbing", false, "")
 30 | )
 31 | 
      // Path fragments crawlPaths uses to recognise files inside a vendor
      // directory, either at the start of a path or further down.
 32 | const (
 33 | 	vendorDirPrefix = "vendor" + string(filepath.Separator)
 34 | 	vendorDirInPath = string(filepath.Separator) + vendorDirPrefix
 35 | )
 36 | 
      // init registers the short flags -v and -t, binding them to the same
      // variables as -verbose and -threshold.
 37 | func init() {
 38 | 	flag.BoolVar(verbose, "v", false, "alias for -verbose")
 39 | 	flag.IntVar(threshold, "t", defaultThreshold, "alias for -threshold")
 40 | }
 41 | 
 42 | func main() {
 43 | 	flag.Usage = usage
 44 | 	flag.Parse()
 45 | 	if *html && *plumbing {
 46 | 		log.Fatal("you can have either plumbing or HTML output")
 47 | 	}
 48 | 	if flag.NArg() > 0 {
 49 | 		paths = flag.Args()
 50 | 	}
 51 | 
 52 | 	if *verbose {
 53 | 		log.Println("Building suffix tree")
 54 | 	}
 55 | 	schan := job.Parse(filesFeed())
 56 | 	t, data, done := job.BuildTree(schan)
 57 | 	<-done
 58 | 
 59 | 	// finish stream
 60 | 	t.Update(&syntax.Node{Type: -1})
 61 | 
 62 | 	if *verbose {
 63 | 		log.Println("Searching for clones")
 64 | 	}
 65 | 	mchan := t.FindDuplOver(*threshold)
 66 | 	duplChan := make(chan syntax.Match)
 67 | 	go func() {
 68 | 		for m := range mchan {
 69 | 			match := syntax.FindSyntaxUnits(*data, m, *threshold)
 70 | 			if len(match.Frags) > 0 {
 71 | 				duplChan <- match
 72 | 			}
 73 | 		}
 74 | 		close(duplChan)
 75 | 	}()
 76 | 
 77 | 	newPrinter := printer.NewText
 78 | 	if *html {
 79 | 		newPrinter = printer.NewHTML
 80 | 	} else if *plumbing {
 81 | 		newPrinter = printer.NewPlumbing
 82 | 	}
 83 | 	p := newPrinter(os.Stdout, ioutil.ReadFile)
 84 | 	if err := printDupls(p, duplChan); err != nil {
 85 | 		log.Fatal(err)
 86 | 	}
 87 | }
 88 | 
 89 | func filesFeed() chan string {
 90 | 	if *files {
 91 | 		fchan := make(chan string)
 92 | 		go func() {
 93 | 			s := bufio.NewScanner(os.Stdin)
 94 | 			for s.Scan() {
 95 | 				f := s.Text()
 96 | 				fchan <- strings.TrimPrefix(f, "./")
 97 | 			}
 98 | 			close(fchan)
 99 | 		}()
100 | 		return fchan
101 | 	}
102 | 	return crawlPaths(paths)
103 | }
104 | 
105 | func crawlPaths(paths []string) chan string {
106 | 	fchan := make(chan string)
107 | 	go func() {
108 | 		for _, path := range paths {
109 | 			info, err := os.Lstat(path)
110 | 			if err != nil {
111 | 				log.Fatal(err)
112 | 			}
113 | 			if !info.IsDir() {
114 | 				fchan <- path
115 | 				continue
116 | 			}
117 | 			err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error {
118 | 				if !*vendor && (strings.HasPrefix(path, vendorDirPrefix) ||
119 | 					strings.Contains(path, vendorDirInPath)) {
120 | 					return nil
121 | 				}
122 | 				if !info.IsDir() && strings.HasSuffix(info.Name(), ".go") {
123 | 					fchan <- path
124 | 				}
125 | 				return nil
126 | 			})
127 | 			if err != nil {
128 | 				log.Fatal(err)
129 | 			}
130 | 		}
131 | 		close(fchan)
132 | 	}()
133 | 	return fchan
134 | }
135 | 
136 | func printDupls(p printer.Printer, duplChan <-chan syntax.Match) error {
137 | 	groups := make(map[string][][]*syntax.Node)
138 | 	for dupl := range duplChan {
139 | 		groups[dupl.Hash] = append(groups[dupl.Hash], dupl.Frags...)
140 | 	}
141 | 	keys := make([]string, 0, len(groups))
142 | 	for k := range groups {
143 | 		keys = append(keys, k)
144 | 	}
145 | 	sort.Strings(keys)
146 | 
147 | 	if err := p.PrintHeader(); err != nil {
148 | 		return err
149 | 	}
150 | 	for _, k := range keys {
151 | 		uniq := unique(groups[k])
152 | 		if len(uniq) > 1 {
153 | 			if err := p.PrintClones(uniq); err != nil {
154 | 				return err
155 | 			}
156 | 		}
157 | 	}
158 | 	return p.PrintFooter()
159 | }
160 | 
161 | func unique(group [][]*syntax.Node) [][]*syntax.Node {
162 | 	fileMap := make(map[string]map[int]struct{})
163 | 
164 | 	var newGroup [][]*syntax.Node
165 | 	for _, seq := range group {
166 | 		node := seq[0]
167 | 		file, ok := fileMap[node.Filename]
168 | 		if !ok {
169 | 			file = make(map[int]struct{})
170 | 			fileMap[node.Filename] = file
171 | 		}
172 | 		if _, ok := file[node.Pos]; !ok {
173 | 			file[node.Pos] = struct{}{}
174 | 			newGroup = append(newGroup, seq)
175 | 		}
176 | 	}
177 | 	return newGroup
178 | }
179 | 
      // usage prints the command-line help to standard error and exits with
      // status 2. main installs it as flag.Usage.
180 | func usage() {
181 | 	fmt.Fprintln(os.Stderr, `Usage: dupl [flags] [paths]
182 | 
183 | Paths:
184 |   If the given path is a file, dupl will use it regardless of
185 |   the file extension. If it is a directory, it will recursively
186 |   search for *.go files in that directory.
187 | 
188 |   If no path is given, dupl will recursively search for *.go
189 |   files in the current directory.
190 | 
191 | Flags:
192 |   -files
193 |     	read file names from stdin one at each line
194 |   -html
195 |     	output the results as HTML, including duplicate code fragments
196 |   -plumbing
197 |     	plumbing (easy-to-parse) output for consumption by scripts or tools
198 |   -t, -threshold size
199 |     	minimum token sequence size as a clone (default 15)
200 |   -vendor
201 |     	check files in vendor directory
202 |   -v, -verbose
203 |     	explain what is being done
204 | 
205 | Examples:
206 |   dupl -t 100
207 |     	Search clones in the current directory of size at least
208 |     	100 tokens.
209 |   dupl $(find app/ -name '*_test.go')
210 |     	Search for clones in tests in the app directory.
211 |   find app/ -name '*_test.go' |dupl -files
212 |     	The same as above.`)
213 | 	os.Exit(2)
214 | }
215 | 


--------------------------------------------------------------------------------
/printer/html.go:
--------------------------------------------------------------------------------
  1 | package printer
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"html"
  7 | 	"io"
  8 | 	"regexp"
  9 | 	"sort"
 10 | 
 11 | 	"github.com/mibk/dupl/syntax"
 12 | )
 13 | 
      // htmlprinter is the Printer that renders clone groups as HTML.
 14 | type htmlprinter struct {
      	// iota numbers the clone groups; PrintClones increments it per call.
 15 | 	iota int
      	// w receives the generated HTML.
 16 | 	w    io.Writer
      	// ReadFile loads the source files the fragments are cut from.
 17 | 	ReadFile
 18 | }
 19 | 
      // NewHTML returns a Printer that writes an HTML report to w and uses
      // fread to load the source files it quotes.
 20 | func NewHTML(w io.Writer, fread ReadFile) Printer {
 21 | 	return &htmlprinter{w: w, ReadFile: fread}
 22 | }
 23 | 
      // PrintHeader writes the document preamble (doctype, charset, title and
      // the style sheet for <pre> blocks) and returns any write error.
 24 | func (p *htmlprinter) PrintHeader() error {
 25 | 	_, err := fmt.Fprint(p.w, `<!DOCTYPE html>
 26 | <meta charset="utf-8"/>
 27 | <title>Duplicates</title>
 28 | <style>
 29 | 	pre {
 30 | 		background-color: #FFD;
 31 | 		border: 1px solid #E2E2E2;
 32 | 		padding: 1ex;
 33 | 	}
 34 | </style>
 35 | `)
 36 | 	return err
 37 | }
 38 | 
 39 | func (p *htmlprinter) PrintClones(dups [][]*syntax.Node) error {
 40 | 	p.iota++
 41 | 	fmt.Fprintf(p.w, "<h1>#%d found %d clones</h1>\n", p.iota, len(dups))
 42 | 
 43 | 	clones := make([]clone, len(dups))
 44 | 	for i, dup := range dups {
 45 | 		cnt := len(dup)
 46 | 		if cnt == 0 {
 47 | 			panic("zero length dup")
 48 | 		}
 49 | 		nstart := dup[0]
 50 | 		nend := dup[cnt-1]
 51 | 
 52 | 		file, err := p.ReadFile(nstart.Filename)
 53 | 		if err != nil {
 54 | 			return err
 55 | 		}
 56 | 
 57 | 		lineStart, _ := blockLines(file, nstart.Pos, nend.End)
 58 | 		cl := clone{filename: nstart.Filename, lineStart: lineStart}
 59 | 		start := findLineBeg(file, nstart.Pos)
 60 | 		content := append(toWhitespace(file[start:nstart.Pos]), file[nstart.Pos:nend.End]...)
 61 | 		cl.fragment = deindent(content)
 62 | 		clones[i] = cl
 63 | 	}
 64 | 
 65 | 	sort.Sort(byNameAndLine(clones))
 66 | 	for _, cl := range clones {
 67 | 		fmt.Fprintf(p.w, "<h2>%s:%d</h2>\n<pre>%s</pre>\n", cl.filename, cl.lineStart,
 68 | 			html.EscapeString(string(cl.fragment)))
 69 | 	}
 70 | 	return nil
 71 | }
 72 | 
      // PrintFooter writes nothing and always returns nil.
 73 | func (*htmlprinter) PrintFooter() error { return nil }
 74 | 
 75 | func findLineBeg(file []byte, index int) int {
 76 | 	for i := index; i >= 0; i-- {
 77 | 		if file[i] == '\n' {
 78 | 			return i + 1
 79 | 		}
 80 | 	}
 81 | 	return 0
 82 | }
 83 | 
 84 | func toWhitespace(str []byte) []byte {
 85 | 	var out []byte
 86 | 	for _, c := range bytes.Runes(str) {
 87 | 		if c == '\t' {
 88 | 			out = append(out, '\t')
 89 | 		} else {
 90 | 			out = append(out, ' ')
 91 | 		}
 92 | 	}
 93 | 	return out
 94 | }
 95 | 
 96 | func deindent(block []byte) []byte {
 97 | 	const maxVal = 99
 98 | 	min := maxVal
 99 | 	re := regexp.MustCompile(`(^|\n)(\t*)\S`)
100 | 	for _, line := range re.FindAllSubmatch(block, -1) {
101 | 		indent := line[2]
102 | 		if len(indent) < min {
103 | 			min = len(indent)
104 | 		}
105 | 	}
106 | 	if min == 0 || min == maxVal {
107 | 		return block
108 | 	}
109 | 	block = block[min:]
110 | Loop:
111 | 	for i := 0; i < len(block); i++ {
112 | 		if block[i] == '\n' && i != len(block)-1 {
113 | 			for j := 0; j < min; j++ {
114 | 				if block[i+j+1] != '\t' {
115 | 					continue Loop
116 | 				}
117 | 			}
118 | 			block = append(block[:i+1], block[i+1+min:]...)
119 | 		}
120 | 	}
121 | 	return block
122 | }
123 | 


--------------------------------------------------------------------------------
/printer/html_test.go:
--------------------------------------------------------------------------------
 1 | package printer
 2 | 
 3 | import "testing"
 4 | 
 5 | func TestToWhitespace(t *testing.T) {
 6 | 	testCases := []struct {
 7 | 		in     string
 8 | 		expect string
 9 | 	}{
10 | 		{"\t   ", "\t   "},
11 | 		{"\tčřď", "\t   "},
12 | 		{"  \ta", "  \t "},
13 | 	}
14 | 
15 | 	for _, tc := range testCases {
16 | 		actual := toWhitespace([]byte(tc.in))
17 | 		if tc.expect != string(actual) {
18 | 			t.Errorf("got '%s', want '%s'", actual, tc.expect)
19 | 		}
20 | 	}
21 | }
22 | 
23 | func TestDeindent(t *testing.T) {
24 | 	testCases := []struct {
25 | 		in     string
26 | 		expect string
27 | 	}{
28 | 		{"\t$\n\t\t$\n\t$", "$\n\t$\n$"},
29 | 		{"\t$\r\n\t\t$\r\n\t$", "$\r\n\t$\r\n$"},
30 | 		{"\t$\n\t\t$\n", "$\n\t$\n"},
31 | 		{"\t$\n\n\t\t$", "$\n\n\t$"},
32 | 	}
33 | 	for _, tc := range testCases {
34 | 		actual := deindent([]byte(tc.in))
35 | 		if tc.expect != string(actual) {
36 | 			t.Errorf("got '%s', want '%s'", actual, tc.expect)
37 | 		}
38 | 	}
39 | }
40 | 


--------------------------------------------------------------------------------
/printer/plumbing.go:
--------------------------------------------------------------------------------
 1 | package printer
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"io"
 6 | 	"sort"
 7 | 
 8 | 	"github.com/mibk/dupl/syntax"
 9 | )
10 | 
     // plumbing is the Printer that emits one "duplicate of" line per clone.
11 | type plumbing struct {
     	// w receives the output.
12 | 	w io.Writer
     	// ReadFile loads source files so line numbers can be computed.
13 | 	ReadFile
14 | }
15 | 
     // NewPlumbing returns a Printer that writes plumbing output to w and
     // uses fread to load source files.
16 | func NewPlumbing(w io.Writer, fread ReadFile) Printer {
17 | 	return &plumbing{w, fread}
18 | }
19 | 
     // PrintHeader writes nothing and always returns nil.
20 | func (p *plumbing) PrintHeader() error { return nil }
21 | 
22 | func (p *plumbing) PrintClones(dups [][]*syntax.Node) error {
23 | 	clones, err := prepareClonesInfo(p.ReadFile, dups)
24 | 	if err != nil {
25 | 		return err
26 | 	}
27 | 	sort.Sort(byNameAndLine(clones))
28 | 	for i, cl := range clones {
29 | 		nextCl := clones[(i+1)%len(clones)]
30 | 		fmt.Fprintf(p.w, "%s:%d-%d: duplicate of %s:%d-%d\n", cl.filename, cl.lineStart, cl.lineEnd,
31 | 			nextCl.filename, nextCl.lineStart, nextCl.lineEnd)
32 | 	}
33 | 	return nil
34 | }
35 | 
     // PrintFooter writes nothing and always returns nil.
36 | func (p *plumbing) PrintFooter() error { return nil }
37 | 


--------------------------------------------------------------------------------
/printer/printer.go:
--------------------------------------------------------------------------------
 1 | package printer
 2 | 
 3 | import "github.com/mibk/dupl/syntax"
 4 | 
     // ReadFile returns the contents of the named file. Printers use it to
     // load the sources that clones refer to.
 5 | type ReadFile func(filename string) ([]byte, error)
 6 | 
     // Printer reports clone groups in a particular output format.
 7 | type Printer interface {
     	// PrintHeader is called once before any clone group is printed.
 8 | 	PrintHeader() error
     	// PrintClones prints one group; every element of dups is the node
     	// sequence of one clone.
 9 | 	PrintClones(dups [][]*syntax.Node) error
     	// PrintFooter is called once after the last clone group.
10 | 	PrintFooter() error
11 | }
12 | 


--------------------------------------------------------------------------------
/printer/text.go:
--------------------------------------------------------------------------------
  1 | package printer
  2 | 
  3 | import (
  4 | 	"fmt"
  5 | 	"io"
  6 | 	"sort"
  7 | 
  8 | 	"github.com/mibk/dupl/syntax"
  9 | )
 10 | 
      // text is the Printer that produces the plain-text report.
 11 | type text struct {
      	// cnt counts PrintClones calls; PrintFooter reports it.
 12 | 	cnt int
      	// w receives the output.
 13 | 	w   io.Writer
      	// ReadFile loads source files so line numbers can be computed.
 14 | 	ReadFile
 15 | }
 16 | 
      // NewText returns a Printer that writes a plain-text report to w and
      // uses fread to load source files.
 17 | func NewText(w io.Writer, fread ReadFile) Printer {
 18 | 	return &text{w: w, ReadFile: fread}
 19 | }
 20 | 
      // PrintHeader writes nothing and always returns nil.
 21 | func (p *text) PrintHeader() error { return nil }
 22 | 
 23 | func (p *text) PrintClones(dups [][]*syntax.Node) error {
 24 | 	p.cnt++
 25 | 	fmt.Fprintf(p.w, "found %d clones:\n", len(dups))
 26 | 	clones, err := prepareClonesInfo(p.ReadFile, dups)
 27 | 	if err != nil {
 28 | 		return err
 29 | 	}
 30 | 	sort.Sort(byNameAndLine(clones))
 31 | 	for _, cl := range clones {
 32 | 		fmt.Fprintf(p.w, "  %s:%d,%d\n", cl.filename, cl.lineStart, cl.lineEnd)
 33 | 	}
 34 | 	return nil
 35 | }
 36 | 
      // PrintFooter writes a blank line followed by the number of clone
      // groups counted by PrintClones, and returns any write error.
 37 | func (p *text) PrintFooter() error {
 38 | 	_, err := fmt.Fprintf(p.w, "\nFound total %d clone groups.\n", p.cnt)
 39 | 	return err
 40 | }
 41 | 
 42 | func prepareClonesInfo(fread ReadFile, dups [][]*syntax.Node) ([]clone, error) {
 43 | 	clones := make([]clone, len(dups))
 44 | 	for i, dup := range dups {
 45 | 		cnt := len(dup)
 46 | 		if cnt == 0 {
 47 | 			panic("zero length dup")
 48 | 		}
 49 | 		nstart := dup[0]
 50 | 		nend := dup[cnt-1]
 51 | 
 52 | 		file, err := fread(nstart.Filename)
 53 | 		if err != nil {
 54 | 			return nil, err
 55 | 		}
 56 | 
 57 | 		cl := clone{filename: nstart.Filename}
 58 | 		cl.lineStart, cl.lineEnd = blockLines(file, nstart.Pos, nend.End)
 59 | 		clones[i] = cl
 60 | 	}
 61 | 	return clones, nil
 62 | }
 63 | 
 64 | func blockLines(file []byte, from, to int) (int, int) {
 65 | 	line := 1
 66 | 	lineStart, lineEnd := 0, 0
 67 | 	for offset, b := range file {
 68 | 		if b == '\n' {
 69 | 			line++
 70 | 		}
 71 | 		if offset == from {
 72 | 			lineStart = line
 73 | 		}
 74 | 		if offset == to-1 {
 75 | 			lineEnd = line
 76 | 			break
 77 | 		}
 78 | 	}
 79 | 	return lineStart, lineEnd
 80 | }
 81 | 
      // clone describes one printed occurrence of a duplicated fragment.
 82 | type clone struct {
 83 | 	filename  string
      	// lineStart and lineEnd are line numbers as computed by blockLines.
 84 | 	lineStart int
 85 | 	lineEnd   int
      	// fragment is the source text of the clone; only the HTML printer
      	// fills it in.
 86 | 	fragment  []byte
 87 | }
 88 | 
 89 | type byNameAndLine []clone
 90 | 
 91 | func (c byNameAndLine) Len() int { return len(c) }
 92 | 
 93 | func (c byNameAndLine) Swap(i, j int) { c[i], c[j] = c[j], c[i] }
 94 | 
 95 | func (c byNameAndLine) Less(i, j int) bool {
 96 | 	if c[i].filename == c[j].filename {
 97 | 		return c[i].lineStart < c[j].lineStart
 98 | 	}
 99 | 	return c[i].filename < c[j].filename
100 | }
101 | 


--------------------------------------------------------------------------------
/suffixtree/dupl.go:
--------------------------------------------------------------------------------
 1 | package suffixtree
 2 | 
 3 | import "sort"
 4 | 
     // Match is one duplicate reported by FindDuplOver.
 5 | type Match struct {
     	// Ps holds the start positions of the duplicated sequence.
 6 | 	Ps  []Pos
     	// Len is the length of the duplicated sequence.
 7 | 	Len Pos
 8 | }
 9 | 
10 | type posList struct {
11 | 	positions []Pos
12 | }
13 | 
14 | func newPosList() *posList {
15 | 	return &posList{make([]Pos, 0)}
16 | }
17 | 
18 | func (p *posList) append(p2 *posList) {
19 | 	p.positions = append(p.positions, p2.positions...)
20 | }
21 | 
22 | func (p *posList) add(pos Pos) {
23 | 	p.positions = append(p.positions, pos)
24 | }
25 | 
     // contextList groups positions by an int key. walkTrans uses the value
     // of the token preceding a position as that key, or 0 at position 0.
26 | type contextList struct {
27 | 	lists map[int]*posList
28 | }
29 | 
     // newContextList returns a contextList with an empty, non-nil map.
30 | func newContextList() *contextList {
31 | 	return &contextList{make(map[int]*posList)}
32 | }
33 | 
34 | func (c *contextList) getAll() []Pos {
35 | 	keys := make([]int, 0, len(c.lists))
36 | 	for k := range c.lists {
37 | 		keys = append(keys, k)
38 | 	}
39 | 	sort.Ints(keys)
40 | 	var ps []Pos
41 | 	for _, k := range keys {
42 | 		ps = append(ps, c.lists[k].positions...)
43 | 	}
44 | 	return ps
45 | }
46 | 
47 | func (c *contextList) append(c2 *contextList) {
48 | 	for lc, pl := range c2.lists {
49 | 		if _, ok := c.lists[lc]; ok {
50 | 			c.lists[lc].append(pl)
51 | 		} else {
52 | 			c.lists[lc] = pl
53 | 		}
54 | 	}
55 | }
56 | 
57 | // FindDuplOver find pairs of maximal duplicities over a threshold
58 | // length.
59 | func (t *STree) FindDuplOver(threshold int) <-chan Match {
60 | 	auxTran := newTran(0, 0, t.root)
61 | 	ch := make(chan Match)
62 | 	go func() {
63 | 		walkTrans(auxTran, 0, threshold, ch)
64 | 		close(ch)
65 | 	}()
66 | 	return ch
67 | }
68 | 
69 | func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList {
70 | 	s := parent.state
71 | 
72 | 	cl := newContextList()
73 | 
74 | 	if len(s.trans) == 0 {
75 | 		pl := newPosList()
76 | 		start := parent.end + 1 - Pos(length)
77 | 		pl.add(start)
78 | 		ch := 0
79 | 		if start > 0 {
80 | 			ch = s.tree.data[start-1].Val()
81 | 		}
82 | 		cl.lists[ch] = pl
83 | 		return cl
84 | 	}
85 | 
86 | 	for _, t := range s.trans {
87 | 		ln := length + t.len()
88 | 		cl2 := walkTrans(t, ln, threshold, ch)
89 | 		if ln >= threshold {
90 | 			cl.append(cl2)
91 | 		}
92 | 	}
93 | 	if length >= threshold && len(cl.lists) > 1 {
94 | 		m := Match{cl.getAll(), Pos(length)}
95 | 		ch <- m
96 | 	}
97 | 	return cl
98 | }
99 | 


--------------------------------------------------------------------------------
/suffixtree/dupl_test.go:
--------------------------------------------------------------------------------
 1 | package suffixtree
 2 | 
 3 | import (
 4 | 	"fmt"
 5 | 	"sort"
 6 | 	"testing"
 7 | )
 8 | 
 9 | func (m Match) String() string {
10 | 	str := "(["
11 | 	for _, p := range m.Ps {
12 | 		str += fmt.Sprintf("%d, ", p)
13 | 	}
14 | 	return str[:len(str)-2] + fmt.Sprintf("], %d)", m.Len)
15 | }
16 | 
17 | func sliceCmp(sl1, sl2 []Pos) bool {
18 | 	if len(sl1) != len(sl2) {
19 | 		return false
20 | 	}
21 | 	sort.Sort(ByPos(sl1))
22 | 	sort.Sort(ByPos(sl2))
23 | 	for i := range sl1 {
24 | 		if sl1[i] != sl2[i] {
25 | 			return false
26 | 		}
27 | 	}
28 | 	return true
29 | }
30 | 
31 | type ByPos []Pos
32 | 
33 | func (p ByPos) Len() int {
34 | 	return len(p)
35 | }
36 | 
37 | func (p ByPos) Swap(i, j int) {
38 | 	p[i], p[j] = p[j], p[i]
39 | }
40 | 
41 | func (p ByPos) Less(i, j int) bool {
42 | 	return p[i] < p[j]
43 | }
44 | 
45 | func TestFindingDupl(t *testing.T) {
46 | 	testCases := []struct {
47 | 		s         string
48 | 		threshold int
49 | 		matches   []Match
50 | 	}{
51 | 		{"abab$", 3, []Match{}},
52 | 		{"abab$", 2, []Match{{[]Pos{0, 2}, 2}}},
53 | 		{"abcbcabc$", 3, []Match{{[]Pos{0, 5}, 3}}},
54 | 		{"abcbcabc$", 2, []Match{{[]Pos{0, 5}, 3}, {[]Pos{1, 3, 6}, 2}}},
55 | 		{`All work and no play makes Jack a dull boy
56 | All work and no play makes Jack a dull boy$`, 4, []Match{{[]Pos{0, 43}, 42}}},
57 | 	}
58 | 
59 | 	for _, tc := range testCases {
60 | 		tree := New()
61 | 		tree.Update(str2tok(tc.s)...)
62 | 		ch := tree.FindDuplOver(tc.threshold)
63 | 		for _, exp := range tc.matches {
64 | 			act, ok := <-ch
65 | 			if !ok {
66 | 				t.Errorf("missing match %v for '%s'", exp, tc.s)
67 | 			} else if exp.Len != act.Len || !sliceCmp(exp.Ps, act.Ps) {
68 | 				t.Errorf("got %v, want %v", act, exp)
69 | 			}
70 | 		}
71 | 		for act := range ch {
72 | 			t.Errorf("beyond expected match %v for '%s'", act, tc.s)
73 | 		}
74 | 	}
75 | }
76 | 


--------------------------------------------------------------------------------
/suffixtree/suffixtree.go:
--------------------------------------------------------------------------------
  1 | package suffixtree
  2 | 
  3 | import (
  4 | 	"bytes"
  5 | 	"fmt"
  6 | 	"math"
  7 | 	"strings"
  8 | )
  9 | 
// infinity is the end position given to the open transitions created by
// fork; it is the largest value a Pos can hold.
const infinity = math.MaxInt32

// Pos denotes position in data slice.
type Pos int32

// Token is a single element of the data a suffix tree is built from.
// Tokens are compared by the values returned from Val.
type Token interface {
	Val() int
}
 18 | 
 19 | // STree is a struct representing a suffix tree.
 20 | type STree struct {
 21 | 	data     []Token
 22 | 	root     *state
 23 | 	auxState *state // auxiliary state
 24 | 
 25 | 	// active point
 26 | 	s          *state
 27 | 	start, end Pos
 28 | }
 29 | 
 30 | // New creates new suffix tree.
 31 | func New() *STree {
 32 | 	t := new(STree)
 33 | 	t.data = make([]Token, 0, 50)
 34 | 	t.root = newState(t)
 35 | 	t.auxState = newState(t)
 36 | 	t.root.linkState = t.auxState
 37 | 	t.s = t.root
 38 | 	return t
 39 | }
 40 | 
 41 | // Update refreshes the suffix tree to by new data.
 42 | func (t *STree) Update(data ...Token) {
 43 | 	t.data = append(t.data, data...)
 44 | 	for range data {
 45 | 		t.update()
 46 | 		t.s, t.start = t.canonize(t.s, t.start, t.end)
 47 | 		t.end++
 48 | 	}
 49 | }
 50 | 
// update transforms suffix tree T(n) to T(n+1).
//
// Starting at the active point, it forks a new open transition for the
// token at t.end and moves on through the link state, until testAndSplit
// reports an end point. The states it forks from are chained through
// their linkState fields along the way.
func (t *STree) update() {
	oldr := t.root

	// (s, (start, end)) is the canonical reference pair for the active point
	s := t.s
	start, end := t.start, t.end
	var r *state
	for {
		var endPoint bool
		r, endPoint = t.testAndSplit(s, start, end-1)
		if endPoint {
			break
		}
		r.fork(end)
		// link the state forked in the previous iteration to this one
		if oldr != t.root {
			oldr.linkState = r
		}
		oldr = r
		s, start = t.canonize(s.linkState, start, end-1)
	}
	// the last forked state is linked to the state the loop stopped at
	if oldr != t.root {
		oldr.linkState = r
	}

	// update active point
	t.s = s
	t.start = start
}
 80 | 
 81 | // testAndSplit tests whether a state with canonical ref. pair
 82 | // (s, (start, end)) is the end point, that is, a state that have
 83 | // a c-transition. If not, then state (exs, (start, end)) is made
 84 | // explicit (if not already so).
 85 | func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) {
 86 | 	c := t.data[t.end]
 87 | 	if start <= end {
 88 | 		tr := s.findTran(t.data[start])
 89 | 		splitPoint := tr.start + end - start + 1
 90 | 		if t.data[splitPoint].Val() == c.Val() {
 91 | 			return s, true
 92 | 		}
 93 | 		// make the (s, (start, end)) state explicit
 94 | 		newSt := newState(s.tree)
 95 | 		newSt.addTran(splitPoint, tr.end, tr.state)
 96 | 		tr.end = splitPoint - 1
 97 | 		tr.state = newSt
 98 | 		return newSt, false
 99 | 	}
100 | 	if s == t.auxState || s.findTran(c) != nil {
101 | 		return s, true
102 | 	}
103 | 	return s, false
104 | }
105 | 
// canonize returns updated state and start position for ref. pair
// (s, (start, end)) of state r so the new ref. pair is canonical,
// that is, referenced from the closest explicit ancestor of r.
//
// It panics if there is no transition for the token at start.
func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) {
	// the auxiliary state is replaced by the root, skipping one token
	if s == t.auxState {
		s, start = t.root, start+1
	}
	// an empty range of tokens: nothing to follow
	if start > end {
		return s, start
	}

	var tr *tran
	for {
		// a transition is looked up only while tokens remain; otherwise tr
		// keeps the value from the previous iteration
		if start <= end {
			tr = s.findTran(t.data[start])
			if tr == nil {
				panic(fmt.Sprintf("there should be some transition for '%d' at %d",
					t.data[start].Val(), start))
			}
		}
		// stop when the transition is longer than the remaining range
		if tr.end-tr.start > end-start {
			break
		}
		// otherwise follow the whole transition to its target state
		start += tr.end - tr.start + 1
		s = tr.state
	}
	if s == nil {
		panic("there should always be some suffix link resolution")
	}
	return s, start
}
137 | 
138 | func (t *STree) At(p Pos) Token {
139 | 	if p < 0 || p >= Pos(len(t.data)) {
140 | 		panic("position out of bounds")
141 | 	}
142 | 	return t.data[p]
143 | }
144 | 
145 | func (t *STree) String() string {
146 | 	buf := new(bytes.Buffer)
147 | 	printState(buf, t.root, 0)
148 | 	return buf.String()
149 | }
150 | 
151 | func printState(buf *bytes.Buffer, s *state, ident int) {
152 | 	for _, tr := range s.trans {
153 | 		fmt.Fprint(buf, strings.Repeat("  ", ident))
154 | 		fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd())
155 | 		printState(buf, tr.state, ident+1)
156 | 	}
157 | }
158 | 
159 | // state is an explicit state of the suffix tree.
160 | type state struct {
161 | 	tree      *STree
162 | 	trans     []*tran
163 | 	linkState *state
164 | }
165 | 
166 | func newState(t *STree) *state {
167 | 	return &state{
168 | 		tree:      t,
169 | 		trans:     make([]*tran, 0),
170 | 		linkState: nil,
171 | 	}
172 | }
173 | 
174 | func (s *state) addTran(start, end Pos, r *state) {
175 | 	s.trans = append(s.trans, newTran(start, end, r))
176 | }
177 | 
178 | // fork creates a new branch from the state s.
179 | func (s *state) fork(i Pos) *state {
180 | 	r := newState(s.tree)
181 | 	s.addTran(i, infinity, r)
182 | 	return r
183 | }
184 | 
185 | // findTran finds c-transition.
186 | func (s *state) findTran(c Token) *tran {
187 | 	for _, tran := range s.trans {
188 | 		if s.tree.data[tran.start].Val() == c.Val() {
189 | 			return tran
190 | 		}
191 | 	}
192 | 	return nil
193 | }
194 | 
195 | // tran represents a state's transition.
196 | type tran struct {
197 | 	start, end Pos
198 | 	state      *state
199 | }
200 | 
201 | func newTran(start, end Pos, s *state) *tran {
202 | 	return &tran{start, end, s}
203 | }
204 | 
205 | func (t *tran) len() int {
206 | 	return int(t.end - t.start + 1)
207 | }
208 | 
209 | // ActEnd returns actual end position as consistent with
210 | // the actual length of the data in the STree.
211 | func (t *tran) ActEnd() Pos {
212 | 	if t.end == infinity {
213 | 		return Pos(len(t.state.tree.data)) - 1
214 | 	}
215 | 	return t.end
216 | }
217 | 


--------------------------------------------------------------------------------
/suffixtree/suffixtree_test.go:
--------------------------------------------------------------------------------
  1 | package suffixtree
  2 | 
  3 | import "testing"
  4 | 
// char is a one-byte token used as test input for the suffix tree.
type char byte

// Val returns the byte value of c as an int.
func (c char) Val() int {
	return int(c)
}
 10 | 
 11 | func str2tok(str string) []Token {
 12 | 	toks := make([]Token, len(str))
 13 | 	for i, c := range str {
 14 | 		toks[i] = char(c)
 15 | 	}
 16 | 	return toks
 17 | }
 18 | 
// TestConstruction compares trees built by Update with trees assembled by
// hand: for "cacao", for "banana", and for "banana" after an additional
// Update with '$'.
func TestConstruction(t *testing.T) {
	str := "cacao"
	_, s := genStates(8, str)
	// s[0] is root
	s[0].addTran(0, 1, s[1]) // ca
	s[0].addTran(1, 1, s[2]) // a
	s[0].addTran(4, 4, s[3]) // o

	s[1].addTran(2, 4, s[4]) // cao
	s[1].addTran(4, 4, s[5]) // o

	s[2].addTran(2, 4, s[4]) // cao
	s[2].addTran(4, 4, s[5]) // o

	cacao := New()
	cacao.Update(str2tok(str)...)
	compareTrees(t, s[0], cacao.root)

	str2 := "banana"
	_, r := genStates(4, str2)
	// r[0] is root
	r[0].addTran(0, 5, r[1]) // banana
	r[0].addTran(1, 5, r[2]) // anana
	r[0].addTran(2, 5, r[3]) // nana

	banana := New()
	banana.Update(str2tok(str2)...)
	compareTrees(t, r[0], banana.root)

	_, q := genStates(11, str2+"$")
	// q[0] is root
	q[0].addTran(0, 6, q[1]) // banana$
	q[0].addTran(1, 1, q[2]) // a
	q[0].addTran(2, 3, q[3]) // na
	q[0].addTran(6, 6, q[4]) // $

	q[2].addTran(2, 3, q[5]) // na
	q[2].addTran(6, 6, q[6]) // $

	q[3].addTran(4, 6, q[7]) // na$
	q[3].addTran(6, 6, q[8]) // $

	q[5].addTran(4, 6, q[9])  // na$
	q[5].addTran(6, 6, q[10]) // $

	banana.Update(char('$'))
	compareTrees(t, q[0], banana.root)

	// no assertions follow; this only has to get through without a panic
	foo := New()
	foo.Update(str2tok("a b ac c ")...)
}
 69 | 
 70 | func compareTrees(t *testing.T, expected, actual *state) {
 71 | 	ch1, ch2 := walker(expected), walker(actual)
 72 | 	for {
 73 | 		etran, ok1 := <-ch1
 74 | 		atran, ok2 := <-ch2
 75 | 		if !ok1 || !ok2 {
 76 | 			if ok1 {
 77 | 				t.Error("expected tree is longer")
 78 | 			} else if ok2 {
 79 | 				t.Error("actual tree is longer")
 80 | 			}
 81 | 			break
 82 | 		}
 83 | 		if etran.start != atran.start || etran.ActEnd() != atran.ActEnd() {
 84 | 			t.Errorf("got transition (%d, %d) '%s', want (%d, %d) '%s'",
 85 | 				atran.start, atran.ActEnd(), actual.tree.data[atran.start:atran.ActEnd()+1],
 86 | 				etran.start, etran.ActEnd(), expected.tree.data[etran.start:etran.ActEnd()+1],
 87 | 			)
 88 | 		}
 89 | 	}
 90 | }
 91 | 
 92 | func walker(s *state) <-chan *tran {
 93 | 	ch := make(chan *tran)
 94 | 	go func() {
 95 | 		walk(s, ch)
 96 | 		close(ch)
 97 | 	}()
 98 | 	return ch
 99 | }
100 | 
101 | func walk(s *state, ch chan<- *tran) {
102 | 	for _, tr := range s.trans {
103 | 		ch <- tr
104 | 		walk(tr.state, ch)
105 | 	}
106 | }
107 | 
108 | func genStates(count int, data string) (*STree, []*state) {
109 | 	t := new(STree)
110 | 	t.data = str2tok(data)
111 | 	states := make([]*state, count)
112 | 	for i := range states {
113 | 		states[i] = newState(t)
114 | 	}
115 | 	return t, states
116 | }
117 | 
118 | type refPair struct {
119 | 	s          *state
120 | 	start, end Pos
121 | }
122 | 
123 | func TestCanonize(t *testing.T) {
124 | 	tree, s := genStates(5, "somebanana")
125 | 	tree.auxState, tree.root = s[4], s[0]
126 | 	s[0].addTran(0, 3, s[1])
127 | 	s[1].addTran(4, 6, s[2])
128 | 	s[2].addTran(7, infinity, s[3])
129 | 
130 | 	find := func(needle *state) int {
131 | 		for i, state := range s {
132 | 			if state == needle {
133 | 				return i
134 | 			}
135 | 		}
136 | 		return -1
137 | 	}
138 | 
139 | 	var testCases = []struct {
140 | 		origin, expected refPair
141 | 	}{
142 | 		{refPair{s[0], 0, 0}, refPair{s[0], 0, 0}},
143 | 		{refPair{s[0], 0, 2}, refPair{s[0], 0, 0}},
144 | 		{refPair{s[0], 0, 3}, refPair{s[1], 4, 0}},
145 | 		{refPair{s[0], 0, 8}, refPair{s[2], 7, 0}},
146 | 		{refPair{s[0], 0, 6}, refPair{s[2], 7, 0}},
147 | 		{refPair{s[0], 0, 100}, refPair{s[2], 7, 0}},
148 | 		{refPair{s[4], -1, 100}, refPair{s[2], 7, 0}},
149 | 	}
150 | 
151 | 	for _, tc := range testCases {
152 | 		s, start := tree.canonize(tc.origin.s, tc.origin.start, tc.origin.end)
153 | 		if s != tc.expected.s || start != tc.expected.start {
154 | 			t.Errorf("for origin ref. pair (%d, (%d, %d)) got (%d, %d), want (%d, %d)",
155 | 				find(tc.origin.s), tc.origin.start, tc.origin.end,
156 | 				find(s), start,
157 | 				find(tc.expected.s), tc.expected.start,
158 | 			)
159 | 		}
160 | 	}
161 | }
162 | 
// TestSplitting exercises testAndSplit on a hand-made tree with a single
// transition (0, 3) from s1 to s2 over the data "banana|cbao". tree.end
// selects the token that is being tested.
func TestSplitting(t *testing.T) {
	tree := new(STree)
	tree.data = str2tok("banana|cbao")
	s1 := newState(tree)
	s2 := newState(tree)
	s1.addTran(0, 3, s2)

	// active point is (s1, 0, -1), an explicit state
	tree.end = 7 // c
	rets, end := tree.testAndSplit(s1, 0, -1)
	if rets != s1 {
		t.Errorf("got state %p, want %p", rets, s1)
	}
	if end {
		t.Error("should not be an end-point")
	}
	tree.end = 8 // b
	_, end = tree.testAndSplit(s1, 0, -1)
	if !end {
		t.Error("should be an end-point")
	}

	// active point is (s1, 0, 2), an implicit state
	tree.end = 9 // a
	rets, end = tree.testAndSplit(s1, 0, 2)
	if rets != s1 {
		t.Error("returned state should be unchanged")
	}
	if !end {
		t.Error("should be an end-point")
	}

	// [s1]-bana->[s2] => [s1]-ban->[rets]-a->[s2]
	tree.end = 10 // o
	rets, end = tree.testAndSplit(s1, 0, 2)
	tr := s1.findTran(char('b'))
	if tr == nil {
		t.Error("should have a b-transition")
	} else if tr.state != rets {
		t.Errorf("got state %p, want %p", tr.state, rets)
	}
	tr2 := rets.findTran(char('a'))
	if tr2 == nil {
		t.Error("should have an a-transition")
	} else if tr2.state != s2 {
		t.Errorf("got state %p, want %p", tr2.state, s2)
	}
	if end {
		t.Error("should not be an end-point")
	}
}
214 | 
215 | func TestPosMaxValue(t *testing.T) {
216 | 	var p Pos = infinity
217 | 	if p+1 > 0 {
218 | 		t.Error("const infinity is not max value")
219 | 	}
220 | }
221 | 
222 | func BenchmarkConstruction(b *testing.B) {
223 | 	stream := str2tok(`all work and no play makes jack a dull boy
224 | all work and no play makes jack a dull boy
225 | all work and no play makes jack a dull boy`)
226 | 
227 | 	for i := 0; i < b.N; i++ {
228 | 		t := New()
229 | 		t.Update(stream...)
230 | 	}
231 | }
232 | 


--------------------------------------------------------------------------------
/syntax/golang/golang.go:
--------------------------------------------------------------------------------
  1 | package golang
  2 | 
  3 | import (
  4 | 	"go/ast"
  5 | 	"go/parser"
  6 | 	"go/token"
  7 | 
  8 | 	"github.com/mibk/dupl/syntax"
  9 | )
 10 | 
// Node types stored in syntax.Node.Type by the transformer. Apart from
// File, the names follow the go/ast node types they stand for. BadNode is
// assigned to every AST node that trans has no case for.
const (
	BadNode = iota
	File
	ArrayType
	AssignStmt
	BasicLit
	BinaryExpr
	BlockStmt
	BranchStmt
	CallExpr
	CaseClause
	ChanType
	CommClause
	CompositeLit
	DeclStmt
	DeferStmt
	Ellipsis
	EmptyStmt
	ExprStmt
	Field
	FieldList
	ForStmt
	FuncDecl
	FuncLit
	FuncType
	GenDecl
	GoStmt
	Ident
	IfStmt
	IncDecStmt
	IndexExpr
	InterfaceType
	KeyValueExpr
	LabeledStmt
	MapType
	ParenExpr
	RangeStmt
	ReturnStmt
	SelectStmt
	SelectorExpr
	SendStmt
	SliceExpr
	StarExpr
	StructType
	SwitchStmt
	TypeAssertExpr
	TypeSpec
	TypeSwitchStmt
	UnaryExpr
	ValueSpec
)
 62 | 
 63 | // Parse the given file and return uniform syntax tree.
 64 | func Parse(filename string) (*syntax.Node, error) {
 65 | 	fset := token.NewFileSet()
 66 | 	file, err := parser.ParseFile(fset, filename, nil, 0)
 67 | 	if err != nil {
 68 | 		return nil, err
 69 | 	}
 70 | 	t := &transformer{
 71 | 		fileset:  fset,
 72 | 		filename: filename,
 73 | 	}
 74 | 	return t.trans(file), nil
 75 | }
 76 | 
// transformer converts the AST of a single file to the uniform tree.
type transformer struct {
	fileset  *token.FileSet // used to turn node positions into offsets
	filename string         // stored in every node that is produced
}
 81 | 
// trans transforms given golang AST to uniform tree structure.
//
// The returned node carries the file name and the offsets of the start and
// the end of the AST node. Its Type is set according to the dynamic type
// of node and its children are the transformed child nodes, added in the
// order used by the matching case. Optional children are added only when
// they are present. A node of a type without a case becomes a BadNode
// without children.
func (t *transformer) trans(node ast.Node) (o *syntax.Node) {
	o = syntax.NewNode()
	o.Filename = t.filename
	st, end := node.Pos(), node.End()
	o.Pos, o.End = t.fileset.File(st).Offset(st), t.fileset.File(end).Offset(end)

	switch n := node.(type) {
	case *ast.ArrayType:
		o.Type = ArrayType
		if n.Len != nil {
			o.AddChildren(t.trans(n.Len))
		}
		o.AddChildren(t.trans(n.Elt))

	case *ast.AssignStmt:
		o.Type = AssignStmt
		// the right-hand side goes before the left-hand side
		for _, e := range n.Rhs {
			o.AddChildren(t.trans(e))
		}

		for _, e := range n.Lhs {
			o.AddChildren(t.trans(e))
		}

	case *ast.BasicLit:
		o.Type = BasicLit

	case *ast.BinaryExpr:
		o.Type = BinaryExpr
		o.AddChildren(t.trans(n.X), t.trans(n.Y))

	case *ast.BlockStmt:
		o.Type = BlockStmt
		for _, stmt := range n.List {
			o.AddChildren(t.trans(stmt))
		}

	case *ast.BranchStmt:
		o.Type = BranchStmt
		if n.Label != nil {
			o.AddChildren(t.trans(n.Label))
		}

	case *ast.CallExpr:
		o.Type = CallExpr
		o.AddChildren(t.trans(n.Fun))
		for _, arg := range n.Args {
			o.AddChildren(t.trans(arg))
		}

	case *ast.CaseClause:
		o.Type = CaseClause
		for _, e := range n.List {
			o.AddChildren(t.trans(e))
		}
		for _, stmt := range n.Body {
			o.AddChildren(t.trans(stmt))
		}

	case *ast.ChanType:
		o.Type = ChanType
		o.AddChildren(t.trans(n.Value))

	case *ast.CommClause:
		o.Type = CommClause
		if n.Comm != nil {
			o.AddChildren(t.trans(n.Comm))
		}
		for _, stmt := range n.Body {
			o.AddChildren(t.trans(stmt))
		}

	case *ast.CompositeLit:
		o.Type = CompositeLit
		if n.Type != nil {
			o.AddChildren(t.trans(n.Type))
		}
		for _, e := range n.Elts {
			o.AddChildren(t.trans(e))
		}

	case *ast.DeclStmt:
		o.Type = DeclStmt
		o.AddChildren(t.trans(n.Decl))

	case *ast.DeferStmt:
		o.Type = DeferStmt
		o.AddChildren(t.trans(n.Call))

	case *ast.Ellipsis:
		o.Type = Ellipsis
		if n.Elt != nil {
			o.AddChildren(t.trans(n.Elt))
		}

	case *ast.EmptyStmt:
		o.Type = EmptyStmt

	case *ast.ExprStmt:
		o.Type = ExprStmt
		o.AddChildren(t.trans(n.X))

	case *ast.Field:
		o.Type = Field
		for _, name := range n.Names {
			o.AddChildren(t.trans(name))
		}
		o.AddChildren(t.trans(n.Type))

	case *ast.FieldList:
		o.Type = FieldList
		for _, field := range n.List {
			o.AddChildren(t.trans(field))
		}

	case *ast.File:
		o.Type = File
		for _, decl := range n.Decls {
			if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT {
				// skip import declarations
				continue
			}
			o.AddChildren(t.trans(decl))
		}

	case *ast.ForStmt:
		o.Type = ForStmt
		if n.Init != nil {
			o.AddChildren(t.trans(n.Init))
		}
		if n.Cond != nil {
			o.AddChildren(t.trans(n.Cond))
		}
		if n.Post != nil {
			o.AddChildren(t.trans(n.Post))
		}
		o.AddChildren(t.trans(n.Body))

	case *ast.FuncDecl:
		o.Type = FuncDecl
		if n.Recv != nil {
			o.AddChildren(t.trans(n.Recv))
		}
		o.AddChildren(t.trans(n.Name), t.trans(n.Type))
		if n.Body != nil {
			o.AddChildren(t.trans(n.Body))
		}

	case *ast.FuncLit:
		o.Type = FuncLit
		o.AddChildren(t.trans(n.Type), t.trans(n.Body))

	case *ast.FuncType:
		o.Type = FuncType
		o.AddChildren(t.trans(n.Params))
		if n.Results != nil {
			o.AddChildren(t.trans(n.Results))
		}

	case *ast.GenDecl:
		o.Type = GenDecl
		for _, spec := range n.Specs {
			o.AddChildren(t.trans(spec))
		}

	case *ast.GoStmt:
		o.Type = GoStmt
		o.AddChildren(t.trans(n.Call))

	case *ast.Ident:
		o.Type = Ident

	case *ast.IfStmt:
		o.Type = IfStmt
		if n.Init != nil {
			o.AddChildren(t.trans(n.Init))
		}
		o.AddChildren(t.trans(n.Cond), t.trans(n.Body))
		if n.Else != nil {
			o.AddChildren(t.trans(n.Else))
		}

	case *ast.IncDecStmt:
		o.Type = IncDecStmt
		o.AddChildren(t.trans(n.X))

	case *ast.IndexExpr:
		o.Type = IndexExpr
		o.AddChildren(t.trans(n.X), t.trans(n.Index))

	case *ast.InterfaceType:
		o.Type = InterfaceType
		o.AddChildren(t.trans(n.Methods))

	case *ast.KeyValueExpr:
		o.Type = KeyValueExpr
		o.AddChildren(t.trans(n.Key), t.trans(n.Value))

	case *ast.LabeledStmt:
		o.Type = LabeledStmt
		o.AddChildren(t.trans(n.Label), t.trans(n.Stmt))

	case *ast.MapType:
		o.Type = MapType
		o.AddChildren(t.trans(n.Key), t.trans(n.Value))

	case *ast.ParenExpr:
		o.Type = ParenExpr
		o.AddChildren(t.trans(n.X))

	case *ast.RangeStmt:
		o.Type = RangeStmt
		if n.Key != nil {
			o.AddChildren(t.trans(n.Key))
		}
		if n.Value != nil {
			o.AddChildren(t.trans(n.Value))
		}
		o.AddChildren(t.trans(n.X), t.trans(n.Body))

	case *ast.ReturnStmt:
		o.Type = ReturnStmt
		for _, e := range n.Results {
			o.AddChildren(t.trans(e))
		}

	case *ast.SelectStmt:
		o.Type = SelectStmt
		o.AddChildren(t.trans(n.Body))

	case *ast.SelectorExpr:
		o.Type = SelectorExpr
		o.AddChildren(t.trans(n.X), t.trans(n.Sel))

	case *ast.SendStmt:
		o.Type = SendStmt
		o.AddChildren(t.trans(n.Chan), t.trans(n.Value))

	case *ast.SliceExpr:
		o.Type = SliceExpr
		o.AddChildren(t.trans(n.X))
		if n.Low != nil {
			o.AddChildren(t.trans(n.Low))
		}
		if n.High != nil {
			o.AddChildren(t.trans(n.High))
		}
		if n.Max != nil {
			o.AddChildren(t.trans(n.Max))
		}

	case *ast.StarExpr:
		o.Type = StarExpr
		o.AddChildren(t.trans(n.X))

	case *ast.StructType:
		o.Type = StructType
		o.AddChildren(t.trans(n.Fields))

	case *ast.SwitchStmt:
		o.Type = SwitchStmt
		if n.Init != nil {
			o.AddChildren(t.trans(n.Init))
		}
		if n.Tag != nil {
			o.AddChildren(t.trans(n.Tag))
		}
		o.AddChildren(t.trans(n.Body))

	case *ast.TypeAssertExpr:
		o.Type = TypeAssertExpr
		o.AddChildren(t.trans(n.X))
		if n.Type != nil {
			o.AddChildren(t.trans(n.Type))
		}

	case *ast.TypeSpec:
		o.Type = TypeSpec
		o.AddChildren(t.trans(n.Name), t.trans(n.Type))

	case *ast.TypeSwitchStmt:
		o.Type = TypeSwitchStmt
		if n.Init != nil {
			o.AddChildren(t.trans(n.Init))
		}
		o.AddChildren(t.trans(n.Assign), t.trans(n.Body))

	case *ast.UnaryExpr:
		o.Type = UnaryExpr
		o.AddChildren(t.trans(n.X))

	case *ast.ValueSpec:
		o.Type = ValueSpec
		for _, name := range n.Names {
			o.AddChildren(t.trans(name))
		}
		if n.Type != nil {
			o.AddChildren(t.trans(n.Type))
		}
		for _, val := range n.Values {
			o.AddChildren(t.trans(val))
		}

	default:
		// any other node type is kept as a leaf of type BadNode
		o.Type = BadNode

	}

	return o
}
393 | 


--------------------------------------------------------------------------------
/syntax/syntax.go:
--------------------------------------------------------------------------------
  1 | package syntax
  2 | 
  3 | import (
  4 | 	"crypto/sha1"
  5 | 
  6 | 	"github.com/mibk/dupl/suffixtree"
  7 | )
  8 | 
  9 | type Node struct {
 10 | 	Type     int
 11 | 	Filename string
 12 | 	Pos, End int
 13 | 	Children []*Node
 14 | 	Owns     int
 15 | }
 16 | 
 17 | func NewNode() *Node {
 18 | 	return &Node{}
 19 | }
 20 | 
 21 | func (n *Node) AddChildren(children ...*Node) {
 22 | 	n.Children = append(n.Children, children...)
 23 | }
 24 | 
 25 | func (n *Node) Val() int {
 26 | 	return n.Type
 27 | }
 28 | 
// Match is the result of FindSyntaxUnits: the syntax units that make up
// a duplicate, for each of its occurrences.
type Match struct {
	Hash  string    // hash computed by hashSeq from node types of the first occurrence
	Frags [][]*Node // for each occurrence, the root nodes of its syntax units
}
 33 | 
 34 | func Serialize(n *Node) []*Node {
 35 | 	stream := make([]*Node, 0, 10)
 36 | 	serial(n, &stream)
 37 | 	return stream
 38 | }
 39 | 
 40 | func serial(n *Node, stream *[]*Node) int {
 41 | 	*stream = append(*stream, n)
 42 | 	var count int
 43 | 	for _, child := range n.Children {
 44 | 		count += serial(child, stream)
 45 | 	}
 46 | 	n.Owns = count
 47 | 	return count + 1
 48 | }
 49 | 
 50 | // FindSyntaxUnits finds all complete syntax units in the match group and returns them
 51 | // with the corresponding hash.
 52 | func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match {
 53 | 	if len(m.Ps) == 0 {
 54 | 		return Match{}
 55 | 	}
 56 | 	firstSeq := data[m.Ps[0] : m.Ps[0]+m.Len]
 57 | 	indexes := getUnitsIndexes(firstSeq, threshold)
 58 | 
 59 | 	// TODO: is this really working?
 60 | 	indexCnt := len(indexes)
 61 | 	if indexCnt > 0 {
 62 | 		lasti := indexes[indexCnt-1]
 63 | 		firstn := firstSeq[lasti]
 64 | 		for i := 1; i < len(m.Ps); i++ {
 65 | 			n := data[int(m.Ps[i])+lasti]
 66 | 			if firstn.Owns != n.Owns {
 67 | 				indexes = indexes[:indexCnt-1]
 68 | 				break
 69 | 			}
 70 | 		}
 71 | 	}
 72 | 	if len(indexes) == 0 || isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) {
 73 | 		return Match{}
 74 | 	}
 75 | 
 76 | 	match := Match{Frags: make([][]*Node, len(m.Ps))}
 77 | 	for i, pos := range m.Ps {
 78 | 		match.Frags[i] = make([]*Node, len(indexes))
 79 | 		for j, index := range indexes {
 80 | 			match.Frags[i][j] = data[int(pos)+index]
 81 | 		}
 82 | 	}
 83 | 
 84 | 	lastIndex := indexes[len(indexes)-1]
 85 | 	match.Hash = hashSeq(firstSeq[indexes[0] : lastIndex+firstSeq[lastIndex].Owns])
 86 | 	return match
 87 | }
 88 | 
 89 | func getUnitsIndexes(nodeSeq []*Node, threshold int) []int {
 90 | 	var indexes []int
 91 | 	var split bool
 92 | 	for i := 0; i < len(nodeSeq); {
 93 | 		n := nodeSeq[i]
 94 | 		switch {
 95 | 		case n.Owns >= len(nodeSeq)-i:
 96 | 			// not complete syntax unit
 97 | 			i++
 98 | 			split = true
 99 | 			continue
100 | 		case n.Owns+1 < threshold:
101 | 			split = true
102 | 		default:
103 | 			if split {
104 | 				indexes = indexes[:0]
105 | 				split = false
106 | 			}
107 | 			indexes = append(indexes, i)
108 | 		}
109 | 		i += n.Owns + 1
110 | 	}
111 | 	return indexes
112 | }
113 | 
// isCyclic finds out whether there is a repetitive pattern in the found clone. If positive,
// it returns true to point out that the clone would be redundant.
func isCyclic(indexes []int, nodes []*Node) bool {
	cnt := len(indexes)
	if cnt <= 1 {
		return false
	}

	// candidate pattern lengths, counted in units: all divisors of cnt
	// up to cnt/2
	alts := make(map[int]bool)
	for i := 1; i <= cnt/2; i++ {
		if cnt%i == 0 {
			alts[i] = true
		}
	}

	for i := 0; i < indexes[cnt/2]; i++ {
		nstart := nodes[i+indexes[0]]
	AltLoop:
		for alt := range alts {
			// compare the node with its counterparts in the units that lie
			// a multiple of alt units further
			for j := alt; j < cnt; j += alt {
				index := i + indexes[j]
				if index < len(nodes) {
					nalt := nodes[index]
					if nstart.Owns == nalt.Owns && nstart.Type == nalt.Type {
						continue
					}
				} else if i >= indexes[alt] {
					return true
				}
				// the candidate does not hold; drop it
				delete(alts, alt)
				continue AltLoop
			}
		}
		if len(alts) == 0 {
			return false
		}
	}
	return true
}
153 | 
154 | func spansMultipleFiles(indexes []int, nodes []*Node) bool {
155 | 	if len(indexes) < 2 {
156 | 		return false
157 | 	}
158 | 	f := nodes[indexes[0]].Filename
159 | 	for i := 1; i < len(indexes); i++ {
160 | 		if nodes[indexes[i]].Filename != f {
161 | 			return true
162 | 		}
163 | 	}
164 | 	return false
165 | }
166 | 
167 | func hashSeq(nodes []*Node) string {
168 | 	h := sha1.New()
169 | 	bytes := make([]byte, len(nodes))
170 | 	for i, node := range nodes {
171 | 		bytes[i] = byte(node.Type)
172 | 	}
173 | 	h.Write(bytes)
174 | 	return string(h.Sum(nil))
175 | }
176 | 


--------------------------------------------------------------------------------
/syntax/syntax_test.go:
--------------------------------------------------------------------------------
  1 | package syntax
  2 | 
  3 | import "testing"
  4 | 
  5 | func TestSerialization(t *testing.T) {
  6 | 	n := genNodes(7)
  7 | 	n[0].AddChildren(n[1], n[2], n[3])
  8 | 	n[1].AddChildren(n[4], n[5])
  9 | 	n[2].AddChildren(n[6])
 10 | 	m := genNodes(6)
 11 | 	m[0].AddChildren(m[1], m[2], m[3], m[4], m[5])
 12 | 	testCases := []struct {
 13 | 		t        *Node
 14 | 		expected []int
 15 | 	}{
 16 | 		{n[0], []int{6, 2, 0, 0, 1, 0, 0}},
 17 | 		{m[0], []int{5, 0, 0, 0, 0, 0}},
 18 | 	}
 19 | 
 20 | 	for _, tc := range testCases {
 21 | 		compareSeries(t, Serialize(tc.t), tc.expected)
 22 | 	}
 23 | }
 24 | 
 25 | func genNodes(cnt int) []*Node {
 26 | 	nodes := make([]*Node, cnt)
 27 | 	for i := range nodes {
 28 | 		nodes[i] = NewNode()
 29 | 	}
 30 | 	return nodes
 31 | }
 32 | 
 33 | func compareSeries(t *testing.T, stream []*Node, owns []int) {
 34 | 	if len(stream) != len(owns) {
 35 | 		t.Errorf("series aren't the same length; got %d, want %d", len(stream), len(owns))
 36 | 		return
 37 | 	}
 38 | 	for i, item := range stream {
 39 | 		if item.Owns != owns[i] {
 40 | 			t.Errorf("got %d, want %d", item.Owns, owns[i])
 41 | 		}
 42 | 	}
 43 | }
 44 | 
 45 | func TestGetUnitsIndexes(t *testing.T) {
 46 | 	testCases := []struct {
 47 | 		seq       string
 48 | 		threshold int
 49 | 		expected  []int
 50 | 	}{
 51 | 		{"a8 a0 a2 a0 a0", 3, []int{2}},
 52 | 		{"a0 a8 a2 a0 a0", 1, []int{2}},
 53 | 		{"a3 a0 a0 a0 a1", 3, []int{0}},
 54 | 		{"a3 a0 a0 a0 a0", 1, []int{0, 4}},
 55 | 		{"a1 a0 a1 a0 a0", 2, []int{0, 2}},
 56 | 	}
 57 | 
 58 | Loop:
 59 | 	for _, tc := range testCases {
 60 | 		nodes := str2nodes(tc.seq)
 61 | 		indexes := getUnitsIndexes(nodes, tc.threshold)
 62 | 		for i := range tc.expected {
 63 | 			if i > len(indexes)-1 || tc.expected[i] != indexes[i] {
 64 | 				t.Errorf("for seq '%s', got %v, want %v", tc.seq, indexes, tc.expected)
 65 | 			}
 66 | 			continue Loop
 67 | 		}
 68 | 	}
 69 | }
 70 | 
 71 | func TestCyclicDupl(t *testing.T) {
 72 | 	testCases := []struct {
 73 | 		seq      string
 74 | 		indexes  []int
 75 | 		expected bool
 76 | 	}{
 77 | 		{"a1 b0 a2 b0", []int{0, 2}, false},
 78 | 		{"a1 b0 a1 b0", []int{0, 2}, true},
 79 | 		{"a0 a0", []int{0, 1}, true},
 80 | 		{"a1 b0 c1 b0 a1 b0 c1 b0", []int{0, 2, 4, 6}, true},
 81 | 		{"a1 b0 c1 b0 a1 b0", []int{0, 2, 4}, false},
 82 | 		{"a0 b0 a0 c0", []int{0, 1, 2, 3}, false},
 83 | 		{"a0 b0 a0 b0 a0", []int{0, 1, 2}, false},
 84 | 		{"a1 b0 a1 b0 c1 b0", []int{0, 2, 4}, false},
 85 | 		{"a1 a1 a1 a1 a1 a1", []int{0, 4}, false},
 86 | 		{"a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0", []int{0, 3, 6, 9, 12}, true},
 87 | 	}
 88 | 
 89 | 	for _, tc := range testCases {
 90 | 		nodes := str2nodes(tc.seq)
 91 | 		if tc.expected != isCyclic(tc.indexes, nodes) {
 92 | 			t.Errorf("for seq '%s', indexes %v, got %t, want %t", tc.seq, tc.indexes, !tc.expected, tc.expected)
 93 | 		}
 94 | 	}
 95 | }
 96 | 
 97 | // str2nodes converts strint to a sequence of *Node by following principle:
 98 | //   - node is represented by 2 characters
 99 | //   - first character is node type
100 | //   - second character is the number for Node.Owns.
101 | func str2nodes(str string) []*Node {
102 | 	chars := []rune(str)
103 | 	nodes := make([]*Node, (len(chars)+1)/3)
104 | 	for i := 0; i < len(chars)-1; i += 3 {
105 | 		nodes[i/3] = &Node{Type: int(chars[i]), Owns: int(chars[i+1] - '0')}
106 | 	}
107 | 	return nodes
108 | }
109 | 


--------------------------------------------------------------------------------