├── .travis.yml ├── LICENSE ├── README.md ├── _output_example └── docker.html ├── go.mod ├── job ├── buildtree.go └── parse.go ├── main.go ├── printer ├── html.go ├── html_test.go ├── plumbing.go ├── printer.go └── text.go ├── suffixtree ├── dupl.go ├── dupl_test.go ├── suffixtree.go └── suffixtree_test.go └── syntax ├── golang └── golang.go ├── syntax.go └── syntax_test.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | go: 3 | - 1.14 4 | - 1.15 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Michal Bohuslávek 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # dupl [![Build Status](https://travis-ci.org/mibk/dupl.png)](https://travis-ci.org/mibk/dupl) 2 | 3 | **dupl** is a tool written in Go for finding code clones. So far it can find clones only 4 | in Go source files. The method uses a suffix tree built over serialized ASTs. It ignores the values 5 | of AST nodes and operates only on their types (e.g. `if a == 13 {}` and `if x == 100 {}` are 6 | considered the same, provided the sequence exceeds the minimum token sequence size). 7 | 8 | Because of the method used, dupl can report so-called "false positives" in its output. These are 9 | matches we would not consider clones (either because they are too small, or because the values of the matched 10 | tokens are completely different). 11 | 12 | ## Installation 13 | 14 | ```bash 15 | go get -u github.com/mibk/dupl 16 | ``` 17 | 18 | ## Usage 19 | 20 | ``` 21 | Usage of dupl: 22 | dupl [flags] [paths] 23 | 24 | Paths: 25 | If the given path is a file, dupl will use it regardless of 26 | the file extension. If it is a directory it will recursively 27 | search for *.go files in that directory. 28 | 29 | If no path is given dupl will recursively search for *.go 30 | files in the current directory. 31 | 32 | Flags: 33 | -files 34 | read file names from stdin one at each line 35 | -html 36 | output the results as HTML, including duplicate code fragments 37 | -plumbing 38 | plumbing (easy-to-parse) output for consumption by scripts or tools 39 | -t, -threshold size 40 | minimum token sequence size as a clone (default 15) 41 | -vendor 42 | check files in vendor directory 43 | -v, -verbose 44 | explain what is being done 45 | 46 | Examples: 47 | dupl -t 100 48 | Search clones in the current directory of size at least 49 | 100 tokens. 50 | dupl $(find app/ -name '*_test.go') 51 | Search for clones in tests in the app directory. 
52 | find app/ -name '*_test.go' |dupl -files 53 | The same as above. 54 | ``` 55 | 56 | ## Example 57 | 58 | Running the command below on the [Docker](https://www.docker.com) source code produces output that, in reduced form, 59 | looks like [this](http://htmlpreview.github.io/?https://github.com/mibk/dupl/blob/master/_output_example/docker.html). 60 | 61 | ```bash 62 | $ dupl -t 200 -html >docker.html 63 | ``` 64 | -------------------------------------------------------------------------------- /_output_example/docker.html: -------------------------------------------------------------------------------- 1 | 2 | 3 |
for i, toggle := range []bool{true, false, true} { 14 | wb, err := (&icmpMessage{ 15 | Type: ipv6.ICMPTypeEchoRequest, Code: 0, 16 | Body: &icmpEcho{ 17 | ID: os.Getpid() & 0xffff, Seq: i + 1, 18 | Data: []byte("HELLO-R-U-THERE"), 19 | }, 20 | }).Marshal() 21 | if err != nil { 22 | t.Fatalf("icmpMessage.Marshal failed: %v", err) 23 | } 24 | if err := p.SetControlMessage(cf, toggle); err != nil { 25 | t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err) 26 | } 27 | cm.HopLimit = i + 1 28 | if _, err := p.WriteTo(wb, &cm, dst); err != nil { 29 | t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err) 30 | } 31 | b := make([]byte, 128) 32 | if n, cm, _, err := p.ReadFrom(b); err != nil { 33 | t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err) 34 | } else { 35 | t.Logf("rcvd cmsg: %v", cm) 36 | if m, err := parseICMPMessage(b[:n]); err != nil { 37 | t.Fatalf("parseICMPMessage failed: %v", err) 38 | } else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 { 39 | t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0) 40 | } 41 | } 42 | }43 |
for i, toggle := range []bool{true, false, true} { 45 | wb, err := (&icmpMessage{ 46 | Type: ipv6.ICMPTypeEchoRequest, Code: 0, 47 | Body: &icmpEcho{ 48 | ID: os.Getpid() & 0xffff, Seq: i + 1, 49 | Data: []byte("HELLO-R-U-THERE"), 50 | }, 51 | }).Marshal() 52 | if err != nil { 53 | t.Fatalf("icmpMessage.Marshal failed: %v", err) 54 | } 55 | if err := p.SetControlMessage(cf, toggle); err != nil { 56 | t.Fatalf("ipv6.PacketConn.SetControlMessage failed: %v", err) 57 | } 58 | cm.HopLimit = i + 1 59 | if _, err := p.WriteTo(wb, &cm, dst); err != nil { 60 | t.Fatalf("ipv6.PacketConn.WriteTo failed: %v", err) 61 | } 62 | b := make([]byte, 128) 63 | if n, cm, _, err := p.ReadFrom(b); err != nil { 64 | t.Fatalf("ipv6.PacketConn.ReadFrom failed: %v", err) 65 | } else { 66 | t.Logf("rcvd cmsg: %v", cm) 67 | if m, err := parseICMPMessage(b[:n]); err != nil { 68 | t.Fatalf("parseICMPMessage failed: %v", err) 69 | } else if m.Type != ipv6.ICMPTypeEchoReply || m.Code != 0 { 70 | t.Fatalf("got type=%v, code=%v; expected type=%v, code=%v", m.Type, m.Code, ipv6.ICMPTypeEchoReply, 0) 71 | } 72 | } 73 | }74 |
func TestHybiClientHandshake(t *testing.T) { 77 | b := bytes.NewBuffer([]byte{}) 78 | bw := bufio.NewWriter(b) 79 | br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols 80 | Upgrade: websocket 81 | Connection: Upgrade 82 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo= 83 | Sec-WebSocket-Protocol: chat 84 | 85 | `)) 86 | var err error 87 | config := new(Config) 88 | config.Location, err = url.ParseRequestURI("ws://server.example.com/chat") 89 | if err != nil { 90 | t.Fatal("location url", err) 91 | } 92 | config.Origin, err = url.ParseRequestURI("http://example.com") 93 | if err != nil { 94 | t.Fatal("origin url", err) 95 | } 96 | config.Protocol = append(config.Protocol, "chat") 97 | config.Protocol = append(config.Protocol, "superchat") 98 | config.Version = ProtocolVersionHybi13 99 | 100 | config.handshakeData = map[string]string{ 101 | "key": "dGhlIHNhbXBsZSBub25jZQ==", 102 | } 103 | err = hybiClientHandshake(config, br, bw) 104 | if err != nil { 105 | t.Errorf("handshake failed: %v", err) 106 | } 107 | req, err := http.ReadRequest(bufio.NewReader(b)) 108 | if err != nil { 109 | t.Fatalf("read request: %v", err) 110 | } 111 | if req.Method != "GET" { 112 | t.Errorf("request method expected GET, but got %q", req.Method) 113 | } 114 | if req.URL.Path != "/chat" { 115 | t.Errorf("request path expected /chat, but got %q", req.URL.Path) 116 | } 117 | if req.Proto != "HTTP/1.1" { 118 | t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto) 119 | } 120 | if req.Host != "server.example.com" { 121 | t.Errorf("request Host expected server.example.com, but got %v", req.Host) 122 | } 123 | var expectedHeader = map[string]string{ 124 | "Connection": "Upgrade", 125 | "Upgrade": "websocket", 126 | "Sec-Websocket-Key": config.handshakeData["key"], 127 | "Origin": config.Origin.String(), 128 | "Sec-Websocket-Protocol": "chat, superchat", 129 | "Sec-Websocket-Version": fmt.Sprintf("%d", ProtocolVersionHybi13), 130 | } 131 | for k, v := range 
expectedHeader { 132 | if req.Header.Get(k) != v { 133 | t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k))) 134 | } 135 | } 136 | }137 |
func TestHybiClientHandshakeHybi08(t *testing.T) { 139 | b := bytes.NewBuffer([]byte{}) 140 | bw := bufio.NewWriter(b) 141 | br := bufio.NewReader(strings.NewReader(`HTTP/1.1 101 Switching Protocols 142 | Upgrade: websocket 143 | Connection: Upgrade 144 | Sec-WebSocket-Accept: s3pPLMBiTxaQ9kYGzzhZRbK+xOo= 145 | Sec-WebSocket-Protocol: chat 146 | 147 | `)) 148 | var err error 149 | config := new(Config) 150 | config.Location, err = url.ParseRequestURI("ws://server.example.com/chat") 151 | if err != nil { 152 | t.Fatal("location url", err) 153 | } 154 | config.Origin, err = url.ParseRequestURI("http://example.com") 155 | if err != nil { 156 | t.Fatal("origin url", err) 157 | } 158 | config.Protocol = append(config.Protocol, "chat") 159 | config.Protocol = append(config.Protocol, "superchat") 160 | config.Version = ProtocolVersionHybi08 161 | 162 | config.handshakeData = map[string]string{ 163 | "key": "dGhlIHNhbXBsZSBub25jZQ==", 164 | } 165 | err = hybiClientHandshake(config, br, bw) 166 | if err != nil { 167 | t.Errorf("handshake failed: %v", err) 168 | } 169 | req, err := http.ReadRequest(bufio.NewReader(b)) 170 | if err != nil { 171 | t.Fatalf("read request: %v", err) 172 | } 173 | if req.Method != "GET" { 174 | t.Errorf("request method expected GET, but got %q", req.Method) 175 | } 176 | if req.URL.Path != "/chat" { 177 | t.Errorf("request path expected /demo, but got %q", req.URL.Path) 178 | } 179 | if req.Proto != "HTTP/1.1" { 180 | t.Errorf("request proto expected HTTP/1.1, but got %q", req.Proto) 181 | } 182 | if req.Host != "server.example.com" { 183 | t.Errorf("request Host expected example.com, but got %v", req.Host) 184 | } 185 | var expectedHeader = map[string]string{ 186 | "Connection": "Upgrade", 187 | "Upgrade": "websocket", 188 | "Sec-Websocket-Key": config.handshakeData["key"], 189 | "Sec-Websocket-Origin": config.Origin.String(), 190 | "Sec-Websocket-Protocol": "chat, superchat", 191 | "Sec-Websocket-Version": fmt.Sprintf("%d", 
ProtocolVersionHybi08), 192 | } 193 | for k, v := range expectedHeader { 194 | if req.Header.Get(k) != v { 195 | t.Errorf(fmt.Sprintf("%s expected %q but got %q", k, v, req.Header.Get(k))) 196 | } 197 | } 198 | }199 |
func (icp *icmpv4Parameters) escape() []canonICMPv4ParamRecord { 202 | id := -1 203 | for i, r := range icp.Registries { 204 | if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") { 205 | id = i 206 | break 207 | } 208 | } 209 | if id < 0 { 210 | return nil 211 | } 212 | prs := make([]canonICMPv4ParamRecord, len(icp.Registries[id].Records)) 213 | sr := strings.NewReplacer( 214 | "Messages", "", 215 | "Message", "", 216 | "ICMP", "", 217 | "+", "P", 218 | "-", "", 219 | "/", "", 220 | ".", "", 221 | " ", "", 222 | ) 223 | for i, pr := range icp.Registries[id].Records { 224 | if strings.Contains(pr.Descr, "Reserved") || 225 | strings.Contains(pr.Descr, "Unassigned") || 226 | strings.Contains(pr.Descr, "Deprecated") || 227 | strings.Contains(pr.Descr, "Experiment") || 228 | strings.Contains(pr.Descr, "experiment") { 229 | continue 230 | } 231 | ss := strings.Split(pr.Descr, "\n") 232 | if len(ss) > 1 { 233 | prs[i].Descr = strings.Join(ss, " ") 234 | } else { 235 | prs[i].Descr = ss[0] 236 | } 237 | s := strings.TrimSpace(prs[i].Descr) 238 | prs[i].OrigDescr = s 239 | prs[i].Descr = sr.Replace(s) 240 | prs[i].Value, _ = strconv.Atoi(pr.Value) 241 | } 242 | return prs 243 | }244 |
func (icp *icmpv6Parameters) escape() []canonICMPv6ParamRecord { 246 | id := -1 247 | for i, r := range icp.Registries { 248 | if strings.Contains(r.Title, "Type") || strings.Contains(r.Title, "type") { 249 | id = i 250 | break 251 | } 252 | } 253 | if id < 0 { 254 | return nil 255 | } 256 | prs := make([]canonICMPv6ParamRecord, len(icp.Registries[id].Records)) 257 | sr := strings.NewReplacer( 258 | "Messages", "", 259 | "Message", "", 260 | "ICMP", "", 261 | "+", "P", 262 | "-", "", 263 | "/", "", 264 | ".", "", 265 | " ", "", 266 | ) 267 | for i, pr := range icp.Registries[id].Records { 268 | if strings.Contains(pr.Name, "Reserved") || 269 | strings.Contains(pr.Name, "Unassigned") || 270 | strings.Contains(pr.Name, "Deprecated") || 271 | strings.Contains(pr.Name, "Experiment") || 272 | strings.Contains(pr.Name, "experiment") { 273 | continue 274 | } 275 | ss := strings.Split(pr.Name, "\n") 276 | if len(ss) > 1 { 277 | prs[i].Name = strings.Join(ss, " ") 278 | } else { 279 | prs[i].Name = ss[0] 280 | } 281 | s := strings.TrimSpace(prs[i].Name) 282 | prs[i].OrigName = s 283 | prs[i].Name = sr.Replace(s) 284 | prs[i].Value, _ = strconv.Atoi(pr.Value) 285 | } 286 | return prs 287 | }288 |
package term 291 | 292 | import ( 293 | "syscall" 294 | "unsafe" 295 | ) 296 | 297 | const ( 298 | getTermios = syscall.TIOCGETA 299 | setTermios = syscall.TIOCSETA 300 | 301 | IGNBRK = syscall.IGNBRK 302 | PARMRK = syscall.PARMRK 303 | INLCR = syscall.INLCR 304 | IGNCR = syscall.IGNCR 305 | ECHONL = syscall.ECHONL 306 | CSIZE = syscall.CSIZE 307 | ICRNL = syscall.ICRNL 308 | ISTRIP = syscall.ISTRIP 309 | PARENB = syscall.PARENB 310 | ECHO = syscall.ECHO 311 | ICANON = syscall.ICANON 312 | ISIG = syscall.ISIG 313 | IXON = syscall.IXON 314 | BRKINT = syscall.BRKINT 315 | INPCK = syscall.INPCK 316 | OPOST = syscall.OPOST 317 | CS8 = syscall.CS8 318 | IEXTEN = syscall.IEXTEN 319 | ) 320 | 321 | type Termios struct { 322 | Iflag uint64 323 | Oflag uint64 324 | Cflag uint64 325 | Lflag uint64 326 | Cc [20]byte 327 | Ispeed uint64 328 | Ospeed uint64 329 | } 330 | 331 | // MakeRaw put the terminal connected to the given file descriptor into raw 332 | // mode and returns the previous state of the terminal so that it can be 333 | // restored. 334 | func MakeRaw(fd uintptr) (*State, error) { 335 | var oldState State 336 | if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 { 337 | return nil, err 338 | } 339 | 340 | newState := oldState.termios 341 | newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON) 342 | newState.Oflag &^= OPOST 343 | newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN) 344 | newState.Cflag &^= (CSIZE | PARENB) 345 | newState.Cflag |= CS8 346 | newState.Cc[syscall.VMIN] = 1 347 | newState.Cc[syscall.VTIME] = 0 348 | 349 | if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 { 350 | return nil, err 351 | } 352 | 353 | return &oldState, nil 354 | }355 |
package term 357 | 358 | import ( 359 | "syscall" 360 | "unsafe" 361 | ) 362 | 363 | const ( 364 | getTermios = syscall.TIOCGETA 365 | setTermios = syscall.TIOCSETA 366 | 367 | IGNBRK = syscall.IGNBRK 368 | PARMRK = syscall.PARMRK 369 | INLCR = syscall.INLCR 370 | IGNCR = syscall.IGNCR 371 | ECHONL = syscall.ECHONL 372 | CSIZE = syscall.CSIZE 373 | ICRNL = syscall.ICRNL 374 | ISTRIP = syscall.ISTRIP 375 | PARENB = syscall.PARENB 376 | ECHO = syscall.ECHO 377 | ICANON = syscall.ICANON 378 | ISIG = syscall.ISIG 379 | IXON = syscall.IXON 380 | BRKINT = syscall.BRKINT 381 | INPCK = syscall.INPCK 382 | OPOST = syscall.OPOST 383 | CS8 = syscall.CS8 384 | IEXTEN = syscall.IEXTEN 385 | ) 386 | 387 | type Termios struct { 388 | Iflag uint32 389 | Oflag uint32 390 | Cflag uint32 391 | Lflag uint32 392 | Cc [20]byte 393 | Ispeed uint32 394 | Ospeed uint32 395 | } 396 | 397 | // MakeRaw put the terminal connected to the given file descriptor into raw 398 | // mode and returns the previous state of the terminal so that it can be 399 | // restored. 400 | func MakeRaw(fd uintptr) (*State, error) { 401 | var oldState State 402 | if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(getTermios), uintptr(unsafe.Pointer(&oldState.termios))); err != 0 { 403 | return nil, err 404 | } 405 | 406 | newState := oldState.termios 407 | newState.Iflag &^= (IGNBRK | BRKINT | PARMRK | ISTRIP | INLCR | IGNCR | ICRNL | IXON) 408 | newState.Oflag &^= OPOST 409 | newState.Lflag &^= (ECHO | ECHONL | ICANON | ISIG | IEXTEN) 410 | newState.Cflag &^= (CSIZE | PARENB) 411 | newState.Cflag |= CS8 412 | newState.Cc[syscall.VMIN] = 1 413 | newState.Cc[syscall.VTIME] = 0 414 | 415 | if _, _, err := syscall.Syscall(syscall.SYS_IOCTL, fd, uintptr(setTermios), uintptr(unsafe.Pointer(&newState))); err != 0 { 416 | return nil, err 417 | } 418 | 419 | return &oldState, nil 420 | }421 |
package main

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"go/format"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
)

// registries pairs each registry URL with the function that turns its XML
// into Go constant declarations.
var registries = []struct {
	url   string
	parse func(io.Writer, io.Reader) error
}{
	{
		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
		parseDSCPRegistry,
	},
	{
		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
		parseTOSTCByte,
	},
}

// main downloads every registry, renders the generated source into a buffer,
// formats it with go/format and writes the result to standard output. Any
// failure is reported on standard error and ends the process with status 1.
func main() {
	var bb bytes.Buffer
	fmt.Fprintf(&bb, "// go run gentv.go\n")
	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
	fmt.Fprintf(&bb, "package ipv4_test\n\n")
	for _, r := range registries {
		if err := fetchRegistry(&bb, r.url, r.parse); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		fmt.Fprintf(&bb, "\n")
	}
	b, err := format.Source(bb.Bytes())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if _, err := os.Stdout.Write(b); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// fetchRegistry downloads url and hands the response body to parse, which
// writes its output to w. It lives in its own function so the deferred Close
// runs after each download instead of piling up until main returns.
func fetchRegistry(w io.Writer, url string, parse func(io.Writer, io.Reader) error) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("got HTTP status code %v for %v", resp.StatusCode, url)
	}
	return parse(w, resp.Body)
}

// parseDSCPRegistry decodes a DSCP registry document from r and writes a Go
// const block with one DiffServ-prefixed constant per record to w.
func parseDSCPRegistry(w io.Writer, r io.Reader) error {
	dec := xml.NewDecoder(r)
	var dr dscpRegistry
	if err := dec.Decode(&dr); err != nil {
		return err
	}
	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
	fmt.Fprintf(w, "const (\n")
	for _, rec := range dr.escape() {
		fmt.Fprintf(w, "DiffServ%s = %#x// %s\n", rec.Name, rec.Value, rec.OrigName)
	}
	fmt.Fprintf(w, ")\n")
	return nil
}

// dscpRegistry is the decoded form of the DSCP registry XML document.
type dscpRegistry struct {
	XMLName     xml.Name     `xml:"registry"`
	Title       string       `xml:"title"`
	Updated     string       `xml:"updated"`
	Note        string       `xml:"note"`
	RegTitle    string       `xml:"registry>title"`
	PoolRecords []dscpRecord `xml:"registry>record"`
	Records     []dscpRecord `xml:"registry>registry>record"`
}

// dscpRecord is one <record> element: a name and its binary "space" string.
type dscpRecord struct {
	Name  string `xml:"name"`
	Space string `xml:"space"`
}

// canonDSCPRecord is a record prepared for code generation.
type canonDSCPRecord struct {
	OrigName string // trimmed name as it appears in the registry
	Name     string // OrigName with "+", "-", "/", "." and spaces removed
	Value    int
}

// escape returns one canonical record per entry of drr.Records. Space is
// parsed as a base-2 number of at most 8 bits and shifted left by two bits.
// NOTE(review): the shift presumably places the code point in the upper bits
// of the TOS/traffic-class byte; confirm.
// When Space does not parse, the record keeps its names and a zero Value.
func (drr *dscpRegistry) escape() []canonDSCPRecord {
	drs := make([]canonDSCPRecord, len(drr.Records))
	sr := strings.NewReplacer(
		"+", "",
		"-", "",
		"/", "",
		".", "",
		" ", "",
	)
	for i, dr := range drr.Records {
		s := strings.TrimSpace(dr.Name)
		drs[i].OrigName = s
		drs[i].Name = sr.Replace(s)
		n, err := strconv.ParseUint(dr.Space, 2, 8)
		if err != nil {
			continue
		}
		drs[i].Value = int(n) << 2
	}
	return drs
}

// parseTOSTCByte decodes a TOS byte registry document from r and writes a Go
// const block with one constant per record to w.
func parseTOSTCByte(w io.Writer, r io.Reader) error {
	dec := xml.NewDecoder(r)
	var ttb tosTCByte
	if err := dec.Decode(&ttb); err != nil {
		return err
	}
	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
	fmt.Fprintf(w, "const (\n")
	for _, rec := range ttb.escape() {
		fmt.Fprintf(w, "%s = %#x// %s\n", rec.Keyword, rec.Value, rec.OrigKeyword)
	}
	fmt.Fprintf(w, ")\n")
	return nil
}

// tosTCByte is the decoded form of the TOS byte registry XML document.
type tosTCByte struct {
	XMLName  xml.Name          `xml:"registry"`
	Title    string            `xml:"title"`
	Updated  string            `xml:"updated"`
	Note     string            `xml:"note"`
	RegTitle string            `xml:"registry>title"`
	Records  []tosTCByteRecord `xml:"registry>record"`
}

// tosTCByteRecord is one <record> element: a binary string and its keyword.
type tosTCByteRecord struct {
	Binary  string `xml:"binary"`
	Keyword string `xml:"keyword"`
}

// canonTOSTCByteRecord is a record prepared for code generation.
type canonTOSTCByteRecord struct {
	OrigKeyword string // trimmed keyword as it appears in the registry
	Keyword     string // identifier derived from OrigKeyword
	Value       int
}

// escape returns one canonical record per entry of ttb.Records. A keyword made
// of several space-separated words loses its first word; the remainder is
// stripped of "Capable", parentheses, "+", "-", "/", "." and spaces. Binary is
// parsed as a base-2 number of at most 8 bits. When it does not parse, the
// record keeps its keywords and a zero Value.
func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
	sr := strings.NewReplacer(
		"Capable", "",
		"(", "",
		")", "",
		"+", "",
		"-", "",
		"/", "",
		".", "",
		" ", "",
	)
	for i, tr := range ttb.Records {
		s := strings.TrimSpace(tr.Keyword)
		trs[i].OrigKeyword = s
		kw := s
		if sp := strings.IndexByte(s, ' '); sp >= 0 {
			// Drop the first word and keep everything after the first space.
			kw = s[sp+1:]
		}
		trs[i].Keyword = sr.Replace(kw)
		n, err := strconv.ParseUint(tr.Binary, 2, 8)
		if err != nil {
			continue
		}
		trs[i].Value = int(n)
	}
	return trs
}
package main

import (
	"bytes"
	"encoding/xml"
	"fmt"
	"go/format"
	"io"
	"net/http"
	"os"
	"strconv"
	"strings"
)

// registries pairs each registry URL with the function that turns its XML
// into Go constant declarations.
var registries = []struct {
	url   string
	parse func(io.Writer, io.Reader) error
}{
	{
		"http://www.iana.org/assignments/dscp-registry/dscp-registry.xml",
		parseDSCPRegistry,
	},
	{
		"http://www.iana.org/assignments/ipv4-tos-byte/ipv4-tos-byte.xml",
		parseTOSTCByte,
	},
}

// main downloads every registry, renders the generated source into a buffer,
// formats it with go/format and writes the result to standard output. Any
// failure is reported on standard error and ends the process with status 1.
func main() {
	var bb bytes.Buffer
	fmt.Fprintf(&bb, "// go run gentv.go\n")
	fmt.Fprintf(&bb, "// GENERATED BY THE COMMAND ABOVE; DO NOT EDIT\n\n")
	fmt.Fprintf(&bb, "package ipv6_test\n\n")
	for _, r := range registries {
		if err := fetchRegistry(&bb, r.url, r.parse); err != nil {
			fmt.Fprintln(os.Stderr, err)
			os.Exit(1)
		}
		fmt.Fprintf(&bb, "\n")
	}
	b, err := format.Source(bb.Bytes())
	if err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
	if _, err := os.Stdout.Write(b); err != nil {
		fmt.Fprintln(os.Stderr, err)
		os.Exit(1)
	}
}

// fetchRegistry downloads url and hands the response body to parse, which
// writes its output to w. It lives in its own function so the deferred Close
// runs after each download instead of piling up until main returns.
func fetchRegistry(w io.Writer, url string, parse func(io.Writer, io.Reader) error) error {
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("got HTTP status code %v for %v", resp.StatusCode, url)
	}
	return parse(w, resp.Body)
}

// parseDSCPRegistry decodes a DSCP registry document from r and writes a Go
// const block with one DiffServ-prefixed constant per record to w.
func parseDSCPRegistry(w io.Writer, r io.Reader) error {
	dec := xml.NewDecoder(r)
	var dr dscpRegistry
	if err := dec.Decode(&dr); err != nil {
		return err
	}
	fmt.Fprintf(w, "// %s, Updated: %s\n", dr.Title, dr.Updated)
	fmt.Fprintf(w, "const (\n")
	for _, rec := range dr.escape() {
		fmt.Fprintf(w, "DiffServ%s = %#x// %s\n", rec.Name, rec.Value, rec.OrigName)
	}
	fmt.Fprintf(w, ")\n")
	return nil
}

// dscpRegistry is the decoded form of the DSCP registry XML document.
type dscpRegistry struct {
	XMLName     xml.Name     `xml:"registry"`
	Title       string       `xml:"title"`
	Updated     string       `xml:"updated"`
	Note        string       `xml:"note"`
	RegTitle    string       `xml:"registry>title"`
	PoolRecords []dscpRecord `xml:"registry>record"`
	Records     []dscpRecord `xml:"registry>registry>record"`
}

// dscpRecord is one <record> element: a name and its binary "space" string.
type dscpRecord struct {
	Name  string `xml:"name"`
	Space string `xml:"space"`
}

// canonDSCPRecord is a record prepared for code generation.
type canonDSCPRecord struct {
	OrigName string // trimmed name as it appears in the registry
	Name     string // OrigName with "+", "-", "/", "." and spaces removed
	Value    int
}

// escape returns one canonical record per entry of drr.Records. Space is
// parsed as a base-2 number of at most 8 bits and shifted left by two bits.
// NOTE(review): the shift presumably places the code point in the upper bits
// of the TOS/traffic-class byte; confirm.
// When Space does not parse, the record keeps its names and a zero Value.
func (drr *dscpRegistry) escape() []canonDSCPRecord {
	drs := make([]canonDSCPRecord, len(drr.Records))
	sr := strings.NewReplacer(
		"+", "",
		"-", "",
		"/", "",
		".", "",
		" ", "",
	)
	for i, dr := range drr.Records {
		s := strings.TrimSpace(dr.Name)
		drs[i].OrigName = s
		drs[i].Name = sr.Replace(s)
		n, err := strconv.ParseUint(dr.Space, 2, 8)
		if err != nil {
			continue
		}
		drs[i].Value = int(n) << 2
	}
	return drs
}

// parseTOSTCByte decodes a TOS byte registry document from r and writes a Go
// const block with one constant per record to w.
func parseTOSTCByte(w io.Writer, r io.Reader) error {
	dec := xml.NewDecoder(r)
	var ttb tosTCByte
	if err := dec.Decode(&ttb); err != nil {
		return err
	}
	fmt.Fprintf(w, "// %s, Updated: %s\n", ttb.Title, ttb.Updated)
	fmt.Fprintf(w, "const (\n")
	for _, rec := range ttb.escape() {
		fmt.Fprintf(w, "%s = %#x// %s\n", rec.Keyword, rec.Value, rec.OrigKeyword)
	}
	fmt.Fprintf(w, ")\n")
	return nil
}

// tosTCByte is the decoded form of the TOS byte registry XML document.
type tosTCByte struct {
	XMLName  xml.Name          `xml:"registry"`
	Title    string            `xml:"title"`
	Updated  string            `xml:"updated"`
	Note     string            `xml:"note"`
	RegTitle string            `xml:"registry>title"`
	Records  []tosTCByteRecord `xml:"registry>record"`
}

// tosTCByteRecord is one <record> element: a binary string and its keyword.
type tosTCByteRecord struct {
	Binary  string `xml:"binary"`
	Keyword string `xml:"keyword"`
}

// canonTOSTCByteRecord is a record prepared for code generation.
type canonTOSTCByteRecord struct {
	OrigKeyword string // trimmed keyword as it appears in the registry
	Keyword     string // identifier derived from OrigKeyword
	Value       int
}

// escape returns one canonical record per entry of ttb.Records. A keyword made
// of several space-separated words loses its first word; the remainder is
// stripped of "Capable", parentheses, "+", "-", "/", "." and spaces. Binary is
// parsed as a base-2 number of at most 8 bits. When it does not parse, the
// record keeps its keywords and a zero Value.
func (ttb *tosTCByte) escape() []canonTOSTCByteRecord {
	trs := make([]canonTOSTCByteRecord, len(ttb.Records))
	sr := strings.NewReplacer(
		"Capable", "",
		"(", "",
		")", "",
		"+", "",
		"-", "",
		"/", "",
		".", "",
		" ", "",
	)
	for i, tr := range ttb.Records {
		s := strings.TrimSpace(tr.Keyword)
		trs[i].OrigKeyword = s
		kw := s
		if sp := strings.IndexByte(s, ' '); sp >= 0 {
			// Drop the first word and keep everything after the first space.
			kw = s[sp+1:]
		}
		trs[i].Keyword = sr.Replace(kw)
		n, err := strconv.ParseUint(tr.Binary, 2, 8)
		if err != nil {
			continue
		}
		trs[i].Value = int(n)
	}
	return trs
}
{ 797 | const hex = "0123456789abcdef" 798 | 799 | buf.WriteByte('"') 800 | start := 0 801 | for i := 0; i < len(s); { 802 | if b := s[i]; b < utf8.RuneSelf { 803 | if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { 804 | i++ 805 | continue 806 | } 807 | if start < i { 808 | buf.WriteString(s[start:i]) 809 | } 810 | switch b { 811 | case '\\', '"': 812 | buf.WriteByte('\\') 813 | buf.WriteByte(b) 814 | case '\n': 815 | buf.WriteByte('\\') 816 | buf.WriteByte('n') 817 | case '\r': 818 | buf.WriteByte('\\') 819 | buf.WriteByte('r') 820 | default: 821 | 822 | buf.WriteString(`\u00`) 823 | buf.WriteByte(hex[b>>4]) 824 | buf.WriteByte(hex[b&0xF]) 825 | } 826 | i++ 827 | start = i 828 | continue 829 | } 830 | c, size := utf8.DecodeRuneInString(s[i:]) 831 | if c == utf8.RuneError && size == 1 { 832 | if start < i { 833 | buf.WriteString(s[start:i]) 834 | } 835 | buf.WriteString(`\ufffd`) 836 | i += size 837 | start = i 838 | continue 839 | } 840 | 841 | if c == '\u2028' || c == '\u2029' { 842 | if start < i { 843 | buf.WriteString(s[start:i]) 844 | } 845 | buf.WriteString(`\u202`) 846 | buf.WriteByte(hex[c&0xF]) 847 | i += size 848 | start = i 849 | continue 850 | } 851 | i += size 852 | } 853 | if start < len(s) { 854 | buf.WriteString(s[start:]) 855 | } 856 | buf.WriteByte('"') 857 | }858 |
{ 860 | const hex = "0123456789abcdef" 861 | 862 | buf.WriteByte('"') 863 | start := 0 864 | for i := 0; i < len(s); { 865 | if b := s[i]; b < utf8.RuneSelf { 866 | if 0x20 <= b && b != '\\' && b != '"' && b != '<' && b != '>' && b != '&' { 867 | i++ 868 | continue 869 | } 870 | if start < i { 871 | buf.Write(s[start:i]) 872 | } 873 | switch b { 874 | case '\\', '"': 875 | buf.WriteByte('\\') 876 | buf.WriteByte(b) 877 | case '\n': 878 | buf.WriteByte('\\') 879 | buf.WriteByte('n') 880 | case '\r': 881 | buf.WriteByte('\\') 882 | buf.WriteByte('r') 883 | default: 884 | 885 | buf.WriteString(`\u00`) 886 | buf.WriteByte(hex[b>>4]) 887 | buf.WriteByte(hex[b&0xF]) 888 | } 889 | i++ 890 | start = i 891 | continue 892 | } 893 | c, size := utf8.DecodeRune(s[i:]) 894 | if c == utf8.RuneError && size == 1 { 895 | if start < i { 896 | buf.Write(s[start:i]) 897 | } 898 | buf.WriteString(`\ufffd`) 899 | i += size 900 | start = i 901 | continue 902 | } 903 | 904 | if c == '\u2028' || c == '\u2029' { 905 | if start < i { 906 | buf.Write(s[start:i]) 907 | } 908 | buf.WriteString(`\u202`) 909 | buf.WriteByte(hex[c&0xF]) 910 | i += size 911 | start = i 912 | continue 913 | } 914 | i += size 915 | } 916 | if start < len(s) { 917 | buf.Write(s[start:]) 918 | } 919 | buf.WriteByte('"') 920 | }921 |
func TestParseNetworkOptsPrivateOnly(t *testing.T) { 924 | ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::80"}) 925 | if err != nil { 926 | t.Fatal(err) 927 | } 928 | if len(ports) != 1 { 929 | t.Logf("Expected 1 got %d", len(ports)) 930 | t.FailNow() 931 | } 932 | if len(bindings) != 1 { 933 | t.Logf("Expected 1 got %d", len(bindings)) 934 | t.FailNow() 935 | } 936 | for k := range ports { 937 | if k.Proto() != "tcp" { 938 | t.Logf("Expected tcp got %s", k.Proto()) 939 | t.Fail() 940 | } 941 | if k.Port() != "80" { 942 | t.Logf("Expected 80 got %s", k.Port()) 943 | t.Fail() 944 | } 945 | b, exists := bindings[k] 946 | if !exists { 947 | t.Log("Binding does not exist") 948 | t.FailNow() 949 | } 950 | if len(b) != 1 { 951 | t.Logf("Expected 1 got %d", len(b)) 952 | t.FailNow() 953 | } 954 | s := b[0] 955 | if s.HostPort != "" { 956 | t.Logf("Expected \"\" got %s", s.HostPort) 957 | t.Fail() 958 | } 959 | if s.HostIp != "192.168.1.100" { 960 | t.Fail() 961 | } 962 | } 963 | }964 |
func TestParseNetworkOptsPublic(t *testing.T) { 966 | ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100:8080:80"}) 967 | if err != nil { 968 | t.Fatal(err) 969 | } 970 | if len(ports) != 1 { 971 | t.Logf("Expected 1 got %d", len(ports)) 972 | t.FailNow() 973 | } 974 | if len(bindings) != 1 { 975 | t.Logf("Expected 1 got %d", len(bindings)) 976 | t.FailNow() 977 | } 978 | for k := range ports { 979 | if k.Proto() != "tcp" { 980 | t.Logf("Expected tcp got %s", k.Proto()) 981 | t.Fail() 982 | } 983 | if k.Port() != "80" { 984 | t.Logf("Expected 80 got %s", k.Port()) 985 | t.Fail() 986 | } 987 | b, exists := bindings[k] 988 | if !exists { 989 | t.Log("Binding does not exist") 990 | t.FailNow() 991 | } 992 | if len(b) != 1 { 993 | t.Logf("Expected 1 got %d", len(b)) 994 | t.FailNow() 995 | } 996 | s := b[0] 997 | if s.HostPort != "8080" { 998 | t.Logf("Expected 8080 got %s", s.HostPort) 999 | t.Fail() 1000 | } 1001 | if s.HostIp != "192.168.1.100" { 1002 | t.Fail() 1003 | } 1004 | } 1005 | }1006 |
func TestParseNetworkOptsUdp(t *testing.T) { 1008 | ports, bindings, err := nat.ParsePortSpecs([]string{"192.168.1.100::6000/udp"}) 1009 | if err != nil { 1010 | t.Fatal(err) 1011 | } 1012 | if len(ports) != 1 { 1013 | t.Logf("Expected 1 got %d", len(ports)) 1014 | t.FailNow() 1015 | } 1016 | if len(bindings) != 1 { 1017 | t.Logf("Expected 1 got %d", len(bindings)) 1018 | t.FailNow() 1019 | } 1020 | for k := range ports { 1021 | if k.Proto() != "udp" { 1022 | t.Logf("Expected udp got %s", k.Proto()) 1023 | t.Fail() 1024 | } 1025 | if k.Port() != "6000" { 1026 | t.Logf("Expected 6000 got %s", k.Port()) 1027 | t.Fail() 1028 | } 1029 | b, exists := bindings[k] 1030 | if !exists { 1031 | t.Log("Binding does not exist") 1032 | t.FailNow() 1033 | } 1034 | if len(b) != 1 { 1035 | t.Logf("Expected 1 got %d", len(b)) 1036 | t.FailNow() 1037 | } 1038 | s := b[0] 1039 | if s.HostPort != "" { 1040 | t.Logf("Expected \"\" got %s", s.HostPort) 1041 | t.Fail() 1042 | } 1043 | if s.HostIp != "192.168.1.100" { 1044 | t.Fail() 1045 | } 1046 | } 1047 | }1048 |
func TestMapTCPPorts(t *testing.T) { 1051 | defer netutils.SetupTestNetNS(t)() 1052 | pm := New() 1053 | dstIP1 := net.ParseIP("192.168.0.1") 1054 | dstIP2 := net.ParseIP("192.168.0.2") 1055 | dstAddr1 := &net.TCPAddr{IP: dstIP1, Port: 80} 1056 | dstAddr2 := &net.TCPAddr{IP: dstIP2, Port: 80} 1057 | 1058 | srcAddr1 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")} 1059 | srcAddr2 := &net.TCPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")} 1060 | 1061 | addrEqual := func(addr1, addr2 net.Addr) bool { 1062 | return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String()) 1063 | } 1064 | 1065 | if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil { 1066 | t.Fatalf("Failed to allocate port: %s", err) 1067 | } else if !addrEqual(dstAddr1, host) { 1068 | t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s", 1069 | dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network()) 1070 | } 1071 | 1072 | if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil { 1073 | t.Fatalf("Port is in use - mapping should have failed") 1074 | } 1075 | 1076 | if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil { 1077 | t.Fatalf("Port is in use - mapping should have failed") 1078 | } 1079 | 1080 | if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil { 1081 | t.Fatalf("Failed to allocate port: %s", err) 1082 | } 1083 | 1084 | if pm.Unmap(dstAddr1) != nil { 1085 | t.Fatalf("Failed to release port") 1086 | } 1087 | 1088 | if pm.Unmap(dstAddr2) != nil { 1089 | t.Fatalf("Failed to release port") 1090 | } 1091 | 1092 | if pm.Unmap(dstAddr2) == nil { 1093 | t.Fatalf("Port already released, but no error reported") 1094 | } 1095 | }1096 |
func TestMapUDPPorts(t *testing.T) { 1098 | defer netutils.SetupTestNetNS(t)() 1099 | pm := New() 1100 | dstIP1 := net.ParseIP("192.168.0.1") 1101 | dstIP2 := net.ParseIP("192.168.0.2") 1102 | dstAddr1 := &net.UDPAddr{IP: dstIP1, Port: 80} 1103 | dstAddr2 := &net.UDPAddr{IP: dstIP2, Port: 80} 1104 | 1105 | srcAddr1 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.1")} 1106 | srcAddr2 := &net.UDPAddr{Port: 1080, IP: net.ParseIP("172.16.0.2")} 1107 | 1108 | addrEqual := func(addr1, addr2 net.Addr) bool { 1109 | return (addr1.Network() == addr2.Network()) && (addr1.String() == addr2.String()) 1110 | } 1111 | 1112 | if host, err := pm.Map(srcAddr1, dstIP1, 80, true); err != nil { 1113 | t.Fatalf("Failed to allocate port: %s", err) 1114 | } else if !addrEqual(dstAddr1, host) { 1115 | t.Fatalf("Incorrect mapping result: expected %s:%s, got %s:%s", 1116 | dstAddr1.String(), dstAddr1.Network(), host.String(), host.Network()) 1117 | } 1118 | 1119 | if _, err := pm.Map(srcAddr1, dstIP1, 80, true); err == nil { 1120 | t.Fatalf("Port is in use - mapping should have failed") 1121 | } 1122 | 1123 | if _, err := pm.Map(srcAddr2, dstIP1, 80, true); err == nil { 1124 | t.Fatalf("Port is in use - mapping should have failed") 1125 | } 1126 | 1127 | if _, err := pm.Map(srcAddr2, dstIP2, 80, true); err != nil { 1128 | t.Fatalf("Failed to allocate port: %s", err) 1129 | } 1130 | 1131 | if pm.Unmap(dstAddr1) != nil { 1132 | t.Fatalf("Failed to release port") 1133 | } 1134 | 1135 | if pm.Unmap(dstAddr2) != nil { 1136 | t.Fatalf("Failed to release port") 1137 | } 1138 | 1139 | if pm.Unmap(dstAddr2) == nil { 1140 | t.Fatalf("Port already released, but no error reported") 1141 | } 1142 | }1143 |
func (s *DockerSuite) TestCreateWithPortRange(c *check.C) { 1146 | 1147 | runCmd := exec.Command(dockerBinary, "create", "-p", "3300-3303:3300-3303/tcp", "busybox", "echo") 1148 | out, _, _, err := runCommandWithStdoutStderr(runCmd) 1149 | if err != nil { 1150 | c.Fatal(out, err) 1151 | } 1152 | 1153 | cleanedContainerID := strings.TrimSpace(out) 1154 | 1155 | inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID) 1156 | out, _, err = runCommandWithOutput(inspectCmd) 1157 | if err != nil { 1158 | c.Fatalf("out should've been a container id: %s, %v", out, err) 1159 | } 1160 | 1161 | containers := []struct { 1162 | HostConfig *struct { 1163 | PortBindings map[nat.Port][]nat.PortBinding 1164 | } 1165 | }{} 1166 | if err := json.Unmarshal([]byte(out), &containers); err != nil { 1167 | c.Fatalf("Error inspecting the container: %s", err) 1168 | } 1169 | if len(containers) != 1 { 1170 | c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers)) 1171 | } 1172 | 1173 | cont := containers[0] 1174 | if cont.HostConfig == nil { 1175 | c.Fatalf("Expected HostConfig, got none") 1176 | } 1177 | 1178 | if len(cont.HostConfig.PortBindings) != 4 { 1179 | c.Fatalf("Expected 4 ports bindings, got %d", len(cont.HostConfig.PortBindings)) 1180 | } 1181 | for k, v := range cont.HostConfig.PortBindings { 1182 | if len(v) != 1 { 1183 | c.Fatalf("Expected 1 ports binding, for the port %s but found %s", k, v) 1184 | } 1185 | if k.Port() != v[0].HostPort { 1186 | c.Fatalf("Expected host port %d to match published port %d", k.Port(), v[0].HostPort) 1187 | } 1188 | } 1189 | 1190 | }1191 |
// TestCreateWithiLargePortRange creates a container with "-p 1-65535:1-65535/tcp",
// inspects it, and requires 65535 port-binding entries, each holding exactly one
// binding whose HostPort equals the Port() of its key.
// NOTE(review): the container-count message says "Expected 0" although the check is
// len(containers) != 1, and the last Fatalf formats Port()/HostPort with %d although
// they look like string values (they are compared with each other and, in other tests,
// with string literals) - confirm and correct the messages.
func (s *DockerSuite) TestCreateWithiLargePortRange(c *check.C) { 1193 | 1194 | runCmd := exec.Command(dockerBinary, "create", "-p", "1-65535:1-65535/tcp", "busybox", "echo") 1195 | out, _, _, err := runCommandWithStdoutStderr(runCmd) 1196 | if err != nil { 1197 | c.Fatal(out, err) 1198 | } 1199 | 1200 | cleanedContainerID := strings.TrimSpace(out) 1201 | 1202 | inspectCmd := exec.Command(dockerBinary, "inspect", cleanedContainerID) 1203 | out, _, err = runCommandWithOutput(inspectCmd) 1204 | if err != nil { 1205 | c.Fatalf("out should've been a container id: %s, %v", out, err) 1206 | } 1207 | 1208 | containers := []struct { 1209 | HostConfig *struct { 1210 | PortBindings map[nat.Port][]nat.PortBinding 1211 | } 1212 | }{} 1213 | if err := json.Unmarshal([]byte(out), &containers); err != nil { 1214 | c.Fatalf("Error inspecting the container: %s", err) 1215 | } 1216 | if len(containers) != 1 { 1217 | c.Fatalf("Unexpected container count. Expected 0, received: %d", len(containers)) 1218 | } 1219 | 1220 | cont := containers[0] 1221 | if cont.HostConfig == nil { 1222 | c.Fatalf("Expected HostConfig, got none") 1223 | } 1224 | 1225 | if len(cont.HostConfig.PortBindings) != 65535 { 1226 | c.Fatalf("Expected 65535 ports bindings, got %d", len(cont.HostConfig.PortBindings)) 1227 | } 1228 | for k, v := range cont.HostConfig.PortBindings { 1229 | if len(v) != 1 { 1230 | c.Fatalf("Expected 1 ports binding, for the port %s but found %s", k, v) 1231 | } 1232 | if k.Port() != v[0].HostPort { 1233 | c.Fatalf("Expected host port %d to match published port %d", k.Port(), v[0].HostPort) 1234 | } 1235 | } 1236 | 1237 | }1238 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mibk/dupl 2 | 3 | go 1.14 4 | -------------------------------------------------------------------------------- /job/buildtree.go: 
-------------------------------------------------------------------------------- 1 | package job 2 | 3 | import ( 4 | "github.com/mibk/dupl/suffixtree" 5 | "github.com/mibk/dupl/syntax" 6 | ) 7 | 8 | func BuildTree(schan chan []*syntax.Node) (t *suffixtree.STree, d *[]*syntax.Node, done chan bool) { 9 | t = suffixtree.New() 10 | data := make([]*syntax.Node, 0, 100) 11 | done = make(chan bool) 12 | go func() { 13 | for seq := range schan { 14 | data = append(data, seq...) 15 | for _, node := range seq { 16 | t.Update(node) 17 | } 18 | } 19 | done <- true 20 | }() 21 | return t, &data, done 22 | } 23 | -------------------------------------------------------------------------------- /job/parse.go: -------------------------------------------------------------------------------- 1 | package job 2 | 3 | import ( 4 | "log" 5 | 6 | "github.com/mibk/dupl/syntax" 7 | "github.com/mibk/dupl/syntax/golang" 8 | ) 9 | 10 | func Parse(fchan chan string) chan []*syntax.Node { 11 | 12 | // parse AST 13 | achan := make(chan *syntax.Node) 14 | go func() { 15 | for file := range fchan { 16 | ast, err := golang.Parse(file) 17 | if err != nil { 18 | log.Println(err) 19 | continue 20 | } 21 | achan <- ast 22 | } 23 | close(achan) 24 | }() 25 | 26 | // serialize 27 | schan := make(chan []*syntax.Node) 28 | go func() { 29 | for ast := range achan { 30 | seq := syntax.Serialize(ast) 31 | schan <- seq 32 | } 33 | close(schan) 34 | }() 35 | return schan 36 | } 37 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bufio" 5 | "flag" 6 | "fmt" 7 | "io/ioutil" 8 | "log" 9 | "os" 10 | "path/filepath" 11 | "sort" 12 | "strings" 13 | 14 | "github.com/mibk/dupl/job" 15 | "github.com/mibk/dupl/printer" 16 | "github.com/mibk/dupl/syntax" 17 | ) 18 | 19 | const defaultThreshold = 15 20 | 21 | var ( 22 | paths = []string{"."} 23 | vendor = 
flag.Bool("vendor", false, "") 24 | verbose = flag.Bool("verbose", false, "") 25 | threshold = flag.Int("threshold", defaultThreshold, "") 26 | files = flag.Bool("files", false, "") 27 | 28 | html = flag.Bool("html", false, "") 29 | plumbing = flag.Bool("plumbing", false, "") 30 | ) 31 | 32 | const ( 33 | vendorDirPrefix = "vendor" + string(filepath.Separator) 34 | vendorDirInPath = string(filepath.Separator) + vendorDirPrefix 35 | ) 36 | 37 | func init() { 38 | flag.BoolVar(verbose, "v", false, "alias for -verbose") 39 | flag.IntVar(threshold, "t", defaultThreshold, "alias for -threshold") 40 | } 41 | 42 | func main() { 43 | flag.Usage = usage 44 | flag.Parse() 45 | if *html && *plumbing { 46 | log.Fatal("you can have either plumbing or HTML output") 47 | } 48 | if flag.NArg() > 0 { 49 | paths = flag.Args() 50 | } 51 | 52 | if *verbose { 53 | log.Println("Building suffix tree") 54 | } 55 | schan := job.Parse(filesFeed()) 56 | t, data, done := job.BuildTree(schan) 57 | <-done 58 | 59 | // finish stream 60 | t.Update(&syntax.Node{Type: -1}) 61 | 62 | if *verbose { 63 | log.Println("Searching for clones") 64 | } 65 | mchan := t.FindDuplOver(*threshold) 66 | duplChan := make(chan syntax.Match) 67 | go func() { 68 | for m := range mchan { 69 | match := syntax.FindSyntaxUnits(*data, m, *threshold) 70 | if len(match.Frags) > 0 { 71 | duplChan <- match 72 | } 73 | } 74 | close(duplChan) 75 | }() 76 | 77 | newPrinter := printer.NewText 78 | if *html { 79 | newPrinter = printer.NewHTML 80 | } else if *plumbing { 81 | newPrinter = printer.NewPlumbing 82 | } 83 | p := newPrinter(os.Stdout, ioutil.ReadFile) 84 | if err := printDupls(p, duplChan); err != nil { 85 | log.Fatal(err) 86 | } 87 | } 88 | 89 | func filesFeed() chan string { 90 | if *files { 91 | fchan := make(chan string) 92 | go func() { 93 | s := bufio.NewScanner(os.Stdin) 94 | for s.Scan() { 95 | f := s.Text() 96 | fchan <- strings.TrimPrefix(f, "./") 97 | } 98 | close(fchan) 99 | }() 100 | return fchan 101 | } 102 
| return crawlPaths(paths) 103 | } 104 | 105 | func crawlPaths(paths []string) chan string { 106 | fchan := make(chan string) 107 | go func() { 108 | for _, path := range paths { 109 | info, err := os.Lstat(path) 110 | if err != nil { 111 | log.Fatal(err) 112 | } 113 | if !info.IsDir() { 114 | fchan <- path 115 | continue 116 | } 117 | err = filepath.Walk(path, func(path string, info os.FileInfo, err error) error { 118 | if !*vendor && (strings.HasPrefix(path, vendorDirPrefix) || 119 | strings.Contains(path, vendorDirInPath)) { 120 | return nil 121 | } 122 | if !info.IsDir() && strings.HasSuffix(info.Name(), ".go") { 123 | fchan <- path 124 | } 125 | return nil 126 | }) 127 | if err != nil { 128 | log.Fatal(err) 129 | } 130 | } 131 | close(fchan) 132 | }() 133 | return fchan 134 | } 135 | 136 | func printDupls(p printer.Printer, duplChan <-chan syntax.Match) error { 137 | groups := make(map[string][][]*syntax.Node) 138 | for dupl := range duplChan { 139 | groups[dupl.Hash] = append(groups[dupl.Hash], dupl.Frags...) 
140 | } 141 | keys := make([]string, 0, len(groups)) 142 | for k := range groups { 143 | keys = append(keys, k) 144 | } 145 | sort.Strings(keys) 146 | 147 | if err := p.PrintHeader(); err != nil { 148 | return err 149 | } 150 | for _, k := range keys { 151 | uniq := unique(groups[k]) 152 | if len(uniq) > 1 { 153 | if err := p.PrintClones(uniq); err != nil { 154 | return err 155 | } 156 | } 157 | } 158 | return p.PrintFooter() 159 | } 160 | 161 | func unique(group [][]*syntax.Node) [][]*syntax.Node { 162 | fileMap := make(map[string]map[int]struct{}) 163 | 164 | var newGroup [][]*syntax.Node 165 | for _, seq := range group { 166 | node := seq[0] 167 | file, ok := fileMap[node.Filename] 168 | if !ok { 169 | file = make(map[int]struct{}) 170 | fileMap[node.Filename] = file 171 | } 172 | if _, ok := file[node.Pos]; !ok { 173 | file[node.Pos] = struct{}{} 174 | newGroup = append(newGroup, seq) 175 | } 176 | } 177 | return newGroup 178 | } 179 | 180 | func usage() { 181 | fmt.Fprintln(os.Stderr, `Usage: dupl [flags] [paths] 182 | 183 | Paths: 184 | If the given path is a file, dupl will use it regardless of 185 | the file extension. If it is a directory, it will recursively 186 | search for *.go files in that directory. 187 | 188 | If no path is given, dupl will recursively search for *.go 189 | files in the current directory. 190 | 191 | Flags: 192 | -files 193 | read file names from stdin one at each line 194 | -html 195 | output the results as HTML, including duplicate code fragments 196 | -plumbing 197 | plumbing (easy-to-parse) output for consumption by scripts or tools 198 | -t, -threshold size 199 | minimum token sequence size as a clone (default 15) 200 | -vendor 201 | check files in vendor directory 202 | -v, -verbose 203 | explain what is being done 204 | 205 | Examples: 206 | dupl -t 100 207 | Search clones in the current directory of size at least 208 | 100 tokens. 
209 | dupl $(find app/ -name '*_test.go') 210 | Search for clones in tests in the app directory. 211 | find app/ -name '*_test.go' |dupl -files 212 | The same as above.`) 213 | os.Exit(2) 214 | } 215 | -------------------------------------------------------------------------------- /printer/html.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "html" 7 | "io" 8 | "regexp" 9 | "sort" 10 | 11 | "github.com/mibk/dupl/syntax" 12 | ) 13 | 14 | type htmlprinter struct { 15 | iota int 16 | w io.Writer 17 | ReadFile 18 | } 19 | 20 | func NewHTML(w io.Writer, fread ReadFile) Printer { 21 | return &htmlprinter{w: w, ReadFile: fread} 22 | } 23 | 24 | func (p *htmlprinter) PrintHeader() error { 25 | _, err := fmt.Fprint(p.w, ` 26 | 27 |
%s\n", cl.filename, cl.lineStart, 68 | html.EscapeString(string(cl.fragment))) 69 | } 70 | return nil 71 | } 72 | 73 | func (*htmlprinter) PrintFooter() error { return nil } 74 | 75 | func findLineBeg(file []byte, index int) int { 76 | for i := index; i >= 0; i-- { 77 | if file[i] == '\n' { 78 | return i + 1 79 | } 80 | } 81 | return 0 82 | } 83 | 84 | func toWhitespace(str []byte) []byte { 85 | var out []byte 86 | for _, c := range bytes.Runes(str) { 87 | if c == '\t' { 88 | out = append(out, '\t') 89 | } else { 90 | out = append(out, ' ') 91 | } 92 | } 93 | return out 94 | } 95 | 96 | func deindent(block []byte) []byte { 97 | const maxVal = 99 98 | min := maxVal 99 | re := regexp.MustCompile(`(^|\n)(\t*)\S`) 100 | for _, line := range re.FindAllSubmatch(block, -1) { 101 | indent := line[2] 102 | if len(indent) < min { 103 | min = len(indent) 104 | } 105 | } 106 | if min == 0 || min == maxVal { 107 | return block 108 | } 109 | block = block[min:] 110 | Loop: 111 | for i := 0; i < len(block); i++ { 112 | if block[i] == '\n' && i != len(block)-1 { 113 | for j := 0; j < min; j++ { 114 | if block[i+j+1] != '\t' { 115 | continue Loop 116 | } 117 | } 118 | block = append(block[:i+1], block[i+1+min:]...) 
119 | } 120 | } 121 | return block 122 | } 123 | -------------------------------------------------------------------------------- /printer/html_test.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import "testing" 4 | 5 | func TestToWhitespace(t *testing.T) { 6 | testCases := []struct { 7 | in string 8 | expect string 9 | }{ 10 | {"\t ", "\t "}, 11 | {"\tčřď", "\t "}, 12 | {" \ta", " \t "}, 13 | } 14 | 15 | for _, tc := range testCases { 16 | actual := toWhitespace([]byte(tc.in)) 17 | if tc.expect != string(actual) { 18 | t.Errorf("got '%s', want '%s'", actual, tc.expect) 19 | } 20 | } 21 | } 22 | 23 | func TestDeindent(t *testing.T) { 24 | testCases := []struct { 25 | in string 26 | expect string 27 | }{ 28 | {"\t$\n\t\t$\n\t$", "$\n\t$\n$"}, 29 | {"\t$\r\n\t\t$\r\n\t$", "$\r\n\t$\r\n$"}, 30 | {"\t$\n\t\t$\n", "$\n\t$\n"}, 31 | {"\t$\n\n\t\t$", "$\n\n\t$"}, 32 | } 33 | for _, tc := range testCases { 34 | actual := deindent([]byte(tc.in)) 35 | if tc.expect != string(actual) { 36 | t.Errorf("got '%s', want '%s'", actual, tc.expect) 37 | } 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /printer/plumbing.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sort" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | type plumbing struct { 12 | w io.Writer 13 | ReadFile 14 | } 15 | 16 | func NewPlumbing(w io.Writer, fread ReadFile) Printer { 17 | return &plumbing{w, fread} 18 | } 19 | 20 | func (p *plumbing) PrintHeader() error { return nil } 21 | 22 | func (p *plumbing) PrintClones(dups [][]*syntax.Node) error { 23 | clones, err := prepareClonesInfo(p.ReadFile, dups) 24 | if err != nil { 25 | return err 26 | } 27 | sort.Sort(byNameAndLine(clones)) 28 | for i, cl := range clones { 29 | nextCl := clones[(i+1)%len(clones)] 30 | fmt.Fprintf(p.w, "%s:%d-%d: duplicate 
of %s:%d-%d\n", cl.filename, cl.lineStart, cl.lineEnd, 31 | nextCl.filename, nextCl.lineStart, nextCl.lineEnd) 32 | } 33 | return nil 34 | } 35 | 36 | func (p *plumbing) PrintFooter() error { return nil } 37 | -------------------------------------------------------------------------------- /printer/printer.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import "github.com/mibk/dupl/syntax" 4 | 5 | type ReadFile func(filename string) ([]byte, error) 6 | 7 | type Printer interface { 8 | PrintHeader() error 9 | PrintClones(dups [][]*syntax.Node) error 10 | PrintFooter() error 11 | } 12 | -------------------------------------------------------------------------------- /printer/text.go: -------------------------------------------------------------------------------- 1 | package printer 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | "sort" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | type text struct { 12 | cnt int 13 | w io.Writer 14 | ReadFile 15 | } 16 | 17 | func NewText(w io.Writer, fread ReadFile) Printer { 18 | return &text{w: w, ReadFile: fread} 19 | } 20 | 21 | func (p *text) PrintHeader() error { return nil } 22 | 23 | func (p *text) PrintClones(dups [][]*syntax.Node) error { 24 | p.cnt++ 25 | fmt.Fprintf(p.w, "found %d clones:\n", len(dups)) 26 | clones, err := prepareClonesInfo(p.ReadFile, dups) 27 | if err != nil { 28 | return err 29 | } 30 | sort.Sort(byNameAndLine(clones)) 31 | for _, cl := range clones { 32 | fmt.Fprintf(p.w, " %s:%d,%d\n", cl.filename, cl.lineStart, cl.lineEnd) 33 | } 34 | return nil 35 | } 36 | 37 | func (p *text) PrintFooter() error { 38 | _, err := fmt.Fprintf(p.w, "\nFound total %d clone groups.\n", p.cnt) 39 | return err 40 | } 41 | 42 | func prepareClonesInfo(fread ReadFile, dups [][]*syntax.Node) ([]clone, error) { 43 | clones := make([]clone, len(dups)) 44 | for i, dup := range dups { 45 | cnt := len(dup) 46 | if cnt == 0 { 47 | panic("zero length dup") 48 | } 49 
| nstart := dup[0] 50 | nend := dup[cnt-1] 51 | 52 | file, err := fread(nstart.Filename) 53 | if err != nil { 54 | return nil, err 55 | } 56 | 57 | cl := clone{filename: nstart.Filename} 58 | cl.lineStart, cl.lineEnd = blockLines(file, nstart.Pos, nend.End) 59 | clones[i] = cl 60 | } 61 | return clones, nil 62 | } 63 | 64 | func blockLines(file []byte, from, to int) (int, int) { 65 | line := 1 66 | lineStart, lineEnd := 0, 0 67 | for offset, b := range file { 68 | if b == '\n' { 69 | line++ 70 | } 71 | if offset == from { 72 | lineStart = line 73 | } 74 | if offset == to-1 { 75 | lineEnd = line 76 | break 77 | } 78 | } 79 | return lineStart, lineEnd 80 | } 81 | 82 | type clone struct { 83 | filename string 84 | lineStart int 85 | lineEnd int 86 | fragment []byte 87 | } 88 | 89 | type byNameAndLine []clone 90 | 91 | func (c byNameAndLine) Len() int { return len(c) } 92 | 93 | func (c byNameAndLine) Swap(i, j int) { c[i], c[j] = c[j], c[i] } 94 | 95 | func (c byNameAndLine) Less(i, j int) bool { 96 | if c[i].filename == c[j].filename { 97 | return c[i].lineStart < c[j].lineStart 98 | } 99 | return c[i].filename < c[j].filename 100 | } 101 | -------------------------------------------------------------------------------- /suffixtree/dupl.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import "sort" 4 | 5 | type Match struct { 6 | Ps []Pos 7 | Len Pos 8 | } 9 | 10 | type posList struct { 11 | positions []Pos 12 | } 13 | 14 | func newPosList() *posList { 15 | return &posList{make([]Pos, 0)} 16 | } 17 | 18 | func (p *posList) append(p2 *posList) { 19 | p.positions = append(p.positions, p2.positions...) 
20 | } 21 | 22 | func (p *posList) add(pos Pos) { 23 | p.positions = append(p.positions, pos) 24 | } 25 | 26 | type contextList struct { 27 | lists map[int]*posList 28 | } 29 | 30 | func newContextList() *contextList { 31 | return &contextList{make(map[int]*posList)} 32 | } 33 | 34 | func (c *contextList) getAll() []Pos { 35 | keys := make([]int, 0, len(c.lists)) 36 | for k := range c.lists { 37 | keys = append(keys, k) 38 | } 39 | sort.Ints(keys) 40 | var ps []Pos 41 | for _, k := range keys { 42 | ps = append(ps, c.lists[k].positions...) 43 | } 44 | return ps 45 | } 46 | 47 | func (c *contextList) append(c2 *contextList) { 48 | for lc, pl := range c2.lists { 49 | if _, ok := c.lists[lc]; ok { 50 | c.lists[lc].append(pl) 51 | } else { 52 | c.lists[lc] = pl 53 | } 54 | } 55 | } 56 | 57 | // FindDuplOver find pairs of maximal duplicities over a threshold 58 | // length. 59 | func (t *STree) FindDuplOver(threshold int) <-chan Match { 60 | auxTran := newTran(0, 0, t.root) 61 | ch := make(chan Match) 62 | go func() { 63 | walkTrans(auxTran, 0, threshold, ch) 64 | close(ch) 65 | }() 66 | return ch 67 | } 68 | 69 | func walkTrans(parent *tran, length, threshold int, ch chan<- Match) *contextList { 70 | s := parent.state 71 | 72 | cl := newContextList() 73 | 74 | if len(s.trans) == 0 { 75 | pl := newPosList() 76 | start := parent.end + 1 - Pos(length) 77 | pl.add(start) 78 | ch := 0 79 | if start > 0 { 80 | ch = s.tree.data[start-1].Val() 81 | } 82 | cl.lists[ch] = pl 83 | return cl 84 | } 85 | 86 | for _, t := range s.trans { 87 | ln := length + t.len() 88 | cl2 := walkTrans(t, ln, threshold, ch) 89 | if ln >= threshold { 90 | cl.append(cl2) 91 | } 92 | } 93 | if length >= threshold && len(cl.lists) > 1 { 94 | m := Match{cl.getAll(), Pos(length)} 95 | ch <- m 96 | } 97 | return cl 98 | } 99 | -------------------------------------------------------------------------------- /suffixtree/dupl_test.go: 
-------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import ( 4 | "fmt" 5 | "sort" 6 | "testing" 7 | ) 8 | 9 | func (m Match) String() string { 10 | str := "([" 11 | for _, p := range m.Ps { 12 | str += fmt.Sprintf("%d, ", p) 13 | } 14 | return str[:len(str)-2] + fmt.Sprintf("], %d)", m.Len) 15 | } 16 | 17 | func sliceCmp(sl1, sl2 []Pos) bool { 18 | if len(sl1) != len(sl2) { 19 | return false 20 | } 21 | sort.Sort(ByPos(sl1)) 22 | sort.Sort(ByPos(sl2)) 23 | for i := range sl1 { 24 | if sl1[i] != sl2[i] { 25 | return false 26 | } 27 | } 28 | return true 29 | } 30 | 31 | type ByPos []Pos 32 | 33 | func (p ByPos) Len() int { 34 | return len(p) 35 | } 36 | 37 | func (p ByPos) Swap(i, j int) { 38 | p[i], p[j] = p[j], p[i] 39 | } 40 | 41 | func (p ByPos) Less(i, j int) bool { 42 | return p[i] < p[j] 43 | } 44 | 45 | func TestFindingDupl(t *testing.T) { 46 | testCases := []struct { 47 | s string 48 | threshold int 49 | matches []Match 50 | }{ 51 | {"abab$", 3, []Match{}}, 52 | {"abab$", 2, []Match{{[]Pos{0, 2}, 2}}}, 53 | {"abcbcabc$", 3, []Match{{[]Pos{0, 5}, 3}}}, 54 | {"abcbcabc$", 2, []Match{{[]Pos{0, 5}, 3}, {[]Pos{1, 3, 6}, 2}}}, 55 | {`All work and no play makes Jack a dull boy 56 | All work and no play makes Jack a dull boy$`, 4, []Match{{[]Pos{0, 43}, 42}}}, 57 | } 58 | 59 | for _, tc := range testCases { 60 | tree := New() 61 | tree.Update(str2tok(tc.s)...) 
62 | ch := tree.FindDuplOver(tc.threshold) 63 | for _, exp := range tc.matches { 64 | act, ok := <-ch 65 | if !ok { 66 | t.Errorf("missing match %v for '%s'", exp, tc.s) 67 | } else if exp.Len != act.Len || !sliceCmp(exp.Ps, act.Ps) { 68 | t.Errorf("got %v, want %v", act, exp) 69 | } 70 | } 71 | for act := range ch { 72 | t.Errorf("beyond expected match %v for '%s'", act, tc.s) 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /suffixtree/suffixtree.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "math" 7 | "strings" 8 | ) 9 | 10 | const infinity = math.MaxInt32 11 | 12 | // Pos denotes position in data slice. 13 | type Pos int32 14 | 15 | type Token interface { 16 | Val() int 17 | } 18 | 19 | // STree is a struct representing a suffix tree. 20 | type STree struct { 21 | data []Token 22 | root *state 23 | auxState *state // auxiliary state 24 | 25 | // active point 26 | s *state 27 | start, end Pos 28 | } 29 | 30 | // New creates new suffix tree. 31 | func New() *STree { 32 | t := new(STree) 33 | t.data = make([]Token, 0, 50) 34 | t.root = newState(t) 35 | t.auxState = newState(t) 36 | t.root.linkState = t.auxState 37 | t.s = t.root 38 | return t 39 | } 40 | 41 | // Update refreshes the suffix tree to by new data. 42 | func (t *STree) Update(data ...Token) { 43 | t.data = append(t.data, data...) 44 | for range data { 45 | t.update() 46 | t.s, t.start = t.canonize(t.s, t.start, t.end) 47 | t.end++ 48 | } 49 | } 50 | 51 | // update transforms suffix tree T(n) to T(n+1). 
// The loop in update repeatedly asks testAndSplit about the reference pair
// (s, (start, end-1)). While that is not an end point, the returned state r
// gets a new transition starting at end and ending at infinity (fork), the
// state handled in the previous iteration is linked to r through linkState
// (skipped while oldr is still the root), and the walk continues from
// canonize(s.linkState, start, end-1). When the loop stops, the final s and
// start are stored back as the active point.
//
// testAndSplit, defined right after update, compares against c = t.data[t.end].
// With start <= end it looks at the token at splitPoint on the matching
// transition and, when the values differ, inserts a new state at that point
// and returns it. With start > end it reports an end point when s is the
// auxiliary state or already has a transition for c.
// NOTE(review): the start <= end branch uses the result of findTran without a
// nil check - presumably the reference pair guarantees a match; confirm.
52 | func (t *STree) update() { 53 | oldr := t.root 54 | 55 | // (s, (start, end)) is the canonical reference pair for the active point 56 | s := t.s 57 | start, end := t.start, t.end 58 | var r *state 59 | for { 60 | var endPoint bool 61 | r, endPoint = t.testAndSplit(s, start, end-1) 62 | if endPoint { 63 | break 64 | } 65 | r.fork(end) 66 | if oldr != t.root { 67 | oldr.linkState = r 68 | } 69 | oldr = r 70 | s, start = t.canonize(s.linkState, start, end-1) 71 | } 72 | if oldr != t.root { 73 | oldr.linkState = r 74 | } 75 | 76 | // update active point 77 | t.s = s 78 | t.start = start 79 | } 80 | 81 | // testAndSplit tests whether a state with canonical ref. pair 82 | // (s, (start, end)) is the end point, that is, a state that have 83 | // a c-transition. If not, then state (exs, (start, end)) is made 84 | // explicit (if not already so). 85 | func (t *STree) testAndSplit(s *state, start, end Pos) (exs *state, endPoint bool) { 86 | c := t.data[t.end] 87 | if start <= end { 88 | tr := s.findTran(t.data[start]) 89 | splitPoint := tr.start + end - start + 1 90 | if t.data[splitPoint].Val() == c.Val() { 91 | return s, true 92 | } 93 | // make the (s, (start, end)) state explicit 94 | newSt := newState(s.tree) 95 | newSt.addTran(splitPoint, tr.end, tr.state) 96 | tr.end = splitPoint - 1 97 | tr.state = newSt 98 | return newSt, false 99 | } 100 | if s == t.auxState || s.findTran(c) != nil { 101 | return s, true 102 | } 103 | return s, false 104 | } 105 | 106 | // canonize returns updated state and start position for ref. pair 107 | // (s, (start, end)) of state r so the new ref. pair is canonical, 108 | // that is, referenced from the closest explicit ancestor of r. 
// canonize first replaces the auxiliary state by the root while advancing
// start by one, and returns immediately when start > end. Otherwise it follows
// the transition selected by t.data[start], advancing start by that
// transition's length, until the selected transition is longer than the
// remaining span end-start; a missing transition panics.
//
// Also defined in this span:
//   - At returns t.data[p] and panics when p lies outside the data slice.
//   - String and printState write one "* (start, end)" row per transition,
//     indented by depth, recursing into the transition's target state.
//   - state and newState: a node holding its tree, its outgoing transitions
//     and a linkState pointer, created with no transitions and a nil link.
//   - addTran appends a transition built by newTran.
//   - fork adds a transition from i to infinity that leads to a fresh state
//     and returns that state.
109 | func (t *STree) canonize(s *state, start, end Pos) (*state, Pos) { 110 | if s == t.auxState { 111 | s, start = t.root, start+1 112 | } 113 | if start > end { 114 | return s, start 115 | } 116 | 117 | var tr *tran 118 | for { 119 | if start <= end { 120 | tr = s.findTran(t.data[start]) 121 | if tr == nil { 122 | panic(fmt.Sprintf("there should be some transition for '%d' at %d", 123 | t.data[start].Val(), start)) 124 | } 125 | } 126 | if tr.end-tr.start > end-start { 127 | break 128 | } 129 | start += tr.end - tr.start + 1 130 | s = tr.state 131 | } 132 | if s == nil { 133 | panic("there should always be some suffix link resolution") 134 | } 135 | return s, start 136 | } 137 | 138 | func (t *STree) At(p Pos) Token { 139 | if p < 0 || p >= Pos(len(t.data)) { 140 | panic("position out of bounds") 141 | } 142 | return t.data[p] 143 | } 144 | 145 | func (t *STree) String() string { 146 | buf := new(bytes.Buffer) 147 | printState(buf, t.root, 0) 148 | return buf.String() 149 | } 150 | 151 | func printState(buf *bytes.Buffer, s *state, ident int) { 152 | for _, tr := range s.trans { 153 | fmt.Fprint(buf, strings.Repeat(" ", ident)) 154 | fmt.Fprintf(buf, "* (%d, %d)\n", tr.start, tr.ActEnd()) 155 | printState(buf, tr.state, ident+1) 156 | } 157 | } 158 | 159 | // state is an explicit state of the suffix tree. 160 | type state struct { 161 | tree *STree 162 | trans []*tran 163 | linkState *state 164 | } 165 | 166 | func newState(t *STree) *state { 167 | return &state{ 168 | tree: t, 169 | trans: make([]*tran, 0), 170 | linkState: nil, 171 | } 172 | } 173 | 174 | func (s *state) addTran(start, end Pos, r *state) { 175 | s.trans = append(s.trans, newTran(start, end, r)) 176 | } 177 | 178 | // fork creates a new branch from the state s. 179 | func (s *state) fork(i Pos) *state { 180 | r := newState(s.tree) 181 | s.addTran(i, infinity, r) 182 | return r 183 | } 184 | 185 | // findTran finds c-transition. 
186 | func (s *state) findTran(c Token) *tran { 187 | for _, tran := range s.trans { 188 | if s.tree.data[tran.start].Val() == c.Val() { 189 | return tran 190 | } 191 | } 192 | return nil 193 | } 194 | 195 | // tran represents a state's transition. 196 | type tran struct { 197 | start, end Pos 198 | state *state 199 | } 200 | 201 | func newTran(start, end Pos, s *state) *tran { 202 | return &tran{start, end, s} 203 | } 204 | 205 | func (t *tran) len() int { 206 | return int(t.end - t.start + 1) 207 | } 208 | 209 | // ActEnd returns actual end position as consistent with 210 | // the actual length of the data in the STree. 211 | func (t *tran) ActEnd() Pos { 212 | if t.end == infinity { 213 | return Pos(len(t.state.tree.data)) - 1 214 | } 215 | return t.end 216 | } 217 | -------------------------------------------------------------------------------- /suffixtree/suffixtree_test.go: -------------------------------------------------------------------------------- 1 | package suffixtree 2 | 3 | import "testing" 4 | 5 | type char byte 6 | 7 | func (c char) Val() int { 8 | return int(c) 9 | } 10 | 11 | func str2tok(str string) []Token { 12 | toks := make([]Token, len(str)) 13 | for i, c := range str { 14 | toks[i] = char(c) 15 | } 16 | return toks 17 | } 18 | 19 | func TestConstruction(t *testing.T) { 20 | str := "cacao" 21 | _, s := genStates(8, str) 22 | // s[0] is root 23 | s[0].addTran(0, 1, s[1]) // ca 24 | s[0].addTran(1, 1, s[2]) // a 25 | s[0].addTran(4, 4, s[3]) // o 26 | 27 | s[1].addTran(2, 4, s[4]) // cao 28 | s[1].addTran(4, 4, s[5]) // o 29 | 30 | s[2].addTran(2, 4, s[4]) // cao 31 | s[2].addTran(4, 4, s[5]) // o 32 | 33 | cacao := New() 34 | cacao.Update(str2tok(str)...) 35 | compareTrees(t, s[0], cacao.root) 36 | 37 | str2 := "banana" 38 | _, r := genStates(4, str2) 39 | r[0].addTran(0, 5, r[1]) // banana 40 | r[0].addTran(1, 5, r[2]) // anana 41 | r[0].addTran(2, 5, r[3]) // nana 42 | 43 | banana := New() 44 | banana.Update(str2tok(str2)...) 
45 | compareTrees(t, r[0], banana.root) 46 | 47 | _, q := genStates(11, str2+"$") 48 | // r[0] is root 49 | q[0].addTran(0, 6, q[1]) // banana$ 50 | q[0].addTran(1, 1, q[2]) // a 51 | q[0].addTran(2, 3, q[3]) // na 52 | q[0].addTran(6, 6, q[4]) // $ 53 | 54 | q[2].addTran(2, 3, q[5]) // na 55 | q[2].addTran(6, 6, q[6]) // $ 56 | 57 | q[3].addTran(4, 6, q[7]) // na$ 58 | q[3].addTran(6, 6, q[8]) // $ 59 | 60 | q[5].addTran(4, 6, q[9]) // na$ 61 | q[5].addTran(6, 6, q[10]) // $ 62 | 63 | banana.Update(char('$')) 64 | compareTrees(t, q[0], banana.root) 65 | 66 | foo := New() 67 | foo.Update(str2tok("a b ac c ")...) 68 | } 69 | 70 | func compareTrees(t *testing.T, expected, actual *state) { 71 | ch1, ch2 := walker(expected), walker(actual) 72 | for { 73 | etran, ok1 := <-ch1 74 | atran, ok2 := <-ch2 75 | if !ok1 || !ok2 { 76 | if ok1 { 77 | t.Error("expected tree is longer") 78 | } else if ok2 { 79 | t.Error("actual tree is longer") 80 | } 81 | break 82 | } 83 | if etran.start != atran.start || etran.ActEnd() != atran.ActEnd() { 84 | t.Errorf("got transition (%d, %d) '%s', want (%d, %d) '%s'", 85 | atran.start, atran.ActEnd(), actual.tree.data[atran.start:atran.ActEnd()+1], 86 | etran.start, etran.ActEnd(), expected.tree.data[etran.start:etran.ActEnd()+1], 87 | ) 88 | } 89 | } 90 | } 91 | 92 | func walker(s *state) <-chan *tran { 93 | ch := make(chan *tran) 94 | go func() { 95 | walk(s, ch) 96 | close(ch) 97 | }() 98 | return ch 99 | } 100 | 101 | func walk(s *state, ch chan<- *tran) { 102 | for _, tr := range s.trans { 103 | ch <- tr 104 | walk(tr.state, ch) 105 | } 106 | } 107 | 108 | func genStates(count int, data string) (*STree, []*state) { 109 | t := new(STree) 110 | t.data = str2tok(data) 111 | states := make([]*state, count) 112 | for i := range states { 113 | states[i] = newState(t) 114 | } 115 | return t, states 116 | } 117 | 118 | type refPair struct { 119 | s *state 120 | start, end Pos 121 | } 122 | 123 | func TestCanonize(t *testing.T) { 124 | tree, s := 
genStates(5, "somebanana") 125 | tree.auxState, tree.root = s[4], s[0] 126 | s[0].addTran(0, 3, s[1]) 127 | s[1].addTran(4, 6, s[2]) 128 | s[2].addTran(7, infinity, s[3]) 129 | 130 | find := func(needle *state) int { 131 | for i, state := range s { 132 | if state == needle { 133 | return i 134 | } 135 | } 136 | return -1 137 | } 138 | 139 | var testCases = []struct { 140 | origin, expected refPair 141 | }{ 142 | {refPair{s[0], 0, 0}, refPair{s[0], 0, 0}}, 143 | {refPair{s[0], 0, 2}, refPair{s[0], 0, 0}}, 144 | {refPair{s[0], 0, 3}, refPair{s[1], 4, 0}}, 145 | {refPair{s[0], 0, 8}, refPair{s[2], 7, 0}}, 146 | {refPair{s[0], 0, 6}, refPair{s[2], 7, 0}}, 147 | {refPair{s[0], 0, 100}, refPair{s[2], 7, 0}}, 148 | {refPair{s[4], -1, 100}, refPair{s[2], 7, 0}}, 149 | } 150 | 151 | for _, tc := range testCases { 152 | s, start := tree.canonize(tc.origin.s, tc.origin.start, tc.origin.end) 153 | if s != tc.expected.s || start != tc.expected.start { 154 | t.Errorf("for origin ref. pair (%d, (%d, %d)) got (%d, %d), want (%d, %d)", 155 | find(tc.origin.s), tc.origin.start, tc.origin.end, 156 | find(s), start, 157 | find(tc.expected.s), tc.expected.start, 158 | ) 159 | } 160 | } 161 | } 162 | 163 | func TestSplitting(t *testing.T) { 164 | tree := new(STree) 165 | tree.data = str2tok("banana|cbao") 166 | s1 := newState(tree) 167 | s2 := newState(tree) 168 | s1.addTran(0, 3, s2) 169 | 170 | // active point is (s1, 0, -1), an explicit state 171 | tree.end = 7 // c 172 | rets, end := tree.testAndSplit(s1, 0, -1) 173 | if rets != s1 { 174 | t.Errorf("got state %p, want %p", rets, s1) 175 | } 176 | if end { 177 | t.Error("should not be an end-point") 178 | } 179 | tree.end = 8 // b 180 | _, end = tree.testAndSplit(s1, 0, -1) 181 | if !end { 182 | t.Error("should be an end-point") 183 | } 184 | 185 | // active point is (s1, 0, 2), an implicit state 186 | tree.end = 9 // a 187 | rets, end = tree.testAndSplit(s1, 0, 2) 188 | if rets != s1 { 189 | t.Error("returned state should be 
unchanged") 190 | } 191 | if !end { 192 | t.Error("should be an end-point") 193 | } 194 | 195 | // [s1]-banana->[s2] => [s1]-ban->[rets]-ana->[s2] 196 | tree.end = 10 // o 197 | rets, end = tree.testAndSplit(s1, 0, 2) 198 | tr := s1.findTran(char('b')) 199 | if tr == nil { 200 | t.Error("should have a b-transition") 201 | } else if tr.state != rets { 202 | t.Errorf("got state %p, want %p", tr.state, rets) 203 | } 204 | tr2 := rets.findTran(char('a')) 205 | if tr2 == nil { 206 | t.Error("should have an a-transition") 207 | } else if tr2.state != s2 { 208 | t.Errorf("got state %p, want %p", tr2.state, s2) 209 | } 210 | if end { 211 | t.Error("should not be an end-point") 212 | } 213 | } 214 | 215 | func TestPosMaxValue(t *testing.T) { 216 | var p Pos = infinity 217 | if p+1 > 0 { 218 | t.Error("const infinity is not max value") 219 | } 220 | } 221 | 222 | func BenchmarkConstruction(b *testing.B) { 223 | stream := str2tok(`all work and no play makes jack a dull boy 224 | all work and no play makes jack a dull boy 225 | all work and no play makes jack a dull boy`) 226 | 227 | for i := 0; i < b.N; i++ { 228 | t := New() 229 | t.Update(stream...) 
230 | } 231 | } 232 | -------------------------------------------------------------------------------- /syntax/golang/golang.go: -------------------------------------------------------------------------------- 1 | package golang 2 | 3 | import ( 4 | "go/ast" 5 | "go/parser" 6 | "go/token" 7 | 8 | "github.com/mibk/dupl/syntax" 9 | ) 10 | 11 | const ( 12 | BadNode = iota 13 | File 14 | ArrayType 15 | AssignStmt 16 | BasicLit 17 | BinaryExpr 18 | BlockStmt 19 | BranchStmt 20 | CallExpr 21 | CaseClause 22 | ChanType 23 | CommClause 24 | CompositeLit 25 | DeclStmt 26 | DeferStmt 27 | Ellipsis 28 | EmptyStmt 29 | ExprStmt 30 | Field 31 | FieldList 32 | ForStmt 33 | FuncDecl 34 | FuncLit 35 | FuncType 36 | GenDecl 37 | GoStmt 38 | Ident 39 | IfStmt 40 | IncDecStmt 41 | IndexExpr 42 | InterfaceType 43 | KeyValueExpr 44 | LabeledStmt 45 | MapType 46 | ParenExpr 47 | RangeStmt 48 | ReturnStmt 49 | SelectStmt 50 | SelectorExpr 51 | SendStmt 52 | SliceExpr 53 | StarExpr 54 | StructType 55 | SwitchStmt 56 | TypeAssertExpr 57 | TypeSpec 58 | TypeSwitchStmt 59 | UnaryExpr 60 | ValueSpec 61 | ) 62 | 63 | // Parse the given file and return uniform syntax tree. 64 | func Parse(filename string) (*syntax.Node, error) { 65 | fset := token.NewFileSet() 66 | file, err := parser.ParseFile(fset, filename, nil, 0) 67 | if err != nil { 68 | return nil, err 69 | } 70 | t := &transformer{ 71 | fileset: fset, 72 | filename: filename, 73 | } 74 | return t.trans(file), nil 75 | } 76 | 77 | type transformer struct { 78 | fileset *token.FileSet 79 | filename string 80 | } 81 | 82 | // trans transforms given golang AST to uniform tree structure. 
83 | func (t *transformer) trans(node ast.Node) (o *syntax.Node) { 84 | o = syntax.NewNode() 85 | o.Filename = t.filename 86 | st, end := node.Pos(), node.End() 87 | o.Pos, o.End = t.fileset.File(st).Offset(st), t.fileset.File(end).Offset(end) 88 | 89 | switch n := node.(type) { 90 | case *ast.ArrayType: 91 | o.Type = ArrayType 92 | if n.Len != nil { 93 | o.AddChildren(t.trans(n.Len)) 94 | } 95 | o.AddChildren(t.trans(n.Elt)) 96 | 97 | case *ast.AssignStmt: 98 | o.Type = AssignStmt 99 | for _, e := range n.Rhs { 100 | o.AddChildren(t.trans(e)) 101 | } 102 | 103 | for _, e := range n.Lhs { 104 | o.AddChildren(t.trans(e)) 105 | } 106 | 107 | case *ast.BasicLit: 108 | o.Type = BasicLit 109 | 110 | case *ast.BinaryExpr: 111 | o.Type = BinaryExpr 112 | o.AddChildren(t.trans(n.X), t.trans(n.Y)) 113 | 114 | case *ast.BlockStmt: 115 | o.Type = BlockStmt 116 | for _, stmt := range n.List { 117 | o.AddChildren(t.trans(stmt)) 118 | } 119 | 120 | case *ast.BranchStmt: 121 | o.Type = BranchStmt 122 | if n.Label != nil { 123 | o.AddChildren(t.trans(n.Label)) 124 | } 125 | 126 | case *ast.CallExpr: 127 | o.Type = CallExpr 128 | o.AddChildren(t.trans(n.Fun)) 129 | for _, arg := range n.Args { 130 | o.AddChildren(t.trans(arg)) 131 | } 132 | 133 | case *ast.CaseClause: 134 | o.Type = CaseClause 135 | for _, e := range n.List { 136 | o.AddChildren(t.trans(e)) 137 | } 138 | for _, stmt := range n.Body { 139 | o.AddChildren(t.trans(stmt)) 140 | } 141 | 142 | case *ast.ChanType: 143 | o.Type = ChanType 144 | o.AddChildren(t.trans(n.Value)) 145 | 146 | case *ast.CommClause: 147 | o.Type = CommClause 148 | if n.Comm != nil { 149 | o.AddChildren(t.trans(n.Comm)) 150 | } 151 | for _, stmt := range n.Body { 152 | o.AddChildren(t.trans(stmt)) 153 | } 154 | 155 | case *ast.CompositeLit: 156 | o.Type = CompositeLit 157 | if n.Type != nil { 158 | o.AddChildren(t.trans(n.Type)) 159 | } 160 | for _, e := range n.Elts { 161 | o.AddChildren(t.trans(e)) 162 | } 163 | 164 | case *ast.DeclStmt: 165 | 
o.Type = DeclStmt 166 | o.AddChildren(t.trans(n.Decl)) 167 | 168 | case *ast.DeferStmt: 169 | o.Type = DeferStmt 170 | o.AddChildren(t.trans(n.Call)) 171 | 172 | case *ast.Ellipsis: 173 | o.Type = Ellipsis 174 | if n.Elt != nil { 175 | o.AddChildren(t.trans(n.Elt)) 176 | } 177 | 178 | case *ast.EmptyStmt: 179 | o.Type = EmptyStmt 180 | 181 | case *ast.ExprStmt: 182 | o.Type = ExprStmt 183 | o.AddChildren(t.trans(n.X)) 184 | 185 | case *ast.Field: 186 | o.Type = Field 187 | for _, name := range n.Names { 188 | o.AddChildren(t.trans(name)) 189 | } 190 | o.AddChildren(t.trans(n.Type)) 191 | 192 | case *ast.FieldList: 193 | o.Type = FieldList 194 | for _, field := range n.List { 195 | o.AddChildren(t.trans(field)) 196 | } 197 | 198 | case *ast.File: 199 | o.Type = File 200 | for _, decl := range n.Decls { 201 | if genDecl, ok := decl.(*ast.GenDecl); ok && genDecl.Tok == token.IMPORT { 202 | // skip import declarations 203 | continue 204 | } 205 | o.AddChildren(t.trans(decl)) 206 | } 207 | 208 | case *ast.ForStmt: 209 | o.Type = ForStmt 210 | if n.Init != nil { 211 | o.AddChildren(t.trans(n.Init)) 212 | } 213 | if n.Cond != nil { 214 | o.AddChildren(t.trans(n.Cond)) 215 | } 216 | if n.Post != nil { 217 | o.AddChildren(t.trans(n.Post)) 218 | } 219 | o.AddChildren(t.trans(n.Body)) 220 | 221 | case *ast.FuncDecl: 222 | o.Type = FuncDecl 223 | if n.Recv != nil { 224 | o.AddChildren(t.trans(n.Recv)) 225 | } 226 | o.AddChildren(t.trans(n.Name), t.trans(n.Type)) 227 | if n.Body != nil { 228 | o.AddChildren(t.trans(n.Body)) 229 | } 230 | 231 | case *ast.FuncLit: 232 | o.Type = FuncLit 233 | o.AddChildren(t.trans(n.Type), t.trans(n.Body)) 234 | 235 | case *ast.FuncType: 236 | o.Type = FuncType 237 | o.AddChildren(t.trans(n.Params)) 238 | if n.Results != nil { 239 | o.AddChildren(t.trans(n.Results)) 240 | } 241 | 242 | case *ast.GenDecl: 243 | o.Type = GenDecl 244 | for _, spec := range n.Specs { 245 | o.AddChildren(t.trans(spec)) 246 | } 247 | 248 | case *ast.GoStmt: 249 | 
o.Type = GoStmt 250 | o.AddChildren(t.trans(n.Call)) 251 | 252 | case *ast.Ident: 253 | o.Type = Ident 254 | 255 | case *ast.IfStmt: 256 | o.Type = IfStmt 257 | if n.Init != nil { 258 | o.AddChildren(t.trans(n.Init)) 259 | } 260 | o.AddChildren(t.trans(n.Cond), t.trans(n.Body)) 261 | if n.Else != nil { 262 | o.AddChildren(t.trans(n.Else)) 263 | } 264 | 265 | case *ast.IncDecStmt: 266 | o.Type = IncDecStmt 267 | o.AddChildren(t.trans(n.X)) 268 | 269 | case *ast.IndexExpr: 270 | o.Type = IndexExpr 271 | o.AddChildren(t.trans(n.X), t.trans(n.Index)) 272 | 273 | case *ast.InterfaceType: 274 | o.Type = InterfaceType 275 | o.AddChildren(t.trans(n.Methods)) 276 | 277 | case *ast.KeyValueExpr: 278 | o.Type = KeyValueExpr 279 | o.AddChildren(t.trans(n.Key), t.trans(n.Value)) 280 | 281 | case *ast.LabeledStmt: 282 | o.Type = LabeledStmt 283 | o.AddChildren(t.trans(n.Label), t.trans(n.Stmt)) 284 | 285 | case *ast.MapType: 286 | o.Type = MapType 287 | o.AddChildren(t.trans(n.Key), t.trans(n.Value)) 288 | 289 | case *ast.ParenExpr: 290 | o.Type = ParenExpr 291 | o.AddChildren(t.trans(n.X)) 292 | 293 | case *ast.RangeStmt: 294 | o.Type = RangeStmt 295 | if n.Key != nil { 296 | o.AddChildren(t.trans(n.Key)) 297 | } 298 | if n.Value != nil { 299 | o.AddChildren(t.trans(n.Value)) 300 | } 301 | o.AddChildren(t.trans(n.X), t.trans(n.Body)) 302 | 303 | case *ast.ReturnStmt: 304 | o.Type = ReturnStmt 305 | for _, e := range n.Results { 306 | o.AddChildren(t.trans(e)) 307 | } 308 | 309 | case *ast.SelectStmt: 310 | o.Type = SelectStmt 311 | o.AddChildren(t.trans(n.Body)) 312 | 313 | case *ast.SelectorExpr: 314 | o.Type = SelectorExpr 315 | o.AddChildren(t.trans(n.X), t.trans(n.Sel)) 316 | 317 | case *ast.SendStmt: 318 | o.Type = SendStmt 319 | o.AddChildren(t.trans(n.Chan), t.trans(n.Value)) 320 | 321 | case *ast.SliceExpr: 322 | o.Type = SliceExpr 323 | o.AddChildren(t.trans(n.X)) 324 | if n.Low != nil { 325 | o.AddChildren(t.trans(n.Low)) 326 | } 327 | if n.High != nil { 328 | 
o.AddChildren(t.trans(n.High)) 329 | } 330 | if n.Max != nil { 331 | o.AddChildren(t.trans(n.Max)) 332 | } 333 | 334 | case *ast.StarExpr: 335 | o.Type = StarExpr 336 | o.AddChildren(t.trans(n.X)) 337 | 338 | case *ast.StructType: 339 | o.Type = StructType 340 | o.AddChildren(t.trans(n.Fields)) 341 | 342 | case *ast.SwitchStmt: 343 | o.Type = SwitchStmt 344 | if n.Init != nil { 345 | o.AddChildren(t.trans(n.Init)) 346 | } 347 | if n.Tag != nil { 348 | o.AddChildren(t.trans(n.Tag)) 349 | } 350 | o.AddChildren(t.trans(n.Body)) 351 | 352 | case *ast.TypeAssertExpr: 353 | o.Type = TypeAssertExpr 354 | o.AddChildren(t.trans(n.X)) 355 | if n.Type != nil { 356 | o.AddChildren(t.trans(n.Type)) 357 | } 358 | 359 | case *ast.TypeSpec: 360 | o.Type = TypeSpec 361 | o.AddChildren(t.trans(n.Name), t.trans(n.Type)) 362 | 363 | case *ast.TypeSwitchStmt: 364 | o.Type = TypeSwitchStmt 365 | if n.Init != nil { 366 | o.AddChildren(t.trans(n.Init)) 367 | } 368 | o.AddChildren(t.trans(n.Assign), t.trans(n.Body)) 369 | 370 | case *ast.UnaryExpr: 371 | o.Type = UnaryExpr 372 | o.AddChildren(t.trans(n.X)) 373 | 374 | case *ast.ValueSpec: 375 | o.Type = ValueSpec 376 | for _, name := range n.Names { 377 | o.AddChildren(t.trans(name)) 378 | } 379 | if n.Type != nil { 380 | o.AddChildren(t.trans(n.Type)) 381 | } 382 | for _, val := range n.Values { 383 | o.AddChildren(t.trans(val)) 384 | } 385 | 386 | default: 387 | o.Type = BadNode 388 | 389 | } 390 | 391 | return o 392 | } 393 | -------------------------------------------------------------------------------- /syntax/syntax.go: -------------------------------------------------------------------------------- 1 | package syntax 2 | 3 | import ( 4 | "crypto/sha1" 5 | 6 | "github.com/mibk/dupl/suffixtree" 7 | ) 8 | 9 | type Node struct { 10 | Type int 11 | Filename string 12 | Pos, End int 13 | Children []*Node 14 | Owns int 15 | } 16 | 17 | func NewNode() *Node { 18 | return &Node{} 19 | } 20 | 21 | func (n *Node) AddChildren(children 
...*Node) { 22 | n.Children = append(n.Children, children...) 23 | } 24 | 25 | func (n *Node) Val() int { 26 | return n.Type 27 | } 28 | 29 | type Match struct { 30 | Hash string 31 | Frags [][]*Node 32 | } 33 | 34 | func Serialize(n *Node) []*Node { 35 | stream := make([]*Node, 0, 10) 36 | serial(n, &stream) 37 | return stream 38 | } 39 | 40 | func serial(n *Node, stream *[]*Node) int { 41 | *stream = append(*stream, n) 42 | var count int 43 | for _, child := range n.Children { 44 | count += serial(child, stream) 45 | } 46 | n.Owns = count 47 | return count + 1 48 | } 49 | 50 | // FindSyntaxUnits finds all complete syntax units in the match group and returns them 51 | // with the corresponding hash. 52 | func FindSyntaxUnits(data []*Node, m suffixtree.Match, threshold int) Match { 53 | if len(m.Ps) == 0 { 54 | return Match{} 55 | } 56 | firstSeq := data[m.Ps[0] : m.Ps[0]+m.Len] 57 | indexes := getUnitsIndexes(firstSeq, threshold) 58 | 59 | // TODO: is this really working? 60 | indexCnt := len(indexes) 61 | if indexCnt > 0 { 62 | lasti := indexes[indexCnt-1] 63 | firstn := firstSeq[lasti] 64 | for i := 1; i < len(m.Ps); i++ { 65 | n := data[int(m.Ps[i])+lasti] 66 | if firstn.Owns != n.Owns { 67 | indexes = indexes[:indexCnt-1] 68 | break 69 | } 70 | } 71 | } 72 | if len(indexes) == 0 || isCyclic(indexes, firstSeq) || spansMultipleFiles(indexes, firstSeq) { 73 | return Match{} 74 | } 75 | 76 | match := Match{Frags: make([][]*Node, len(m.Ps))} 77 | for i, pos := range m.Ps { 78 | match.Frags[i] = make([]*Node, len(indexes)) 79 | for j, index := range indexes { 80 | match.Frags[i][j] = data[int(pos)+index] 81 | } 82 | } 83 | 84 | lastIndex := indexes[len(indexes)-1] 85 | match.Hash = hashSeq(firstSeq[indexes[0] : lastIndex+firstSeq[lastIndex].Owns]) 86 | return match 87 | } 88 | 89 | func getUnitsIndexes(nodeSeq []*Node, threshold int) []int { 90 | var indexes []int 91 | var split bool 92 | for i := 0; i < len(nodeSeq); { 93 | n := nodeSeq[i] 94 | switch { 95 | case 
n.Owns >= len(nodeSeq)-i: 96 | // not complete syntax unit 97 | i++ 98 | split = true 99 | continue 100 | case n.Owns+1 < threshold: 101 | split = true 102 | default: 103 | if split { 104 | indexes = indexes[:0] 105 | split = false 106 | } 107 | indexes = append(indexes, i) 108 | } 109 | i += n.Owns + 1 110 | } 111 | return indexes 112 | } 113 | 114 | // isCyclic finds out whether there is a repetive pattern in the found clone. If positive, 115 | // it return false to point out that the clone would be redundant. 116 | func isCyclic(indexes []int, nodes []*Node) bool { 117 | cnt := len(indexes) 118 | if cnt <= 1 { 119 | return false 120 | } 121 | 122 | alts := make(map[int]bool) 123 | for i := 1; i <= cnt/2; i++ { 124 | if cnt%i == 0 { 125 | alts[i] = true 126 | } 127 | } 128 | 129 | for i := 0; i < indexes[cnt/2]; i++ { 130 | nstart := nodes[i+indexes[0]] 131 | AltLoop: 132 | for alt := range alts { 133 | for j := alt; j < cnt; j += alt { 134 | index := i + indexes[j] 135 | if index < len(nodes) { 136 | nalt := nodes[index] 137 | if nstart.Owns == nalt.Owns && nstart.Type == nalt.Type { 138 | continue 139 | } 140 | } else if i >= indexes[alt] { 141 | return true 142 | } 143 | delete(alts, alt) 144 | continue AltLoop 145 | } 146 | } 147 | if len(alts) == 0 { 148 | return false 149 | } 150 | } 151 | return true 152 | } 153 | 154 | func spansMultipleFiles(indexes []int, nodes []*Node) bool { 155 | if len(indexes) < 2 { 156 | return false 157 | } 158 | f := nodes[indexes[0]].Filename 159 | for i := 1; i < len(indexes); i++ { 160 | if nodes[indexes[i]].Filename != f { 161 | return true 162 | } 163 | } 164 | return false 165 | } 166 | 167 | func hashSeq(nodes []*Node) string { 168 | h := sha1.New() 169 | bytes := make([]byte, len(nodes)) 170 | for i, node := range nodes { 171 | bytes[i] = byte(node.Type) 172 | } 173 | h.Write(bytes) 174 | return string(h.Sum(nil)) 175 | } 176 | -------------------------------------------------------------------------------- 
/syntax/syntax_test.go: -------------------------------------------------------------------------------- 1 | package syntax 2 | 3 | import "testing" 4 | 5 | func TestSerialization(t *testing.T) { 6 | n := genNodes(7) 7 | n[0].AddChildren(n[1], n[2], n[3]) 8 | n[1].AddChildren(n[4], n[5]) 9 | n[2].AddChildren(n[6]) 10 | m := genNodes(6) 11 | m[0].AddChildren(m[1], m[2], m[3], m[4], m[5]) 12 | testCases := []struct { 13 | t *Node 14 | expected []int 15 | }{ 16 | {n[0], []int{6, 2, 0, 0, 1, 0, 0}}, 17 | {m[0], []int{5, 0, 0, 0, 0, 0}}, 18 | } 19 | 20 | for _, tc := range testCases { 21 | compareSeries(t, Serialize(tc.t), tc.expected) 22 | } 23 | } 24 | 25 | func genNodes(cnt int) []*Node { 26 | nodes := make([]*Node, cnt) 27 | for i := range nodes { 28 | nodes[i] = NewNode() 29 | } 30 | return nodes 31 | } 32 | 33 | func compareSeries(t *testing.T, stream []*Node, owns []int) { 34 | if len(stream) != len(owns) { 35 | t.Errorf("series aren't the same length; got %d, want %d", len(stream), len(owns)) 36 | return 37 | } 38 | for i, item := range stream { 39 | if item.Owns != owns[i] { 40 | t.Errorf("got %d, want %d", item.Owns, owns[i]) 41 | } 42 | } 43 | } 44 | 45 | func TestGetUnitsIndexes(t *testing.T) { 46 | testCases := []struct { 47 | seq string 48 | threshold int 49 | expected []int 50 | }{ 51 | {"a8 a0 a2 a0 a0", 3, []int{2}}, 52 | {"a0 a8 a2 a0 a0", 1, []int{2}}, 53 | {"a3 a0 a0 a0 a1", 3, []int{0}}, 54 | {"a3 a0 a0 a0 a0", 1, []int{0, 4}}, 55 | {"a1 a0 a1 a0 a0", 2, []int{0, 2}}, 56 | } 57 | 58 | Loop: 59 | for _, tc := range testCases { 60 | nodes := str2nodes(tc.seq) 61 | indexes := getUnitsIndexes(nodes, tc.threshold) 62 | for i := range tc.expected { 63 | if i > len(indexes)-1 || tc.expected[i] != indexes[i] { 64 | t.Errorf("for seq '%s', got %v, want %v", tc.seq, indexes, tc.expected) 65 | } 66 | continue Loop 67 | } 68 | } 69 | } 70 | 71 | func TestCyclicDupl(t *testing.T) { 72 | testCases := []struct { 73 | seq string 74 | indexes []int 75 | expected 
bool 76 | }{ 77 | {"a1 b0 a2 b0", []int{0, 2}, false}, 78 | {"a1 b0 a1 b0", []int{0, 2}, true}, 79 | {"a0 a0", []int{0, 1}, true}, 80 | {"a1 b0 c1 b0 a1 b0 c1 b0", []int{0, 2, 4, 6}, true}, 81 | {"a1 b0 c1 b0 a1 b0", []int{0, 2, 4}, false}, 82 | {"a0 b0 a0 c0", []int{0, 1, 2, 3}, false}, 83 | {"a0 b0 a0 b0 a0", []int{0, 1, 2}, false}, 84 | {"a1 b0 a1 b0 c1 b0", []int{0, 2, 4}, false}, 85 | {"a1 a1 a1 a1 a1 a1", []int{0, 4}, false}, 86 | {"a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0 a2 b0 b0", []int{0, 3, 6, 9, 12}, true}, 87 | } 88 | 89 | for _, tc := range testCases { 90 | nodes := str2nodes(tc.seq) 91 | if tc.expected != isCyclic(tc.indexes, nodes) { 92 | t.Errorf("for seq '%s', indexes %v, got %t, want %t", tc.seq, tc.indexes, !tc.expected, tc.expected) 93 | } 94 | } 95 | } 96 | 97 | // str2nodes converts strint to a sequence of *Node by following principle: 98 | // - node is represented by 2 characters 99 | // - first character is node type 100 | // - second character is the number for Node.Owns. 101 | func str2nodes(str string) []*Node { 102 | chars := []rune(str) 103 | nodes := make([]*Node, (len(chars)+1)/3) 104 | for i := 0; i < len(chars)-1; i += 3 { 105 | nodes[i/3] = &Node{Type: int(chars[i]), Owns: int(chars[i+1] - '0')} 106 | } 107 | return nodes 108 | } 109 | --------------------------------------------------------------------------------