├── README.md
├── clean
    ├── .DS_Store
    ├── duplicates
    │   ├── entries.go
    │   ├── entries_test.go
    │   ├── index.go
    │   └── index_test.go
    ├── main.go
    ├── testdata
    │   ├── .DS_Store
    │   ├── copy.txt
    │   └── text.txt
    └── utils
    │   ├── utils.go
    │   └── utils_test.go
└── spaghet
    └── main.go


/README.md:
--------------------------------------------------------------------------------
  1 | # Cleaning Code in Go
  2 | 
  3 | ## Introduction
  4 | So, this article is a little different from my others. Instead of focusing on a specific product, or solving a specific problem, we will be looking at something a little more abstract. This article will be focusing on writing "clean code" with golang. The article will start with a short introduction as to what is meant by "clean code" and then we will move on to a practical example, in which we refactor an example application into a cleaner version. You can find all code for this article at https://github.com/Pungyeon/clean-go
  5 | 
  6 | ## What is "Clean Code"
  7 | The idea of clean code is not something that is particularly rigid in definition. In my opinion, the closest thing to a de facto standard is the set of books produced by Robert C. Martin (also known as "Uncle Bob"), who has written the "Clean Code" series, as well as having produced an excellent and extensive video series on the topic.
  8 | 
  9 | However, I will attempt to give a brief summary of what I believe to be clean code:
 10 | 
 11 | 1. Easy to read code
 12 | 	- Clean code is easy to read. In fact, it should be almost as easy to read as prose. If there is need for comments or the like, the code most likely isn't clean. Its intentions should be very clear, just from skimming the code.
 13 | 2. Independent of rest of code base
 14 | 	- Clean code ensures that if code changes in one part of the codebase, the rest of the codebase is essentially unaffected. In other words, code is segregated into functionality silos, independent of the rest of the code base.
 15 | 3. Testable
 16 | 	- If code is not testable, we can be very sure that it's not clean. Of course, *all code* should be tested, this is not necessarily something that is strictly related to clean code. Making code testable, however, is a big aspect of clean code.
 17 | 
 18 | There are many other additions to these sentiments. Code shouldn't be duplicated, functions shouldn't be very long etc. However, we will cover this later. These three rules are, in my opinion, the most important aspects to writing clean code.
 19 | 
 20 | Whereas most aspects of clean code make sense and seem extremely intuitive, there are also some counterintuitive aspects of clean code. Writing clean code can potentially produce more lines of code than dirty code (also referred to as smelly or spaghetti code). It's therefore very important to recognize that writing clean code is not exclusively about making the code "fat free". The main goal of writing clean code is to make future development of code easier, and to reduce / eliminate the introduction of bugs to applications.
 21 | 
 22 | > NOTE: In this article, I will not be writing tests along with the refactoring. Writing tests before refactoring (and before developing for that matter), is extremely important when writing clean code. However, I typically find that explaining TDD in text, rather than in video is not enjoyable for the writer, nor the reader. However, please please please, write tests when refactoring, to ensure that your refactoring is not destroying your code. I have provided some test examples in the source code for this article.
 23 | 
 24 | ## Our Application
 25 | So, let's get right to it. I made a simple program, which traverses a file system and returns a list of duplicate files, based on their file contents. The way we are doing this is by reading each file and hashing its contents into a `sha1` hex string, which is stored in a hash table and compared against the hashes of the files seen earlier in the traversal.
 26 | 
 27 | ### Spaghetti Code
 28 | This is my first iteration of the program, which was written pretty fast, without consideration for anyone else who might read the code:
 29 | 
 30 | ```go 
 31 | package main
 32 | 
 33 | import (
 34 | 	"crypto/sha1"
 35 | 	"flag"
 36 | 	"fmt"
 37 | 	"io/ioutil"
 38 | 	"os"
 39 | 	"path"
 40 | 	"strconv"
 41 | 	"sync/atomic"
 42 | )
 43 | 
 44 | func traverseDir(hashes, duplicates map[string]string, dupeSize *int64, entries []os.FileInfo, directory string) {
 45 | 	for _, entry := range entries {
 46 | 		fullpath := (path.Join(directory, entry.Name()))
 47 | 
 48 | 		if !entry.Mode().IsDir() && !entry.Mode().IsRegular() {
 49 | 			continue
 50 | 		}
 51 | 
 52 | 		if entry.IsDir() {
 53 | 			dirFiles, err := ioutil.ReadDir(fullpath)
 54 | 			if err != nil {
 55 | 				panic(err)
 56 | 			}
 57 | 			traverseDir(hashes, duplicates, dupeSize, dirFiles, fullpath)
 58 | 			continue
 59 | 		}
 60 | 		file, err := ioutil.ReadFile(fullpath)
 61 | 		if err != nil {
 62 | 			panic(err)
 63 | 		}
 64 | 		hash := sha1.New()
 65 | 		if _, err := hash.Write(file); err != nil {
 66 | 			panic(err)
 67 | 		}
 68 | 		hashSum := hash.Sum(nil)
 69 | 		hashString := fmt.Sprintf("%x", hashSum)
 70 | 		if hashEntry, ok := hashes[hashString]; ok {
 71 | 			duplicates[hashEntry] = fullpath
 72 | 			atomic.AddInt64(dupeSize, entry.Size())
 73 | 		} else {
 74 | 			hashes[hashString] = fullpath
 75 | 		}
 76 | 	}
 77 | }
 78 | 
 79 | func toReadableSize(nbytes int64) string {
 80 | 	if nbytes > 1000*1000*1000*1000 {
 81 | 		return strconv.FormatInt(nbytes/(1000*1000*1000*1000), 10) + " TB"
 82 | 	}
 83 | 	if nbytes > 1000*1000*1000 {
 84 | 		return strconv.FormatInt(nbytes/(1000*1000*1000), 10) + " GB"
 85 | 	}
 86 | 	if nbytes > 1000*1000 {
 87 | 		return strconv.FormatInt(nbytes/(1000*1000), 10) + " MB"
 88 | 	}
 89 | 	if nbytes > 1000 {
 90 | 		return strconv.FormatInt(nbytes/1000, 10) + " KB"
 91 | 	}
 92 | 	return strconv.FormatInt(nbytes, 10) + " B"
 93 | }
 94 | 
 95 | func main() {
 96 | 	var err error
 97 | 	dir := flag.String("path", "", "the path to traverse searching for duplicates")
 98 | 	flag.Parse()
 99 | 
100 | 	if *dir == "" {
101 | 		*dir, err = os.Getwd()
102 | 		if err != nil {
103 | 			panic(err)
104 | 		}
105 | 	}
106 | 
107 | 	hashes := map[string]string{}
108 | 	duplicates := map[string]string{}
109 | 	var dupeSize int64
110 | 
111 | 	entries, err := ioutil.ReadDir(*dir)
112 | 	if err != nil {
113 | 		panic(err)
114 | 	}
115 | 
116 | 	traverseDir(hashes, duplicates, &dupeSize, entries, *dir)
117 | 
118 | 	fmt.Println("DUPLICATES")
119 | 	for key, val := range duplicates {
120 | 		fmt.Printf("key: %s, val: %s\n", key, val)
121 | 	}
122 | 	fmt.Println("TOTAL FILES:", len(hashes))
123 | 	fmt.Println("DUPLICATES:", len(duplicates))
124 | 	fmt.Println("TOTAL DUPLICATE SIZE:", toReadableSize(dupeSize))
125 | }
126 | 
127 | // running into problems of not being able to open directories inside .app folders
128 | ```
129 | 
130 | Going through the code via the `main` function, we are parsing an input parameter `path`, and using this to read files from a directory. These files will be sent to the function `traverseDir`, to which we are also passing two `map` objects: `hashes` (file hash to the first path found with that content) and `duplicates` (path of the first file to the path of a duplicate of it). Lastly, we are also passing the `dupeSize` parameter, which will hold the cumulative file size of our duplicate files.
131 | 
132 | Finally, we print out our results in a 'human readable' format. Instead of presenting our results as byte count, we will convert them to the appropriate size unit (KB, MB, GB etc.).
133 | 
134 | ## Refactoring
135 | 
136 | ### Refactoring `toReadableSize`
137 | 
138 | First, we are going to be picking the low-hanging-fruits. The function `toReadableSize` looks pretty ugly. Firstly, we are using multiples of `1000`. For everyone who knows what this number represents, it makes sense, however, for anyone reading the code for the first time, this number is rather ambiguous. Therefore, we will establish some global constants for the different values of the sizes that we are returning (GB, MB etc.). We use this when determining the readable size of `nbytes`, and change the if statement blocks into switch statements. As you might have noticed, we are only returning integers, where it would make more sense to return floats:
139 | 
140 | ```go
141 | const (
142 | 	TB = GB * 1000.0
143 | 	GB = MB * 1000.0
144 | 	MB = KB * 1000.0
145 | 	KB = 1000.0
146 | )
147 | 
148 | 
149 | func ToReadableSize(nbytes int64) string {
150 | 	switch {
151 | 	case nbytes > TB:
152 | 		return strconv.FormatFloat(float64(nbytes)/TB, 'f', 2, 64) + " TB"
153 | 	case nbytes > GB:
154 | 		return strconv.FormatFloat(float64(nbytes)/GB, 'f', 2, 64) + " GB"
155 | 	case nbytes > MB:
156 | 		return strconv.FormatFloat(float64(nbytes)/MB, 'f', 2, 64) + " MB"
157 | 	case nbytes > KB:
158 | 		return strconv.FormatFloat(float64(nbytes)/KB, 'f', 2, 64) + " KB"
159 | 	}
160 | 	return strconv.FormatFloat(float64(nbytes), 'f', 2, 64) + " B"
161 | }
162 | ```
163 | 
164 | However, this is still very ugly and just as unreadable as before (if not more so). There is a lot of code duplication here, which we should get rid of. So let's make our own `toFloatString` function:
165 | 
166 | ```go
167 | func toFloatString(nbytes int64, divider float64) string {
168 | 	return strconv.FormatFloat(float64(nbytes)/divider, 'f', 2, 64)
169 | }
170 | 
171 | func ToReadableSize(nbytes int64) string {
172 | 	switch {
173 | 	case nbytes > TB:
174 | 		return toFloatString(nbytes, TB) + " TB"
175 | 	case nbytes > GB:
176 | 		return toFloatString(nbytes, GB) + " GB"
177 | 	case nbytes > MB:
178 | 		return toFloatString(nbytes, MB) + " MB"
179 | 	case nbytes > KB:
180 | 		return toFloatString(nbytes, KB) + " KB"
181 | 	}
182 | 	return strconv.FormatInt(nbytes, 10) + " B"
183 | }
184 | ```
185 | 
186 | Now, our function is nice and readable again. This refactor obviously isn't game changing, but it's a good example to start off with. The intention of this function is now much clearer, with very little effort.
187 | 
188 | ### Refactoring `traverseDir`
189 | 
190 | Ok, let's go to the more interesting function, `traverseDir`. Why do we want to refactor this function? A good way to think about this, is to think of how you would describe this function in pseudo code and then compare it to your actual code. I'm thinking that this function could be reduced to the following pseudo-code.
191 | 
192 | ```
193 | traverseDir:
194 |     for each entry in directory:
195 |         if dir:
196 |             traverseDir(entry)
197 |         if file:
198 |             check file is duplicate
199 | ```
200 | 
201 | That is a lot fewer lines than what we have now... and definitely more readable. Pseudo code is a pretty good way to establish a 'goal' for what your clean code should look like. At the very least, you should aim to make your actual code as readable as pseudo code. We can do this by moving code into functions with descriptive names. This, however, is an iterative process. We will start small and, bit by bit, we will find a solution as to how to make our code simple and readable.
202 | 
203 | So let's look for code, which we can move out of this function... Something that is nice about golang's, otherwise very criticised, error handling system, is that it's quite easy to spot when there is potential for refactoring. Whenever you see two `if err != nil` statements in the same function, you know you can split this out to a single function. In our case, this:
204 | 
205 | ```go
206 | func traverseDir(...)
207 | 	...
208 | 	file, err := ioutil.ReadFile(fullpath)
209 | 	if err != nil {
210 | 		panic(err)
211 | 	}
212 | 	hash := sha1.New()
213 | 	if _, err := hash.Write(file); err != nil {
214 | 		panic(err)
215 | 	}
216 | 	hashSum := hash.Sum(nil)
217 | 	hashString := fmt.Sprintf("%x", hashSum)
218 | 	...
219 | }
220 | ```
221 | 
222 | Can be refactored to the following:
223 | 
224 | ```go
225 | func traverseDir(...) {
226 |     ...
227 |     hash, err := newFileHash(fullpath)
228 |     if err != nil {
229 |         panic(err)
230 |     }
231 |     ...
232 | }
233 | 
234 | func newFileHash(path string) (string, error) {
235 | 	file, err := ioutil.ReadFile(path)
236 | 	if err != nil {
237 | 		return "", err
238 | 	}
239 | 	hash := sha1.New()
240 | 	if _, err := hash.Write(file); err != nil {
241 | 		return "", err
242 | 	}
243 | 	hashSum := hash.Sum(nil)
244 | 	return fmt.Sprintf("%x", hashSum), nil
245 | }
246 | ```
247 | 
248 | The justification behind this is that when reading our `traverseDir` we aren't immediately concerned with how we are creating a new file hash (sum). We just need to know that we are creating a new file hash. If we want to dig into the details of this, then we can do so by looking at our `newFileHash`. In other words, we are removing unnecessary clutter from the function, improving readability.
249 | 
250 | Looking for more low-hanging fruit, we are still panicking in the case of an error. This is pretty dirty, so let's clean it up a little by making `traverseDir` return an `error`: we add `error` to the end of the function declaration and replace `panic(err)` with `return err`:
251 | 
252 | ```go 
253 | func traverseDir(hashes, duplicates map[string]string, dupeSize *int64, entries []os.FileInfo, directory string) error {
254 |     ...
255 |     return err
256 |     ...
257 |     return nil
258 | }
259 | ```
260 | 
261 | Now, looking at the function signature, we can see that it's a bit... long? We are expecting five input parameters. Not only does this make our function signature super long, it can also make it very confusing to read on invocation. Consider the following code (taken from the golang rabbitmq tutorial):
262 | 
263 | ```go
264 | q, err := ch.QueueDeclare("hello",false,false,false,false,nil)
265 | ```
266 | There is absolutely no chance of understanding what this means. We know that we are declaring a queue, but all the boolean inputs... well, they could be anything? So, we have to either look at the source code or look at the documentation. This is tedious and slows down development speed and increases the risk of mistakes. A good rule of thumb is to have two input parameters (three at most), to try to avoid this type of confusion.
267 | 
268 | Generally, if there are more input parameters it is recommended to extract a type (creating a new type, which will be used as the input parameters). As an example:
269 | 
270 | ```go
271 | type QueueOptions struct {
272 |   Name 	string
273 |   Durable bool
274 |   DeleteWhenUsed bool
275 |   Exclusive bool
276 |   NoWait bool
277 |   Arguments interface{}
278 | }
279 | ```
280 | 
281 | Now, our declaration of our queue, could look something like the following:
282 | 
283 | ```go
284 | q, err := ch.NewQueue(QueueOptions{
285 | 	Name: "hello",
286 | 	Durable: true,
287 | 	DeleteWhenUsed: false,
288 | 	Exclusive: false,
289 | 	NoWait: false,
290 | 	Arguments: nil,
291 | })
292 | ```
293 | 
294 | Now there is, at the very least, less confusion as to what kind of queue that we are declaring. We can very easily identify that our queue name is `hello` and is a `durable` queue. Another way to go about this, is to create a wrapper function, which explains the type of queue we are creating. This is preferable, when you have no control over the code, such as when using a library:
295 | 
296 | ```go
297 | func DeclareDurableQueue() (ch.Queue, error) {
298 | 	return ch.QueueDeclare("hello", true, false, false, false, nil)
299 | }
300 | ```
301 | 
302 | So, how do we go about solving this issue for our `traverseDir`? We need all the values, which is why we are passing them to the function. However, when we see this kind of pattern, it's usually a sign that we need to extract a `type`. So, let's make a new `type`, which holds the parameters that we need:
303 | 
304 | Here we create `DuplicateIndex` for keeping track of our hashes and duplicates:
305 | 
306 | ```go 
307 | type DuplicateIndex struct {
308 | 	hashes     map[string]string
309 | 	duplicates map[string]string
310 | 	dupeSize   int64
311 | }
312 | 
313 | func NewDuplicateIndex() *DuplicateIndex {
314 | 	return &DuplicateIndex{
315 | 		hashes:     map[string]string{},
316 | 		duplicates: map[string]string{},
317 | 	}
318 | }
319 | 
320 | func (index *DuplicateIndex) AddEntry(hash, path string, size int64) {
321 | 	if entry, ok := index.hashes[hash]; ok {
322 | 		index.duplicates[entry] = path
323 | 		index.dupeSize += size
324 | 		return
325 | 	}
326 |     index.hashes[hash] = path
327 | }
328 | ```
329 | 
330 | With this, we can actually replace `hashes`, `duplicates` and `dupeSize` from our function parameters and also replace our insert of the hash:
331 | 
332 | ```go
333 | func traverseDir(index *DuplicateIndex, entries []os.FileInfo, directory string) {
334 |     ...
335 |     index.AddEntry(hash, fullpath, entry.Size())
336 |     ...
337 | }
338 | ```
339 | 
340 | But ah! We can actually make this a method on the `DuplicateIndex` `type` and that way, we now only have two input parameters :clap: We can also move the reading of the directory out of the for loop, and just accept a single parameter `directory`. So now our method looks like this:
341 | 
342 | ```go
343 | func (index *DuplicateIndex) TraverseDirRecursively(directory string) error {
344 | 	entries, err := ioutil.ReadDir(directory)
345 | 	if err != nil {
346 | 		return err
347 | 	}
348 | 	for _, entry := range entries {
349 | 		fullpath := (path.Join(directory, entry.Name()))
350 | 
351 | 		if entry.IsDir() {
352 | 			index.TraverseDirRecursively(fullpath)
353 | 			continue
354 | 		}
355 | 		if !entry.Mode().IsRegular() {
356 | 			continue
357 | 		}
358 | 
359 | 		hash, err := newFileHash(fullpath)
360 | 		if err != nil {
361 | 			return err
362 | 		}
363 | 		index.AddEntry(hash, fullpath, entry.Size())
364 | 	}
365 | 	return nil
366 | }
367 | ```
368 | 
369 | > Notice that we have also renamed our function name for clarity
370 | 
371 | Now we are almost happy. However, the for loop in `TraverseDirRecursively` is still smelling a little... what should we do? Well, one way we can get rid of the code smell is to get rid of the `if` statements inside, by creating an `interface` together with a factory-like constructor. This means we will return the appropriate type determined by the input of the constructor. This returned type will implement an `interface`, which declares a single method: `Handle`. This method will perform the appropriate action associated with the type. Let's see what this looks like in action.
372 | 
373 | We will need quite a lot of code, but please don't let that scare you off!
374 | 
375 | ```go
376 | type EntryHandler interface {
377 | 	Handle(*DuplicateIndex) error
378 | }
379 | 
380 | type DirEntry struct {
381 | 	fullpath string
382 | }
383 | 
384 | type FileEntry struct {
385 | 	fullpath string
386 | 	size     int64
387 | }
388 | 
389 | type NilEntry struct{}
390 | 
391 | func NewEntryHandler(entry os.FileInfo, directory string) EntryHandler {
392 | 	fullpath := path.Join(directory, entry.Name())
393 | 	if entry.Mode().IsDir() {
394 | 		return &DirEntry{fullpath}
395 | 	}
396 | 	if entry.Mode().IsRegular() {
397 | 		return &FileEntry{fullpath, entry.Size()}
398 | 	}
399 | 	return &NilEntry{}
400 | }
401 | 
402 | func (entry *DirEntry) Handle(index *DuplicateIndex) error {
403 | 	return index.TraverseDirRecursively(entry.fullpath)
404 | }
405 | 
406 | func (entry *FileEntry) Handle(index *DuplicateIndex) error {
407 | 	hash, err := newFileHash(entry.fullpath)
408 | 	if err != nil {
409 | 		return err
410 | 	}
411 | 	index.AddEntry(hash, entry.fullpath, entry.size)
412 | 	return nil
413 | }
414 | 
415 | func (entry *NilEntry) Handle(index *DuplicateIndex) error {
416 | 	return nil
417 | }
418 | ```
419 | 
420 | With these types, our `TraverseDirRecursively` can now be refactored to the following:
421 | 
422 | ```go
423 | func (index *DuplicateIndex) TraverseDirRecursively(directory string) error {
424 | 	entries, err := ioutil.ReadDir(directory)
425 | 	if err != nil {
426 | 		return err
427 | 	}
428 | 	for _, entry := range entries {
429 | 		if err := NewEntryHandler(entry, directory).Handle(index); err != nil {
430 | 			return err
431 | 		}
432 | 	}
433 | 	return nil
434 | }
435 | ```
436 | 
437 | It may seem excessive to add almost 40 lines of code just to remove 14. However, there is a reason behind the madness. When looking at the `TraverseDirRecursively` function, it is now only 9 lines of code. This is very easily digestible by the brain, whereas 23 lines might be hard to contain at first. The big gain, though, is that we are isolating code: we can test all of our functions very easily and understand exactly what they do, with very little effort. Another great advantage of this isolation is that we are also making our `TraverseDirRecursively` more dynamic. If we find out that there is a new type of entry that we need to handle (Shortcut for example), we can just add a new type implementing `EntryHandler` and add it to our mini-factory `NewEntryHandler`. We are now <b>only</b> changing the logic of `NewEntryHandler`, as every other code addition is completely separate. The obvious advantage of this is that it makes it easier to implement new code, without it breaking the rest of our code. We like this :thumbs_up:
438 | 
439 | We can also attach the `newFileHash` function to our `FileEntry` `type` and remove the `path` input parameter. We can also rename this function. Since it's attached to our `FileEntry`, there is no need to specify that we are creating a `FileHash`:
440 | 
441 | ```go
442 | func (entry *FileEntry) newHash() (string, error) {
443 | 	file, err := ioutil.ReadFile(entry.fullpath)
444 | 	if err != nil {
445 | 		return "", err
446 | 	}
447 | 	hash := sha1.New()
448 | 	if _, err := hash.Write(file); err != nil {
449 | 		return "", err
450 | 	}
451 | 	return fmt.Sprintf("%x", hash.Sum(nil)), nil
452 | }
453 | ```
454 | 
455 | ### Refactoring `main`
456 | 
457 | So, now we are pretty much all refactored on the functions of the program. All we need to do now, is to refactor the main function. 
458 | 
459 | Firstly, I don't like the `var err error`; that has to go! Whenever we see this, it's a sign that we are doing something wrong (in my opinion :)). Normally, this indicates that we should move our code into a new function, but in this case, we can actually just move the logic around a little...
460 | 
461 | The last thing we are going to do, is that we will create a `Result()` function on the `DuplicateIndex`, which will return a similar string to what we are printing now:
462 | 
463 | ```go
464 | func (index *DuplicateIndex) Result() string {
465 | 	buf := &bytes.Buffer{}
466 | 	buf.WriteString("DUPLICATES\n")
467 | 	for key, val := range index.duplicates {
468 | 		buf.WriteString(
469 | 			fmt.Sprintf("key: %s, val: %s\n", key, val),
470 | 		)
471 | 	}
472 | 	buf.WriteString(fmt.Sprintln("TOTAL FILES:", len(index.hashes)))
473 | 	buf.WriteString(fmt.Sprintln("DUPLICATES:", len(index.duplicates)))
474 | 	buf.WriteString(fmt.Sprintln("TOTAL DUPLICATE SIZE:", toReadableSize(index.dupeSize)))
475 | 	return buf.String()
476 | }
477 | ```
478 | 
479 | This makes our final `main` function look like this:
480 | 
481 | ```go
482 | func main() {
483 | 	defaultPath, err := os.Getwd()
484 | 	if err != nil {
485 | 		panic(err)
486 | 	}
487 | 
488 | 	dir := flag.String("path", defaultPath, "the path to traverse searching for duplicates")
489 | 	flag.Parse()
490 | 
491 | 	index := NewDuplicateIndex()
492 | 	if err := index.TraverseDirRecursively(*dir); err != nil {
493 | 		panic(err)
494 | 	}
495 | 
496 | 	fmt.Println(index.Result())
497 | }
498 | ```
499 | 
500 | We could do some more refactoring, but for this short article, I think this is a good point to stop. Of course, in actual code, we would separate these functions into packages, to separate / isolate the responsibility of the code. However, again for the brevity of this article, I have decided to omit this refactoring step. You can, however, see how I decided to do this in the source code. 
501 | 
502 | Now, let's sum up the result of our code refactor:
503 | * Our code is now easy to implement for other developers. 
504 | * It's much easier to read than before. We can skim the code to begin with, and then go into detail on the parts that we wish to. There is less ambiguous / vague code, making everything generally easier to comprehend.
505 | * Our code is super easy to test. This makes further development a lot easier and decreases the chances for bugs, for this very reason.
506 | 
507 | As mentioned to begin with 'clean code' is not necessarily super well defined and sometimes comes down to subjective opinion on what is 'more readable' or 'nicer looking'. However, I hope this article gave some insight as to why it's important to refactor your code, as well as how easy it actually is!
508 | 
509 | Let me know if you have any feedback or questions on this article's content, by sending me an e-mail at lasse@jakobsen.dev. Thanks! :wave:
510 | 


--------------------------------------------------------------------------------
/clean/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pungyeon/clean-go/7b1861fa92fc9cd84f5e4c649f1b71190ce1f14e/clean/.DS_Store


--------------------------------------------------------------------------------
/clean/duplicates/entries.go:
--------------------------------------------------------------------------------
 1 | package duplicates
 2 | 
 3 | import (
 4 | 	"crypto/sha1"
 5 | 	"fmt"
 6 | 	"io/ioutil"
 7 | 	"os"
 8 | 	"path"
 9 | )
10 | 
11 | type EntryHandler interface {
12 | 	Handle(*DuplicateIndex) error
13 | }
14 | 
15 | type DirEntry struct {
16 | 	fullpath string
17 | }
18 | 
19 | type FileEntry struct {
20 | 	fullpath string
21 | 	size     int64
22 | }
23 | 
24 | type NilEntry struct{}
25 | 
26 | func NewEntryHandler(entry os.FileInfo, directory string) EntryHandler {
27 | 	fullpath := path.Join(directory, entry.Name())
28 | 	if entry.Mode().IsDir() {
29 | 		return &DirEntry{fullpath}
30 | 	}
31 | 	if entry.Mode().IsRegular() {
32 | 		return &FileEntry{fullpath, entry.Size()}
33 | 	}
34 | 	return &NilEntry{}
35 | }
36 | 
37 | func (entry *DirEntry) Handle(index *DuplicateIndex) error {
38 | 	return index.TraverseDirRecursively(entry.fullpath)
39 | }
40 | 
41 | func (entry *FileEntry) Handle(index *DuplicateIndex) error {
42 | 	hash, err := entry.newHash()
43 | 	if err != nil {
44 | 		return err
45 | 	}
46 | 	index.AddEntry(hash, entry.fullpath, entry.size)
47 | 	return nil
48 | }
49 | 
50 | func (entry *FileEntry) newHash() (string, error) {
51 | 	file, err := ioutil.ReadFile(entry.fullpath)
52 | 	if err != nil {
53 | 		return "", err
54 | 	}
55 | 	hash := sha1.New()
56 | 	if _, err := hash.Write(file); err != nil {
57 | 		return "", err
58 | 	}
59 | 	return fmt.Sprintf("%x", hash.Sum(nil)), nil
60 | }
61 | 
62 | func (entry *NilEntry) Handle(index *DuplicateIndex) error {
63 | 	return nil
64 | }
65 | 


--------------------------------------------------------------------------------
/clean/duplicates/entries_test.go:
--------------------------------------------------------------------------------
 1 | package duplicates
 2 | 
 3 | import (
 4 | 	"io/ioutil"
 5 | 	"testing"
 6 | )
 7 | 
 8 | const (
 9 | 	filehash = "2123251bdbfbb162fcd77b74f4954726461e8093"
10 | )
11 | 
12 | func TestFileEntry(t *testing.T) { // table-driven: Handle must return an error exactly when expectError is set
13 | 	tt := []struct {
14 | 		name        string
15 | 		fullpath    string
16 | 		size        int64
17 | 		expectError bool
18 | 	}{
19 | 		{"handle existing file", "../testdata/text.txt", 100, false},
20 | 		{"handle non existing file", "../testdata/does_not_exist.txt", 100, true},
21 | 	}
22 | 
23 | 	for _, tc := range tt {
24 | 		t.Run(tc.name, func(t *testing.T) {
25 | 			fileEntry := FileEntry{
26 | 				fullpath: tc.fullpath,
27 | 				size:     tc.size,
28 | 			}
29 | 			err := fileEntry.Handle(NewDuplicateIndex())
30 | 			if (err != nil) != tc.expectError { // fail on an unexpected error AND on a missing expected error
31 | 				t.Errorf("expected error: %v, actual error: %v", tc.expectError, err)
32 | 			}
33 | 		})
34 | 	}
35 | }
36 | 
37 | func TestFileHash(t *testing.T) { // newHash of the fixture file must equal the filehash constant
38 | 	fileEntry := FileEntry{
39 | 		fullpath: "../testdata/text.txt",
40 | 		size:     100,
41 | 	}
42 | 
43 | 	hash, err := fileEntry.newHash()
44 | 	if err != nil {
45 | 		t.Fatal(err) // the hash is meaningless once hashing failed, so stop here
46 | 	}
47 | 
48 | 	if hash != filehash {
49 | 		t.Errorf("expected hash: %s, actual hash: %s", filehash, hash)
50 | 	}
51 | }
52 | 
53 | func TestNilEntry(t *testing.T) {
54 | 	nilEntry := NilEntry{}
55 | 
56 | 	result := nilEntry.Handle(&DuplicateIndex{})
57 | 	if result != nil {
58 | 		t.Error("nil entry should always return nil on handle, but instead return: " + result.Error())
59 | 	}
60 | }
61 | 
62 | func TestEntryHandlers(t *testing.T) {
63 | 	entries, err := ioutil.ReadDir("../testdata")
64 | 	if err != nil {
65 | 		t.Fatal(err)
66 | 	}
67 | 	index := NewDuplicateIndex()
68 | 
69 | 	for _, entry := range entries {
70 | 		if err := NewEntryHandler(entry, "../testdata").Handle(index); err != nil {
71 | 			t.Error(err)
72 | 		}
73 | 	}
74 | }
75 | 


--------------------------------------------------------------------------------
/clean/duplicates/index.go:
--------------------------------------------------------------------------------
 1 | package duplicates
 2 | 
 3 | import (
 4 | 	"bytes"
 5 | 	"fmt"
 6 | 	"io/ioutil"
 7 | 
 8 | 	"github.com/Pungyeon/clean-go-code/clean/utils"
 9 | )
10 | 
11 | type DuplicateIndex struct {
12 | 	hashes     map[string]string
13 | 	duplicates map[string]string
14 | 	dupeSize   int64
15 | }
16 | 
17 | func NewDuplicateIndex() *DuplicateIndex {
18 | 	return &DuplicateIndex{
19 | 		hashes:     map[string]string{},
20 | 		duplicates: map[string]string{},
21 | 	}
22 | }
23 | 
24 | func (index *DuplicateIndex) AddEntry(hash, path string, size int64) {
25 | 	if entry, ok := index.hashes[hash]; ok {
26 | 		index.duplicates[entry] = path
27 | 		index.dupeSize += size
28 | 		return
29 | 	}
30 | 	index.hashes[hash] = path
31 | }
32 | 
33 | func (index *DuplicateIndex) TraverseDirRecursively(directory string) error {
34 | 	entries, err := ioutil.ReadDir(directory)
35 | 	if err != nil {
36 | 		return err
37 | 	}
38 | 	for _, entry := range entries {
39 | 		if err := NewEntryHandler(entry, directory).Handle(index); err != nil {
40 | 			return err
41 | 		}
42 | 	}
43 | 	return nil
44 | }
45 | 
46 | func (index *DuplicateIndex) Result() string {
47 | 	buf := &bytes.Buffer{}
48 | 	buf.WriteString("DUPLICATES\n")
49 | 	for key, val := range index.duplicates {
50 | 		buf.WriteString(
51 | 			fmt.Sprintf("key: %s, val: %s\n", key, val),
52 | 		)
53 | 	}
54 | 	buf.WriteString(fmt.Sprintln("TOTAL FILES:", len(index.hashes)))
55 | 	buf.WriteString(fmt.Sprintln("DUPLICATES:", len(index.duplicates)))
56 | 	buf.WriteString(fmt.Sprintln("TOTAL DUPLICATE SIZE:", utils.ToReadableSize(index.dupeSize)))
57 | 	return buf.String()
58 | }
59 | 


--------------------------------------------------------------------------------
/clean/duplicates/index_test.go:
--------------------------------------------------------------------------------
 1 | package duplicates
 2 | 
 3 | import "testing"
 4 | 
 5 | const (
 6 | 	result = `DUPLICATES
 7 | key: ../testdata/copy.txt, val: ../testdata/text.txt
 8 | TOTAL FILES: 2
 9 | DUPLICATES: 1
10 | TOTAL DUPLICATE SIZE: 41 B
11 | `
12 | )
13 | 
14 | func TestTraverseDir(t *testing.T) { // table-driven: traversal must return an error exactly when expectError is set
15 | 	tt := []struct {
16 | 		name        string
17 | 		directory   string
18 | 		expectError bool
19 | 	}{
20 | 		{"traverse existing directory", "../testdata", false},
21 | 		{"traverse non-existing directory", "../does_not_exist", true},
22 | 	}
23 | 
24 | 	for _, tc := range tt {
25 | 		t.Run(tc.name, func(t *testing.T) {
26 | 			err := NewDuplicateIndex().TraverseDirRecursively(tc.directory)
27 | 			if (err != nil) != tc.expectError { // fail on an unexpected error AND on a missing expected error
28 | 				t.Errorf("expected error: %v, actual error: %v", tc.expectError, err)
29 | 			}
30 | 		})
31 | 	}
32 | }
33 | 
34 | // TestTraverseDirResult traverses ../testdata and compares the rendered
35 | // report with the expected text held in result.
36 | func TestTraverseDirResult(t *testing.T) {
37 | 	index := NewDuplicateIndex()
38 | 	// A failed traversal makes the comparison below meaningless, so stop here.
39 | 	if err := index.TraverseDirRecursively("../testdata"); err != nil {
40 | 		t.Fatal(err)
41 | 	}
42 | 	if got := index.Result(); got != result {
43 | 		t.Errorf("unexpected result\ngot:\n%s\nwant:\n%s", got, result)
44 | 	}
45 | }
44 | 


--------------------------------------------------------------------------------
/clean/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"flag"
 5 | 	"fmt"
 6 | 	"os"
 7 | 
 8 | 	"github.com/Pungyeon/clean-go-code/clean/duplicates"
 9 | )
10 | 
11 | func main() {
12 | 	defaultPath, err := os.Getwd()
13 | 	if err != nil {
14 | 		panic(err)
15 | 	}
16 | 
17 | 	dir := flag.String("path", defaultPath, "the path to traverse searching for duplicates")
18 | 	flag.Parse()
19 | 
20 | 	index := duplicates.NewDuplicateIndex()
21 | 	if err := index.TraverseDirRecursively(*dir); err != nil {
22 | 		panic(err)
23 | 	}
24 | 
25 | 	fmt.Println(index.Result())
26 | }
27 | 


--------------------------------------------------------------------------------
/clean/testdata/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Pungyeon/clean-go/7b1861fa92fc9cd84f5e4c649f1b71190ce1f14e/clean/testdata/.DS_Store


--------------------------------------------------------------------------------
/clean/testdata/copy.txt:
--------------------------------------------------------------------------------
1 | i am a test file! Don't worry about me :)


--------------------------------------------------------------------------------
/clean/testdata/text.txt:
--------------------------------------------------------------------------------
1 | i am a test file! Don't worry about me :)


--------------------------------------------------------------------------------
/clean/utils/utils.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import "strconv"
 4 | 
 5 | // Decimal byte multiples, each one thousand times the previous one. They are
 6 | // the unit thresholds and divisors used by ToReadableSize.
 7 | const (
 8 | 	KB = 1000.0
 9 | 	MB = 1000.0 * KB
10 | 	GB = 1000.0 * MB
11 | 	TB = 1000.0 * GB
12 | )
11 | 
12 | // toFloatString divides nbytes by divider and formats the quotient in
13 | // fixed-point notation with two digits after the decimal point.
14 | func toFloatString(nbytes int64, divider float64) string {
15 | 	quotient := float64(nbytes) / divider
16 | 	return strconv.FormatFloat(quotient, 'f', 2, 64)
17 | }
15 | 
16 | // ToReadableSize formats a byte count for display using decimal units.
17 | //
18 | // The largest unit (TB, GB, MB, KB) that nbytes reaches is chosen and the
19 | // value is printed with two decimals followed by the unit, e.g. "1.01 KB".
20 | // Anything smaller than KB, including negative values, is printed as a plain
21 | // integer followed by " B".
22 | func ToReadableSize(nbytes int64) string {
23 | 	// The comparisons are inclusive so that an exact multiple is reported in
24 | 	// its own unit: 1000 bytes is "1.00 KB", not "1000 B".
25 | 	switch {
26 | 	case nbytes >= TB:
27 | 		return toFloatString(nbytes, TB) + " TB"
28 | 	case nbytes >= GB:
29 | 		return toFloatString(nbytes, GB) + " GB"
30 | 	case nbytes >= MB:
31 | 		return toFloatString(nbytes, MB) + " MB"
32 | 	case nbytes >= KB:
33 | 		return toFloatString(nbytes, KB) + " KB"
34 | 	}
35 | 	return strconv.FormatInt(nbytes, 10) + " B"
36 | }
29 | 


--------------------------------------------------------------------------------
/clean/utils/utils_test.go:
--------------------------------------------------------------------------------
 1 | package utils
 2 | 
 3 | import "testing"
 4 | 
 5 | // TestToReadableSize checks one representative value per unit, from bytes up
 6 | // to terabytes.
 7 | func TestToReadableSize(t *testing.T) {
 8 | 	tt := []struct {
 9 | 		name     string
10 | 		input    int64
11 | 		expected string
12 | 	}{
13 | 		{"byte return", 125, "125 B"},
14 | 		{"kilobyte return", 1010, "1.01 KB"},
15 | 		{"megabyte return", 1988909, "1.99 MB"},
16 | 		{"gigabyte return", 29121988909, "29.12 GB"},
17 | 		{"terabyte return", 890929121988909, "890.93 TB"},
18 | 	}
19 | 
20 | 	for _, tc := range tt {
21 | 		t.Run(tc.name, func(t *testing.T) {
22 | 			output := ToReadableSize(tc.input)
23 | 			if output != tc.expected {
24 | 				t.Errorf("input %d: got %q, want %q", tc.input, output, tc.expected)
25 | 			}
26 | 		})
27 | 	}
28 | }
27 | 


--------------------------------------------------------------------------------
/spaghet/main.go:
--------------------------------------------------------------------------------
 1 | package main
 2 | 
 3 | import (
 4 | 	"crypto/sha1"
 5 | 	"flag"
 6 | 	"fmt"
 7 | 	"io/ioutil"
 8 | 	"os"
 9 | 	"path"
10 | 	"strconv"
11 | 	"sync/atomic"
12 | )
13 | 
14 | // traverseDir walks entries, the already-read contents of directory, and
15 | // fills in the duplicate bookkeeping passed by the caller.
16 | //
17 | // Sub-directories are read and walked recursively. Each regular file is read
18 | // in full and keyed by the hex SHA-1 of its content: the first path seen for a
19 | // hash goes into hashes; any later path with the same hash is stored in
20 | // duplicates under that first path and its size is added to *dupeSize.
21 | // Entries that are neither directories nor regular files are ignored. Any
22 | // read failure panics.
23 | func traverseDir(hashes, duplicates map[string]string, dupeSize *int64, entries []os.FileInfo, directory string) {
24 | 	for i := range entries {
25 | 		info := entries[i]
26 | 		target := path.Join(directory, info.Name())
27 | 
28 | 		switch mode := info.Mode(); {
29 | 		case !(mode.IsDir() || mode.IsRegular()):
30 | 			// Neither a directory nor a regular file: nothing to do.
31 | 
32 | 		case info.IsDir():
33 | 			children, err := ioutil.ReadDir(target)
34 | 			if err != nil {
35 | 				panic(err)
36 | 			}
37 | 			traverseDir(hashes, duplicates, dupeSize, children, target)
38 | 
39 | 		default:
40 | 			contents, err := ioutil.ReadFile(target)
41 | 			if err != nil {
42 | 				panic(err)
43 | 			}
44 | 			sum := sha1.Sum(contents)
45 | 			digest := fmt.Sprintf("%x", sum[:])
46 | 
47 | 			first, seen := hashes[digest]
48 | 			if !seen {
49 | 				hashes[digest] = target
50 | 				continue
51 | 			}
52 | 			duplicates[first] = target
53 | 			atomic.AddInt64(dupeSize, info.Size())
54 | 		}
55 | 	}
56 | }
48 | 
49 | // toReadableSize renders nbytes with the largest decimal unit it strictly
50 | // exceeds (TB, GB, MB, KB), using integer division so any fractional part is
51 | // dropped. Values of 1000 or less are printed as plain bytes.
52 | func toReadableSize(nbytes int64) string {
53 | 	units := []struct {
54 | 		limit  int64
55 | 		suffix string
56 | 	}{
57 | 		{1000 * 1000 * 1000 * 1000, " TB"},
58 | 		{1000 * 1000 * 1000, " GB"},
59 | 		{1000 * 1000, " MB"},
60 | 		{1000, " KB"},
61 | 	}
62 | 	for _, unit := range units {
63 | 		if nbytes > unit.limit {
64 | 			return strconv.FormatInt(nbytes/unit.limit, 10) + unit.suffix
65 | 		}
66 | 	}
67 | 	return strconv.FormatInt(nbytes, 10) + " B"
68 | }
64 | 
65 | // main picks the directory to scan from -path, falling back to the working
66 | // directory when the flag is empty, walks it with traverseDir and prints the
67 | // duplicate pairs followed by summary counts. Any failure panics.
68 | func main() {
69 | 	dir := flag.String("path", "", "the path to traverse searching for duplicates")
70 | 	flag.Parse()
71 | 
72 | 	root := *dir
73 | 	if root == "" {
74 | 		cwd, err := os.Getwd()
75 | 		if err != nil {
76 | 			panic(err)
77 | 		}
78 | 		root = cwd
79 | 	}
80 | 
81 | 	entries, err := ioutil.ReadDir(root)
82 | 	if err != nil {
83 | 		panic(err)
84 | 	}
85 | 
86 | 	var (
87 | 		hashes     = map[string]string{}
88 | 		duplicates = map[string]string{}
89 | 		dupeSize   int64
90 | 	)
91 | 	traverseDir(hashes, duplicates, &dupeSize, entries, root)
92 | 
93 | 	fmt.Println("DUPLICATES")
94 | 	for original, duplicate := range duplicates {
95 | 		fmt.Printf("key: %s, val: %s\n", original, duplicate)
96 | 	}
97 | 	fmt.Println("TOTAL FILES:", len(hashes))
98 | 	fmt.Println("DUPLICATES:", len(duplicates))
99 | 	fmt.Println("TOTAL DUPLICATE SIZE:", toReadableSize(dupeSize))
100 | }
96 | 
97 | // running into problems of not being able to open directories inside .app folders
98 | 


--------------------------------------------------------------------------------