├── .gitignore ├── .travis.yml ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── anonymisations.go ├── anonymisations_test.go ├── config.go ├── config_defaults_test.json ├── config_invalid_test.json ├── config_test.go ├── config_test.json ├── icon.svg ├── main.go └── main_test.go /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | anon 3 | target 4 | *.exe 5 | *.exe~ 6 | *.dll 7 | *.so 8 | *.dylib 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | --- 2 | # Modified version of the Travis CI config for the go-cmp project: 3 | # https://github.com/google/go-cmp/blob/master/.travis.yml 4 | sudo: false 5 | language: go 6 | 7 | # Build for MacOS and Linux. 8 | os: 9 | - linux 10 | - osx 11 | 12 | go: 1.10.x 13 | 14 | script: 15 | - diff -u <(echo -n) <(gofmt -d .) # Catch any gofmt errors. 16 | - go build -o target/anon-$TRAVIS_OS_NAME # Catch any compile errors first. 17 | - go test -v -race -coverprofile=coverage.txt -covermode=atomic ./... # Run the tests with coverage. 18 | 19 | after_success: 20 | - bash <(curl -s https://codecov.io/bash) 21 | 22 | # On a tag, we will publish the binary produced to the GitHub release. 
23 | deploy: 24 | provider: releases 25 | api_key: 26 | secure: "ezDOGZRKLTK4kbdm3cMJg/PbJw7Jx3XwtOwhppNEE5VR7mc3gAZVae3S6zydTnDICdpPJgCBzyMej2lmfLqCi6rfYIROlZw0IOKQm+V9E01/WTACrlVCmXuoeQArb1Q0KUUqr8buEaLgrau4fay/StVfgk2tAjrF02GWk8vNu4IglahUqR5oRqkBmnZrJD5i0Y9vZRuUa0y7YWVBQMknQHxrGTS3SzCfLAAuIggigQt8AfkfC3iWDllQnH4ElIKpc1qv8dfVu2qsqHxwlWrFHHGFVSDAgX1dvgtmR38NY9j+fXiJQ7gvFJzTcyemZuB1w7HYjr7Zk0+9SB5nJV5pZDTeGXLyO547HqAGDe/d4L1uEXBtpkImopz4qekKRCeG/jcUE6iTp9ZmZOGuLpEOOInDRj4pNLyY5RwPBxC7Cfk4J2Lo6/FOdpxv0O+4FsoKkr/+cU2Zm4uf2V8L0c6OwVNbglQe/lDJBLBrR1KUc5OYM07IVSuOJRyR77EUb6BXvK/qF2t7C8s/+n93KqRMcLNTezWy2QDb7LBJ9g1PNi6alF+CU//vtHROYPlxU+QFX/rH8HMD3aIx/bEdWdM06OnzbLsYwBw4tKb1huabJVjpvWANsPwhIk5xGOGhRqywY5aJS2lcHnFl5lvfXh1Szuk8ZMLix6pzm6Qip24QizU=" 27 | file: target/anon-$TRAVIS_OS_NAME 28 | skip_cleanup: true 29 | on: 30 | tags: true 31 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as 6 | contributors and maintainers pledge to making participation in our project and 7 | our community a harassment-free experience for everyone, regardless of age, body 8 | size, disability, ethnicity, gender identity and expression, level of experience, 9 | education, socio-economic status, nationality, personal appearance, race, 10 | religion, or sexual identity and orientation. 
11 | 12 | ## Our Standards 13 | 14 | Examples of behavior that contributes to creating a positive environment 15 | include: 16 | 17 | * Using welcoming and inclusive language 18 | * Being respectful of differing viewpoints and experiences 19 | * Gracefully accepting constructive criticism 20 | * Focusing on what is best for the community 21 | * Showing empathy towards other community members 22 | 23 | Examples of unacceptable behavior by participants include: 24 | 25 | * The use of sexualized language or imagery and unwelcome sexual attention or 26 | advances 27 | * Trolling, insulting/derogatory comments, and personal or political attacks 28 | * Public or private harassment 29 | * Publishing others' private information, such as a physical or electronic 30 | address, without explicit permission 31 | * Other conduct which could reasonably be considered inappropriate in a 32 | professional setting 33 | 34 | ## Our Responsibilities 35 | 36 | Project maintainers are responsible for clarifying the standards of acceptable 37 | behavior and are expected to take appropriate and fair corrective action in 38 | response to any instances of unacceptable behavior. 39 | 40 | Project maintainers have the right and responsibility to remove, edit, or 41 | reject comments, commits, code, wiki edits, issues, and other contributions 42 | that are not aligned to this Code of Conduct, or to ban temporarily or 43 | permanently any contributor for other behaviors that they deem inappropriate, 44 | threatening, offensive, or harmful. 45 | 46 | ## Scope 47 | 48 | This Code of Conduct applies both within project spaces and in public spaces 49 | when an individual is representing the project or its community. Examples of 50 | representing a project or community include using an official project e-mail 51 | address, posting via an official social media account, or acting as an appointed 52 | representative at an online or offline event. 
Representation of a project may be 53 | further defined and clarified by project maintainers. 54 | 55 | ## Enforcement 56 | 57 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 58 | reported by contacting the project team at albert.pastrana@intenthq.com or 59 | nathan.kleyn@intenthq.com. All complaints will be reviewed and investigated and 60 | will result in a response that is deemed necessary and appropriate to the 61 | circumstances. The project team is obligated to maintain confidentiality with 62 | regard to the reporter of an incident. Further details of specific enforcement 63 | policies may be posted separately. 64 | 65 | Project maintainers who do not follow or enforce the Code of Conduct in good 66 | faith may face temporary or permanent repercussions as determined by other 67 | members of the project's leadership. 68 | 69 | ## Attribution 70 | 71 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 72 | available at https://www.contributor-covenant.org/version/1/4/code-of-conduct.html 73 | 74 | [homepage]: https://www.contributor-covenant.org 75 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Anon 2 | 3 | ## Code of Conduct 4 | 5 | This project and everyone participating in it is governed by the [Anon Code of Conduct](CODE_OF_CONDUCT.md). 6 | By participating, you are expected to uphold this code. 7 | 8 | ## How Can I Contribute 9 | 10 | Any contribution is welcome, raise a bug (and fix it! :-)) request or add a new feature, add some documentation... 11 | Don't be shy and raise a pull request, anything on the following topics will be very welcome: 12 | - New actions to anonymise data 13 | - New input formats (JSON?) 
14 | - Bug fixes 15 | 16 | You can also take a look at the [issues](https://github.com/intenthq/anon/issues) and pick the one you like better. 17 | 18 | If you are going to contribute, we ask you to do the following: 19 | - Use `gofmt` to format your code 20 | - Check your code with `go vet`, `gocyclo`, `golint` 21 | - Cover the logic with enough tests 22 | - Write decent commit messages 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 IntentHQ 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |

2 | 3 |

4 | 5 | # Anon — A UNIX Command To Anonymise Data 6 | [![Build Status](https://travis-ci.org/intenthq/anon.svg?branch=master)](https://travis-ci.org/intenthq/anon) 7 | 8 | [![Go Report Card](https://goreportcard.com/badge/github.com/intenthq/anon)](https://goreportcard.com/report/github.com/intenthq/anon) [![License](https://img.shields.io/npm/l/express.svg)](https://github.com/intenthq/anon/LICENSE) 9 | ![GitHub release](https://img.shields.io/github/release/intenthq/anon.svg) 10 | 11 | Anon is a tool for taking delimited files and anonymising or transforming columns until the output is useful for applications where sensitive information cannot be exposed. 12 | 13 | ## Installation 14 | 15 | Releases of Anon are available as pre-compiled static binaries [on the corresponding GitHub release](https://github.com/intenthq/anon/releases). Simply download the appropriate build for your machine and make sure it's in your `PATH` (or use it directly). 16 | 17 | ## Usage 18 | 19 | ```sh 20 | anon [--config <path to config file>] 21 | [--output <path to output file>] 22 | ``` 23 | 24 | Anon is designed to take input from `STDIN` and by default will output the anonymised file to `STDOUT`: 25 | 26 | ```sh 27 | anon < some_file.csv > some_file_anonymised.csv 28 | ``` 29 | 30 | ### Configuration 31 | 32 | In order to be useful, Anon needs to be told what you want to do to each column of the CSV. The config is defined as a JSON file (defaults to a file called `config.json` in the current directory): 33 | 34 | ```json5 35 | { 36 | "csv": { 37 | "delimiter": "," 38 | }, 39 | // Optionally define a number of rows to randomly sample down to. 
40 | // To do it, it will hash (using FNV-1a 32 bits) the column with the ID 41 | // in it and will mod the result by the value specified to decide if the 42 | // row is included or not -> include = hash(idColumn) % mod == 0 43 | "sampling": { 44 | // Number used to mod the hash of the id and determine if the row 45 | // has to be included in the sample or not 46 | "mod": 30000, 47 | // Specify in which column a unique ID exists on which the sampling can 48 | // be performed. Indices are 0 based, so this would sample on the first 49 | // column. 50 | "idColumn": 0 51 | }, 52 | // An array of actions to take on each column - indices are 0 based, so index 53 | // 0 in this array corresponds to column 1, and so on. 54 | // 55 | // There must be an action for every column in the CSV. 56 | "actions": [ 57 | { 58 | // The no-op, leaves the input unchanged. 59 | "name": "nothing" 60 | }, 61 | { 62 | // Takes a UK format postcode (eg. W1W 8BE) and just keeps the outcode 63 | // (eg. W1W). 64 | "name": "outcode" 65 | }, 66 | { 67 | // Hash (SHA1) the input. 68 | "name": "hash", 69 | // Optional salt that will be appended to the input. 70 | // If not defined, a random salt will be generated 71 | "salt": "salt" 72 | }, 73 | { 74 | // Given a date, just keep the year. 75 | "name": "year", 76 | "dateConfig": { 77 | // Define the format of the input date here, as a Go time layout. 78 | "format": "20060102" 79 | } 80 | }, 81 | { 82 | // Summarise a range of values. 83 | "name": "range", 84 | "rangeConfig": { 85 | "ranges": [ 86 | // For example, this will take values between 0 and 100, and convert 87 | // them to the string "0-100". 88 | // You can use one of (gt, gte) and (lt, lte) but not both at the 89 | // same time. 90 | // You also need to define at least one of (gt, gte, lt, lte). 
91 | { 92 | "gte": 0, 93 | "lt": 100, 94 | "output": "0-100" 95 | } 96 | ] 97 | } 98 | } 99 | ] 100 | } 101 | ``` 102 | 103 | ## Contributing 104 | 105 | Any contribution will be welcome, please refer to our [contributing guidelines](CONTRIBUTING.md) for more information. 106 | 107 | ## License 108 | 109 | This project is [licensed under the MIT license](LICENSE). 110 | 111 | The icon is by [Pixel Perfect](https://www.flaticon.com/authors/pixel-perfect) from [Flaticon](https://www.flaticon.com/), and is licensed under a [Creative Commons 3.0 BY](http://creativecommons.org/licenses/by/3.0/) license. 112 | -------------------------------------------------------------------------------- /anonymisations.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "crypto/sha1" 5 | "errors" 6 | "fmt" 7 | "io" 8 | "math/rand" 9 | "strconv" 10 | "strings" 11 | "time" 12 | ) 13 | 14 | // Anonymisation is a function that transforms a string into another one 15 | type Anonymisation func(string) (string, error) 16 | 17 | // DateConfig stores the format (layout) of an input date 18 | type DateConfig struct { 19 | Format string 20 | } 21 | 22 | // RangeConfig stores configuration to define a range of values 23 | type RangeConfig struct { 24 | Gt *float64 25 | Gte *float64 26 | Lt *float64 27 | Lte *float64 28 | Output *string 29 | } 30 | 31 | // ActionConfig stores the config of an anonymisation action 32 | type ActionConfig struct { 33 | Name string 34 | Salt *string 35 | DateConfig DateConfig 36 | RangeConfig []RangeConfig 37 | } 38 | 39 | // Returns an array of anonymisations according to the config 40 | func anonymisations(configs *[]ActionConfig) ([]Anonymisation, error) { 41 | var err error 42 | res := make([]Anonymisation, len(*configs)) 43 | for i, config := range *configs { 44 | if res[i], err = config.create(); err != nil { 45 | return nil, err 46 | } 47 | } 48 | return res, nil 49 | } 50 | 51 | // Returns 
the configured salt or a random one 52 | // if it's not set. 53 | func (ac *ActionConfig) saltOrRandom() string { 54 | if ac.Salt != nil { 55 | return *ac.Salt 56 | } 57 | return strconv.Itoa(rand.Int()) 58 | } 59 | 60 | func (ac *ActionConfig) create() (Anonymisation, error) { 61 | switch ac.Name { 62 | case "nothing": 63 | return identity, nil 64 | case "outcode": 65 | return outcode, nil 66 | case "hash": 67 | return hash(ac.saltOrRandom()), nil 68 | case "year": 69 | return year(ac.DateConfig.Format) 70 | case "ranges": 71 | return ranges(ac.RangeConfig) 72 | } 73 | return nil, fmt.Errorf("can't create an action with name %s", ac.Name) 74 | } 75 | 76 | // The no-op, returns the input unchanged. 77 | func identity(s string) (string, error) { 78 | return s, nil 79 | } 80 | 81 | // Hashes (SHA1) the input. 82 | func hash(salt string) Anonymisation { 83 | return func(s string) (string, error) { 84 | h := sha1.New() 85 | io.WriteString(h, s) 86 | io.WriteString(h, salt) 87 | return fmt.Sprintf("%x", h.Sum(nil)), nil 88 | } 89 | } 90 | 91 | // Takes a UK format postcode (eg. W1W 8BE) and just keeps 92 | // the outcode (eg. W1W). 93 | // i.e. returns the prefix of the input until it finds a space 94 | func outcode(s string) (string, error) { 95 | return strings.Split(s, " ")[0], nil 96 | } 97 | 98 | // Given a date format/layout, it returns a function that 99 | // given a date in that format, just keeps the year. 
100 | // If either the format is invalid or the year doesn't 101 | // match that format, it will return an error and 102 | // the input unchanged 103 | func year(format string) (Anonymisation, error) { 104 | if _, err := time.Parse(format, format); err != nil { 105 | return nil, err 106 | } 107 | return func(s string) (string, error) { 108 | t, err := time.Parse(format, s) 109 | if err != nil { 110 | return s, err 111 | } 112 | return strconv.Itoa(t.Year()), nil 113 | }, nil 114 | } 115 | 116 | // Given a list of ranges, it will summarise numeric 117 | // values into groups of values, each group defined 118 | // by a range and an output 119 | func ranges(ranges []RangeConfig) (Anonymisation, error) { 120 | for _, rc := range ranges { 121 | if rc.Gt != nil && rc.Gte != nil || rc.Lt != nil && rc.Lte != nil { 122 | return nil, errors.New("you can only specify one of (gt, gte) and (lt, lte)") 123 | } else if rc.Gt == nil && rc.Gte == nil && rc.Lt == nil && rc.Lte == nil { 124 | return nil, errors.New("you need to specify at least one of gt, gte, lt, lte") 125 | } else if rc.Output == nil { 126 | return nil, errors.New("you need to specify the output for a range") 127 | } 128 | } 129 | return func(s string) (string, error) { 130 | v, err := strconv.ParseFloat(s, 64) 131 | if err != nil { 132 | return s, err 133 | } 134 | for _, rang := range ranges { 135 | if rang.contains(v) { 136 | return *rang.Output, nil 137 | } 138 | } 139 | return s, errors.New("No range defined for value") 140 | }, nil 141 | } 142 | 143 | func (r *RangeConfig) contains(v float64) bool { 144 | return (r.Gt == nil && r.Gte == nil || r.Gt != nil && *r.Gt < v || r.Gte != nil && *r.Gte <= v) && 145 | (r.Lt == nil && r.Lte == nil || r.Lt != nil && *r.Lt > v || r.Lte != nil && *r.Lte >= v) 146 | } 147 | -------------------------------------------------------------------------------- /anonymisations_test.go: -------------------------------------------------------------------------------- 1 | package main 
2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/leanovate/gopter" 8 | "github.com/leanovate/gopter/gen" 9 | "github.com/leanovate/gopter/prop" 10 | "github.com/stretchr/testify/assert" 11 | "github.com/stretchr/testify/require" 12 | ) 13 | 14 | var salt = "jump" 15 | 16 | const seed = int64(1) 17 | 18 | //this is the first random salt with the seed above 19 | const firstSalt = "5577006791947779410" 20 | 21 | // can't test that the functions are equal because of https://github.com/stretchr/testify/issues/182 22 | // and https://github.com/stretchr/testify/issues/159#issuecomment-99557398 23 | // will have to test that the functions return the same 24 | func assertAnonymisationFunction(t *testing.T, expected Anonymisation, actual Anonymisation, value string) { 25 | require.NotNil(t, expected) 26 | require.NotNil(t, actual) 27 | expectedRes, expectedErr := expected(value) 28 | actualRes, actualErr := actual(value) 29 | assert.Equal(t, expectedRes, actualRes) 30 | assert.Equal(t, expectedErr, actualErr) 31 | } 32 | 33 | func TestAnonymisations(t *testing.T) { 34 | t.Run("a valid configuration", func(t *testing.T) { 35 | conf := &[]ActionConfig{ 36 | ActionConfig{ 37 | Name: "nothing", 38 | }, 39 | ActionConfig{ 40 | Name: "hash", 41 | Salt: &salt, 42 | }, 43 | } 44 | anons, err := anonymisations(conf) 45 | assert.NoError(t, err) 46 | assertAnonymisationFunction(t, identity, anons[0], "a") 47 | assertAnonymisationFunction(t, hash(salt), anons[1], "a") 48 | }) 49 | t.Run("an invalid configuration", func(t *testing.T) { 50 | conf := &[]ActionConfig{ActionConfig{Name: "year", DateConfig: DateConfig{Format: "3333"}}} 51 | anons, err := anonymisations(conf) 52 | assert.Error(t, err, "should return an error") 53 | assert.Nil(t, anons) 54 | }) 55 | } 56 | 57 | func TestActionConfigSaltOrRandom(t *testing.T) { 58 | t.Run("if salt is not specified", func(t *testing.T) { 59 | rand.Seed(seed) 60 | acNoSalt := ActionConfig{Name: "hash"} 61 | assert.Equal(t, 
firstSalt, acNoSalt.saltOrRandom(), "should return a random salt") 62 | }) 63 | t.Run("if salt is specified", func(t *testing.T) { 64 | emptySalt := "" 65 | acEmptySalt := ActionConfig{Name: "hash", Salt: &emptySalt} 66 | assert.Empty(t, acEmptySalt.saltOrRandom(), "should return the empty salt if empty") 67 | 68 | acSalt := ActionConfig{Name: "hash", Salt: &salt} 69 | assert.Equal(t, "jump", acSalt.saltOrRandom(), "should return the salt") 70 | }) 71 | } 72 | 73 | func TestActionConfigCreate(t *testing.T) { 74 | t.Run("invalid name", func(t *testing.T) { 75 | ac := ActionConfig{Name: "invalid name"} 76 | res, err := ac.create() 77 | assert.Error(t, err) 78 | assert.Nil(t, res) 79 | }) 80 | t.Run("identity", func(t *testing.T) { 81 | ac := ActionConfig{Name: "nothing"} 82 | res, err := ac.create() 83 | assert.NoError(t, err) 84 | assertAnonymisationFunction(t, identity, res, "a") 85 | }) 86 | t.Run("outcode", func(t *testing.T) { 87 | ac := ActionConfig{Name: "outcode"} 88 | res, err := ac.create() 89 | assert.NoError(t, err) 90 | assertAnonymisationFunction(t, outcode, res, "a") 91 | }) 92 | t.Run("hash", func(t *testing.T) { 93 | t.Run("if salt is not specified uses a random salt", func(t *testing.T) { 94 | rand.Seed(1) 95 | ac := ActionConfig{Name: "hash"} 96 | res, err := ac.create() 97 | assert.NoError(t, err) 98 | assertAnonymisationFunction(t, hash(firstSalt), res, "a") 99 | }) 100 | t.Run("if salt is specified uses it", func(t *testing.T) { 101 | ac := ActionConfig{Name: "hash", Salt: &salt} 102 | res, err := ac.create() 103 | assert.NoError(t, err) 104 | assertAnonymisationFunction(t, hash(salt), res, "a") 105 | }) 106 | }) 107 | t.Run("year", func(t *testing.T) { 108 | t.Run("with an invalid format", func(t *testing.T) { 109 | ac := ActionConfig{Name: "year", DateConfig: DateConfig{Format: "11112233"}} 110 | res, err := ac.create() 111 | assert.Error(t, err, "should fail") 112 | assert.Nil(t, res) 113 | }) 114 | t.Run("with a valid format", func(t 
*testing.T) { 115 | ac := ActionConfig{Name: "year", DateConfig: DateConfig{Format: "20060102"}} 116 | res, err := ac.create() 117 | assert.NoError(t, err, "should not fail") 118 | y, err := year("20060102") 119 | assert.NoError(t, err) 120 | assertAnonymisationFunction(t, y, res, "21121212") 121 | }) 122 | }) 123 | t.Run("ranges", func(t *testing.T) { 124 | num := 2.0 125 | output := "0-100" 126 | t.Run("range has at least one of lt, lte, gt, gte", func(t *testing.T) { 127 | ac := ActionConfig{ 128 | Name: "ranges", 129 | RangeConfig: []RangeConfig{RangeConfig{Output: &output}}, 130 | } 131 | r, err := ac.create() 132 | assert.Error(t, err, "if not should return an error") 133 | assert.Nil(t, r) 134 | }) 135 | t.Run("range contains both lt and lte", func(t *testing.T) { 136 | ac := ActionConfig{ 137 | Name: "ranges", 138 | RangeConfig: []RangeConfig{RangeConfig{Lt: &num, Lte: &num, Output: &output}}, 139 | } 140 | r, err := ac.create() 141 | assert.Error(t, err, "if not should return an error") 142 | assert.Nil(t, r) 143 | }) 144 | t.Run("range contains both gt and gte", func(t *testing.T) { 145 | ac := ActionConfig{ 146 | Name: "ranges", 147 | RangeConfig: []RangeConfig{RangeConfig{Gt: &num, Gte: &num, Output: &output}}, 148 | } 149 | r, err := ac.create() 150 | assert.Error(t, err, "if not should return an error") 151 | assert.Nil(t, r) 152 | }) 153 | t.Run("range without output defined", func(t *testing.T) { 154 | ac := ActionConfig{ 155 | Name: "ranges", 156 | RangeConfig: []RangeConfig{RangeConfig{Lt: &num, Gte: &num}}, 157 | } 158 | r, err := ac.create() 159 | assert.Error(t, err, "if not should return an error") 160 | assert.Nil(t, r) 161 | }) 162 | t.Run("valid range", func(t *testing.T) { 163 | rangeConfigs := []RangeConfig{RangeConfig{Lte: &num, Gte: &num, Output: &output}} 164 | ac := ActionConfig{ 165 | Name: "ranges", 166 | RangeConfig: rangeConfigs, 167 | } 168 | r, err := ac.create() 169 | expected, _ := ranges(rangeConfigs) 170 | assert.NoError(t, 
err) 171 | assertAnonymisationFunction(t, expected, r, "2") 172 | }) 173 | }) 174 | } 175 | 176 | func TestIdentity(t *testing.T) { 177 | properties := gopter.NewProperties(nil) 178 | 179 | properties.Property("Same output as input", prop.ForAll( 180 | func(v string) bool { 181 | res, err := identity(v) 182 | return assert.NoError(t, err) && assert.Equal(t, v, res) 183 | }, 184 | gen.AnyString(), 185 | )) 186 | 187 | properties.TestingRun(t) 188 | } 189 | 190 | func TestHash(t *testing.T) { 191 | t.Run("should hash the values using sha1 without a salt", func(t *testing.T) { 192 | unsaltedHash := hash("") 193 | res, err := unsaltedHash("") 194 | assert.NoError(t, err) 195 | assert.Equal(t, "da39a3ee5e6b4b0d3255bfef95601890afd80709", res) 196 | res, err = unsaltedHash("hasselhoff") 197 | assert.Equal(t, "ffe3294fad149c2dd3579cb864a1aebb2201f38d", res) 198 | }) 199 | t.Run("should use the salt if provided", func(t *testing.T) { 200 | properties := gopter.NewProperties(nil) 201 | 202 | properties.Property("hash(salt)(s) == hash(s+salt)", prop.ForAll( 203 | func(salt string, s string) bool { 204 | res1, err1 := hash(salt)(s) 205 | res2, err2 := hash("")(s + salt) 206 | return assert.NoError(t, err1) && assert.NoError(t, err2) && assert.Equal(t, res1, res2) 207 | }, 208 | gen.AlphaString(), 209 | gen.AlphaString(), 210 | )) 211 | }) 212 | } 213 | 214 | func TestOutcode(t *testing.T) { 215 | properties := gopter.NewProperties(nil) 216 | 217 | properties.Property("Same output as input", prop.ForAll( 218 | func(v1 string, v2 string) bool { 219 | res, err := outcode(v1 + " " + v2) 220 | return assert.NoError(t, err) && assert.Equal(t, v1, res) 221 | }, 222 | gen.AlphaString(), 223 | gen.AlphaString(), 224 | )) 225 | 226 | properties.TestingRun(t) 227 | } 228 | 229 | func TestYear(t *testing.T) { 230 | f, _ := year("20060102") 231 | t.Run("if the date can be parsed", func(t *testing.T) { 232 | res, err := f("20120102") 233 | assert.NoError(t, err, "should return no error") 
234 | assert.Equal(t, "2012", res, "should return the year") 235 | }) 236 | t.Run("if the date cannot be parsed", func(t *testing.T) { 237 | res, err := f("input") 238 | assert.Error(t, err, "should return an error") 239 | assert.Equal(t, "input", res, "should return the input unchanged") 240 | }) 241 | } 242 | func TestRanges(t *testing.T) { 243 | min := 0.0 244 | max := 100.0 245 | output := "0-100" 246 | f, _ := ranges([]RangeConfig{RangeConfig{Gt: &min, Lte: &max, Output: &output}}) 247 | t.Run("if the value is not a float", func(t *testing.T) { 248 | res, err := f("input") 249 | assert.Error(t, err, "should return an error") 250 | assert.Equal(t, "input", res, "should return the input unchanged") 251 | }) 252 | t.Run("if the value is a float", func(t *testing.T) { 253 | t.Run("not in any range", func(t *testing.T) { 254 | res, err := f("2000") 255 | assert.Error(t, err, "should return an error") 256 | assert.Equal(t, "2000", res, "should return the input unchanged") 257 | }) 258 | t.Run("inside a range", func(t *testing.T) { 259 | res, err := f("10") 260 | assert.NoError(t, err, "should return no error") 261 | assert.Equal(t, output, res, "should return the output") 262 | }) 263 | }) 264 | } 265 | 266 | func TestRangeConfigContains(t *testing.T) { 267 | min := 0.0 268 | max := 100.0 269 | t.Run("range containing only lt", func(t *testing.T) { 270 | conf := RangeConfig{Lt: &max} 271 | assert.True(t, conf.contains(max-1)) 272 | assert.False(t, conf.contains(max)) 273 | assert.False(t, conf.contains(max+1)) 274 | }) 275 | t.Run("range containing only lte", func(t *testing.T) { 276 | conf := RangeConfig{Lte: &max} 277 | assert.True(t, conf.contains(max-1)) 278 | assert.True(t, conf.contains(max)) 279 | assert.False(t, conf.contains(max+1)) 280 | }) 281 | t.Run("range containing only gt", func(t *testing.T) { 282 | conf := RangeConfig{Gt: &min} 283 | assert.False(t, conf.contains(min-1)) 284 | assert.False(t, conf.contains(min)) 285 | assert.True(t, 
conf.contains(min+1)) 286 | }) 287 | t.Run("range containing only gte", func(t *testing.T) { 288 | conf := RangeConfig{Gte: &min} 289 | assert.False(t, conf.contains(min-1)) 290 | assert.True(t, conf.contains(min)) 291 | assert.True(t, conf.contains(min+1)) 292 | }) 293 | t.Run("range containing gt and lt", func(t *testing.T) { 294 | conf := RangeConfig{Gt: &min, Lt: &max} 295 | assert.False(t, conf.contains(min-1)) 296 | assert.False(t, conf.contains(min)) 297 | assert.True(t, conf.contains(min+1)) 298 | assert.False(t, conf.contains(max)) 299 | assert.False(t, conf.contains(max+1)) 300 | }) 301 | t.Run("range containing gt and lte", func(t *testing.T) { 302 | conf := RangeConfig{Gt: &min, Lte: &max} 303 | assert.False(t, conf.contains(min-1)) 304 | assert.False(t, conf.contains(min)) 305 | assert.True(t, conf.contains(min+1)) 306 | assert.True(t, conf.contains(max)) 307 | assert.False(t, conf.contains(max+1)) 308 | }) 309 | t.Run("range containing gte and lt", func(t *testing.T) { 310 | conf := RangeConfig{Gte: &min, Lt: &max} 311 | assert.False(t, conf.contains(min-1)) 312 | assert.True(t, conf.contains(min)) 313 | assert.True(t, conf.contains(min+1)) 314 | assert.False(t, conf.contains(max)) 315 | assert.False(t, conf.contains(max+1)) 316 | }) 317 | t.Run("range containing gte and lte", func(t *testing.T) { 318 | conf := RangeConfig{Gte: &min, Lte: &max} 319 | assert.False(t, conf.contains(min-1)) 320 | assert.True(t, conf.contains(min)) 321 | assert.True(t, conf.contains(min+1)) 322 | assert.True(t, conf.contains(max)) 323 | assert.False(t, conf.contains(max+1)) 324 | }) 325 | } 326 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "os" 6 | ) 7 | 8 | // CsvConfig stores the config to read and write the csv file 9 | type CsvConfig struct { 10 | Delimiter string 11 | } 12 | 13 | // 
SamplingConfig stores the config to know how to sample the file 14 | type SamplingConfig struct { 15 | Mod uint32 16 | IDColumn uint32 17 | } 18 | 19 | // Config stores all the configuration 20 | type Config struct { 21 | Csv CsvConfig 22 | Sampling SamplingConfig 23 | Actions []ActionConfig 24 | } 25 | 26 | var defaultCsvConfig = CsvConfig{ 27 | Delimiter: ",", 28 | } 29 | 30 | var defaultSamplingConfig = SamplingConfig{ 31 | Mod: 1, 32 | IDColumn: 0, 33 | } 34 | 35 | var defaultActionsConfig = []ActionConfig{} 36 | 37 | func loadConfig(filename string) (*Config, error) { 38 | file, err := os.Open(filename) 39 | defer file.Close() 40 | if err != nil { 41 | return nil, err 42 | } 43 | decoder := json.NewDecoder(file) 44 | conf := Config{ 45 | Csv: defaultCsvConfig, 46 | Sampling: defaultSamplingConfig, 47 | Actions: defaultActionsConfig, 48 | } 49 | err = decoder.Decode(&conf) 50 | if err != nil { 51 | return nil, err 52 | } 53 | return &conf, err 54 | } 55 | -------------------------------------------------------------------------------- /config_defaults_test.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /config_invalid_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "csv": { 3 | "delimiter": "," 4 | }, 5 | "sampling": { 6 | "mod": "not a number", 7 | "idColumn": 0 8 | }, 9 | "actions": [ 10 | { 11 | "name": "hash" 12 | }, 13 | { 14 | "name": "outcode" 15 | }, 16 | { 17 | "name": "year", 18 | "dateConfig": { 19 | "format": "20060102" 20 | } 21 | }, 22 | { 23 | "name": "nothing" 24 | } 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /config_test.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | 
"github.com/stretchr/testify/require" 8 | ) 9 | 10 | func TestLoadConfig(t *testing.T) { 11 | t.Run("if the file doesn't exist", func(t *testing.T) { 12 | conf, err := loadConfig("non-existing-file") 13 | assert.Nil(t, conf, "should return nil if the file doesn't exist") 14 | assert.Error(t, err, "should return the error if the file doesn't exist") 15 | }) 16 | t.Run("if the json can't be decoded", func(t *testing.T) { 17 | conf, err := loadConfig("config_invalid_test.json") 18 | assert.Nil(t, conf, "should return nil if the json can't be decoded") 19 | assert.Error(t, err, "should return the error if the json can't be decoded") 20 | }) 21 | t.Run("default config values", func(t *testing.T) { 22 | conf, err := loadConfig("config_defaults_test.json") 23 | require.NoError(t, err, "should return no error if the config can be loaded") 24 | assert.Equal(t, Config{ 25 | Csv: CsvConfig{ 26 | Delimiter: ",", 27 | }, 28 | Sampling: SamplingConfig{ 29 | Mod: 1, 30 | IDColumn: 0, 31 | }, 32 | Actions: []ActionConfig{}, 33 | }, *conf, "should fill the config with the default values") 34 | }) 35 | t.Run("if the config can be loaded", func(t *testing.T) { 36 | gte := 0.0 37 | lt := 100.0 38 | output := "0-100" 39 | conf, err := loadConfig("config_test.json") 40 | require.NoError(t, err, "should return no error if the config can be loaded") 41 | assert.Equal(t, Config{ 42 | Csv: CsvConfig{ 43 | Delimiter: "|", 44 | }, 45 | Sampling: SamplingConfig{ 46 | Mod: 77, 47 | IDColumn: 84, 48 | }, 49 | Actions: []ActionConfig{ 50 | ActionConfig{ 51 | Name: "hash", 52 | }, 53 | ActionConfig{ 54 | Name: "outcode", 55 | }, 56 | ActionConfig{ 57 | Name: "year", 58 | DateConfig: DateConfig{ 59 | Format: "20060102", 60 | }, 61 | }, 62 | ActionConfig{ 63 | Name: "ranges", 64 | RangeConfig: []RangeConfig{ 65 | RangeConfig{ 66 | Gte: >e, 67 | Lt: <, 68 | Output: &output, 69 | }, 70 | }, 71 | }, 72 | ActionConfig{ 73 | Name: "nothing", 74 | }, 75 | }, 76 | }, *conf, "should return the config 
properly decoded") 77 | }) 78 | } 79 | -------------------------------------------------------------------------------- /config_test.json: -------------------------------------------------------------------------------- 1 | { 2 | "csv": { 3 | "delimiter": "|" 4 | }, 5 | "sampling": { 6 | "mod": 77, 7 | "idColumn": 84 8 | }, 9 | "actions": [ 10 | { 11 | "name": "hash" 12 | }, 13 | { 14 | "name": "outcode" 15 | }, 16 | { 17 | "name": "year", 18 | "dateConfig": { 19 | "format": "20060102" 20 | } 21 | }, 22 | { 23 | "name": "ranges", 24 | "rangeConfig": [ 25 | { 26 | "gte": 0, 27 | "lt": 100, 28 | "output": "0-100" 29 | } 30 | ] 31 | }, 32 | { 33 | "name": "nothing" 34 | } 35 | ] 36 | } 37 | -------------------------------------------------------------------------------- /icon.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | 9 | 12 | 16 | 17 | 21 | 25 | 26 | 29 | 34 | 37 | 38 | 42 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/csv" 5 | "flag" 6 | "fmt" 7 | "hash/fnv" 8 | "io" 9 | "log" 10 | "math/rand" 11 | "os" 12 | "time" 13 | ) 14 | 15 | func main() { 16 | rand.Seed(time.Now().UTC().UnixNano()) 17 | //TODO move args parsing to a function 18 | configFile := flag.String("config", "config.json", "Configuration of the data to be anonymised. Default is 'config.json'") 19 | outputFile := flag.String("output", "", "Output file. 
Default is stdout.") 20 | flag.Parse() 21 | log.Printf("Using configuration in file %s\n", *configFile) 22 | conf, err := loadConfig(*configFile) 23 | if err != nil { 24 | log.Fatal(err) 25 | } 26 | r := initReader(flag.Arg(0), conf.Csv) 27 | w := initWriter(*outputFile, conf.Csv) 28 | anons, err := anonymisations(&conf.Actions) 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | if err := process(r, w, conf, &anons); err != nil { 34 | log.Fatal(err) 35 | } 36 | } 37 | 38 | func process(r *csv.Reader, w *csv.Writer, conf *Config, anons *[]Anonymisation) error { 39 | i := 0 40 | 41 | for { 42 | record, err := r.Read() 43 | if err == io.EOF { 44 | break 45 | } else if pe, ok := err.(*csv.ParseError); ok && pe.Err == csv.ErrFieldCount { 46 | // we just print the error and skip the record 47 | log.Print(err) 48 | } else if err != nil { 49 | return err 50 | } else if int64(conf.Sampling.IDColumn) >= int64(len(record)) { 51 | return fmt.Errorf("id column (%d) out of range, record has %d columns", conf.Sampling.IDColumn, len(record)) 52 | } else if sample(record[conf.Sampling.IDColumn], conf.Sampling) { 53 | anonymised, err := anonymise(record, *anons) 54 | if err != nil { 55 | // we just print the error and skip the record 56 | log.Print(err) 57 | } else { 58 | w.Write(anonymised) 59 | } 60 | //TODO decide how often do we want to flush 61 | if i%100 == 0 { 62 | w.Flush() 63 | } 64 | } 65 | i++ 66 | } 67 | w.Flush() 68 | return nil 69 | } 70 | 71 | func sample(s string, conf SamplingConfig) bool { 72 | h := fnv.New32a() 73 | h.Write([]byte(s)) 74 | return h.Sum32()%conf.Mod == 0 75 | } 76 | 77 | func initReader(filename string, conf CsvConfig) *csv.Reader { 78 | reader := csv.NewReader(fileOr(filename, os.Stdin, os.Open)) 79 | reader.Comma = []rune(conf.Delimiter)[0] 80 | return reader 81 | } 82 | 83 | func initWriter(filename string, conf CsvConfig) *csv.Writer { 84 | writer := csv.NewWriter(fileOr(filename, os.Stdout, os.Create)) 85 | writer.Comma = 
// fileOr picks the file to work with: when filename is empty it hands back
// def untouched; otherwise it calls action with filename and returns the file
// that action produced. If action reports an error the program is terminated
// through log.Fatal.
func fileOr(filename string, def *os.File, action func(string) (*os.File, error)) *os.File {
	if len(filename) > 0 {
		opened, err := action(filename)
		if err != nil {
			log.Fatal(err)
		}
		return opened
	}
	return def
}
// tmpFile creates a temporary file whose content is the given string and
// returns the open handle to it. Callers are responsible for removing the
// file. Any failure while creating or filling the file aborts the test
// binary through log.Fatal.
func tmpFile(content string) *os.File {
	tmpfile, err := ioutil.TempFile("", "anon-test")
	if err != nil {
		log.Fatal(err)
	}
	// Write through the file name rather than the returned handle so the
	// handle's offset stays at the beginning of the file.
	if err := ioutil.WriteFile(tmpfile.Name(), []byte(content), os.ModePerm); err != nil {
		log.Fatal(err)
	}
	return tmpfile
}
89 | 90 | func TestAnonymise(t *testing.T) { 91 | record := []string{"a", "b", "c"} 92 | actions := []Anonymisation{identity, hash(""), identity} 93 | output := []string{"a", "e9d71f5ee7c92d6dc9e92ffdad17b8bd49418f98", "c"} 94 | res, err := anonymise(record, actions) 95 | assert.NoError(t, err) 96 | assert.Equal(t, output, res, "should apply anonymisation functions to each column in the record") 97 | } 98 | 99 | func TestSample(t *testing.T) { 100 | conf := SamplingConfig{ 101 | Mod: 2, 102 | } 103 | assert.True(t, sample("a", conf)) 104 | assert.False(t, sample("b", conf)) 105 | } 106 | 107 | func TestProcess(t *testing.T) { 108 | config := func(mod uint32, idColumn uint32) *Config { 109 | return &Config{Sampling: SamplingConfig{Mod: mod, IDColumn: idColumn}} 110 | } 111 | anons := &[]Anonymisation{identity, outcode} 112 | createReaderAndWriter := func(in string) (*csv.Reader, *csv.Writer, *bytes.Buffer) { 113 | var out bytes.Buffer 114 | r := csv.NewReader(strings.NewReader(in)) 115 | 116 | w := csv.NewWriter(&out) 117 | return r, w, &out 118 | } 119 | t.Run("when the id column is out of range", func(t *testing.T) { 120 | r, w, out := createReaderAndWriter("a,b c\nd,e f\n") 121 | 122 | err := process(r, w, config(1, 100), anons) 123 | assert.Error(t, err, "should return an error") 124 | assert.Equal(t, "", out.String(), "shouldn't write any output") 125 | }) 126 | t.Run("when there is an error writing the output", func(t *testing.T) { 127 | var out bytes.Buffer 128 | f, _ := os.Open("non existing file") 129 | r := csv.NewReader(f) 130 | 131 | w := csv.NewWriter(&out) 132 | err := process(r, w, config(1, 0), anons) 133 | assert.Error(t, err, "should return an error") 134 | }) 135 | t.Run("when there is an error processing one of the rows", func(t *testing.T) { 136 | r, w, out := createReaderAndWriter("20020202\nfail\n10010101") 137 | 138 | y, _ := year("20060102") 139 | err := process(r, w, config(1, 0), &[]Anonymisation{y}) 140 | assert.NoError(t, err, "should 
not return an error") 141 | assert.Equal(t, "2002\n1001\n", out.String(), "should skip that row") 142 | }) 143 | t.Run("when sampling is defined", func(t *testing.T) { 144 | r, w, out := createReaderAndWriter("a,b c\nd,e f\ng,h i\nj,k l\n") 145 | 146 | err := process(r, w, config(2, 0), anons) 147 | assert.NoError(t, err, "should return no error") 148 | assert.Equal(t, "a,b\ng,h\n", out.String(), "should process some rows") 149 | }) 150 | t.Run("when all the rows are valid", func(t *testing.T) { 151 | r, w, out := createReaderAndWriter("a,b c\nd,e f\n") 152 | 153 | err := process(r, w, config(1, 0), anons) 154 | assert.NoError(t, err, "should return no error") 155 | assert.Equal(t, "a,b\nd,e\n", out.String(), "should process all rows") 156 | }) 157 | } 158 | --------------------------------------------------------------------------------