├── .github └── workflows │ └── go.yml ├── .gitignore ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── config.go ├── doc.go ├── driver.go ├── driver_test.go ├── emitter.go ├── emitter_test.go ├── examples ├── README.md ├── amplab1 │ ├── Makefile │ ├── README.md │ ├── amplab1.go │ └── data │ │ ├── part-00000 │ │ ├── part-00001 │ │ ├── part-00002 │ │ ├── part-00003 │ │ ├── part-00004 │ │ ├── part-00005 │ │ ├── part-00006 │ │ ├── part-00007 │ │ ├── part-00008 │ │ └── part-00009 ├── amplab2 │ ├── Makefile │ ├── README.md │ ├── amplab2.go │ └── data │ │ ├── part-00000 │ │ ├── part-00001 │ │ ├── part-00002 │ │ ├── part-00003 │ │ ├── part-00004 │ │ ├── part-00005 │ │ ├── part-00006 │ │ ├── part-00007 │ │ ├── part-00008 │ │ └── part-00009 ├── amplab3 │ ├── Makefile │ ├── README.md │ ├── amplab3.go │ └── data │ │ ├── rankings │ │ ├── part-00000 │ │ ├── part-00001 │ │ ├── part-00002 │ │ ├── part-00003 │ │ ├── part-00004 │ │ ├── part-00005 │ │ ├── part-00006 │ │ ├── part-00007 │ │ ├── part-00008 │ │ └── part-00009 │ │ └── visits │ │ ├── part-00000 │ │ ├── part-00001 │ │ ├── part-00002 │ │ ├── part-00003 │ │ ├── part-00004 │ │ ├── part-00005 │ │ ├── part-00006 │ │ ├── part-00007 │ │ ├── part-00008 │ │ └── part-00009 └── word_count │ ├── Makefile │ ├── README.md │ ├── metamorphosis.txt │ └── word_count.go ├── executor.go ├── go.mod ├── go.sum ├── img ├── architecture.svg ├── logo.svg └── word_count.gif ├── internal └── pkg │ ├── corfs │ ├── filesys.go │ ├── filesys_test.go │ ├── local.go │ ├── local_test.go │ ├── s3.go │ ├── s3_io.go │ ├── s3_test.go │ └── util.go │ ├── coriam │ ├── client.go │ └── client_test.go │ └── corlambda │ ├── client.go │ └── client_test.go ├── job.go ├── job_test.go ├── lambda.go ├── lambda_test.go ├── mapreduce.go ├── mapreduce_test.go ├── split.go ├── split_test.go └── task.go /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Go 2 | 3 | on: 4 | push: 5 | branches: [ master ] 6 | 
pull_request: 7 | branches: [ master ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v2 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v2 18 | with: 19 | go-version: 1.15 20 | 21 | - name: Build 22 | run: go build -v ./... 23 | 24 | - name: Test 25 | run: go test -v ./... 26 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.dll 4 | *.so 5 | *.dylib 6 | 7 | # Test binary, build with `go test -c` 8 | *.test 9 | 10 | # Output of the go coverage tool, specifically when used with LiteIDE 11 | *.out 12 | 13 | # Project-local glide cache, RE: https://github.com/Masterminds/glide/issues/736 14 | .glide/ 15 | 16 | vendor/ 17 | .vscode 18 | *.gif 19 | bin/ 20 | output-* 21 | coverage.txt 22 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | When contributing to this repository, please first discuss the change you wish to make via issue, 4 | email, or any other method with the owners of this repository before making a change. 5 | 6 | Please note we have a code of conduct, please follow it in all your interactions with the project. 7 | 8 | ## Pull Request Process 9 | 10 | 1. Ensure any install or build dependencies are removed before the end of the layer when doing a build. 11 | 1. Ensure that any changes you make are formatted with `gofmt` and pass the `golint` linter. 12 | 1. Update the README.md and any relevant documentation comments with details of changes to the interfaces. This includes new environment variables, settings, or API. 13 | 1. If the changes are nontrivial (i.e. adding or substantially modifying behavior), please submit unit tests as appropriate. 14 | 1. 
Create a PR that has a descriptive name and comprehensive description of the changes made. 15 | 1. You may merge the Pull Request in once you receive approval on the PR. 16 | 17 | ## Updating the README 18 | 19 | The corral README uses [doctoc](https://github.com/thlorenz/doctoc) to generate the table of contents. If you change the heading structure of the README, please rerun doctoc: 20 | 21 | ``` 22 | doctoc README.md --github 23 | ``` 24 | 25 | ## Code of Conduct 26 | 27 | ### Our Pledge 28 | 29 | In the interest of fostering an open and welcoming environment, we as 30 | contributors and maintainers pledge to making participation in our project and 31 | our community a harassment-free experience for everyone, regardless of age, body 32 | size, disability, ethnicity, gender identity and expression, level of experience, 33 | nationality, personal appearance, race, religion, or sexual identity and 34 | orientation. 35 | 36 | ### Our Standards 37 | 38 | Examples of behavior that contributes to creating a positive environment 39 | include: 40 | 41 | * Using welcoming and inclusive language 42 | * Being respectful of differing viewpoints and experiences 43 | * Gracefully accepting constructive criticism 44 | * Focusing on what is best for the community 45 | * Showing empathy towards other community members 46 | 47 | Examples of unacceptable behavior by participants include: 48 | 49 | * The use of sexualized language or imagery and unwelcome sexual attention or 50 | advances 51 | * Trolling, insulting/derogatory comments, and personal or political attacks 52 | * Public or private harassment 53 | * Publishing others' private information, such as a physical or electronic 54 | address, without explicit permission 55 | * Other conduct which could reasonably be considered inappropriate in a 56 | professional setting 57 | 58 | ### Our Responsibilities 59 | 60 | Project maintainers are responsible for clarifying the standards of acceptable 61 | behavior and are expected to 
take appropriate and fair corrective action in 62 | response to any instances of unacceptable behavior. 63 | 64 | Project maintainers have the right and responsibility to remove, edit, or 65 | reject comments, commits, code, wiki edits, issues, and other contributions 66 | that are not aligned to this Code of Conduct, or to ban temporarily or 67 | permanently any contributor for other behaviors that they deem inappropriate, 68 | threatening, offensive, or harmful. 69 | 70 | ### Scope 71 | 72 | This Code of Conduct applies both within project spaces and in public spaces 73 | when an individual is representing the project or its community. Examples of 74 | representing a project or community include using an official project e-mail 75 | address, posting via an official social media account, or acting as an appointed 76 | representative at an online or offline event. Representation of a project may be 77 | further defined and clarified by project maintainers. 78 | 79 | ### Enforcement 80 | 81 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 82 | reported by contacting the project team at "bcongdo2 at illinois dot edu". All 83 | complaints will be reviewed and investigated and will result in a response that 84 | is deemed necessary and appropriate to the circumstances. The project team is 85 | obligated to maintain confidentiality with regard to the reporter of an incident. 86 | Further details of specific enforcement policies may be posted separately. 87 | 88 | Project maintainers who do not follow or enforce the Code of Conduct in good 89 | faith may face temporary or permanent repercussions as determined by other 90 | members of the project's leadership. 
91 | 92 | ### Attribution 93 | 94 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, 95 | available at [http://contributor-covenant.org/version/1/4][version] 96 | 97 | [homepage]: http://contributor-covenant.org 98 | [version]: http://contributor-covenant.org/version/1/4/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 Ben Congdon 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /config.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "github.com/spf13/viper" 5 | ) 6 | 7 | func loadConfig() { 8 | viper.SetConfigName("corralrc") 9 | viper.AddConfigPath(".") 10 | viper.AddConfigPath("$HOME/.corral") 11 | 12 | setupDefaults() 13 | 14 | viper.ReadInConfig() 15 | 16 | viper.SetEnvPrefix("corral") 17 | viper.AutomaticEnv() 18 | } 19 | 20 | func setupDefaults() { 21 | defaultSettings := map[string]interface{}{ 22 | "lambdaFunctionName": "corral_function", 23 | "lambdaMemory": 1500, 24 | "lambdaTimeout": 180, 25 | "lambdaManageRole": true, 26 | "cleanup": true, 27 | "verbose": false, 28 | "splitSize": 100 * 1024 * 1024, // Default input split size is 100Mb 29 | "mapBinSize": 512 * 1024 * 1024, // Default map bin size is 512Mb 30 | "reduceBinSize": 512 * 1024 * 1024, // Default reduce bin size is 512Mb 31 | "maxConcurrency": 500, // Maximum number of concurrent executors 32 | "workingLocation": ".", 33 | } 34 | for key, value := range defaultSettings { 35 | viper.SetDefault(key, value) 36 | } 37 | 38 | aliases := map[string]string{ 39 | "verbose": "v", 40 | "working_location": "o", 41 | } 42 | for key, alias := range aliases { 43 | viper.RegisterAlias(alias, key) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /doc.go: -------------------------------------------------------------------------------- 1 | /*Package corral is a MapReduce framework designed to be deployed to serverless 2 | platforms, like AWS Lambda. 3 | 4 | It presents a lightweight alternative to Hadoop MapReduce. Much of the design 5 | philosophy was inspired by Yelp's mrjob -- corral retains mrjob's ease-of-use 6 | while gaining the type safety and speed of Go. 7 | 8 | Corral's runtime model consists of stateless, transient executors controlled by 9 | a central driver. 
Currently, the best environment for deployment is AWS Lambda, 10 | but corral is modular enough that support for other serverless platforms can be 11 | added as support for Go in cloud functions improves. 12 | 13 | Corral is best suited for data-intensive but computationally inexpensive tasks, 14 | such as ETL jobs. 15 | */ 16 | package corral 17 | -------------------------------------------------------------------------------- /driver.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | "os" 7 | "runtime" 8 | "runtime/pprof" 9 | "sync" 10 | "time" 11 | 12 | "github.com/dustin/go-humanize" 13 | 14 | "github.com/spf13/viper" 15 | 16 | "golang.org/x/sync/semaphore" 17 | 18 | log "github.com/sirupsen/logrus" 19 | pb "gopkg.in/cheggaaa/pb.v1" 20 | 21 | "github.com/aws/aws-lambda-go/lambda" 22 | "github.com/bcongdon/corral/internal/pkg/corfs" 23 | flag "github.com/spf13/pflag" 24 | ) 25 | 26 | // Driver controls the execution of a MapReduce Job 27 | type Driver struct { 28 | jobs []*Job 29 | config *config 30 | executor executor 31 | } 32 | 33 | // config configures a Driver's execution of jobs 34 | type config struct { 35 | Inputs []string 36 | SplitSize int64 37 | MapBinSize int64 38 | ReduceBinSize int64 39 | MaxConcurrency int 40 | WorkingLocation string 41 | Cleanup bool 42 | } 43 | 44 | func newConfig() *config { 45 | loadConfig() // Load viper config from settings file(s) and environment 46 | 47 | // Register command line flags 48 | flag.Parse() 49 | viper.BindPFlags(flag.CommandLine) 50 | 51 | return &config{ 52 | Inputs: []string{}, 53 | SplitSize: viper.GetInt64("splitSize"), 54 | MapBinSize: viper.GetInt64("mapBinSize"), 55 | ReduceBinSize: viper.GetInt64("reduceBinSize"), 56 | MaxConcurrency: viper.GetInt("maxConcurrency"), 57 | WorkingLocation: viper.GetString("workingLocation"), 58 | Cleanup: viper.GetBool("cleanup"), 59 | } 60 | } 61 | 62 | // Option allows 
configuration of a Driver 63 | type Option func(*config) 64 | 65 | // NewDriver creates a new Driver with the provided job and optional configuration 66 | func NewDriver(job *Job, options ...Option) *Driver { 67 | d := &Driver{ 68 | jobs: []*Job{job}, 69 | executor: localExecutor{}, 70 | } 71 | 72 | c := newConfig() 73 | for _, f := range options { 74 | f(c) 75 | } 76 | 77 | if c.SplitSize > c.MapBinSize { 78 | log.Warn("Configured Split Size is larger than Map Bin size") 79 | c.SplitSize = c.MapBinSize 80 | } 81 | 82 | d.config = c 83 | log.Debugf("Loaded config: %#v", c) 84 | 85 | return d 86 | } 87 | 88 | // NewMultiStageDriver creates a new Driver with the provided jobs and optional configuration 89 | func NewMultiStageDriver(jobs []*Job, options ...Option) *Driver { 90 | driver := NewDriver(nil, options...) 91 | driver.jobs = jobs 92 | return driver 93 | } 94 | 95 | // WithSplitSize sets the SplitSize of the Driver 96 | func WithSplitSize(s int64) Option { 97 | return func(c *config) { 98 | c.SplitSize = s 99 | } 100 | } 101 | 102 | // WithMapBinSize sets the MapBinSize of the Driver 103 | func WithMapBinSize(s int64) Option { 104 | return func(c *config) { 105 | c.MapBinSize = s 106 | } 107 | } 108 | 109 | // WithReduceBinSize sets the ReduceBinSize of the Driver 110 | func WithReduceBinSize(s int64) Option { 111 | return func(c *config) { 112 | c.ReduceBinSize = s 113 | } 114 | } 115 | 116 | // WithWorkingLocation sets the location and filesystem backend of the Driver 117 | func WithWorkingLocation(location string) Option { 118 | return func(c *config) { 119 | c.WorkingLocation = location 120 | } 121 | } 122 | 123 | // WithInputs specifies job inputs (i.e. input files/directories) 124 | func WithInputs(inputs ...string) Option { 125 | return func(c *config) { 126 | c.Inputs = append(c.Inputs, inputs...) 
127 | } 128 | } 129 | // runMapPhase splits the inputs, packs the splits into bins, and runs one mapper per bin with at most MaxConcurrency mappers in flight. Mapper errors are logged, not returned. 130 | func (d *Driver) runMapPhase(job *Job, jobNumber int, inputs []string) { 131 | inputSplits := job.inputSplits(inputs, d.config.SplitSize) 132 | if len(inputSplits) == 0 { 133 | log.Warnf("No input splits") 134 | return 135 | } 136 | log.Debugf("Number of job input splits: %d", len(inputSplits)) 137 | 138 | inputBins := packInputSplits(inputSplits, d.config.MapBinSize) 139 | log.Debugf("Number of job input bins: %d", len(inputBins)) 140 | bar := pb.New(len(inputBins)).Prefix("Map").Start() 141 | 142 | var wg sync.WaitGroup 143 | sem := semaphore.NewWeighted(int64(d.config.MaxConcurrency)) 144 | for binID, bin := range inputBins { 145 | sem.Acquire(context.Background(), 1) 146 | wg.Add(1) 147 | go func(bID uint, b []inputSplit) { 148 | defer wg.Done() 149 | defer sem.Release(1) 150 | defer bar.Increment() 151 | err := d.executor.RunMapper(job, jobNumber, bID, b) 152 | if err != nil { 153 | log.Errorf("Error when running mapper %d: %s", bID, err) 154 | } 155 | }(uint(binID), bin) 156 | } 157 | wg.Wait() 158 | bar.Finish() 159 | } 160 | // runReducePhase runs one reducer per intermediate bin and waits for all of them; reducer errors are logged, not returned. NOTE(review): unlike runMapPhase, no semaphore is used here, so MaxConcurrency does not appear to bound the number of concurrent reducers - confirm whether that is intended. 161 | func (d *Driver) runReducePhase(job *Job, jobNumber int) { 162 | var wg sync.WaitGroup 163 | bar := pb.New(int(job.intermediateBins)).Prefix("Reduce").Start() 164 | for binID := uint(0); binID < job.intermediateBins; binID++ { 165 | wg.Add(1) 166 | go func(bID uint) { 167 | defer wg.Done() 168 | defer bar.Increment() 169 | err := d.executor.RunReducer(job, jobNumber, bID) 170 | if err != nil { 171 | log.Errorf("Error when running reducer %d: %s", bID, err) 172 | } 173 | }(binID) 174 | } 175 | wg.Wait() 176 | bar.Finish() 177 | } 178 | 179 | // run starts the Driver 180 | func (d *Driver) run() { 181 | if runningInLambda() { 182 | lambdaDriver = d 183 | lambda.Start(handleRequest) 184 | } 185 | if lBackend, ok := d.executor.(*lambdaExecutor); ok { 186 | lBackend.Deploy() 187 | } 188 | 189 | if len(d.config.Inputs) == 0 { 190 | log.Error("No inputs!") 191 | return 192 | } 193 | 194 | inputs := 
d.config.Inputs 195 | for idx, job := range d.jobs { 196 | // Initialize job filesystem 197 | job.fileSystem = corfs.InferFilesystem(inputs[0]) 198 | 199 | jobWorkingLoc := d.config.WorkingLocation 200 | log.Infof("Starting job%d (%d/%d)", idx, idx+1, len(d.jobs)) 201 | 202 | if len(d.jobs) > 1 { 203 | jobWorkingLoc = job.fileSystem.Join(jobWorkingLoc, fmt.Sprintf("job%d", idx)) 204 | } 205 | job.outputPath = jobWorkingLoc 206 | 207 | *job.config = *d.config 208 | d.runMapPhase(job, idx, inputs) 209 | d.runReducePhase(job, idx) 210 | 211 | // Set inputs of next job to be outputs of current job 212 | inputs = []string{job.fileSystem.Join(jobWorkingLoc, "output-*")} 213 | 214 | log.Infof("Job %d - Total Bytes Read:\t%s", idx, humanize.Bytes(uint64(job.bytesRead))) 215 | log.Infof("Job %d - Total Bytes Written:\t%s", idx, humanize.Bytes(uint64(job.bytesWritten))) 216 | } 217 | } 218 | 219 | var lambdaFlag = flag.Bool("lambda", false, "Use lambda backend") 220 | var outputDir = flag.StringP("out", "o", "", "Output `directory` (can be local or in S3)") 221 | var memprofile = flag.String("memprofile", "", "Write memory profile to `file`") 222 | var verbose = flag.BoolP("verbose", "v", false, "Output verbose logs") 223 | var undeploy = flag.Bool("undeploy", false, "Undeploy the Lambda function and IAM permissions without running the driver") 224 | 225 | // Main starts the Driver, running the submitted jobs. 226 | func (d *Driver) Main() { 227 | if viper.GetBool("verbose") { 228 | log.SetLevel(log.DebugLevel) 229 | } 230 | 231 | if *undeploy { 232 | lambda := newLambdaExecutor(viper.GetString("lambdaFunctionName")) 233 | lambda.Undeploy() 234 | return 235 | } 236 | 237 | d.config.Inputs = append(d.config.Inputs, flag.Args()...) 
238 | if *lambdaFlag { 239 | d.executor = newLambdaExecutor(viper.GetString("lambdaFunctionName")) 240 | } 241 | 242 | if *outputDir != "" { 243 | d.config.WorkingLocation = *outputDir 244 | } 245 | 246 | start := time.Now() 247 | d.run() 248 | end := time.Now() 249 | fmt.Printf("Job Execution Time: %s\n", end.Sub(start)) 250 | 251 | if *memprofile != "" { 252 | f, err := os.Create(*memprofile) 253 | if err != nil { 254 | log.Fatal("could not create memory profile: ", err) 255 | } 256 | runtime.GC() // get up-to-date statistics 257 | if err := pprof.WriteHeapProfile(f); err != nil { 258 | log.Fatal("could not write memory profile: ", err) 259 | } 260 | f.Close() 261 | } 262 | } 263 | -------------------------------------------------------------------------------- /driver_test.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "path/filepath" 8 | "strings" 9 | "testing" 10 | 11 | "github.com/stretchr/testify/assert" 12 | ) 13 | 14 | func TestNewDriver(t *testing.T) { 15 | j := &Job{} 16 | driver := NewDriver( 17 | j, 18 | WithSplitSize(100), 19 | WithMapBinSize(200), 20 | WithReduceBinSize(300), 21 | WithWorkingLocation("s3://foo"), 22 | ) 23 | 24 | assert.Equal(t, j, driver.jobs[0]) 25 | assert.Equal(t, int64(100), driver.config.SplitSize) 26 | assert.Equal(t, int64(200), driver.config.MapBinSize) 27 | assert.Equal(t, int64(300), driver.config.ReduceBinSize) 28 | assert.Equal(t, "s3://foo", driver.config.WorkingLocation) 29 | } 30 | 31 | type testWCJob struct{} 32 | 33 | func (testWCJob) Map(key, value string, emitter Emitter) { 34 | for _, word := range strings.Fields(value) { 35 | emitter.Emit(word, "1") 36 | } 37 | } 38 | 39 | func (testWCJob) Reduce(key string, values ValueIterator, emitter Emitter) { 40 | count := 0 41 | for range values.Iter() { 42 | count++ 43 | } 44 | emitter.Emit(key, fmt.Sprintf("%d", count)) 45 | } 46 | 47 | type testFilterJob 
struct { 48 | prefix string 49 | } 50 | 51 | func (j *testFilterJob) Map(key, value string, emitter Emitter) { 52 | if strings.HasPrefix(key, j.prefix) { 53 | emitter.Emit(key, value) 54 | } 55 | } 56 | 57 | func (j *testFilterJob) Reduce(key string, values ValueIterator, emitter Emitter) { 58 | // Identity reducer 59 | for value := range values.Iter() { 60 | emitter.Emit(key, value) 61 | } 62 | 63 | } 64 | 65 | func testOutputToKeyValues(output string) []keyValue { 66 | lines := strings.Split(output, "\n") 67 | keyVals := make([]keyValue, 0, len(lines)) 68 | 69 | for _, line := range lines { 70 | split := strings.Split(line, "\t") 71 | if len(split) != 2 { 72 | continue 73 | } 74 | keyVals = append(keyVals, keyValue{ 75 | Key: split[0], 76 | Value: split[1], 77 | }) 78 | } 79 | return keyVals 80 | } 81 | 82 | func TestLocalMapReduce(t *testing.T) { 83 | tmpdir, err := ioutil.TempDir("", "test") 84 | assert.Nil(t, err) 85 | defer os.RemoveAll(tmpdir) 86 | 87 | inputPath := filepath.Join(tmpdir, "test_input") 88 | ioutil.WriteFile(inputPath, []byte("the test input\nthe input test\nfoo bar baz"), 0700) 89 | 90 | job := NewJob(testWCJob{}, testWCJob{}) 91 | driver := NewDriver( 92 | job, 93 | WithInputs(tmpdir), 94 | WithWorkingLocation(tmpdir), 95 | ) 96 | 97 | driver.Main() 98 | 99 | output, err := ioutil.ReadFile(filepath.Join(tmpdir, "output-part-0")) 100 | assert.Nil(t, err) 101 | 102 | keyVals := testOutputToKeyValues(string(output)) 103 | assert.Len(t, keyVals, 6) 104 | 105 | correct := []keyValue{ 106 | {"the", "2"}, 107 | {"test", "2"}, 108 | {"input", "2"}, 109 | {"foo", "1"}, 110 | {"bar", "1"}, 111 | {"baz", "1"}, 112 | } 113 | for _, kv := range correct { 114 | assert.Contains(t, keyVals, kv) 115 | } 116 | } 117 | 118 | func TestLocalMultiJob(t *testing.T) { 119 | tmpdir, err := ioutil.TempDir("", "test") 120 | assert.Nil(t, err) 121 | defer os.RemoveAll(tmpdir) 122 | 123 | inputPath := filepath.Join(tmpdir, "test_input") 124 | ioutil.WriteFile(inputPath, 
[]byte("the test input\nthe input test\nfoo bar baz"), 0700) 125 | 126 | mr1 := testWCJob{} 127 | job1 := NewJob(mr1, mr1) 128 | 129 | // Second job filters out any keys that don't start with 't' 130 | mr2 := &testFilterJob{prefix: "t"} 131 | job2 := NewJob(mr2, mr2) 132 | 133 | driver := NewMultiStageDriver([]*Job{job1, job2}, 134 | WithInputs(tmpdir), 135 | WithWorkingLocation(tmpdir), 136 | ) 137 | 138 | driver.Main() 139 | 140 | output, err := ioutil.ReadFile(filepath.Join(tmpdir, "job1", "output-part-0")) 141 | assert.Nil(t, err) 142 | 143 | keyVals := testOutputToKeyValues(string(output)) 144 | assert.Len(t, keyVals, 2) 145 | 146 | correct := []keyValue{ 147 | {"the", "2"}, 148 | {"test", "2"}, 149 | } 150 | for _, kv := range correct { 151 | assert.Contains(t, keyVals, kv) 152 | } 153 | } 154 | 155 | func TestLocalNoCrashOnNoResolvedInputFiles(t *testing.T) { 156 | job := NewJob(testWCJob{}, testWCJob{}) 157 | driver := NewDriver( 158 | job, 159 | WithInputs("does_not_exist"), 160 | WithWorkingLocation("some_file"), 161 | ) 162 | 163 | driver.Main() 164 | } 165 | 166 | type statefulJob struct { 167 | filterWords *[]string 168 | } 169 | 170 | func (s statefulJob) Map(key, value string, emitter Emitter) { 171 | for _, word := range strings.Fields(value) { 172 | for _, filterWord := range *s.filterWords { 173 | if filterWord == word { 174 | emitter.Emit(word, "1") 175 | } 176 | } 177 | } 178 | } 179 | 180 | func (statefulJob) Reduce(key string, values ValueIterator, emitter Emitter) { 181 | count := 0 182 | for range values.Iter() { 183 | count++ 184 | } 185 | emitter.Emit(key, fmt.Sprintf("%d", count)) 186 | } 187 | 188 | func TestLocalStructFieldMapReduce(t *testing.T) { 189 | tmpdir, err := ioutil.TempDir("", "test") 190 | assert.Nil(t, err) 191 | defer os.RemoveAll(tmpdir) 192 | 193 | inputPath := filepath.Join(tmpdir, "test_input") 194 | ioutil.WriteFile(inputPath, []byte("the test input\nthe input test\nfoo bar baz"), 0700) 195 | 196 | jobStruct := 
statefulJob{filterWords: &[]string{"foo", "bar"}} 197 | job := NewJob(jobStruct, jobStruct) 198 | driver := NewDriver( 199 | job, 200 | WithInputs(tmpdir), 201 | WithWorkingLocation(tmpdir), 202 | ) 203 | 204 | driver.Main() 205 | 206 | output, err := ioutil.ReadFile(filepath.Join(tmpdir, "output-part-0")) 207 | assert.Nil(t, err) 208 | 209 | keyVals := testOutputToKeyValues(string(output)) 210 | assert.Len(t, keyVals, 2) 211 | 212 | correct := []keyValue{ 213 | {"foo", "1"}, 214 | {"bar", "1"}, 215 | } 216 | for _, kv := range correct { 217 | assert.Contains(t, keyVals, kv) 218 | } 219 | } 220 | -------------------------------------------------------------------------------- /emitter.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "encoding/json" 5 | "errors" 6 | "fmt" 7 | "hash/fnv" 8 | "io" 9 | "strings" 10 | "sync" 11 | 12 | "github.com/bcongdon/corral/internal/pkg/corfs" 13 | log "github.com/sirupsen/logrus" 14 | ) 15 | 16 | // Emitter enables mappers and reducers to yield key-value pairs. 17 | type Emitter interface { 18 | Emit(key, value string) error 19 | close() error 20 | bytesWritten() int64 21 | } 22 | 23 | // reducerEmitter is a threadsafe emitter. 24 | type reducerEmitter struct { 25 | writer io.WriteCloser 26 | mut *sync.Mutex 27 | writtenBytes int64 28 | } 29 | 30 | // newReducerEmitter initializes and returns a new reducerEmitter 31 | func newReducerEmitter(writer io.WriteCloser) *reducerEmitter { 32 | return &reducerEmitter{ 33 | writer: writer, 34 | mut: &sync.Mutex{}, 35 | } 36 | } 37 | 38 | // Emit yields a key-value pair to the framework. 39 | func (e *reducerEmitter) Emit(key, value string) error { 40 | e.mut.Lock() 41 | defer e.mut.Unlock() 42 | 43 | n, err := e.writer.Write([]byte(fmt.Sprintf("%s\t%s\n", key, value))) 44 | e.writtenBytes += int64(n) 45 | return err 46 | } 47 | 48 | // close terminates the reducerEmitter. 
close must not be called more than once 49 | func (e *reducerEmitter) close() error { 50 | return e.writer.Close() 51 | } 52 | 53 | func (e *reducerEmitter) bytesWritten() int64 { 54 | return e.writtenBytes 55 | } 56 | 57 | // mapperEmitter is an emitter that partitions keys written to it. 58 | // mapperEmitter maintains a map of writers. Keys are partitioned into one of numBins 59 | // intermediate "shuffle" bins. Each bin is written as a separate file. 60 | type mapperEmitter struct { 61 | numBins uint // number of intermediate shuffle bins 62 | writers map[uint]io.WriteCloser // maps a partition number to an open writer 63 | fs corfs.FileSystem // filesystem to use when opening writers 64 | mapperID uint // numeric identifier of the mapper using this emitter 65 | outDir string // folder to save map output to 66 | partitionFunc PartitionFunc // PartitionFunc to use when partitioning map output keys into intermediate bins 67 | writtenBytes int64 // counter for number of bytes written from emitted key/val pairs 68 | } 69 | 70 | // newMapperEmitter initializes a new mapperEmitter that uses hashPartition as its partition function 71 | func newMapperEmitter(numBins uint, mapperID uint, outDir string, fs corfs.FileSystem) mapperEmitter { 72 | return mapperEmitter{ 73 | numBins: numBins, 74 | writers: make(map[uint]io.WriteCloser, numBins), 75 | fs: fs, 76 | mapperID: mapperID, 77 | outDir: outDir, 78 | partitionFunc: hashPartition, 79 | } 80 | } 81 | 82 | // hashPartition partitions a key to one of numBins shuffle bins using a 64-bit FNV-1 hash of the key 83 | func hashPartition(key string, numBins uint) uint { 84 | h := fnv.New64() 85 | h.Write([]byte(key)) 86 | return uint(h.Sum64() % uint64(numBins)) 87 | } 88 | 89 | // Emit yields a key-value pair to the framework. 
90 | func (me *mapperEmitter) Emit(key, value string) error { 91 | bin := me.partitionFunc(key, me.numBins) 92 | 93 | // Open writer for the bin, if necessary 94 | writer, exists := me.writers[bin] 95 | if !exists { 96 | var err error 97 | path := me.fs.Join(me.outDir, fmt.Sprintf("map-bin%d-%d.out", bin, me.mapperID)) 98 | 99 | writer, err = me.fs.OpenWriter(path) 100 | if err != nil { 101 | return err 102 | } 103 | me.writers[bin] = writer 104 | } 105 | 106 | kv := keyValue{ 107 | Key: key, 108 | Value: value, 109 | } 110 | 111 | data, err := json.Marshal(kv) 112 | if err != nil { 113 | log.Error(err) 114 | return err 115 | } 116 | 117 | data = append(data, '\n') 118 | _, err = writer.Write(data) // NOTE(review): the byte count returned by Write is discarded and nothing in this method updates me.writtenBytes, so bytesWritten() appears to always report 0 for mappers - confirm, and accumulate the count the way reducerEmitter.Emit does 119 | return err 120 | } 121 | 122 | // close terminates the mapperEmitter. Must not be called more than once 123 | func (me *mapperEmitter) close() error { 124 | errs := make([]string, 0) 125 | for _, writer := range me.writers { 126 | err := writer.Close() 127 | if err != nil { 128 | errs = append(errs, err.Error()) 129 | } 130 | } 131 | if len(errs) > 0 { 132 | return errors.New(strings.Join(errs, "\n")) 133 | } 134 | 135 | return nil 136 | } 137 | // bytesWritten returns the value of the writtenBytes counter 138 | func (me *mapperEmitter) bytesWritten() int64 { 139 | return me.writtenBytes 140 | } 141 | -------------------------------------------------------------------------------- /emitter_test.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io" 7 | "io/ioutil" 8 | "strings" 9 | "sync" 10 | "testing" 11 | 12 | "github.com/bcongdon/corral/internal/pkg/corfs" 13 | 14 | "github.com/stretchr/testify/assert" 15 | ) 16 | 17 | type testWriteCloser struct { 18 | *bytes.Buffer 19 | } 20 | 21 | func (t *testWriteCloser) Close() error { 22 | return nil 23 | } 24 | 25 | func TestHashPartition(t *testing.T) { 26 | bin := hashPartition("foo", 100) 27 | assert.Equal(t, bin, uint(0x63)) 28 | } 29 | 30 | func TestReducerEmitter(t *testing.T) { 
31 | writer := &testWriteCloser{new(bytes.Buffer)} 32 | emitter := newReducerEmitter(writer) 33 | 34 | err := emitter.Emit("key", "value") 35 | assert.Nil(t, err) 36 | 37 | written, err := ioutil.ReadAll(writer) 38 | assert.Nil(t, err) 39 | assert.Equal(t, "key\tvalue\n", string(written)) 40 | 41 | err = emitter.close() 42 | assert.Nil(t, err) 43 | } 44 | 45 | func TestReducerEmitterThreadSafety(t *testing.T) { 46 | writer := &testWriteCloser{new(bytes.Buffer)} 47 | emitter := newReducerEmitter(writer) 48 | 49 | var wg sync.WaitGroup 50 | for i := 0; i < 10; i++ { 51 | wg.Add(1) 52 | go func(key int) { 53 | defer wg.Done() 54 | err := emitter.Emit(fmt.Sprint(key), "value") 55 | assert.Nil(t, err) 56 | }(i) 57 | } 58 | wg.Wait() 59 | 60 | written, err := ioutil.ReadAll(writer) 61 | assert.Nil(t, err) 62 | 63 | records := strings.Split(string(written), "\n") 64 | assert.Len(t, records, 11) 65 | for i := 0; i < 10; i++ { 66 | assert.Contains(t, records, fmt.Sprintf("%d\tvalue", i)) 67 | } 68 | 69 | err = emitter.close() 70 | assert.Nil(t, err) 71 | } 72 | 73 | type mockFs struct { 74 | writers map[string]*testWriteCloser 75 | } 76 | 77 | func (m *mockFs) ListFiles(string) ([]corfs.FileInfo, error) { 78 | return []corfs.FileInfo{}, nil 79 | } 80 | 81 | func (m *mockFs) OpenReader(filePath string, startAt int64) (io.ReadCloser, error) { 82 | return ioutil.NopCloser(new(bytes.Buffer)), nil 83 | } 84 | 85 | func (m *mockFs) OpenWriter(filePath string) (io.WriteCloser, error) { 86 | if _, ok := m.writers[filePath]; !ok { 87 | buf := new(bytes.Buffer) 88 | m.writers[filePath] = &testWriteCloser{buf} 89 | } 90 | return m.writers[filePath], nil 91 | } 92 | 93 | func (m *mockFs) Stat(filePath string) (corfs.FileInfo, error) { 94 | return corfs.FileInfo{ 95 | Name: filePath, 96 | Size: 0, 97 | }, nil 98 | } 99 | 100 | func (m *mockFs) Init() error { return nil } 101 | 102 | func (m *mockFs) Join(e ...string) string { return strings.Join(e, "/") } 103 | 104 | func (m *mockFs) 
Delete(string) error { return nil } 105 | 106 | func TestMapperEmitter(t *testing.T) { 107 | mFs := &mockFs{writers: make(map[string]*testWriteCloser)} 108 | var fs corfs.FileSystem = mFs 109 | emitter := newMapperEmitter(3, 0, "out", fs) 110 | 111 | err := emitter.Emit("key1", "val1") 112 | assert.Nil(t, err) 113 | 114 | err = emitter.Emit("key123", "val2") 115 | assert.Nil(t, err) 116 | 117 | err = emitter.Emit("key359", "val3") 118 | assert.Nil(t, err) 119 | 120 | assert.Len(t, mFs.writers, 3) 121 | 122 | assert.Equal(t, `{"key":"key123","value":"val2"}`+"\n", string(mFs.writers["out/map-bin0-0.out"].Bytes())) 123 | assert.Equal(t, `{"key":"key359","value":"val3"}`+"\n", string(mFs.writers["out/map-bin1-0.out"].Bytes())) 124 | assert.Equal(t, `{"key":"key1","value":"val1"}`+"\n", string(mFs.writers["out/map-bin2-0.out"].Bytes())) 125 | 126 | assert.Nil(t, emitter.close()) 127 | } 128 | 129 | func TestMapperEmitterCustomPartition(t *testing.T) { 130 | mFs := &mockFs{writers: make(map[string]*testWriteCloser)} 131 | var fs corfs.FileSystem = mFs 132 | emitter := newMapperEmitter(3, 0, "out", fs) 133 | emitter.partitionFunc = func(key string, numBuckets uint) uint { 134 | if strings.HasPrefix(key, "a") { 135 | return 0 136 | } 137 | return numBuckets - 1 138 | } 139 | 140 | err := emitter.Emit("a", "val1") 141 | assert.Nil(t, err) 142 | 143 | err = emitter.Emit("a", "val2") 144 | assert.Nil(t, err) 145 | 146 | err = emitter.Emit("b", "val3") 147 | assert.Nil(t, err) 148 | 149 | assert.Len(t, mFs.writers, 2) 150 | 151 | assert.Equal(t, `{"key":"a","value":"val1"}`+"\n"+`{"key":"a","value":"val2"}`+"\n", string(mFs.writers["out/map-bin0-0.out"].Bytes())) 152 | assert.Equal(t, `{"key":"b","value":"val3"}`+"\n", string(mFs.writers["out/map-bin2-0.out"].Bytes())) 153 | 154 | assert.Nil(t, emitter.close()) 155 | } 156 | -------------------------------------------------------------------------------- /examples/README.md: 
-------------------------------------------------------------------------------- 1 | # Corral Examples 2 | 3 | The below examples are provided for "Getting Started" writing applications in corral. 4 | 5 | The Amplab benchmarks are useful for comparing corral's performance to other "Big Data Frameworks". These benchmark applications are also useful as they showcase common MapReduce tasks (filters, aggregations, and joins) as written in corral. 6 | 7 | ## [Word Count](word_count) 8 | 9 | * Reads input files line-by-line and reports the occurrences of each observed word. 10 | 11 | ## [Amplab Benchmark Query 1](amplab1) 12 | 13 | * Implements the ["Scan Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query1) from the Amplab Big Data Benchmark 14 | * Performs a scan of input data, with a filter enforced on certain fields 15 | 16 | ## [Amplab Benchmark Query 2](amplab2) 17 | 18 | * Implements the ["Aggregation Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query2) from the Amplab Big Data Benchmark 19 | * Performs a filter on input data, and returns an aggregate (sum) value by key 20 | 21 | ## [Amplab Benchmark Query 3](amplab3) 22 | 23 | * Implements the ["Join Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query3) from the Amplab Big Data Benchmark 24 | * Performs filters, aggregations, and a join on multiple input datasets. 25 | 26 | -------------------------------------------------------------------------------- /examples/amplab1/Makefile: -------------------------------------------------------------------------------- 1 | BUCKET = ${AWS_TEST_BUCKET} 2 | 3 | BIN_DIR = ./bin 4 | PROG_NAME = amplab1 5 | AMPLAB_PATH = big-data-benchmark/pavlo/text 6 | 7 | .PHONY: all clean $(PROG_NAME) input_in_s3 8 | 9 | .EXPORT_ALL_VARIABLES: 10 | CORRAL_VERBOSE = true 11 | 12 | all: $(PROG_NAME) 13 | 14 | $(PROG_NAME): 15 | go build -o $(BIN_DIR)/$@ . 
16 | 17 | test_al1_local_tiny: $(PROG_NAME) 18 | $(BIN_DIR)/$(PROG_NAME) data/* 19 | 20 | tiny_data: 21 | aws s3 cp --recursive ./data/ s3://${BUCKET} 22 | 23 | test_al1_s3_tiny: $(PROG_NAME) tiny_data 24 | $(BIN_DIR)/$(PROG_NAME) --out s3://${BUCKET} s3://${BUCKET}/part-* 25 | 26 | test_al1_lambda_tiny: $(PROG_NAME) tiny_data 27 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://${BUCKET}/part-* 28 | 29 | test_al1_lambda_1node: $(PROG_NAME) 30 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/1node/rankings/part-* 31 | 32 | test_al1_lambda_5node: $(PROG_NAME) 33 | .$(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/5nodes/rankings/part-* 34 | 35 | clean: 36 | find . -name "*.out" -print0 | xargs -0 rm 37 | rm -f $(BIN_DIR)/$(PROG_NAME) output* 38 | aws s3 rm s3://${BUCKET} --recursive 39 | -------------------------------------------------------------------------------- /examples/amplab1/README.md: -------------------------------------------------------------------------------- 1 | # Amplab 1 Example 2 | 3 | This example implements the ["Scan Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query1) from the Amplab Big Data Benchmark. 4 | 5 | ## Benchmark Results 6 | 7 | | Benchmark | Dataset Size | Job Execution Time | 8 | |:----------------------|:-------------|:-------------------| 9 | | test_al1_local_tiny | 77.6KB | 10ms | 10 | | test_al1_s3_tiny | 77.6KB | 1.25sec | 11 | | test_al1_lambda_tiny | 77.6KB | 3.92sec | 12 | | test_al1_lambda_1node | 1.28GB | 35.6sec | 13 | | test_al1_lambda_5node | 6.38GB | 41.8sec | 14 | 15 | Compared to the results reported in the [graphs provided by Amplab](https://amplab.cs.berkeley.edu/benchmark/#query1), corral performs reasonably strongly. It does not outperform most of the listed frameworks (except for Hive), but executes within a similar timescale. 
16 | -------------------------------------------------------------------------------- /examples/amplab1/amplab1.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | 8 | "github.com/bcongdon/corral" 9 | ) 10 | 11 | const pageRankCutoff = 50 12 | 13 | type amplab1 struct{} 14 | 15 | func (a amplab1) Map(key, value string, emitter corral.Emitter) { 16 | fields := strings.Split(value, ",") 17 | if len(fields) != 3 { 18 | fmt.Printf("Invalid record: '%s'\n", value) 19 | return 20 | } 21 | 22 | pageURL := fields[0] 23 | pageRank, err := strconv.Atoi(fields[1]) 24 | if err == nil && pageRank > pageRankCutoff { 25 | emitter.Emit(pageURL, fields[1]) 26 | } 27 | } 28 | 29 | func (a amplab1) Reduce(key string, values corral.ValueIterator, emitter corral.Emitter) { 30 | for value := range values.Iter() { 31 | emitter.Emit(key, value) 32 | } 33 | } 34 | 35 | func main() { 36 | job := corral.NewJob(amplab1{}, amplab1{}) 37 | 38 | driver := corral.NewDriver(job) 39 | driver.Main() 40 | } 41 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00002: -------------------------------------------------------------------------------- 1 | pucpwgjykbcelcevdmzuahojzjxgmpgojfehhabasvbzbzpczqcubfbfekv,397,51 2 | cscnxpejru,169,93 3 | bwmsfxqjlmnpfgflgbjheluvpdfbjhppfjgtmhiuayfhnopxlasxuofelttcyfhiizmzglofxivnymntrwj,152,52 4 | asvqyjexthkrgsubsnktrnfkeclppaaqymzjrjonvuckxnahiakpizueeabawbafa,125,80 5 | azoomxtuqxyoxiffctdmpqzvxypnbbnkivtnnrihmahalsptqubbkpjefrjkwdbjzrl,111,79 6 | wkjiwqlntxenobhsrqznlvshfgryhohlkurecbtcvflyvawskjowgtcsjrhtuwaxuegluhppstocdmhwtguztpfjraskx,95,91 7 | bnokzcllyzyjxljomkqwguhuhnewryslitzomgefsoffwzroryy,76,22 8 | tpmltuwalvkduavbwyxvv,94,89 9 | bwdjrrvhmcuvhoseriwysjrsjqktawqcvzqicmfyuqlkzmpiihhmxiamzalvljqcejvogzshjlg,76,47 10 | ruqqqiaenpbtetnhdyw,65,86 11 | 
toqnmxdhvwtdgdssipqkbsgghcxlgwrr,50,76 12 | jrcxilplavgpxejqveqahdsfbxpwjirgzfkuwtiphpogiyoxvsplogtrkfpfvtacnreoimmjlhmamjpwtuniqc,56,83 13 | jelkixioil,74,42 14 | fsomqgktlhupjwnuaijfckuragjhgekrjfxotekoqjudmopiuxlifatkmaztltdhzwlqoxfwphjvxqepmllrxaugbnbabxqlqauf,68,74 15 | egadxzlpzfxnylxsvvjezcye,46,79 16 | mwocakscnhykpsjxzoqezpabzesnm,54,11 17 | ycgofjpllagprixqqkflwxugzztlmzlxhhwzzkufxcryvvpmnewnbygwndnrcanubrruxqpcsnlsspfyvnrjihacuwmct,46,79 18 | zjawojxmqgoclronijxktltbvyywshjuqwljdipgdgxftwfhtvpscmaqgrembsoiwieq,61,49 19 | tbzlzttlkcw,34,23 20 | xylbmfgypferxxmmjppsrwffcuyiizkkoqrclrhykhpaeyquxjmehow,55,65 21 | fffzyiovcsqohyfozsrzmvirgggnlrwwbukwnvbutzukubsejlavvdzwdrhnfvq,47,71 22 | rgljlcfyqdkgvzulehqycatspqsbxeijuztoymrxmhuqrzknpjobysmvxxvhusniqjadegtfzkrvgn,52,49 23 | dnyqjqgstxsczwdqzboqofpglbidounbzdvfqtaqmikbdphgcasiaxpekaszfvuesvismgehmkhzjqzsorlehbnwr,39,74 24 | mrokzjuqeapxmwlgwcxltrebaxcdfsnsjklmfialtztozcpvizhubnxazvrfzgospnjnrbtuhkjkcuoaedfmtfcmu,33,21 25 | dubkynaxlwezsahqtwrmgqbrvcfmnojjtxeqepfrrbrdjflyuwvxwieurcmduaqrcnymuukixscplekgwcoqbbaoz,39,14 26 | hjzppcmvfsmjhufzawrvemkrfwgcqwkagjirlekjpeujpitmpoumbxyevwbsumjimevmdudctfz,42,2 27 | kxmkvrhcqxefwdcpotczhxyugzvkhrkumyoezlfxavzyzzqaulqqbornqekrunuqvmyfhzcrxctzjcwucdwimqipwwpx,40,45 28 | vkwhdwwfvrprmfikjxydiialkfwgkbzxxrcddtzkkvlshwgicwiwafizplmkdicvgcnokmxfgedfk,36,54 29 | uyjocupptcphnppmrkehkeijausbsgvwhxyafacclaqvnjigkytaatoqwaumdyicfzcwfmthkjgttoas,39,6 30 | kyfhmmpghliadreeikdovatlgdimordqlogcykmrbiiycjeobzyjruwcqwvkcexpwiekvvqgv,34,66 31 | kjvdtqnprmmvblvnljqgjgxcighxqjmbralfgqqpwgegtgl,27,33 32 | evcamtbxizunuaygisrzkkbotosljmjyucsyyhnpbhujvuwimgwnbizreutcrqldr,44,99 33 | lpxqbpbodlafnvwvcbbzliafrdhdgtxkzadhaytsbiqjznkyzbtvjbhttmqarntesfzrqbtzilsutzfn,37,19 34 | olyzdcweeauhijzpmfqevcjdmdhkgphznotlrgseuapdlfwldywcqjsijxehiekowircqadeatsttugevlujiudwxn,42,1 35 | pezoxvvzcknucawwtqni,50,66 36 | skpqjjuxxpkescepxafhvcofriqmppqsofaxgguuwiaawsq,40,64 37 | 
fvompeitysfpmmpjopfumzspdqmcqifxeijibtzjhjgivmsavhrsbvbsbuidbnijufrgsqcodtpwgrjymr,35,76 38 | nkvxglelpvlsexfslgnvylcpbugixoochfopwha,38,19 39 | mdlbvizitfbtcwfexdksnavlafvgmypyznvwgtvealayueqytudbecnjdsthhhojgdyhtoiofpjiik,40,52 40 | aairfezrfs,37,56 41 | ceegngeyvabxtantxzkrfwjynzeiobiguklvbhiufytliasxzrpuyishhmfqcgepvolbievecijhwowlxfi,38,92 42 | phyzqlwcppyxjsuvnpickdlctpsnqesnyrbvyomjttuiogpoacncuwcnznzxpoxgsm,33,90 43 | mmedkcgykjqoflq,42,99 44 | evaskongtymmjsaspdxkxogcmyqmirorzifzqcui,28,5 45 | enkfsycihzgvblrgjmgz,26,81 46 | prsufmapjmcpdlvsmhubsqoqsofanvwwiskoihrhocnwimgmwzcajjapqinosmbzhqdxptnrpbvk,29,56 47 | jpolvahkgdzvorremcykpbjcensohsujxncogff,26,18 48 | vnvquklvjvafthniwwrprbryw,33,90 49 | lbibiltgqnl,29,11 50 | zlfpgxopmisvqlhybmemwgsnhhihidsjql,29,63 51 | pkaqmfogivksgxagyrfibief,30,70 52 | urrmvsdmirxqxffqkoikpgpktuxzppvrgdaunbvmgbyvmlaeynvhclwinyowyorsmcldjvxbokwheyteapsculda,18,91 53 | agbixdkfofaclgwprvxhkzkgjiolxgccqorp,23,94 54 | nfqnazltxhszqwoyedjwfinbffnnxsqkolaohtzesyuqgdyaatdafipqgjzqiodmyklfqbm,25,2 55 | qfueqgswwuhvuokdhvgbliztytidubshtvytphaxfxxb,21,34 56 | ggkvewyyfwdnjpxgnywgcdhymvsqgmtrblpdhwe,26,2 57 | hrvqughgkbkquaamtmjbejnzkkgzaauouwqgcypztripdoothcmdsbbjzowjxuigtcsxoptuovt,22,25 58 | mnfbdrooyqaxjbwakrrvwxonjsrvmpjmlhjmrdqabfhiwlorwirbsgwqynyghfrwwauqvjdujwjhivfjkggfwio,26,47 59 | ivdqjjzkbxjonaxiqtryvgdjtrhemsvsqrobubvxctetgmezjowsdmffcnekcszuzxhgxbjpbiotufudomlpygs,36,88 60 | wivgtiidpvelgzwdhtyzbcgipslkvhqpxdlkrtztsyqixjdeebfcjymrl,26,55 61 | qzzqryyxizgbzakeeekwjbytxxtuphglohpfhvzmqzwiucoemmycsjkowryppovgdmnoddikdoxgqyysu,22,6 62 | stwzfysehr,24,53 63 | ylsskdxoydhnxcegzfweofaxuxeenm,22,86 64 | hlzvdgmvmrcivcxcznivungmhfyqemrwhm,26,6 65 | faptfnoorzrathmgijtkxqwxkeifaqhudgfkmgvzfknrwjxofcdoyuylyhgmcyhkdjndfypxyapkla,26,8 66 | qkilhirrmevqopqgqvbqwaverqfikaebgjtekuvufubvtogbc,25,67 67 | vnuxngoumdnxbdlbwnnzznsxfswocdwyylznipwgrfdhpxajaslt,18,86 68 | lfpldxezkcgnfshiowiigogrorlvvqawodwdtxsze,29,52 69 | 
xdonpzhgrqaeoszkfeqkwmsmuwcozqqgaotzchhpacsplzprizjeymmhfdroojftesyxfmzwtnyttwpvmtnmycjmknvcw,26,63 70 | awfwcnggjqljcwdvyjzbzfkwpgxpzebqpgozvbpmaxekhivdeocjhacsuydfypozbwplxxnoigq,29,54 71 | ipkuqklqdunuzcwgxbedfkzkwkwowycgupdtvfznwvf,35,60 72 | tmncvfeasadd,23,57 73 | yftkkoatykxtzeodqosqoimrtawvchdonbxbbgzloimxzybbtbiiffvbzktxss,20,80 74 | atqzcjdfbrgxmmwiggbyxcpepotgjkrbudznjgskwbfyuuejtvcuhvqcxtimimxhanzhrdlaypozbmvgntzmwqzom,28,88 75 | jgfzghifcutlawgikeftasiqugxqqgoveiqsu,22,89 76 | ylxiphwfvrmemupgnk,26,21 77 | hcwqtdledudydsumenmbwrslrdvqx,24,69 78 | tkqvlktxqkrozzvzwyqzfowittzryiqagtwzsscarkzfa,29,15 79 | zmdopijugwjonpa,28,96 80 | flzyiiwcwoxirjtzubj,22,56 81 | yhmhisupogcgstwzihfxxovfomubbydviptetbkhcbcdwnqledpjqukihebtbocrgttzsxlbsowqcyaaqqcdudeea,38,75 82 | zpbummfgsdwsjeojtriipyvrsnbbvwrtponwcqiqrvtgrslyaoacq,27,39 83 | qvbdqhgmowzhkabatrwqadchsxhotcondhhtgwszmnsqorqxeqvejtrxodnpgtga,26,26 84 | fbrmywexrnuneizkmnrjzynxhbgyfqcdooxirmcgvzkopdnfyfpjdvkwesqryeriuqvgwzt,31,52 85 | meybmdvzykssqulqzrpidijcrltlktnquybyraltwbqfitckgdgpp,22,62 86 | dtvrjhbgchtdrjuoqnjniaxp,24,36 87 | kzcgxmdkpndeizzqmcsqxbjtuafkjtqwfpulakwcnivwprlfvgmerdmfxtcejdnapdyzsymneaetfkbjblkltjokungnprekhz,34,48 88 | bomzpfdawqbcborzcrixfzpvnqevjlzuwojepgywazdqpkgppnrgbobfnkmufeyeaojgkmaeoyefwzquprpistbtnropvrczj,24,84 89 | prcehwijetccpjbabdgmmrxoynwibegqsjqfpujnjwwyesfhbyowfspcxwnweulxeyhdfrtqilrvmkrnjgnwcbkdmo,20,69 90 | jgyxudrhofpoq,23,59 91 | tcsjdddsxrmbfrbzjuukjzbtmjksofyieyshdtslarna,23,13 92 | lwfboyzuykoyaeyfxjmidnpdezthmqehqxpwfitphqpfmdc,24,83 93 | mfrrfadmxdtueiinmwqitgcsivpjgftvzpwohlmiswlcigprfkoimaril,32,15 94 | iejoarsuidommpxsboizmalgjgwzlrusfwtyqbpsldkjhvtnmrtorucfcukxktyuglemhbzocjjgtccmawfuvvrrkvhd,29,62 95 | ohagdxgihvfbtfjpqiiblgcvabyseuqmrrrqjyqhubspoepfvqcs,18,15 96 | rwzrqftvhqofahpvbdfbtccplyboavfxuteqffrcppqzzbptldtyhqcrzjhuuibpzmdrymltdhpdiipyguwnkwjyhe,22,3 97 | rqqdfxjknteqallindfdlgpzzsqewrtvgwdmeyzsnotwfhzycptdwelbavuhjru,15,88 98 | 
aosflydybdgjnbanyfhdjshguzlovniicdlbiblkpkcoremeupdkjlnvaiabqnjjqrggckuyiwwlmpkhkiah,29,96 99 | ymysicmeapczxacgocugrkqvjwrcfsqgtaurqhezltbeypvaxihnivdfseawwbqpsqib,30,55 100 | srihokmvipfclrovrrzsjclhbejpwu,16,74 101 | fyaqfilculgrrqfozvvkllqgiauzadrojfaxomludcquiejkwmguhhznpnbzznoei,21,27 102 | ehegqevxsvojrlgljajtjhpnbuaxxnmoyanxqogjxuk,27,74 103 | ejqihokrlejwvizwdsxxrwpvogpbenwiyuamywfkzcjraysomijgarwblpxbretjckcs,26,72 104 | lnxhpmgodwevrbckvxgfvfqqsaxyzfeozdasymlpyfeefubxwdvjlomteainhknnjbnadm,22,39 105 | nctqphhcdfoffbqxnnohooyybgltxzrorikogolbdfitdfyyk,27,21 106 | upcqyionolcodzmfuotabekrwxzqntjqmoawxwuwjqwwohotsfzjtjkqmrbcivredmcfbkobmay,32,72 107 | xflvbymlsimkgwwkwhnaadiduehryhjmsctsftahiitvntkqtgjjbshbvmuudztcslraflzfvhpevfkqjdgs,24,23 108 | sgzkljnbrwfvjscokewkzyxhegoyaclqspqamqkcyzeodtnvligjsmijncbpdseohvceshnblly,23,80 109 | kcxxbfuqjrftwiqjoffsanhmjvmcvetsmveabxkeesbxxqqtwmmjrengynukeyecqmlfemhjjhfewrrozbsrruynspfhrsihunh,28,22 110 | mkvuiziarcsrerhvkqyqercrwmypzbrqkikepirbzqokihrjtnyydtybzyjjcmgoeeau,25,24 111 | ukghnduhzkpfvttsjrqhyrppsqwhxeqihtjytxchrjndxuizteycsihzqishx,35,86 112 | jumggziytoyywaehqrnnnvepnugh,19,51 113 | svjivisnoybhozoauchiaqfkwap,26,38 114 | ouilycramq,23,40 115 | shsgdwseqkjhvofiv,27,61 116 | fsedcowgdnrpioohgcefysuvuytmpokugrvognrqisvnifpywyzvyyyspwhfawqpqivsffzpxbknwaopvxw,33,76 117 | bkejvgfptolenggdllkmzbxgxbbolhvdlkxjttrwqiwyqzeifokxtcsvmbxlntcwgzwhttecgix,28,28 118 | cbxeuqxqcxafbxzkdbumgqdkgwdcjpyjqfugnudrplirlzowlzfbnro,33,33 119 | mfgxdfgacoqohyknlmepyhhuspusilz,16,37 120 | toogateygggdwssm,19,15 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00003: -------------------------------------------------------------------------------- 1 | ffygkvsklpmup,332,63 2 | hnapejzsgqrzxdswzepsnrwhupzxeoeourvexevzvplzfatcnxdeioleokcphan,171,35 3 | rvbyrwhzgfqvzqkusietecgfllzvswjypculysxmqfiwbvhowaouxybxyhufneoazowbqifwmxtkllunuctbrrezedtzzu,148,22 4 | 
dsmcwjpmomksjeknnzqlgnyyqatnkjvrigwkosrqnrgwusfafpwlcsysdqyudclrfegjrbjhaosgmirdzeiynpf,100,74 5 | knwlhzmcyolhaccqrjrodrquzavpupxqhlirczxahcgukvfyrjnjwdgvclyrbyrijrolgwz,104,67 6 | zznghuzphdxpffyvireslpzieohxfnybwxritdmerkyuuysprppivzmvzlodmvuiugrxiqbxavepxman,95,49 7 | ogbdomakokymdvfwpzareaidbersktfmvawcrvoystglticrfhlhiynhohbqiozjwpxuvzmwivwaitckvmhvprvgnuqmkrie,65,98 8 | dujeekkipzorxnlgbyqkqhguqgkgjaimlruzvntntpbvfzuuxtcuazr,80,47 9 | nkoqcbfdgeyjqnjpduofeobmrfilyopppcnhmloygqiuwyzoypdperfi,91,31 10 | qswhentzpmaswnahgqgmelonpvhegjlogtzxj,69,14 11 | dbgxyulbmiegbpqjjvyugrotcgddympsyqvldyjipz,80,42 12 | nntxzkpafnroecjdeuasfafrt,68,51 13 | novmceeekpcizaqvkxyphhmoth,66,48 14 | jaorwedfyuppfxfmvkolwpzfcne,46,18 15 | xdymsbwfsopjckuakurhwdvfubshgqdcqvvearhunboqvopouyjpbakkhclvfhmrfdnilcrwxygwioucqmv,57,57 16 | shmqzllepevdmfbsxcamooddbfqyydgwobtqxvjvwkoyvnisxxzjdbpbslfgcbyxszgbkwsgwassilxsolosvssmxzkf,51,70 17 | euepmnknkbhszhbiviobtylmlwqhufargkosyxnnwrycyg,47,59 18 | jilugbnkpqefuxygivelnqvpbpahaamoetjmmalhvlssqxcjxitrdkmjbqqqgboxhahnvncmusngcbnh,46,13 19 | mzikvhjseodyjazasocv,50,47 20 | ixdkwhwklkrveshtzhoqtgwtbltcakupuujgbpwfarqclu,61,76 21 | pguyzljhuqd,58,6 22 | houkmpqccccguywrvgoopwsxarvolalarhw,51,3 23 | zrxewqcbpfahvgwzpxyuqxygvbnqejpdomxpbokpotmkgobrpuseehssmuzzienkjgq,53,56 24 | dyhzfsjrzctorgpeyxqakkcqovuehoxcnxnjqyvqnphcfygax,37,56 25 | dyihrprqjgfpsgnitrupnstslgxastqpwarpzynpjqhqipmgpmahmfhqqxzupxhzjkuholjdc,50,50 26 | xfghmaqmxcrclktkk,50,76 27 | hyxykkpvtwvfovycgaxopchybbozoittsjbxbjusoxgmehgnevbvmuomqmnbe,44,25 28 | rirlpcmdsipxelzcepwwmavsmyeoyqqgksjbpnwycgcdgkelkyhqxc,37,97 29 | cgrhvbwbgeejdrixthealtpujujqnglaudavwlcjfutqvrgftcoebhnhsss,46,63 30 | bbqruyvnmmsk,41,75 31 | igwgonutjaealrxoqhikohmrnballskzb,31,47 32 | ksuvyhmxoqemjviebpcxbvigirvkecnqwmdtgixetywrvmsfqvekhkcszscqmjhczefmmcvddadqehu,40,58 33 | aenrihsgsf,50,49 34 | maujidxtxnonplawbvverojjxqbvgvvqeskgchkjlilwguyhrhnzndrtqxiapwkgfwbvijcpkpmlkvbeqpxzxkz,43,30 35 | 
bwazbtavlyjsbvhkxbzkcuigeqqnezihnzpnjyvmnbmxxseeyjryznweecjjsupthzaytrmtzutjlztobtq,33,49 36 | ciymremulqciqibiwnksomenjupltancihvijfnqyxsdmfepvtkqkobecujfundbnphobjrjotwopuqbhlyakcmfhkneggirtsfh,37,41 37 | edxixjbslbmtuzdsaqbnrrryvqfn,36,74 38 | dbzajttojejnehscpzoecawrxwdbddspoelxjlsiqxejhzqlbilyhbjbrhldepxbbjwmmdpvgseqnufpwurnttbkgndw,43,69 39 | nxcbelxzhocqjrrzyztnxafabwyzocvnhganktxupyubepdmvkynyasjskyrigxjapmzuxmoqyplceoxiyacutapiaxlmdodp,37,58 40 | vhsidxwcunjqrnsdsmpnlredzodpqvsqdnbohukvuamoalmlqavdfjgwlxdgki,43,89 41 | cetxzrjboivdwbmhumvebplwpntmnzdwsoigqhkufwozowyteufuxdpnzzyllpbscvovyvlhm,43,1 42 | xzkkoxdhpgczevnwpxqhbkftsxefspuakwxlemrozbvyqtzisvcintnkuztjvuvaruepqrskrtnpexsnhsmrmgpxregsrk,40,86 43 | pkvrnqnibkzwxqht,37,48 44 | psqmrtgbwypcaieahtqpap,24,88 45 | yrhnkxqwupmebomcczntqdkduyvuwzfcqujhpxqpgkpdvckegyaeini,28,88 46 | txxmmtupshwihcxzaivahlmgxacyehxdlockzhguxowkkxm,19,32 47 | ihhwtqlluofrvicvawfuqjvhfdfchhfypipd,20,75 48 | bfqzuiqsfpyfbpbjucbwwunobatewbqxupfvgrwljkzk,33,33 49 | vfkudnupjcohmkinanwrtjckstbcxsptxnyfrrzrtyjrcrfsvlhjirkoiivisonlvalltdjecifbbsiihwvqou,25,56 50 | anlkdyxblwotc,14,55 51 | ctcutxlyrtsaxqnzuhtfkuhrnwmxxoqudyyy,30,30 52 | ijzktfmgimwdfvcjuglaqupvacuxrlnekrwpptepi,20,54 53 | yulsrwxhthuywdqyafavxdrsjvbue,28,78 54 | evphrayuavjlwzyadocnblqhckkdtkaetkndfrhbmywlrok,30,73 55 | jtxasskopxowdtgoqdjiwlbujkkxzaubgulkwjfiothamqgrfewhfofi,20,62 56 | yykqssobhdbwqclphcvayoudpequkwqlficgcovcvswsphlybnhiytpvtmraelelclubtvzrgohoxbwydiivmjdoselhreosqjxc,20,15 57 | uqlcbepjszodlcqurqziuunuvwtccjfmjicfmuetwhwekaeeyzyqcptiyycyztaodwydaojypbkyotwbmbwcisuvhbvd,30,2 58 | nsaocyzspqiittgyzxrhsybiwzkyqel,24,10 59 | xpevapsbfbovowfbwhaivssjiaqljnqeqpktibutzjfqihilzxxxuifvgrytjipnoxqzhfzhkywjqkexu,34,17 60 | muoopqfbwbxhgqvaesjibeqwfnb,19,26 61 | xvmsawkzmbxutfncaocslykpxwmlqeqhbruoerbwdkjofnzybwlapkhmn,21,77 62 | qoblckhsukhtgjkdmungzacguqvyooxglumpgehouhotbxakbbreorbdchxfidndelyrmyyopvrhcqutnxaqkqe,25,1 63 | flwlckjgpemozdmbdwu,24,40 64 | 
syhlmudmzeiewazbqoigjfjgahztxqdyiskozokmsyqkcocafvcofsrt,32,76 65 | zbhwvhmoekkzishimqncxghg,30,95 66 | abkmdqrmsioklcxifkkapyrgnwpsinhuevbepuxqvppjp,34,61 67 | flergcyllelpmronlrkxvipdhkimnvqheankidht,31,22 68 | uuncvufmzeiw,23,16 69 | gcrnzhhnlfehuqwegshloxzwcqdotaxetmhaclhoyyjgmcdpfgbvj,27,76 70 | hhawatnfwvufchflujrdnlskmmsit,34,78 71 | ftsfztagtqzorytjdajuaowgcjsnlpxseoimnayh,18,59 72 | hpebwvdcszmczpyqnrfhxyuabfpqlyeuodlotqmqvhtmrdtsfvszstghljtxccu,36,90 73 | uryjidsdniioyzvorofacyty,31,85 74 | mdpxzahcfctcssueieysrbxdxcdfmjhdoabcscwmctzrrharyazvflpcvficohcndglc,25,51 75 | ussgnkykvxiuoeasfdahmshgcwj,17,7 76 | woexnaghismafgkzrnvsvrukluymslpuubpmwudnaqdxuqlbhryrastwkniumipgbzxhjeipjlhmkaobseorbaofjfyhtisuvmv,27,3 77 | owqojjrbxaqpwgrtumtbdcmrdoiipykflbddnbnuxjmcfxwbetfktlsaxvjnppmlfittnscecxorequl,19,53 78 | igfpklcuppzypecchukywhiaturqmozmmqunvrmmcslicgfbqytteqsrpbexwekwkksifgoorjgrnfxdxzvfbrosgejjldprvr,36,43 79 | keoklmfnxrbitqbfbymcyvbzqdufrrqrmxzmgviyoqehlasgz,31,41 80 | idqxdvrlpqkgartfpivobox,21,69 81 | jmhkohqfrzczbamqrowqqgyqeoymzehnxooscvbpd,27,26 82 | zntfgoukca,19,28 83 | vblvhcelfrgotentbewxtpfyxesofxwidphlcjdzrkniaerdaxkscuurmywzrupykqanvwucmyl,29,12 84 | nlqzvyvyyglepzpxslsbmsmilcyddyvuhbkfahdjtxortjwrqvkhdcneyjdmyecaxfiqbgweueoqzlqxfptomoffwyq,23,27 85 | ujunyfwbglhouwfrqmugwdrkipsjrtnewshpyfhxbxctpyhucbqwyhjvjgyinvminc,24,73 86 | srmaudcfsouahcxihowpwazdgxtsnmnerjfnvkxiaudcguxztjboufdbaqonhbwzqhhegjzlzeqrflpwvf,20,27 87 | tbsrldzjteqdvtxkxovpimfsavopaqqgejbq,30,72 88 | bnvlqdxgvviwqezqchbwnvnsqnqzkhwndntxiooxzaueuqfiyxhcznnamffensnrqtcdsqfiz,23,9 89 | ahxwllmxdxhyhrflhyhetmagenpchykbeardzwtg,27,57 90 | sjxaaclzkjbsghkkenaepreudnzturabomgbswxbbmflnhqcsccyrusatinbiezjbljtdvg,29,92 91 | msjdoosdwgtnmdkhw,27,84 92 | scvtetdhmkmxvhfbflovfgfzhqdioytmovxwkwmutgqljzicwcssoqaajvtnonffmrbjrogqpsmevkgndieios,31,86 93 | qlubngzhkydwurcziytyaxrwbuokuggdevhnhunocgsitschutgqczawmnsbymscxrksvgmkonvi,35,65 94 | 
qnipzumcjpddxfzzvuvzrfgdeoxmpfpjpakwlndqsxwygxqcqfljmyrapkxpxiijymnodauvtrla,37,65 95 | qyjywejoyvazrcbuhnrmtffyohjdxzztcrrakpnpfhqgvivqkftzzjjglca,27,71 96 | trtqvmurhxdkegvzvsecgjgkgebqq,29,1 97 | ojahibpysrmiyvfkgrmzoghwhccclytjqfcqbebhznk,26,89 98 | hzhuzgrwumuncirrtncmlmkhyvefhjzmhniopxrcrxolbrqfwxljyeaabmvekbgxtbyomrowulnvmtsdtacwwpijbodngep,31,75 99 | thpcmvqbhxnbtjpmkkhqqbwxgrkkmaqfjgrvayzxubmismltwllnrdywbozgcfuwfvllagijiinizvodcnwnab,24,8 100 | slatuzfmxbsegmwthabskuxxxynvykhwmrsssdwckdujazkvst,20,73 101 | udbzpahqieqhymjwpmvimvtdvlbalmx,24,8 102 | rbwpiexjgy,25,49 103 | zhbghwhbjyjtsnwqbmtywnezcoljexsurstvhzethsuuupjplomphfqrcouqsbnfuppxibxgx,38,50 104 | phzjjvfgsgxkoeudfkhabutrosobnsuvumkibyxggmfyjiopyteuak,25,86 105 | rlknypphhokupxosahlapotebavdwwquq,25,34 106 | pwmfubgvehpdrdtzfmspscvpxcd,12,81 107 | pxyabcknlnkyfbojjcbojdqybnywhhcluqodrgedtvjvnoohpynwsjxtacnpulsyldkrczlzksvntkljkevsps,21,28 108 | savcmvrrldfxixpzjlmtldotkegeizfqqpxupqdg,21,52 109 | xbuwfwpeehewkkvpyjydrthmnikswsvkmzihvtgeonfdmpkpnwswbndosurrsceotovpeqwvauubeiqejrw,34,89 110 | hmdralzxejzxrshfymhhppeuvjue,23,6 111 | xwernhvzcplrejqqwffbmpxaljvludtrqdwfqewlettnkluzgiqukajbllkkuwbulsclzlmkpfyraixizaggenv,24,2 112 | fwfbkaaziyspfkvuneqxcsdottbjwvbsomdhvxfkomzecavbsuhpdpvyrumgdpcrou,24,48 113 | iqyozyramnxnyqdzfjqu,34,42 114 | ewtugjyrlqdfmmwfsoldweldzfyanveeqsbpmidijvg,23,45 115 | jkcseidvlnxmrfcblvdonijgnurgueunzjczjgwukyanaxqcjjngpargodecljovfjgwppnickkzxxshjltiozfqweqppxakywa,27,83 116 | ropccouuqkzhxrlkbxmontqwigyqzuxbduxejrwsrviujmmjlkmvnhvkfoguno,28,13 117 | oytxrpedhlevsmaesmipabhoddglgebeteeeelgsjzzsecimxmwgcwdaecgbcmqcccohisvurejklroziqbkktyyzfjw,24,37 118 | ejijaancyteetet,26,22 119 | oyiydbnbgi,20,19 120 | wvfsqnyyggi,25,10 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00004: -------------------------------------------------------------------------------- 1 | qtqntqkvqioouwfujojmjdurfxbrfcqsisl,278,45 2 | 
wrwgqnhxviqnaacnctwpkmyysedhreewiiechohnxwdcrrzmvqnfqfuhxqjbpgtljgycrzeooyzitbzidbbq,135,2 3 | cxdmunpixtrqnvglnt,146,54 4 | ixgiosdefdnhrzqomnf,126,33 5 | xybwfjcuhauxiopfirbnzodqjnrbxhtu,112,4 6 | xkbowoagajfgfwxjweyzjfqybeufhrzpzbcjvj,90,63 7 | mtuqcwwotyhumzeebremxycwbrkbkofk,74,14 8 | uknvebpirkoriljfrsoryrngsizixcploywsmxvrvrdorjbqwzjorqbgrshgklepiturazhoukg,82,99 9 | ddkcctektekhtqcnvjhoqqelfrommuhmppndcuansrazgnfz,71,9 10 | jcwtzqrqxczaunfzgovpuomuqxwddglozxdzn,62,40 11 | ucwntichsmhpigfnpohzengvlxxlujgwizrkhwufkijahmxdpfnpq,49,36 12 | hfqkjptpoosgnuhazipxdqqusnufswthewjlmhofufp,71,4 13 | wbilzeiisymxlghikfszhl,83,8 14 | kkjkkvaxplrssftcqopwuzlkvbdrnnipinxrcsznmcwemkuf,47,25 15 | ljsulyfewfvgaaurjqgicjygwfttfenkalimxxikixduvacxycyxowylpihrvenwroehsfohkdzcxwdczjv,48,12 16 | fpviiollixxwheuvavsqmdouaxweyqimpxtyclithtyuxjnolfljweqeignwajnlenkpmq,43,83 17 | zinahrkkhaozmfbxpexzgtljtilssggkzudameyiymagikvqikseplyrfjmacjntbxexpysiodplzs,50,76 18 | obexsavebuajjncbmdzunwotzxugeniwpyyicmcgtarzsdfsdgegukigecbtdubyezfkahesypeknknyjcaqv,56,26 19 | jviualmpljxchkrpzbbpwqbitrmcchb,58,12 20 | cbynavzxlsaetuqcvieqkwwtnwglsigogtaipeucbebuuudssgnuinavcmqehnnymejxaroppfzuizfibycdtawuypyea,52,79 21 | mpthehfptmzojzaojghebugrvchxoonempeituimachyhbbitsakdgklskusrdtpcpuftwkawebrjghapggqhhadbviczibvivmc,36,13 22 | ogpusxggdgqfbxvhsnubgfjwulhzepwdvkodqcjjxknimmbaxwtbzhfhibmlotiqrxmkvszdjqs,55,72 23 | hpwaxoiagjmmfoljzssktymccczsnslqsfevxrdaddkibyuujvbronwkwjtwnjvbcqjjmnwwiyyih,38,13 24 | pegqenooxiinjl,39,24 25 | tqvsethttsgjqoxcxruiohrmugykjhwpudfejz,39,38 26 | wlzgdxstiueucocoyuvzbfhnxqldbzybr,47,11 27 | cebgpiqymsrdqtnzguzwmpufqdjxraroabhdyki,46,60 28 | qgdafbdttiaopjxxsxbobktmcqwmjh,35,7 29 | qatppbocwfp,41,94 30 | ufuqiyhdynqnbzbplcwvbrnpeppuiuxymytajhfpvamibetxmbjwgpqvxtjkayxtofubpzhpprszxy,43,39 31 | jtblaqhpdzadcfuemktyhkvjtuwbdjuuqwfrarmkgbqgidnlzxg,44,71 32 | xhvvhbftsdikrnmltzcpfsquqfzcftlzwfggyfftbjoacrjluoktsulflcnjgwpoharzlcfroryxlkbukynvnjqqlfad,36,69 33 | 
nwphqcvpnuremtvzibpxyjbhbxuzxbvshnvntvazcvcqzphjguyfxxkmgdskuoamxnbrwxwqjqpfrmbiqautxdtlnynmy,38,56 34 | tqkbissauczmwhagqwyqrllzwlojnxzulqiuvdwfbzpjyzlnvtrdprjvkliyzmlnjyviomhwfaloewqmgyfxppqotdnhwqjhpjgc,41,95 35 | ftwsyxapynsdzkzfojgghujlnzwfftevtjtbljcuereatsulfipeujynlowggxqw,45,5 36 | ivtugnnyrerospdlfhguxhbrs,43,18 37 | lzuwnatnrsmtnrpadtwwayzapidpj,31,14 38 | yyxquznmmkmssddyuewudbtltztozwjpequmyatmeqnrmwgdolzyytuznkgvbtwakdizmeqvkfko,29,97 39 | oujppnaujjacichdajvumedbzmjqlupbsafqcxjfzjczrrzomxefqktjnjbetxodwidzftkn,37,20 40 | vdamewmbsmtzxntxfwmnsvwvzgbhvnhmbmnmsfloapqfejapbftcuzoonettwmgxqzkassyuuaystyozhasjs,40,56 41 | umrgwyezrviaqlxbdhlknxfkr,42,82 42 | pqynouikvkcbmfwxtwgtfwhjsnhndkhkhflsxkhyfwqlgknyvwvedzzmnlzlwpcuwvgkztditecaqyxqoezfqoqntyd,37,84 43 | hfqeuicxibbakhhfugqyonwxxuqidaertqxrbucpzazpedqgpfrko,47,37 44 | scowxkwfvdfypybpqjjjpuphdq,25,85 45 | ywfpsvuyeyorsmemrchkclkdcehfaxac,24,88 46 | asafhzedjtnriqullmsrccoqsctbyufajfyyfpsutylrsshtfuoadroxpzjkvhktxnrbkgnmecsollnrrzihjdnlgmwztfbomqpx,25,87 47 | rwlopbkozbh,26,86 48 | hgvajoegfikzhaqzezyrkezhlxvxfajnhykteusezpvaftlmdlzdtrb,26,23 49 | iikhxgpvhuhqjvnlhxnvrlqznxhuufjfynhcvkdsuracaxxi,27,66 50 | gztaebsbaqmfgusknmnayoapevpdxxdtnfrngnrlimowqqbbpxmhkkjqcokuekwzxphsxamddjwqzhfjhfzw,24,52 51 | jasrvbhlxntztliaebsqhrvzinoxenakabwmljn,27,83 52 | juyphrluyhmugzsynokcueumggtqpq,28,66 53 | fdsuquodyddyyukxisoqmnujznkpfreegguftswcdxqnhmiwypxuerxvehtrwyaybf,29,35 54 | lxesxvkqxhvxwstneqaakcjdyoayjazcbgonm,22,48 55 | yzxacxeqjhxphlvtpcfuvryumhwtqeytikzdslvydqhtbrohyidunliqhosapyebihhjjsojicm,23,87 56 | zpupycebmsyazoyjqemxrnujojrhedzgdrkkccclrejjtwhoixlfxevqnrldoxnfajqbrj,22,37 57 | voaytyaiknvcvvkdjbigsjgwtiwunrnt,33,75 58 | lbewuzdybemgkaflfqejhqrhsmcafocqgxmbivaddvjdbbsfwvlehapklnqmurbjb,16,62 59 | sebdnwwsrfvxwzvbwvmbuyvbxxpfwngzgeltmowwlekknbxvddrmcbauhribrohusgcokca,22,43 60 | lxbbmspyozbflzzmcznlannlveywmxchtgumgqkbnbytvbytqrurojmmrkgkqoiaebqydye,24,60 61 | 
hyhoorjwfvwhlaqpltufqsvrtshcehtsbjbzkwiuorjejwolpwfnqxekkwiadxnbdmcvwvvzfwzdpzqoajzdvfmgzctjd,19,51 62 | gzjginjqigngyzpqjwmwajvwrlzribxbwedcmisycbzbzmnztx,22,10 63 | xmqriphoqffimukhhonstobzalicbztvgspjbkxlixmuhux,25,49 64 | rympwivzhsnenuzalurdztsudtwjwoufpundgxcvlixnnvmwaykrcsy,22,48 65 | xcravjsyrxtlgigbfobjbvmkowesvxdyptqjusa,25,71 66 | eavflbemqreirlvqkaxooxqdrqogupiuforltdukbwuwntdyfqzsatmpigvfnydhpzcxgjetguaenblpjdzbxefv,23,19 67 | gslipyaqffodswcnpilxezshfkxskcxvjogwoqsrobhizwfsvxymktgipnjrbzfyiatpqkuzhvdouqpookvsgzgqiw,19,88 68 | xnmzpadhjrvcypvikkfzuhvtilvywyonseyicggmmfxkbqq,29,9 69 | xkqtjtthjselxuhmx,33,39 70 | msvscisagsfitogxpubyofqpwqensjpib,28,43 71 | wdjelatywibsmiedltypmyegkjqmgolideaqtkilertbkxemchrypsitiqjntiaulvowczkkuonzldjglrrtlnj,31,41 72 | axlvhxojgzcptstquvvrzqdlaxojrpxrucifqkfylpxcvxybmaqoaojuzohlerksbxgbvvacrtxcxmtnuemwuxycw,25,38 73 | gowpnzwlceydulwzjvxahogateebgdghuyorchcdzvvzlmslvyadnxbxakguwrxeabfcayhbjysstavfgnlaxiictmpmsvrafp,26,86 74 | ivvburqtzadpnmrpbavcebqfazbrhhlleawlmgpysjahlcqodfcpjyciwwfaghtpfoea,32,64 75 | fcfrcesfgdxdemybwbybmrtywbpsfob,30,89 76 | onbsbmxmypxyvntypsrldhydoz,33,80 77 | nazqjcbowlghdkyfqotpx,27,89 78 | thcdkidrwoihqexyipzwdvesueqndluopaylzefvqubkrob,24,29 79 | ffuyxsvugiousdumesfr,23,31 80 | keqmbrrxmptpjbiyaiegwesofoyyvbktdoboabbblidtdkgulvhdegwgkjzlmloateyawrgxpzfyejqjhtinhxiiqmeabcgeb,24,30 81 | ujixhafsmxq,26,74 82 | bwmsddjgeutaywfcpdibemhdkzpiwoednhvhjnclkgukfzsvyrosxoqlkohdhgslcsqbohpsatuynhnvry,27,86 83 | kkdrsnpmdbfvcvqhxkwfnmkg,19,50 84 | whlkitxfhpmshmeyhentozwykrqvqzfynsptoxscumobaamoazsnxkeicycxv,23,62 85 | pfpugqidclwdbxxlpquoszgxqrqtuzbeyzgjuptdhjkyttljruecwdeqfccxtyrwjjbqcihjtgfaanh,25,76 86 | twfldvpujohkpversibquxsndxvva,38,65 87 | rrzbrjglvdjgfzoheyumcmvxuniwyylheydwcvqtgcvpluauwsbzlklzwjl,31,76 88 | nibpfbzeymsqacskfgdofeebcsajjzzofajjgncvmywgsnlmlbehdljdvxujydmndfohdccxcbmtmqrkojiftlqrmhlsd,34,13 89 | lulnrwzjyuogmxbnelc,26,73 90 | ewizyrwmkmjyftyjrphrzmrbw,39,59 91 | 
yukpolrzoxnjlstlmxdqucuinqdbnitijdwiyuu,25,20 92 | bivdjgpotuopmonakpesvwgjlejmixqpldxuqjectlkpjeb,20,67 93 | usdnoljkdfxwy,30,55 94 | htpxpbirpdqujpedcrrkqjoopmzlwwkldqpksejyudlpjvjprzixicsy,24,25 95 | mprmrogezadnngvuuariszmiwcdhpidygusjgnyqiwtkxazgmekdjqosmewhhwjx,28,23 96 | zvqipxbqkzgvndxiagbnvkloazuyydocauqqhufpugmlfgknllmzwllhvtttstxvxdwlgfgkprdkuzrzlgjtscpmvctbmmpbo,31,41 97 | kqmywbagvsrbnlqdqdnguvavbafxygsd,24,89 98 | ooiwcytbnldhqukkcyshnsluwtzbsrovmselwonscfqygqvlepbrsdoskbuhcgyxdhtouyqc,23,91 99 | mkjlyrinvordgvalgaxjdwaxwljiceljmzcmpubcrdiklquxggdz,30,97 100 | isiuuaurwwlnaubxiwmnuinwzlknocjkaoktlqsftpgccnhypfotwqklqnafbszmcqoyjqaewutpddpmlfhpcexnogewmbnbts,20,51 101 | kaptrtwepxdvnlczbvaohdqqpmdfxldyzbnponmlynqucehvayngrpxiawaokxdk,27,81 102 | ddhdjzzyljnplmkuilnektmrakvzsmzvvnbpxnwokuapargyvcylfwwmmtairzmvdnx,23,5 103 | xjzbieepibmjbwljfecyushntnbw,19,44 104 | lthmqrgoxttsgbktgkyqjxwqbpj,29,37 105 | ayyihkqlbihgxr,26,17 106 | vsikmgyjlrdppvjdmafvuwnzrzfuivfyaijzheesueubu,20,2 107 | laeulqlpeyktajdfkcmoytszdzrihzbemrxrlfdqrzsuhxghuexzxoocmleshkrarddvtato,27,48 108 | aimnaqxjoyvfqakkkroifdbabnovkkkspcyopiurdhjrkcqnftmqwpudzwhubgdhxobzo,21,87 109 | xtunozeolqumwqzjnbvo,27,23 110 | whemdmvvhsmpylapyxigqcmmnmswxrdlcwppvrviti,16,21 111 | zhwmcdaunwjvllkgljuvcrapuurmmqeectsajyuqujbvu,31,65 112 | gfzykkedtbgcclkbdfnmdctuwsumnbwwjzeawnnupkhwmphenokriiuojymrqdbxk,26,27 113 | wflgjnydcgszetdjdborwwoif,25,43 114 | uzpelvznvsbgaiuprudzmzwigjulpprumzaavobnaaxnmnatncnejvgxjpdltiobozilanofrarzsonf,35,67 115 | kybypbojxknlawnzakpmuvbp,28,67 116 | adtfhtarmezfqkmlfbmyklwigvqotkzyznclwisvggkwtdvmoknxmxmrtfleemfsvxg,27,79 117 | mosbqzzhgjvshrfjyfcgcyfddpzhzuklae,28,78 118 | nasjmfmnawhvpvqpn,33,98 119 | xyxlcaugomlckbeasjbcdsdrojbkhdwhrzotfirvynnbvjkjppddpwkecrswozrhqmznyrxebxgusgfnt,20,95 120 | qgzchqkttamjysvyepcwairwlcipymzklaqnfa,24,29 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00006: 
-------------------------------------------------------------------------------- 1 | xjhmjsuqolfklbvxngyinvezezzpnpudcsrdaggcqtjkvythibgjnrlmwtijvixsxwpbrbfts,251,80 2 | seozvzwkcfgnfuzfdueccfguavodnxo,165,66 3 | fdgvmwbrjlmvuoquyblfkvyzraemutrewunyqmfyvcahx,132,88 4 | hxilvmwkazrvjqtndvlziqhyzhorryycvrblpmdtmlccsnvrktoyfaiu,99,14 5 | gqghyyardomubrfsvhwxcsmnlnyhtbaozi,108,29 6 | zbqmvcuwgwtbynfglageenwqklfimolvtpuxgnibmgpfrkkjyqaffptoacjhhkiijnusaknyounbopavarsvwjbueiwpjlumelr,89,38 7 | tktwwwkukgdtlfltbrhgfwoilgfoysvrnrykoahowsifaskiqedusjlypqyvukgvomahhafsjmxzmtsmlzadndzacyyv,59,83 8 | jhsddobuctksnpinkf,88,96 9 | tcvhndmcoslftvswhjolissexnibusrgqcfn,67,55 10 | jxgpkxbhomwgxrjpqdn,62,88 11 | hqphaefgnaldqjcgwsqgenvzlccnlfgqqslzgzcactk,49,49 12 | haodlgdzlqdvknzzxrnrkhuvszpvyjwanqzfezsgtjzleuossyohvexzzqftstifacwmrewutflvytxdfrvovvezmxvxlianjg,64,97 13 | rrpxypelcmbxef,65,85 14 | vhegulkgplgjbtyhltewbj,56,8 15 | qgtocakklmmyuwkefpfrlrkjdhvtftlwfpdwdbahzcgdwtplvofcuptqpwlxkqdr,48,2 16 | lkpkylmtaluyxnxshrhjwoosjffjagujgwipjnpuuwcw,37,59 17 | pujcczisycqikjhxrpaiwmogrpznwbceynrgoar,48,43 18 | ocbmbjmncjgfrjsxjkmdveidysgiqeechwmolbojbobqruavioofncbytpdtdfeyseullupffrvxrrtvbouzbmwfvulb,50,1 19 | afagtsimbfynqvisfssdfvoqbslmpkryjopffwsmvjwpxgugeoqntjtsdvipvmakz,53,82 20 | bzomrnivqkwyski,41,73 21 | ikxbdshlxrxcdkyrsakdiuynicnuhlmoxcoalkajfvauwshaqpyqokasdoaivhak,38,98 22 | mlbdidhmxdmpghaktygpfyolqeieggnfilbsqxbxycrpwubqfrikrvgaoexkdknbmtkcilxotyskkqgvfwsgsfuaht,59,30 23 | kjbmjeqflragppviyzvkidmsypkzqorxsdouueytokveoujsdynvb,40,1 24 | rthvxmtlugamgij,37,89 25 | inpozrhjszsiikenzcyalvzpgu,33,63 26 | qurvwnwgtmepddycuvd,32,42 27 | sppmejlvllmiepddyzckialbevyyyrjrklrbydbmdspejtlftkumgxfpvjedgrgxradairjddqpxodc,33,88 28 | pjxygmzazniskasvyjrojxqeitlcltcnfzfatiwaaetfveouwyjozwzvqoagzeherenqegj,41,89 29 | zmrpjdzcfcvynqriidlokugdojqqmcwcyntawmmldlaiyqsryxbl,34,31 30 | jzhejiotxplwcydmznvkuvbsrlswrxcoiedrcsbaewblkuzwrmxzwcqdpinbatiavfxxvjmnaoyywbbrfawiyoih,42,15 31 | 
ngrsqvfsnnubqedjjhfoctuoufsst,47,25 32 | pwtmyqwiyvdahi,39,46 33 | fexlcxpigjvxyihgsxdrqsvuzgegjbpwearvrbqqgiiohoiuwdhllhnishihnloqbljgmmrbhvttwehjlikqtx,38,44 34 | ifpggwiwmellovxbdshvuzuhbggwqwujvovgotbcgrzrgmcydrtqjygrfjj,34,64 35 | zugadwpogrzrdijvfrrajsfsvpkzshgejlpjutaldyexsidngryeuhrkekmzmdnonxnpqikru,29,91 36 | utdwrgcunkunekhnvrjilfyuuodqfqnrlvdyfdzcnr,36,66 37 | qblphnktzcggynqtqitdftdqoaprwpihsatrzfneummocvdcbzrmlyxgmwld,41,35 38 | rhuqrfjgenghyjvkswalukgdsxdbynwttfmkrlwqhg,25,40 39 | towgqtqsryhsgylkbgwygpifjuupkumofdqbxtvumcpryci,39,97 40 | pkyxrgpehtmlj,42,24 41 | qfpmnwlctzppnjvvzgzbelaggukecjvievtqcaydyagilmqagqhquuvcqaffkrrtpstxdilbxhayxjgyjqujdnictvzag,37,18 42 | kxourznhhvzjcu,41,87 43 | osqtptreqcoytafxpcichaqj,33,58 44 | wyrjdiskjzqigxvxaynmkcyhqewhmuinxv,26,91 45 | cpkjyhsmuusjbvvndq,29,86 46 | neczyinbwvgkndvvxclptfxvdxpykdwyinrdhkwrodipkjlequyamcqvtqpsphmhkietemauomm,21,77 47 | tkmcolwenyqegxspnqcctfcfrgoozeoaatazxvvxgxkwiqfcgwdppwoxdnijwzeofip,37,62 48 | ejiwfdscwhtckcqric,31,46 49 | edfjbkdofthergsxfcunkkzsixfsozjnpsenrwmibyofjubbrxntzuvijelhbiyunlzkpynvsvejojcpyyilcaddibwwb,38,47 50 | teimvdgdbklpkva,25,55 51 | hfculndbvilmlnylxifhfzkkfxojzygqlpkdkavioulisrikbpezkjwvfkjamkzhrgcgferhthl,20,79 52 | jhqyzpooamrfdxxglyajyefjnhpmzhzlhxvfixgibqxwjliflhxvipyscetb,26,40 53 | tolyyvkgxifwlxqueajjvjuietztxvmlll,25,22 54 | xbcjwmgjiyozdstkuealycopzieiqdzcxpsiuyhjrujislaywseletjpqxwgdihwhwpmtaajkjdceheqy,25,99 55 | dlzsrmwciwsadqctziixegfhgeprgitrgyteasdqtrmyvgnxaliwdwoqprmo,35,38 56 | irdxjksczmhdkfaubldoymnpsuhzjxulmzskfgtuvtfitwdfulazoobwwqbahgiiywhjoke,30,92 57 | gibiszcexbemcyycznzwinewvogihhfzajyilxkpziuybyewwgudtqblmqsnrrltcrhbnyzk,14,25 58 | oxbjhbytfbykujmzzynccaerkipiedvnlnascwvmknojdqrleoaludvly,26,58 59 | gvrvgobyjnttufktfsmmculrhhwtqhxojbdvdqgonvao,16,91 60 | zypwtxkbknqpcqlzjlxmefigbkuthkgcd,29,91 61 | nyeingfdontlyflupmnochhabnfhzdqgblcmiwrttvokodsxvlxnhzlp,26,5 62 | 
ldxlookswsgrgakznimlwuwkolqysnhqinjksgpbylqseogxhpuxoshwbmgopsklvranyqbujofcahhpfkbtfynwvznthwcqcet,24,91 63 | vnimmsukvzuzwkeuytogebyipmkvmynggrza,28,28 64 | oprwywholoowommcvzkbemdjfyzafissllmknqmlzgbyupaw,18,2 65 | lopvatoiwztsm,22,18 66 | vbkwwcemtrenzxarytcevigppaglpyrjgorq,30,4 67 | oyeyymeoovziujwnvbkucx,19,54 68 | mjyammwdvvvvtpfvizfm,24,23 69 | nfbjifaioxnakytvuwyzvkqrbehyurb,21,15 70 | uhaitphkoepksbggieaokjyvjilbwxgmaokgiiidhvkbuavvcunoq,23,15 71 | xvonqfvhppnykno,27,80 72 | jkydrfjckkodkgdudvcruniepiqvahahxxncwuzswcruigeawdrcupauwimxwydwoqpahwqwlkeebumxyfsmoe,44,43 73 | mpxzuytmewaeqixutlkwvgtwrhstefjmztpkinkjbsjvjhhdguuwzaczeuelowneoqehdlhftgewlggelrwbrjtbpxszw,33,85 74 | ckwgrhcjxhgfyanuwiemadkpknptbhsnjofgesnqtobuoxwllbewkeksalo,26,38 75 | libkfuolqbswbznsbxbssxbmwspyujppxq,28,21 76 | cucmycfdddyuibopuluyizlwccjxjcnbotesbhsruxekrnsxettwbfcccpnugmfbzkbrgwdaysink,16,28 77 | kbyzdeadkfkxijjdydjomompjkfdfmppotfmthakdgzyuswfwthatyqzlure,21,16 78 | kwspclnpaxmrfzjqwwb,25,56 79 | fkpflcpgtmbetixagxicmexgjrheirfubqlgvwwnmldjaqmyllh,20,53 80 | rousxxeftodldbig,30,6 81 | bccnrymjfklperugtjdzavxryoxirr,34,74 82 | hpoltmlmrjdzecmmuovapykzbdsjjivcecufmuebutduwcngvlpvolafvho,23,70 83 | ltbuitmzogliixhfcbdyoimzwpjpnnxpqxclwfatlnyxwllhhssrevhdsrtoptdf,30,76 84 | nghojnwggjopiyelujgessgjowtylxqvubbwbhztomhjzdwcbvacwvmhvjsocojvbrasdhxajs,37,92 85 | pmqpxabovpygvvlzcrtysaurs,18,71 86 | fjdvbbwajbngyxyxyrfbfxpkoelxhvrcebmminyhppdizkzqwzghsododdlvjsvkljyphhbmyyjzqjkyxcduzusgdi,24,87 87 | ibupkwerktlrmmpyarcmdloaazloqtsldgavil,16,68 88 | gitfesicsxuuzsvbxxwdqhkrejjhfcjthpedehozgjjwiytl,20,4 89 | yrqrzfobmlwjysvoaaegtfijri,28,39 90 | lnqpqdmjvemypvefpchvyhbrzmujrxvqzgdzsczck,32,27 91 | pyypdzvshwehagjafnzukpdaeraaibwrzyzalqxfpuumdbowetucizpvaqqdqndddhahixvjkzdediitdmacbgimv,24,50 92 | bwgrivstroyjaksumihghobyrnkbrrlflmgegsbtymjhmyoywgmpjjfjopmrrjzlxhwgikb,31,5 93 | uhnykdwwwewacvscvvjhyzbzrcbbxbrjsfjeucaygbvcqantuonquciajoyemlmmuqeefktziglakoytonblzlicggfgru,34,74 94 | 
swbpsmlruzykeedtayyrbmftqxpvqfrcxtfqzcxdhdvdtzrjbenodgssivhtqbrfsczumxkpbbuwntmojefcjclzobbsabrnej,20,4 95 | ijuzqegtjgetqwidvotdzjumdiccxihhctkofm,30,20 96 | evdgcnckpilhgdvulufvxuxlzvhqwrxbbfitqgjs,24,60 97 | xbvdhkytlfsnfhttkhjohrlmsyzthoguqrfvkdchdkvgbldpqpldtrmreoyjqtdnziuegsfhlfas,21,88 98 | zkjvkmkfxszcfcpqsqvdinrbpnojbd,24,61 99 | gzyufkffydyqjakkovvbkwyzrpuduugapmobtjfkxkh,34,17 100 | bvokvusjrjlnmpnnxknzskefenu,17,92 101 | gzxgwlhdxfakigxlduqujym,29,31 102 | kirtwwxpxethskdnftjroflgviklqingdwdjfsxlzwnx,34,94 103 | fnxurbybgloqredqigzxmeohsgsotsgktytkxhpsaebdgqxouqn,19,55 104 | zuifqqkviuvpfu,28,38 105 | ombmrkpiarhuhlloiajbtrfpsxiwoljtetxtigijclgtdhlomtlortpnhkvfjbjrqpaq,22,50 106 | owfzczbxfqnkpkvyisihxtqcmk,27,59 107 | vwsqrbontzspgavubavglwxw,22,6 108 | ulompvdvvlxdvkvpfyovlsnioxdmwlldtoivfcqsdiwaqezusylxyimwwtlsyxkzezdqupixkgpgfxqsctzsscgbsjvoyevwhg,30,19 109 | xugikcvscpmugnefaqptnochndscujgagxewylpnej,20,56 110 | tkdkabmlnnnjhfidfhqqpksdjfkndnoaozdhfnrsmkycasadxmdxazxjhyzjhlyhiftwygkbtpfzjxvcgsa,27,88 111 | ogzhwqqonsvlaoeijtklyixsbkkaqcvswodmaurxtwjvougrudjzygbsbpljbgiosznlsd,23,38 112 | kvsduennklksugupxtyydfmpxpnarbdagmnojyrugqjozlaswhupuxuhdsoczloeiltfxjflwjdxfuq,25,69 113 | swkedlolsuxahtxkqmsbzznjpuwkivheombujozlzydjz,22,41 114 | mumiokyvlbyuiagbhlyviywlrmihyylwm,28,25 115 | avkyydcxrxcyyjdhykyqympzixwkxbrdijgeeeogpywcildzujkzginkhyskypatumxqpsic,23,99 116 | bzuomsfsjdsgkwljfrmvvuqlfgymeijhoxrroryoweggadordycjfqavwxolcwbtysgmdwofqwtfdrjiastpjkkcelsddkidp,26,45 117 | igcvclhukiftoaczfxoqqbowecewfwxycnoquqfzdyorvdcgdvrlnfjpxvoswdksvrjsxkmdjb,29,96 118 | nbxmvpviuadznmnmjtgpstsqwrugzzkrksaglsmmxqefwhuvurgaejdkltxpfshcnnvbscltlivrntrvijlrtrmmwgfmuwm,28,44 119 | eiavwxfipxyseselqxevoqtoufoqsapaqjupwgbmijhxfqskofyfwzsoqljto,30,42 120 | lecetrlgmmzppvxzupvrmqmvagtqxvfbuajmactctoojqdumkpnptuutqehkubarhctynoxfauepolcejxuluzn,27,91 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00007: 
-------------------------------------------------------------------------------- 1 | cuithkyytiwidhgbobbjsogrnfkvxvyeytmgumqlmorafzioxjevadombpkhza,225,92 2 | czwbdyadiwyheohhwavwceoiybycdgowjwcmxpufmgzmqhqnqajxlwsjodmkbxwvnixlrnvqzhq,155,8 3 | urkmlwkhrnqcctjrnxjekebostnuendvrzzdpcewnbsgzqz,140,56 4 | tggikcuwjjzvibggitsbbtezkcxycrlsthgpuzclilrfsgzxyqbfluszkthixushgambeiyfudrwogsvmunkxrghwerwsujbsdc,103,8 5 | ncbmhzsswcqjfxlbdllwjcbisuwyqfdgbsvdgxcbloiwgmvtwucjwuohrcgsu,107,16 6 | sjosgyanscncvlhedgwtuouwmihwnqxwgudwzelrjwnpllkdiipzn,88,96 7 | bhzpxyhwayknmnqmdoxqzbiemelfingfekjco,83,61 8 | ntwwwjsxcrieezqfrcdvliuzhhceghghlwhdxfpnnggunezkiqfylefwfltfkzzsekigvlcybdldqsksjdbaapvdnhccjzkf,69,54 9 | cotgaoikqwrmdvdacdwpzyhkrxlvosllgdmhrdeyxzqrmznvyswhwlwpdxdovtmpxooupmyfmilinhmcivynzmpmmd,71,39 10 | uhavjdppbeluxbmtvvtrlkivozbicjoeonvlj,68,16 11 | ehqvpglnuphqrzatnmbwhvkppgbperroygrxhraoyshpnigzixneiyagxfkvfrewoddzxwcbfx,52,16 12 | vpdmxpyuohrjgybeyvoblfwhxotwibyzoaiasbsjnhnomrhwrrnziqqwwckfuqujsvhve,55,63 13 | bpyletjakycvtmcnhjgbqkwxyvwwvazxbucqdppabkfyncgfqgblgshoxciaiwfzgau,68,90 14 | xspqlghpzuyzevxle,56,51 15 | mshyqcpqqkyun,56,77 16 | lcmxtyzncdrrqnwwbfpfzpbpgvwcajlgvikmwowkgrahtjdsggsxkrvrqhoyujynopyluemjxgzbmnvhhnwoxxasefbsvhpitre,49,19 17 | dwkevbgwnjafqcxpulcpbyvpmlhegyocndkmnnbnueruthyl,48,25 18 | sehpqemzpqmmaikoqpyhvgujjeodlshopmmapjqubqf,53,62 19 | iqxezvsoolspcuteafnbmdhaftvcieszamkmgrggvldahoiwndlsvpwvaigujufmhmdhyxeedlobzfzdcsa,62,7 20 | wnmdfvgqxionggfiyatzcezzmvulvpmydeuvpehzoqtakcejiocqkeyyoiuvxzbgcauqjqwolkedgdowudy,41,85 21 | hylxcpwjzyqquiptbryzakehthg,36,31 22 | arneiaykbaccztmcyqnzestpfmhkkzvsiycipq,49,83 23 | bsgzzmfwtzhnwhlkqwdkbudiadmvhrpstpzawzdpnrhhomnizwmkbbuxwxdoxdqmwpwc,37,84 24 | xkvlievixslxhwqgfopepwoiebnuyrdcdrrkyinpycgpwuibnkongnewndwmbypiunsugrwyaqbbybmrkmavtbgpwmjytbjfxxtr,40,26 25 | qtirbatbzkwtbckiajkrvodlbtewkkduinhhyhwpakjytceldjqtsxbgvamrrbimwlu,37,99 26 | akypwmcwxv,40,16 27 | vztthiwmvgyrujz,33,68 28 | 
bhkvdnhwmuwjwjyxrrjqf,44,34 29 | fesgcybayzdacxidnfuuueujgoqhnjvvjnwlefrtbgbhxijvdyxiyrcqdtabid,34,88 30 | zyeustbhaqitoymoqdyinpnkfgejzknjpmxbdpcwiufdfkhtusxbdkoonwwftlcexdpecvbtmungticoypndcemgvyeogfjkgvuv,37,24 31 | acmdqjhawhmumgfecxubcemsftkanoucuaktnhmmtqxiquwzimnhtrsdlsctdbfwyjbysfomxzcyimvwtdgxgwcueqaqhqgaac,44,39 32 | pswmyionopgbmypivjwxrvonlhmfkxeujykjrkzzajwzcqorktslvwcyt,38,5 33 | jbsczinsgigcusorhvvygukspcwdtfurymcnxtusizgvphcuqwotvgldezzbsaqefpuvwiaxlrmwagujprddagvwkmqfyw,34,74 34 | txmsgdkosgchzizuibxmgdezfswbjirxnoajrxyefearabxybvf,42,93 35 | zywrhncahgecesmwrzwrwlnkoihbnyeeqvomrvcpzt,43,74 36 | xxulfsvijzfnxythwtweodnagcxavwfuiymvegfvqvwdetmljrerngjjiontbex,37,44 37 | lpmahuvvvucexznocggbccsbfkocszhdcqcteudomobpzyurwpxntzvilwmbfwkroodsv,45,33 38 | vvvcmvwiexttygacwxgjsssqyoemzmuixtkyaavsyfgmpjpjahtrnidzujkjwvjrvnxoix,44,89 39 | idikkpjirofzeeyzkmregkoqvd,47,5 40 | aguhmrrzqktjvednsalrrmoxcsemywgiltwxn,32,55 41 | uppqvlziasgmmpmtxfmyuls,45,27 42 | cltzcpgatgfhfmmkuzfxnoncogssu,36,82 43 | rxbmgwmulgmrqzhpadywojbltaqvhtksxdvvluogdnzayjzgflxpkomcmscdlnfgdicjpfuxneqcnnjtidwczfcrxhnkdzu,37,5 44 | zfdwvgtdvftajlqoncuytuhrylkiiicyd,32,77 45 | gbmeirbhajwbijsdexbvof,22,93 46 | tagdoknawbzkfxgjqhcrrnhrvrjkwtmfchalkpapsdewfhx,25,54 47 | foiqoroqsgjshmrdgovbtikyztzsmimyrugcxa,23,20 48 | rjjtwbcpnjrcocduyytxwhy,20,86 49 | mqeadihelmlaheyrmkkzwivtoeevbevmxxrx,25,93 50 | rttwsretdfzzmdashsacsazbvyodqdxthqzmf,26,47 51 | dnfehxocjndwflhnsqdakrkkyyvminclkzlhh,22,40 52 | vmhozxyhqvtzlunpqlumtkbteaxivizsmusveufsokmxshkaqkpubxuxafjidhhelnfabxbpdekqrapxro,34,2 53 | kgqpijsfrprtjslirixwaeduynrefmrttsffnexjfxniejswetrfou,24,5 54 | augckizztkmnqissfowfgrokxlgracvdkhdh,24,71 55 | xsvmympywnpyxmuhwoprjdcllcbvrsftrkyqheoygmkcrwzmmmadkpfqzqiebwzvihkmd,21,64 56 | qkuqvlksiyklicbkddmxsammbqysbjupbppliaizumpjhhfaosoublesfugloaigyzmqlnnahbnvisfvxhsbbt,18,6 57 | ejgaoinsogukuwfxetdovsdubtaczqskpfw,29,2 58 | zhcwfwrufzeklafmx,29,21 59 | nlcgjlbozowxotgoptf,29,19 60 | 
gchzrgbktmbxlsuqokvmhfjnksfyxcfmgqeqxlbqrubjydmoymtklwrwrjhwdhrlogjctkp,26,63 61 | qnatfqalbkigzbakzqrwtefdntyaapp,26,74 62 | qsbwbjeherxfrwqpczeuhkdcjroqsxrwtldzhslhpgrqivzwvuzpbpxytwdnesvrtoxkv,25,37 63 | ehtaqntrxayzbwgqpodupugmpxelmalbvygfwnovslxoalsbksiuybwuujudgftrurdxcwgvpfpnshidpbymefquc,21,81 64 | mxbsnisseeoruoxkanozuvjxmeltmfkprewmimetpcps,20,70 65 | jmkvrcyrdtbhglwmlivhnsvescdymvsvtldcwfdqoglsm,27,4 66 | rqsidxtyiufobthnqtllhsvmpytkkoacaciybjlegbilacxrkyooxbaskobmqwxlzhwbgqvusppgyklxg,31,95 67 | vahboiraqxoajpzozsnnevhyvykmccfrxbgumdcswvvqvrpjyouytpwwbvjpn,26,90 68 | urpulvciahfyrwv,26,84 69 | azmhbkhlitvuqymqfzeerfxvemjuxtytllnogkvit,25,28 70 | ctusfuhknoswziewdybfzkmjmlqkrjimchtzokjrcbybyvploxrowijkot,27,39 71 | ztcqmpfdigbqncqxpsvsbhodhgkpct,25,80 72 | hjpgcedpersrvplwynjujygpbyoeyzemebuj,28,76 73 | agxdshsoyejtlvsznltwhxsommsybugfgwhpsmbdhgphywcvfaemhskffsyixlqeazvxlmyhitfhlobaok,25,88 74 | mowckaydfrhlguatoatl,20,1 75 | qoirkypjyjd,24,38 76 | gqajrfphigtqbfpjyvbrubmkkekkaludddmwnabvobnknzbksdlicyqohqde,29,10 77 | zabrmyxxfqbttuclpsyooojrofahthaopkalfudzatvlzrvryriisbdwbszfrpitndrdtovkjkaolxa,20,99 78 | xntxlvwapkjmcnimteh,22,85 79 | naeakmwwaryffmxlnfpsddfnlvaeawnmbx,27,97 80 | atardodvrratseiaqbasgpjtokceqtlcyqubjkjxomgwprhbwbqecfbolbhxffqjtxpmzlqv,19,18 81 | alcfjplmboxcigxhvuotezlh,35,25 82 | smnshqfpqxcevrszwlnaeocuasqxksxloapyreqnr,28,59 83 | mgvuyjbebzpaimwvaappdkdkrqlgdvsbaonnksopqwqnlyqqcaalqthifkrktrzecqowojazhhqimdsy,26,61 84 | xdesdwervdiztqdzephjokdagqotbesxvq,35,68 85 | pgnxahqifryksbnvnwvjjhsblhlzquhepyfmfozexiaqqrwtrmryacoexjjzgaahkodwfnznndeljpx,27,82 86 | shpyubsriyezmwkpizaatrnsevbqysawxbeqqbnqymlvmomsutigcmctfvpv,23,80 87 | ovwqgxmxiolbgqrxgdhyirxwhojoxwvxqghhljzejcxfctuzwwhsszwwcjpabpv,21,89 88 | zbmeyihmvnzlkycldm,27,30 89 | zrrfkhqyxsziposgxxkbodepwymfefovvemrzseskrnupxufzojwjiupnmqfnwlqwybrlfhklcw,30,26 90 | inxcpnetasjthmwufiqwftchnzybggptovg,20,58 91 | izxalrfvbwjlbgmsxumznwdxsorpsqxnoabfgythmbtc,28,21 92 | 
ijzpkumkowdrdxrrupdrhpfkxywpivldyxcooqtjizpphdamwztkzav,30,8 93 | qexfatxccmvozswzvescjxpsvpapzwbklnwgcxdttcijcdgdvcfslxxsmgwmpbklwvvbfnhjixebvlyqofuk,23,27 94 | zfdwtcpbqb,18,7 95 | ejexxlydyqyzmlotjndkgiprvuaksedpvlmbryaelfsjomilqnuqcglgihylqdykuyhpmimwofcibjxapcpqkwsvfsfzbaowkq,22,76 96 | hmjvogjbyhfyfxdssprpzjnwderzpyuymjjyoyuxlhwrdosfabduzda,25,60 97 | cfrwqrvwdpuotibvfrcgbkzprjofenjidkzptekybeaiyzjtdymvplk,24,89 98 | kvsyouyixoqsryfdptrhfzvxtlzqtnqapzws,25,40 99 | xkhhvwfrfdqolxgywlslcjssiitflnlhtwy,24,69 100 | pepkxgpmypsnjepkipazugwyevlumpulkkrdofpiwqrjcbgzdsjsjkoeicbtykpjrihbiamikefwysuyku,27,88 101 | ccynlxqpyqxpbpaqcyauiksjmewcpfwfqutbpiwsneqdcmyogayjkjiveyzwbudwljqwj,32,11 102 | qrwxjhhgnqskuxbjzlbmnzgiaraivdwqifghvqzpkagrwcoseeseypfengysdicxzsvunryavwezysfhkxmiyinj,29,69 103 | sngayrtbgascsiokxuglkzjrlalagxssoeojtgfqaztpkspoztywjjpgbriidzcicqooechnhuhykxardtnbjnpiix,25,30 104 | waqqrnjfrwmsrtvmkkuhicsfuimcmdbrfzuyukzntzemerwmywlzkcwyvqvcjlbeahpfxbjqbxxfuwicvs,24,84 105 | utixkpjtecilvefwuysetnvufuligaaespxubbnlbzsygakbhbgeuzqcncroygicvorqjaubamidpeqqmmgqjyycxtyxpirwsp,23,63 106 | jyrzlwlhxjmtvsidlvjfnmxxyxhnhpjamthqywkjsapqacuibunuqcdufdiv,26,68 107 | wxgnbyczeujotljfqvmzizfgtuknpzmcbqlybqvzitwa,26,11 108 | mqmmawfbxaddnhnenwwbinuyevmqeitumgqkapxtltuuajrfmcupwosfwryrobls,28,88 109 | aeoqxcdadhreqbarnfjxybfylkjtovkepyilgdzbarmbtihyvbasfxanhriafouhejkcrcnnyqwqswgoysmpmvzugumwzswynzzi,22,22 110 | qvxkdswzmilpmreleb,28,68 111 | ttdewgtrrjmszuqgaxaitzhlefucmskqfrpfxgixg,28,54 112 | vvyqtukrdhvuuqu,32,67 113 | ckeykxzrxydbabttkodtockq,26,48 114 | rpdcxplebaecwykkwrsrkjpissqrfkdrceuwqnhcxhnyiktbhpzkut,29,30 115 | hdnnlgklxfjqefkccazpwlxbvibdwlczuwsyplhbpc,31,20 116 | jnqwtprwzfluhiztqnjqbvuyoucphsgmlubtloaxkeqtk,19,81 117 | qtbtzqjjnvuoldkdy,32,6 118 | jowrqnqujnwsqkggomnntwsjjnmnxfeaovpsxiycmqaynsbifvtxvazsebvaoqhxsiaqvyfvgqqvyoqlibtpxttbdaqwnmoooir,31,33 119 | zdvxdedipnfeyigozeoxxawlgnuksnqskxdtdyzobdbemuadflfzupzlvgyppzczlvycda,28,25 120 | 
lccjbceyzpihbvgyytnjgoizrnndnsprdnfhhgztuuvngivvxqaiipigrohrlwnkqkfcjsaawjazpjxnodmiyutcyamssrxb,29,88 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00008: -------------------------------------------------------------------------------- 1 | ecfuzdmqkvqktydvisaknmovbbsltqoufihtspydkagsujqcevtguvzsmwrzttmxqxqsxgollffmtchfxoqkujigsopmya,237,74 2 | dagtwwybivyiuxmkhbxkkpvybycyionubrxfqxjdmpsjraoadnzdhhftvklbfps,177,74 3 | emucailxlqlqazqrupsaphhmsgqifscofkyhybngvcbxda,134,89 4 | nzaxnvjaqxapdjnzbbvisvpmfwklhzqjgexhnqzvinldvfjlrifgapzzhmactlpyfvoyhxdnzngsrcokqhaozsb,119,68 5 | czoqdnkqnonnkmjlzfsntboumupseoahz,85,54 6 | csczwwuhzppmmagirffgkrqfmbptywyatsn,76,10 7 | rxxdcfzivveqiicvhbibtagagzcijtbxeygytznbxlxeyopeqvqcnialilmmtnoijqgmlxymdqzhgoryjkluyuresnv,85,75 8 | bgtfepelqqceryiwutvdetglxsystjvepyngbzpggxrjxjlxahundkahyjbhojjwmhspfzhmazihmlqlvafsibri,88,7 9 | nirxfdnojzdnpmsmixzgdlypkmziuybomkvmlmn,84,17 10 | tusmkbrqtcfxihxnuspukuswzkgtlqljdfxhkxqyrneqkqb,68,41 11 | drdyhgkabnr,61,9 12 | aoamzircowhjogwirncjnsxcdruftktlnfhxbwxwpglrycfbulqskfwhddmlwtvngemtepdwcdgd,61,17 13 | cdqbezipxeqrdckcbnscbzeriybmfcdkfpdodopxrftwanflnstjujixxxppkjtopphjalljroomknulhqp,70,53 14 | atbhmgxqoiokajrupfwibcohqyulhcccvedenuckabgxtqokxojmdnbughakzeezkjiiq,56,59 15 | xiqhsgokuydxezhgglqowliufbogeqyyijiiniwmirrgrodnyaf,38,33 16 | esstmookugkksfhctkephbxr,54,30 17 | rzxwewfgbcfrkqudwffwnuxeojqycloawyqclvegvuuhbqcmqzbmliksqntgcihizjcluvqjhxsq,62,42 18 | mqoxcmmlsbmxedrzhtktuayutwrwjcxyqktnwxp,58,75 19 | zqaqfovcbpxcssqognxoucdwtcascuhjapbneabqkdclohlvxytaoztngwsyeaenlavlgpme,59,72 20 | rnhdvsoyhvfxmfds,51,88 21 | rrpcutrxevevkrupoavmbktyprmnobzwrdgwqdxvfqqaeqflrtomornvdmbx,59,38 22 | sgkxpajwrzlfhrmqbnbnjmhbuswvayizqttntjomkfstnufnwonslfjfwskphwbkptbldtlanfqkcjitpxbxcdjefxr,54,56 23 | uexuhkcicyfirfciqnvlmczosqlshtdwplmkfxplffakpdxrenubwicglwlcvjdiwdxpmiemnpy,40,39 24 | ilgwvrerjhavnuq,45,92 25 | 
edgigyawwrkdxapnwvuweyewgicxbbwvkslhsyligkrmmlqlefysujwekfgrltqmxynqkrvcwstlxuyhrivadqxplyrboagpgol,36,87 26 | xvlilprffrfxizobby,50,52 27 | tgjlutgiauwshclqbtyjgetjqbhdwuxvx,44,4 28 | tnerxrfgtcvtxahlkqozdorsgfckjplvflcegncesclotgvsjxelqreompdhwmbeqcdprulryagtsqplqtrdnsnbln,52,42 29 | uuwqpajauxbmcbohvaewjzo,33,22 30 | isfxiijggamjiynmrjlgiklqciaonetojuajgjrxdsfnauvkykrdxpedosuprgjtjhqghvkjisecwajm,28,86 31 | dxlzbbmqbygbyispgwodpplacqdewbzyeaihdcmnackvlpnmdulylrsbexapeboejpnnwxarzqvh,46,61 32 | shldxwvstfzbnmlttotwzmvxrzudmrdijnmssdajjpgufwdbsskslgmpzfsucvvgkucphwayoxmoi,39,16 33 | njsneahmbtnquqkjpacajwxyhxwpwddwyjtvpmblsehoozdnlhohsehwspnmqxugnsjlkdgigopjypfzwsegdy,43,79 34 | bwwclqtuhnucpulivtgldmprqbenayhjejruhfssudkvdlxdznf,34,59 35 | lnzhyzvyvlrzzegnljxyqgrrkxysbfcgcikaoebdnyqjevtnovueyzclcycmfixhoxtzgknozcsvmfrpyvsjvkxthdpfkqoqc,44,30 36 | jnikmlfsvwfoplrdxuanzljvlonbux,36,21 37 | fyypresoyxqxxgnzmlaawbnifhfahwqgxelnynrbelcywxnvddaqnhfnphahstuwjrzrbynyamzlstctdiwgkoqaxvkjdvui,39,72 38 | vwwtpivdorwcseqcmrsvnwsrfkxlbagdcxbmswdzduskrcthiwxwqhjxoolnkwhejvwmljezibypm,41,19 39 | hxemjbadgkgpycdjmbohaxhzlrnrvahvsnokklurokrfebwyxgjurdiizhoqczprlkercldxmggndcocrqzrral,27,64 40 | sjcoqhalezffgajfqvmaxkpekujegkweabtaxrjphv,31,21 41 | nznryflmyyurhylozyoezf,31,8 42 | iuptmiexhlrimtm,35,81 43 | nmphiiyhsfxoufswtlbshiyfdvgslszbxbxeqnydqxzggophxwj,36,94 44 | qoundlzatgcrpfduhndbepofwizqjgzlxrqegvwzbgdyzrdegpiilupdmeyfrlmhfkpltvtrtocdsabhstylpcwlti,23,72 45 | hsndgnijrvdzaxhtcbdmasfanerkckmcydydamdamfvsebefsgzkfdv,26,93 46 | bkswsfiqyzaxanjnjbdkiksrlcspmtjjctqedkaoxdhtmleaepxtpmwdxdkhjzitwhtkqhupifiotnhwcyggfzhvqxfbv,26,9 47 | kjxfldpcrqfpylvdzo,36,33 48 | lyucblamnpmvmanzlbommmqtcnzumnxexfvfku,30,75 49 | okknrvflsqow,27,3 50 | juphyjzdqyzkzcbtfilzbkivcvirnmjihnouymcvxhamioisqognvdowatfdwjjfhbutuoteyjynkndp,25,44 51 | qpmhtbpwjsribswrtlnkokflxelbkqpvjggbaedauzwgskojewksxouqicjepe,33,92 52 | qqrxahkzukemuszenedzhjlilplixbpqdevilaveasz,29,14 53 | 
ooqguqrqocqtvgludncurpqloqvemqudbadyoauhqkdylhhlrdxuzhtimxmumahothiugnqoiwlp,25,61 54 | uftctlmpzxreammiospb,26,47 55 | egvkjxkladfvossfikauigdtxtoepnthxavmqozjnnigetmhekzkjbcgaqsdokfoyucb,35,91 56 | qmuxsxaibpqdhgphgwbdkwnfqmzjueggvllzsgcmphasmotlalxwucuhhpgcoiuthdu,33,28 57 | ubrrijzgpqhgqnvgfnkviejmifxnotrhuzyaiwwmtnzoaujnzk,30,75 58 | ejmesjgncrndsuuybeqdtluakdycfgnvnbfkpurnlqcqelcwaexqxlppzsrchptvdokbznzboxcejrkomm,23,72 59 | ityiesahtzcngkyrjbwbfntorkbipyyiyrhcbxqlimxnqqrwmyhkggqrzegjglpvcqrvzyxgrxe,26,45 60 | plvltehkjpuotvmagxvsaipiiqrylmnz,28,96 61 | gurmosmvbrmdgpbwkcfrcwqrwyyusopbolwulppdielespeesueq,24,49 62 | qwmgmxjiqszudtrisynqyuxbvbbaqmtnpivliribziujsgcvxaumewciyebguftdhgxdbunewimagicjygqzvdzuwyruavp,21,46 63 | oyxwtpogqljgttbgqtgaaagzdyultwuhuzzmcxjyhuyjthq,26,92 64 | ozyhjhgdoevekoypwhagtbjobfruamjklynpdtbhibwfoodlipjrsnafhieekhdiohtvwbekipgxygzwjxxaiopjza,26,45 65 | hbqrhjzhtehifdhcex,27,81 66 | zgcloiujqkkbpqpnvsjufmohgdifefxqvmvclddmyoefzompqfvbfughagrseltkriuoqpenxhvjnidscvaunn,19,55 67 | fztyrxoqdipapatgquqqomicvboafyehgmcfdmrxvrgkojfktxqn,26,57 68 | vihjioqscpauyyteasanynvvgfztiuqeegwwyniergqurocltthxcsyfcubzzotchgq,28,77 69 | uddqawryrdwralkmlqwkboiywycnbeurcsfjigientmdcsoipbymwvmblkle,21,91 70 | rjbjyczfpo,21,3 71 | wkbigmphilihyclqmddngojanezsrufgjbduysrvqbzqcooisnxhhovdejbccclg,22,61 72 | kkiofsfptbdkfpcsjtmcoljqyqievajuwnainbwypvppdwjagjbcnccydu,26,24 73 | mslferrsfdwfesiqdrcbxmqatmyycymdsqawf,27,88 74 | boctddyjucwnfpbbpclmkvygdfimowthrlbzggvxp,27,14 75 | sdwnjmdfbzpsxpvzpdgrqbkpwxbhuuqrpesbeikpfjejllscacoukstongmnfaijvyhfgfveh,14,21 76 | zfwcqirarwrifztqvjimdfgnvqqscbteigastwypftcsifdffxtvykmillemecttnypwvlfvuxhvzcstrpxuorv,33,85 77 | lsdldmstiwziunenckchdkzprixbkxopetspl,26,36 78 | ygjarpwwxtpglimoabhdrobflttmqyghmgfwufmxmaeewndnaniucxzedplituwnareyqcpcxzmjmshrnayqrwpcfhhxjrimeaj,26,71 79 | qkwumwhdurgnflpqbhixgrfsqaeabshffishmxohe,28,85 80 | bummftyztpqonbinpezqsvgrssrwthvvjporxrjxzfqcqkvmyejfoasvkssvddgiegwkxrnjhquxgyw,19,79 81 | 
yeaorkxovnrmavjptemfipcyxompbib,28,73 82 | ciybroztzdrzcwarxgoettvvbvuizuurzhgsjposshzi,28,17 83 | czdmqxnmvcsvrnfefmkuqyjenbrizjljkiqnulwohcglqpxwwxsxoxjbpcnnroaqbykveffxfdruxdbkdt,29,97 84 | ohzugpcocxskrkogbwafmmvfrliqdbfcjifnygyvdzotl,37,3 85 | qqjwttcsfmcijqqvzaiuvlcbqzkhpmvizlmkanainxzarkprzotkplkvtejgtwyxrunudyszfkxspztkedisulbafckelhjmg,36,83 86 | zjvempfilvykjhbnzmguhkpvplurdssrujmzkwcbypdcuvnjrovixtedoywjfdve,25,17 87 | jpqqdwjsubetihqwktasoumjlacktnghtzkzswcmifayuaynzbtmejdbmfzlbnpmabmqufzbijzpmclkpqtyjwptjdve,27,96 88 | jjyjwdftspfvuqsgafyqtuxplczytiktkxfsdpsvzrkrwbueonfunbjnckgmidyetxjjfcrciwoxlyvp,23,35 89 | uhkkjejodktptyijjjixqebztrupmgilamakapyn,23,44 90 | vlioveugwllcysuehatlgpvuycrommwlbxzbuiibkazbozo,36,25 91 | pnvjxchkrkwgjpzmszoezuekfhnrcwybekuorldwultqdjxtvgsvpscczqcjnaskhwcd,24,55 92 | xnfpulsktpugoewfvmhkvhstutlnnsofgxfkhgrmlzbffnuqq,24,89 93 | qoxbapijuwafvdvysriokpynwbsuggscjbhikkgkgcbcvfhwbkjrvalifhgponhgh,22,15 94 | ghzwcsiuyjuckvftfvwoneveslyafuetymzfxskgyyxkzraxuzpaogjselgcfuudpexpizwcvllnebszvt,28,68 95 | mhopdokaejwphihgkuizaoctkdvzaipttnrmuwaowrbvosojntonugaqdgnbzrdlcqkrnyzodhvhxnyjffbibaqseflbqlzaukr,19,27 96 | jtedezbstioggrzcndnhqlyxenublkdorzufcarj,22,98 97 | spqwgyhjllngzpjaouhaxyfyichpcojqkbdycjvgafchjegvtkfrduejjreqskmduygdxqjwwtgzmhdnioemyu,24,89 98 | zofdglughshgqqhupdqekdqjrmyfuhpgtmcbcwxiv,29,56 99 | fpluyzcalprrwvxzwuktpwtftoqvphzruyfbteqprcfgseprtkiqbgopppazuheedmjektuazhbdwquqtwjfmacrkrnj,30,59 100 | bohljurxxbatchpqjaraxfuykasvvrhygegcgovmzmdkktippdwsmauhfkubzqaedjiwhvwqcdcihnmwuumhcwpmj,27,69 101 | nrcigkhfjajmoznzbs,35,86 102 | efwokyhhoyzrtnmuvuosomtzalgbwhkfklrfsiacattutdtkcalntljbffbfvmfseswtgygqvaygoiwwbstotqb,20,25 103 | peotthkuaexjtuipjwawonmm,30,27 104 | ymrbdclhgoqcdphrcadolvvuqmxcggmlcnjvwtrlblohotifhovocokkpydcztjsfnowqoxlrdmpmlzzxexkgrxszvy,15,35 105 | jvpjtsrziuxnfekiwcldqsf,20,47 106 | zeipuglkjmsicyiukdjhswqdrazwhpjpwuzzsacgrzoehtcvkdnymhonnocnaacxhzsojynqlqo,20,88 107 | 
lbzxirnnthjodqbnlgsspqsja,28,33 108 | rrwpeuczhqmijuobdsgwwiqrslwjmttqqqcqlvdoeggxasdpengdbpnotjqycyofmeskdilcfohcsuxvtpwjtbbmtbgow,30,24 109 | wqhujnxjrqxmovjpxxpcuatwtoovebrekddhn,23,55 110 | vamlataeexvlctwyxby,17,85 111 | hyjutsioxovbg,26,18 112 | etayifympvnucvbmcvnqakudfrdoikncqlglprowsjkztwkvengqqlonvwcbsyjkurhgrrjpmfdkwrdrknunqjlrfqyun,27,45 113 | arsyyzoypwypkhkwghrmqkvzekcpzpkxnrqjtne,24,49 114 | hfvzndmjdcixwczythgpbskf,29,50 115 | toyahapbfkhpntbdlqibnnxhvpihiryprybog,25,4 116 | polwytlsjiytcmtxbqewqhewcxlhvvvtgqmwmlwthzveowinmmuyipfdd,25,48 117 | rnyevrnmqordkpzponmongmlknosyyvxdulsl,25,47 118 | aqgpmcxahvygx,30,10 119 | alwlcjtsuu,29,1 120 | lttxwrcaioxtlgizpcsthongedooqwuqf,28,8 121 | -------------------------------------------------------------------------------- /examples/amplab1/data/part-00009: -------------------------------------------------------------------------------- 1 | urnbqjjdtrqglbookyfekylrnysaootwjyrmdtaxfuwbwezuluctymtvohiiauqaxubxrjsbgeuoedocbs,207,28 2 | xmsrinfbjpgycjjdogjykstqgwnnhuirsjsvzxiaxumxmvmihzpliftrpjfyxxlu,146,70 3 | rpreurxlhltbwbrymhfdphsibqzimpndhhtunugwthcdkmkverzxwdcga,114,88 4 | kpvhodkqxlzvcuadnbqmsarcymozyixvaihjpukfaplhtaqetbhmfsmnycapqaymusvednugc,103,91 5 | vlkaennnijrdztzuvsevrdzfhitfsrxunttjttipdnnmmqxyenttlohvsoz,79,41 6 | iwezajcvhieflfetnogaliideuwohcgpxdhquqximevmqwhtprjsefibwnpqgmltwepmseaiz,86,74 7 | ozqjfnpffnaiwqlinliudhgmgrtpdxfawxnlmelym,75,29 8 | zbalidyswoveyccapuegnikrgklktulheexeutxzgezm,69,50 9 | lncztavxdfzhnizyse,55,85 10 | wmnvheoblovmlnsktzpbqcxzwgsvndkwygzlwnkrjkhnkrcgalfh,63,65 11 | wykfplohmcwewbberaiekupgy,71,29 12 | ffpykxxdlnzyurjvkagfheejqfqftkcdpvcwkllk,58,57 13 | lpvqoegkjwfxxfpqowtufrwnbcdlrtisdjfgzfwiiptmhayxicwhcupghmnaki,58,70 14 | gefsrurekqjwbkpybmcbfoglvonkvqxtdpnksrzhneysdyidbr,57,34 15 | dpextiguedhbxtrbajfxvdmbryqmcjoegtnrgpecvvjvhhgtsyndvlgdiykbtyrrahibrlsbfobhzmqyvdaxaon,41,67 16 | ccotxrauqdfctsnsbeufgsbzkwndkfypvxfvv,46,94 17 | 
nyoufmpsivkdjuwvwqxavydoycjzjopwlqnvvjokmbvhfjzfzvbacmz,49,92 18 | anecuecqkkqirvjhvhvynzxdmdzlskjqbqxgfxbqrgqiuwixkjzprwqqqolkilozvynmtueeruocqvxngvugyyehgaq,52,38 19 | rphattbhselefytrwgcqdxpjgzodzywjsefpyijbrhhiwmpl,56,77 20 | zcvqsaekjzswowjfvnfdijdcpbzdbgqmzexhcgieqreppyv,59,80 21 | bbgoawzxlpycugbflaomqnaiqexpjcksajgjqrgcgoavroaksjlqhroyugvjlnksaxdvxssvnuvrtbtwpkfifuzeuftwue,59,18 22 | zojhvsamqdlmnimpcfwlmukisvshxavfnlhhmlzdmypxrrjdfkplwmjyzprznezyago,47,44 23 | jwcgdqayrzvrzwmiokzhqhvajpolhnhjmxvygwncrpv,37,70 24 | agxhqrrtzghceqnxzrzmsavfdmkkzcwnrrqxleet,44,90 25 | xzxhysrxdimazstaanxmxxsbxpuky,51,24 26 | wtcvbrdzayupunyvxhhbzghrxjlnnvbfxvaoswfig,27,47 27 | gdgvzkoqwylcweapybzlsgflkukmsjfyqnosmltmuhoavcnlwabrjnyoxmszrtrykiojksfgfdmlmniolqs,46,96 28 | vfkpuoennnfgwvetfjyrnzpwjslzwqranmtggbnyvpyelmuzoudiam,30,15 29 | nimldoffnozisyvltftimjiabgfcopblcbjaprowyofqsgywwecqjlemgfie,42,27 30 | dsubvmstvwvdwevzbixzyzevtjgloquodxwseqojlcefeakwhhwotmsdmcjfcdymnfhymhll,28,99 31 | mzxxmdthdhoaozrrdfphgrgbyokuiwvtcjxmmszahrfkldte,47,93 32 | inhikewnflpzymhlvleqgezlpvrfdytvlxuhqfzxcbidilkjdumztcsapaeu,26,81 33 | qtcnwzajhtruqtfizk,43,61 34 | ihriycldlxjfeiarfjnxpmpisbaobomggrmlaztzjrglgxddikwmzlfthetpcjnwtgpincvezshp,37,63 35 | wqlqkdssdddpvalyzavhczjpwlunmgvrv,51,62 36 | lshrphbrefmavzdapumehhyqssxqpocgcxccybufcmbbfsnpeabxcyitwfmimiekjuffg,38,92 37 | guhmssklrbsuzefkwjtmueakpoqsasfiqvfjqfdyilfxpdptappaxrowyalmwfhaaaqbzgncttuvjwx,33,53 38 | xbkobrcutssjxcxokwezhobfyefjjbyoixbknmdhynldcaziyhvlsowgbixhjyjcxzxcg,45,29 39 | rlrsbfedhgaqeaot,39,82 40 | qvfsvhxexdovinlufweyjqchdvffznpqkofshlqn,39,23 41 | luafgyseqxacmiyufbhzze,56,63 42 | zkdolnifbogfhabajd,42,83 43 | mtvfbounizmfpxyuuhmnrohhokibiisnkcmfhblzpylaufane,48,27 44 | pfibljmnsmqkgkqhgtwybskjdtyjohl,28,82 45 | wacwstfwqprtqlzwwtxldrknvkonj,26,89 46 | gunzoeasolkdofcavv,17,42 47 | cbojqcicobhyayipoclheltjpoqshantxzhdwpuayaeodeaecl,23,97 48 | vzxeqzbdrszpmiopdudbbfvb,24,10 49 | loqduevaxdlprohgztlrr,28,75 50 | 
rexhokifsnkpphvamxeaeiivtqohuxnfjwrblsrttjpuofzlnbvrhfsbzoavmsbxw,25,50 51 | bgnsomiulwmcdkahudehrsthihjypbaehmnpjubljtuer,32,36 52 | vekjbygetnqofwrtakosqzopbzorfalbrwbutqipmcsqdkqrrboqannnopgmuyebqbfcnmqgntabxihuyezlgwi,27,78 53 | bndceheogmuwdfzptjipsmkzgjopqarqaraghijfczzkkimpxhtpbbhapnrwjefnujzppuboyhrcqagzoqfrgzbbdxrgp,22,91 54 | drorwjpynhdxfdmtkpthqcpaccwuqdnxsqdhcvnflnpbhledesunrvgvafkprnnozrmcw,25,24 55 | yohxxbdcivrewetztqthrrnmtperkziwxnfphawvlquedygbeifwxlbvktlafyhdbii,24,15 56 | esavkjmdruvmjgrnwdoythwiomlelh,25,60 57 | piuucqffvdaahwtybhlgwsmdmqygusqpkqopolwpyybwgmtlunolbzmrfafywaqejsy,31,51 58 | ecnhpeldxatphzmiagmrtrmpcrmeravqhsmsvpwvfqasdizkgkjfgtfgqcvhkslpvlpwqsxztnjnkgofotrvfkmzwozal,27,16 59 | gympxqouggjbub,27,69 60 | kwhabptvgxsgnenteyneepby,21,94 61 | xobbkalpkqsojnrjjqbfugsdzujimtmobbxrxkvmygystmfdsgleltrmidoqmjqbqnwrqsqsfgifxgjpdzve,25,27 62 | dyryryofrulq,23,19 63 | nsjesjsnwslvrvdqxbgdekpioga,29,60 64 | tsxkngebmufrsargxcywcspfcqicjqxorslfsw,30,23 65 | pegyqgsulnmbrgpgvpibudbwewmpdc,21,50 66 | zootygnarotf,26,79 67 | ispqhskgxogebw,31,56 68 | hjkbjelyfljymyoclnhndiszjytiaodrejw,18,99 69 | xodjzkwloikhcfisnsfpomvksxwzm,25,3 70 | motgfumbpydniflpakujufyutsqibuprajwpanlpqbxoqvnsknyqmciqegxeagrjyflodevkqlagcb,26,9 71 | spovhausxghoczsmlzlcntgnrfwsaktoeceiruojxtgjugwrjrqzzqnqweyhcnafoxvwhoo,19,20 72 | reaybftthkjdlveedto,26,83 73 | lbufapnbqsrtfctgqknkudedk,38,86 74 | jatkkrzqukasmnjtwenysjsdmxqvznsvgruroybuwcpfbmcl,26,75 75 | mkmxuiuacpugqlwidcjqwvlovhaecwwpzwqujdycqpppxowfjxjjithvt,20,72 76 | qobxaprtscngvpswktzbkmjprrrzixngnggynalrnexoncecsczsipprzavidkds,23,55 77 | zjlactmmadxwnjztkedjfvuxwxucyotmulwuybshoyemgrodhtervfyftcpopvedpyrxebqnybuntfpwcwxy,28,26 78 | hsueviaosellsfwcqhtzqsvprzijkudteqbpmasuqchnz,26,13 79 | tyuwhbduykrpekhmasyyoaisimmqhnqmwuktoemaunnfdfnngbruamkysuahocaoewzrmi,23,18 80 | cxftmjfniev,25,93 81 | sgfzuauhxvfukfvbq,33,25 82 | jsdznluiflzhzaeviutmmeeaofakrpfpieykwcd,19,44 83 | 
evheusiampvcgmoqlcacoozzsrxhufbfxrtchieizfqrpnpqkuidqnugniqrxolotiolvjxtymmdiq,24,85 84 | wegjsgmqkhrykagvdlcofgzlushbxomgokncegnctatktypuyysomkjqpnnmxz,18,97 85 | bblykezvdshf,24,77 86 | eyomifqdmxgloeqpzkwrifecoizrz,23,3 87 | rsleqbunxrwokshcgghehonebzhufuzolzfkosztbxgydtnnvblfanvkoznironkdvswhehrmojtjsaspbausidgkklpm,34,81 88 | beodbwqfggfjkbhnmdqogmsrqcpmrxmlvwpconxxdxplrocczzmezhekdanwcwfsmgpqqyqkaadrimphtjddvatqacnjitswhnlm,34,18 89 | aynhtcdtcvcrhjvfwgugipzyaxgwdcotjllkvzgpdtzpvsoj,33,91 90 | onsnwqswvdvddkogsqiejoapwgteywkoyhdytvmembawvxewwzieplyilqzacongqdqmin,18,26 91 | voyrqbmyqsthebvhstxpojquncpivkognsjnlcguoyayfohhaygujo,25,53 92 | azxummpwyexfwzahtheyizjruprnwsizouqqmrmklctbecjvyvmve,28,48 93 | nrmlzbmowvliwumxiz,22,46 94 | lwavrqepxkhtdsxhcjfdxxnzlpxdubklkcdpil,26,86 95 | afdyyjtnsfzykuyjnbfofzlzbzwklytvovjsyyztrrvujvyzmieftzydoyjeyojawcicqsppppatco,22,73 96 | wjizvwspkszoktpwhtdxqvohprfjfdqcaot,33,79 97 | iqndbeftubzkdmlxfrlzsxcbaglppomgmbfcbocbvkyvtfniiyug,25,88 98 | znqevfrmopxmk,23,8 99 | glzxsfpuhilqtkrprsebbvvrpkowjstkngpzawgqnvwuhfndizbgjndduyxdbukrsxwiypyl,20,88 100 | siumgdhwjqtvfeqbimtctkbededx,35,30 101 | gdssftuicgsgbrhtbgncjtmsvlxbjrsoqfvqyxfvhfyybpstuhsbquhurezciodrsgftheqooslpszytzof,29,58 102 | hajbwruxofoaccpwilvokanghhyqzfnxhhbgyxqsblmpgpcmkfyama,22,60 103 | gkpkgoorvwfibzsueswkrilbnerammespamzjuvtzxssvuwotnoaklbusmeghvhtjxcatgkjpcaufvuofzwpqnygtjc,26,41 104 | zcikfozjbseeswukyprlqodjrokyvygmhipgmea,33,85 105 | jhqkqxfxeesbodjjalcbgbruqkmwzlfewsxnvgmixugrnkypofvdloazzrmtxdsqzfaumniiftizaori,29,98 106 | ohvyquqgkikprpswownkflzvujlmzkcauuvcnkwfkiygxgsnqfbflxvexjhrqkxucubwkxxvgyayexzsubdtwrjeagxdequhd,27,25 107 | dlsoqddaqlmdcdzjiouqmasmmmewzltlavugrdozoyk,22,69 108 | tcstiuokbbrpxpvrksfgelqretsjwrficlzmktphmbylm,22,21 109 | kfolefjlbhsupjnpozwuckmvwpfyxtoouwlccpevyjhqdlprfskxwknsluaptktouonjwnjarsyn,36,54 110 | ncuswrdkcathwjetwpevhpeylkardtmzrjjffznmeurrcedwuvrotysqgttennispndijohumpa,28,36 111 | 
erhzvdritnnbhczwlrppyfhklmnmjfhbeafaihzuverkthxddjhcvgkoehrkabmnm,29,29 112 | ftifmlvhntktvibnhurvltbkiokrxbjtcspxdvckfubdifppankvufezebhbthbyaghwehjtfufybmrqokauctocq,23,8 113 | otzbplwsnuprwcepuqdpghguxnotzc,23,46 114 | rmxgfqtyhvxmhcfofsneb,21,86 115 | hotuvxfphrjrilmurswhxlrarvwzlrrmpiugkyzcbhrfjequltiyvsmvvijgulwptopsusbnwhmoqqicydxygdmiqh,32,52 116 | sgmgeyfnciuhzztljgjnigkqvirknyrfrpdxtpylyinpowimpakvicxbprtgdzzzajwqlmhdlszvpdvnac,28,47 117 | lowsvdyyqtjozuolsorertagjuyaxu,28,22 118 | fhpbpmtpuemyvgulkkkctipjfjqrsnnejlksyasaozdutucoadrfdtzdzdavduuiu,28,76 119 | whnmrvxjwhczlwurcnjqimgaiuuxsumntycumfnvxxctbchmelgszpmtgrdbmgpgifgsncybwaaklysxkmovana,32,71 120 | kolqznrsnilljgygpwjyyatitxur,34,50 121 | -------------------------------------------------------------------------------- /examples/amplab2/Makefile: -------------------------------------------------------------------------------- 1 | BUCKET = ${AWS_TEST_BUCKET} 2 | 3 | BIN_DIR = ./bin 4 | PROG_NAME = amplab2 5 | AMPLAB_PATH = big-data-benchmark/pavlo/text 6 | 7 | .PHONY: all clean $(PROG_NAME) input_in_s3 8 | 9 | .EXPORT_ALL_VARIABLES: 10 | CORRAL_VERBOSE=true 11 | 12 | all: $(PROG_NAME) 13 | 14 | $(PROG_NAME): 15 | go build -o $(BIN_DIR)/$@ . 16 | 17 | test_al2_local_tiny: $(PROG_NAME) 18 | $(BIN_DIR)/$(PROG_NAME) data/* 19 | 20 | tiny_data: 21 | aws s3 cp --recursive ./data/ s3://${BUCKET} 22 | 23 | test_al2_s3_tiny: $(PROG_NAME) tiny_data 24 | $(BIN_DIR)/$(PROG_NAME) --out s3://${BUCKET} s3://${BUCKET}/part-* 25 | 26 | test_al2_lambda_tiny: $(PROG_NAME) tiny_data 27 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://${BUCKET}/part-* 28 | 29 | test_al2_lambda_1node: $(PROG_NAME) 30 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/1node/uservisits/part-* 31 | 32 | test_al2_lambda_5node: $(PROG_NAME) 33 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/5nodes/uservisits/part-* 34 | 35 | clean: 36 | find .
-name "*.out" -print0 | xargs -0 rm 37 | rm -f $(BIN_DIR)/$(PROG_NAME) output* 38 | aws s3 rm s3://${BUCKET} --recursive 39 | -------------------------------------------------------------------------------- /examples/amplab2/README.md: -------------------------------------------------------------------------------- 1 | # Amplab 2 Example 2 | 3 | This example implements the ["Aggregation Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query2) from the Amplab Big Data Benchmark. 4 | 5 | ## Benchmark Results 6 | 7 | | Benchmark | Dataset Size | Job Execution Time | 8 | |:----------------------|:-------------|:-------------------| 9 | | test_al2_local_tiny | 1.7MB | 180ms | 10 | | test_al2_s3_tiny | 1.7MB | 3.19sec | 11 | | test_al2_lambda_tiny | 1.7MB | 2.33sec | 12 | | test_al2_lambda_1node | 25.4GB | 48.48sec | 13 | | test_al2_lambda_5node | 126.8GB | 168.83sec | 14 | 15 | Compared to the results reported in the [graphs provided by Amplab](https://amplab.cs.berkeley.edu/benchmark/#query2), corral performs very strongly. Corral appears (from the admittedly unscientific benchmarking I've done) to outperform most of the listed frameworks. This is likely due to the high bandwidth available between Lambda and S3, as well as the level of parallelism afforded by using Lambda over a traditional 1-5 node cluster. 
-------------------------------------------------------------------------------- /examples/amplab2/amplab2.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | "strings" 7 | 8 | "github.com/bcongdon/corral" 9 | ) 10 | 11 | const subStrX = 8 12 | 13 | type amplab2 struct{} 14 | 15 | func min(a, b int) int { 16 | if a < b { 17 | return a 18 | } 19 | return b 20 | } 21 | 22 | func (a amplab2) Map(key, value string, emitter corral.Emitter) { 23 | fields := strings.Split(value, ",") 24 | if len(fields) != 9 { 25 | fmt.Printf("Invalid record: '%s'\n", value) 26 | return 27 | } 28 | 29 | sourceIP := fields[0] 30 | adRevenue := fields[3] 31 | emitter.Emit(sourceIP[:min(subStrX, len(sourceIP))], adRevenue) 32 | } 33 | 34 | func (a amplab2) Reduce(key string, values corral.ValueIterator, emitter corral.Emitter) { 35 | totalRevenue := 0.0 36 | for value := range values.Iter() { 37 | adRevenue, err := strconv.ParseFloat(value, 64) 38 | if err == nil { 39 | totalRevenue += adRevenue 40 | } 41 | } 42 | emitter.Emit(key, fmt.Sprintf("%f", totalRevenue)) 43 | } 44 | 45 | func main() { 46 | job := corral.NewJob(amplab2{}, amplab2{}) 47 | 48 | driver := corral.NewDriver(job) 49 | driver.Main() 50 | } 51 | -------------------------------------------------------------------------------- /examples/amplab3/Makefile: -------------------------------------------------------------------------------- 1 | BUCKET = ${AWS_TEST_BUCKET} 2 | 3 | BIN_DIR = ./bin 4 | PROG_NAME = amplab3 5 | AMPLAB_PATH = big-data-benchmark/pavlo/text 6 | 7 | .PHONY: all clean $(PROG_NAME) input_in_s3 8 | 9 | .EXPORT_ALL_VARIABLES: 10 | CORRAL_VERBOSE=true 11 | CORRAL_LAMBDAMEMORY=3000 12 | CORRAL_LAMBDATIMEOUT=180 13 | 14 | all: $(PROG_NAME) 15 | 16 | $(PROG_NAME): 17 | go build -o $(BIN_DIR)/$@ . 
18 | 19 | test_al3_local_tiny: $(PROG_NAME) 20 | $(BIN_DIR)/$(PROG_NAME) data/* 21 | 22 | tiny_data: 23 | aws s3 cp --recursive ./data/ s3://${BUCKET} 24 | 25 | test_al3_s3_tiny: $(PROG_NAME) tiny_data 26 | $(BIN_DIR)/$(PROG_NAME) --out s3://${BUCKET} s3://${BUCKET}/rankings/ s3://${BUCKET}/visits/ 27 | 28 | test_al3_lambda_tiny: $(PROG_NAME) tiny_data 29 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://${BUCKET}/rankings/ s3://${BUCKET}/visits/ 30 | 31 | test_al3_lambda_1node: $(PROG_NAME) 32 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/1node/uservisits/part-* s3://$(AMPLAB_PATH)/1node/rankings/part-* 33 | 34 | test_al3_lambda_5node: $(PROG_NAME) 35 | env 36 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET} s3://$(AMPLAB_PATH)/5nodes/uservisits/part-* s3://$(AMPLAB_PATH)/5nodes/rankings/part-* 37 | 38 | clean: 39 | find . -name "*.out" -print0 | xargs -0 rm 40 | rm -f $(BIN_DIR)/$(PROG_NAME) output* 41 | rm -rf job0 42 | aws s3 rm s3://${BUCKET} --recursive 43 | -------------------------------------------------------------------------------- /examples/amplab3/README.md: -------------------------------------------------------------------------------- 1 | # Amplab 3 Example 2 | 3 | This example implements the ["Join Query" benchmark](https://amplab.cs.berkeley.edu/benchmark/#query3) from the Amplab Big Data Benchmark. 4 | 5 | ## Benchmark Results 6 | 7 | | Benchmark | Dataset Size | Job Execution Time | 8 | |:----------------------|:-------------|:-------------------| 9 | | test_al3_local_tiny | 1.7MB | 580ms | 10 | | test_al3_s3_tiny | 1.7MB | 7.61sec | 11 | | test_al3_lambda_tiny | 1.7MB | 5.07sec | 12 | | test_al3_lambda_1node | 26.68GB | 288.09sec | 13 | | test_al3_lambda_5node | 133.18GB | 884.61sec | 14 | 15 | Compared to the results reported in the [graphs provided by Amplab](https://amplab.cs.berkeley.edu/benchmark/#query3), corral performs reasonably strongly. For smaller dataset sizes (i.e. 
for the "tiny" and "1node" datasets), corral outperforms its competition -- except perhaps for Redshift. However, the limitations of corral's architecture can be seen in the larger "5node" benchmark. Since corral doesn't have an internal secondary sort, joins on large datasets are quite expensive. In the "5node" benchmark, corral loses in performance to all except Tez and Hive. -------------------------------------------------------------------------------- /examples/amplab3/amplab3.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "encoding/json" 5 | "fmt" 6 | "strconv" 7 | "strings" 8 | "time" 9 | 10 | "github.com/bcongdon/corral" 11 | ) 12 | 13 | type amplab3Join struct{} 14 | type amplab3Aggregate struct{} 15 | 16 | const dateFormat = "2006-01-02" 17 | 18 | var cutoffDate, _ = time.Parse(dateFormat, "2000-01-01") 19 | 20 | const ( 21 | rankingType = iota 22 | visitType 23 | ) 24 | 25 | // Record holds joined data for "UserVisit" and "Ranking" record types 26 | type Record struct { 27 | RecordType int 28 | PageURL string 29 | PageRank int 30 | DestURL string 31 | AdRevenue float64 32 | SourceIP string 33 | } 34 | 35 | // Map receives input lines from both "UserVisit" and "Ranking" datasets. 36 | // It parses the line into a record. It filters by visit date (in the case of "UserVisit"). 
37 | func (a amplab3Join) Map(key, value string, emitter corral.Emitter) { 38 | fields := strings.Split(value, ",") 39 | 40 | switch len(fields) { 41 | case 3: // Rankings Record 42 | pageRank, _ := strconv.Atoi(fields[1]) 43 | ranking := Record{ 44 | RecordType: rankingType, 45 | PageURL: fields[0], 46 | PageRank: pageRank, 47 | } 48 | emitRecord(ranking.PageURL, ranking, emitter) 49 | case 9: // Visits record 50 | adRevenue, _ := strconv.ParseFloat(fields[3], 64) 51 | visit := Record{ 52 | RecordType: visitType, 53 | DestURL: fields[1], 54 | AdRevenue: adRevenue, 55 | SourceIP: fields[0], 56 | } 57 | date, err := time.Parse(dateFormat, fields[2]) 58 | if err != nil { 59 | fmt.Println(err) 60 | } 61 | if err == nil && date.Before(cutoffDate) { // a failed parse yields the zero time, which would otherwise always pass the cutoff 62 | emitRecord(visit.DestURL, visit, emitter) 63 | } 64 | default: 65 | fmt.Printf("Invalid record: '%s'\n", value) 66 | return 67 | } 68 | } 69 | 70 | func emitRecord(key string, record Record, emitter corral.Emitter) error { 71 | payload, _ := json.Marshal(record) 72 | return emitter.Emit(key, string(payload)) 73 | } 74 | 75 | func (a amplab3Join) Reduce(URL string, values corral.ValueIterator, emitter corral.Emitter) { 76 | bufferedVisits := make([]Record, 0) 77 | var matchingRank *Record 78 | 79 | for value := range values.Iter() { 80 | var record Record 81 | json.Unmarshal([]byte(value), &record) 82 | 83 | if record.RecordType == rankingType { 84 | matchingRank = &record 85 | for _, visit := range bufferedVisits { 86 | visit.PageRank = matchingRank.PageRank 87 | emitRecord(visit.SourceIP, visit, emitter) 88 | } 89 | bufferedVisits = nil 90 | } else if matchingRank != nil { 91 | record.PageRank = matchingRank.PageRank 92 | emitRecord(record.SourceIP, record, emitter) 93 | } else { 94 | bufferedVisits = append(bufferedVisits, record) 95 | } 96 | } 97 | } 98 | 99 | func (amplab3Aggregate) Map(key, value string, emitter corral.Emitter) { 100 | emitter.Emit(key, value) 101 | } 102 | 103 | func (amplab3Aggregate) Reduce(sourceIP string, 
values corral.ValueIterator, emitter corral.Emitter) { 104 | sumPageRank := 0 105 | sumAdRevenue := 0.0 106 | count := 0 107 | 108 | for value := range values.Iter() { 109 | var record Record 110 | json.Unmarshal([]byte(value), &record) 111 | 112 | sumPageRank += record.PageRank 113 | sumAdRevenue += record.AdRevenue 114 | count++ 115 | } 116 | 117 | avgPageRank := float64(sumPageRank) / float64(count) 118 | avgAdRevenue := sumAdRevenue / float64(count) 119 | emitter.Emit(sourceIP, fmt.Sprintf("%f\t%f", avgPageRank, avgAdRevenue)) 120 | } 121 | 122 | func main() { 123 | job1 := corral.NewJob(amplab3Join{}, amplab3Join{}) 124 | job2 := corral.NewJob(amplab3Aggregate{}, amplab3Aggregate{}) 125 | 126 | driver := corral.NewMultiStageDriver( 127 | []*corral.Job{job1, job2}, 128 | corral.WithMapBinSize(250*1024*1024), 129 | ) 130 | driver.Main() 131 | } 132 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00002: -------------------------------------------------------------------------------- 1 | pucpwgjykbcelcevdmzuahojzjxgmpgojfehhabasvbzbzpczqcubfbfekv,397,51 2 | cscnxpejru,169,93 3 | bwmsfxqjlmnpfgflgbjheluvpdfbjhppfjgtmhiuayfhnopxlasxuofelttcyfhiizmzglofxivnymntrwj,152,52 4 | asvqyjexthkrgsubsnktrnfkeclppaaqymzjrjonvuckxnahiakpizueeabawbafa,125,80 5 | azoomxtuqxyoxiffctdmpqzvxypnbbnkivtnnrihmahalsptqubbkpjefrjkwdbjzrl,111,79 6 | wkjiwqlntxenobhsrqznlvshfgryhohlkurecbtcvflyvawskjowgtcsjrhtuwaxuegluhppstocdmhwtguztpfjraskx,95,91 7 | bnokzcllyzyjxljomkqwguhuhnewryslitzomgefsoffwzroryy,76,22 8 | tpmltuwalvkduavbwyxvv,94,89 9 | bwdjrrvhmcuvhoseriwysjrsjqktawqcvzqicmfyuqlkzmpiihhmxiamzalvljqcejvogzshjlg,76,47 10 | ruqqqiaenpbtetnhdyw,65,86 11 | toqnmxdhvwtdgdssipqkbsgghcxlgwrr,50,76 12 | jrcxilplavgpxejqveqahdsfbxpwjirgzfkuwtiphpogiyoxvsplogtrkfpfvtacnreoimmjlhmamjpwtuniqc,56,83 13 | jelkixioil,74,42 14 | 
fsomqgktlhupjwnuaijfckuragjhgekrjfxotekoqjudmopiuxlifatkmaztltdhzwlqoxfwphjvxqepmllrxaugbnbabxqlqauf,68,74 15 | egadxzlpzfxnylxsvvjezcye,46,79 16 | mwocakscnhykpsjxzoqezpabzesnm,54,11 17 | ycgofjpllagprixqqkflwxugzztlmzlxhhwzzkufxcryvvpmnewnbygwndnrcanubrruxqpcsnlsspfyvnrjihacuwmct,46,79 18 | zjawojxmqgoclronijxktltbvyywshjuqwljdipgdgxftwfhtvpscmaqgrembsoiwieq,61,49 19 | tbzlzttlkcw,34,23 20 | xylbmfgypferxxmmjppsrwffcuyiizkkoqrclrhykhpaeyquxjmehow,55,65 21 | fffzyiovcsqohyfozsrzmvirgggnlrwwbukwnvbutzukubsejlavvdzwdrhnfvq,47,71 22 | rgljlcfyqdkgvzulehqycatspqsbxeijuztoymrxmhuqrzknpjobysmvxxvhusniqjadegtfzkrvgn,52,49 23 | dnyqjqgstxsczwdqzboqofpglbidounbzdvfqtaqmikbdphgcasiaxpekaszfvuesvismgehmkhzjqzsorlehbnwr,39,74 24 | mrokzjuqeapxmwlgwcxltrebaxcdfsnsjklmfialtztozcpvizhubnxazvrfzgospnjnrbtuhkjkcuoaedfmtfcmu,33,21 25 | dubkynaxlwezsahqtwrmgqbrvcfmnojjtxeqepfrrbrdjflyuwvxwieurcmduaqrcnymuukixscplekgwcoqbbaoz,39,14 26 | hjzppcmvfsmjhufzawrvemkrfwgcqwkagjirlekjpeujpitmpoumbxyevwbsumjimevmdudctfz,42,2 27 | kxmkvrhcqxefwdcpotczhxyugzvkhrkumyoezlfxavzyzzqaulqqbornqekrunuqvmyfhzcrxctzjcwucdwimqipwwpx,40,45 28 | vkwhdwwfvrprmfikjxydiialkfwgkbzxxrcddtzkkvlshwgicwiwafizplmkdicvgcnokmxfgedfk,36,54 29 | uyjocupptcphnppmrkehkeijausbsgvwhxyafacclaqvnjigkytaatoqwaumdyicfzcwfmthkjgttoas,39,6 30 | kyfhmmpghliadreeikdovatlgdimordqlogcykmrbiiycjeobzyjruwcqwvkcexpwiekvvqgv,34,66 31 | kjvdtqnprmmvblvnljqgjgxcighxqjmbralfgqqpwgegtgl,27,33 32 | evcamtbxizunuaygisrzkkbotosljmjyucsyyhnpbhujvuwimgwnbizreutcrqldr,44,99 33 | lpxqbpbodlafnvwvcbbzliafrdhdgtxkzadhaytsbiqjznkyzbtvjbhttmqarntesfzrqbtzilsutzfn,37,19 34 | olyzdcweeauhijzpmfqevcjdmdhkgphznotlrgseuapdlfwldywcqjsijxehiekowircqadeatsttugevlujiudwxn,42,1 35 | pezoxvvzcknucawwtqni,50,66 36 | skpqjjuxxpkescepxafhvcofriqmppqsofaxgguuwiaawsq,40,64 37 | fvompeitysfpmmpjopfumzspdqmcqifxeijibtzjhjgivmsavhrsbvbsbuidbnijufrgsqcodtpwgrjymr,35,76 38 | nkvxglelpvlsexfslgnvylcpbugixoochfopwha,38,19 39 | 
mdlbvizitfbtcwfexdksnavlafvgmypyznvwgtvealayueqytudbecnjdsthhhojgdyhtoiofpjiik,40,52 40 | aairfezrfs,37,56 41 | ceegngeyvabxtantxzkrfwjynzeiobiguklvbhiufytliasxzrpuyishhmfqcgepvolbievecijhwowlxfi,38,92 42 | phyzqlwcppyxjsuvnpickdlctpsnqesnyrbvyomjttuiogpoacncuwcnznzxpoxgsm,33,90 43 | mmedkcgykjqoflq,42,99 44 | evaskongtymmjsaspdxkxogcmyqmirorzifzqcui,28,5 45 | enkfsycihzgvblrgjmgz,26,81 46 | prsufmapjmcpdlvsmhubsqoqsofanvwwiskoihrhocnwimgmwzcajjapqinosmbzhqdxptnrpbvk,29,56 47 | jpolvahkgdzvorremcykpbjcensohsujxncogff,26,18 48 | vnvquklvjvafthniwwrprbryw,33,90 49 | lbibiltgqnl,29,11 50 | zlfpgxopmisvqlhybmemwgsnhhihidsjql,29,63 51 | pkaqmfogivksgxagyrfibief,30,70 52 | urrmvsdmirxqxffqkoikpgpktuxzppvrgdaunbvmgbyvmlaeynvhclwinyowyorsmcldjvxbokwheyteapsculda,18,91 53 | agbixdkfofaclgwprvxhkzkgjiolxgccqorp,23,94 54 | nfqnazltxhszqwoyedjwfinbffnnxsqkolaohtzesyuqgdyaatdafipqgjzqiodmyklfqbm,25,2 55 | qfueqgswwuhvuokdhvgbliztytidubshtvytphaxfxxb,21,34 56 | ggkvewyyfwdnjpxgnywgcdhymvsqgmtrblpdhwe,26,2 57 | hrvqughgkbkquaamtmjbejnzkkgzaauouwqgcypztripdoothcmdsbbjzowjxuigtcsxoptuovt,22,25 58 | mnfbdrooyqaxjbwakrrvwxonjsrvmpjmlhjmrdqabfhiwlorwirbsgwqynyghfrwwauqvjdujwjhivfjkggfwio,26,47 59 | ivdqjjzkbxjonaxiqtryvgdjtrhemsvsqrobubvxctetgmezjowsdmffcnekcszuzxhgxbjpbiotufudomlpygs,36,88 60 | wivgtiidpvelgzwdhtyzbcgipslkvhqpxdlkrtztsyqixjdeebfcjymrl,26,55 61 | qzzqryyxizgbzakeeekwjbytxxtuphglohpfhvzmqzwiucoemmycsjkowryppovgdmnoddikdoxgqyysu,22,6 62 | stwzfysehr,24,53 63 | ylsskdxoydhnxcegzfweofaxuxeenm,22,86 64 | hlzvdgmvmrcivcxcznivungmhfyqemrwhm,26,6 65 | faptfnoorzrathmgijtkxqwxkeifaqhudgfkmgvzfknrwjxofcdoyuylyhgmcyhkdjndfypxyapkla,26,8 66 | qkilhirrmevqopqgqvbqwaverqfikaebgjtekuvufubvtogbc,25,67 67 | vnuxngoumdnxbdlbwnnzznsxfswocdwyylznipwgrfdhpxajaslt,18,86 68 | lfpldxezkcgnfshiowiigogrorlvvqawodwdtxsze,29,52 69 | xdonpzhgrqaeoszkfeqkwmsmuwcozqqgaotzchhpacsplzprizjeymmhfdroojftesyxfmzwtnyttwpvmtnmycjmknvcw,26,63 70 | 
awfwcnggjqljcwdvyjzbzfkwpgxpzebqpgozvbpmaxekhivdeocjhacsuydfypozbwplxxnoigq,29,54 71 | ipkuqklqdunuzcwgxbedfkzkwkwowycgupdtvfznwvf,35,60 72 | tmncvfeasadd,23,57 73 | yftkkoatykxtzeodqosqoimrtawvchdonbxbbgzloimxzybbtbiiffvbzktxss,20,80 74 | atqzcjdfbrgxmmwiggbyxcpepotgjkrbudznjgskwbfyuuejtvcuhvqcxtimimxhanzhrdlaypozbmvgntzmwqzom,28,88 75 | jgfzghifcutlawgikeftasiqugxqqgoveiqsu,22,89 76 | ylxiphwfvrmemupgnk,26,21 77 | hcwqtdledudydsumenmbwrslrdvqx,24,69 78 | tkqvlktxqkrozzvzwyqzfowittzryiqagtwzsscarkzfa,29,15 79 | zmdopijugwjonpa,28,96 80 | flzyiiwcwoxirjtzubj,22,56 81 | yhmhisupogcgstwzihfxxovfomubbydviptetbkhcbcdwnqledpjqukihebtbocrgttzsxlbsowqcyaaqqcdudeea,38,75 82 | zpbummfgsdwsjeojtriipyvrsnbbvwrtponwcqiqrvtgrslyaoacq,27,39 83 | qvbdqhgmowzhkabatrwqadchsxhotcondhhtgwszmnsqorqxeqvejtrxodnpgtga,26,26 84 | fbrmywexrnuneizkmnrjzynxhbgyfqcdooxirmcgvzkopdnfyfpjdvkwesqryeriuqvgwzt,31,52 85 | meybmdvzykssqulqzrpidijcrltlktnquybyraltwbqfitckgdgpp,22,62 86 | dtvrjhbgchtdrjuoqnjniaxp,24,36 87 | kzcgxmdkpndeizzqmcsqxbjtuafkjtqwfpulakwcnivwprlfvgmerdmfxtcejdnapdyzsymneaetfkbjblkltjokungnprekhz,34,48 88 | bomzpfdawqbcborzcrixfzpvnqevjlzuwojepgywazdqpkgppnrgbobfnkmufeyeaojgkmaeoyefwzquprpistbtnropvrczj,24,84 89 | prcehwijetccpjbabdgmmrxoynwibegqsjqfpujnjwwyesfhbyowfspcxwnweulxeyhdfrtqilrvmkrnjgnwcbkdmo,20,69 90 | jgyxudrhofpoq,23,59 91 | tcsjdddsxrmbfrbzjuukjzbtmjksofyieyshdtslarna,23,13 92 | lwfboyzuykoyaeyfxjmidnpdezthmqehqxpwfitphqpfmdc,24,83 93 | mfrrfadmxdtueiinmwqitgcsivpjgftvzpwohlmiswlcigprfkoimaril,32,15 94 | iejoarsuidommpxsboizmalgjgwzlrusfwtyqbpsldkjhvtnmrtorucfcukxktyuglemhbzocjjgtccmawfuvvrrkvhd,29,62 95 | ohagdxgihvfbtfjpqiiblgcvabyseuqmrrrqjyqhubspoepfvqcs,18,15 96 | rwzrqftvhqofahpvbdfbtccplyboavfxuteqffrcppqzzbptldtyhqcrzjhuuibpzmdrymltdhpdiipyguwnkwjyhe,22,3 97 | rqqdfxjknteqallindfdlgpzzsqewrtvgwdmeyzsnotwfhzycptdwelbavuhjru,15,88 98 | aosflydybdgjnbanyfhdjshguzlovniicdlbiblkpkcoremeupdkjlnvaiabqnjjqrggckuyiwwlmpkhkiah,29,96 99 | 
ymysicmeapczxacgocugrkqvjwrcfsqgtaurqhezltbeypvaxihnivdfseawwbqpsqib,30,55 100 | srihokmvipfclrovrrzsjclhbejpwu,16,74 101 | fyaqfilculgrrqfozvvkllqgiauzadrojfaxomludcquiejkwmguhhznpnbzznoei,21,27 102 | ehegqevxsvojrlgljajtjhpnbuaxxnmoyanxqogjxuk,27,74 103 | ejqihokrlejwvizwdsxxrwpvogpbenwiyuamywfkzcjraysomijgarwblpxbretjckcs,26,72 104 | lnxhpmgodwevrbckvxgfvfqqsaxyzfeozdasymlpyfeefubxwdvjlomteainhknnjbnadm,22,39 105 | nctqphhcdfoffbqxnnohooyybgltxzrorikogolbdfitdfyyk,27,21 106 | upcqyionolcodzmfuotabekrwxzqntjqmoawxwuwjqwwohotsfzjtjkqmrbcivredmcfbkobmay,32,72 107 | xflvbymlsimkgwwkwhnaadiduehryhjmsctsftahiitvntkqtgjjbshbvmuudztcslraflzfvhpevfkqjdgs,24,23 108 | sgzkljnbrwfvjscokewkzyxhegoyaclqspqamqkcyzeodtnvligjsmijncbpdseohvceshnblly,23,80 109 | kcxxbfuqjrftwiqjoffsanhmjvmcvetsmveabxkeesbxxqqtwmmjrengynukeyecqmlfemhjjhfewrrozbsrruynspfhrsihunh,28,22 110 | mkvuiziarcsrerhvkqyqercrwmypzbrqkikepirbzqokihrjtnyydtybzyjjcmgoeeau,25,24 111 | ukghnduhzkpfvttsjrqhyrppsqwhxeqihtjytxchrjndxuizteycsihzqishx,35,86 112 | jumggziytoyywaehqrnnnvepnugh,19,51 113 | svjivisnoybhozoauchiaqfkwap,26,38 114 | ouilycramq,23,40 115 | shsgdwseqkjhvofiv,27,61 116 | fsedcowgdnrpioohgcefysuvuytmpokugrvognrqisvnifpywyzvyyyspwhfawqpqivsffzpxbknwaopvxw,33,76 117 | bkejvgfptolenggdllkmzbxgxbbolhvdlkxjttrwqiwyqzeifokxtcsvmbxlntcwgzwhttecgix,28,28 118 | cbxeuqxqcxafbxzkdbumgqdkgwdcjpyjqfugnudrplirlzowlzfbnro,33,33 119 | mfgxdfgacoqohyknlmepyhhuspusilz,16,37 120 | toogateygggdwssm,19,15 121 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00003: -------------------------------------------------------------------------------- 1 | ffygkvsklpmup,332,63 2 | hnapejzsgqrzxdswzepsnrwhupzxeoeourvexevzvplzfatcnxdeioleokcphan,171,35 3 | rvbyrwhzgfqvzqkusietecgfllzvswjypculysxmqfiwbvhowaouxybxyhufneoazowbqifwmxtkllunuctbrrezedtzzu,148,22 4 | dsmcwjpmomksjeknnzqlgnyyqatnkjvrigwkosrqnrgwusfafpwlcsysdqyudclrfegjrbjhaosgmirdzeiynpf,100,74 5 | 
knwlhzmcyolhaccqrjrodrquzavpupxqhlirczxahcgukvfyrjnjwdgvclyrbyrijrolgwz,104,67 6 | zznghuzphdxpffyvireslpzieohxfnybwxritdmerkyuuysprppivzmvzlodmvuiugrxiqbxavepxman,95,49 7 | ogbdomakokymdvfwpzareaidbersktfmvawcrvoystglticrfhlhiynhohbqiozjwpxuvzmwivwaitckvmhvprvgnuqmkrie,65,98 8 | dujeekkipzorxnlgbyqkqhguqgkgjaimlruzvntntpbvfzuuxtcuazr,80,47 9 | nkoqcbfdgeyjqnjpduofeobmrfilyopppcnhmloygqiuwyzoypdperfi,91,31 10 | qswhentzpmaswnahgqgmelonpvhegjlogtzxj,69,14 11 | dbgxyulbmiegbpqjjvyugrotcgddympsyqvldyjipz,80,42 12 | nntxzkpafnroecjdeuasfafrt,68,51 13 | novmceeekpcizaqvkxyphhmoth,66,48 14 | jaorwedfyuppfxfmvkolwpzfcne,46,18 15 | xdymsbwfsopjckuakurhwdvfubshgqdcqvvearhunboqvopouyjpbakkhclvfhmrfdnilcrwxygwioucqmv,57,57 16 | shmqzllepevdmfbsxcamooddbfqyydgwobtqxvjvwkoyvnisxxzjdbpbslfgcbyxszgbkwsgwassilxsolosvssmxzkf,51,70 17 | euepmnknkbhszhbiviobtylmlwqhufargkosyxnnwrycyg,47,59 18 | jilugbnkpqefuxygivelnqvpbpahaamoetjmmalhvlssqxcjxitrdkmjbqqqgboxhahnvncmusngcbnh,46,13 19 | mzikvhjseodyjazasocv,50,47 20 | ixdkwhwklkrveshtzhoqtgwtbltcakupuujgbpwfarqclu,61,76 21 | pguyzljhuqd,58,6 22 | houkmpqccccguywrvgoopwsxarvolalarhw,51,3 23 | zrxewqcbpfahvgwzpxyuqxygvbnqejpdomxpbokpotmkgobrpuseehssmuzzienkjgq,53,56 24 | dyhzfsjrzctorgpeyxqakkcqovuehoxcnxnjqyvqnphcfygax,37,56 25 | dyihrprqjgfpsgnitrupnstslgxastqpwarpzynpjqhqipmgpmahmfhqqxzupxhzjkuholjdc,50,50 26 | xfghmaqmxcrclktkk,50,76 27 | hyxykkpvtwvfovycgaxopchybbozoittsjbxbjusoxgmehgnevbvmuomqmnbe,44,25 28 | rirlpcmdsipxelzcepwwmavsmyeoyqqgksjbpnwycgcdgkelkyhqxc,37,97 29 | cgrhvbwbgeejdrixthealtpujujqnglaudavwlcjfutqvrgftcoebhnhsss,46,63 30 | bbqruyvnmmsk,41,75 31 | igwgonutjaealrxoqhikohmrnballskzb,31,47 32 | ksuvyhmxoqemjviebpcxbvigirvkecnqwmdtgixetywrvmsfqvekhkcszscqmjhczefmmcvddadqehu,40,58 33 | aenrihsgsf,50,49 34 | maujidxtxnonplawbvverojjxqbvgvvqeskgchkjlilwguyhrhnzndrtqxiapwkgfwbvijcpkpmlkvbeqpxzxkz,43,30 35 | bwazbtavlyjsbvhkxbzkcuigeqqnezihnzpnjyvmnbmxxseeyjryznweecjjsupthzaytrmtzutjlztobtq,33,49 36 | 
ciymremulqciqibiwnksomenjupltancihvijfnqyxsdmfepvtkqkobecujfundbnphobjrjotwopuqbhlyakcmfhkneggirtsfh,37,41 37 | edxixjbslbmtuzdsaqbnrrryvqfn,36,74 38 | dbzajttojejnehscpzoecawrxwdbddspoelxjlsiqxejhzqlbilyhbjbrhldepxbbjwmmdpvgseqnufpwurnttbkgndw,43,69 39 | nxcbelxzhocqjrrzyztnxafabwyzocvnhganktxupyubepdmvkynyasjskyrigxjapmzuxmoqyplceoxiyacutapiaxlmdodp,37,58 40 | vhsidxwcunjqrnsdsmpnlredzodpqvsqdnbohukvuamoalmlqavdfjgwlxdgki,43,89 41 | cetxzrjboivdwbmhumvebplwpntmnzdwsoigqhkufwozowyteufuxdpnzzyllpbscvovyvlhm,43,1 42 | xzkkoxdhpgczevnwpxqhbkftsxefspuakwxlemrozbvyqtzisvcintnkuztjvuvaruepqrskrtnpexsnhsmrmgpxregsrk,40,86 43 | pkvrnqnibkzwxqht,37,48 44 | psqmrtgbwypcaieahtqpap,24,88 45 | yrhnkxqwupmebomcczntqdkduyvuwzfcqujhpxqpgkpdvckegyaeini,28,88 46 | txxmmtupshwihcxzaivahlmgxacyehxdlockzhguxowkkxm,19,32 47 | ihhwtqlluofrvicvawfuqjvhfdfchhfypipd,20,75 48 | bfqzuiqsfpyfbpbjucbwwunobatewbqxupfvgrwljkzk,33,33 49 | vfkudnupjcohmkinanwrtjckstbcxsptxnyfrrzrtyjrcrfsvlhjirkoiivisonlvalltdjecifbbsiihwvqou,25,56 50 | anlkdyxblwotc,14,55 51 | ctcutxlyrtsaxqnzuhtfkuhrnwmxxoqudyyy,30,30 52 | ijzktfmgimwdfvcjuglaqupvacuxrlnekrwpptepi,20,54 53 | yulsrwxhthuywdqyafavxdrsjvbue,28,78 54 | evphrayuavjlwzyadocnblqhckkdtkaetkndfrhbmywlrok,30,73 55 | jtxasskopxowdtgoqdjiwlbujkkxzaubgulkwjfiothamqgrfewhfofi,20,62 56 | yykqssobhdbwqclphcvayoudpequkwqlficgcovcvswsphlybnhiytpvtmraelelclubtvzrgohoxbwydiivmjdoselhreosqjxc,20,15 57 | uqlcbepjszodlcqurqziuunuvwtccjfmjicfmuetwhwekaeeyzyqcptiyycyztaodwydaojypbkyotwbmbwcisuvhbvd,30,2 58 | nsaocyzspqiittgyzxrhsybiwzkyqel,24,10 59 | xpevapsbfbovowfbwhaivssjiaqljnqeqpktibutzjfqihilzxxxuifvgrytjipnoxqzhfzhkywjqkexu,34,17 60 | muoopqfbwbxhgqvaesjibeqwfnb,19,26 61 | xvmsawkzmbxutfncaocslykpxwmlqeqhbruoerbwdkjofnzybwlapkhmn,21,77 62 | qoblckhsukhtgjkdmungzacguqvyooxglumpgehouhotbxakbbreorbdchxfidndelyrmyyopvrhcqutnxaqkqe,25,1 63 | flwlckjgpemozdmbdwu,24,40 64 | syhlmudmzeiewazbqoigjfjgahztxqdyiskozokmsyqkcocafvcofsrt,32,76 65 | zbhwvhmoekkzishimqncxghg,30,95 
66 | abkmdqrmsioklcxifkkapyrgnwpsinhuevbepuxqvppjp,34,61 67 | flergcyllelpmronlrkxvipdhkimnvqheankidht,31,22 68 | uuncvufmzeiw,23,16 69 | gcrnzhhnlfehuqwegshloxzwcqdotaxetmhaclhoyyjgmcdpfgbvj,27,76 70 | hhawatnfwvufchflujrdnlskmmsit,34,78 71 | ftsfztagtqzorytjdajuaowgcjsnlpxseoimnayh,18,59 72 | hpebwvdcszmczpyqnrfhxyuabfpqlyeuodlotqmqvhtmrdtsfvszstghljtxccu,36,90 73 | uryjidsdniioyzvorofacyty,31,85 74 | mdpxzahcfctcssueieysrbxdxcdfmjhdoabcscwmctzrrharyazvflpcvficohcndglc,25,51 75 | ussgnkykvxiuoeasfdahmshgcwj,17,7 76 | woexnaghismafgkzrnvsvrukluymslpuubpmwudnaqdxuqlbhryrastwkniumipgbzxhjeipjlhmkaobseorbaofjfyhtisuvmv,27,3 77 | owqojjrbxaqpwgrtumtbdcmrdoiipykflbddnbnuxjmcfxwbetfktlsaxvjnppmlfittnscecxorequl,19,53 78 | igfpklcuppzypecchukywhiaturqmozmmqunvrmmcslicgfbqytteqsrpbexwekwkksifgoorjgrnfxdxzvfbrosgejjldprvr,36,43 79 | keoklmfnxrbitqbfbymcyvbzqdufrrqrmxzmgviyoqehlasgz,31,41 80 | idqxdvrlpqkgartfpivobox,21,69 81 | jmhkohqfrzczbamqrowqqgyqeoymzehnxooscvbpd,27,26 82 | zntfgoukca,19,28 83 | vblvhcelfrgotentbewxtpfyxesofxwidphlcjdzrkniaerdaxkscuurmywzrupykqanvwucmyl,29,12 84 | nlqzvyvyyglepzpxslsbmsmilcyddyvuhbkfahdjtxortjwrqvkhdcneyjdmyecaxfiqbgweueoqzlqxfptomoffwyq,23,27 85 | ujunyfwbglhouwfrqmugwdrkipsjrtnewshpyfhxbxctpyhucbqwyhjvjgyinvminc,24,73 86 | srmaudcfsouahcxihowpwazdgxtsnmnerjfnvkxiaudcguxztjboufdbaqonhbwzqhhegjzlzeqrflpwvf,20,27 87 | tbsrldzjteqdvtxkxovpimfsavopaqqgejbq,30,72 88 | bnvlqdxgvviwqezqchbwnvnsqnqzkhwndntxiooxzaueuqfiyxhcznnamffensnrqtcdsqfiz,23,9 89 | ahxwllmxdxhyhrflhyhetmagenpchykbeardzwtg,27,57 90 | sjxaaclzkjbsghkkenaepreudnzturabomgbswxbbmflnhqcsccyrusatinbiezjbljtdvg,29,92 91 | msjdoosdwgtnmdkhw,27,84 92 | scvtetdhmkmxvhfbflovfgfzhqdioytmovxwkwmutgqljzicwcssoqaajvtnonffmrbjrogqpsmevkgndieios,31,86 93 | qlubngzhkydwurcziytyaxrwbuokuggdevhnhunocgsitschutgqczawmnsbymscxrksvgmkonvi,35,65 94 | qnipzumcjpddxfzzvuvzrfgdeoxmpfpjpakwlndqsxwygxqcqfljmyrapkxpxiijymnodauvtrla,37,65 95 | 
qyjywejoyvazrcbuhnrmtffyohjdxzztcrrakpnpfhqgvivqkftzzjjglca,27,71 96 | trtqvmurhxdkegvzvsecgjgkgebqq,29,1 97 | ojahibpysrmiyvfkgrmzoghwhccclytjqfcqbebhznk,26,89 98 | hzhuzgrwumuncirrtncmlmkhyvefhjzmhniopxrcrxolbrqfwxljyeaabmvekbgxtbyomrowulnvmtsdtacwwpijbodngep,31,75 99 | thpcmvqbhxnbtjpmkkhqqbwxgrkkmaqfjgrvayzxubmismltwllnrdywbozgcfuwfvllagijiinizvodcnwnab,24,8 100 | slatuzfmxbsegmwthabskuxxxynvykhwmrsssdwckdujazkvst,20,73 101 | udbzpahqieqhymjwpmvimvtdvlbalmx,24,8 102 | rbwpiexjgy,25,49 103 | zhbghwhbjyjtsnwqbmtywnezcoljexsurstvhzethsuuupjplomphfqrcouqsbnfuppxibxgx,38,50 104 | phzjjvfgsgxkoeudfkhabutrosobnsuvumkibyxggmfyjiopyteuak,25,86 105 | rlknypphhokupxosahlapotebavdwwquq,25,34 106 | pwmfubgvehpdrdtzfmspscvpxcd,12,81 107 | pxyabcknlnkyfbojjcbojdqybnywhhcluqodrgedtvjvnoohpynwsjxtacnpulsyldkrczlzksvntkljkevsps,21,28 108 | savcmvrrldfxixpzjlmtldotkegeizfqqpxupqdg,21,52 109 | xbuwfwpeehewkkvpyjydrthmnikswsvkmzihvtgeonfdmpkpnwswbndosurrsceotovpeqwvauubeiqejrw,34,89 110 | hmdralzxejzxrshfymhhppeuvjue,23,6 111 | xwernhvzcplrejqqwffbmpxaljvludtrqdwfqewlettnkluzgiqukajbllkkuwbulsclzlmkpfyraixizaggenv,24,2 112 | fwfbkaaziyspfkvuneqxcsdottbjwvbsomdhvxfkomzecavbsuhpdpvyrumgdpcrou,24,48 113 | iqyozyramnxnyqdzfjqu,34,42 114 | ewtugjyrlqdfmmwfsoldweldzfyanveeqsbpmidijvg,23,45 115 | jkcseidvlnxmrfcblvdonijgnurgueunzjczjgwukyanaxqcjjngpargodecljovfjgwppnickkzxxshjltiozfqweqppxakywa,27,83 116 | ropccouuqkzhxrlkbxmontqwigyqzuxbduxejrwsrviujmmjlkmvnhvkfoguno,28,13 117 | oytxrpedhlevsmaesmipabhoddglgebeteeeelgsjzzsecimxmwgcwdaecgbcmqcccohisvurejklroziqbkktyyzfjw,24,37 118 | ejijaancyteetet,26,22 119 | oyiydbnbgi,20,19 120 | wvfsqnyyggi,25,10 121 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00004: -------------------------------------------------------------------------------- 1 | qtqntqkvqioouwfujojmjdurfxbrfcqsisl,278,45 2 | 
wrwgqnhxviqnaacnctwpkmyysedhreewiiechohnxwdcrrzmvqnfqfuhxqjbpgtljgycrzeooyzitbzidbbq,135,2 3 | cxdmunpixtrqnvglnt,146,54 4 | ixgiosdefdnhrzqomnf,126,33 5 | xybwfjcuhauxiopfirbnzodqjnrbxhtu,112,4 6 | xkbowoagajfgfwxjweyzjfqybeufhrzpzbcjvj,90,63 7 | mtuqcwwotyhumzeebremxycwbrkbkofk,74,14 8 | uknvebpirkoriljfrsoryrngsizixcploywsmxvrvrdorjbqwzjorqbgrshgklepiturazhoukg,82,99 9 | ddkcctektekhtqcnvjhoqqelfrommuhmppndcuansrazgnfz,71,9 10 | jcwtzqrqxczaunfzgovpuomuqxwddglozxdzn,62,40 11 | ucwntichsmhpigfnpohzengvlxxlujgwizrkhwufkijahmxdpfnpq,49,36 12 | hfqkjptpoosgnuhazipxdqqusnufswthewjlmhofufp,71,4 13 | wbilzeiisymxlghikfszhl,83,8 14 | kkjkkvaxplrssftcqopwuzlkvbdrnnipinxrcsznmcwemkuf,47,25 15 | ljsulyfewfvgaaurjqgicjygwfttfenkalimxxikixduvacxycyxowylpihrvenwroehsfohkdzcxwdczjv,48,12 16 | fpviiollixxwheuvavsqmdouaxweyqimpxtyclithtyuxjnolfljweqeignwajnlenkpmq,43,83 17 | zinahrkkhaozmfbxpexzgtljtilssggkzudameyiymagikvqikseplyrfjmacjntbxexpysiodplzs,50,76 18 | obexsavebuajjncbmdzunwotzxugeniwpyyicmcgtarzsdfsdgegukigecbtdubyezfkahesypeknknyjcaqv,56,26 19 | jviualmpljxchkrpzbbpwqbitrmcchb,58,12 20 | cbynavzxlsaetuqcvieqkwwtnwglsigogtaipeucbebuuudssgnuinavcmqehnnymejxaroppfzuizfibycdtawuypyea,52,79 21 | mpthehfptmzojzaojghebugrvchxoonempeituimachyhbbitsakdgklskusrdtpcpuftwkawebrjghapggqhhadbviczibvivmc,36,13 22 | ogpusxggdgqfbxvhsnubgfjwulhzepwdvkodqcjjxknimmbaxwtbzhfhibmlotiqrxmkvszdjqs,55,72 23 | hpwaxoiagjmmfoljzssktymccczsnslqsfevxrdaddkibyuujvbronwkwjtwnjvbcqjjmnwwiyyih,38,13 24 | pegqenooxiinjl,39,24 25 | tqvsethttsgjqoxcxruiohrmugykjhwpudfejz,39,38 26 | wlzgdxstiueucocoyuvzbfhnxqldbzybr,47,11 27 | cebgpiqymsrdqtnzguzwmpufqdjxraroabhdyki,46,60 28 | qgdafbdttiaopjxxsxbobktmcqwmjh,35,7 29 | qatppbocwfp,41,94 30 | ufuqiyhdynqnbzbplcwvbrnpeppuiuxymytajhfpvamibetxmbjwgpqvxtjkayxtofubpzhpprszxy,43,39 31 | jtblaqhpdzadcfuemktyhkvjtuwbdjuuqwfrarmkgbqgidnlzxg,44,71 32 | xhvvhbftsdikrnmltzcpfsquqfzcftlzwfggyfftbjoacrjluoktsulflcnjgwpoharzlcfroryxlkbukynvnjqqlfad,36,69 33 | 
nwphqcvpnuremtvzibpxyjbhbxuzxbvshnvntvazcvcqzphjguyfxxkmgdskuoamxnbrwxwqjqpfrmbiqautxdtlnynmy,38,56 34 | tqkbissauczmwhagqwyqrllzwlojnxzulqiuvdwfbzpjyzlnvtrdprjvkliyzmlnjyviomhwfaloewqmgyfxppqotdnhwqjhpjgc,41,95 35 | ftwsyxapynsdzkzfojgghujlnzwfftevtjtbljcuereatsulfipeujynlowggxqw,45,5 36 | ivtugnnyrerospdlfhguxhbrs,43,18 37 | lzuwnatnrsmtnrpadtwwayzapidpj,31,14 38 | yyxquznmmkmssddyuewudbtltztozwjpequmyatmeqnrmwgdolzyytuznkgvbtwakdizmeqvkfko,29,97 39 | oujppnaujjacichdajvumedbzmjqlupbsafqcxjfzjczrrzomxefqktjnjbetxodwidzftkn,37,20 40 | vdamewmbsmtzxntxfwmnsvwvzgbhvnhmbmnmsfloapqfejapbftcuzoonettwmgxqzkassyuuaystyozhasjs,40,56 41 | umrgwyezrviaqlxbdhlknxfkr,42,82 42 | pqynouikvkcbmfwxtwgtfwhjsnhndkhkhflsxkhyfwqlgknyvwvedzzmnlzlwpcuwvgkztditecaqyxqoezfqoqntyd,37,84 43 | hfqeuicxibbakhhfugqyonwxxuqidaertqxrbucpzazpedqgpfrko,47,37 44 | scowxkwfvdfypybpqjjjpuphdq,25,85 45 | ywfpsvuyeyorsmemrchkclkdcehfaxac,24,88 46 | asafhzedjtnriqullmsrccoqsctbyufajfyyfpsutylrsshtfuoadroxpzjkvhktxnrbkgnmecsollnrrzihjdnlgmwztfbomqpx,25,87 47 | rwlopbkozbh,26,86 48 | hgvajoegfikzhaqzezyrkezhlxvxfajnhykteusezpvaftlmdlzdtrb,26,23 49 | iikhxgpvhuhqjvnlhxnvrlqznxhuufjfynhcvkdsuracaxxi,27,66 50 | gztaebsbaqmfgusknmnayoapevpdxxdtnfrngnrlimowqqbbpxmhkkjqcokuekwzxphsxamddjwqzhfjhfzw,24,52 51 | jasrvbhlxntztliaebsqhrvzinoxenakabwmljn,27,83 52 | juyphrluyhmugzsynokcueumggtqpq,28,66 53 | fdsuquodyddyyukxisoqmnujznkpfreegguftswcdxqnhmiwypxuerxvehtrwyaybf,29,35 54 | lxesxvkqxhvxwstneqaakcjdyoayjazcbgonm,22,48 55 | yzxacxeqjhxphlvtpcfuvryumhwtqeytikzdslvydqhtbrohyidunliqhosapyebihhjjsojicm,23,87 56 | zpupycebmsyazoyjqemxrnujojrhedzgdrkkccclrejjtwhoixlfxevqnrldoxnfajqbrj,22,37 57 | voaytyaiknvcvvkdjbigsjgwtiwunrnt,33,75 58 | lbewuzdybemgkaflfqejhqrhsmcafocqgxmbivaddvjdbbsfwvlehapklnqmurbjb,16,62 59 | sebdnwwsrfvxwzvbwvmbuyvbxxpfwngzgeltmowwlekknbxvddrmcbauhribrohusgcokca,22,43 60 | lxbbmspyozbflzzmcznlannlveywmxchtgumgqkbnbytvbytqrurojmmrkgkqoiaebqydye,24,60 61 | 
hyhoorjwfvwhlaqpltufqsvrtshcehtsbjbzkwiuorjejwolpwfnqxekkwiadxnbdmcvwvvzfwzdpzqoajzdvfmgzctjd,19,51 62 | gzjginjqigngyzpqjwmwajvwrlzribxbwedcmisycbzbzmnztx,22,10 63 | xmqriphoqffimukhhonstobzalicbztvgspjbkxlixmuhux,25,49 64 | rympwivzhsnenuzalurdztsudtwjwoufpundgxcvlixnnvmwaykrcsy,22,48 65 | xcravjsyrxtlgigbfobjbvmkowesvxdyptqjusa,25,71 66 | eavflbemqreirlvqkaxooxqdrqogupiuforltdukbwuwntdyfqzsatmpigvfnydhpzcxgjetguaenblpjdzbxefv,23,19 67 | gslipyaqffodswcnpilxezshfkxskcxvjogwoqsrobhizwfsvxymktgipnjrbzfyiatpqkuzhvdouqpookvsgzgqiw,19,88 68 | xnmzpadhjrvcypvikkfzuhvtilvywyonseyicggmmfxkbqq,29,9 69 | xkqtjtthjselxuhmx,33,39 70 | msvscisagsfitogxpubyofqpwqensjpib,28,43 71 | wdjelatywibsmiedltypmyegkjqmgolideaqtkilertbkxemchrypsitiqjntiaulvowczkkuonzldjglrrtlnj,31,41 72 | axlvhxojgzcptstquvvrzqdlaxojrpxrucifqkfylpxcvxybmaqoaojuzohlerksbxgbvvacrtxcxmtnuemwuxycw,25,38 73 | gowpnzwlceydulwzjvxahogateebgdghuyorchcdzvvzlmslvyadnxbxakguwrxeabfcayhbjysstavfgnlaxiictmpmsvrafp,26,86 74 | ivvburqtzadpnmrpbavcebqfazbrhhlleawlmgpysjahlcqodfcpjyciwwfaghtpfoea,32,64 75 | fcfrcesfgdxdemybwbybmrtywbpsfob,30,89 76 | onbsbmxmypxyvntypsrldhydoz,33,80 77 | nazqjcbowlghdkyfqotpx,27,89 78 | thcdkidrwoihqexyipzwdvesueqndluopaylzefvqubkrob,24,29 79 | ffuyxsvugiousdumesfr,23,31 80 | keqmbrrxmptpjbiyaiegwesofoyyvbktdoboabbblidtdkgulvhdegwgkjzlmloateyawrgxpzfyejqjhtinhxiiqmeabcgeb,24,30 81 | ujixhafsmxq,26,74 82 | bwmsddjgeutaywfcpdibemhdkzpiwoednhvhjnclkgukfzsvyrosxoqlkohdhgslcsqbohpsatuynhnvry,27,86 83 | kkdrsnpmdbfvcvqhxkwfnmkg,19,50 84 | whlkitxfhpmshmeyhentozwykrqvqzfynsptoxscumobaamoazsnxkeicycxv,23,62 85 | pfpugqidclwdbxxlpquoszgxqrqtuzbeyzgjuptdhjkyttljruecwdeqfccxtyrwjjbqcihjtgfaanh,25,76 86 | twfldvpujohkpversibquxsndxvva,38,65 87 | rrzbrjglvdjgfzoheyumcmvxuniwyylheydwcvqtgcvpluauwsbzlklzwjl,31,76 88 | nibpfbzeymsqacskfgdofeebcsajjzzofajjgncvmywgsnlmlbehdljdvxujydmndfohdccxcbmtmqrkojiftlqrmhlsd,34,13 89 | lulnrwzjyuogmxbnelc,26,73 90 | ewizyrwmkmjyftyjrphrzmrbw,39,59 91 | 
yukpolrzoxnjlstlmxdqucuinqdbnitijdwiyuu,25,20 92 | bivdjgpotuopmonakpesvwgjlejmixqpldxuqjectlkpjeb,20,67 93 | usdnoljkdfxwy,30,55 94 | htpxpbirpdqujpedcrrkqjoopmzlwwkldqpksejyudlpjvjprzixicsy,24,25 95 | mprmrogezadnngvuuariszmiwcdhpidygusjgnyqiwtkxazgmekdjqosmewhhwjx,28,23 96 | zvqipxbqkzgvndxiagbnvkloazuyydocauqqhufpugmlfgknllmzwllhvtttstxvxdwlgfgkprdkuzrzlgjtscpmvctbmmpbo,31,41 97 | kqmywbagvsrbnlqdqdnguvavbafxygsd,24,89 98 | ooiwcytbnldhqukkcyshnsluwtzbsrovmselwonscfqygqvlepbrsdoskbuhcgyxdhtouyqc,23,91 99 | mkjlyrinvordgvalgaxjdwaxwljiceljmzcmpubcrdiklquxggdz,30,97 100 | isiuuaurwwlnaubxiwmnuinwzlknocjkaoktlqsftpgccnhypfotwqklqnafbszmcqoyjqaewutpddpmlfhpcexnogewmbnbts,20,51 101 | kaptrtwepxdvnlczbvaohdqqpmdfxldyzbnponmlynqucehvayngrpxiawaokxdk,27,81 102 | ddhdjzzyljnplmkuilnektmrakvzsmzvvnbpxnwokuapargyvcylfwwmmtairzmvdnx,23,5 103 | xjzbieepibmjbwljfecyushntnbw,19,44 104 | lthmqrgoxttsgbktgkyqjxwqbpj,29,37 105 | ayyihkqlbihgxr,26,17 106 | vsikmgyjlrdppvjdmafvuwnzrzfuivfyaijzheesueubu,20,2 107 | laeulqlpeyktajdfkcmoytszdzrihzbemrxrlfdqrzsuhxghuexzxoocmleshkrarddvtato,27,48 108 | aimnaqxjoyvfqakkkroifdbabnovkkkspcyopiurdhjrkcqnftmqwpudzwhubgdhxobzo,21,87 109 | xtunozeolqumwqzjnbvo,27,23 110 | whemdmvvhsmpylapyxigqcmmnmswxrdlcwppvrviti,16,21 111 | zhwmcdaunwjvllkgljuvcrapuurmmqeectsajyuqujbvu,31,65 112 | gfzykkedtbgcclkbdfnmdctuwsumnbwwjzeawnnupkhwmphenokriiuojymrqdbxk,26,27 113 | wflgjnydcgszetdjdborwwoif,25,43 114 | uzpelvznvsbgaiuprudzmzwigjulpprumzaavobnaaxnmnatncnejvgxjpdltiobozilanofrarzsonf,35,67 115 | kybypbojxknlawnzakpmuvbp,28,67 116 | adtfhtarmezfqkmlfbmyklwigvqotkzyznclwisvggkwtdvmoknxmxmrtfleemfsvxg,27,79 117 | mosbqzzhgjvshrfjyfcgcyfddpzhzuklae,28,78 118 | nasjmfmnawhvpvqpn,33,98 119 | xyxlcaugomlckbeasjbcdsdrojbkhdwhrzotfirvynnbvjkjppddpwkecrswozrhqmznyrxebxgusgfnt,20,95 120 | qgzchqkttamjysvyepcwairwlcipymzklaqnfa,24,29 121 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00006: 
-------------------------------------------------------------------------------- 1 | xjhmjsuqolfklbvxngyinvezezzpnpudcsrdaggcqtjkvythibgjnrlmwtijvixsxwpbrbfts,251,80 2 | seozvzwkcfgnfuzfdueccfguavodnxo,165,66 3 | fdgvmwbrjlmvuoquyblfkvyzraemutrewunyqmfyvcahx,132,88 4 | hxilvmwkazrvjqtndvlziqhyzhorryycvrblpmdtmlccsnvrktoyfaiu,99,14 5 | gqghyyardomubrfsvhwxcsmnlnyhtbaozi,108,29 6 | zbqmvcuwgwtbynfglageenwqklfimolvtpuxgnibmgpfrkkjyqaffptoacjhhkiijnusaknyounbopavarsvwjbueiwpjlumelr,89,38 7 | tktwwwkukgdtlfltbrhgfwoilgfoysvrnrykoahowsifaskiqedusjlypqyvukgvomahhafsjmxzmtsmlzadndzacyyv,59,83 8 | jhsddobuctksnpinkf,88,96 9 | tcvhndmcoslftvswhjolissexnibusrgqcfn,67,55 10 | jxgpkxbhomwgxrjpqdn,62,88 11 | hqphaefgnaldqjcgwsqgenvzlccnlfgqqslzgzcactk,49,49 12 | haodlgdzlqdvknzzxrnrkhuvszpvyjwanqzfezsgtjzleuossyohvexzzqftstifacwmrewutflvytxdfrvovvezmxvxlianjg,64,97 13 | rrpxypelcmbxef,65,85 14 | vhegulkgplgjbtyhltewbj,56,8 15 | qgtocakklmmyuwkefpfrlrkjdhvtftlwfpdwdbahzcgdwtplvofcuptqpwlxkqdr,48,2 16 | lkpkylmtaluyxnxshrhjwoosjffjagujgwipjnpuuwcw,37,59 17 | pujcczisycqikjhxrpaiwmogrpznwbceynrgoar,48,43 18 | ocbmbjmncjgfrjsxjkmdveidysgiqeechwmolbojbobqruavioofncbytpdtdfeyseullupffrvxrrtvbouzbmwfvulb,50,1 19 | afagtsimbfynqvisfssdfvoqbslmpkryjopffwsmvjwpxgugeoqntjtsdvipvmakz,53,82 20 | bzomrnivqkwyski,41,73 21 | ikxbdshlxrxcdkyrsakdiuynicnuhlmoxcoalkajfvauwshaqpyqokasdoaivhak,38,98 22 | mlbdidhmxdmpghaktygpfyolqeieggnfilbsqxbxycrpwubqfrikrvgaoexkdknbmtkcilxotyskkqgvfwsgsfuaht,59,30 23 | kjbmjeqflragppviyzvkidmsypkzqorxsdouueytokveoujsdynvb,40,1 24 | rthvxmtlugamgij,37,89 25 | inpozrhjszsiikenzcyalvzpgu,33,63 26 | qurvwnwgtmepddycuvd,32,42 27 | sppmejlvllmiepddyzckialbevyyyrjrklrbydbmdspejtlftkumgxfpvjedgrgxradairjddqpxodc,33,88 28 | pjxygmzazniskasvyjrojxqeitlcltcnfzfatiwaaetfveouwyjozwzvqoagzeherenqegj,41,89 29 | zmrpjdzcfcvynqriidlokugdojqqmcwcyntawmmldlaiyqsryxbl,34,31 30 | jzhejiotxplwcydmznvkuvbsrlswrxcoiedrcsbaewblkuzwrmxzwcqdpinbatiavfxxvjmnaoyywbbrfawiyoih,42,15 31 | 
ngrsqvfsnnubqedjjhfoctuoufsst,47,25 32 | pwtmyqwiyvdahi,39,46 33 | fexlcxpigjvxyihgsxdrqsvuzgegjbpwearvrbqqgiiohoiuwdhllhnishihnloqbljgmmrbhvttwehjlikqtx,38,44 34 | ifpggwiwmellovxbdshvuzuhbggwqwujvovgotbcgrzrgmcydrtqjygrfjj,34,64 35 | zugadwpogrzrdijvfrrajsfsvpkzshgejlpjutaldyexsidngryeuhrkekmzmdnonxnpqikru,29,91 36 | utdwrgcunkunekhnvrjilfyuuodqfqnrlvdyfdzcnr,36,66 37 | qblphnktzcggynqtqitdftdqoaprwpihsatrzfneummocvdcbzrmlyxgmwld,41,35 38 | rhuqrfjgenghyjvkswalukgdsxdbynwttfmkrlwqhg,25,40 39 | towgqtqsryhsgylkbgwygpifjuupkumofdqbxtvumcpryci,39,97 40 | pkyxrgpehtmlj,42,24 41 | qfpmnwlctzppnjvvzgzbelaggukecjvievtqcaydyagilmqagqhquuvcqaffkrrtpstxdilbxhayxjgyjqujdnictvzag,37,18 42 | kxourznhhvzjcu,41,87 43 | osqtptreqcoytafxpcichaqj,33,58 44 | wyrjdiskjzqigxvxaynmkcyhqewhmuinxv,26,91 45 | cpkjyhsmuusjbvvndq,29,86 46 | neczyinbwvgkndvvxclptfxvdxpykdwyinrdhkwrodipkjlequyamcqvtqpsphmhkietemauomm,21,77 47 | tkmcolwenyqegxspnqcctfcfrgoozeoaatazxvvxgxkwiqfcgwdppwoxdnijwzeofip,37,62 48 | ejiwfdscwhtckcqric,31,46 49 | edfjbkdofthergsxfcunkkzsixfsozjnpsenrwmibyofjubbrxntzuvijelhbiyunlzkpynvsvejojcpyyilcaddibwwb,38,47 50 | teimvdgdbklpkva,25,55 51 | hfculndbvilmlnylxifhfzkkfxojzygqlpkdkavioulisrikbpezkjwvfkjamkzhrgcgferhthl,20,79 52 | jhqyzpooamrfdxxglyajyefjnhpmzhzlhxvfixgibqxwjliflhxvipyscetb,26,40 53 | tolyyvkgxifwlxqueajjvjuietztxvmlll,25,22 54 | xbcjwmgjiyozdstkuealycopzieiqdzcxpsiuyhjrujislaywseletjpqxwgdihwhwpmtaajkjdceheqy,25,99 55 | dlzsrmwciwsadqctziixegfhgeprgitrgyteasdqtrmyvgnxaliwdwoqprmo,35,38 56 | irdxjksczmhdkfaubldoymnpsuhzjxulmzskfgtuvtfitwdfulazoobwwqbahgiiywhjoke,30,92 57 | gibiszcexbemcyycznzwinewvogihhfzajyilxkpziuybyewwgudtqblmqsnrrltcrhbnyzk,14,25 58 | oxbjhbytfbykujmzzynccaerkipiedvnlnascwvmknojdqrleoaludvly,26,58 59 | gvrvgobyjnttufktfsmmculrhhwtqhxojbdvdqgonvao,16,91 60 | zypwtxkbknqpcqlzjlxmefigbkuthkgcd,29,91 61 | nyeingfdontlyflupmnochhabnfhzdqgblcmiwrttvokodsxvlxnhzlp,26,5 62 | 
ldxlookswsgrgakznimlwuwkolqysnhqinjksgpbylqseogxhpuxoshwbmgopsklvranyqbujofcahhpfkbtfynwvznthwcqcet,24,91 63 | vnimmsukvzuzwkeuytogebyipmkvmynggrza,28,28 64 | oprwywholoowommcvzkbemdjfyzafissllmknqmlzgbyupaw,18,2 65 | lopvatoiwztsm,22,18 66 | vbkwwcemtrenzxarytcevigppaglpyrjgorq,30,4 67 | oyeyymeoovziujwnvbkucx,19,54 68 | mjyammwdvvvvtpfvizfm,24,23 69 | nfbjifaioxnakytvuwyzvkqrbehyurb,21,15 70 | uhaitphkoepksbggieaokjyvjilbwxgmaokgiiidhvkbuavvcunoq,23,15 71 | xvonqfvhppnykno,27,80 72 | jkydrfjckkodkgdudvcruniepiqvahahxxncwuzswcruigeawdrcupauwimxwydwoqpahwqwlkeebumxyfsmoe,44,43 73 | mpxzuytmewaeqixutlkwvgtwrhstefjmztpkinkjbsjvjhhdguuwzaczeuelowneoqehdlhftgewlggelrwbrjtbpxszw,33,85 74 | ckwgrhcjxhgfyanuwiemadkpknptbhsnjofgesnqtobuoxwllbewkeksalo,26,38 75 | libkfuolqbswbznsbxbssxbmwspyujppxq,28,21 76 | cucmycfdddyuibopuluyizlwccjxjcnbotesbhsruxekrnsxettwbfcccpnugmfbzkbrgwdaysink,16,28 77 | kbyzdeadkfkxijjdydjomompjkfdfmppotfmthakdgzyuswfwthatyqzlure,21,16 78 | kwspclnpaxmrfzjqwwb,25,56 79 | fkpflcpgtmbetixagxicmexgjrheirfubqlgvwwnmldjaqmyllh,20,53 80 | rousxxeftodldbig,30,6 81 | bccnrymjfklperugtjdzavxryoxirr,34,74 82 | hpoltmlmrjdzecmmuovapykzbdsjjivcecufmuebutduwcngvlpvolafvho,23,70 83 | ltbuitmzogliixhfcbdyoimzwpjpnnxpqxclwfatlnyxwllhhssrevhdsrtoptdf,30,76 84 | nghojnwggjopiyelujgessgjowtylxqvubbwbhztomhjzdwcbvacwvmhvjsocojvbrasdhxajs,37,92 85 | pmqpxabovpygvvlzcrtysaurs,18,71 86 | fjdvbbwajbngyxyxyrfbfxpkoelxhvrcebmminyhppdizkzqwzghsododdlvjsvkljyphhbmyyjzqjkyxcduzusgdi,24,87 87 | ibupkwerktlrmmpyarcmdloaazloqtsldgavil,16,68 88 | gitfesicsxuuzsvbxxwdqhkrejjhfcjthpedehozgjjwiytl,20,4 89 | yrqrzfobmlwjysvoaaegtfijri,28,39 90 | lnqpqdmjvemypvefpchvyhbrzmujrxvqzgdzsczck,32,27 91 | pyypdzvshwehagjafnzukpdaeraaibwrzyzalqxfpuumdbowetucizpvaqqdqndddhahixvjkzdediitdmacbgimv,24,50 92 | bwgrivstroyjaksumihghobyrnkbrrlflmgegsbtymjhmyoywgmpjjfjopmrrjzlxhwgikb,31,5 93 | uhnykdwwwewacvscvvjhyzbzrcbbxbrjsfjeucaygbvcqantuonquciajoyemlmmuqeefktziglakoytonblzlicggfgru,34,74 94 | 
swbpsmlruzykeedtayyrbmftqxpvqfrcxtfqzcxdhdvdtzrjbenodgssivhtqbrfsczumxkpbbuwntmojefcjclzobbsabrnej,20,4 95 | ijuzqegtjgetqwidvotdzjumdiccxihhctkofm,30,20 96 | evdgcnckpilhgdvulufvxuxlzvhqwrxbbfitqgjs,24,60 97 | xbvdhkytlfsnfhttkhjohrlmsyzthoguqrfvkdchdkvgbldpqpldtrmreoyjqtdnziuegsfhlfas,21,88 98 | zkjvkmkfxszcfcpqsqvdinrbpnojbd,24,61 99 | gzyufkffydyqjakkovvbkwyzrpuduugapmobtjfkxkh,34,17 100 | bvokvusjrjlnmpnnxknzskefenu,17,92 101 | gzxgwlhdxfakigxlduqujym,29,31 102 | kirtwwxpxethskdnftjroflgviklqingdwdjfsxlzwnx,34,94 103 | fnxurbybgloqredqigzxmeohsgsotsgktytkxhpsaebdgqxouqn,19,55 104 | zuifqqkviuvpfu,28,38 105 | ombmrkpiarhuhlloiajbtrfpsxiwoljtetxtigijclgtdhlomtlortpnhkvfjbjrqpaq,22,50 106 | owfzczbxfqnkpkvyisihxtqcmk,27,59 107 | vwsqrbontzspgavubavglwxw,22,6 108 | ulompvdvvlxdvkvpfyovlsnioxdmwlldtoivfcqsdiwaqezusylxyimwwtlsyxkzezdqupixkgpgfxqsctzsscgbsjvoyevwhg,30,19 109 | xugikcvscpmugnefaqptnochndscujgagxewylpnej,20,56 110 | tkdkabmlnnnjhfidfhqqpksdjfkndnoaozdhfnrsmkycasadxmdxazxjhyzjhlyhiftwygkbtpfzjxvcgsa,27,88 111 | ogzhwqqonsvlaoeijtklyixsbkkaqcvswodmaurxtwjvougrudjzygbsbpljbgiosznlsd,23,38 112 | kvsduennklksugupxtyydfmpxpnarbdagmnojyrugqjozlaswhupuxuhdsoczloeiltfxjflwjdxfuq,25,69 113 | swkedlolsuxahtxkqmsbzznjpuwkivheombujozlzydjz,22,41 114 | mumiokyvlbyuiagbhlyviywlrmihyylwm,28,25 115 | avkyydcxrxcyyjdhykyqympzixwkxbrdijgeeeogpywcildzujkzginkhyskypatumxqpsic,23,99 116 | bzuomsfsjdsgkwljfrmvvuqlfgymeijhoxrroryoweggadordycjfqavwxolcwbtysgmdwofqwtfdrjiastpjkkcelsddkidp,26,45 117 | igcvclhukiftoaczfxoqqbowecewfwxycnoquqfzdyorvdcgdvrlnfjpxvoswdksvrjsxkmdjb,29,96 118 | nbxmvpviuadznmnmjtgpstsqwrugzzkrksaglsmmxqefwhuvurgaejdkltxpfshcnnvbscltlivrntrvijlrtrmmwgfmuwm,28,44 119 | eiavwxfipxyseselqxevoqtoufoqsapaqjupwgbmijhxfqskofyfwzsoqljto,30,42 120 | lecetrlgmmzppvxzupvrmqmvagtqxvfbuajmactctoojqdumkpnptuutqehkubarhctynoxfauepolcejxuluzn,27,91 121 | -------------------------------------------------------------------------------- 
/examples/amplab3/data/rankings/part-00007: -------------------------------------------------------------------------------- 1 | cuithkyytiwidhgbobbjsogrnfkvxvyeytmgumqlmorafzioxjevadombpkhza,225,92 2 | czwbdyadiwyheohhwavwceoiybycdgowjwcmxpufmgzmqhqnqajxlwsjodmkbxwvnixlrnvqzhq,155,8 3 | urkmlwkhrnqcctjrnxjekebostnuendvrzzdpcewnbsgzqz,140,56 4 | tggikcuwjjzvibggitsbbtezkcxycrlsthgpuzclilrfsgzxyqbfluszkthixushgambeiyfudrwogsvmunkxrghwerwsujbsdc,103,8 5 | ncbmhzsswcqjfxlbdllwjcbisuwyqfdgbsvdgxcbloiwgmvtwucjwuohrcgsu,107,16 6 | sjosgyanscncvlhedgwtuouwmihwnqxwgudwzelrjwnpllkdiipzn,88,96 7 | bhzpxyhwayknmnqmdoxqzbiemelfingfekjco,83,61 8 | ntwwwjsxcrieezqfrcdvliuzhhceghghlwhdxfpnnggunezkiqfylefwfltfkzzsekigvlcybdldqsksjdbaapvdnhccjzkf,69,54 9 | cotgaoikqwrmdvdacdwpzyhkrxlvosllgdmhrdeyxzqrmznvyswhwlwpdxdovtmpxooupmyfmilinhmcivynzmpmmd,71,39 10 | uhavjdppbeluxbmtvvtrlkivozbicjoeonvlj,68,16 11 | ehqvpglnuphqrzatnmbwhvkppgbperroygrxhraoyshpnigzixneiyagxfkvfrewoddzxwcbfx,52,16 12 | vpdmxpyuohrjgybeyvoblfwhxotwibyzoaiasbsjnhnomrhwrrnziqqwwckfuqujsvhve,55,63 13 | bpyletjakycvtmcnhjgbqkwxyvwwvazxbucqdppabkfyncgfqgblgshoxciaiwfzgau,68,90 14 | xspqlghpzuyzevxle,56,51 15 | mshyqcpqqkyun,56,77 16 | lcmxtyzncdrrqnwwbfpfzpbpgvwcajlgvikmwowkgrahtjdsggsxkrvrqhoyujynopyluemjxgzbmnvhhnwoxxasefbsvhpitre,49,19 17 | dwkevbgwnjafqcxpulcpbyvpmlhegyocndkmnnbnueruthyl,48,25 18 | sehpqemzpqmmaikoqpyhvgujjeodlshopmmapjqubqf,53,62 19 | iqxezvsoolspcuteafnbmdhaftvcieszamkmgrggvldahoiwndlsvpwvaigujufmhmdhyxeedlobzfzdcsa,62,7 20 | wnmdfvgqxionggfiyatzcezzmvulvpmydeuvpehzoqtakcejiocqkeyyoiuvxzbgcauqjqwolkedgdowudy,41,85 21 | hylxcpwjzyqquiptbryzakehthg,36,31 22 | arneiaykbaccztmcyqnzestpfmhkkzvsiycipq,49,83 23 | bsgzzmfwtzhnwhlkqwdkbudiadmvhrpstpzawzdpnrhhomnizwmkbbuxwxdoxdqmwpwc,37,84 24 | xkvlievixslxhwqgfopepwoiebnuyrdcdrrkyinpycgpwuibnkongnewndwmbypiunsugrwyaqbbybmrkmavtbgpwmjytbjfxxtr,40,26 25 | qtirbatbzkwtbckiajkrvodlbtewkkduinhhyhwpakjytceldjqtsxbgvamrrbimwlu,37,99 26 | akypwmcwxv,40,16 27 | 
vztthiwmvgyrujz,33,68 28 | bhkvdnhwmuwjwjyxrrjqf,44,34 29 | fesgcybayzdacxidnfuuueujgoqhnjvvjnwlefrtbgbhxijvdyxiyrcqdtabid,34,88 30 | zyeustbhaqitoymoqdyinpnkfgejzknjpmxbdpcwiufdfkhtusxbdkoonwwftlcexdpecvbtmungticoypndcemgvyeogfjkgvuv,37,24 31 | acmdqjhawhmumgfecxubcemsftkanoucuaktnhmmtqxiquwzimnhtrsdlsctdbfwyjbysfomxzcyimvwtdgxgwcueqaqhqgaac,44,39 32 | pswmyionopgbmypivjwxrvonlhmfkxeujykjrkzzajwzcqorktslvwcyt,38,5 33 | jbsczinsgigcusorhvvygukspcwdtfurymcnxtusizgvphcuqwotvgldezzbsaqefpuvwiaxlrmwagujprddagvwkmqfyw,34,74 34 | txmsgdkosgchzizuibxmgdezfswbjirxnoajrxyefearabxybvf,42,93 35 | zywrhncahgecesmwrzwrwlnkoihbnyeeqvomrvcpzt,43,74 36 | xxulfsvijzfnxythwtweodnagcxavwfuiymvegfvqvwdetmljrerngjjiontbex,37,44 37 | lpmahuvvvucexznocggbccsbfkocszhdcqcteudomobpzyurwpxntzvilwmbfwkroodsv,45,33 38 | vvvcmvwiexttygacwxgjsssqyoemzmuixtkyaavsyfgmpjpjahtrnidzujkjwvjrvnxoix,44,89 39 | idikkpjirofzeeyzkmregkoqvd,47,5 40 | aguhmrrzqktjvednsalrrmoxcsemywgiltwxn,32,55 41 | uppqvlziasgmmpmtxfmyuls,45,27 42 | cltzcpgatgfhfmmkuzfxnoncogssu,36,82 43 | rxbmgwmulgmrqzhpadywojbltaqvhtksxdvvluogdnzayjzgflxpkomcmscdlnfgdicjpfuxneqcnnjtidwczfcrxhnkdzu,37,5 44 | zfdwvgtdvftajlqoncuytuhrylkiiicyd,32,77 45 | gbmeirbhajwbijsdexbvof,22,93 46 | tagdoknawbzkfxgjqhcrrnhrvrjkwtmfchalkpapsdewfhx,25,54 47 | foiqoroqsgjshmrdgovbtikyztzsmimyrugcxa,23,20 48 | rjjtwbcpnjrcocduyytxwhy,20,86 49 | mqeadihelmlaheyrmkkzwivtoeevbevmxxrx,25,93 50 | rttwsretdfzzmdashsacsazbvyodqdxthqzmf,26,47 51 | dnfehxocjndwflhnsqdakrkkyyvminclkzlhh,22,40 52 | vmhozxyhqvtzlunpqlumtkbteaxivizsmusveufsokmxshkaqkpubxuxafjidhhelnfabxbpdekqrapxro,34,2 53 | kgqpijsfrprtjslirixwaeduynrefmrttsffnexjfxniejswetrfou,24,5 54 | augckizztkmnqissfowfgrokxlgracvdkhdh,24,71 55 | xsvmympywnpyxmuhwoprjdcllcbvrsftrkyqheoygmkcrwzmmmadkpfqzqiebwzvihkmd,21,64 56 | qkuqvlksiyklicbkddmxsammbqysbjupbppliaizumpjhhfaosoublesfugloaigyzmqlnnahbnvisfvxhsbbt,18,6 57 | ejgaoinsogukuwfxetdovsdubtaczqskpfw,29,2 58 | zhcwfwrufzeklafmx,29,21 59 | 
nlcgjlbozowxotgoptf,29,19 60 | gchzrgbktmbxlsuqokvmhfjnksfyxcfmgqeqxlbqrubjydmoymtklwrwrjhwdhrlogjctkp,26,63 61 | qnatfqalbkigzbakzqrwtefdntyaapp,26,74 62 | qsbwbjeherxfrwqpczeuhkdcjroqsxrwtldzhslhpgrqivzwvuzpbpxytwdnesvrtoxkv,25,37 63 | ehtaqntrxayzbwgqpodupugmpxelmalbvygfwnovslxoalsbksiuybwuujudgftrurdxcwgvpfpnshidpbymefquc,21,81 64 | mxbsnisseeoruoxkanozuvjxmeltmfkprewmimetpcps,20,70 65 | jmkvrcyrdtbhglwmlivhnsvescdymvsvtldcwfdqoglsm,27,4 66 | rqsidxtyiufobthnqtllhsvmpytkkoacaciybjlegbilacxrkyooxbaskobmqwxlzhwbgqvusppgyklxg,31,95 67 | vahboiraqxoajpzozsnnevhyvykmccfrxbgumdcswvvqvrpjyouytpwwbvjpn,26,90 68 | urpulvciahfyrwv,26,84 69 | azmhbkhlitvuqymqfzeerfxvemjuxtytllnogkvit,25,28 70 | ctusfuhknoswziewdybfzkmjmlqkrjimchtzokjrcbybyvploxrowijkot,27,39 71 | ztcqmpfdigbqncqxpsvsbhodhgkpct,25,80 72 | hjpgcedpersrvplwynjujygpbyoeyzemebuj,28,76 73 | agxdshsoyejtlvsznltwhxsommsybugfgwhpsmbdhgphywcvfaemhskffsyixlqeazvxlmyhitfhlobaok,25,88 74 | mowckaydfrhlguatoatl,20,1 75 | qoirkypjyjd,24,38 76 | gqajrfphigtqbfpjyvbrubmkkekkaludddmwnabvobnknzbksdlicyqohqde,29,10 77 | zabrmyxxfqbttuclpsyooojrofahthaopkalfudzatvlzrvryriisbdwbszfrpitndrdtovkjkaolxa,20,99 78 | xntxlvwapkjmcnimteh,22,85 79 | naeakmwwaryffmxlnfpsddfnlvaeawnmbx,27,97 80 | atardodvrratseiaqbasgpjtokceqtlcyqubjkjxomgwprhbwbqecfbolbhxffqjtxpmzlqv,19,18 81 | alcfjplmboxcigxhvuotezlh,35,25 82 | smnshqfpqxcevrszwlnaeocuasqxksxloapyreqnr,28,59 83 | mgvuyjbebzpaimwvaappdkdkrqlgdvsbaonnksopqwqnlyqqcaalqthifkrktrzecqowojazhhqimdsy,26,61 84 | xdesdwervdiztqdzephjokdagqotbesxvq,35,68 85 | pgnxahqifryksbnvnwvjjhsblhlzquhepyfmfozexiaqqrwtrmryacoexjjzgaahkodwfnznndeljpx,27,82 86 | shpyubsriyezmwkpizaatrnsevbqysawxbeqqbnqymlvmomsutigcmctfvpv,23,80 87 | ovwqgxmxiolbgqrxgdhyirxwhojoxwvxqghhljzejcxfctuzwwhsszwwcjpabpv,21,89 88 | zbmeyihmvnzlkycldm,27,30 89 | zrrfkhqyxsziposgxxkbodepwymfefovvemrzseskrnupxufzojwjiupnmqfnwlqwybrlfhklcw,30,26 90 | inxcpnetasjthmwufiqwftchnzybggptovg,20,58 91 | 
izxalrfvbwjlbgmsxumznwdxsorpsqxnoabfgythmbtc,28,21 92 | ijzpkumkowdrdxrrupdrhpfkxywpivldyxcooqtjizpphdamwztkzav,30,8 93 | qexfatxccmvozswzvescjxpsvpapzwbklnwgcxdttcijcdgdvcfslxxsmgwmpbklwvvbfnhjixebvlyqofuk,23,27 94 | zfdwtcpbqb,18,7 95 | ejexxlydyqyzmlotjndkgiprvuaksedpvlmbryaelfsjomilqnuqcglgihylqdykuyhpmimwofcibjxapcpqkwsvfsfzbaowkq,22,76 96 | hmjvogjbyhfyfxdssprpzjnwderzpyuymjjyoyuxlhwrdosfabduzda,25,60 97 | cfrwqrvwdpuotibvfrcgbkzprjofenjidkzptekybeaiyzjtdymvplk,24,89 98 | kvsyouyixoqsryfdptrhfzvxtlzqtnqapzws,25,40 99 | xkhhvwfrfdqolxgywlslcjssiitflnlhtwy,24,69 100 | pepkxgpmypsnjepkipazugwyevlumpulkkrdofpiwqrjcbgzdsjsjkoeicbtykpjrihbiamikefwysuyku,27,88 101 | ccynlxqpyqxpbpaqcyauiksjmewcpfwfqutbpiwsneqdcmyogayjkjiveyzwbudwljqwj,32,11 102 | qrwxjhhgnqskuxbjzlbmnzgiaraivdwqifghvqzpkagrwcoseeseypfengysdicxzsvunryavwezysfhkxmiyinj,29,69 103 | sngayrtbgascsiokxuglkzjrlalagxssoeojtgfqaztpkspoztywjjpgbriidzcicqooechnhuhykxardtnbjnpiix,25,30 104 | waqqrnjfrwmsrtvmkkuhicsfuimcmdbrfzuyukzntzemerwmywlzkcwyvqvcjlbeahpfxbjqbxxfuwicvs,24,84 105 | utixkpjtecilvefwuysetnvufuligaaespxubbnlbzsygakbhbgeuzqcncroygicvorqjaubamidpeqqmmgqjyycxtyxpirwsp,23,63 106 | jyrzlwlhxjmtvsidlvjfnmxxyxhnhpjamthqywkjsapqacuibunuqcdufdiv,26,68 107 | wxgnbyczeujotljfqvmzizfgtuknpzmcbqlybqvzitwa,26,11 108 | mqmmawfbxaddnhnenwwbinuyevmqeitumgqkapxtltuuajrfmcupwosfwryrobls,28,88 109 | aeoqxcdadhreqbarnfjxybfylkjtovkepyilgdzbarmbtihyvbasfxanhriafouhejkcrcnnyqwqswgoysmpmvzugumwzswynzzi,22,22 110 | qvxkdswzmilpmreleb,28,68 111 | ttdewgtrrjmszuqgaxaitzhlefucmskqfrpfxgixg,28,54 112 | vvyqtukrdhvuuqu,32,67 113 | ckeykxzrxydbabttkodtockq,26,48 114 | rpdcxplebaecwykkwrsrkjpissqrfkdrceuwqnhcxhnyiktbhpzkut,29,30 115 | hdnnlgklxfjqefkccazpwlxbvibdwlczuwsyplhbpc,31,20 116 | jnqwtprwzfluhiztqnjqbvuyoucphsgmlubtloaxkeqtk,19,81 117 | qtbtzqjjnvuoldkdy,32,6 118 | jowrqnqujnwsqkggomnntwsjjnmnxfeaovpsxiycmqaynsbifvtxvazsebvaoqhxsiaqvyfvgqqvyoqlibtpxttbdaqwnmoooir,31,33 119 | 
zdvxdedipnfeyigozeoxxawlgnuksnqskxdtdyzobdbemuadflfzupzlvgyppzczlvycda,28,25 120 | lccjbceyzpihbvgyytnjgoizrnndnsprdnfhhgztuuvngivvxqaiipigrohrlwnkqkfcjsaawjazpjxnodmiyutcyamssrxb,29,88 121 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00008: -------------------------------------------------------------------------------- 1 | ecfuzdmqkvqktydvisaknmovbbsltqoufihtspydkagsujqcevtguvzsmwrzttmxqxqsxgollffmtchfxoqkujigsopmya,237,74 2 | dagtwwybivyiuxmkhbxkkpvybycyionubrxfqxjdmpsjraoadnzdhhftvklbfps,177,74 3 | emucailxlqlqazqrupsaphhmsgqifscofkyhybngvcbxda,134,89 4 | nzaxnvjaqxapdjnzbbvisvpmfwklhzqjgexhnqzvinldvfjlrifgapzzhmactlpyfvoyhxdnzngsrcokqhaozsb,119,68 5 | czoqdnkqnonnkmjlzfsntboumupseoahz,85,54 6 | csczwwuhzppmmagirffgkrqfmbptywyatsn,76,10 7 | rxxdcfzivveqiicvhbibtagagzcijtbxeygytznbxlxeyopeqvqcnialilmmtnoijqgmlxymdqzhgoryjkluyuresnv,85,75 8 | bgtfepelqqceryiwutvdetglxsystjvepyngbzpggxrjxjlxahundkahyjbhojjwmhspfzhmazihmlqlvafsibri,88,7 9 | nirxfdnojzdnpmsmixzgdlypkmziuybomkvmlmn,84,17 10 | tusmkbrqtcfxihxnuspukuswzkgtlqljdfxhkxqyrneqkqb,68,41 11 | drdyhgkabnr,61,9 12 | aoamzircowhjogwirncjnsxcdruftktlnfhxbwxwpglrycfbulqskfwhddmlwtvngemtepdwcdgd,61,17 13 | cdqbezipxeqrdckcbnscbzeriybmfcdkfpdodopxrftwanflnstjujixxxppkjtopphjalljroomknulhqp,70,53 14 | atbhmgxqoiokajrupfwibcohqyulhcccvedenuckabgxtqokxojmdnbughakzeezkjiiq,56,59 15 | xiqhsgokuydxezhgglqowliufbogeqyyijiiniwmirrgrodnyaf,38,33 16 | esstmookugkksfhctkephbxr,54,30 17 | rzxwewfgbcfrkqudwffwnuxeojqycloawyqclvegvuuhbqcmqzbmliksqntgcihizjcluvqjhxsq,62,42 18 | mqoxcmmlsbmxedrzhtktuayutwrwjcxyqktnwxp,58,75 19 | zqaqfovcbpxcssqognxoucdwtcascuhjapbneabqkdclohlvxytaoztngwsyeaenlavlgpme,59,72 20 | rnhdvsoyhvfxmfds,51,88 21 | rrpcutrxevevkrupoavmbktyprmnobzwrdgwqdxvfqqaeqflrtomornvdmbx,59,38 22 | sgkxpajwrzlfhrmqbnbnjmhbuswvayizqttntjomkfstnufnwonslfjfwskphwbkptbldtlanfqkcjitpxbxcdjefxr,54,56 23 | 
uexuhkcicyfirfciqnvlmczosqlshtdwplmkfxplffakpdxrenubwicglwlcvjdiwdxpmiemnpy,40,39 24 | ilgwvrerjhavnuq,45,92 25 | edgigyawwrkdxapnwvuweyewgicxbbwvkslhsyligkrmmlqlefysujwekfgrltqmxynqkrvcwstlxuyhrivadqxplyrboagpgol,36,87 26 | xvlilprffrfxizobby,50,52 27 | tgjlutgiauwshclqbtyjgetjqbhdwuxvx,44,4 28 | tnerxrfgtcvtxahlkqozdorsgfckjplvflcegncesclotgvsjxelqreompdhwmbeqcdprulryagtsqplqtrdnsnbln,52,42 29 | uuwqpajauxbmcbohvaewjzo,33,22 30 | isfxiijggamjiynmrjlgiklqciaonetojuajgjrxdsfnauvkykrdxpedosuprgjtjhqghvkjisecwajm,28,86 31 | dxlzbbmqbygbyispgwodpplacqdewbzyeaihdcmnackvlpnmdulylrsbexapeboejpnnwxarzqvh,46,61 32 | shldxwvstfzbnmlttotwzmvxrzudmrdijnmssdajjpgufwdbsskslgmpzfsucvvgkucphwayoxmoi,39,16 33 | njsneahmbtnquqkjpacajwxyhxwpwddwyjtvpmblsehoozdnlhohsehwspnmqxugnsjlkdgigopjypfzwsegdy,43,79 34 | bwwclqtuhnucpulivtgldmprqbenayhjejruhfssudkvdlxdznf,34,59 35 | lnzhyzvyvlrzzegnljxyqgrrkxysbfcgcikaoebdnyqjevtnovueyzclcycmfixhoxtzgknozcsvmfrpyvsjvkxthdpfkqoqc,44,30 36 | jnikmlfsvwfoplrdxuanzljvlonbux,36,21 37 | fyypresoyxqxxgnzmlaawbnifhfahwqgxelnynrbelcywxnvddaqnhfnphahstuwjrzrbynyamzlstctdiwgkoqaxvkjdvui,39,72 38 | vwwtpivdorwcseqcmrsvnwsrfkxlbagdcxbmswdzduskrcthiwxwqhjxoolnkwhejvwmljezibypm,41,19 39 | hxemjbadgkgpycdjmbohaxhzlrnrvahvsnokklurokrfebwyxgjurdiizhoqczprlkercldxmggndcocrqzrral,27,64 40 | sjcoqhalezffgajfqvmaxkpekujegkweabtaxrjphv,31,21 41 | nznryflmyyurhylozyoezf,31,8 42 | iuptmiexhlrimtm,35,81 43 | nmphiiyhsfxoufswtlbshiyfdvgslszbxbxeqnydqxzggophxwj,36,94 44 | qoundlzatgcrpfduhndbepofwizqjgzlxrqegvwzbgdyzrdegpiilupdmeyfrlmhfkpltvtrtocdsabhstylpcwlti,23,72 45 | hsndgnijrvdzaxhtcbdmasfanerkckmcydydamdamfvsebefsgzkfdv,26,93 46 | bkswsfiqyzaxanjnjbdkiksrlcspmtjjctqedkaoxdhtmleaepxtpmwdxdkhjzitwhtkqhupifiotnhwcyggfzhvqxfbv,26,9 47 | kjxfldpcrqfpylvdzo,36,33 48 | lyucblamnpmvmanzlbommmqtcnzumnxexfvfku,30,75 49 | okknrvflsqow,27,3 50 | juphyjzdqyzkzcbtfilzbkivcvirnmjihnouymcvxhamioisqognvdowatfdwjjfhbutuoteyjynkndp,25,44 51 | 
qpmhtbpwjsribswrtlnkokflxelbkqpvjggbaedauzwgskojewksxouqicjepe,33,92 52 | qqrxahkzukemuszenedzhjlilplixbpqdevilaveasz,29,14 53 | ooqguqrqocqtvgludncurpqloqvemqudbadyoauhqkdylhhlrdxuzhtimxmumahothiugnqoiwlp,25,61 54 | uftctlmpzxreammiospb,26,47 55 | egvkjxkladfvossfikauigdtxtoepnthxavmqozjnnigetmhekzkjbcgaqsdokfoyucb,35,91 56 | qmuxsxaibpqdhgphgwbdkwnfqmzjueggvllzsgcmphasmotlalxwucuhhpgcoiuthdu,33,28 57 | ubrrijzgpqhgqnvgfnkviejmifxnotrhuzyaiwwmtnzoaujnzk,30,75 58 | ejmesjgncrndsuuybeqdtluakdycfgnvnbfkpurnlqcqelcwaexqxlppzsrchptvdokbznzboxcejrkomm,23,72 59 | ityiesahtzcngkyrjbwbfntorkbipyyiyrhcbxqlimxnqqrwmyhkggqrzegjglpvcqrvzyxgrxe,26,45 60 | plvltehkjpuotvmagxvsaipiiqrylmnz,28,96 61 | gurmosmvbrmdgpbwkcfrcwqrwyyusopbolwulppdielespeesueq,24,49 62 | qwmgmxjiqszudtrisynqyuxbvbbaqmtnpivliribziujsgcvxaumewciyebguftdhgxdbunewimagicjygqzvdzuwyruavp,21,46 63 | oyxwtpogqljgttbgqtgaaagzdyultwuhuzzmcxjyhuyjthq,26,92 64 | ozyhjhgdoevekoypwhagtbjobfruamjklynpdtbhibwfoodlipjrsnafhieekhdiohtvwbekipgxygzwjxxaiopjza,26,45 65 | hbqrhjzhtehifdhcex,27,81 66 | zgcloiujqkkbpqpnvsjufmohgdifefxqvmvclddmyoefzompqfvbfughagrseltkriuoqpenxhvjnidscvaunn,19,55 67 | fztyrxoqdipapatgquqqomicvboafyehgmcfdmrxvrgkojfktxqn,26,57 68 | vihjioqscpauyyteasanynvvgfztiuqeegwwyniergqurocltthxcsyfcubzzotchgq,28,77 69 | uddqawryrdwralkmlqwkboiywycnbeurcsfjigientmdcsoipbymwvmblkle,21,91 70 | rjbjyczfpo,21,3 71 | wkbigmphilihyclqmddngojanezsrufgjbduysrvqbzqcooisnxhhovdejbccclg,22,61 72 | kkiofsfptbdkfpcsjtmcoljqyqievajuwnainbwypvppdwjagjbcnccydu,26,24 73 | mslferrsfdwfesiqdrcbxmqatmyycymdsqawf,27,88 74 | boctddyjucwnfpbbpclmkvygdfimowthrlbzggvxp,27,14 75 | sdwnjmdfbzpsxpvzpdgrqbkpwxbhuuqrpesbeikpfjejllscacoukstongmnfaijvyhfgfveh,14,21 76 | zfwcqirarwrifztqvjimdfgnvqqscbteigastwypftcsifdffxtvykmillemecttnypwvlfvuxhvzcstrpxuorv,33,85 77 | lsdldmstiwziunenckchdkzprixbkxopetspl,26,36 78 | ygjarpwwxtpglimoabhdrobflttmqyghmgfwufmxmaeewndnaniucxzedplituwnareyqcpcxzmjmshrnayqrwpcfhhxjrimeaj,26,71 79 | 
qkwumwhdurgnflpqbhixgrfsqaeabshffishmxohe,28,85 80 | bummftyztpqonbinpezqsvgrssrwthvvjporxrjxzfqcqkvmyejfoasvkssvddgiegwkxrnjhquxgyw,19,79 81 | yeaorkxovnrmavjptemfipcyxompbib,28,73 82 | ciybroztzdrzcwarxgoettvvbvuizuurzhgsjposshzi,28,17 83 | czdmqxnmvcsvrnfefmkuqyjenbrizjljkiqnulwohcglqpxwwxsxoxjbpcnnroaqbykveffxfdruxdbkdt,29,97 84 | ohzugpcocxskrkogbwafmmvfrliqdbfcjifnygyvdzotl,37,3 85 | qqjwttcsfmcijqqvzaiuvlcbqzkhpmvizlmkanainxzarkprzotkplkvtejgtwyxrunudyszfkxspztkedisulbafckelhjmg,36,83 86 | zjvempfilvykjhbnzmguhkpvplurdssrujmzkwcbypdcuvnjrovixtedoywjfdve,25,17 87 | jpqqdwjsubetihqwktasoumjlacktnghtzkzswcmifayuaynzbtmejdbmfzlbnpmabmqufzbijzpmclkpqtyjwptjdve,27,96 88 | jjyjwdftspfvuqsgafyqtuxplczytiktkxfsdpsvzrkrwbueonfunbjnckgmidyetxjjfcrciwoxlyvp,23,35 89 | uhkkjejodktptyijjjixqebztrupmgilamakapyn,23,44 90 | vlioveugwllcysuehatlgpvuycrommwlbxzbuiibkazbozo,36,25 91 | pnvjxchkrkwgjpzmszoezuekfhnrcwybekuorldwultqdjxtvgsvpscczqcjnaskhwcd,24,55 92 | xnfpulsktpugoewfvmhkvhstutlnnsofgxfkhgrmlzbffnuqq,24,89 93 | qoxbapijuwafvdvysriokpynwbsuggscjbhikkgkgcbcvfhwbkjrvalifhgponhgh,22,15 94 | ghzwcsiuyjuckvftfvwoneveslyafuetymzfxskgyyxkzraxuzpaogjselgcfuudpexpizwcvllnebszvt,28,68 95 | mhopdokaejwphihgkuizaoctkdvzaipttnrmuwaowrbvosojntonugaqdgnbzrdlcqkrnyzodhvhxnyjffbibaqseflbqlzaukr,19,27 96 | jtedezbstioggrzcndnhqlyxenublkdorzufcarj,22,98 97 | spqwgyhjllngzpjaouhaxyfyichpcojqkbdycjvgafchjegvtkfrduejjreqskmduygdxqjwwtgzmhdnioemyu,24,89 98 | zofdglughshgqqhupdqekdqjrmyfuhpgtmcbcwxiv,29,56 99 | fpluyzcalprrwvxzwuktpwtftoqvphzruyfbteqprcfgseprtkiqbgopppazuheedmjektuazhbdwquqtwjfmacrkrnj,30,59 100 | bohljurxxbatchpqjaraxfuykasvvrhygegcgovmzmdkktippdwsmauhfkubzqaedjiwhvwqcdcihnmwuumhcwpmj,27,69 101 | nrcigkhfjajmoznzbs,35,86 102 | efwokyhhoyzrtnmuvuosomtzalgbwhkfklrfsiacattutdtkcalntljbffbfvmfseswtgygqvaygoiwwbstotqb,20,25 103 | peotthkuaexjtuipjwawonmm,30,27 104 | ymrbdclhgoqcdphrcadolvvuqmxcggmlcnjvwtrlblohotifhovocokkpydcztjsfnowqoxlrdmpmlzzxexkgrxszvy,15,35 105 | 
jvpjtsrziuxnfekiwcldqsf,20,47 106 | zeipuglkjmsicyiukdjhswqdrazwhpjpwuzzsacgrzoehtcvkdnymhonnocnaacxhzsojynqlqo,20,88 107 | lbzxirnnthjodqbnlgsspqsja,28,33 108 | rrwpeuczhqmijuobdsgwwiqrslwjmttqqqcqlvdoeggxasdpengdbpnotjqycyofmeskdilcfohcsuxvtpwjtbbmtbgow,30,24 109 | wqhujnxjrqxmovjpxxpcuatwtoovebrekddhn,23,55 110 | vamlataeexvlctwyxby,17,85 111 | hyjutsioxovbg,26,18 112 | etayifympvnucvbmcvnqakudfrdoikncqlglprowsjkztwkvengqqlonvwcbsyjkurhgrrjpmfdkwrdrknunqjlrfqyun,27,45 113 | arsyyzoypwypkhkwghrmqkvzekcpzpkxnrqjtne,24,49 114 | hfvzndmjdcixwczythgpbskf,29,50 115 | toyahapbfkhpntbdlqibnnxhvpihiryprybog,25,4 116 | polwytlsjiytcmtxbqewqhewcxlhvvvtgqmwmlwthzveowinmmuyipfdd,25,48 117 | rnyevrnmqordkpzponmongmlknosyyvxdulsl,25,47 118 | aqgpmcxahvygx,30,10 119 | alwlcjtsuu,29,1 120 | lttxwrcaioxtlgizpcsthongedooqwuqf,28,8 121 | -------------------------------------------------------------------------------- /examples/amplab3/data/rankings/part-00009: -------------------------------------------------------------------------------- 1 | urnbqjjdtrqglbookyfekylrnysaootwjyrmdtaxfuwbwezuluctymtvohiiauqaxubxrjsbgeuoedocbs,207,28 2 | xmsrinfbjpgycjjdogjykstqgwnnhuirsjsvzxiaxumxmvmihzpliftrpjfyxxlu,146,70 3 | rpreurxlhltbwbrymhfdphsibqzimpndhhtunugwthcdkmkverzxwdcga,114,88 4 | kpvhodkqxlzvcuadnbqmsarcymozyixvaihjpukfaplhtaqetbhmfsmnycapqaymusvednugc,103,91 5 | vlkaennnijrdztzuvsevrdzfhitfsrxunttjttipdnnmmqxyenttlohvsoz,79,41 6 | iwezajcvhieflfetnogaliideuwohcgpxdhquqximevmqwhtprjsefibwnpqgmltwepmseaiz,86,74 7 | ozqjfnpffnaiwqlinliudhgmgrtpdxfawxnlmelym,75,29 8 | zbalidyswoveyccapuegnikrgklktulheexeutxzgezm,69,50 9 | lncztavxdfzhnizyse,55,85 10 | wmnvheoblovmlnsktzpbqcxzwgsvndkwygzlwnkrjkhnkrcgalfh,63,65 11 | wykfplohmcwewbberaiekupgy,71,29 12 | ffpykxxdlnzyurjvkagfheejqfqftkcdpvcwkllk,58,57 13 | lpvqoegkjwfxxfpqowtufrwnbcdlrtisdjfgzfwiiptmhayxicwhcupghmnaki,58,70 14 | gefsrurekqjwbkpybmcbfoglvonkvqxtdpnksrzhneysdyidbr,57,34 15 | 
dpextiguedhbxtrbajfxvdmbryqmcjoegtnrgpecvvjvhhgtsyndvlgdiykbtyrrahibrlsbfobhzmqyvdaxaon,41,67 16 | ccotxrauqdfctsnsbeufgsbzkwndkfypvxfvv,46,94 17 | nyoufmpsivkdjuwvwqxavydoycjzjopwlqnvvjokmbvhfjzfzvbacmz,49,92 18 | anecuecqkkqirvjhvhvynzxdmdzlskjqbqxgfxbqrgqiuwixkjzprwqqqolkilozvynmtueeruocqvxngvugyyehgaq,52,38 19 | rphattbhselefytrwgcqdxpjgzodzywjsefpyijbrhhiwmpl,56,77 20 | zcvqsaekjzswowjfvnfdijdcpbzdbgqmzexhcgieqreppyv,59,80 21 | bbgoawzxlpycugbflaomqnaiqexpjcksajgjqrgcgoavroaksjlqhroyugvjlnksaxdvxssvnuvrtbtwpkfifuzeuftwue,59,18 22 | zojhvsamqdlmnimpcfwlmukisvshxavfnlhhmlzdmypxrrjdfkplwmjyzprznezyago,47,44 23 | jwcgdqayrzvrzwmiokzhqhvajpolhnhjmxvygwncrpv,37,70 24 | agxhqrrtzghceqnxzrzmsavfdmkkzcwnrrqxleet,44,90 25 | xzxhysrxdimazstaanxmxxsbxpuky,51,24 26 | wtcvbrdzayupunyvxhhbzghrxjlnnvbfxvaoswfig,27,47 27 | gdgvzkoqwylcweapybzlsgflkukmsjfyqnosmltmuhoavcnlwabrjnyoxmszrtrykiojksfgfdmlmniolqs,46,96 28 | vfkpuoennnfgwvetfjyrnzpwjslzwqranmtggbnyvpyelmuzoudiam,30,15 29 | nimldoffnozisyvltftimjiabgfcopblcbjaprowyofqsgywwecqjlemgfie,42,27 30 | dsubvmstvwvdwevzbixzyzevtjgloquodxwseqojlcefeakwhhwotmsdmcjfcdymnfhymhll,28,99 31 | mzxxmdthdhoaozrrdfphgrgbyokuiwvtcjxmmszahrfkldte,47,93 32 | inhikewnflpzymhlvleqgezlpvrfdytvlxuhqfzxcbidilkjdumztcsapaeu,26,81 33 | qtcnwzajhtruqtfizk,43,61 34 | ihriycldlxjfeiarfjnxpmpisbaobomggrmlaztzjrglgxddikwmzlfthetpcjnwtgpincvezshp,37,63 35 | wqlqkdssdddpvalyzavhczjpwlunmgvrv,51,62 36 | lshrphbrefmavzdapumehhyqssxqpocgcxccybufcmbbfsnpeabxcyitwfmimiekjuffg,38,92 37 | guhmssklrbsuzefkwjtmueakpoqsasfiqvfjqfdyilfxpdptappaxrowyalmwfhaaaqbzgncttuvjwx,33,53 38 | xbkobrcutssjxcxokwezhobfyefjjbyoixbknmdhynldcaziyhvlsowgbixhjyjcxzxcg,45,29 39 | rlrsbfedhgaqeaot,39,82 40 | qvfsvhxexdovinlufweyjqchdvffznpqkofshlqn,39,23 41 | luafgyseqxacmiyufbhzze,56,63 42 | zkdolnifbogfhabajd,42,83 43 | mtvfbounizmfpxyuuhmnrohhokibiisnkcmfhblzpylaufane,48,27 44 | pfibljmnsmqkgkqhgtwybskjdtyjohl,28,82 45 | wacwstfwqprtqlzwwtxldrknvkonj,26,89 46 | 
gunzoeasolkdofcavv,17,42 47 | cbojqcicobhyayipoclheltjpoqshantxzhdwpuayaeodeaecl,23,97 48 | vzxeqzbdrszpmiopdudbbfvb,24,10 49 | loqduevaxdlprohgztlrr,28,75 50 | rexhokifsnkpphvamxeaeiivtqohuxnfjwrblsrttjpuofzlnbvrhfsbzoavmsbxw,25,50 51 | bgnsomiulwmcdkahudehrsthihjypbaehmnpjubljtuer,32,36 52 | vekjbygetnqofwrtakosqzopbzorfalbrwbutqipmcsqdkqrrboqannnopgmuyebqbfcnmqgntabxihuyezlgwi,27,78 53 | bndceheogmuwdfzptjipsmkzgjopqarqaraghijfczzkkimpxhtpbbhapnrwjefnujzppuboyhrcqagzoqfrgzbbdxrgp,22,91 54 | drorwjpynhdxfdmtkpthqcpaccwuqdnxsqdhcvnflnpbhledesunrvgvafkprnnozrmcw,25,24 55 | yohxxbdcivrewetztqthrrnmtperkziwxnfphawvlquedygbeifwxlbvktlafyhdbii,24,15 56 | esavkjmdruvmjgrnwdoythwiomlelh,25,60 57 | piuucqffvdaahwtybhlgwsmdmqygusqpkqopolwpyybwgmtlunolbzmrfafywaqejsy,31,51 58 | ecnhpeldxatphzmiagmrtrmpcrmeravqhsmsvpwvfqasdizkgkjfgtfgqcvhkslpvlpwqsxztnjnkgofotrvfkmzwozal,27,16 59 | gympxqouggjbub,27,69 60 | kwhabptvgxsgnenteyneepby,21,94 61 | xobbkalpkqsojnrjjqbfugsdzujimtmobbxrxkvmygystmfdsgleltrmidoqmjqbqnwrqsqsfgifxgjpdzve,25,27 62 | dyryryofrulq,23,19 63 | nsjesjsnwslvrvdqxbgdekpioga,29,60 64 | tsxkngebmufrsargxcywcspfcqicjqxorslfsw,30,23 65 | pegyqgsulnmbrgpgvpibudbwewmpdc,21,50 66 | zootygnarotf,26,79 67 | ispqhskgxogebw,31,56 68 | hjkbjelyfljymyoclnhndiszjytiaodrejw,18,99 69 | xodjzkwloikhcfisnsfpomvksxwzm,25,3 70 | motgfumbpydniflpakujufyutsqibuprajwpanlpqbxoqvnsknyqmciqegxeagrjyflodevkqlagcb,26,9 71 | spovhausxghoczsmlzlcntgnrfwsaktoeceiruojxtgjugwrjrqzzqnqweyhcnafoxvwhoo,19,20 72 | reaybftthkjdlveedto,26,83 73 | lbufapnbqsrtfctgqknkudedk,38,86 74 | jatkkrzqukasmnjtwenysjsdmxqvznsvgruroybuwcpfbmcl,26,75 75 | mkmxuiuacpugqlwidcjqwvlovhaecwwpzwqujdycqpppxowfjxjjithvt,20,72 76 | qobxaprtscngvpswktzbkmjprrrzixngnggynalrnexoncecsczsipprzavidkds,23,55 77 | zjlactmmadxwnjztkedjfvuxwxucyotmulwuybshoyemgrodhtervfyftcpopvedpyrxebqnybuntfpwcwxy,28,26 78 | hsueviaosellsfwcqhtzqsvprzijkudteqbpmasuqchnz,26,13 79 | 
tyuwhbduykrpekhmasyyoaisimmqhnqmwuktoemaunnfdfnngbruamkysuahocaoewzrmi,23,18 80 | cxftmjfniev,25,93 81 | sgfzuauhxvfukfvbq,33,25 82 | jsdznluiflzhzaeviutmmeeaofakrpfpieykwcd,19,44 83 | evheusiampvcgmoqlcacoozzsrxhufbfxrtchieizfqrpnpqkuidqnugniqrxolotiolvjxtymmdiq,24,85 84 | wegjsgmqkhrykagvdlcofgzlushbxomgokncegnctatktypuyysomkjqpnnmxz,18,97 85 | bblykezvdshf,24,77 86 | eyomifqdmxgloeqpzkwrifecoizrz,23,3 87 | rsleqbunxrwokshcgghehonebzhufuzolzfkosztbxgydtnnvblfanvkoznironkdvswhehrmojtjsaspbausidgkklpm,34,81 88 | beodbwqfggfjkbhnmdqogmsrqcpmrxmlvwpconxxdxplrocczzmezhekdanwcwfsmgpqqyqkaadrimphtjddvatqacnjitswhnlm,34,18 89 | aynhtcdtcvcrhjvfwgugipzyaxgwdcotjllkvzgpdtzpvsoj,33,91 90 | onsnwqswvdvddkogsqiejoapwgteywkoyhdytvmembawvxewwzieplyilqzacongqdqmin,18,26 91 | voyrqbmyqsthebvhstxpojquncpivkognsjnlcguoyayfohhaygujo,25,53 92 | azxummpwyexfwzahtheyizjruprnwsizouqqmrmklctbecjvyvmve,28,48 93 | nrmlzbmowvliwumxiz,22,46 94 | lwavrqepxkhtdsxhcjfdxxnzlpxdubklkcdpil,26,86 95 | afdyyjtnsfzykuyjnbfofzlzbzwklytvovjsyyztrrvujvyzmieftzydoyjeyojawcicqsppppatco,22,73 96 | wjizvwspkszoktpwhtdxqvohprfjfdqcaot,33,79 97 | iqndbeftubzkdmlxfrlzsxcbaglppomgmbfcbocbvkyvtfniiyug,25,88 98 | znqevfrmopxmk,23,8 99 | glzxsfpuhilqtkrprsebbvvrpkowjstkngpzawgqnvwuhfndizbgjndduyxdbukrsxwiypyl,20,88 100 | siumgdhwjqtvfeqbimtctkbededx,35,30 101 | gdssftuicgsgbrhtbgncjtmsvlxbjrsoqfvqyxfvhfyybpstuhsbquhurezciodrsgftheqooslpszytzof,29,58 102 | hajbwruxofoaccpwilvokanghhyqzfnxhhbgyxqsblmpgpcmkfyama,22,60 103 | gkpkgoorvwfibzsueswkrilbnerammespamzjuvtzxssvuwotnoaklbusmeghvhtjxcatgkjpcaufvuofzwpqnygtjc,26,41 104 | zcikfozjbseeswukyprlqodjrokyvygmhipgmea,33,85 105 | jhqkqxfxeesbodjjalcbgbruqkmwzlfewsxnvgmixugrnkypofvdloazzrmtxdsqzfaumniiftizaori,29,98 106 | ohvyquqgkikprpswownkflzvujlmzkcauuvcnkwfkiygxgsnqfbflxvexjhrqkxucubwkxxvgyayexzsubdtwrjeagxdequhd,27,25 107 | dlsoqddaqlmdcdzjiouqmasmmmewzltlavugrdozoyk,22,69 108 | tcstiuokbbrpxpvrksfgelqretsjwrficlzmktphmbylm,22,21 109 | 
kfolefjlbhsupjnpozwuckmvwpfyxtoouwlccpevyjhqdlprfskxwknsluaptktouonjwnjarsyn,36,54 110 | ncuswrdkcathwjetwpevhpeylkardtmzrjjffznmeurrcedwuvrotysqgttennispndijohumpa,28,36 111 | erhzvdritnnbhczwlrppyfhklmnmjfhbeafaihzuverkthxddjhcvgkoehrkabmnm,29,29 112 | ftifmlvhntktvibnhurvltbkiokrxbjtcspxdvckfubdifppankvufezebhbthbyaghwehjtfufybmrqokauctocq,23,8 113 | otzbplwsnuprwcepuqdpghguxnotzc,23,46 114 | rmxgfqtyhvxmhcfofsneb,21,86 115 | hotuvxfphrjrilmurswhxlrarvwzlrrmpiugkyzcbhrfjequltiyvsmvvijgulwptopsusbnwhmoqqicydxygdmiqh,32,52 116 | sgmgeyfnciuhzztljgjnigkqvirknyrfrpdxtpylyinpowimpakvicxbprtgdzzzajwqlmhdlszvpdvnac,28,47 117 | lowsvdyyqtjozuolsorertagjuyaxu,28,22 118 | fhpbpmtpuemyvgulkkkctipjfjqrsnnejlksyasaozdutucoadrfdtzdzdavduuiu,28,76 119 | whnmrvxjwhczlwurcnjqimgaiuuxsumntycumfnvxxctbchmelgszpmtgrdbmgpgifgsncybwaaklysxkmovana,32,71 120 | kolqznrsnilljgygpwjyyatitxur,34,50 121 | -------------------------------------------------------------------------------- /examples/word_count/Makefile: -------------------------------------------------------------------------------- 1 | BUCKET = ${AWS_TEST_BUCKET} 2 | BIN_DIR = ./bin 3 | PROG_NAME = word_count 4 | 5 | .PHONY: all clean $(PROG_NAME) input_in_s3 6 | 7 | all: $(PROG_NAME) 8 | 9 | $(PROG_NAME): 10 | go build -o $(BIN_DIR)/$@ . 11 | 12 | input_in_s3: 13 | aws s3 cp ./metamorphosis.txt s3://${BUCKET} 14 | 15 | test_wc_local: $(PROG_NAME) 16 | $(BIN_DIR)/$(PROG_NAME) metamorphosis.txt 17 | 18 | test_wc_s3: $(PROG_NAME) input_in_s3 19 | $(BIN_DIR)/$(PROG_NAME) --out s3://${BUCKET}/ s3://${BUCKET}/metamorphosis.txt 20 | 21 | test_wc_lambda: $(PROG_NAME) input_in_s3 22 | $(BIN_DIR)/$(PROG_NAME) --lambda --out s3://${BUCKET}/ s3://${BUCKET}/metamorphosis.txt 23 | 24 | clean: 25 | find . 
-name "*.out" -print0 | xargs -0 rm 26 | rm -f $(BIN_DIR)/$(PROG_NAME) output* 27 | aws s3 rm s3://${BUCKET} --recursive 28 | -------------------------------------------------------------------------------- /examples/word_count/README.md: -------------------------------------------------------------------------------- 1 | # Word Count Example 2 | 3 | ## Benchmark Results 4 | 5 | | Benchmark | Job Execution Time | 6 | |----------------|--------------------| 7 | | test_wc_local | 170ms | 8 | | test_wc_s3 | 3.79sec | 9 | | test_wc_lambda | 3.92sec | 10 | -------------------------------------------------------------------------------- /examples/word_count/word_count.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "fmt" 5 | "regexp" 6 | "strconv" 7 | "strings" 8 | 9 | "github.com/bcongdon/corral" 10 | ) 11 | 12 | type wordCount struct{} 13 | 14 | func (w wordCount) Map(key, value string, emitter corral.Emitter) { 15 | re := regexp.MustCompile("[^a-zA-Z0-9\\s]+") 16 | 17 | sanitized := strings.ToLower(re.ReplaceAllString(value, " ")) 18 | for _, word := range strings.Fields(sanitized) { 19 | if len(word) == 0 { 20 | continue 21 | } 22 | err := emitter.Emit(word, strconv.Itoa(1)) 23 | if err != nil { 24 | fmt.Println(err) 25 | } 26 | } 27 | } 28 | 29 | func (w wordCount) Reduce(key string, values corral.ValueIterator, emitter corral.Emitter) { 30 | count := 0 31 | for range values.Iter() { 32 | count++ 33 | } 34 | emitter.Emit(key, strconv.Itoa(count)) 35 | } 36 | 37 | func main() { 38 | job := corral.NewJob(wordCount{}, wordCount{}) 39 | 40 | options := []corral.Option{ 41 | corral.WithSplitSize(10 * 1024), 42 | corral.WithMapBinSize(10 * 1024), 43 | } 44 | 45 | driver := corral.NewDriver(job, options...) 
46 | driver.Main() 47 | } 48 | -------------------------------------------------------------------------------- /executor.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | type executor interface { 4 | RunMapper(job *Job, jobNumber int, binID uint, inputSplits []inputSplit) error 5 | RunReducer(job *Job, jobNumber int, binID uint) error 6 | } 7 | 8 | type localExecutor struct{} 9 | 10 | func (localExecutor) RunMapper(job *Job, jobNumber int, binID uint, inputSplits []inputSplit) error { 11 | return job.runMapper(binID, inputSplits) 12 | } 13 | 14 | func (localExecutor) RunReducer(job *Job, jobNumber int, binID uint) error { 15 | return job.runReducer(binID) 16 | } 17 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/bcongdon/corral 2 | 3 | go 1.16 4 | 5 | require ( 6 | github.com/aws/aws-lambda-go v1.24.0 7 | github.com/aws/aws-sdk-go v1.38.45 8 | github.com/dustin/go-humanize v1.0.0 9 | github.com/hashicorp/golang-lru v0.5.4 10 | github.com/mattetti/filebuffer v1.0.1 11 | github.com/mattn/go-runewidth v0.0.12 // indirect 12 | github.com/sirupsen/logrus v1.8.1 13 | github.com/spf13/pflag v1.0.5 14 | github.com/spf13/viper v1.7.1 15 | github.com/stretchr/testify v1.7.0 16 | golang.org/x/sync v0.0.0-20210220032951-036812b2e83c 17 | gopkg.in/cheggaaa/pb.v1 v1.0.28 18 | ) 19 | -------------------------------------------------------------------------------- /img/word_count.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/bcongdon/corral/3b296bdfd98d3fbe84d83699ad305e660e786a63/img/word_count.gif -------------------------------------------------------------------------------- /internal/pkg/corfs/filesys.go: -------------------------------------------------------------------------------- 1 | package 
corfs 2 | 3 | import ( 4 | "io" 5 | "strings" 6 | ) 7 | 8 | // FileSystemType is an identifier for supported FileSystems 9 | type FileSystemType int 10 | 11 | // Identifiers for supported FileSystemTypes 12 | const ( 13 | Local FileSystemType = iota 14 | S3 15 | ) 16 | 17 | // FileSystem provides the file backend for MapReduce jobs. 18 | // Input data is read from a file system. Intermediate and output data 19 | // is written to a file system. 20 | // This is abstracted to allow remote filesystems like S3 to be supported. 21 | type FileSystem interface { 22 | ListFiles(pathGlob string) ([]FileInfo, error) 23 | Stat(filePath string) (FileInfo, error) 24 | OpenReader(filePath string, startAt int64) (io.ReadCloser, error) 25 | OpenWriter(filePath string) (io.WriteCloser, error) 26 | Delete(filePath string) error 27 | Join(elem ...string) string 28 | Init() error 29 | } 30 | 31 | // FileInfo provides information about a file 32 | type FileInfo struct { 33 | Name string // file path 34 | Size int64 // file size in bytes 35 | } 36 | 37 | // InitFilesystem intializes a filesystem of the given type 38 | func InitFilesystem(fsType FileSystemType) FileSystem { 39 | var fs FileSystem 40 | switch fsType { 41 | case Local: 42 | fs = &LocalFileSystem{} 43 | case S3: 44 | fs = &S3FileSystem{} 45 | } 46 | 47 | fs.Init() 48 | return fs 49 | } 50 | 51 | // InferFilesystem initializes a filesystem by inferring its type from 52 | // a file address. 53 | // For example, locations starting with "s3://" will resolve to an S3 54 | // filesystem. 
55 | func InferFilesystem(location string) FileSystem { 56 | var fs FileSystem 57 | if strings.HasPrefix(location, "s3://") { 58 | fs = &S3FileSystem{} 59 | } else { 60 | fs = &LocalFileSystem{} 61 | } 62 | 63 | fs.Init() 64 | return fs 65 | } 66 | -------------------------------------------------------------------------------- /internal/pkg/corfs/filesys_test.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestInitFilesystem(t *testing.T) { 10 | fs := InitFilesystem(S3) 11 | assert.NotNil(t, fs) 12 | assert.IsType(t, &S3FileSystem{}, fs) 13 | 14 | fs = InitFilesystem(Local) 15 | assert.NotNil(t, fs) 16 | assert.IsType(t, &LocalFileSystem{}, fs) 17 | } 18 | 19 | func TestInferFilesystem(t *testing.T) { 20 | fs := InferFilesystem("s3://foo/bar.txt") 21 | assert.NotNil(t, fs) 22 | assert.IsType(t, &S3FileSystem{}, fs) 23 | 24 | fs = InferFilesystem("./bar.txt") 25 | assert.NotNil(t, fs) 26 | assert.IsType(t, &LocalFileSystem{}, fs) 27 | } 28 | -------------------------------------------------------------------------------- /internal/pkg/corfs/local.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "io" 5 | "os" 6 | "path/filepath" 7 | 8 | log "github.com/sirupsen/logrus" 9 | ) 10 | 11 | // LocalFileSystem wraps "os" to provide access to the local filesystem. 12 | type LocalFileSystem struct{} 13 | 14 | func walkDir(dir string) []FileInfo { 15 | files := make([]FileInfo, 0) 16 | filepath.Walk(dir, func(path string, f os.FileInfo, err error) error { 17 | if err != nil { 18 | log.Error(err) 19 | return err 20 | } 21 | if f.IsDir() { 22 | return nil 23 | } 24 | files = append(files, FileInfo{ 25 | Name: path, 26 | Size: f.Size(), 27 | }) 28 | return nil 29 | }) 30 | 31 | return files 32 | } 33 | 34 | // ListFiles lists files that match pathGlob. 
35 | func (l *LocalFileSystem) ListFiles(pathGlob string) ([]FileInfo, error) { 36 | globbedFiles, err := filepath.Glob(pathGlob) 37 | if err != nil { 38 | return nil, err 39 | } 40 | 41 | files := make([]FileInfo, 0) 42 | for _, fileName := range globbedFiles { 43 | fInfo, err := os.Stat(fileName) 44 | if err != nil { 45 | log.Error(err) 46 | continue 47 | } 48 | if !fInfo.IsDir() { 49 | files = append(files, FileInfo{ 50 | Name: fileName, 51 | Size: fInfo.Size(), 52 | }) 53 | } else { 54 | files = append(files, walkDir(fileName)...) 55 | } 56 | } 57 | 58 | return files, err 59 | } 60 | 61 | // OpenReader opens a reader to the file at filePath. The reader 62 | // is initially seeked to "startAt" bytes into the file. 63 | func (l *LocalFileSystem) OpenReader(filePath string, startAt int64) (io.ReadCloser, error) { 64 | file, err := os.OpenFile(filePath, os.O_RDONLY, 0600) 65 | if err != nil { 66 | return nil, err 67 | } 68 | _, err = file.Seek(startAt, io.SeekStart) 69 | return file, err 70 | } 71 | 72 | // OpenWriter opens a writer to the file at filePath. 73 | func (l *LocalFileSystem) OpenWriter(filePath string) (io.WriteCloser, error) { 74 | dir := filepath.Dir(filePath) 75 | 76 | // Create writer directory if necessary 77 | _, err := os.Stat(dir) 78 | if os.IsNotExist(err) { 79 | os.MkdirAll(dir, 0777) 80 | } 81 | 82 | return os.OpenFile(filePath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, 0600) 83 | } 84 | 85 | // Stat returns information about the file at filePath. 86 | func (l *LocalFileSystem) Stat(filePath string) (FileInfo, error) { 87 | fInfo, err := os.Stat(filePath) 88 | if err != nil { 89 | return FileInfo{}, err 90 | } 91 | return FileInfo{ 92 | Name: filePath, 93 | Size: fInfo.Size(), 94 | }, nil 95 | } 96 | 97 | // Init initializes the filesystem. 
98 | func (l *LocalFileSystem) Init() error { 99 | return nil 100 | } 101 | 102 | // Join joins file path elements 103 | func (l *LocalFileSystem) Join(elem ...string) string { 104 | return filepath.Join(elem...) 105 | } 106 | 107 | // Delete deletes the file at filePath. 108 | func (l *LocalFileSystem) Delete(filePath string) error { 109 | return os.Remove(filePath) 110 | } 111 | -------------------------------------------------------------------------------- /internal/pkg/corfs/local_test.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "io/ioutil" 5 | "os" 6 | "path" 7 | "path/filepath" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestLocalImplementsFileSystem(t *testing.T) { 14 | backend := LocalFileSystem{} 15 | var fileSystem FileSystem 16 | fileSystem = &backend 17 | 18 | assert.NotNil(t, fileSystem) 19 | } 20 | 21 | func TestLocalListFiles(t *testing.T) { 22 | tmpdir, err := ioutil.TempDir("", "test") 23 | defer os.RemoveAll(tmpdir) 24 | assert.Nil(t, err) 25 | 26 | tmpFilePath := path.Join(tmpdir, "tmpfile") 27 | ioutil.WriteFile(tmpFilePath, []byte("foo"), 0777) 28 | 29 | fs := LocalFileSystem{} 30 | 31 | files, err := fs.ListFiles(tmpdir) 32 | assert.Nil(t, err) 33 | 34 | assert.Len(t, files, 1) 35 | assert.Equal(t, tmpFilePath, files[0].Name) 36 | } 37 | 38 | func TestLocalOpenReader(t *testing.T) { 39 | tmpdir, err := ioutil.TempDir("", "test") 40 | defer os.RemoveAll(tmpdir) 41 | assert.Nil(t, err) 42 | 43 | ioutil.WriteFile(path.Join(tmpdir, "tmpfile"), []byte("foo bar baz"), 0777) 44 | 45 | fs := LocalFileSystem{} 46 | 47 | path := filepath.Join(tmpdir, "tmpfile") 48 | 49 | // Test reader that begins at beginning of file 50 | reader, err := fs.OpenReader(path, 0) 51 | assert.Nil(t, err) 52 | 53 | contents, err := ioutil.ReadAll(reader) 54 | assert.Nil(t, err) 55 | assert.Equal(t, []byte("foo bar baz"), contents) 56 | err = reader.Close() 
57 | assert.Nil(t, err) 58 | 59 | // Test reader that begins in the middle of a file 60 | reader, err = fs.OpenReader(path, 4) 61 | assert.Nil(t, err) 62 | 63 | contents, err = ioutil.ReadAll(reader) 64 | assert.Nil(t, err) 65 | assert.Equal(t, []byte("bar baz"), contents) 66 | err = reader.Close() 67 | assert.Nil(t, err) 68 | } 69 | 70 | func TestLocalOpenWriter(t *testing.T) { 71 | tmpdir, err := ioutil.TempDir("", "test") 72 | defer os.RemoveAll(tmpdir) 73 | assert.Nil(t, err) 74 | 75 | fs := LocalFileSystem{} 76 | 77 | path := filepath.Join(tmpdir, "tmpfile") 78 | 79 | writer, err := fs.OpenWriter(path) 80 | assert.Nil(t, err) 81 | 82 | n, err := writer.Write([]byte("foo bar baz")) 83 | assert.Equal(t, 11, n) 84 | assert.Nil(t, err) 85 | 86 | contents, err := ioutil.ReadFile(path) 87 | assert.Nil(t, err) 88 | assert.Equal(t, []byte("foo bar baz"), contents) 89 | } 90 | 91 | func TestLocalStat(t *testing.T) { 92 | tmpdir, err := ioutil.TempDir("", "test") 93 | defer os.RemoveAll(tmpdir) 94 | assert.Nil(t, err) 95 | 96 | path := path.Join(tmpdir, "tmpfile") 97 | 98 | ioutil.WriteFile(path, []byte("foo"), 0777) 99 | 100 | fs := LocalFileSystem{} 101 | 102 | fInfo, err := fs.Stat(path) 103 | assert.Nil(t, err) 104 | 105 | assert.Equal(t, path, fInfo.Name) 106 | assert.Equal(t, int64(3), fInfo.Size) 107 | } 108 | 109 | func TestLocalCreateIntermediateDirectory(t *testing.T) { 110 | tmpdir, err := ioutil.TempDir("", "test") 111 | defer os.RemoveAll(tmpdir) 112 | assert.Nil(t, err) 113 | 114 | path := path.Join(tmpdir, "additionalFolder", "tmpfile") 115 | 116 | fs := LocalFileSystem{} 117 | 118 | writer, err := fs.OpenWriter(path) 119 | assert.Nil(t, err) 120 | 121 | _, err = writer.Write([]byte("foo")) 122 | assert.Nil(t, err) 123 | 124 | assert.Nil(t, writer.Close()) 125 | 126 | stat, err := os.Stat(filepath.Join(tmpdir, "additionalFolder")) 127 | assert.Nil(t, err) 128 | assert.True(t, stat.IsDir()) 129 | } 130 | 131 | func TestLocalListGlob(t *testing.T) { 132 | 
tmpdir, err := ioutil.TempDir("", "test") 133 | defer os.RemoveAll(tmpdir) 134 | assert.Nil(t, err) 135 | 136 | path := path.Join(tmpdir, "tmpfile") 137 | 138 | ioutil.WriteFile(path, []byte("foo"), 0777) 139 | 140 | fs := LocalFileSystem{} 141 | 142 | files, err := fs.ListFiles(filepath.Join(tmpdir, "tmp*")) 143 | assert.Nil(t, err) 144 | assert.Len(t, files, 1) 145 | 146 | assert.Equal(t, int64(3), files[0].Size) 147 | assert.Equal(t, path, files[0].Name) 148 | } 149 | -------------------------------------------------------------------------------- /internal/pkg/corfs/s3.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "errors" 5 | "fmt" 6 | "io" 7 | "net/url" 8 | "os" 9 | "path/filepath" 10 | "regexp" 11 | "strings" 12 | 13 | "github.com/aws/aws-sdk-go/aws" 14 | "github.com/aws/aws-sdk-go/aws/session" 15 | "github.com/aws/aws-sdk-go/service/s3" 16 | lru "github.com/hashicorp/golang-lru" 17 | "github.com/mattetti/filebuffer" 18 | ) 19 | 20 | var validS3Schemes = map[string]bool{ 21 | "s3": true, 22 | "s3a": true, 23 | "s3n": true, 24 | } 25 | 26 | var globRegex = regexp.MustCompile(`^(.*?)([\[\*\?].*)$`) 27 | 28 | // S3FileSystem abstracts AWS S3 as a filesystem 29 | type S3FileSystem struct { 30 | s3Client *s3.S3 31 | objectCache *lru.Cache 32 | } 33 | 34 | var _ FileSystem = &S3FileSystem{} 35 | 36 | func parseS3URI(uri string) (*url.URL, error) { 37 | parsed, err := url.Parse(uri) 38 | 39 | if _, ok := validS3Schemes[parsed.Scheme]; !ok { 40 | return nil, fmt.Errorf("Invalid s3 scheme: '%s'", parsed.Scheme) 41 | } 42 | 43 | // if !strings.Contains(parsed.Path, "/") { 44 | // return nil, fmt.Errorf("Invalid s3 url: '%s'", uri) 45 | // } 46 | 47 | if strings.HasPrefix(parsed.Path, "/") { 48 | parsed.Path = parsed.Path[1:] 49 | } 50 | 51 | return parsed, err 52 | } 53 | 54 | // ListFiles lists files that match pathGlob. 
55 | func (s *S3FileSystem) ListFiles(pathGlob string) ([]FileInfo, error) { 56 | s3Files := make([]FileInfo, 0) 57 | 58 | parsed, err := parseS3URI(pathGlob) 59 | if err != nil { 60 | return nil, err 61 | } 62 | 63 | baseURI := parsed.Path 64 | if globRegex.MatchString(parsed.Path) { 65 | baseURI = globRegex.FindStringSubmatch(parsed.Path)[1] 66 | } 67 | 68 | var dirGlob string 69 | if !strings.HasSuffix(pathGlob, "/") { 70 | dirGlob = pathGlob + "/*" 71 | } else { 72 | dirGlob = pathGlob + "*" 73 | } 74 | 75 | params := &s3.ListObjectsInput{ 76 | Bucket: aws.String(parsed.Hostname()), 77 | Prefix: aws.String(baseURI), 78 | } 79 | 80 | objectPrefix := fmt.Sprintf("%s://%s/", parsed.Scheme, parsed.Hostname()) 81 | err = s.s3Client.ListObjectsPages(params, 82 | func(page *s3.ListObjectsOutput, _ bool) bool { 83 | for _, object := range page.Contents { 84 | fullPath := objectPrefix + *object.Key 85 | 86 | dirMatch, _ := filepath.Match(dirGlob, fullPath) 87 | pathMatch, _ := filepath.Match(pathGlob, fullPath) 88 | if !(dirMatch || pathMatch) { 89 | continue 90 | } 91 | 92 | s3Files = append(s3Files, FileInfo{ 93 | Name: fullPath, 94 | Size: *object.Size, 95 | }) 96 | s.objectCache.Add(fullPath, object) 97 | } 98 | return true 99 | }) 100 | 101 | return s3Files, err 102 | } 103 | 104 | // OpenReader opens a reader to the file at filePath. The reader 105 | // is initially seeked to "startAt" bytes into the file. 
106 | func (s *S3FileSystem) OpenReader(filePath string, startAt int64) (io.ReadCloser, error) { 107 | parsed, err := parseS3URI(filePath) 108 | if err != nil { 109 | return nil, err 110 | } 111 | 112 | objStat, err := s.Stat(filePath) 113 | if err != nil { 114 | return nil, err 115 | } 116 | 117 | reader := &s3Reader{ 118 | client: s.s3Client, 119 | bucket: parsed.Hostname(), 120 | key: parsed.Path, 121 | offset: startAt, 122 | chunkSize: 20 * 1024 * 1024, // 20 Mb chunk size 123 | totalSize: objStat.Size, 124 | } 125 | err = reader.loadNextChunk() 126 | return reader, err 127 | } 128 | 129 | // OpenWriter opens a writer to the file at filePath. 130 | func (s *S3FileSystem) OpenWriter(filePath string) (io.WriteCloser, error) { 131 | parsed, err := parseS3URI(filePath) 132 | if err != nil { 133 | return nil, err 134 | } 135 | 136 | writer := &s3Writer{ 137 | client: s.s3Client, 138 | bucket: parsed.Hostname(), 139 | key: parsed.Path, 140 | buf: filebuffer.New(nil), 141 | complatedParts: []*s3.CompletedPart{}, 142 | } 143 | err = writer.Init() 144 | return writer, err 145 | } 146 | 147 | // Stat returns information about the file at filePath. 
148 | func (s *S3FileSystem) Stat(filePath string) (FileInfo, error) { 149 | if object, exists := s.objectCache.Get(filePath); exists { 150 | return FileInfo{ 151 | Name: filePath, 152 | Size: *object.(*s3.Object).Size, 153 | }, nil 154 | } 155 | 156 | parsed, err := parseS3URI(filePath) 157 | if err != nil { 158 | return FileInfo{}, err 159 | } 160 | 161 | params := &s3.ListObjectsInput{ 162 | Bucket: aws.String(parsed.Hostname()), 163 | Prefix: aws.String(parsed.Path), 164 | } 165 | result, err := s.s3Client.ListObjects(params) 166 | if err != nil { 167 | return FileInfo{}, err 168 | } 169 | 170 | for _, object := range result.Contents { 171 | if *object.Key == parsed.Path { 172 | s.objectCache.Add(filePath, object) 173 | return FileInfo{ 174 | Name: filePath, 175 | Size: *object.Size, 176 | }, nil 177 | } 178 | } 179 | 180 | return FileInfo{}, errors.New("No file with given filename") 181 | } 182 | 183 | // Init initializes the filesystem. 184 | func (s *S3FileSystem) Init() error { 185 | os.Setenv("AWS_SDK_LOAD_CONFIG", "true") 186 | sess, err := session.NewSession() 187 | if err != nil { 188 | return err 189 | } 190 | s.s3Client = s3.New(sess) 191 | 192 | s.objectCache, _ = lru.New(10000) 193 | 194 | return nil 195 | } 196 | 197 | // Delete deletes the file at filePath. 
198 | func (s *S3FileSystem) Delete(filePath string) error { 199 | parsed, err := parseS3URI(filePath) 200 | if err != nil { 201 | return err 202 | } 203 | 204 | params := &s3.DeleteObjectInput{ 205 | Bucket: aws.String(parsed.Hostname()), 206 | Key: aws.String(parsed.Path), 207 | } 208 | _, err = s.s3Client.DeleteObject(params) 209 | return err 210 | } 211 | 212 | // Join joins file path elements 213 | func (s *S3FileSystem) Join(elem ...string) string { 214 | stripped := make([]string, len(elem)) 215 | for i, str := range elem { 216 | if strings.HasPrefix(str, "/") { 217 | str = str[1:] 218 | } 219 | if strings.HasSuffix(str, "/") && i != len(elem)-1 { 220 | str = str[:len(str)-1] 221 | } 222 | stripped[i] = str 223 | } 224 | return strings.Join(stripped, "/") 225 | } 226 | -------------------------------------------------------------------------------- /internal/pkg/corfs/s3_io.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "fmt" 5 | "io" 6 | 7 | "github.com/aws/aws-sdk-go/aws" 8 | "github.com/aws/aws-sdk-go/service/s3" 9 | "github.com/mattetti/filebuffer" 10 | ) 11 | 12 | type s3Writer struct { 13 | client *s3.S3 14 | bucket string 15 | key string 16 | buf *filebuffer.Buffer 17 | uploadChunkSize int64 18 | uploadID string 19 | complatedParts []*s3.CompletedPart 20 | } 21 | 22 | func (s *s3Writer) Init() error { 23 | params := &s3.CreateMultipartUploadInput{ 24 | Bucket: aws.String(s.bucket), 25 | Key: aws.String(s.key), 26 | } 27 | result, err := s.client.CreateMultipartUpload(params) 28 | 29 | if result != nil { 30 | s.uploadID = *result.UploadId 31 | } 32 | return err 33 | } 34 | 35 | func (s *s3Writer) uploadChunk() error { 36 | s.buf.Seek(0, io.SeekStart) 37 | partNumber := int64(len(s.complatedParts) + 1) 38 | 39 | uploadParams := &s3.UploadPartInput{ 40 | Bucket: aws.String(s.bucket), 41 | Key: aws.String(s.key), 42 | UploadId: aws.String(s.uploadID), 43 | Body: s.buf, 44 | 
PartNumber: aws.Int64(partNumber), 45 | } 46 | result, err := s.client.UploadPart(uploadParams) 47 | if result != nil { 48 | s.complatedParts = append(s.complatedParts, &s3.CompletedPart{ 49 | ETag: result.ETag, 50 | PartNumber: aws.Int64(partNumber), 51 | }) 52 | } 53 | 54 | // Reset buffer 55 | s.buf = filebuffer.New(nil) 56 | 57 | return err 58 | } 59 | 60 | func (s *s3Writer) Write(p []byte) (n int, err error) { 61 | n, err = s.buf.Write(p) 62 | if int64(len(s.buf.Bytes())) > s.uploadChunkSize { 63 | err = s.uploadChunk() 64 | } 65 | return n, err 66 | } 67 | 68 | func (s *s3Writer) Close() error { 69 | err := s.uploadChunk() 70 | 71 | completeParams := &s3.CompleteMultipartUploadInput{ 72 | Bucket: aws.String(s.bucket), 73 | Key: aws.String(s.key), 74 | UploadId: aws.String(s.uploadID), 75 | MultipartUpload: &s3.CompletedMultipartUpload{ 76 | Parts: s.complatedParts, 77 | }, 78 | } 79 | 80 | _, err = s.client.CompleteMultipartUpload(completeParams) 81 | 82 | return err 83 | } 84 | 85 | type s3Reader struct { 86 | client *s3.S3 87 | bucket string 88 | key string 89 | offset int64 90 | chunkSize int64 91 | chunk io.ReadCloser 92 | totalSize int64 93 | } 94 | 95 | func (s *s3Reader) loadNextChunk() error { 96 | size := min64(s.chunkSize, s.totalSize-s.offset) 97 | params := &s3.GetObjectInput{ 98 | Bucket: aws.String(s.bucket), 99 | Key: aws.String(s.key), 100 | Range: aws.String(fmt.Sprintf("bytes=%d-%d", s.offset, s.offset+size-1)), 101 | } 102 | s.offset += size 103 | output, err := s.client.GetObject(params) 104 | s.chunk = output.Body 105 | return err 106 | } 107 | 108 | func (s *s3Reader) Read(b []byte) (n int, err error) { 109 | n, err = s.chunk.Read(b) 110 | if err == io.EOF && s.offset != s.totalSize { 111 | s.chunk.Close() 112 | err = s.loadNextChunk() 113 | } 114 | return n, err 115 | } 116 | 117 | func (s *s3Reader) Close() error { 118 | return s.chunk.Close() 119 | } 120 | 
-------------------------------------------------------------------------------- /internal/pkg/corfs/s3_test.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | import ( 4 | "fmt" 5 | "io/ioutil" 6 | "os" 7 | "strings" 8 | "testing" 9 | 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func getS3TestBackend(t *testing.T) (string, *S3FileSystem) { 14 | t.Helper() 15 | 16 | backend := &S3FileSystem{} 17 | 18 | bucket := os.Getenv("AWS_TEST_BUCKET") 19 | if bucket == "" { 20 | t.Skipf("No test bucket is set under $AWS_TEST_BUCKET") 21 | } 22 | err := backend.Init() 23 | if err != nil { 24 | t.Fatalf("Could not initialize S3 filesystem: %s", err) 25 | } 26 | return fmt.Sprintf("s3://%s", bucket), backend 27 | } 28 | 29 | func cleanup(backend *S3FileSystem, t *testing.T) { 30 | bucket := os.Getenv("AWS_TEST_BUCKET") 31 | objects, err := backend.ListFiles("s3://" + bucket + "/") 32 | 33 | assert.Nil(t, err) 34 | for _, obj := range objects { 35 | err = backend.Delete(obj.Name) 36 | assert.Nil(t, err) 37 | } 38 | } 39 | 40 | func TestS3ReaderWriter(t *testing.T) { 41 | bucket, backend := getS3TestBackend(t) 42 | defer cleanup(backend, t) 43 | 44 | path := bucket + "/testobj" 45 | 46 | // Test writer 47 | writer, err := backend.OpenWriter(path) 48 | assert.Nil(t, err) 49 | 50 | _, err = writer.Write([]byte("foo bar baz")) 51 | assert.Nil(t, err) 52 | 53 | err = writer.Close() 54 | assert.Nil(t, err) 55 | 56 | // Test reader starting at beginning of file 57 | reader, err := backend.OpenReader(path, 0) 58 | assert.Nil(t, err) 59 | 60 | contents, err := ioutil.ReadAll(reader) 61 | assert.Nil(t, err) 62 | assert.Equal(t, "foo bar baz", string(contents)) 63 | 64 | err = reader.Close() 65 | assert.Nil(t, err) 66 | } 67 | 68 | func TestS3ReaderWriterWithOffset(t *testing.T) { 69 | bucket, backend := getS3TestBackend(t) 70 | defer cleanup(backend, t) 71 | 72 | path := bucket + "/testobj" 73 | 74 | // Test writer 
75 | writer, err := backend.OpenWriter(path) 76 | assert.Nil(t, err) 77 | 78 | _, err = writer.Write([]byte("foo bar baz")) 79 | assert.Nil(t, err) 80 | 81 | err = writer.Close() 82 | assert.Nil(t, err) 83 | 84 | // Test reader starting in middle of file 85 | reader, err := backend.OpenReader(path, 4) 86 | assert.Nil(t, err) 87 | 88 | contents, err := ioutil.ReadAll(reader) 89 | assert.Nil(t, err) 90 | assert.Equal(t, "bar baz", string(contents)) 91 | 92 | err = reader.Close() 93 | assert.Nil(t, err) 94 | } 95 | 96 | func TestS3ListFiles(t *testing.T) { 97 | bucket, backend := getS3TestBackend(t) 98 | defer cleanup(backend, t) 99 | 100 | for i := 0; i < 5; i++ { 101 | fName := fmt.Sprintf("file%d", i) 102 | writer, err := backend.OpenWriter(bucket + "/" + fName) 103 | assert.Nil(t, err) 104 | 105 | _, err = writer.Write([]byte(fName)) 106 | assert.Nil(t, err) 107 | err = writer.Close() 108 | assert.Nil(t, err) 109 | } 110 | 111 | files, err := backend.ListFiles(bucket) 112 | assert.Nil(t, err) 113 | assert.Len(t, files, 5) 114 | 115 | expectedPrefix := bucket + "/file" 116 | for _, file := range files { 117 | fmt.Println(file.Name, expectedPrefix) 118 | assert.True(t, strings.HasPrefix(file.Name, expectedPrefix)) 119 | assert.Equal(t, int64(5), file.Size) 120 | } 121 | } 122 | 123 | func TestS3ListGlob(t *testing.T) { 124 | bucket, backend := getS3TestBackend(t) 125 | defer cleanup(backend, t) 126 | 127 | for i := 0; i < 3; i++ { 128 | fName := fmt.Sprintf("foo/file%d", i) 129 | writer, err := backend.OpenWriter(bucket + "/" + fName) 130 | assert.Nil(t, err) 131 | 132 | _, err = writer.Write([]byte(fName)) 133 | assert.Nil(t, err) 134 | err = writer.Close() 135 | assert.Nil(t, err) 136 | } 137 | 138 | files, err := backend.ListFiles(bucket + "/foo/*") 139 | assert.Nil(t, err) 140 | assert.Len(t, files, 3) 141 | 142 | expectedPrefix := bucket + "/foo/file" 143 | for _, file := range files { 144 | fmt.Println(file.Name, expectedPrefix) 145 | assert.True(t, 
strings.HasPrefix(file.Name, expectedPrefix)) 146 | assert.Equal(t, int64(9), file.Size) 147 | } 148 | } 149 | 150 | func TestS3Stat(t *testing.T) { 151 | bucket, backend := getS3TestBackend(t) 152 | defer cleanup(backend, t) 153 | 154 | path := bucket + "/testobj" 155 | 156 | writer, err := backend.OpenWriter(path) 157 | assert.Nil(t, err) 158 | 159 | _, err = writer.Write([]byte("foo bar baz")) 160 | assert.Nil(t, err) 161 | err = writer.Close() 162 | assert.Nil(t, err) 163 | 164 | file, err := backend.Stat(path) 165 | assert.Nil(t, err) 166 | 167 | assert.Equal(t, path, file.Name) 168 | assert.Equal(t, int64(11), file.Size) 169 | } 170 | 171 | func TestS3Join(t *testing.T) { 172 | _, backend := getS3TestBackend(t) 173 | 174 | res := backend.Join("s3://foo", "bar", "baz") 175 | assert.Equal(t, res, "s3://foo/bar/baz") 176 | 177 | res = backend.Join("s3://foo/", "/bar", "baz/") 178 | assert.Equal(t, res, "s3://foo/bar/baz/") 179 | } 180 | 181 | func TestS3ReaderChunk(t *testing.T) { 182 | bucket, backend := getS3TestBackend(t) 183 | defer cleanup(backend, t) 184 | 185 | path := bucket + "/testobj" 186 | 187 | // Test writer 188 | writer, err := backend.OpenWriter(path) 189 | assert.Nil(t, err) 190 | 191 | _, err = writer.Write([]byte("foo bar baz")) 192 | assert.Nil(t, err) 193 | 194 | err = writer.Close() 195 | assert.Nil(t, err) 196 | 197 | // Test reader w/ small chunk size 198 | reader := &s3Reader{ 199 | client: backend.s3Client, 200 | bucket: strings.TrimPrefix(bucket, "s3://"), 201 | key: "testobj", 202 | offset: 0, 203 | chunkSize: 3, 204 | totalSize: 11, 205 | } 206 | err = reader.loadNextChunk() 207 | fmt.Println(err) 208 | assert.Nil(t, err) 209 | 210 | // First chunk should advance reader offset by 3 bytes 211 | assert.Equal(t, int64(3), reader.offset) 212 | 213 | contents, err := ioutil.ReadAll(reader) 214 | assert.Nil(t, err) 215 | assert.Equal(t, "foo bar baz", string(contents)) 216 | 217 | err = reader.Close() 218 | assert.Nil(t, err) 219 | } 220 | 
-------------------------------------------------------------------------------- /internal/pkg/corfs/util.go: -------------------------------------------------------------------------------- 1 | package corfs 2 | 3 | func min64(a, b int64) int64 { 4 | if a < b { 5 | return a 6 | } 7 | return b 8 | } 9 | -------------------------------------------------------------------------------- /internal/pkg/coriam/client.go: -------------------------------------------------------------------------------- 1 | package coriam 2 | 3 | import ( 4 | "strings" 5 | 6 | "github.com/aws/aws-sdk-go/aws" 7 | "github.com/aws/aws-sdk-go/aws/session" 8 | "github.com/aws/aws-sdk-go/service/iam" 9 | "github.com/aws/aws-sdk-go/service/iam/iamiface" 10 | log "github.com/sirupsen/logrus" 11 | ) 12 | 13 | // IAMClient manages deploying IAM credentials for corral 14 | type IAMClient struct { 15 | iamiface.IAMAPI 16 | } 17 | 18 | // AssumePolicyDocument is the policy document used in the role that coriam creates 19 | const AssumePolicyDocument = `{ 20 | "Version": "2012-10-17", 21 | "Statement": [ 22 | { 23 | "Sid": "", 24 | "Effect": "Allow", 25 | "Principal": { 26 | "Service": [ 27 | "lambda.amazonaws.com" 28 | ] 29 | }, 30 | "Action": "sts:AssumeRole" 31 | } 32 | ] 33 | }` 34 | 35 | // AttachPolicyDocument is the policy document used in the policy that coriam attaches to the created role 36 | const AttachPolicyDocument = `{ 37 | "Version": "2012-10-17", 38 | "Statement": [ 39 | { 40 | "Effect": "Allow", 41 | "Action": [ 42 | "logs:*" 43 | ], 44 | "Resource": "arn:aws:logs:*:*:*" 45 | }, 46 | { 47 | "Effect": "Allow", 48 | "Action": [ 49 | "lambda:InvokeFunction" 50 | ], 51 | "Resource": [ 52 | "*" 53 | ] 54 | }, 55 | { 56 | "Effect": "Allow", 57 | "Action": [ 58 | "ec2:AttachNetworkInterface", 59 | "ec2:CreateNetworkInterface", 60 | "ec2:DeleteNetworkInterface", 61 | "ec2:DescribeInstances", 62 | "ec2:DescribeNetworkInterfaces", 63 | "ec2:DetachNetworkInterface", 64 | 
"ec2:ModifyNetworkInterfaceAttribute", 65 | "ec2:ResetNetworkInterfaceAttribute" 66 | ], 67 | "Resource": "*" 68 | }, 69 | { 70 | "Effect": "Allow", 71 | "Action": [ 72 | "s3:*" 73 | ], 74 | "Resource": "arn:aws:s3:::*" 75 | } 76 | ] 77 | }` 78 | 79 | const corralPolicyName = "corral-permissions" 80 | 81 | func (iamClient *IAMClient) createRole(roleName string) (roleARN string, err error) { 82 | createParams := &iam.CreateRoleInput{ 83 | AssumeRolePolicyDocument: aws.String(AssumePolicyDocument), 84 | RoleName: aws.String(roleName), 85 | } 86 | log.Debugf("Creating IAM role '%s'", roleName) 87 | role, err := iamClient.CreateRole(createParams) 88 | if err != nil { 89 | return "", err 90 | } 91 | return *role.Role.Arn, err 92 | } 93 | 94 | func (iamClient *IAMClient) updateAssumeRolePolicy(roleName string) error { 95 | updateParams := &iam.UpdateAssumeRolePolicyInput{ 96 | PolicyDocument: aws.String(AssumePolicyDocument), 97 | RoleName: aws.String(roleName), 98 | } 99 | log.Debugf("Updating IAM role '%s'", roleName) 100 | _, err := iamClient.UpdateAssumeRolePolicy(updateParams) 101 | 102 | return err 103 | } 104 | 105 | // deployRole creates/updates the role with the given name so that it has the policy 106 | // document that coriam defines (AssumePolicyDocument). 
107 | func (iamClient *IAMClient) deployRole(roleName string) (roleARN string, err error) { 108 | getParams := &iam.GetRoleInput{ 109 | RoleName: aws.String(roleName), 110 | } 111 | exists, err := iamClient.GetRole(getParams) 112 | 113 | // Role already exists 114 | if exists != nil && err == nil { 115 | if *exists.Role.AssumeRolePolicyDocument != AssumePolicyDocument { 116 | err = iamClient.updateAssumeRolePolicy(roleName) 117 | return *exists.Role.Arn, err 118 | } 119 | log.Debugf("IAM Role '%s' already exists", roleName) 120 | return *exists.Role.Arn, nil 121 | } 122 | 123 | return iamClient.createRole(roleName) 124 | } 125 | 126 | func (iamClient *IAMClient) putAttachPolicy(roleName string) error { 127 | createParams := &iam.PutRolePolicyInput{ 128 | PolicyName: aws.String(corralPolicyName), 129 | PolicyDocument: aws.String(AttachPolicyDocument), 130 | RoleName: aws.String(roleName), 131 | } 132 | 133 | log.Debugf("Putting policy '%s'", *createParams.PolicyName) 134 | _, err := iamClient.PutRolePolicy(createParams) 135 | return err 136 | } 137 | 138 | // deployRole creates/updates the role with the given name so that it an 139 | // attached inline policy that matches AttachPolicyDocument 140 | func (iamClient *IAMClient) deployPolicy(roleName string) error { 141 | getParams := &iam.GetRolePolicyInput{ 142 | RoleName: aws.String(roleName), 143 | PolicyName: aws.String(corralPolicyName), 144 | } 145 | 146 | exists, err := iamClient.GetRolePolicy(getParams) 147 | 148 | // Policy already exists 149 | if exists != nil && err == nil { 150 | if *exists.PolicyDocument != AttachPolicyDocument { 151 | return iamClient.putAttachPolicy(roleName) 152 | } 153 | log.Debugf("Policy '%s' already exists", *exists.PolicyName) 154 | return nil 155 | } 156 | 157 | return iamClient.putAttachPolicy(roleName) 158 | } 159 | 160 | // DeployPermissions creates/updates IAM permissions for corral lambda functions. 
161 | // It creates/updates an IAM role and inline policy to allow corral lambda functions 162 | // to access S3, invoke lambda functions, and write logs to CloudWatch. 163 | func (iamClient *IAMClient) DeployPermissions(roleName string) (roleARN string, err error) { 164 | roleARN, err = iamClient.deployRole(roleName) 165 | if err != nil { 166 | return roleARN, err 167 | } 168 | 169 | err = iamClient.deployPolicy(roleName) 170 | 171 | return roleARN, err 172 | } 173 | 174 | // DeletePermissions deletes corral's IA role policy and IAM role. 175 | func (iamClient *IAMClient) DeletePermissions(roleName string) error { 176 | log.Debugf("Deleting role policy") 177 | deletePolicyParams := &iam.DeleteRolePolicyInput{ 178 | RoleName: aws.String(roleName), 179 | PolicyName: aws.String(corralPolicyName), 180 | } 181 | _, err := iamClient.DeleteRolePolicy(deletePolicyParams) 182 | if err != nil && !strings.HasPrefix(err.Error(), iam.ErrCodeNoSuchEntityException) { 183 | return err 184 | } 185 | 186 | log.Debugf("Deleting role") 187 | deleteRoleParams := &iam.DeleteRoleInput{ 188 | RoleName: aws.String(roleName), 189 | } 190 | _, err = iamClient.DeleteRole(deleteRoleParams) 191 | if err != nil && !strings.HasPrefix(err.Error(), iam.ErrCodeNoSuchEntityException) { 192 | return err 193 | } 194 | return nil 195 | } 196 | 197 | // NewIAMClient initializes a new IAMClient 198 | func NewIAMClient() *IAMClient { 199 | sess := session.Must(session.NewSessionWithOptions(session.Options{ 200 | SharedConfigState: session.SharedConfigEnable, 201 | })) 202 | return &IAMClient{ 203 | iam.New(sess), 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /internal/pkg/coriam/client_test.go: -------------------------------------------------------------------------------- 1 | package coriam 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/aws/aws-sdk-go/aws" 7 | "github.com/aws/aws-sdk-go/service/iam" 8 | 
"github.com/aws/aws-sdk-go/service/iam/iamiface" 9 | "github.com/stretchr/testify/assert" 10 | ) 11 | 12 | type iamMock struct { 13 | iamiface.IAMAPI 14 | roleExists bool 15 | policyExists bool 16 | attachRolePolicyDocument string 17 | assumeRolePolicyDocument string 18 | capturedGetRoleInput *iam.GetRoleInput 19 | capturedCreateRoleInput *iam.CreateRoleInput 20 | capturedUpdateAssumeRolePolicyInput *iam.UpdateAssumeRolePolicyInput 21 | capturedGetRolePolicyInput *iam.GetRolePolicyInput 22 | capturedPutRolePolicyInput *iam.PutRolePolicyInput 23 | capturedDeleteRolePolicyInput *iam.DeleteRolePolicyInput 24 | capturedDeleteRoleInput *iam.DeleteRoleInput 25 | } 26 | 27 | func (i *iamMock) GetRole(input *iam.GetRoleInput) (*iam.GetRoleOutput, error) { 28 | i.capturedGetRoleInput = input 29 | if !i.roleExists { 30 | return nil, nil 31 | } 32 | return &iam.GetRoleOutput{ 33 | Role: &iam.Role{ 34 | RoleName: input.RoleName, 35 | Arn: aws.String("testARN"), 36 | AssumeRolePolicyDocument: aws.String(i.assumeRolePolicyDocument), 37 | }, 38 | }, nil 39 | } 40 | 41 | func (i *iamMock) UpdateAssumeRolePolicy(input *iam.UpdateAssumeRolePolicyInput) (*iam.UpdateAssumeRolePolicyOutput, error) { 42 | i.capturedUpdateAssumeRolePolicyInput = input 43 | return nil, nil 44 | } 45 | 46 | func (i *iamMock) CreateRole(input *iam.CreateRoleInput) (*iam.CreateRoleOutput, error) { 47 | i.capturedCreateRoleInput = input 48 | return &iam.CreateRoleOutput{ 49 | Role: &iam.Role{ 50 | Arn: aws.String("testARN"), 51 | }, 52 | }, nil 53 | } 54 | 55 | func (i *iamMock) GetRolePolicy(input *iam.GetRolePolicyInput) (*iam.GetRolePolicyOutput, error) { 56 | i.capturedGetRolePolicyInput = input 57 | if !i.policyExists { 58 | return nil, nil 59 | } 60 | return &iam.GetRolePolicyOutput{ 61 | RoleName: input.RoleName, 62 | PolicyDocument: aws.String(i.attachRolePolicyDocument), 63 | }, nil 64 | } 65 | 66 | func (i *iamMock) PutRolePolicy(input *iam.PutRolePolicyInput) (*iam.PutRolePolicyOutput, error) { 67 
| i.capturedPutRolePolicyInput = input 68 | return nil, nil 69 | } 70 | 71 | func (i *iamMock) DeleteRolePolicy(input *iam.DeleteRolePolicyInput) (*iam.DeleteRolePolicyOutput, error) { 72 | i.capturedDeleteRolePolicyInput = input 73 | return nil, nil 74 | } 75 | 76 | func (i *iamMock) DeleteRole(input *iam.DeleteRoleInput) (*iam.DeleteRoleOutput, error) { 77 | i.capturedDeleteRoleInput = input 78 | return nil, nil 79 | } 80 | 81 | func TestCreateRole(t *testing.T) { 82 | mock := &iamMock{ 83 | roleExists: false, 84 | } 85 | client := IAMClient{mock} 86 | 87 | arn, err := client.deployRole("role") 88 | assert.Nil(t, err) 89 | assert.Equal(t, "testARN", arn) 90 | assert.Equal(t, "role", *mock.capturedCreateRoleInput.RoleName) 91 | assert.Equal(t, AssumePolicyDocument, *mock.capturedCreateRoleInput.AssumeRolePolicyDocument) 92 | } 93 | 94 | func TestUpdateRole(t *testing.T) { 95 | mock := &iamMock{ 96 | roleExists: true, 97 | assumeRolePolicyDocument: "incorrect document", 98 | } 99 | client := IAMClient{mock} 100 | 101 | arn, err := client.deployRole("role") 102 | assert.Nil(t, err) 103 | assert.Equal(t, "testARN", arn) 104 | assert.Equal(t, "role", *mock.capturedUpdateAssumeRolePolicyInput.RoleName) 105 | assert.Equal(t, AssumePolicyDocument, *mock.capturedUpdateAssumeRolePolicyInput.PolicyDocument) 106 | } 107 | 108 | func TestCreatePolicy(t *testing.T) { 109 | mock := &iamMock{ 110 | policyExists: false, 111 | } 112 | client := IAMClient{mock} 113 | 114 | err := client.deployPolicy("role") 115 | assert.Nil(t, err) 116 | assert.Equal(t, "role", *mock.capturedGetRolePolicyInput.RoleName) 117 | assert.Equal(t, "corral-permissions", *mock.capturedGetRolePolicyInput.PolicyName) 118 | assert.Equal(t, AttachPolicyDocument, *mock.capturedPutRolePolicyInput.PolicyDocument) 119 | } 120 | 121 | func TestUpdatePolicy(t *testing.T) { 122 | mock := &iamMock{ 123 | policyExists: true, 124 | attachRolePolicyDocument: "incorrect document", 125 | } 126 | client := IAMClient{mock} 
127 | 128 | err := client.deployPolicy("role") 129 | assert.Nil(t, err) 130 | assert.Equal(t, "role", *mock.capturedGetRolePolicyInput.RoleName) 131 | assert.Equal(t, "corral-permissions", *mock.capturedGetRolePolicyInput.PolicyName) 132 | assert.Equal(t, AttachPolicyDocument, *mock.capturedPutRolePolicyInput.PolicyDocument) 133 | } 134 | 135 | func TestDeployPermissions(t *testing.T) { 136 | mock := &iamMock{ 137 | roleExists: false, 138 | policyExists: false, 139 | } 140 | client := IAMClient{mock} 141 | 142 | arn, err := client.DeployPermissions("role") 143 | assert.Nil(t, err) 144 | assert.Equal(t, "testARN", arn) 145 | 146 | // Role Creation 147 | assert.Equal(t, "role", *mock.capturedCreateRoleInput.RoleName) 148 | assert.Equal(t, AssumePolicyDocument, *mock.capturedCreateRoleInput.AssumeRolePolicyDocument) 149 | 150 | // Role Policy Creation 151 | assert.Equal(t, "role", *mock.capturedGetRolePolicyInput.RoleName) 152 | assert.Equal(t, "corral-permissions", *mock.capturedGetRolePolicyInput.PolicyName) 153 | assert.Equal(t, AttachPolicyDocument, *mock.capturedPutRolePolicyInput.PolicyDocument) 154 | } 155 | 156 | func TestDeletePermissions(t *testing.T) { 157 | mock := &iamMock{} 158 | client := IAMClient{mock} 159 | 160 | err := client.DeletePermissions("testRole") 161 | assert.Nil(t, err) 162 | 163 | assert.Equal(t, "testRole", *mock.capturedDeleteRolePolicyInput.RoleName) 164 | assert.Equal(t, "corral-permissions", *mock.capturedDeleteRolePolicyInput.PolicyName) 165 | 166 | assert.Equal(t, "testRole", *mock.capturedDeleteRoleInput.RoleName) 167 | } 168 | -------------------------------------------------------------------------------- /internal/pkg/corlambda/client_test.go: -------------------------------------------------------------------------------- 1 | package corlambda 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/base64" 6 | "testing" 7 | 8 | "github.com/aws/aws-sdk-go/service/lambda/lambdaiface" 9 | 10 | "github.com/aws/aws-sdk-go/aws" 11 | 
"github.com/aws/aws-sdk-go/service/lambda" 12 | "github.com/stretchr/testify/assert" 13 | ) 14 | 15 | type lambdaInvokerMock struct { 16 | lambdaiface.LambdaAPI 17 | invokeFailures int 18 | outputPayload []byte 19 | } 20 | 21 | func (m *lambdaInvokerMock) Invoke(*lambda.InvokeInput) (*lambda.InvokeOutput, error) { 22 | if m.invokeFailures > 0 { 23 | m.invokeFailures-- 24 | return &lambda.InvokeOutput{ 25 | FunctionError: aws.String("error"), 26 | }, nil 27 | } 28 | return &lambda.InvokeOutput{ 29 | Payload: m.outputPayload, 30 | }, nil 31 | } 32 | 33 | type lambdaDeployMock struct { 34 | lambdaiface.LambdaAPI 35 | getFunctionOutput *lambda.GetFunctionOutput 36 | capturedCreateFunctionInput *lambda.CreateFunctionInput 37 | capturedUpdateFunctionCodeInput *lambda.UpdateFunctionCodeInput 38 | capturedUpdateFunctionConfigInput *lambda.UpdateFunctionConfigurationInput 39 | capturedDeleteFunctionInput *lambda.DeleteFunctionInput 40 | } 41 | 42 | func (d *lambdaDeployMock) GetFunction(*lambda.GetFunctionInput) (*lambda.GetFunctionOutput, error) { 43 | return d.getFunctionOutput, nil 44 | } 45 | 46 | func (d *lambdaDeployMock) CreateFunction(input *lambda.CreateFunctionInput) (*lambda.FunctionConfiguration, error) { 47 | d.capturedCreateFunctionInput = input 48 | return nil, nil 49 | } 50 | 51 | func (d *lambdaDeployMock) UpdateFunctionCode(input *lambda.UpdateFunctionCodeInput) (*lambda.FunctionConfiguration, error) { 52 | d.capturedUpdateFunctionCodeInput = input 53 | return nil, nil 54 | } 55 | 56 | func (d *lambdaDeployMock) UpdateFunctionConfiguration(input *lambda.UpdateFunctionConfigurationInput) (*lambda.FunctionConfiguration, error) { 57 | d.capturedUpdateFunctionConfigInput = input 58 | return nil, nil 59 | } 60 | 61 | func (d *lambdaDeployMock) DeleteFunction(input *lambda.DeleteFunctionInput) (*lambda.DeleteFunctionOutput, error) { 62 | d.capturedDeleteFunctionInput = input 63 | return nil, nil 64 | } 65 | 66 | func TestFunctionNeedsUpdate(t *testing.T) { 67 | 
functionCode := []byte("function code") 68 | codeHash := sha256.New() 69 | codeHash.Write(functionCode) 70 | codeHashDigest := base64.StdEncoding.EncodeToString(codeHash.Sum(nil)) 71 | 72 | cfg := &lambda.FunctionConfiguration{CodeSha256: aws.String(codeHashDigest)} 73 | 74 | assert.True(t, functionNeedsUpdate([]byte("not function code"), cfg)) 75 | assert.False(t, functionNeedsUpdate(functionCode, cfg)) 76 | } 77 | 78 | func TestInvoke(t *testing.T) { 79 | client := &LambdaClient{ 80 | &lambdaInvokerMock{ 81 | invokeFailures: 0, 82 | outputPayload: []byte("payload"), 83 | }, 84 | } 85 | 86 | output, err := client.Invoke("function", []byte("payload")) 87 | assert.Nil(t, err) 88 | 89 | assert.Equal(t, []byte("payload"), output) 90 | } 91 | 92 | func TestInvokeRetry(t *testing.T) { 93 | client := &LambdaClient{ 94 | &lambdaInvokerMock{ 95 | invokeFailures: 2, 96 | outputPayload: []byte("payload"), 97 | }, 98 | } 99 | 100 | output, err := client.Invoke("function", []byte("payload")) 101 | assert.Nil(t, err) 102 | 103 | assert.Equal(t, []byte("payload"), output) 104 | } 105 | 106 | func TestInvokeOutOfTries(t *testing.T) { 107 | client := &LambdaClient{ 108 | &lambdaInvokerMock{ 109 | invokeFailures: MaxLambdaRetries + 1, 110 | }, 111 | } 112 | 113 | _, err := client.Invoke("function", []byte("payload")) 114 | assert.NotNil(t, err) 115 | } 116 | 117 | func TestCreateFunction(t *testing.T) { 118 | mock := &lambdaDeployMock{} 119 | client := &LambdaClient{mock} 120 | 121 | config := &FunctionConfig{ 122 | Name: "test function", 123 | RoleARN: "testARN", 124 | Timeout: 10, 125 | MemorySize: 1000, 126 | } 127 | 128 | err := client.DeployFunction(config) 129 | assert.Nil(t, err) 130 | 131 | assert.Equal(t, "test function", *mock.capturedCreateFunctionInput.FunctionName) 132 | assert.Equal(t, "testARN", *mock.capturedCreateFunctionInput.Role) 133 | assert.Equal(t, int64(10), *mock.capturedCreateFunctionInput.Timeout) 134 | assert.Equal(t, int64(1000), 
*mock.capturedCreateFunctionInput.MemorySize) 135 | } 136 | 137 | func TestUpdateFunction(t *testing.T) { 138 | mock := &lambdaDeployMock{ 139 | getFunctionOutput: &lambda.GetFunctionOutput{ 140 | Configuration: &lambda.FunctionConfiguration{ 141 | CodeSha256: aws.String("sha"), 142 | Role: aws.String("wrongARN"), 143 | Timeout: aws.Int64(10), 144 | MemorySize: aws.Int64(1000), 145 | }, 146 | }, 147 | } 148 | client := &LambdaClient{mock} 149 | 150 | config := &FunctionConfig{ 151 | Name: "test function", 152 | RoleARN: "testARN", 153 | Timeout: 10, 154 | MemorySize: 1000, 155 | } 156 | 157 | err := client.DeployFunction(config) 158 | assert.Nil(t, err) 159 | 160 | assert.NotNil(t, mock.capturedUpdateFunctionCodeInput) 161 | assert.NotNil(t, mock.capturedUpdateFunctionCodeInput.ZipFile) 162 | assert.NotNil(t, mock.capturedUpdateFunctionCodeInput) 163 | assert.Equal(t, "testARN", *mock.capturedUpdateFunctionConfigInput.Role) 164 | } 165 | 166 | func TestDeleteFunction(t *testing.T) { 167 | mock := &lambdaDeployMock{} 168 | 169 | client := &LambdaClient{mock} 170 | 171 | err := client.DeleteFunction("function") 172 | assert.Nil(t, err) 173 | 174 | assert.Equal(t, "function", *mock.capturedDeleteFunctionInput.FunctionName) 175 | } 176 | -------------------------------------------------------------------------------- /job.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "bufio" 5 | "context" 6 | "encoding/json" 7 | "fmt" 8 | "strings" 9 | "sync" 10 | "sync/atomic" 11 | 12 | "github.com/bcongdon/corral/internal/pkg/corfs" 13 | humanize "github.com/dustin/go-humanize" 14 | log "github.com/sirupsen/logrus" 15 | "golang.org/x/sync/semaphore" 16 | ) 17 | 18 | // Job is the logical container for a MapReduce job 19 | type Job struct { 20 | Map Mapper 21 | Reduce Reducer 22 | PartitionFunc PartitionFunc 23 | 24 | fileSystem corfs.FileSystem 25 | config *config 26 | intermediateBins uint 27 | outputPath 
string 28 | 29 | bytesRead int64 30 | bytesWritten int64 31 | } 32 | 33 | // Logic for running a single map task 34 | func (j *Job) runMapper(mapperID uint, splits []inputSplit) error { 35 | emitter := newMapperEmitter(j.intermediateBins, mapperID, j.outputPath, j.fileSystem) 36 | if j.PartitionFunc != nil { 37 | emitter.partitionFunc = j.PartitionFunc 38 | } 39 | 40 | for _, split := range splits { 41 | err := j.runMapperSplit(split, &emitter) 42 | if err != nil { 43 | return err 44 | } 45 | } 46 | 47 | atomic.AddInt64(&j.bytesWritten, emitter.bytesWritten()) 48 | 49 | return emitter.close() 50 | } 51 | 52 | func splitInputRecord(record string) *keyValue { 53 | fields := strings.Split(record, "\t") 54 | if len(fields) == 2 { 55 | return &keyValue{ 56 | Key: fields[0], 57 | Value: fields[1], 58 | } 59 | } 60 | return &keyValue{ 61 | Value: record, 62 | } 63 | } 64 | 65 | // runMapperSplit runs the mapper on a single inputSplit 66 | func (j *Job) runMapperSplit(split inputSplit, emitter Emitter) error { 67 | offset := split.StartOffset 68 | if split.StartOffset != 0 { 69 | offset-- 70 | } 71 | 72 | inputSource, err := j.fileSystem.OpenReader(split.Filename, split.StartOffset) 73 | if err != nil { 74 | return err 75 | } 76 | 77 | scanner := bufio.NewScanner(inputSource) 78 | var bytesRead int64 79 | splitter := countingSplitFunc(bufio.ScanLines, &bytesRead) 80 | scanner.Split(splitter) 81 | 82 | if split.StartOffset != 0 { 83 | scanner.Scan() 84 | } 85 | 86 | for scanner.Scan() { 87 | record := scanner.Text() 88 | kv := splitInputRecord(record) 89 | j.Map.Map(kv.Key, kv.Value, emitter) 90 | 91 | // Stop reading when end of inputSplit is reached 92 | pos := bytesRead 93 | if split.Size() > 0 && pos > split.Size() { 94 | break 95 | } 96 | } 97 | 98 | atomic.AddInt64(&j.bytesRead, bytesRead) 99 | 100 | return nil 101 | } 102 | 103 | // Logic for running a single reduce task 104 | func (j *Job) runReducer(binID uint) error { 105 | // Determine the intermediate data files 
this reducer is responsible for 106 | path := j.fileSystem.Join(j.outputPath, fmt.Sprintf("map-bin%d-*", binID)) 107 | files, err := j.fileSystem.ListFiles(path) 108 | if err != nil { 109 | return err 110 | } 111 | 112 | // Open emitter for output data 113 | path = j.fileSystem.Join(j.outputPath, fmt.Sprintf("output-part-%d", binID)) 114 | emitWriter, err := j.fileSystem.OpenWriter(path) 115 | defer emitWriter.Close() 116 | if err != nil { 117 | return err 118 | } 119 | 120 | data := make(map[string][]string, 0) 121 | var bytesRead int64 122 | 123 | for _, file := range files { 124 | reader, err := j.fileSystem.OpenReader(file.Name, 0) 125 | bytesRead += file.Size 126 | if err != nil { 127 | return err 128 | } 129 | 130 | // Feed intermediate data into reducers 131 | decoder := json.NewDecoder(reader) 132 | for decoder.More() { 133 | var kv keyValue 134 | if err := decoder.Decode(&kv); err != nil { 135 | return err 136 | } 137 | 138 | if _, ok := data[kv.Key]; !ok { 139 | data[kv.Key] = make([]string, 0) 140 | } 141 | 142 | data[kv.Key] = append(data[kv.Key], kv.Value) 143 | } 144 | reader.Close() 145 | 146 | // Delete intermediate map data 147 | if j.config.Cleanup { 148 | err := j.fileSystem.Delete(file.Name) 149 | if err != nil { 150 | log.Error(err) 151 | } 152 | } 153 | } 154 | 155 | var waitGroup sync.WaitGroup 156 | sem := semaphore.NewWeighted(10) 157 | 158 | emitter := newReducerEmitter(emitWriter) 159 | for key, values := range data { 160 | sem.Acquire(context.Background(), 1) 161 | waitGroup.Add(1) 162 | go func(key string, values []string) { 163 | defer sem.Release(1) 164 | 165 | keyChan := make(chan string) 166 | keyIter := newValueIterator(keyChan) 167 | 168 | go func() { 169 | defer waitGroup.Done() 170 | j.Reduce.Reduce(key, keyIter, emitter) 171 | }() 172 | 173 | for _, value := range values { 174 | // Pass current value to the appropriate key channel 175 | keyChan <- value 176 | } 177 | close(keyChan) 178 | }(key, values) 179 | } 180 | 181 | 
waitGroup.Wait() 182 | 183 | atomic.AddInt64(&j.bytesWritten, emitter.bytesWritten()) 184 | atomic.AddInt64(&j.bytesRead, bytesRead) 185 | 186 | return nil 187 | } 188 | 189 | // inputSplits calculates all input files' inputSplits. 190 | // inputSplits also determines and saves the number of intermediate bins that will be used during the shuffle. 191 | func (j *Job) inputSplits(inputs []string, maxSplitSize int64) []inputSplit { 192 | files := make([]string, 0) 193 | for _, inputPath := range inputs { 194 | fileInfos, err := j.fileSystem.ListFiles(inputPath) 195 | if err != nil { 196 | log.Warn(err) 197 | continue 198 | } 199 | 200 | for _, fInfo := range fileInfos { 201 | files = append(files, fInfo.Name) 202 | } 203 | } 204 | 205 | splits := make([]inputSplit, 0) 206 | var totalSize int64 207 | for _, inputFileName := range files { 208 | fInfo, err := j.fileSystem.Stat(inputFileName) 209 | if err != nil { 210 | log.Warnf("Unable to load input file: %s (%s)", inputFileName, err) 211 | continue 212 | } 213 | 214 | totalSize += fInfo.Size 215 | splits = append(splits, splitInputFile(fInfo, maxSplitSize)...) 216 | } 217 | if len(files) > 0 { 218 | log.Debugf("Average split size: %s bytes", humanize.Bytes(uint64(totalSize)/uint64(len(splits)))) 219 | } 220 | 221 | j.intermediateBins = uint(float64(totalSize/j.config.ReduceBinSize) * 1.25) 222 | if j.intermediateBins == 0 { 223 | j.intermediateBins = 1 224 | } 225 | 226 | return splits 227 | } 228 | 229 | // NewJob creates a new job from a Mapper and Reducer. 
230 | func NewJob(mapper Mapper, reducer Reducer) *Job { 231 | return &Job{ 232 | Map: mapper, 233 | Reduce: reducer, 234 | config: &config{}, 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /job_test.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestSplitInputRecord(t *testing.T) { 10 | var splitRecordTests = []struct { 11 | input string 12 | expectedKey string 13 | expectedValue string 14 | }{ 15 | {"foo\tbar", "foo", "bar"}, 16 | {"foo\tbar\tbaz", "", "foo\tbar\tbaz"}, 17 | {"foo bar baz", "", "foo bar baz"}, 18 | {"key without value\t", "key without value", ""}, 19 | {"\tvalue without key", "", "value without key"}, 20 | } 21 | 22 | for _, test := range splitRecordTests { 23 | keyVal := splitInputRecord(test.input) 24 | assert.Equal(t, test.expectedKey, keyVal.Key) 25 | assert.Equal(t, test.expectedValue, keyVal.Value) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /lambda.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "fmt" 7 | "os" 8 | "runtime/debug" 9 | "strconv" 10 | "sync/atomic" 11 | 12 | log "github.com/sirupsen/logrus" 13 | "github.com/spf13/viper" 14 | 15 | "github.com/bcongdon/corral/internal/pkg/corfs" 16 | "github.com/bcongdon/corral/internal/pkg/coriam" 17 | "github.com/bcongdon/corral/internal/pkg/corlambda" 18 | ) 19 | 20 | var ( 21 | lambdaDriver *Driver 22 | ) 23 | 24 | // corralRoleName is the name to use when deploying an IAM role 25 | const corralRoleName = "CorralExecutionRole" 26 | 27 | // runningInLambda infers if the program is running in AWS lambda via inspection of the environment 28 | func runningInLambda() bool { 29 | expectedEnvVars := []string{"LAMBDA_TASK_ROOT", 
"AWS_EXECUTION_ENV", "LAMBDA_RUNTIME_DIR"} 30 | for _, envVar := range expectedEnvVars { 31 | if os.Getenv(envVar) == "" { 32 | return false 33 | } 34 | } 35 | return true 36 | } 37 | 38 | func prepareResult(job *Job) string { 39 | result := taskResult{ 40 | BytesRead: int(job.bytesRead), 41 | BytesWritten: int(job.bytesWritten), 42 | } 43 | 44 | payload, _ := json.Marshal(result) 45 | return string(payload) 46 | } 47 | 48 | func handleRequest(ctx context.Context, task task) (string, error) { 49 | // Precaution to avoid running out of memory for reused Lambdas 50 | debug.FreeOSMemory() 51 | 52 | // Setup current job 53 | fs := corfs.InitFilesystem(task.FileSystemType) 54 | currentJob := lambdaDriver.jobs[task.JobNumber] 55 | currentJob.fileSystem = fs 56 | currentJob.intermediateBins = task.IntermediateBins 57 | currentJob.outputPath = task.WorkingLocation 58 | currentJob.config.Cleanup = task.Cleanup 59 | 60 | // Need to reset job counters in case this is a reused lambda 61 | currentJob.bytesRead = 0 62 | currentJob.bytesWritten = 0 63 | 64 | if task.Phase == MapPhase { 65 | err := currentJob.runMapper(task.BinID, task.Splits) 66 | return prepareResult(currentJob), err 67 | } else if task.Phase == ReducePhase { 68 | err := currentJob.runReducer(task.BinID) 69 | return prepareResult(currentJob), err 70 | } 71 | return "", fmt.Errorf("Unknown phase: %d", task.Phase) 72 | } 73 | 74 | type lambdaExecutor struct { 75 | *corlambda.LambdaClient 76 | *coriam.IAMClient 77 | functionName string 78 | } 79 | 80 | func newLambdaExecutor(functionName string) *lambdaExecutor { 81 | return &lambdaExecutor{ 82 | LambdaClient: corlambda.NewLambdaClient(), 83 | IAMClient: coriam.NewIAMClient(), 84 | functionName: functionName, 85 | } 86 | } 87 | 88 | func loadTaskResult(payload []byte) taskResult { 89 | // Unescape JSON string 90 | payloadStr, _ := strconv.Unquote(string(payload)) 91 | 92 | var result taskResult 93 | err := json.Unmarshal([]byte(payloadStr), &result) 94 | if err != 
nil { 95 | log.Errorf("%s", err) 96 | } 97 | return result 98 | } 99 | 100 | func (l *lambdaExecutor) RunMapper(job *Job, jobNumber int, binID uint, inputSplits []inputSplit) error { 101 | mapTask := task{ 102 | JobNumber: jobNumber, 103 | Phase: MapPhase, 104 | BinID: binID, 105 | Splits: inputSplits, 106 | IntermediateBins: job.intermediateBins, 107 | FileSystemType: corfs.S3, 108 | WorkingLocation: job.outputPath, 109 | } 110 | payload, err := json.Marshal(mapTask) 111 | if err != nil { 112 | return err 113 | } 114 | 115 | resultPayload, err := l.Invoke(l.functionName, payload) 116 | taskResult := loadTaskResult(resultPayload) 117 | 118 | atomic.AddInt64(&job.bytesRead, int64(taskResult.BytesRead)) 119 | atomic.AddInt64(&job.bytesWritten, int64(taskResult.BytesWritten)) 120 | 121 | return err 122 | } 123 | 124 | func (l *lambdaExecutor) RunReducer(job *Job, jobNumber int, binID uint) error { 125 | mapTask := task{ 126 | JobNumber: jobNumber, 127 | Phase: ReducePhase, 128 | BinID: binID, 129 | FileSystemType: corfs.S3, 130 | WorkingLocation: job.outputPath, 131 | Cleanup: job.config.Cleanup, 132 | } 133 | payload, err := json.Marshal(mapTask) 134 | if err != nil { 135 | return err 136 | } 137 | 138 | resultPayload, err := l.Invoke(l.functionName, payload) 139 | taskResult := loadTaskResult(resultPayload) 140 | 141 | atomic.AddInt64(&job.bytesRead, int64(taskResult.BytesRead)) 142 | atomic.AddInt64(&job.bytesWritten, int64(taskResult.BytesWritten)) 143 | 144 | return err 145 | } 146 | 147 | func (l *lambdaExecutor) Deploy() { 148 | var roleARN string 149 | var err error 150 | if viper.GetBool("lambdaManageRole") { 151 | roleARN, err = l.DeployPermissions(corralRoleName) 152 | if err != nil { 153 | panic(err) 154 | } 155 | } else { 156 | roleARN = viper.GetString("lambdaRoleARN") 157 | } 158 | 159 | config := &corlambda.FunctionConfig{ 160 | Name: l.functionName, 161 | RoleARN: roleARN, 162 | Timeout: viper.GetInt64("lambdaTimeout"), 163 | MemorySize: 
viper.GetInt64("lambdaMemory"), 164 | } 165 | err = l.DeployFunction(config) 166 | if err != nil { 167 | panic(err) 168 | } 169 | } 170 | 171 | func (l *lambdaExecutor) Undeploy() { 172 | log.Info("Undeploying function") 173 | err := l.LambdaClient.DeleteFunction(l.functionName) 174 | if err != nil { 175 | log.Errorf("Error when undeploying function: %s", err) 176 | } 177 | 178 | log.Info("Undeploying IAM Permissions") 179 | err = l.IAMClient.DeletePermissions(corralRoleName) 180 | if err != nil { 181 | log.Errorf("Error when undeploying IAM permissions: %s", err) 182 | } 183 | } 184 | -------------------------------------------------------------------------------- /lambda_test.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | "testing" 8 | 9 | "github.com/spf13/viper" 10 | 11 | "github.com/aws/aws-sdk-go/service/lambda" 12 | "github.com/aws/aws-sdk-go/service/lambda/lambdaiface" 13 | 14 | "github.com/bcongdon/corral/internal/pkg/corfs" 15 | "github.com/bcongdon/corral/internal/pkg/corlambda" 16 | 17 | "github.com/stretchr/testify/assert" 18 | ) 19 | 20 | func TestRunningInLambda(t *testing.T) { 21 | res := runningInLambda() 22 | assert.False(t, res) 23 | 24 | for _, env := range []string{"LAMBDA_TASK_ROOT", "AWS_EXECUTION_ENV", "LAMBDA_RUNTIME_DIR"} { 25 | os.Setenv(env, "value") 26 | } 27 | 28 | res = runningInLambda() 29 | assert.True(t, res) 30 | } 31 | 32 | func TestHandleRequest(t *testing.T) { 33 | testTask := task{ 34 | JobNumber: 0, 35 | Phase: MapPhase, 36 | BinID: 0, 37 | IntermediateBins: 10, 38 | Splits: []inputSplit{}, 39 | FileSystemType: corfs.Local, 40 | WorkingLocation: ".", 41 | } 42 | 43 | job := &Job{ 44 | config: &config{}, 45 | } 46 | 47 | // These values should be reset to 0 by Lambda handler function 48 | job.bytesRead = 10 49 | job.bytesWritten = 20 50 | 51 | lambdaDriver = NewDriver(job) 52 | 53 | output, err := 
handleRequest(context.Background(), testTask) 54 | assert.Nil(t, err) 55 | assert.Equal(t, "{\"BytesRead\":0,\"BytesWritten\":0}", output) 56 | 57 | testTask.Phase = ReducePhase 58 | output, err = handleRequest(context.Background(), testTask) 59 | assert.Nil(t, err) 60 | assert.Equal(t, "{\"BytesRead\":0,\"BytesWritten\":0}", output) 61 | } 62 | 63 | type mockLambdaClient struct { 64 | lambdaiface.LambdaAPI 65 | capturedPayload []byte 66 | } 67 | 68 | func (m *mockLambdaClient) Invoke(input *lambda.InvokeInput) (*lambda.InvokeOutput, error) { 69 | m.capturedPayload = input.Payload 70 | return &lambda.InvokeOutput{}, nil 71 | } 72 | 73 | func (*mockLambdaClient) GetFunction(*lambda.GetFunctionInput) (*lambda.GetFunctionOutput, error) { 74 | return nil, nil 75 | } 76 | 77 | func (*mockLambdaClient) CreateFunction(*lambda.CreateFunctionInput) (*lambda.FunctionConfiguration, error) { 78 | return nil, nil 79 | } 80 | 81 | func TestRunLambdaMapper(t *testing.T) { 82 | mock := &mockLambdaClient{} 83 | executor := &lambdaExecutor{ 84 | &corlambda.LambdaClient{ 85 | Client: mock, 86 | }, 87 | nil, 88 | "FunctionName", 89 | } 90 | 91 | job := &Job{ 92 | config: &config{WorkingLocation: "."}, 93 | } 94 | err := executor.RunMapper(job, 0, 10, []inputSplit{}) 95 | assert.Nil(t, err) 96 | 97 | var taskPayload task 98 | err = json.Unmarshal(mock.capturedPayload, &taskPayload) 99 | assert.Nil(t, err) 100 | 101 | assert.Equal(t, uint(10), taskPayload.BinID) 102 | assert.Equal(t, MapPhase, taskPayload.Phase) 103 | } 104 | 105 | func TestRunLambdaReducer(t *testing.T) { 106 | mock := &mockLambdaClient{} 107 | executor := &lambdaExecutor{ 108 | &corlambda.LambdaClient{ 109 | Client: mock, 110 | }, 111 | nil, 112 | "FunctionName", 113 | } 114 | 115 | job := &Job{ 116 | config: &config{WorkingLocation: "."}, 117 | } 118 | err := executor.RunReducer(job, 0, 10) 119 | assert.Nil(t, err) 120 | 121 | var taskPayload task 122 | err = json.Unmarshal(mock.capturedPayload, &taskPayload) 123 | 
assert.Nil(t, err) 124 | 125 | assert.Equal(t, uint(10), taskPayload.BinID) 126 | assert.Equal(t, ReducePhase, taskPayload.Phase) 127 | } 128 | 129 | func TestDeployFunction(t *testing.T) { 130 | mock := &mockLambdaClient{} 131 | executor := &lambdaExecutor{ 132 | &corlambda.LambdaClient{ 133 | Client: mock, 134 | }, 135 | nil, 136 | "FunctionName", 137 | } 138 | 139 | viper.SetDefault("lambdaManageRole", false) // Disable testing role deployment 140 | executor.Deploy() 141 | } 142 | -------------------------------------------------------------------------------- /mapreduce.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | // ValueIterator iterates over a sequence of values. 4 | // This is used during the Reduce phase, wherein a reduce task 5 | // iterates over all values for a particular key. 6 | type ValueIterator struct { 7 | values chan string 8 | } 9 | 10 | // Iter iterates over all the values in the iterator. 11 | func (v *ValueIterator) Iter() <-chan string { 12 | return v.values 13 | } 14 | 15 | func newValueIterator(c chan string) ValueIterator { 16 | return ValueIterator{ 17 | values: c, 18 | } 19 | } 20 | 21 | // Mapper defines the interface for a Map task. 22 | type Mapper interface { 23 | Map(key, value string, emitter Emitter) 24 | } 25 | 26 | // Reducer defines the interface for a Reduce task. 27 | type Reducer interface { 28 | Reduce(key string, values ValueIterator, emitter Emitter) 29 | } 30 | 31 | // PartitionFunc defines a function that can be used to segment map keys into intermediate buckets. 32 | // The default partition function simply hashes the key, and takes hash % numBins to determine the bin. 33 | // The value returned from PartitionFunc (binIdx) must be in the range 0 <= binIdx < numBins, i.e. 
type (
	// PartitionFunc maps a map-output key to one of numBins intermediate buckets.
	// The default partition function simply hashes the key and takes
	// hash % numBins to determine the bin.
	// The returned binIdx must satisfy 0 <= binIdx < numBins, i.e. lie in [0, numBins).
	PartitionFunc func(key string, numBins uint) (binIdx uint)

	// keyValue holds one intermediate shuffle record as a key-value pair.
	// The JSON field names are "key" and "value".
	keyValue struct {
		Key   string `json:"key"`
		Value string `json:"value"`
	}
)
15 | type inputSplit struct { 16 | Filename string // The file that the input split operates on 17 | StartOffset int64 // The starting byte index of the split in the file 18 | EndOffset int64 // The ending byte index (inclusive) of the split in the file 19 | } 20 | 21 | // Size returns the number of bytes that the inputSplit spans 22 | func (i inputSplit) Size() int64 { 23 | return i.EndOffset - i.StartOffset + 1 24 | } 25 | 26 | func min(a, b int64) int64 { 27 | if a < b { 28 | return a 29 | } 30 | return b 31 | } 32 | 33 | func splitInputFile(file corfs.FileInfo, maxSplitSize int64) []inputSplit { 34 | splits := make([]inputSplit, 0) 35 | 36 | for startOffset := int64(0); startOffset < file.Size; startOffset += maxSplitSize { 37 | endOffset := min(startOffset+maxSplitSize-1, file.Size-1) 38 | newSplit := inputSplit{ 39 | Filename: file.Name, 40 | StartOffset: startOffset, 41 | EndOffset: endOffset, 42 | } 43 | splits = append(splits, newSplit) 44 | } 45 | 46 | return splits 47 | } 48 | 49 | // inputBin is a collection of inputSplits. 50 | type inputBin struct { 51 | splits []inputSplit 52 | // The total size of the inputBin. (The sum of the size of all splits) 53 | size int64 54 | } 55 | 56 | // packInputSplits partitions inputSplits into bins. 
57 | // The combined size of each bin will be no greater than maxBinSize 58 | func packInputSplits(splits []inputSplit, maxBinSize int64) [][]inputSplit { 59 | if len(splits) == 0 { 60 | return [][]inputSplit{} 61 | } 62 | 63 | bins := make([]*inputBin, 1) 64 | bins[0] = &inputBin{ 65 | splits: make([]inputSplit, 0), 66 | size: 0, 67 | } 68 | 69 | // Partition splits into bins using a naive Next-Fit packing algorithm 70 | for _, split := range splits { 71 | currBin := bins[len(bins)-1] 72 | 73 | if currBin.size+split.Size() <= maxBinSize { 74 | currBin.splits = append(currBin.splits, split) 75 | currBin.size += split.Size() 76 | } else { 77 | newBin := &inputBin{ 78 | splits: []inputSplit{split}, 79 | size: split.Size(), 80 | } 81 | bins = append(bins, newBin) 82 | } 83 | } 84 | 85 | binnedSplits := make([][]inputSplit, len(bins)) 86 | totalSize := int64(0) 87 | for i, bin := range bins { 88 | totalSize += bin.size 89 | binnedSplits[i] = bin.splits 90 | } 91 | log.Debugf("Average input bin size: %s", humanize.Bytes(uint64(totalSize/int64(len(bins))))) 92 | return binnedSplits 93 | } 94 | 95 | // countingSplitFunc wraps a bufio.SplitFunc and keeps track of the number of bytes advanced. 96 | // Upon each scan, the value of *bytesRead will be incremented by the number of bytes 97 | // that the SplitFunc advances. 
// countingSplitFunc decorates a bufio.SplitFunc so that the number of bytes it
// consumes is tallied. Every time the returned function is called, the advance
// reported by split is added to *bytesRead; the advance, token and error are
// then passed through to the caller unchanged.
func countingSplitFunc(split bufio.SplitFunc, bytesRead *int64) bufio.SplitFunc {
	counted := func(data []byte, atEOF bool) (int, []byte, error) {
		consumed, token, err := split(data, atEOF)
		*bytesRead += int64(consumed)
		return consumed, token, err
	}
	return counted
}
test.fileSize, 63 | } 64 | 65 | splits := splitInputFile(fInfo, test.maxSplitSize) 66 | 67 | assert.Equal(t, len(test.expectedSplitStarts), len(splits), fmt.Sprintln(splits)) 68 | for i, split := range splits { 69 | assert.Equal(t, test.expectedSplitStarts[i], split.StartOffset) 70 | assert.Equal(t, test.expectedSplitEnds[i], split.EndOffset) 71 | } 72 | } 73 | } 74 | 75 | func TestSplitSize(t *testing.T) { 76 | var splitSizeTests = []struct { 77 | startOffset int64 78 | endOffset int64 79 | expectedSize int64 80 | }{ 81 | {0, 9, 10}, 82 | {100, 999, 900}, 83 | {1000, 1000, 1}, 84 | } 85 | 86 | for _, test := range splitSizeTests { 87 | split := inputSplit{ 88 | StartOffset: test.startOffset, 89 | EndOffset: test.endOffset, 90 | } 91 | assert.Equal(t, test.expectedSize, split.Size()) 92 | } 93 | } 94 | 95 | func TestCountingSplitFunc(t *testing.T) { 96 | var bytesRead int64 97 | splitFunc := countingSplitFunc(bufio.ScanLines, &bytesRead) 98 | 99 | buf := new(bytes.Buffer) 100 | buf.Write([]byte("foo\n123456\na")) 101 | 102 | scanner := bufio.NewScanner(buf) 103 | scanner.Split(splitFunc) 104 | 105 | assert.Equal(t, int64(0), bytesRead) 106 | 107 | scanner.Scan() 108 | assert.Equal(t, int64(4), bytesRead) 109 | assert.Equal(t, "foo", scanner.Text()) 110 | 111 | scanner.Scan() 112 | assert.Equal(t, int64(4+7), bytesRead) 113 | assert.Equal(t, "123456", scanner.Text()) 114 | 115 | scanner.Scan() 116 | assert.Equal(t, int64(4+7+1), bytesRead) 117 | assert.Equal(t, "a", scanner.Text()) 118 | } 119 | -------------------------------------------------------------------------------- /task.go: -------------------------------------------------------------------------------- 1 | package corral 2 | 3 | import ( 4 | "github.com/bcongdon/corral/internal/pkg/corfs" 5 | ) 6 | 7 | // Phase is a descriptor of the phase (i.e. 
// Phase is a descriptor of the phase (i.e. Map or Reduce) of a Job.
type Phase int

// Descriptors of the Job phase.
// The numeric values are spelled out explicitly: MapPhase is 0 and ReducePhase is 1.
const (
	MapPhase    Phase = 0
	ReducePhase Phase = 1
)