├── .gitignore
├── .travis.yml
├── CHANGELOG.md
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── eval
    ├── evalresult.py
    ├── runtask.sh
    ├── test.sh
    └── testblob.py
├── pom.xml
└── src
    ├── main
        └── java
        │   └── com
        │       └── twitter
        │           └── crunch
        │               ├── AssignmentTracker.java
        │               ├── AssignmentTrackerFactory.java
        │               ├── AssignmentTrackerImpl.java
        │               ├── BaseRackIsolationPlacementRules.java
        │               ├── CRUSHPlacementAlgorithm.java
        │               ├── ConsistentHashingSelector.java
        │               ├── Crunch.java
        │               ├── JenkinsHash.java
        │               ├── MappingDiff.java
        │               ├── MappingEvaluator.java
        │               ├── MappingException.java
        │               ├── MappingFunction.java
        │               ├── MessageDigestHash.java
        │               ├── MultiInputHash.java
        │               ├── NoOpAssignmentTracker.java
        │               ├── Node.java
        │               ├── NodeFilter.java
        │               ├── PlacementAlgorithm.java
        │               ├── PlacementRules.java
        │               ├── ProbingRDFMapping.java
        │               ├── RDFCRUSHMapping.java
        │               ├── RDFMapping.java
        │               ├── RackBasedTypes.java
        │               ├── RackIsolationPlacementRules.java
        │               ├── RandomSelector.java
        │               ├── Selector.java
        │               ├── SimpleCRUSHMapping.java
        │               ├── StableRdfMapping.java
        │               ├── StorageSystemTypes.java
        │               ├── StrawSelector.java
        │               ├── Types.java
        │               └── Utils.java
    └── test
        ├── java
            └── com
            │   └── twitter
            │       └── crunch
            │           ├── AssignmentTrackerImplTest.java
            │           ├── BaseSelectionTest.java
            │           ├── ConsistentHashingSelectionTest.java
            │           ├── CrunchTest.java
            │           ├── CrunchTestSuite.java
            │           ├── MappingDiffTest.java
            │           ├── MessageDigestHashTest.java
            │           ├── NodeTest.java
            │           ├── RandomSelectionTest.java
            │           ├── StrawSelectionTest.java
            │           ├── TestUtils.java
            │           ├── Topology.java
            │           ├── integrated
            │               ├── RDFBalanceTest.java
            │               ├── RDFStabilityTest.java
            │               └── SiblingBiasTest.java
            │           └── tools
            │               ├── CalculateMovement.java
            │               ├── CreateBlobstoreMapping.java
            │               ├── CreateDataMapping.java
            │               ├── EvaluateMapping.java
            │               ├── EvaluateRDFMapping.java
            │               ├── InvalidTopologyException.java
            │               ├── TopologyGenerator.java
            │               ├── Utils.java
            │               ├── YamlTopologyFactory.java
            │               └── jsontopology
            │                   ├── JsonTopology.java
            │                   ├── JsonTopologyDeserializer.java
            │                   ├── JsonTopologySerializer.java
            │                   ├── MappingParameters.java
            │                   ├── Topology.java
            │                   ├── TopologyDeserializer.java
            │                   └── TopologySerializer.java
        └── resources
            ├── logback-test.xml
            ├── mapping.txt
            ├── topology.json
            └── topology.template.yaml


/.gitignore:
--------------------------------------------------------------------------------
 1 | .idea/
 2 | *.iml
 3 | *.log
 4 | target/
 5 | .settings
 6 | .metadata/
 7 | .cache
 8 | .classpath
 9 | .classpath.txt
10 | .project
11 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: java
2 | 


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | == HEAD
2 | 
3 | == 1.0.0 (June 19, 2013)
4 | 
5 | * Initial public release.
6 | 


--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
 1 | # Contributing to Libcrunch
 2 | 
 3 | Looking to contribute something to libcrunch? Here's how you can help.
 4 | 
 5 | ## Bug reports
 6 | 
 7 | A bug is a _demonstrable problem_ that is caused by the code in the
 8 | repository. Good bug reports are extremely helpful - thank you!
 9 | 
10 | Guidelines for bug reports:
11 | 
12 | 1. **Use the GitHub issue search** &mdash; check if the issue has already been
13 |    reported.
14 | 
15 | 2. **Check if the issue has been fixed** &mdash; try to reproduce it using the
16 |    latest `master` or development branch in the repository.
17 | 
18 | 3. **Isolate the problem** &mdash; ideally create a reduced test
19 |    case and a live example.
20 | 
21 | 4. Please try to be as detailed as possible in your report. Include specific
22 |    information about the environment - operating system and version, java
23 |    and version, version of libcrunch - and steps required to reproduce the issue.
24 | 
25 | 
26 | ## Feature requests & contribution enquiries
27 | 
28 | Feature requests are welcome. But take a moment to find out whether your idea
29 | fits with the scope and aims of the project. It's up to *you* to make a strong
30 | case for the inclusion of your feature. Please provide as much detail and
31 | context as possible.
32 | 
33 | Contribution enquiries should take place before any significant pull request,
34 | otherwise you risk spending a lot of time working on something that we might
35 | have good reasons for rejecting.
36 | 
37 | 
38 | ## Pull requests
39 | 
40 | Good pull requests - patches, improvements, new features - are a fantastic
41 | help. They should remain focused in scope and avoid containing unrelated
42 | commits.
43 | 
44 | Make sure to adhere to the coding conventions used throughout the codebase
45 | (indentation, accurate comments, etc.) and any other requirements (such as test
46 | coverage).
47 | 
48 | Please follow this process; it's the best way to get your work included in the
49 | project:
50 | 
51 | 1. Create a new topic branch to contain your feature, change, or fix.
52 | 
53 | 2. Commit your changes in logical chunks. Provide clear and explanatory commit
54 |    messages. Use git's [interactive rebase](https://help.github.com/articles/interactive-rebase)
55 |    feature to tidy up your commits before making them public.
56 | 
57 | 3. Locally merge (or rebase) the upstream development branch into your topic branch.
58 | 
59 | 4. Push your topic branch up to your fork.
60 | 
61 | 5. [Open a Pull Request](http://help.github.com/send-pull-requests/) with a
62 |    clear title and description.
63 | 
64 | ## License
65 | 
66 | By contributing your code, you agree to license your contribution under the
67 | terms of the Apache License 2.0:
68 | 
69 | https://github.com/twitter/libcrunch/blob/master/LICENSE


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # STATUS
 2 | 
 3 | Twitter is no longer maintaining this project or responding to issues or PRs.
 4 | 
 5 | # libcrunch [![Build Status](https://travis-ci.org/twitter/libcrunch.png?branch=master)](https://travis-ci.org/twitter/libcrunch)
 6 | Libcrunch is a lightweight mapping framework that maps data objects to a number of nodes, subject to user-specified constraints.
 7 | 
 8 | The libcrunch implementation was heavily inspired by the paper on the [CRUSH algorithm](http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf).
 9 | 
10 | ## Features
11 | * flexible cluster topology definition
12 | * define your placement rules
13 | * supports replication factor (RF) and replica distribution factor (RDF)
14 | * balanced distribution of data that reflects weights
15 | * stability against topology changes
16 | * supports target balancing
17 | 
18 | ## Getting Started
19 | The latest libcrunch artifacts are published to maven central. You can include libcrunch in your project by adding the following to your maven pom.xml file:
20 | 
21 | ```xml
22 |   <dependencies>
23 |     <dependency>
24 |       <groupId>com.twitter</groupId>
25 |       <artifactId>libcrunch</artifactId>
26 |       <version>1.0.0</version>
27 |     </dependency>
28 |   </dependencies>
29 | ```
30 | 
31 | ### Quickstart
32 | Creating and using the libcrunch mapping function is pretty straightforward. Once you define your data and the inputs to the mapping function, you get the mapping result via the `computeMapping` method. For example, to use the RDF mapping:
33 | 
34 | ```java
35 | // set up the input to the mapping function
36 | PlacementRules rules = createPlacementRules();
37 | 
38 | // instantiate the mapping function
39 | MappingFunction mappingFunction = new RDFMapping(rdf, rf, rules, targetBalance);
40 | 
41 | // prepare your data
42 | List<Long> data = prepareYourDataIds();
43 | // set up the topology
44 | Node root = createTopology();
45 | 
46 | // compute the mapping
47 | Map<Long,List<Node>> mapping = mappingFunction.computeMapping(data, root);
48 | ```
49 | 
50 | ## Problems?
51 | 
52 | If you find any issues please [report them](https://github.com/twitter/libcrunch/issues) or better,
53 | send a [pull request](https://github.com/twitter/libcrunch/pulls).
54 | 
55 | ## Authors:
56 | * Jerry Xu
57 | * Peter Schuller
58 | * Sangjin Lee
59 | 
60 | ## License
61 | Copyright 2013 Twitter, Inc.
62 | 
63 | Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0
64 | 


--------------------------------------------------------------------------------
/eval/evalresult.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Copyright 2013 Twitter, Inc.
 3 | Licensed under the Apache License, Version 2.0 (the "License");
 4 | you may not use this file except in compliance with the License.
 5 | You may obtain a copy of the License at
 6 | 
 7 | http://www.apache.org/licenses/LICENSE-2.0
 8 | 
 9 | Unless required by applicable law or agreed to in writing, software
10 | distributed under the License is distributed on an "AS IS" BASIS,
11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 | See the License for the specific language governing permissions and
13 | limitations under the License.
14 | """
15 | 
16 | import argparse
17 | 
18 | import glob
19 | import csv
20 | import re
21 | 
22 | 
def evaluateMappings(result_files, start_point, print_count, output_filename):
    """Summarise per-scenario result CSV files into one CSV file.

    Each path in ``result_files`` is expected to look like
    ``<dir>/rdf-<rdf>-rd-<rd>-tb-<tb>.csv``.  For every such file the first
    ``start_point`` rows are skipped, then column 5 (as int, accumulated as
    ``moves``) and column 3 (as float, accumulated as ``std``) are summed over
    the remaining rows.  One line ``<rdf>,<rd>,<tb>,<moves>,<std>`` per file is
    written to ``output_filename``, in the order the files were given.

    Paths that do not match the pattern are reported on stdout and skipped;
    the previous code printed the message and then crashed with
    ``AttributeError`` when it dereferenced the failed match.

    Args:
        result_files: iterable of result CSV paths.
        start_point: number of leading rows to skip in every file.
        print_count: when true, print the number of rows summed for each file.
        output_filename: path of the summary file; it is overwritten.
    """
    summary_lines = []
    for result_file in result_files:
        m = re.match(r'.*/rdf-(.*)-rd-(.*)-tb-(.*)\.csv', result_file)
        if not m:
            print("Cannot parse " + result_file)
            continue

        with open(result_file, 'r') as result:
            result_reader = csv.reader(result, delimiter=',')
            # Skip the leading rows.  A file shorter than start_point simply
            # contributes zero rows instead of raising StopIteration.
            for _ in range(start_point):
                if next(result_reader, None) is None:
                    break
            moves = 0
            std = 0
            c = 0
            for row in result_reader:
                moves += int(row[5])
                std += float(row[3])
                c += 1
            if print_count:
                print(c)

        summary_lines.append(",".join([m.group(1), m.group(2), m.group(3), str(moves), str(std)]) + "\n")

    # The context manager guarantees the summary is flushed and closed even if
    # the write fails part-way.
    with open(output_filename, 'w') as f:
        f.write("".join(summary_lines))
51 | 
52 | 
def main():
    """Command-line entry point.

    Parses the options, collects every ``*.csv`` file directly under the
    result path, and hands the sorted list to ``evaluateMappings``.
    """
    cli = argparse.ArgumentParser(description='Evaluate mapping files for topologies from Blobstore')
    # One (flag, keyword-arguments) pair per supported option.
    option_specs = (
        ("-t", dict(dest='result_path', type=str, required=True, help='path for the result files')),
        ("-o", dict(dest='output_filename', type=str, required=True, help='output file name')),
        ("-s", dict(dest='start_point', type=int, required=False, default=1, help='starting point for the calculation')),
        ("-c", dict(dest='print_count', action="store_true", required=False, default=False, help='print count')),
    )
    for flag, spec in option_specs:
        cli.add_argument(flag, **spec)
    options = cli.parse_args()

    # Sorted so the summary rows come out in a stable order.
    csv_paths = sorted(glob.glob(options.result_path + "/*.csv"))

    evaluateMappings(csv_paths, options.start_point, options.print_count, options.output_filename)


if __name__ == '__main__':
    main()
71 | 


--------------------------------------------------------------------------------
/eval/runtask.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | java -Dfile.encoding=UTF-8 -Xms64m -Xmx512m -classpath \
 4 | ../target/test-classes:\
 5 | ../target/classes:\
 6 | $HOME/.m2/repository/com/google/guava/guava/12.0.1/guava-12.0.1.jar:\
 7 | $HOME/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:\
 8 | $HOME/.m2/repository/junit/junit/4.10/junit-4.10.jar:\
 9 | $HOME/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1.jar:\
10 | $HOME/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.4/jackson-mapper-asl-1.9.4.jar:\
11 | $HOME/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.4/jackson-core-asl-1.9.4.jar:\
12 | $HOME/.m2/repository/log4j/log4j/1.2.16/log4j-1.2.16.jar:\
13 | $HOME/.m2/repository/org/slf4j/slf4j-api/1.6.4/slf4j-api-1.6.4.jar:\
14 | $HOME/.m2/repository/org/mockito/mockito-core/1.9.0/mockito-core-1.9.0.jar:\
15 | $HOME/.m2/repository/org/objenesis/objenesis/1.0/objenesis-1.0.jar:\
16 | $HOME/.m2/repository/ch/qos/logback/logback-core/1.0.1/logback-core-1.0.1.jar:\
17 | $HOME/.m2/repository/org/yaml/snakeyaml/1.10/snakeyaml-1.10.jar:\
18 | $HOME/.m2/repository/ch/qos/logback/logback-classic/1.0.1/logback-classic-1.0.1.jar \
19 | com.twitter.crunch.tools.$@
20 | 


--------------------------------------------------------------------------------
/eval/test.sh:
--------------------------------------------------------------------------------
 1 | template=$1
 2 | node_count_start=$2
 3 | node_count_inc=$3
 4 | topology_count=$4
 5 | node_weight=$5
 6 | 
 7 | let node_count_max=node_count_start+node_count_inc*topology_count
 8 | 
 9 | echo Generating the topology files...
10 | for i in $(seq $node_count_start $node_count_inc $node_count_max)
11 | do
12 |   	bash runtask.sh TopologyGenerator $template $i $node_weight topology${i}.yaml
13 | done
14 | 
15 | echo Creating mappings...
16 | for i in $(seq 1 2 3)
17 | do 
18 | 	for j in $(seq $node_count_start $node_count_inc $node_count_max)
19 | 	do
20 | 		let k=j-node_count_inc
21 | 		bash runtask.sh CreateDataMapping yaml ${i} topology${j}.yaml map${i}${j}.csv rdfmap${k}.csv rdfmap${j}.csv
22 | 	done
23 | done
24 | 
25 | echo Evaluate mappings...
26 | for i in $(seq 1 2 3)
27 | do 
28 | 	echo version ${i}:
29 | 	for j in $(seq $node_count_start $node_count_inc $node_count_max)
30 | 	do
31 | 		echo -n $topology${j}.yaml,
32 | 		let k=j-node_count_inc
33 | 		bash runtask.sh EvaluateMapping yaml topology${j}.yaml map${i}${j}.csv
34 | 		echo -n ,
35 | 		if [ -e map${i}${k}.csv ]
36 | 		then
37 | 			bash runtask.sh CalculateMovement map${i}${k}.csv map${i}${j}.csv
38 | 		else
39 | 			echo -n ,,
40 | 		fi
41 | 		echo
42 | 	done
43 | done
44 | 


--------------------------------------------------------------------------------
/eval/testblob.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright 2013 Twitter, Inc.
  3 | Licensed under the Apache License, Version 2.0 (the "License");
  4 | you may not use this file except in compliance with the License.
  5 | You may obtain a copy of the License at
  6 | 
  7 | http://www.apache.org/licenses/LICENSE-2.0
  8 | 
  9 | Unless required by applicable law or agreed to in writing, software
 10 | distributed under the License is distributed on an "AS IS" BASIS,
 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 | See the License for the specific language governing permissions and
 13 | limitations under the License.
 14 | """
 15 | 
 16 | import argparse
 17 | import glob
 18 | import os
 19 | import logging
 20 | import subprocess
 21 | import shutil
 22 | 
 23 | 
 24 | def runProcess(exe):
 25 |     logging.info(exe)
 26 |     return subprocess.check_output(exe)
 27 | 
 28 | 
 29 | def evaluateMapping(versions, topology_files, output_dir):
 30 |     output = ""
 31 |     for version in versions:
 32 |         for i, topology_file in enumerate(topology_files):
 33 |             topology_file_name = os.path.basename(topology_file)
 34 |             map_file = output_dir + "/map-" + str(version) + "-" + topology_file_name
 35 |             
 36 |             rdf_file_new = output_dir + "/rdfmap-" + topology_file_name
 37 | 
 38 |             eval_command_line = "EvaluateMapping json " + topology_file + " " + map_file + " " + rdf_file_new
 39 |             print eval_command_line
 40 |             map_output = runProcess(['./runtask.sh', eval_command_line])
 41 | 
 42 |             if i != 0:
 43 |                 old_map_file = output_dir + "/map-" + str(version) + "-" + os.path.basename(topology_files[i-1])
 44 |                 calc_command_line = "CalculateMovement " + " " + old_map_file + " " + map_file
 45 |                 map_output = map_output + "," + runProcess(['./runtask.sh', calc_command_line])
 46 | 
 47 |             map_output += '\n'
 48 |             print map_output
 49 |             output += map_output
 50 | 
 51 |     return output
 52 | 
 53 | 
 54 | def generateMapping(versions, topology_files, output_dir, rdf, target_balance, rack_diversity, track_capacity):
 55 |     for version in versions:
 56 |         for i, topology_file in enumerate(topology_files):
 57 |             topology_file_name = os.path.basename(topology_file)
 58 |             params_file = os.path.dirname(topology_file) + "/" + "params_" + topology_file_name
 59 |             map_file = output_dir + "/map-" + str(version) + "-" + topology_file_name
 60 |             rdf_file_new = output_dir + "/rdfmap-" + topology_file_name
 61 |             command_line = "CreateBlobstoreMapping " + str(version) + " " + topology_file + " " + params_file
 62 |             command_line = command_line + " " + map_file + " " + str(rdf) + " " + str(target_balance)
 63 | 
 64 |             if i != 0:
 65 |                 rdf_file_old = output_dir + "/rdfmap-" + os.path.basename(topology_files[i-1])
 66 |             else:
 67 |                 rdf_file_old = "null"
 68 | 
 69 |             command_line = command_line + " " + str(rack_diversity) + " " + track_capacity
 70 |             command_line = command_line + " " + rdf_file_new + " " + rdf_file_old
 71 | 
 72 |             logging.info(command_line)
 73 |             print command_line
 74 |             subprocess.call(['./runtask.sh', command_line])
 75 | 
 76 |             if not os.path.isfile(map_file):
 77 |                 return False
 78 |     return True
 79 | 
 80 | 
 81 | def compareMappings(topology_files, evaluate, output_dir):
 82 |     rdf_min = 8
 83 |     rdf_max = 88
 84 |     tb_min = 0.05
 85 |     tb_max = 0.15
 86 |     rd_min = 3
 87 |     rd_max = 8
 88 |     rdf = rdf_min
 89 |     while rdf <= rdf_max:
 90 |         rd = rd_min
 91 |         rdf_rd = int(rdf/rd) + 1
 92 |         while rd <= rd_max:
 93 |             if ((int(rdf/rd) + 1)  == rdf_rd  and rd != rd_min):
 94 |               rd += 1
 95 |               continue
 96 |             else:
 97 |               rdf_rd = int(rdf/rd) + 1
 98 |             tb = tb_min
 99 |             while tb <= tb_max:
100 |                 scenario_name = output_dir + "/" + "rdf-" + str(rdf) + "-rd-" + str(rd) + "-tb-" + str(tb)
101 |                 if (evaluate):
102 |                     print "Evaluating mappings " + scenario_name
103 |                     output = evaluateMapping("3", topology_files, scenario_name)
104 |                     f = open(scenario_name + ".csv", 'w')
105 |                     f.write(output)
106 |                     f.close()
107 |                 else:
108 |                     if not os.path.exists(scenario_name):
109 |                         os.makedirs(scenario_name)
110 |                     print "Generating mappings " + scenario_name
111 |                     result = generateMapping("3", topology_files, scenario_name, rdf, tb, rdf_rd, "false")
112 |                     if not result:
113 |                         print "Failed to converge on scenario: " + scenario_name
114 |                         shutil.rmtree(scenario_name)
115 |                 tb += 0.02
116 |             rd += 1
117 |         rdf += 8
118 | 
119 | 
def main():
    """Command-line entry point for generating and/or evaluating mappings.

    Topology files are every ``topology_*`` entry under the ``-t`` path,
    processed in sorted order.  With ``-s`` a single mapping run uses the
    explicit ``-v/-r/-c/-b/-d`` values; without it the full scenario sweep in
    ``compareMappings`` is run.  ``-g`` and ``-e`` skip the generate and
    evaluate phases respectively.
    """
    # parse the commandline arguments
    parser = argparse.ArgumentParser(description='Generate mapping files for topologies from Blobstore')
    parser.add_argument("-t", dest='topology_path', type=str, required=True, help='path for the topology files')
    parser.add_argument("-o", dest='output_dir', type=str, default="./", required=False, help='output location')

    parser.add_argument("-s", dest='single_mapping', action="store_true", default=False, required=False, help='calculate single map')
    parser.add_argument("-v", dest='algo_version', type=int, default=3, required=False, help='version of algorithm')
    parser.add_argument("-r", dest='rack_diversity', type=str, default="8", required=False, help='rack diversity')
    parser.add_argument("-c", dest='track_capacity', type=str, default="false", required=False, help='track replica capacity')
    parser.add_argument("-b", dest='target_balance', type=str, default="0.25", required=False, help='target balance')
    parser.add_argument("-d", dest='rdf', type=str, default="10", required=False, help='rdf')

    parser.add_argument("-g", dest='skip_generate', action="store_true", default=False, required=False, help='skip generating maps')
    parser.add_argument("-e", dest='skip_evaluate', action="store_true", default=False, required=False, help='skip evaluating maps')
    args = parser.parse_args()

    logging.basicConfig(filename="testblob.log", level=logging.INFO)

    # read topology files; sorted once so both phases see the same order
    topology_files = sorted(glob.glob(args.topology_path + "/topology_*"))

    if not args.skip_generate:
        print("Generating mappings...")
        if args.single_mapping:
            versions = [args.algo_version]
            generateMapping(versions, topology_files, args.output_dir, args.rdf, args.target_balance, args.rack_diversity, args.track_capacity)
        else:
            compareMappings(topology_files, False, args.output_dir)
    if not args.skip_evaluate:
        print("Evaluate mappings...")
        if args.single_mapping:
            versions = [args.algo_version]
            output = evaluateMapping(versions, topology_files, args.output_dir)
            with open(args.output_dir + "/result.csv", 'w') as f:
                f.write(output)
        else:
            # output_dir was missing from this call, which made the
            # evaluate-all path fail with TypeError.
            compareMappings(topology_files, True, args.output_dir)


if __name__ == '__main__':
    main()
163 | 


--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
  1 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  2 |   <modelVersion>4.0.0</modelVersion>
  3 |   <groupId>com.twitter</groupId>
  4 |   <artifactId>libcrunch</artifactId>
  5 |   <name>libcrunch</name>
  6 |   <version>1.0.4-SNAPSHOT</version>
  7 |   <url>http://github.com/twitter/libcrunch</url>
  8 |   <description>A lightweight mapping framework that maps data objects to a number of nodes, subject to constraints</description>
  9 | 
 10 |   <dependencies>
 11 |     <dependency>
 12 |       <groupId>com.google.guava</groupId>
 13 |       <artifactId>guava</artifactId>
 14 |       <version>12.0.1</version>
 15 |       <scope>compile</scope>
 16 |     </dependency>
 17 |     <dependency>
 18 |       <groupId>junit</groupId>
 19 |       <artifactId>junit</artifactId>
 20 |       <version>4.10</version>
 21 |       <scope>test</scope>
 22 |     </dependency>
 23 |     <dependency>
 24 |       <groupId>org.codehaus.jackson</groupId>
 25 |       <artifactId>jackson-mapper-asl</artifactId>
 26 |       <version>1.9.4</version>
 27 |       <scope>test</scope>
 28 |     </dependency>
 29 |     <dependency>
 30 |       <groupId>org.codehaus.jackson</groupId>
 31 |       <artifactId>jackson-core-asl</artifactId>
 32 |       <version>1.9.4</version>
 33 |       <scope>test</scope>
 34 |     </dependency>
 35 |     <dependency>
 36 |       <groupId>org.slf4j</groupId>
 37 |       <artifactId>slf4j-api</artifactId>
 38 |       <version>1.6.4</version>
 39 |     </dependency>
 40 |     <dependency>
 41 |       <groupId>org.mockito</groupId>
 42 |       <artifactId>mockito-core</artifactId>
 43 |       <version>1.9.0</version>
 44 |       <scope>test</scope>
 45 |     </dependency>
 46 |     <dependency>
 47 |       <groupId>ch.qos.logback</groupId>
 48 |       <artifactId>logback-core</artifactId>
 49 |       <version>1.0.1</version>
 50 |       <scope>test</scope>
 51 |     </dependency>
 52 |     <dependency>
 53 |       <groupId>ch.qos.logback</groupId>
 54 |       <artifactId>logback-classic</artifactId>
 55 |       <version>1.0.1</version>
 56 |       <scope>test</scope>
 57 |     </dependency>
 58 |     <dependency>
 59 |       <groupId>org.yaml</groupId>
 60 |       <artifactId>snakeyaml</artifactId>
 61 |       <version>1.10</version>
 62 |       <scope>test</scope>
 63 |     </dependency>
 64 |   </dependencies>
 65 | 
 66 |   <scm>
 67 |     <connection>scm:git:git@github.com:twitter/libcrunch.git</connection>
 68 |     <url>scm:git:git@github.com:twitter/libcrunch.git</url>
 69 |     <developerConnection>scm:git:git@github.com:twitter/libcrunch.git</developerConnection>
 70 |   </scm>
 71 | 
 72 |   <licenses>
 73 |     <license>
 74 |       <name>The Apache Software License, Version 2.0</name>
 75 |       <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
 76 |     </license>
 77 |   </licenses>
 78 | 
 79 |   <developers>
 80 |     <developer>
 81 |       <name>Sangjin Lee</name>
 82 |       <email>sjlee@twitter.com</email>
 83 |     </developer>
 84 |     <developer>
 85 |       <name>Jerry Xu</name>
 86 |       <email>jxu@twitter.com</email>
 87 |     </developer>
 88 |   </developers>
 89 | 
 90 | 
 91 |   <distributionManagement>
 92 |     <snapshotRepository>
 93 |       <id>sonatype-nexus-snapshots</id>
 94 |       <name>Sonatype OSS</name>
 95 |       <url>https://oss.sonatype.org/content/repositories/snapshots</url>
 96 |     </snapshotRepository>
 97 |     <repository>
 98 |       <id>sonatype-nexus-staging</id>
 99 |       <name>Nexus Release Repository</name>
100 |       <url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
101 |     </repository>
102 |   </distributionManagement>
103 | 
104 |   <repositories>
105 |     <repository>
106 |       <id>sonatype-nexus-snapshots</id>
107 |       <url>https://oss.sonatype.org/content/repositories/snapshots</url>
108 |       <releases>
109 |         <enabled>false</enabled>
110 |       </releases>
111 |       <snapshots>
112 |         <enabled>true</enabled>
113 |       </snapshots>
114 |     </repository>
115 |   </repositories>
116 | 
117 |   <properties>
118 |     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
119 |   </properties>
120 | 
121 |   <build>
122 |     <pluginManagement>
123 |       <plugins>
124 |         <plugin>
125 |           <groupId>org.apache.maven.plugins</groupId>
126 |           <artifactId>maven-release-plugin</artifactId>
127 |           <version>2.1</version>
128 |           <configuration>
129 |             <mavenExecutorId>forked-path</mavenExecutorId>
130 |             <useReleaseProfile>false</useReleaseProfile>
131 |             <arguments>-Psonatype-oss-release</arguments>
132 |           </configuration>
133 |         </plugin>
134 |       </plugins>
135 |     </pluginManagement>
136 |     <plugins>
137 |       <plugin>
138 |         <artifactId>maven-compiler-plugin</artifactId>
139 |         <version>2.5.1</version>
140 |         <configuration>
141 |           <source>1.6</source>
142 |           <target>1.6</target>
143 |           <showDeprecation>true</showDeprecation>
144 |           <showWarnings>true</showWarnings>
145 |           <fork>true</fork>
146 |         </configuration>
147 |       </plugin>
148 |       <plugin>
149 |         <groupId>org.apache.maven.plugins</groupId>
150 |         <artifactId>maven-surefire-plugin</artifactId>
151 |         <version>2.12</version>
152 |         <configuration>
153 |           <argLine>-Xmx1024m</argLine>
154 |           <redirectTestOutputToFile>false</redirectTestOutputToFile>
155 |           <includes>
156 |             <include>**/CrunchTestSuite.java</include>
157 |           </includes>
158 |         </configuration>
159 |       </plugin>
160 |       <plugin>
161 |         <groupId>org.apache.maven.plugins</groupId>
162 |         <artifactId>maven-source-plugin</artifactId>
163 |         <version>2.2</version>
164 |         <executions>
165 |           <execution>
166 |             <id>attach-sources</id>
167 |             <goals>
168 |               <goal>jar</goal>
169 |             </goals>
170 |           </execution>
171 |         </executions>
172 |       </plugin>
173 |     </plugins>
174 |   </build>
  <!-- Optional build profiles; nothing in here runs unless the profile is activated. -->
  <profiles>
    <!-- Release profile: adds sources and javadoc jars and GPG-signs the artifacts. -->
    <profile>
      <id>sonatype-oss-release</id>
      <build>
        <plugins>
          <!-- NOTE(review): the default build section also declares maven-source-plugin (version
               2.2, goal "jar") with the same execution id "attach-sources"; confirm that the
               different version and goal used here are intentional. -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-source-plugin</artifactId>
            <version>2.1.2</version>
            <executions>
              <execution>
                <id>attach-sources</id>
                <goals>
                  <goal>jar-no-fork</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
          <!-- Builds the javadoc jar. -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-javadoc-plugin</artifactId>
            <version>2.7</version>
            <executions>
              <execution>
                <id>attach-javadocs</id>
                <goals>
                  <goal>jar</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
          <!-- Signs the artifacts; the "sign" goal is bound to the verify phase. -->
          <plugin>
            <groupId>org.apache.maven.plugins</groupId>
            <artifactId>maven-gpg-plugin</artifactId>
            <version>1.1</version>
            <executions>
              <execution>
                <id>sign-artifacts</id>
                <phase>verify</phase>
                <goals>
                  <goal>sign</goal>
                </goals>
              </execution>
            </executions>
          </plugin>
        </plugins>
      </build>
    </profile>
  </profiles>
224 | </project>
225 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/AssignmentTracker.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Tracker that keeps track of data assignment during the course of mapping generation, and rejects
 * assignments based on the target balance parameter.
 * <p>
 * It is important to note that this keeps track of the assignment status, and therefore is
 * stateful. One object needs to be created and retained for the duration of the mapping generation.
 */
interface AssignmentTracker {
  /**
   * Tracks assignment of this particular node. Assignment tracking happens essentially with the
   * leaf nodes. When a leaf node is positively selected, the assignment of the leaf node is
   * recorded, and any parent node whose type assignment is being tracked for is also tracked at
   * that point.
   *
   * @param node the node that has just been selected
   * @return whether the particular node is tracked directly.
   */
  boolean trackAssignment(Node node);

  /**
   * Returns whether the node should be rejected due to high assignment against the target balance.
   * The determination of whether to reject it is a function of the current data assignment level of
   * the node. The exact nature of how the selection is rejected is an implementation detail. The
   * only guaranteed behavior is the node will be rejected 100% of the time if it reaches the
   * assignment level specified by the target balance.
   *
   * @param node the node that is a candidate for selection
   * @return {@code true} if the node should not be selected
   */
  boolean rejectAssignment(Node node);
}
46 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/AssignmentTrackerFactory.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | /**
19 |  * Factory class that provides a single static factory method to create an assignment tracker
20 |  * instance.
21 |  */
22 | class AssignmentTrackerFactory {
23 |   /**
24 |    * Factory method that creates an assignment tracker instance. If target balance is not a positive
25 |    * number, a no-op instance will be returned.
26 |    *
27 |    * @param rootNode the root node under which nodes will have assignments tracked
28 |    * @param dataSize the size of the data objects; this is used to come up with the mean and max
29 |    * assignments
30 |    * @param targetBalance the expected target balance in relative percentages; e.g. 0.3 (30%). It
31 |    * means that this target will be used to control and curb over-assignment to nodes. Note that
32 |    * this is a target, and some small over-assignment may still occur if it becomes difficult to
33 |    * meet this target. Must be positive.
34 |    * @return newly created assignment tracker instance
35 |    */
36 |   public static AssignmentTracker create(Node rootNode, int dataSize, double targetBalance) {
37 |     if (rootNode != null && dataSize > 0 && targetBalance > 0.0d) {
38 |       return new AssignmentTrackerImpl(rootNode, dataSize, targetBalance);
39 |     }
40 |     return new NoOpAssignmentTracker(); // do not track
41 |   }
42 | }
43 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/BaseRackIsolationPlacementRules.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.HashSet;
 20 | import java.util.List;
 21 | import java.util.Set;
 22 | 
 23 | import com.google.common.base.Predicate;
 24 | import com.google.common.base.Predicates;
 25 | 
 26 | /**
 27 |  * Based on a topology based on racks, prescribes rack isolation placement rules. Specific
 28 |  * implementations should mix in their specific types based on the {@link RackBasedTypes} and define
 29 |  * the end type.
 30 |  */
 31 | public abstract class BaseRackIsolationPlacementRules implements PlacementRules, RackBasedTypes {
 32 |   /**
 33 |    * In case we get less than full return values from the placement algorithm, we retry by changing
 34 |    * the input to the placement algorithm. This should converge pretty rapidly under normal
 35 |    * circumstances. However, if it fails to converge after a certain number of tries, we throw a
 36 |    * MappingException to indicate the failure.
 37 |    */
 38 |   private static final int CONVERGENCE_LIMIT = 20;
 39 | 
 40 |   private final MultiInputHash hashFunction = new JenkinsHash();
 41 | 
 42 |   /**
 43 |    * Enforce rack isolation. The caller will either get the expected number of selected nodes as a
 44 |    * result, or an exception will be thrown.
 45 |    *
 46 |    * @return the number of selected nodes with the rack isolation placement rules enforced. The size
 47 |    * will match the input count
 48 |    * @throws MappingException if it is unable to find the mapping that satisfies all constraints
 49 |    */
 50 |   public List<Node> select(Node topNode, long data, int n, PlacementAlgorithm pa)
 51 |       throws MappingException {
 52 |     List<Node> nodes = new ArrayList<Node>(n);
 53 |     Set<Node> selectedRacks = new HashSet<Node>();
 54 |     long input = data;
 55 |     int count = n;
 56 |     int tries = 0;
 57 |     while (nodes.size() < n) {
 58 |       doSelect(topNode, input, count, pa, nodes, selectedRacks);
 59 |       count = n - nodes.size();
 60 |       if (count > 0) { // still not all picked
 61 |         input = hash(input); // hash the input to create a different data value
 62 |         tries++;
 63 |         if (tries >= CONVERGENCE_LIMIT) {
 64 |           throw new MappingException(String.format("could not fulfill all selection after %d tries",
 65 |               tries));
 66 |         }
 67 |       }
 68 |     }
 69 |     return nodes;
 70 |   }
 71 | 
 72 |   private void doSelect(Node topNode, long input, int count, PlacementAlgorithm pa,
 73 |       List<Node> selectedNodes, Set<Node> selectedRacks) {
 74 |     // pick (count) racks avoiding the racks already picked
 75 |     List<Node> racks = pa.select(topNode, input, count, RACK, getRackPredicate(selectedRacks));
 76 |     // add the racks to the selected racks
 77 |     selectedRacks.addAll(racks);
 78 |     // pick one end node
 79 |     for (Node rack: racks) {
 80 |       List<Node> endNode = pa.select(rack, input, 1, getEndNodeType());
 81 |       selectedNodes.addAll(endNode);
 82 |     }
 83 |   }
 84 | 
 85 |   /**
 86 |    * Use the predicate to reject already selected racks.
 87 |    */
 88 |   private Predicate<Node> getRackPredicate(Set<Node> selectedRacks) {
 89 |     return Predicates.not(Predicates.in(selectedRacks));
 90 |   }
 91 | 
 92 |   /**
 93 |    * Do a simple hashing of the original data.
 94 |    */
 95 |   private long hash(long data) {
 96 |     return hashFunction.hash(data);
 97 |   }
 98 | 
 99 |   /**
100 |    * Rejects the replica if they share the rack.
101 |    */
102 |   public boolean acceptReplica(Node primary, Node replica) {
103 |     Node primaryRack = primary.findParent(RACK);
104 |     Node replicaRack = replica.findParent(RACK);
105 |     return primaryRack.getId() != replicaRack.getId();
106 |   }
107 | }
108 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/CRUSHPlacementAlgorithm.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.HashMap;
 20 | import java.util.HashSet;
 21 | import java.util.List;
 22 | import java.util.Map;
 23 | import java.util.Set;
 24 | 
 25 | import org.slf4j.Logger;
 26 | import org.slf4j.LoggerFactory;
 27 | 
 28 | import com.google.common.base.Predicate;
 29 | import com.google.common.base.Predicates;
 30 | 
/**
 * The transcription of the CRUSH placement algorithm from the Weil paper. This is a fairly simple
 * adaptation, but a couple of important changes have been made to work with the crunch mapping.
 * <p>
 * NOTE(review): when keepOffset is enabled, per-input state is stored in a plain HashMap and this
 * class does no synchronization of its own; confirm that instances are not shared across threads.
 */
public class CRUSHPlacementAlgorithm implements PlacementAlgorithm {
  /**
   * In case the select() method fails to select after looping back to the origin of selection after
   * so many tries, we stop the search. This constant denotes the maximum number of retries after
   * looping back to the origin. It is expected that in most cases the selection will either succeed
   * with a small number of tries, or it will never succeed. So a reasonably large number to
   * distinguish these two cases should be sufficient.
   */
  private static final int MAX_LOOPBACK_COUNT = 50;
  private static final Logger logger = LoggerFactory.getLogger(CRUSHPlacementAlgorithm.class);

  // whether the last round value used for an input is remembered across select() calls
  private final boolean keepOffset;
  // input -> last round value (rPrime) used for that input; null when keepOffset is false
  private final Map<Long,Integer> roundOffset;
  // may be null, in which case no assignment tracking or tracker-based rejection takes place
  private final AssignmentTracker assignmentTracker;

  /**
   * Creates the crush placement object.
   */
  public CRUSHPlacementAlgorithm() {
    this(false);
  }

  /**
   * Creates the crush placement algorithm with the indication whether the round offset should be
   * kept for the duration of this object for successive selection of the same input.
   */
  public CRUSHPlacementAlgorithm(boolean keepOffset) {
    this(keepOffset, null);
  }

  /**
   * Creates the crush placement algorithm object with the assignment tracking.
   */
  public CRUSHPlacementAlgorithm(AssignmentTracker assignmentTracker) {
    this(false, assignmentTracker);
  }

  /**
   * Creates the crush placement algorithm with both options specified.
   *
   * @param keepOffset whether the round offset is remembered per input for later selections
   * @param assignmentTracker the tracker to consult and update; may be null to disable tracking
   */
  // TODO consider better constructors for these options
  public CRUSHPlacementAlgorithm(boolean keepOffset, AssignmentTracker assignmentTracker) {
    this.keepOffset = keepOffset;
    // the offset map is only allocated when offsets are actually kept
    roundOffset = keepOffset ? new HashMap<Long,Integer>() : null;
    this.assignmentTracker = assignmentTracker;
  }

  /**
   * Returns a list of (count) nodes of the desired type. If the count is more than the number of
   * available nodes, an exception is thrown. Note that it is possible for this method to return a
   * list whose size is smaller than the requested size (count) if it is unable to select all the
   * nodes for any reason. Callers should check the size of the returned list and take action if
   * needed.
   *
   */
  public List<Node> select(Node parent, long input, int count, int type) {
    return select(parent, input, count, type, Predicates.<Node>alwaysTrue());
  }

  /**
   * Same as {@link #select(Node, long, int, int)}, except that a node of the desired type is only
   * accepted if the given predicate accepts it.
   *
   * @param parent the node from which every selection starts
   * @param input the data value being placed
   * @param count the number of nodes requested
   * @param type the node type to select
   * @param nodePredicate filter applied to candidate nodes of the desired type
   * @return the selected nodes; may hold fewer than (count) nodes
   * @throws IllegalArgumentException if the parent reports fewer than (count) nodes of the type
   */
  public List<Node> select(Node parent, long input, int count, int type,
      Predicate<Node> nodePredicate) {
    int childCount = parent.getChildrenCount(type);
    if (childCount < count) {
      throw new IllegalArgumentException(count + " nodes of type " + type +
          " were requested but the tree has only " + childCount + " nodes!");
    }

    List<Node> selected = new ArrayList<Node>(count);
    // use the index stored in the map
    Integer offset;
    if (keepOffset) {
      offset = roundOffset.get(input);
      if (offset == null) {
        offset = 0;
        roundOffset.put(input, offset);
      }
    } else {
      offset = 0;
    }

    int rPrime = 0;
    // one iteration per requested node; r is the 1-based rank of the node being selected
    for (int r = 1; r <= count; r++) {
      // number of rejected attempts for this rank; it perturbs the round value on every retry
      int failure = 0;
      // number of times we had to loop back to the origin
      int loopbackCount = 0;
      // set when the search for this rank is abandoned
      boolean escape = false;
      boolean retryOrigin;
      Node out = null;
      // outer loop: each pass restarts the descent from the original parent
      do {
        retryOrigin = false; // initialize at the outset
        Node in = parent;
        // nodes rejected by the predicate during this descent; reset on every restart
        Set<Node> rejected = new HashSet<Node>();
        boolean retryNode;
        // inner loop: each pass selects once more on the current node "in"
        do {
          retryNode = false; // initialize at the outset
          // the round value handed to the selector: rank + stored offset + failures so far
          rPrime = r + offset + failure;
          logger.trace("{}.select({}, {})", new Object[] {in, input, rPrime});
          out = in.select(input, rPrime);
          if (out.getType() != type) {
            logger.trace("selected output {} for data {} didn't match the type {}: walking down " +
                "the hierarchy...", new Object[] {out, input, type});
            in = out; // walk down the hierarchy
            retryNode = true; // stay within the node and walk down the tree
          } else { // type matches
            boolean predicateRejected = !nodePredicate.apply(out);
            if (selected.contains(out) || predicateRejected) {
              if (predicateRejected) {
                logger.trace("{} was rejected by the node predicate for data {}: rejecting and " +
                    "increasing rPrime", out, input);
                rejected.add(out);
              } else { // already selected
                logger.trace("{} was already selected for data {}: rejecting and increasing rPrime",
                    out, input);
              }

              // we need to see if we have selected all possible nodes from this parent, in which
              // case we should loop back to the origin and start over
              if (allChildNodesEliminated(in, selected, rejected)) {
                logger.trace("all child nodes of {} have been eliminated", in);
                if (loopbackCount == MAX_LOOPBACK_COUNT) {
                  // we looped back the maximum times we specified; we give up search, and exit
                  escape = true;
                  // leaves the inner loop; retryOrigin is still false so the outer loop ends too
                  break;
                }
                loopbackCount++;
                logger.trace("looping back to the original parent node ({})", parent);
                retryOrigin = true;
              } else {
                retryNode = true; // go back and reselect on the same parent
              }
              failure++;
            } else if (nodeIsOut(out)) {
              logger.trace("{} is marked as out (failed or over the maximum assignment) for data " +
                  "{}! looping back to the original parent node", out, input);
              failure++;
              if (loopbackCount == MAX_LOOPBACK_COUNT) {
                // we looped back the maximum times we specified; we give up search, and exit
                escape = true;
                // leaves the inner loop; retryOrigin is still false so the outer loop ends too
                break;
              }
              loopbackCount++;
              // re-selection on the same parent is detrimental in case of node failure: loop back
              // to the origin
              retryOrigin = true;
            } else {
              // we got a successful selection
              break;
            }
          }
        } while (retryNode);
      } while (retryOrigin);

      if (escape) {
        // cannot find a node under this parent; return a smaller set than was intended
        logger.debug("we could not select a node for data {} under parent {}; a smaller data set " +
            "than is requested will be returned", input, parent);
        continue;
      }

      logger.trace("{} was selected for data {}", out, input);
      selected.add(out);
      // track the assignment
      if (assignmentTracker != null) {
        assignmentTracker.trackAssignment(out);
      }
    }
    if (keepOffset) {
      // remember the last round value used so that a later call for this input starts after it
      roundOffset.put(input, rPrime);
    }
    return selected;
  }


  /**
   * Returns whether the node must not be selected: either it is a failed leaf, or the assignment
   * tracker (if one is set) rejects it.
   */
  private boolean nodeIsOut(Node node) {
    if (node.isLeaf() && node.isFailed()) {
      return true;
    }
    if (assignmentTracker != null) {
      return assignmentTracker.rejectAssignment(node);
    }
    return false;
  }

  /**
   * Examines the immediate child nodes of the given parent node, and sees if every child is
   * eliminated, i.e. it is out (see nodeIsOut), already selected, or rejected. This is used to
   * determine whether this parent node should no longer be used in the selection. A parent whose
   * child list is null or empty is reported as eliminated.
   */
  private boolean allChildNodesEliminated(Node parent, List<Node> selected, Set<Node> rejected) {
    List<Node> children = parent.getChildren();
    if (children != null) {
      for (Node child: children) {
        if (!nodeIsOut(child) && !selected.contains(child) && !rejected.contains(child)) {
          return false;
        }
      }
    }
    return true;
  }
}
232 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/ConsistentHashingSelector.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.nio.ByteBuffer;
 19 | import java.security.MessageDigest;
 20 | import java.security.NoSuchAlgorithmException;
 21 | import java.util.ArrayList;
 22 | import java.util.Collections;
 23 | import java.util.HashMap;
 24 | import java.util.List;
 25 | import java.util.Map;
 26 | 
 27 | /**
 28 |  * Simple implementation of selection based on consistent hashing.
 29 |  */
 30 | class ConsistentHashingSelector implements Selector {
 31 |   public static final long DEFAULT_MAX_TOKENS_PER_NODE = 500;
 32 | 
 33 |   private final MessageDigest md;
 34 |   private final List<Long> tokenList;
 35 |   private final Map<Long,Node> tokenMap;
 36 | 
 37 |   public ConsistentHashingSelector(Node node) {
 38 |     if (node.isLeaf()) {
 39 |       throw new IllegalArgumentException();
 40 |     }
 41 |     try {
 42 |       md = MessageDigest.getInstance("SHA-1");
 43 |     } catch (NoSuchAlgorithmException ignore) {
 44 |       throw new IllegalArgumentException(ignore);
 45 |     }
 46 | 
 47 |     final long maxTokensPerNode = DEFAULT_MAX_TOKENS_PER_NODE;
 48 | //    long totalWeight = 0;
 49 |     long maxWeight = 0;
 50 |     List<Node> nodes = node.getChildren();
 51 |     for (Node n: nodes) {
 52 | //      totalWeight += n.getWeight();
 53 |       maxWeight = Math.max(n.getWeight(), maxWeight);
 54 |     }
 55 | 
 56 |     MessageDigest ringMd;
 57 |     try {
 58 |       ringMd = MessageDigest.getInstance("SHA-1");
 59 |     } catch (NoSuchAlgorithmException ignore) {
 60 |       throw new IllegalArgumentException(ignore);
 61 |     }
 62 |     tokenMap = new HashMap<Long,Node>();
 63 |     for (Node n: nodes) {
 64 |       long tokenCount = maxTokensPerNode*n.getWeight()/maxWeight;
 65 |       byte[] h = null;
 66 |       for (int i = 0; i < tokenCount; i++) {
 67 |         byte[] input = (h == null) ? n.getName().getBytes() : h;
 68 |         h = ringMd.digest(input);
 69 |         long token = Utils.bstrTo32bit(h);
 70 |         if (!tokenMap.containsKey(token)) {
 71 |           tokenMap.put(token, n);
 72 |         }
 73 |       }
 74 |     }
 75 | 
 76 |     tokenList = new ArrayList<Long>(tokenMap.keySet());
 77 |     Collections.sort(tokenList);
 78 |   }
 79 | 
 80 |   public Node select(long input, long round) {
 81 |     byte[] b = longToBytes(input, round);
 82 |     byte[] h = md.digest(b);
 83 |     long token = Utils.bstrTo32bit(h);
 84 |     return tokenMap.get(findSuccessorToken(token));
 85 |   }
 86 | 
 87 |   private byte[] longToBytes(long a, long b) {
 88 |     ByteBuffer buf = ByteBuffer.allocate(8*2).putLong(a).putLong(b);
 89 |     return buf.array();
 90 |   }
 91 | 
 92 |   private long findSuccessorToken(long token) {
 93 |     int i = Collections.binarySearch(tokenList, token);
 94 |     if (i < 0) {
 95 |       i = -1 - i;
 96 |     }
 97 |     // [sjlee] why?
 98 |     if (i == tokenList.size()) {
 99 |       i = 0;
100 |     }
101 |     return tokenList.get(i);
102 |   }
103 | }
104 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Crunch.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.security.MessageDigest;
 19 | import java.security.NoSuchAlgorithmException;
 20 | import java.util.ArrayList;
 21 | import java.util.List;
 22 | 
 23 | 
 24 | public class Crunch {
 25 |   private final MessageDigest md;
 26 | 
 27 |   public Crunch() {
 28 |     try {
 29 |       md = MessageDigest.getInstance("SHA-1");
 30 |     } catch (NoSuchAlgorithmException ignore) {
 31 |       throw new IllegalArgumentException(ignore);
 32 |     }
 33 |   }
 34 | 
 35 |   /**
 36 |    * Creates a "crunched" tree from the topological tree input. It is assumed that the topological
 37 |    * tree begins with a root node with the right root type.
 38 |    * <br/>
 39 |    * As a result of this operation, a copy with the following properties is created:
 40 |    * <ul>
 41 |    *   <li>name, type, and selection properties are copied from the topological nodes</li>
 42 |    *   <li>id's are assigned as a SHA-1 hash of the node name</li>
 43 |    *   <li>both children and parent properties are set</li>
 44 |    *   <li>weights are assigned as sums of child weights</li>
 45 |    *   <li>the selector objects are created</li>
 46 |    * </ul>
 47 |    * No modifications are done on the original topological nodes.
 48 |    */
 49 |   public Node makeCrunch(Node topology) {
 50 |     if (topology.getType() != Types.ROOT) {
 51 |       throw new IllegalArgumentException("the root node is not the ROOT type!");
 52 |     }
 53 | 
 54 |     return makeCrunchNode(topology);
 55 |   }
 56 | 
 57 |   private Node makeCrunchNode(Node topologicalNode) {
 58 |     // copy the intrinsic properties: id, weights, relationship, and selectors will be set
 59 |     Node node = new Node(topologicalNode);
 60 |     // assign the id from the name hash
 61 |     node.setId(computeId(node));
 62 |     if (!topologicalNode.isLeaf()) {
 63 |       List<Node> newChildren = new ArrayList<Node>();
 64 |       List<Node> children = topologicalNode.getChildren();
 65 |       for (Node child: children) {
 66 |         // depth-first traversal
 67 |         Node newChild = makeCrunchNode(child);
 68 |         // set the child-parent relationship
 69 |         newChildren.add(newChild);
 70 |         newChild.setParent(node);
 71 |       }
 72 |       node.setChildren(newChildren);
 73 | 
 74 |       // weights and selector should be set after all lower nodes are crunched
 75 |       computeWeightAndSelector(node);
 76 |     }
 77 |     return node;
 78 |   }
 79 | 
 80 |   private long computeId(Node node) {
 81 |     byte[] h = md.digest(node.getName().getBytes());
 82 |     // TODO see if this is adequate as a unique id: I suspect it is...
 83 |     return Utils.bstrTo32bit(h);
 84 |   }
 85 | 
 86 |   /**
 87 |    * Performs modifications, and reassigns certain properties on the tree in place. The input is
 88 |    * assumed to be a properly "crunched" tree. This is mainly to aid creating the "mini-tree" for
 89 |    * the data selection in the RDF mapping.
 90 |    * <br/>
 91 |    * The only properties that are recalculated are the weights and the selectors.
 92 |    */
 93 |   public void recrunch(Node node) {
 94 |     if (!node.isLeaf()) {
 95 |       for (Node child: node.getChildren()) {
 96 |         recrunch(child);
 97 |       }
 98 | 
 99 |       computeWeightAndSelector(node);
100 |     }
101 |   }
102 | 
103 |   private void computeWeightAndSelector(Node node) {
104 |     // set the weight after all its children are already "crunched"
105 |     int weight = 0;
106 |     for (Node child: node.getChildren()) {
107 |       weight += child.getWeight();
108 |     }
109 |     node.setWeight(weight);
110 |     // set the selector
111 |     node.setSelector(pickSelector(node));
112 |   }
113 | 
114 |   private Selector pickSelector(Node node) {
115 |     switch (node.getSelection()) {
116 |     case CONSISTENT_HASHING:
117 |       return new ConsistentHashingSelector(node);
118 |     case STRAW:
119 |       return new StrawSelector(node);
120 |     default:
121 |       throw new IllegalArgumentException("unrecognized type!");
122 |     }
123 |   }
124 | }
125 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/JenkinsHash.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | public class JenkinsHash implements MultiInputHash {
 19 |   // max value to limit it to 4 bytes
 20 |   private static final long MAX_VALUE = 0xFFFFFFFFL;
 21 |   private static final long CRUSH_HASH_SEED = 1315423911L;
 22 | 
 23 |   /**
 24 |    * Convert a byte into a long value without making it negative.
 25 |    */
 26 |   private static long byteToLong(byte b) {
 27 |     long val = b & 0x7F;
 28 |     if ((b & 0x80) != 0) {
 29 |       val += 128;
 30 |     }
 31 |     return val;
 32 |   }
 33 | 
 34 |   /**
 35 |    * Do addition and turn into 4 bytes.
 36 |    */
 37 |   private static long add(long val, long add) {
 38 |     return (val + add) & MAX_VALUE;
 39 |   }
 40 | 
 41 |   /**
 42 |    * Do subtraction and turn into 4 bytes.
 43 |    */
 44 |   private static long subtract(long val, long subtract) {
 45 |     return (val - subtract) & MAX_VALUE;
 46 |   }
 47 | 
 48 |   /**
 49 |    * Left shift val by shift bits and turn in 4 bytes.
 50 |    */
 51 |   private static long xor(long val, long xor) {
 52 |     return (val ^ xor) & MAX_VALUE;
 53 |   }
 54 | 
 55 |   /**
 56 |    * Left shift val by shift bits.  Cut down to 4 bytes.
 57 |    */
 58 |   private static long leftShift(long val, int shift) {
 59 |     return (val << shift) & MAX_VALUE;
 60 |   }
 61 | 
 62 |   /**
 63 |    * Convert 4 bytes from the buffer at offset into a long value.
 64 |    */
 65 |   private static long fourByteToLong(byte[] bytes, int offset) {
 66 |     return (byteToLong(bytes[offset + 0])
 67 |         + (byteToLong(bytes[offset + 1]) << 8)
 68 |         + (byteToLong(bytes[offset + 2]) << 16)
 69 |         + (byteToLong(bytes[offset + 3]) << 24));
 70 |   }
 71 | 
 72 |   /**
 73 |    * Mix up the values in the hash function.
 74 |    */
 75 |   private static Triple hashMix(Triple t) {
 76 |     long a = t.a; long b = t.b; long c = t.c;
 77 |     a = subtract(a, b); a = subtract(a, c); a = xor(a, c >> 13);
 78 |     b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 8));
 79 |     c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 13));
 80 |     a = subtract(a, b); a = subtract(a, c); a = xor(a, (c >> 12));
 81 |     b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 16));
 82 |     c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 5));
 83 |     a = subtract(a, b); a = subtract(a, c); a = xor(a, (c >> 3));
 84 |     b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 10));
 85 |     c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 15));
 86 |     return new Triple(a, b, c);
 87 |   }
 88 | 
 89 |   private static class Triple {
 90 |     long a;
 91 |     long b;
 92 |     long c;
 93 | 
 94 |     public Triple(long a, long b, long c) {
 95 |       this.a = a; this.b = b; this.c = c;
 96 |     }
 97 |   }
 98 | 
 99 |   public long hash(long a) {
100 |     long hash = xor(CRUSH_HASH_SEED, a);
101 |     long b = a;
102 |     long x = 231232L;
103 |     long y = 1232L;
104 |     Triple val = hashMix(new Triple(b, x, hash));
105 |     b = val.a; x = val.b; hash = val.c;
106 |     val = hashMix(new Triple(y, a, hash));
107 |     hash = val.c;
108 |     return hash;
109 |   }
110 | 
111 |   public long hash(long a, long b) {
112 |     long hash = xor(xor(CRUSH_HASH_SEED, a), b);
113 |     long x = 231232L;
114 |     long y = 1232L;
115 |     Triple val = hashMix(new Triple(a, b, hash));
116 |     a = val.a; b = val.b; hash = val.c;
117 |     val = hashMix(new Triple(x, a, hash));
118 |     x = val.a; a = val.b; hash = val.c;
119 |     val = hashMix(new Triple(b, y, hash));
120 |     hash = val.c;
121 |     return hash;
122 |   }
123 | 
124 |   public long hash(long a, long b, long c) {
125 |     long hash = xor(xor(xor(CRUSH_HASH_SEED, a), b), c);
126 |     long x = 231232L;
127 |     long y = 1232L;
128 |     Triple val = hashMix(new Triple(a, b, hash));
129 |     a = val.a; b = val.b; hash = val.c;
130 |     val = hashMix(new Triple(c, x, hash));
131 |     c = val.a; x = val.b; hash = val.c;
132 |     val = hashMix(new Triple(y, a, hash));
133 |     y = val.a; a = val.b; hash = val.c;
134 |     val = hashMix(new Triple(b, x, hash));
135 |     b = val.a; x = val.b; hash = val.c;
136 |     val = hashMix(new Triple(y, c, hash));
137 |     hash = val.c;
138 |     return hash;
139 |   }
140 | }
141 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MappingDiff.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.Collections;
 20 | import java.util.HashMap;
 21 | import java.util.HashSet;
 22 | import java.util.List;
 23 | import java.util.Map;
 24 | import java.util.Set;
 25 | 
 26 | /**
 27 |  * Utility class that computes the diffs between two mappings. The diff is based on the
 28 |  * <code>equals</code> contract. It also indicates whether the particular change is addition or
 29 |  * removal.
 30 |  */
 31 | public class MappingDiff {
 32 |   /**
 33 |    * Returns the difference between the two mappings. It only contains the keys with which there are
 34 |    * any differences. The value lists are neither null nor empty. There is no particular ordering in
 35 |    * the value returned, so one should not rely on the ordering of values.
 36 |    */
 37 |   public static <K,V> Map<K,List<Value<V>>> calculateDiff(Map<K,List<V>> before,
 38 |       Map<K,List<V>> after) {
 39 |     Map<K,List<Value<V>>> result = new HashMap<K,List<Value<V>>>();
 40 |     // iterate over m1 and compute the diff first
 41 |     for (K key: before.keySet()) {
 42 |       List<V> l1 = before.get(key);
 43 |       List<V> l2 = after.get(key);
 44 |       List<Value<V>> diff = calculateDiff(l1, l2);
 45 |       if (!diff.isEmpty()) {
 46 |         result.put(key, diff);
 47 |       }
 48 |     }
 49 |     // see if there is any key that is mapped in m2 but not in m1
 50 |     Set<K> m2Keys = new HashSet<K>(after.keySet());
 51 |     m2Keys.removeAll(before.keySet());
 52 |     for (K key: m2Keys) {
 53 |       // this is purely difference
 54 |       List<V> list = after.get(key);
 55 |       if (!list.isEmpty()) {
 56 |         result.put(key, wrapList(list, Difference.ADDED));
 57 |       }
 58 |     }
 59 |     return result;
 60 |   }
 61 | 
 62 |   /**
 63 |    * Returns the list that contains that have changed between before and after. If either is null,
 64 |    * the other list is returned. If both are null, an empty list is returned.
 65 |    */
 66 |   private static <V> List<Value<V>> calculateDiff(List<V> before, List<V> after) {
 67 |     if (before == null && after == null) {
 68 |       return Collections.emptyList();
 69 |     }
 70 |     if (before == null) {
 71 |       return wrapList(after, Difference.ADDED);
 72 |     }
 73 |     if (after == null) {
 74 |       return wrapList(before, Difference.REMOVED);
 75 |     }
 76 |     // neither list is null
 77 |     List<Value<V>> result = new ArrayList<Value<V>>();
 78 |     for (V v: before) {
 79 |       if (!after.contains(v)) {
 80 |         result.add(new Value<V>(v, Difference.REMOVED));
 81 |       }
 82 |     }
 83 |     for (V v: after) {
 84 |       if (!before.contains(v)) {
 85 |         result.add(new Value<V>(v, Difference.ADDED));
 86 |       }
 87 |     }
 88 |     return result;
 89 |   }
 90 | 
 91 |   private static <V> List<Value<V>> wrapList(List<V> list, Difference diff) {
 92 |     List<Value<V>> result = new ArrayList<Value<V>>();
 93 |     for (V v: list) {
 94 |       result.add(new Value<V>(v, diff));
 95 |     }
 96 |     return result;
 97 |   }
 98 | 
 99 |   public static class Value<V> {
100 |     private final V value;
101 |     private final Difference diff;
102 | 
103 |     public Value(V value, Difference diff) {
104 |       this.value = value;
105 |       this.diff = diff;
106 |     }
107 | 
108 |     public V get() {
109 |       return value;
110 |     }
111 | 
112 |     public Difference getDifferenceType() {
113 |       return diff;
114 |     }
115 |   }
116 | 
117 |   public enum Difference { ADDED, REMOVED }
118 | }
119 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MappingEvaluator.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.List;
19 | import java.util.Map;
20 | 
21 | public class MappingEvaluator {
22 | 
23 |     public static double getMean(List<Double> distribution) {
24 |         double sum = 0;
25 |         for(double a: distribution)
26 |             sum += a;
27 | 
28 |         return sum/distribution.size();
29 |     }
30 | 
31 |     public static double getStandardDeviation(List<Double> distribution)
32 |     {
33 |         double mean = getMean(distribution);
34 | 
35 |         double temp = 0;
36 |         for(double a: distribution)
37 |             temp += (mean-a)*(mean-a);
38 | 
39 |         return Math.sqrt(temp/distribution.size());
40 |     }
41 | 
42 |     public static double getWeightedMean(Map<String, Long> distribution, Map<String, Long> weight) {
43 |         assert(distribution.size() == weight.size());
44 |         long sum1 = 0;
45 |         long sum2 = 0;
46 | 
47 |         for (String node : distribution.keySet()) {
48 |             sum1 += distribution.get(node) * weight.get(node);
49 |             sum2 += weight.get(node);
50 |         }
51 | 
52 |         return sum1/sum2;
53 |     }
54 | 
55 |     public static double getWeightedStandardDeviation(Map<String, Long> distribution, Map<String, Long> weight){
56 |         assert(distribution.size() == weight.size());
57 |         double mean = getWeightedMean(distribution, weight);
58 |         double sum1 = 0;
59 |         double sum2 = 0;
60 |         int m = 0;
61 | 
62 |         for (String node : distribution.keySet()) {
63 |             sum1 += weight.get(node) * Math.pow(distribution.get(node) - mean, 2);
64 |         }
65 | 
66 |         for (String node : weight.keySet()) {
67 |             sum2 += weight.get(node);
68 |             if (weight.get(node) != 0) m++;
69 |         }
70 | 
71 |         return Math.sqrt(sum1/((m - 1)*sum2/m));
72 |     }
73 | }
74 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MappingException.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Exception that signifies failure to generate a mapping, given the provided input. This is a
 * pretty severe exception. It is unchecked, so callers are not forced to declare or catch it.
 */
public class MappingException extends RuntimeException {
  private static final long serialVersionUID = 2099244230202336093L;

  /**
   * Creates the exception with a description of why the mapping could not be generated.
   */
  public MappingException(String msg) {
    super(msg);
  }

  /**
   * Creates the exception with a description and the underlying cause, so that the original stack
   * trace is preserved when a lower-level failure is translated into a mapping failure.
   */
  public MappingException(String msg, Throwable cause) {
    super(msg, cause);
  }
}
29 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MappingFunction.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.List;
19 | import java.util.Map;
20 | 
21 | /**
22 |  * Interface that defines a mapping function.
23 |  */
24 | public interface MappingFunction {
25 |   Map<Long,List<Node>> computeMapping(List<Long> data, Node topology);
26 | }
27 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MessageDigestHash.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.nio.ByteBuffer;
19 | import java.security.MessageDigest;
20 | import java.security.NoSuchAlgorithmException;
21 | 
22 | public class MessageDigestHash implements MultiInputHash {
23 |   private final String algorithm;
24 |   private final MessageDigest md;
25 | 
26 |   public MessageDigestHash(String algorithm) {
27 |     this.algorithm = algorithm;
28 |     try {
29 |       md = MessageDigest.getInstance(algorithm);
30 |     } catch (NoSuchAlgorithmException e) {
31 |       throw new IllegalArgumentException("invalid algorithm passed in", e);
32 |     }
33 |   }
34 | 
35 |   public String getAlgorithm() {
36 |     return algorithm;
37 |   }
38 | 
39 |   public long hash(long a) {
40 |     ByteBuffer buf = ByteBuffer.allocate(8).putLong(a);
41 |     return hashFromBuffer(buf);
42 |   }
43 | 
44 |   public long hash(long a, long b) {
45 |     ByteBuffer buf = ByteBuffer.allocate(8*2).
46 |         putLong(a).putLong(b);
47 |     return hashFromBuffer(buf);
48 |   }
49 | 
50 |   public long hash(long a, long b, long c) {
51 |     ByteBuffer buf = ByteBuffer.allocate(8*3).
52 |         putLong(a).putLong(b).putLong(c);
53 |     return hashFromBuffer(buf);
54 |   }
55 | 
56 |   private long hashFromBuffer(ByteBuffer buf) {
57 |     byte[] result = md.digest(buf.array());
58 |     return Utils.bstrTo32bit(result);
59 |   }
60 | }
61 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/MultiInputHash.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * A hash function that accepts one, two, or three long inputs and produces a long hash value.
 */
public interface MultiInputHash {
  /**
   * Hashes a single input.
   */
  long hash(long a);

  /**
   * Hashes two inputs together.
   */
  long hash(long a, long b);

  /**
   * Hashes three inputs together.
   */
  long hash(long a, long b, long c);
}
23 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/NoOpAssignmentTracker.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | /**
19 |  * Implementation of the assignment tracker that simply does not track. It is used when assignment
20 |  * tracking is disabled (i.e. target balance is not used).
21 |  */
22 | class NoOpAssignmentTracker implements AssignmentTracker {
23 |   /**
24 |    * No tracking.
25 |    */
26 |   public boolean trackAssignment(Node node) {
27 |     return false;
28 |   }
29 | 
30 |   /**
31 |    * No rejection.
32 |    */
33 |   public boolean rejectAssignment(Node node) {
34 |     return false;
35 |   }
36 | }
37 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Node.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.List;
 20 | 
 21 | public class Node implements Comparable<Node> {
 22 |   public enum Selection { STRAW, CONSISTENT_HASHING }
 23 | 
 24 |   private String name;
 25 |   private int type;
 26 |   private long id;
 27 |   private long weight;
 28 |   private Selection selection;
 29 | 
 30 |   private List<Node> children;
 31 |   private Node parent;
 32 | 
 33 |   private Selector selector;
 34 | 
 35 |   private boolean failed;
 36 | 
 37 |   public Node() {}
 38 | 
 39 |   public Node(Node node) {
 40 |     this.name = node.name;
 41 |     this.type = node.type;
 42 |     this.id = node.id;
 43 |     this.weight = node.weight;
 44 |     this.selection = node.selection;
 45 |     this.failed = node.failed;
 46 |   }
 47 | 
 48 |   public String getName() {
 49 |     return name;
 50 |   }
 51 | 
 52 |   public void setName(String name) {
 53 |     this.name = name;
 54 |   }
 55 | 
 56 |   public int getType() {
 57 |     return type;
 58 |   }
 59 | 
 60 |   public void setType(int type) {
 61 |     this.type = type;
 62 |   }
 63 | 
 64 |   public long getId() {
 65 |     return id;
 66 |   }
 67 | 
 68 |   public void setId(long id) {
 69 |     this.id = id;
 70 |   }
 71 | 
 72 |   public long getWeight() {
 73 |     return weight;
 74 |   }
 75 | 
 76 |   public void setWeight(long weight) {
 77 |     this.weight = weight;
 78 |   }
 79 | 
 80 |   public Selection getSelection() {
 81 |     return selection;
 82 |   }
 83 | 
 84 |   public void setSelection(Selection selection) {
 85 |     this.selection = selection;
 86 |   }
 87 | 
 88 | 
 89 |   public boolean isFailed() {
 90 |     return failed;
 91 |   }
 92 | 
 93 |   public void setFailed(boolean failed) {
 94 |     if (!isLeaf()) {
 95 |       throw new UnsupportedOperationException("you cannot set failed on a non-leaf!");
 96 |     }
 97 |     this.failed = failed;
 98 |   }
 99 | 
100 |   public List<Node> getChildren() {
101 |     return children;
102 |   }
103 | 
104 |   public void setChildren(List<Node> children) {
105 |     this.children = children;
106 |   }
107 | 
108 |   public boolean isLeaf() {
109 |     return children == null || children.isEmpty();
110 |   }
111 | 
112 |   public Node getParent() {
113 |     return parent;
114 |   }
115 | 
116 |   public void setParent(Node parent) {
117 |     this.parent = parent;
118 |   }
119 | 
120 |   public Selector getSelector() {
121 |     return selector;
122 |   }
123 | 
124 |   public void setSelector(Selector selector) {
125 |     this.selector = selector;
126 |   }
127 | 
128 |   /**
129 |    * Uses the selection algorithm that is assigned to the node and return the selected node.
130 |    */
131 |   public Node select(long input, long round) {
132 |     return selector.select(input, round);
133 |   }
134 | 
135 |   /**
136 |    * Returns all leaf nodes that belong in the tree. Returns itself if this node is a leaf. As with
137 |    * most other methods in this class, the nodes are added via depth-first traversal.
138 |    */
139 |   public List<Node> getAllLeafNodes() {
140 |     // TODO optimize for performance (cache)
141 |     List<Node> nodes = new ArrayList<Node>();
142 |     if (isLeaf()) {
143 |       nodes.add(this);
144 |     } else {
145 |       for (Node child: children) {
146 |         nodes.addAll(child.getAllLeafNodes());
147 |       }
148 |     }
149 |     return nodes;
150 |   }
151 | 
152 |   /**
153 |    * Returns all child nodes that match the type. Returns itself if this node matches it. If no
154 |    * child matches the type, an empty list is returned.
155 |    */
156 |   public List<Node> findChildren(int type) {
157 |     List<Node> nodes = new ArrayList<Node>();
158 |     if (this.type == type) {
159 |       nodes.add(this);
160 |     } else if (!isLeaf()) {
161 |       for (Node child: children) {
162 |         nodes.addAll(child.findChildren(type));
163 |       }
164 |     }
165 |     return nodes;
166 |   }
167 | 
168 |   /**
169 |    * Returns the number of all child nodes that match the type. Returns 1 if this node matches it.
170 |    * Returns 0 if no child matches the type.
171 |    */
172 |   public int getChildrenCount(int type) {
173 |     int count = 0;
174 |     if (this.type == type) {
175 |       count++;
176 |     } else if (!isLeaf()) {
177 |       for (Node child: children) {
178 |         count += child.getChildrenCount(type);
179 |       }
180 |     }
181 |     return count;
182 |   }
183 | 
184 |   /**
185 |    * Finds a parent that matches the given type. If the node itself matches it, it is returned. If
186 |    * there is no matching parent in the hierarchy, null is returned.
187 |    */
188 |   public Node findParent(int type) {
189 |     Node node = this;
190 |     while (node != null) {
191 |       if (node.type == type) {
192 |         return node;
193 |       }
194 |       node = node.parent; // keep walking up the tree
195 |     }
196 |     return null; // no match was found
197 |   }
198 | 
199 |   /**
200 |    * Returns the top-most ("root") node from this node. If this node itself does not have a parent,
201 |    * returns itself.
202 |    */
203 |   public Node getRoot() {
204 |     Node node = this;
205 |     while (node.parent != null) {
206 |       node = node.parent;
207 |     }
208 |     return node;
209 |   }
210 | 
211 |   @Override
212 |   public String toString() {
213 |     return name + ":" + id;
214 |   }
215 | 
216 |   @Override
217 |   public int hashCode() {
218 |     return name.hashCode();
219 |   }
220 | 
221 |   @Override
222 |   public boolean equals(Object obj) {
223 |     if (obj == this) {
224 |       return true;
225 |     }
226 |     if (!(obj instanceof Node)) {
227 |       return false;
228 |     }
229 |     Node that = (Node)obj;
230 |     return name.equals(that.name);
231 |   }
232 | 
233 |   public int compareTo(Node o) {
234 |     return name.compareTo(o.name);
235 |   }
236 | }
237 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/NodeFilter.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | public interface NodeFilter {
19 |   boolean accept(Node node);
20 | }
21 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/PlacementAlgorithm.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.List;
19 | 
20 | import com.google.common.base.Predicate;
21 | 
22 | /**
23 |  * Encapsulation of the algorithm that selects a number of child nodes in the topology to place the
24 |  * data based on the data input as well as the node properties such as the selection algorithm,
25 |  * the weight, and the type. It is orthogonal to the placement rules, and is used as a building
26 |  * block operations to create placement rules.
27 |  */
28 | public interface PlacementAlgorithm {
29 |   /**
30 |    * Returns a list of nodes of the desired type. If the count is more than the number of available
31 |    * nodes, an exception is thrown.
32 |    *
33 |    * @return a list of nodes
34 |    */
35 |   List<Node> select(Node parent, long input, int count, int type);
36 | 
37 |   /**
38 |    * Returns a list of nodes that have the matching type and pass the predicate. If the count is
39 |    * more than the number of available nodes, an exception is thrown.
40 |    *
41 |    * @return a list of nodes
42 |    */
43 |   List<Node> select(Node parent, long input, int count, int type, Predicate<Node> pred);
44 | }
45 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/PlacementRules.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.List;
19 | 
20 | /**
21 |  * A way to express the placement rules for the crunch/CRUSH mapping. Placement rules are often
22 |  * combined with a more specific topology (i.e. type definitions). In general, it should only
23 |  * express the prescription of how a number of end nodes should be selected, and should not rely on
24 |  * specific data, the top node from which the selection begins, or the placement algorithm.
25 |  */
26 | public interface PlacementRules {
27 |   /**
28 |    * Describes how a number of end nodes should be selected from the top node.
29 |    */
30 |   List<Node> select(Node topNode, long data, int n, PlacementAlgorithm pa);
31 |   /**
32 |    * Returns the types values that the placement rules use.
33 |    */
34 |   int getEndNodeType();
35 |   /**
36 |    * Given a node, returns whether the replica end node is acceptable. It must be consistent with
37 |    * the selection prescribed in the select methods.
38 |    */
39 |   boolean acceptReplica(Node primary, Node replica);
40 | }
41 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/RDFCRUSHMapping.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.*;
 19 | 
 20 | public class RDFCRUSHMapping {
 21 |     private final int rf;
 22 |     private final PlacementRules rules;
 23 |     private final double targetBalance;
 24 | 
 25 |     private final Crunch cruncher = new Crunch();
 26 | 
 27 |     public RDFCRUSHMapping(int rf, PlacementRules rules, double targetBalance) {
 28 |         if (rf < 1) {
 29 |             throw new IllegalArgumentException("RF must be positive");
 30 |         }
 31 |         this.rf = rf;
 32 |         this.rules = rules;
 33 |         this.targetBalance = targetBalance;
 34 |     }
 35 | 
 36 |     /**
 37 |      * Given the list of data objects (as expressed as long values) and the processed topology,
 38 |      * returns the mapping from data objects to lists of end nodes onto which the data may be stored.
 39 |      */
 40 |     public Map<Long,List<Node>> createMapping(List<Long> data, Node crunchedRoot, Map<Node,List<Node>> rdfMap) {
 41 |         // sort the data to ensure data is used in the same order
 42 |         List<Long> sorted = new ArrayList<Long>(data);
 43 |         Collections.sort(sorted);
 44 | 
 45 |         Map<Long,List<Node>> map = new HashMap<Long,List<Node>>(sorted.size());
 46 |         // performance optimization
 47 |         // we create mini-trees to select the replicas; instead of creating the mini-trees every time,
 48 |         // we cache the result
 49 |         Map<Node,Node> miniTreeCache = new HashMap<Node,Node>();
 50 |         List<Node> datacenters = crunchedRoot.findChildren(Types.DATA_CENTER);
 51 |         // iterate on all datacenters
 52 |         for (Node datacenter: datacenters) {
 53 |             AssignmentTracker tracker = AssignmentTrackerFactory.create(datacenter, rf*sorted.size(), targetBalance);
 54 |             PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(tracker);
 55 | 
 56 |             for (Long l: sorted) { // ~ N
 57 |                 List<Node> selected = pickNodes(l, datacenter, pa, rdfMap, miniTreeCache);
 58 |                 List<Node> nodes = map.get(l);
 59 |                 if (nodes == null) {
 60 |                     nodes = new ArrayList<Node>(rf*datacenters.size());
 61 |                     map.put(l, nodes);
 62 |                 }
 63 |                 nodes.addAll(selected);
 64 |             }
 65 |         }
 66 |         return map;
 67 |     }
 68 | 
 69 |     private List<Node> pickNodes(long data, Node datacenter, PlacementAlgorithm pa,
 70 |                                  Map<Node,List<Node>> rdfMap, Map<Node,Node> miniTreeCache) {
 71 |         List<Node> nodes = new ArrayList<Node>(rf);
 72 |         // get the primary node
 73 |         Node primary = pa.select(datacenter, data, 1, rules.getEndNodeType()).get(0);
 74 |         nodes.add(primary);
 75 | 
 76 |         // obtain the "mini-tree"
 77 |         Node miniTree = miniTreeCache.get(primary);
 78 |         if (miniTree == null) {
 79 |             // we haven't seen this primary yet
 80 |             // get the (RF-1) secondary nodes
 81 |             List<Node> secondaries = rdfMap.get(primary);
 82 |             // construct the "mini-tree"
 83 |             miniTree = makeMiniTree(secondaries);
 84 |             miniTreeCache.put(primary, miniTree);
 85 |         }
 86 |         // select (RF-1) nodes from the mini-tree using the placement rules
 87 |         List<Node> selected = rules.select(miniTree, data, rf-1, pa);
 88 |         nodes.addAll(selected);
 89 |         return nodes;
 90 |     }
 91 | 
 92 |     private Node makeMiniTree(List<Node> nodes) {
 93 |         // this is used to look up parents nodes that are already created
 94 |         Map<Long,Node> lookup = new HashMap<Long,Node>();
 95 |         Node root = null;
 96 |         for (Node node: nodes) { // ~ RDF
 97 |             // create a copy for this purpose
 98 |             Node newNode = new Node(node);
 99 |             root = handleParent(node, newNode, lookup);
100 |         }
101 | 
102 |         // crunch
103 |         crunchNode(root);
104 |         return root;
105 |     }
106 | 
107 |     /**
108 |      * Recursively handles all the parents. Returns the root node as a result.
109 |      */
110 |     private Node handleParent(Node node, Node newNode, Map<Long,Node> lookup) {
111 |         Node parent = node.getParent();
112 |         if (parent == null) {
113 |             // root node: return it
114 |             return newNode;
115 |         }
116 | 
117 |         // process the parent
118 |         Node newParent = lookup.get(parent.getId());
119 |         if (newParent != null) { // it is already mapped
120 |             // set the relationship
121 |             setRelationship(newNode, newParent);
122 |             // we do not need to walk further because it is already processed
123 |             // simply return the root
124 |             return newParent.getRoot();
125 |         } else {
126 |             // this is the first time we are seeing this node: we need to walk up the tree
127 |             // create a copy
128 |             newParent = new Node(parent);
129 |             // add it to the lookup map
130 |             lookup.put(newParent.getId(), newParent);
131 |             // set the relationship
132 |             setRelationship(newNode, newParent);
133 |             // recurse for its parent
134 |             return handleParent(parent, newParent, lookup);
135 |         }
136 |     }
137 | 
138 |     private void setRelationship(Node newNode, Node newParent) {
139 |         newNode.setParent(newParent);
140 |         List<Node> childList = newParent.getChildren();
141 |         if (childList == null) {
142 |             childList = new ArrayList<Node>();
143 |             newParent.setChildren(childList);
144 |         }
145 |         childList.add(newNode);
146 |     }
147 | 
148 |     private void crunchNode(Node root) {
149 |         cruncher.recrunch(root);
150 |     }
151 | }
152 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/RDFMapping.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.Collections;
 20 | import java.util.HashMap;
 21 | import java.util.List;
 22 | import java.util.Map;
 23 | 
 24 | import org.slf4j.Logger;
 25 | import org.slf4j.LoggerFactory;
 26 | 
 27 | /**
 28 |  * The mapping function based on implementing RDF (replica distribution factor), RF (replication
 29 |  * factor), and a multi-level topology and placement rules.
 30 |  * <br/>
 31 |  * Both parameters are interpreted as per-datacenter; i.e. if RF = 2, you will have two replicas per
 32 |  * datacenter.
 33 |  */
 34 | 
 35 | public class RDFMapping implements MappingFunction {
 36 |   private static final Logger logger = LoggerFactory.getLogger(RDFMapping.class);
 37 | 
 38 |   private final int rdf;
 39 |   private final int rf;
 40 |   private final PlacementRules rules;
 41 | 
 42 |   private final boolean bidirectional;
 43 |   private final boolean handleOverload;
 44 |   private final double targetBalance;
 45 | 
 46 |   private final Crunch cruncher = new Crunch();
 47 | 
 48 |   private Map<Node,List<Node>> rdfMap;
 49 | 
 50 |   public RDFMapping(int rdf, int rf, PlacementRules rules) {
 51 |     this(rdf, rf, rules, false, false, 0.0d); // bi-di and overload handling are off by default
 52 |   }
 53 | 
 54 |   public RDFMapping(int rdf, int rf, PlacementRules rules, double targetBalance) {
 55 |     this(rdf, rf, rules, false, false, targetBalance);
 56 |   }
 57 | 
 58 |   public RDFMapping(int rdf, int rf, PlacementRules rules, boolean bidirectional) {
 59 |     this(rdf, rf, rules, bidirectional, false, 0.0d); // overload handling is off by default
 60 |   }
 61 | 
 62 |   private RDFMapping(int rdf, int rf, PlacementRules rules, boolean bidirectional,
 63 |       boolean handleOverload, double targetBalance) {
 64 |     if (rf < 1) {
 65 |       throw new IllegalArgumentException("RF must be positive");
 66 |     }
 67 |     if (rdf < rf) {
 68 |       throw new IllegalArgumentException("RDF must be equal to or greater than RF");
 69 |     }
 70 |     this.rdf = rdf;
 71 |     this.rf = rf;
 72 |     this.rules = rules;
 73 |     this.bidirectional = bidirectional;
 74 |     this.handleOverload = handleOverload;
 75 |     this.targetBalance = targetBalance;
 76 |   }
 77 | 
 78 |   /**
 79 |    * Given the topology and the list of data as represented by long values, and the placement rules
 80 |    * specified by the RDF mapping object, produces the mapping from data to list of nodes.
 81 |    */
 82 |   public Map<Long,List<Node>> computeMapping(List<Long> data, Node topology) {
 83 |     Node crunched = cruncher.makeCrunch(topology);
 84 |     long begin = System.nanoTime();
 85 |     rdfMap = createRDFMapping(crunched);
 86 |     long end = System.nanoTime();
 87 |     logger.info("time taken to create the RDF mapping: {} ms", (end-begin)/1000000L);
 88 |     begin = System.nanoTime();
 89 |     RDFCRUSHMapping crushMapping = new RDFCRUSHMapping(rf, rules, targetBalance);
 90 |     Map<Long,List<Node>> map = crushMapping.createMapping(data, crunched, rdfMap);
 91 |     end = System.nanoTime();
 92 |     logger.info("time taken to create mapping: {} ms", (end-begin)/1000000L);
 93 |     return map;
 94 |   }
 95 | 
 96 |   /**
 97 |    * Given the processed topology, returns the mapping from end nodes to lists of secondary end
 98 |    * nodes that are allowed to store the replicas. This mapping uses the same CRUSH algorithm as the
 99 |    * basic placement algorithm.
100 |    */
101 |   public Map<Node,List<Node>> createRDFMapping(Node crunchedRoot) {
102 |     // iterate on all datacenters
103 |     List<Node> datacenters = crunchedRoot.findChildren(Types.DATA_CENTER);
104 |     Map<Node,List<Node>> map = new HashMap<Node,List<Node>>(crunchedRoot.getAllLeafNodes().size());
105 |     for (Node datacenter: datacenters) {
106 |       createRDFMappingPerDC(datacenter, map);
107 |     }
108 |     return map;
109 |   }
110 | 
111 |   public Map<String, List<String>> getNewRdfMap() {
112 |     Map<String, List<String>> map = new HashMap<String, List<String>>();
113 | 
114 |     for (Map.Entry<Node, List<Node>> entry: rdfMap.entrySet()) {
115 |       List<String> nodeList = new ArrayList<String>();
116 |       for (Node node: entry.getValue()) {
117 |         nodeList.add(node.getName());
118 |       }
119 |       map.put(entry.getKey().getName(), nodeList);
120 |     }
121 | 
122 |     return map;
123 |   }
124 | 
125 |   private Map<Node,List<Node>> createRDFMappingPerDC(Node datacenter, Map<Node,List<Node>> map) {
126 |     final List<Node> allLeaves = datacenter.getAllLeafNodes();
127 |     final int endNodeSize = allLeaves.size();
128 |     final int totalMapping = endNodeSize*(rdf-1);
129 | 
130 |     // use a placement algorithm object for this run and keep track of successive rounds
131 |     PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(true);
132 | 
133 |     // create the quota so we avoid overloading nodes
134 |     Map<Node,Integer> quota = handleOverload ? createQuota(allLeaves) : null;
135 |     int mapped = 0;
136 | 
137 |     while (mapped < totalMapping) {
138 |       for (Node primary: allLeaves) { // <~ n
139 |         List<Node> secondaries = map.get(primary);
140 |         if (secondaries == null) {
141 |           secondaries = new ArrayList<Node>(rdf-1);
142 |           map.put(primary, secondaries);
143 |         }
144 |         // if it is already filled, we don't need to look at it
145 |         if (secondaries.size() < rdf-1) {
146 |           // CRUSH selection of nodes using the primary's id
147 |           Node selected = pa.select(datacenter, primary.getId(), 1, rules.getEndNodeType()).get(0);
148 | 
149 |           if (handleOverload) {
150 |             // pass through a number of filters to reject the selection
151 |             int currentQuota = quota.get(selected);
152 |             if (currentQuota == 0) { // we have used all the quota for this node
153 |               logger.trace("rejecting {} because it is fully committed.", selected);
154 |               continue;
155 |             }
156 |           }
157 |           // first run it through placement rules' acceptance
158 |           if (!rules.acceptReplica(primary, selected)) {
159 |             // reject and move onto the next primary
160 |             logger.trace("rejecting {} for {} from placement rules: we're at {} %",
161 |                 new Object[] {selected, primary, ((float)mapped)*100/totalMapping});
162 |             continue;
163 |           }
164 | 
165 |           // for bi-di, we need to reject the mapping if the secondary is full too
166 |           if (bidirectional) {
167 |             List<Node> other = map.get(selected);
168 |             if (other == null) {
169 |               other = new ArrayList<Node>(rdf-1);
170 |               map.put(selected, other);
171 |             } else if (other.size() >= rdf-1) {
172 |               // reject and move onto the next primary
173 |               logger.trace("rejecting {} for {} because secondary is fully mapped already.",
174 |                   selected, primary);
175 |               continue;
176 |             }
177 |             // make sure it's not selected already
178 |             if (secondaries.contains(selected) || other.contains(primary)) { // these are really one and the same
179 |               logger.trace("secondary {} is already mapped for {}", selected, primary);
180 |               continue;
181 |             }
182 |             logger.trace("accepting {} for {}", selected, primary);
183 |             // accept the node pair
184 |             secondaries.add(selected);
185 |             other.add(primary);
186 |             mapped += 2;
187 |             if (handleOverload) {
188 |               // make sure to decrement the quota
189 |               decrementQuota(selected, quota);
190 |               decrementQuota(primary, quota);
191 |             }
192 |           } else { // uni-directional
193 |             // make sure it's not selected already
194 |             if (secondaries.contains(selected)) {
195 |               logger.trace("secondary {} is already mapped for {}", selected, primary);
196 |               continue;
197 |             }
198 |             logger.trace("accepting {} for {}", selected, primary);
199 |             secondaries.add(selected);
200 |             mapped++;
201 |             if (handleOverload) {
202 |               // make sure to decrement the quota
203 |               decrementQuota(selected, quota);
204 |             }
205 |           }
206 |         }
207 |       }
208 |     }
209 |     return map;
210 |   }
211 | 
212 |   private Map<Node,Integer> createQuota(List<Node> nodes) {
213 |     Map<Node,Integer> quota = new HashMap<Node,Integer>();
214 |     final int headroom = 1;
215 |     final int size = nodes.size();
216 |     long totalWeight = 0;
217 |     for (Node node: nodes) {
218 |       totalWeight += node.getWeight();
219 |     }
220 |     for (Node node: nodes) {
221 |       int value = (int)(node.getWeight()*(rdf-1)*size/totalWeight) + headroom;
222 | //      logger.debug("quota for {}: {}", node, value);
223 |       quota.put(node, value);
224 |     }
225 |     return quota;
226 |   }
227 | 
228 |   private void decrementQuota(Node node, Map<Node,Integer> quota) {
229 |     int value = quota.get(node);
230 |     quota.put(node, --value);
231 |   }
232 | }
233 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/RackBasedTypes.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Type system that is based on racks. The first three types are defined as ROOT, DATA_CENTER, and
 * RACK (ROOT and DATA_CENTER are inherited from {@link Types}). It can be extended to describe
 * more layers of topologies. It is used by {@link BaseRackIsolationPlacementRules}.
 *
 * @see BaseRackIsolationPlacementRules
 *
 */
public interface RackBasedTypes extends Types {
  /** Type value of the rack level; the next value after {@link Types#DATA_CENTER}. */
  int RACK = 2;
}
29 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/RackIsolationPlacementRules.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | 
19 | /**
20 |  * Rack isolation placement rules based on storage system types with disks as end nodes.
21 |  * <br/>
22 |  * This is provided as a typical concrete implementation for certain types of topologies. One should
23 |  * define their own types and extend the {@link BaskRackIsolationPlacementRules} to suit their
24 |  * needs.
25 |  *
26 |  * @see StorageSystemTypes
27 |  */
28 | public class RackIsolationPlacementRules extends BaseRackIsolationPlacementRules
29 |     implements StorageSystemTypes {
30 |   public int getEndNodeType() {
31 |     return DISK;
32 |   }
33 | }
34 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/RandomSelector.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.List;
19 | import java.util.Random;
20 | 
21 | /**
22 |  * Implementation of deterministic-but-random selection: identical series of calls will result
23 |  * in identical selections. This is strictly for test purposes, and should not be used for any
24 |  * real selection.
25 |  */
26 | class RandomSelector implements Selector {
27 |   private final Random rng = new Random(42);
28 |   private final Node node;
29 | 
30 |   public RandomSelector(Node node) {
31 |     if (node.isLeaf()) {
32 |       throw new IllegalArgumentException("count is larger than the number of nodes!");
33 |     }
34 |     this.node = node;
35 |   }
36 | 
37 |   public Node select(long input, long round) {
38 |     List<Node> children = node.getChildren();
39 |     final int length = children.size();
40 |     if (length == 1) {
41 |       return children.get(0);
42 |     }
43 | 
44 |     // compute the sum of weights
45 |     int totalWeight = 0;
46 |     for (Node n: children) {
47 |       totalWeight += n.getWeight();
48 |     }
49 |     // random number
50 |     int draw = rng.nextInt(totalWeight);
51 |     // pick a node based on the random number
52 |     int sum = 0;
53 |     for (Node n: children) {
54 |       sum += n.getWeight();
55 |       if (draw < sum) {
56 |         // have a match: make a copy
57 |         return n;
58 |       }
59 |     }
60 |     return null;
61 |   }
62 | }
63 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Selector.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Object that encapsulates the algorithm (or "bucket type" in CRUSH terms) that, given a node,
 * selects from its immediate children. On instantiation, it may calculate certain properties and
 * attributes specific to the selection algorithm but independent of the data input, and maintain
 * that state. Those properties will be used as part of the selection input.
 */
public interface Selector {
  /**
   * Selects one node based on the input value and an additional round value.
   *
   * @param input the value that represents the data being placed
   * @param round additional value that varies the selection for the same input
   * @return the selected node
   */
  Node select(long input, long round);
}
30 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/SimpleCRUSHMapping.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.util.ArrayList;
19 | import java.util.Collections;
20 | import java.util.HashMap;
21 | import java.util.List;
22 | import java.util.Map;
23 | 
24 | /**
25 |  * Mapping function that computes a simple CRUSH mapping. By default, it accepts RF as the only
26 |  * parameter to control the replication factor.
27 |  */
28 | public class SimpleCRUSHMapping implements MappingFunction {
29 |   private final int rf;
30 |   private final PlacementRules rules;
31 |   private final double targetBalance;
32 | 
33 |   public SimpleCRUSHMapping(int rf, PlacementRules rules) {
34 |     this(rf, rules, 0.0d);
35 |   }
36 | 
37 |   public SimpleCRUSHMapping(int rf, PlacementRules rules, double targetBalance) {
38 |     this.rf = rf;
39 |     this.rules = rules;
40 |     this.targetBalance = targetBalance;
41 |   }
42 | 
43 |   public Map<Long,List<Node>> computeMapping(List<Long> data, Node topology) {
44 |     // sort the data to ensure data is used in the same order
45 |     List<Long> sorted = new ArrayList<Long>(data);
46 |     Collections.sort(sorted);
47 | 
48 |     Node crunch = new Crunch().makeCrunch(topology);
49 |     Map<Long,List<Node>> map = new HashMap<Long,List<Node>>(sorted.size());
50 |     // iterate over datacenters
51 |     List<Node> datacenters = crunch.findChildren(Types.DATA_CENTER);
52 |     for (Node datacenter: datacenters) {
53 |       AssignmentTracker tracker =
54 |           AssignmentTrackerFactory.create(datacenter, rf*sorted.size(), targetBalance);
55 |       PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(tracker);
56 | 
57 |       for (Long l: sorted) {
58 |         // apply the placement rules
59 |         List<Node> selected = rules.select(datacenter, l, rf, pa);
60 |         List<Node> nodes = map.get(l);
61 |         if (nodes == null) {
62 |           nodes = new ArrayList<Node>(rf*datacenters.size());
63 |           map.put(l, nodes);
64 |         }
65 |         nodes.addAll(selected);
66 |       }
67 |     }
68 |     return map;
69 |   }
70 | }
71 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/StorageSystemTypes.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Describes types for a fairly typical storage system cluster. It adds three type values on top
 * of the ones inherited through {@link RackBasedTypes}.
 *
 * @see RackIsolationPlacementRules
 */
public interface StorageSystemTypes extends RackBasedTypes {
  /** Type value of the sub-rack level; the next value after {@link RackBasedTypes#RACK}. */
  int SUB_RACK = 3;
  /** Type value of the storage node level. */
  int STORAGE_NODE = 4;
  /** Type value of the disk level; used as the end node type by {@link RackIsolationPlacementRules}. */
  int DISK = 5;
}
28 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/StrawSelector.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.Collections;
 20 | import java.util.Comparator;
 21 | import java.util.HashMap;
 22 | import java.util.List;
 23 | import java.util.Map;
 24 | 
/**
 * Selection algorithm based on the "straw" bucket type as described in the CRUSH algorithm.
 * <br>
 * On construction a "straw" value is computed for every immediate child of the node from the
 * children's weights. A selection hashes the input, the child id and the round for every child,
 * multiplies the lower 16 bits of the hash by the child's straw, and returns the child with the
 * highest product.
 */
class StrawSelector implements Selector {
  // straw value per child, stored as the float straw scaled by 0x10000; stays empty for a leaf
  private final Map<Node,Long> straws = new HashMap<Node,Long>();
  private final MultiInputHash hashFunction;

  /**
   * Computes the straws for the immediate children of the node. Nothing is computed for a leaf
   * node.
   */
  public StrawSelector(Node node) {
    if (!node.isLeaf()) {
      // create a map from the nodes to their values
      List<Node> sortedNodes = sortNodes(node.getChildren()); // do a reverse sort by weight

      int numLeft = sortedNodes.size();
      float straw = 1.0f;
      float wbelow = 0.0f;
      float lastw = 0.0f;
      int i = 0;
      final int length = sortedNodes.size();
      while (i < length) {
        Node current = sortedNodes.get(i);
        // a node with no weight gets a straw of zero
        if (current.getWeight() == 0) {
          straws.put(current, 0L);
          i++;
          continue;
        }
        // record the current straw for this node as a scaled long
        straws.put(current, (long)(straw*0x10000));
        i++;
        if (i == length) {
          break;
        }

        // from here on, "current" is the next node and "previous" is the one just recorded
        current = sortedNodes.get(i);
        Node previous = sortedNodes.get(i-1);
        // nodes of equal weight share the same straw: no adjustment needed
        if (current.getWeight() == previous.getWeight()) {
          continue;
        }
        wbelow += (float)(previous.getWeight() - lastw)*numLeft;
        // take the run of nodes that have the next weight out of the remaining count
        for (int j = i; j < length; j++) {
          if (sortedNodes.get(j).getWeight() == current.getWeight()) {
            numLeft--;
          } else {
            break;
          }
        }
        float wnext = (float)(numLeft * (current.getWeight() - previous.getWeight()));
        float pbelow = wbelow/(wbelow + wnext);
        // scale the straw for the next weight
        straw *= Math.pow(1.0/pbelow, 1.0/numLeft);
        lastw = previous.getWeight();
      }
    }
    hashFunction = new JenkinsHash();
  }

  /**
   * Returns a new list that's sorted in the reverse order of the weight. The list passed in is
   * left untouched.
   */
  private List<Node> sortNodes(List<Node> nodes) {
    List<Node> ret = new ArrayList<Node>(nodes);
    sortNodesInPlace(ret);
    return ret;
  }

  /**
   * Sorts the list in place in the reverse order of the weight (heavier nodes come first).
   */
  private void sortNodesInPlace(List<Node> nodes) {
    Collections.sort(nodes, new Comparator<Node>() {
      public int compare(Node n1, Node n2) {
        if (n2.getWeight() == n1.getWeight()) {
          return 0;
        }
        // the heavier node sorts first
        return (n2.getWeight() - n1.getWeight() > 0) ? 1 : -1;
        // sort by weight only in the reverse order
      }
    });
  }

  /**
   * Returns the child with the highest weighted score for the given input and round.
   * <br>
   * NOTE(review): the children are visited in the iteration order of a HashMap and only a
   * strictly higher score replaces the current pick, so on equal scores the child visited first
   * wins -- confirm that this order is stable enough for the callers.
   *
   * @throws IllegalStateException if no child was selected, e.g. when there are no straws because
   * the node is a leaf
   */
  public Node select(long input, long round) {
    Node selected = null;
    long hiScore = -1;
    for (Map.Entry<Node,Long> e: straws.entrySet()) {
      Node child = e.getKey();
      long straw = e.getValue();
      long score = weightedScore(child, straw, input, round);
      if (score > hiScore) {
        selected = child;
        hiScore = score;
      }
    }
    if (selected == null) {
      throw new IllegalStateException();
    }
    return selected;
  }

  /**
   * Hashes the input, the child id and the round, keeps the lower 16 bits of the hash, and
   * multiplies them by the straw.
   */
  private long weightedScore(Node child, long straw, long input, long round) {
    long hash = hashFunction.hash(input, child.getId(), round);
    // keep the lower 16 bits only
    hash = hash&0xffff;
    long weightedScore = hash*straw;
    return weightedScore;
  }
}
127 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Types.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
/**
 * Base type values that identify the top levels of a topology. Other interfaces extend this one
 * to add type values for further levels.
 */
public interface Types {
  /** Type value of the root level. */
  int ROOT = 0;
  /** Type value of the datacenter level. */
  int DATA_CENTER = 1;
}
22 | 


--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Utils.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | 
/**
 * Small byte-manipulation helpers.
 */
public class Utils {
  /**
   * Interprets the first four bytes of the array as a big-endian unsigned
   * 32-bit integer. Any bytes beyond the fourth are ignored.
   *
   * @param bstr the byte string; must contain at least four bytes
   * @return the value in the range [0, 2^32 - 1], carried in a long
   * @throws IllegalArgumentException if fewer than four bytes are supplied
   */
  public static long bstrTo32bit(byte[] bstr) {
    if (bstr.length < 4) {
      throw new IllegalArgumentException("hashed is less than 4 bytes!");
    }
    // accumulate in a long so the top bit never turns into a sign bit
    long value = 0L;
    for (int i = 0; i < 4; i++) {
      value = (value << 8) | ord(bstr[i]);
    }
    return value;
  }

  /** Returns the byte as an unsigned value in the range [0, 255]. */
  private static int ord(byte b) {
    final int unsigned = b & 0xff;
    return unsigned;
  }
}
36 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/AssignmentTrackerImplTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import static org.junit.Assert.assertFalse;
19 | import static org.junit.Assert.assertTrue;
20 | import static org.mockito.Mockito.mock;
21 | import static org.mockito.Mockito.when;
22 | 
23 | import java.util.ArrayList;
24 | import java.util.List;
25 | 
26 | import org.junit.Test;
27 | 
28 | public class AssignmentTrackerImplTest {
29 |   @Test
30 |   public void testLowWatermark() {
31 |     final int childType = 3;
32 |     Node child = mockChildNode(childType);
33 |     Node root = mockRootNode(childType, child);
34 |     AssignmentTracker tracker =
35 |         new AssignmentTrackerImpl(root, (int)(AssignmentTrackerImpl.LOW_WATERMARK-1), 0.25d);
36 |     assertFalse(tracker.trackAssignment(child));
37 |   }
38 | 
39 |   private Node mockRootNode(final int childType, Node child) {
40 |     Node root = mock(Node.class);
41 |     List<Node> children = new ArrayList<Node>();
42 |     children.add(child);
43 |     when(root.findChildren(childType)).thenReturn(children);
44 |     return root;
45 |   }
46 | 
47 |   private Node mockChildNode(final int childType) {
48 |     Node child = mock(Node.class);
49 |     when(child.getType()).thenReturn(childType);
50 |     when(child.getWeight()).thenReturn(65536L);
51 |     when(child.isLeaf()).thenReturn(true);
52 |     return child;
53 |   }
54 | 
55 |   @Test
56 |   public void testDifferenceThreshold() {
57 |     final int childType = 3;
58 |     Node child = mockChildNode(childType);
59 |     Node root = mockRootNode(childType, child);
60 |     final int dataCount = 100;
61 |     final double maxAllowed = ((double)(AssignmentTrackerImpl.DIFFERENCE_THRESHOLD - 1))/dataCount;
62 |     assertTrue(dataCount*maxAllowed < AssignmentTrackerImpl.DIFFERENCE_THRESHOLD);
63 |     AssignmentTracker tracker = new AssignmentTrackerImpl(root, dataCount, maxAllowed);
64 |     assertFalse(tracker.trackAssignment(child));
65 |   }
66 | 
67 |   @Test
68 |   public void testRejectAssignment() {
69 |     final int childType = 3;
70 |     Node child = mockChildNode(childType);
71 |     Node root = mockRootNode(childType, child);
72 |     final int dataCount = 50;
73 |     final double maxAllowed = 0.25d;
74 |     final int max = (int)Math.ceil(dataCount*(1.0d + maxAllowed));
75 |     AssignmentTracker tracker = new AssignmentTrackerImpl(root, dataCount, maxAllowed);
76 |     // no assignment: should not be rejected
77 |     assertFalse(tracker.rejectAssignment(child));
78 | 
79 |     // fill it up to max
80 |     for (int i = 0; i < max; i++) {
81 |       assertTrue(tracker.trackAssignment(child));
82 |     }
83 |     assertTrue(tracker.rejectAssignment(child));
84 |   }
85 | }
86 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/BaseSelectionTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import java.lang.reflect.Constructor;
 19 | import java.lang.reflect.InvocationTargetException;
 20 | import java.security.MessageDigest;
 21 | import java.security.NoSuchAlgorithmException;
 22 | import java.util.ArrayList;
 23 | import java.util.Collections;
 24 | import java.util.HashMap;
 25 | import java.util.List;
 26 | import java.util.Map;
 27 | 
 28 | import com.twitter.crunch.MappingDiff.Value;
 29 | import com.twitter.crunch.Node.Selection;
 30 | 
 31 | public class BaseSelectionTest {
 32 |   protected void doTestBasic(Class<? extends Selector> type) {
 33 |     Node rack = createTree();
 34 |     Selector selector = createSelector(type, rack);
 35 |     long input = getHashFromString("some random key");
 36 |     Node selected = selector.select(input, 1);
 37 |     System.out.println(selected.getName());
 38 |     selected = selector.select(input, 2);
 39 |     System.out.println(selected.getName());
 40 |   }
 41 | 
 42 |   protected void doTestBalance(Class<? extends Selector> type, final int tries) {
 43 |     Node rack = createTree();
 44 |     Selector selector = createSelector(type, rack);
 45 |     Map<String,Integer> nodeCounts = new HashMap<String,Integer>();
 46 |     for (int i = 0; i < tries; i++) {
 47 |       long input = getHashFromString("key" + i);
 48 |       Node selected = selector.select(input, 1);
 49 |       String node = selected.getName();
 50 |       Integer old = nodeCounts.get(node);
 51 |       int value = (old == null) ? 1 : old.intValue()+1;
 52 |       nodeCounts.put(node, value);
 53 |     }
 54 | 
 55 |     for (Map.Entry<String,Integer> e: nodeCounts.entrySet()) {
 56 |       System.out.println(e.getKey() + ": " + e.getValue());
 57 |     }
 58 | 
 59 |     printDeviation(nodeCounts, tries);
 60 |   }
 61 | 
 62 |   private Node createTree() {
 63 |     Node rack = createNode("rack", StorageSystemTypes.RACK, 0, 0, null);
 64 |     List<Node> children = new ArrayList<Node>();
 65 |     children.add(createNode("node1", StorageSystemTypes.DISK, 1, 100, null));
 66 |     children.add(createNode("node2", StorageSystemTypes.DISK, 2, 50, null));
 67 |     children.add(createNode("node3", StorageSystemTypes.DISK, 3, 100, null));
 68 |     rack.setChildren(children);
 69 |     return rack;
 70 |   }
 71 | 
 72 |   private Node createNode(String name, int type, long id, long weight, Selection selection) {
 73 |     Node node = new Node();
 74 |     node.setName(name);
 75 |     node.setType(type);
 76 |     node.setId(id);
 77 |     node.setWeight(weight);
 78 |     node.setSelection(selection);
 79 |     return node;
 80 |   }
 81 | 
 82 |   private Node createLargeTree() {
 83 |     Node rack = createNode("rack", StorageSystemTypes.RACK, 0, 0, null);
 84 |     List<Node> children = new ArrayList<Node>();
 85 |     final int size = 1024;
 86 |     for (int i = 1; i <= size; i++) {
 87 |       children.add(createNode("node" + i, StorageSystemTypes.DISK, i, 100, null));
 88 |     }
 89 |     rack.setChildren(children);
 90 |     return rack;
 91 |   }
 92 | 
 93 |   private long getHashFromString(String string) {
 94 |     MessageDigest md = null;
 95 |     try {
 96 |       md = MessageDigest.getInstance("MD5");
 97 |     } catch (NoSuchAlgorithmException ignore) {}
 98 |     byte[] hash = md.digest(string.getBytes());
 99 |     return Utils.bstrTo32bit(hash);
100 |   }
101 | 
102 |   private Map<String,Integer> getExpectedBalance(int tries) {
103 |     Map<String,Integer> map = new HashMap<String,Integer>();
104 |     map.put("node1", (int)(tries*0.4));
105 |     map.put("node2", (int)(tries*0.2));
106 |     map.put("node3", (int)(tries*0.4));
107 |     return map;
108 |   }
109 | 
110 |   private void printDeviation(Map<String,Integer> actual, int tries) {
111 |     // compute the deviation
112 |     Map<String,Integer> expected = getExpectedBalance(tries);
113 |     double varianceSum = 0.0;
114 |     int count = actual.size();
115 |     for (Map.Entry<String,Integer> e: actual.entrySet()) {
116 |       int expectedCount = expected.get(e.getKey());
117 |       int actualCount = e.getValue();
118 |       double diff = expectedCount - actualCount;
119 |       varianceSum += diff*diff/expectedCount/expectedCount;
120 |     }
121 |     double deviation = Math.sqrt(varianceSum/count);
122 |     System.out.println("relative deviation (%): " + deviation*100);
123 |   }
124 | 
125 |   protected void doTestLargeTree(Class<? extends Selector> type) {
126 |     Node rack = createLargeTree();
127 |     Selector selector = createSelector(type, rack);
128 |     Map<String,Integer> nodeCounts = new HashMap<String,Integer>();
129 |     final int size = rack.getChildren().size();
130 |     final int tries = 1024*128;
131 |     for (int i = 1; i <= tries; i++) {
132 |       Node selected = selector.select((long)i, 1);
133 |       String node = selected.getName();
134 |       Integer old = nodeCounts.get(node);
135 |       int value = (old == null) ? 1 : old.intValue()+1;
136 |       nodeCounts.put(node, value);
137 |     }
138 | 
139 |     for (Map.Entry<String,Integer> e: nodeCounts.entrySet()) {
140 |       System.out.println(e.getKey() + ": " + e.getValue());
141 |     }
142 | 
143 |     // compute the deviation
144 |     final int expectedCount = tries/size;
145 |     double varianceSum = 0.0;
146 |     int count = nodeCounts.size();
147 |     for (Integer actualCount: nodeCounts.values()) {
148 |       double diff = expectedCount - actualCount;
149 |       varianceSum += diff*diff/expectedCount/expectedCount;
150 |     }
151 |     double deviation = Math.sqrt(varianceSum/count);
152 |     System.out.println("relative deviation (%): " + deviation*100);
153 |   }
154 | 
155 |   private void analyzeDiff(Map<Integer, List<String>> mapping1,
156 |       Map<Integer, List<String>> mapping2) {
157 |     // compute the diff
158 |     Map<Integer,List<Value<String>>> diff = MappingDiff.calculateDiff(mapping1, mapping2);
159 |     System.out.println("number of data objects that moved: " + diff.size());
160 |     System.out.println("relative movement (%): " + ((float)diff.size())*100/mapping1.size());
161 |   }
162 | 
163 |   protected void doTestStability(Class<? extends Selector> type, boolean removal) {
164 |     // first try: create the full tree
165 |     Node rack = createLargeTree();
166 |     Selector selector = createSelector(type, rack);
167 |     Map<Integer,List<String>> mapping1 = new HashMap<Integer,List<String>>();
168 |     final int tries = 1024*128;
169 |     for (int i = 1; i <= tries; i++) {
170 |       Node selected = selector.select((long)i, 1);
171 |       String node = selected.getName();
172 |       mapping1.put(i, Collections.singletonList(node));
173 |     }
174 | 
175 |     // second try: remove a node
176 |     List<Node> children = rack.getChildren();
177 |     if (removal) {
178 |       children.remove(0); // remove the first node
179 |     } else { // addition
180 |       Node extra = createNode("node1000000", StorageSystemTypes.DISK, 1000000, 100, null);
181 |       children.add(extra); // remove the first node
182 |     }
183 | 
184 |     Selector selector2 = createSelector(type, rack);
185 |     Map<Integer,List<String>> mapping2 = new HashMap<Integer,List<String>>();
186 |     for (int i = 1; i <= tries; i++) {
187 |       Node selected = selector2.select((long)i, 1);
188 |       String node = selected.getName();
189 |       mapping2.put(i, Collections.singletonList(node));
190 |     }
191 | 
192 |     analyzeDiff(mapping1, mapping2);
193 |   }
194 | 
195 |   /**
196 |    * We require a constructor with a single argument that takes the Node.
197 |    */
198 |   private Selector createSelector(Class<? extends Selector> type, Node rack) {
199 |     try {
200 |       Constructor<? extends Selector> ctr = type.getConstructor(Node.class);
201 |       return ctr.newInstance(rack);
202 |     } catch (NoSuchMethodException e) { // should not occur
203 |       throw new IllegalArgumentException(e);
204 |     } catch (InstantiationException e) { // should not occur
205 |       throw new IllegalArgumentException(e);
206 |     } catch (InvocationTargetException e) { // should not occur
207 |       throw new IllegalArgumentException(e);
208 |     } catch (IllegalAccessException e) { // should not occur
209 |         throw new IllegalArgumentException(e);
210 |     }
211 |   }
212 | }
213 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/ConsistentHashingSelectionTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import org.junit.Test;
19 | 
20 | public class ConsistentHashingSelectionTest extends BaseSelectionTest {
21 |   private final Class<? extends Selector> cls = ConsistentHashingSelector.class;
22 | 
23 |   @Test
24 |   public void testLargeTree() {
25 |     doTestLargeTree(cls);
26 |   }
27 | 
28 |   @Test
29 |   public void testStabilityOnRemoval() {
30 |     doTestStability(cls, true);
31 |   }
32 | 
33 |   @Test
34 |   public void testStabilityOnAddition() {
35 |     doTestStability(cls, false);
36 |   }
37 | }
38 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/CrunchTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import static org.junit.Assert.assertEquals;
 19 | import static org.junit.Assert.assertNotNull;
 20 | import static org.junit.Assert.assertTrue;
 21 | import static org.junit.Assert.fail;
 22 | 
 23 | import java.util.Arrays;
 24 | import java.util.List;
 25 | 
 26 | import org.junit.Test;
 27 | 
 28 | public class CrunchTest {
 29 |   @Test
 30 |   public void testMakeCrunch() {
 31 |     Node root = TestUtils.createSimpleTree();
 32 |     Node crunched = new Crunch().makeCrunch(root);
 33 |     verifyNode(crunched);
 34 |     assertEquals(800, crunched.getWeight());
 35 |   }
 36 | 
 37 |   private void verifyNode(Node node) {
 38 |     System.out.println(node + " => " + node.getWeight());
 39 |     assertTrue(node.getWeight() > 0);
 40 |     if (!node.isLeaf()) {
 41 |       assertNotNull(node.getSelector());
 42 |       for (Node child: node.getChildren()) {
 43 |         verifyNode(child);
 44 |       }
 45 |     }
 46 |   }
 47 | 
 48 |   /**
 49 |    * Creates a situation where some parent nodes have all of their children picked. The mapping
 50 |    * should still converge fast in this case.
 51 |    */
 52 |   @Test
 53 |   public void testSmallTopology() {
 54 |     Node root = TestUtils.createSimpleTree();
 55 | 
 56 |     PlacementRules rules = new PlacementRules() {
 57 |       public List<Node> select(Node topCNode, long data, int count, PlacementAlgorithm pa) {
 58 |         return pa.select(topCNode, data, count, getEndNodeType());
 59 |       }
 60 | 
 61 |       public int getEndNodeType() { return StorageSystemTypes.DISK; }
 62 | 
 63 |       public boolean acceptReplica(Node primary, Node replica) {
 64 |         return true;
 65 |       }
 66 |     };
 67 | 
 68 |     new SimpleCRUSHMapping(3, rules).computeMapping(Arrays.<Long>asList(3L), root);
 69 |     assertTrue(true);
 70 |   }
 71 | 
 72 |   @Test
 73 |   public void testNonRootCrunch() {
 74 |     Node root = TestUtils.createSimpleTree();
 75 |     // override the type to cause the exception
 76 |     root.setType(StorageSystemTypes.STORAGE_NODE);
 77 |     try {
 78 |       new Crunch().makeCrunch(root);
 79 |       fail("we shouldn't reach this line");
 80 |     } catch (IllegalArgumentException e) {
 81 |       assertTrue(true);
 82 |     }
 83 |   }
 84 | 
 85 |   @Test
 86 |   public void testRecrunch() {
 87 |     Node root = TestUtils.createSimpleTree();
 88 |     Node crunched = new Crunch().makeCrunch(root);
 89 |     // now remove a node from the tree and recrunch
 90 |     Node rack = crunched.getChildren().get(1).getChildren().get(1);
 91 |     assertEquals(StorageSystemTypes.RACK, rack.getType());
 92 |     Node hd = rack.getChildren().get(1);
 93 |     assertEquals(StorageSystemTypes.DISK, hd.getType());
 94 |     List<Node> children = rack.getChildren();
 95 |     children.remove(hd);
 96 |     // recrunch
 97 |     new Crunch().recrunch(crunched);
 98 |     verifyNode(crunched);
 99 |     assertEquals(700, crunched.getWeight());
100 |   }
101 | }
102 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/CrunchTestSuite.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import org.junit.runner.RunWith;
19 | import org.junit.runners.Suite;
20 | import org.junit.runners.Suite.SuiteClasses;
21 | 
22 | import com.twitter.crunch.integrated.RDFBalanceTest;
23 | import com.twitter.crunch.integrated.RDFStabilityTest;
24 | 
25 | /**
26 |  * Test suite that exercises all crunch tests.
27 |  */
28 | @RunWith(Suite.class)
29 | @SuiteClasses({
30 |   NodeTest.class,
31 |   MessageDigestHashTest.class,
32 |   MappingDiffTest.class,
33 |   StrawSelectionTest.class,
34 |   ConsistentHashingSelectionTest.class,
35 |   RandomSelectionTest.class,
36 |   CrunchTest.class,
37 |   AssignmentTrackerImplTest.class,
38 |   RDFBalanceTest.class,
39 |   RDFStabilityTest.class
40 | })
41 | public class CrunchTestSuite {}


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/MappingDiffTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import static org.junit.Assert.assertEquals;
 19 | import static org.junit.Assert.assertNotNull;
 20 | import static org.junit.Assert.assertSame;
 21 | import static org.junit.Assert.fail;
 22 | 
 23 | import java.util.Arrays;
 24 | import java.util.Collections;
 25 | import java.util.HashMap;
 26 | import java.util.HashSet;
 27 | import java.util.List;
 28 | import java.util.Map;
 29 | import java.util.Set;
 30 | 
 31 | import org.junit.Test;
 32 | 
 33 | import com.twitter.crunch.MappingDiff.Difference;
 34 | import com.twitter.crunch.MappingDiff.Value;
 35 | 
 36 | public class MappingDiffTest {
 37 | 
 38 |   @Test
 39 |   public void testSimpleDifferences() {
 40 |     Map<Integer,List<Integer>> m1 = new HashMap<Integer,List<Integer>>();
 41 |     // 1 => [1, 2, 3]
 42 |     // 2 => [4, 5, 6]
 43 |     // 3 => [7, 8, 9]
 44 |     m1.put(1, Arrays.asList(1, 2, 3));
 45 |     m1.put(2, Arrays.asList(4, 5, 6));
 46 |     m1.put(3, Arrays.asList(7, 8, 9));
 47 |     Map<Integer,List<Integer>> m2 = new HashMap<Integer,List<Integer>>();
 48 |     // 1 => [1, 2, 4]
 49 |     // 2 => [3, 5, 6]
 50 |     // 3 => [7, 8, 9]
 51 |     m2.put(1, Arrays.asList(1, 2, 4));
 52 |     m2.put(2, Arrays.asList(3, 5, 6));
 53 |     m2.put(3, Arrays.asList(9, 7, 8));
 54 |     // expect
 55 |     // 1 => [3, 4]
 56 |     // 2 => [3, 4]
 57 |     Map<Integer,List<Value<Integer>>> diff = MappingDiff.calculateDiff(m1, m2);
 58 |     assertEquals(2, diff.size());
 59 |     Set<Integer> keys = diff.keySet();
 60 |     Set<Integer> expectedKeys = new HashSet<Integer>();
 61 |     Collections.addAll(expectedKeys, 1, 2);
 62 |     assertEquals(expectedKeys, keys);
 63 |     List<Value<Integer>> d1 = diff.get(1);
 64 |     assertNotNull(d1);
 65 |     for (Value<Integer> v: d1) {
 66 |       Difference type = v.getDifferenceType();
 67 |       switch (v.get()) {
 68 |       case 3:
 69 |         assertSame(Difference.REMOVED, type);
 70 |         break;
 71 |       case 4:
 72 |         assertSame(Difference.ADDED, type);
 73 |         break;
 74 |       default:
 75 |         fail("we shouldn't be here!");
 76 |       }
 77 |     }
 78 |     List<Value<Integer>> d2 = diff.get(2);
 79 |     assertNotNull(d2);
 80 |     for (Value<Integer> v: d2) {
 81 |       Difference type = v.getDifferenceType();
 82 |       switch (v.get()) {
 83 |       case 3:
 84 |         assertSame(Difference.ADDED, type);
 85 |         break;
 86 |       case 4:
 87 |         assertSame(Difference.REMOVED, type);
 88 |         break;
 89 |       default:
 90 |         fail("we shouldn't be here!");
 91 |       }
 92 |     }
 93 |   }
 94 | 
 95 |   @Test
 96 |   public void testMoreDiff() {
 97 |     Map<Integer,List<Integer>> m1 = new HashMap<Integer,List<Integer>>();
 98 |     // 1 => [1, 2, 3]
 99 |     // 2 => [4, 5, 6]
100 |     m1.put(1, Arrays.asList(1, 2, 3));
101 |     m1.put(2, Arrays.asList(4, 5, 6));
102 |     Map<Integer,List<Integer>> m2 = new HashMap<Integer,List<Integer>>();
103 |     // 1 => [1, 2, 4]
104 |     // 2 => [3, 5, 6]
105 |     // 3 => [7, 8, 9]
106 |     m2.put(1, Arrays.asList(1, 2, 3));
107 |     m2.put(2, Arrays.asList(4, 5, 6));
108 |     m2.put(3, Arrays.asList(9, 7, 8));
109 |     // expect
110 |     // 3 => [7, 8, 9]
111 |     Map<Integer,List<Value<Integer>>> diff = MappingDiff.calculateDiff(m1, m2);
112 |     assertEquals(1, diff.size());
113 |     Set<Integer> keys = diff.keySet();
114 |     Set<Integer> expectedKeys = new HashSet<Integer>();
115 |     expectedKeys.add(3);
116 |     assertEquals(expectedKeys, keys);
117 |     List<Value<Integer>> values = diff.get(3);
118 |     assertNotNull(values);
119 |     for (Value<Integer> v: values) {
120 |       Difference type = v.getDifferenceType();
121 |       switch (v.get()) {
122 |       case 7:
123 |       case 8:
124 |       case 9:
125 |         assertSame(Difference.ADDED, type);
126 |         break;
127 |       default:
128 |         fail("we shouldn't be here!");
129 |       }
130 |     }
131 |   }
132 | }
133 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/MessageDigestHashTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import org.junit.Test;
19 | 
20 | public class MessageDigestHashTest {
21 |   @Test
22 |   public void testHashLongLongLong() {
23 |     long a = 1;
24 |     long b = 2;
25 |     long c = 3;
26 |     MultiInputHash hf = new MessageDigestHash("SHA-1");
27 |     long val = hf.hash(a, b, c);
28 |     System.out.println(val);
29 |     a = 2; b = 2; c = 3;
30 |     val = hf.hash(a, b, c);
31 |     System.out.println(val);
32 |   }
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/NodeTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch;
 17 | 
 18 | import static org.junit.Assert.assertEquals;
 19 | import static org.junit.Assert.assertNull;
 20 | import static org.junit.Assert.assertSame;
 21 | import static org.junit.Assert.assertTrue;
 22 | 
 23 | import java.util.ArrayList;
 24 | import java.util.List;
 25 | 
 26 | import org.junit.Test;
 27 | 
 28 | import com.twitter.crunch.Node.Selection;
 29 | 
 30 | public class NodeTest {
 31 |   @Test
 32 |   public void testCopyConstructor() {
 33 |     Node node = new Node();
 34 |     // properties that should be equal
 35 |     final String foo = "foo";
 36 |     final int id = 1234;
 37 |     final int type = StorageSystemTypes.RACK;
 38 |     final long weight = 100;
 39 |     final Selection selection = Selection.STRAW;
 40 | 
 41 |     node.setName(foo);
 42 |     node.setId(id);
 43 |     node.setType(type);
 44 |     node.setWeight(weight);
 45 |     node.setSelection(selection);
 46 | 
 47 |     // properties that should not be copied
 48 |     node.setChildren(new ArrayList<Node>());
 49 |     node.setParent(new Node());
 50 | 
 51 |     Node copy = new Node(node);
 52 |     assertEquals(node.getName(), copy.getName());
 53 |     assertEquals(node.getId(), copy.getId());
 54 |     assertEquals(node.getType(), copy.getType());
 55 |     assertEquals(node.getWeight(), copy.getWeight());
 56 |     assertEquals(node.getSelection(), copy.getSelection());
 57 |     // ensure relationship is not copied
 58 |     assertNull(copy.getChildren());
 59 |     assertNull(copy.getParent());
 60 |   }
 61 | 
 62 |   @Test
 63 |   public void testIsLeaf() {
 64 |     Node node = new Node();
 65 |     List<Node> children = new ArrayList<Node>();
 66 |     children.add(new Node());
 67 |     children.remove(0);
 68 |     node.setChildren(children);
 69 | 
 70 |     assertTrue(node.isLeaf());
 71 |   }
 72 | 
 73 |   @Test
 74 |   public void testGetAllLeafNodes() {
 75 |     Node root = TestUtils.createSimpleTree();
 76 |     List<Node> leaves = root.getAllLeafNodes();
 77 |     assertEquals(8, leaves.size());
 78 |     // test a leaf node itself
 79 |     Node leaf = leaves.get(0);
 80 |     List<Node> self = leaf.getAllLeafNodes();
 81 |     assertEquals(1, self.size());
 82 |     assertSame(leaf, self.get(0));
 83 |   }
 84 | 
 85 |   @Test
 86 |   public void testFindChildren() {
 87 |     Node root = TestUtils.createSimpleTree();
 88 |     List<Node> racks = root.findChildren(StorageSystemTypes.RACK);
 89 |     assertEquals(4, racks.size());
 90 |     for (Node r: racks) {
 91 |       assertEquals(StorageSystemTypes.RACK, r.getType());
 92 |     }
 93 |     // test the node itself
 94 |     Node rack = racks.get(0);
 95 |     List<Node> self = rack.findChildren(StorageSystemTypes.RACK);
 96 |     assertEquals(1, self.size());
 97 |     assertSame(rack, self.get(0));
 98 |     // test the no match
 99 |     List<Node> empty = rack.findChildren(Types.DATA_CENTER);
100 |     assertTrue(empty.isEmpty());
101 |   }
102 | 
103 |   @Test
104 |   public void testChildrenCount() {
105 |     Node root = TestUtils.createSimpleTree();
106 |     int count = root.getChildrenCount(StorageSystemTypes.RACK);
107 |     assertEquals(4, count);
108 |     // test the node itself
109 |     List<Node> racks = root.findChildren(StorageSystemTypes.RACK);
110 |     Node rack = racks.get(0);
111 |     count = rack.getChildrenCount(StorageSystemTypes.RACK);
112 |     assertEquals(1, count);
113 |     // test the no match
114 |     count = rack.getChildrenCount(Types.DATA_CENTER);
115 |     assertEquals(0, count);
116 |   }
117 | 
118 |   /** Checks findParent() for an ancestor type, the node's own type, and a type with no match. */
119 |   @Test
120 |   public void testFindParent() {
121 |     final Node firstRack = TestUtils.createSimpleTree().findChildren(StorageSystemTypes.RACK).get(0);
122 |     final Node dataCenter = firstRack.findParent(Types.DATA_CENTER);
123 |     assertEquals(Types.DATA_CENTER, dataCenter.getType());
124 |     // the rack's own type is expected to resolve to the rack itself
125 |     assertSame(firstRack, firstRack.findParent(StorageSystemTypes.RACK));
126 |     // the storage node type is expected to resolve to null when asked from the rack
127 |     assertNull(firstRack.findParent(StorageSystemTypes.STORAGE_NODE));
128 |   }
129 | 
130 |   /** Checks that getRoot() called on a disk returns the very node the tree was built from. */
131 |   @Test
132 |   public void testGetRoot() {
133 |     final Node tree = TestUtils.createSimpleTree();
134 |     final Node firstDisk = tree.findChildren(StorageSystemTypes.DISK).get(0);
135 |     assertSame(tree, firstDisk.getRoot());
136 |   }
137 | }
138 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/RandomSelectionTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import org.junit.Test;
19 | 
20 | /**
21 |  * Runs the selection scenarios inherited from {@link BaseSelectionTest} against
22 |  * {@link RandomSelector}.
23 |  */
24 | public class RandomSelectionTest extends BaseSelectionTest {
25 |   /** Selector implementation handed to every scenario below. */
26 |   private final Class<? extends Selector> selectorType = RandomSelector.class;
27 | 
28 |   /** Large-tree scenario. */
29 |   @Test
30 |   public void testLargeTree() {
31 |     doTestLargeTree(selectorType);
32 |   }
33 | 
34 |   /** Stability scenario, invoked with the flag set to {@code true}. */
35 |   @Test
36 |   public void testStabilityOnRemoval() {
37 |     doTestStability(selectorType, true);
38 |   }
39 | 
40 |   /** Stability scenario, invoked with the flag set to {@code false}. */
41 |   @Test
42 |   public void testStabilityOnAddition() {
43 |     doTestStability(selectorType, false);
44 |   }
45 | }
46 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/StrawSelectionTest.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import org.junit.Test;
19 | 
20 | /**
21 |  * Runs the selection scenarios inherited from {@link BaseSelectionTest} against
22 |  * {@link StrawSelector}.
23 |  */
24 | public class StrawSelectionTest extends BaseSelectionTest {
25 |   /** Selector implementation handed to every scenario below. */
26 |   private final Class<? extends Selector> selectorType = StrawSelector.class;
27 | 
28 |   /** Basic scenario. */
29 |   @Test
30 |   public void testBasic() {
31 |     doTestBasic(selectorType);
32 |   }
33 | 
34 |   /** Balance scenario, invoked with the value 1000. */
35 |   @Test
36 |   public void testBalance() {
37 |     doTestBalance(selectorType, 1000);
38 |   }
39 | 
40 |   /** Large-tree scenario. */
41 |   @Test
42 |   public void testLargeTree() {
43 |     doTestLargeTree(selectorType);
44 |   }
45 | 
46 |   /** Stability scenario, invoked with the flag set to {@code true}. */
47 |   @Test
48 |   public void testStabilityOnRemoval() {
49 |     doTestStability(selectorType, true);
50 |   }
51 | 
52 |   /** Stability scenario, invoked with the flag set to {@code false}. */
53 |   @Test
54 |   public void testStabilityOnAddition() {
55 |     doTestStability(selectorType, false);
56 |   }
57 | }
58 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/Topology.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch;
17 | 
18 | import java.io.File;
19 | import java.io.IOException;
20 | 
21 | import org.codehaus.jackson.map.ObjectMapper;
22 | 
23 | /**
24 |  * Test helper that loads a {@link Node} from a file through a Jackson {@link ObjectMapper}.
25 |  */
26 | public class Topology {
27 | 	/**
28 | 	 * Deserializes the given file into a {@link Node}.
29 | 	 *
30 | 	 * @param file path of the file to read
31 | 	 * @return the node the object mapper produced from the file contents
32 | 	 * @throws IOException if reading or mapping the file fails
33 | 	 */
34 | 	public Node readTopology(String file) throws IOException {
35 | 		final File source = new File(file);
36 | 		return new ObjectMapper().readValue(source, Node.class);
37 | 	}
38 | }
39 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/integrated/RDFBalanceTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.integrated;
 17 | 
 18 | import static org.junit.Assert.assertEquals;
 19 | 
 20 | import java.util.HashSet;
 21 | import java.util.List;
 22 | import java.util.Map;
 23 | import java.util.Set;
 24 | 
 25 | import org.junit.Test;
 26 | 
 27 | import com.twitter.crunch.Crunch;
 28 | import com.twitter.crunch.MappingFunction;
 29 | import com.twitter.crunch.Node;
 30 | import com.twitter.crunch.RDFMapping;
 31 | import com.twitter.crunch.RackIsolationPlacementRules;
 32 | import com.twitter.crunch.SimpleCRUSHMapping;
 33 | import com.twitter.crunch.TestUtils;
 34 | import com.twitter.crunch.Types;
 35 | 
 36 | public class RDFBalanceTest {
 37 |   @Test
 38 |   public void testRDFMapping() {
 39 |     Node topology = TestUtils.createLargeTree();
 40 |     // reduce weight on one node to see if it gets less data
 41 |     Node smallNode = pickOneNode(topology);
 42 |     smallNode.setWeight(10);
 43 |     System.out.println("reduced weight on " + smallNode + " from 100 to 10.");
 44 |     Node crunch = new Crunch().makeCrunch(topology);
 45 |     final int rf = 2;
 46 |     final int rdf = 32;
 47 |     RDFMapping mappingFunction = new RDFMapping(rdf, rf, new RackIsolationPlacementRules());
 48 |     Map<Node,List<Node>> rdfMapping = mappingFunction.createRDFMapping(crunch);
 49 |     List<Node> leafNodes = crunch.getAllLeafNodes();
 50 |     assertEquals(leafNodes.size(), rdfMapping.size());
 51 |     for (Map.Entry<Node,List<Node>> e: rdfMapping.entrySet()) {
 52 |       // ensure there are no duplicates
 53 |       List<Node> nodes = e.getValue();
 54 |       Set<Node> set = new HashSet<Node>(nodes);
 55 |       assertEquals(nodes.size(), set.size());
 56 |     }
 57 |     TestUtils.analyzeRDFMapping(rdfMapping);
 58 |   }
 59 | 
 60 |   private Node pickOneNode(Node topology) {
 61 |     Node node = topology;
 62 |     while (!node.isLeaf()) {
 63 |       List<Node> children = node.getChildren();
 64 |       node = children.get(children.size()-1);
 65 |     }
 66 |     return node;
 67 |   }
 68 | 
 69 |   @Test
 70 |   public void testFullMapping() {
 71 |     doTestFullMapping(8);
 72 |     doTestFullMapping(32);
 73 |     doTestFullMapping(128);
 74 |   }
 75 | 
 76 |   @Test
 77 |   public void testFullMappingWithTargetBalance() {
 78 |     final double targetBalance = 0.3d;
 79 |     doTestFullMapping(8, targetBalance);
 80 |     doTestFullMapping(32, targetBalance);
 81 |     doTestFullMapping(128, targetBalance);
 82 |   }
 83 | 
 84 |   private void doTestFullMapping(final int rdf) {
 85 |     doTestFullMapping(rdf, 0.0f);
 86 |   }
 87 | 
 88 |   private void doTestFullMapping(final int rdf, final double targetBalance) {
 89 |     Node topology = TestUtils.createLargeTree();
 90 |     final int rf = 2;
 91 |     System.out.print("RDF = " + rdf);
 92 |     if (targetBalance > 0.0d) {
 93 |       System.out.println(", target balance = " + targetBalance);
 94 |     } else {
 95 |       System.out.println("");
 96 |     }
 97 |     MappingFunction mappingFunction =
 98 |         new RDFMapping(rdf, rf, new RackIsolationPlacementRules(), targetBalance);
 99 |     List<Long> data = TestUtils.createData();
100 | 
101 |     long begin = System.nanoTime();
102 |     Map<Long,List<Node>> mapping = mappingFunction.computeMapping(data, topology);
103 |     long end = System.nanoTime();
104 |     System.out.println("mapping time: " + (end - begin)/1000000 + " ms");
105 |     TestUtils.analyzeMapping(rf, topology.getChildrenCount(Types.DATA_CENTER), data.size(),
106 |         topology.getAllLeafNodes().size(), mapping);
107 |   }
108 | 
109 |   @Test
110 |   public void testPlainCrush() {
111 |     System.out.println("testing distribution using plain CRUSH");
112 |     Node topo = TestUtils.createLargeTree();
113 |     final int rf = 2;
114 |     MappingFunction mappingFunction = new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules());
115 |     List<Long> data = TestUtils.createData();
116 |     long begin = System.nanoTime();
117 |     Map<Long,List<Node>> map = mappingFunction.computeMapping(data, topo);
118 |     long end = System.nanoTime();
119 |     System.out.println("mapping time: " + (end - begin)/1000000 + " ms");
120 |     TestUtils.analyzeMapping(rf, topo.getChildrenCount(Types.DATA_CENTER), data.size(),
121 |         topo.getAllLeafNodes().size(), map);
122 |   }
123 | 
124 |   @Test
125 |   public void testPlainCrushWithTargetBalance() {
126 |     final double targetBalance = 0.3d;
127 |     System.out.println("testing distribution using plain CRUSH with target balance of " + targetBalance);
128 |     Node topo = TestUtils.createLargeTree();
129 |     final int rf = 2;
130 |     MappingFunction mappingFunction =
131 |         new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules(), targetBalance);
132 |     List<Long> data = TestUtils.createData();
133 |     long begin = System.nanoTime();
134 |     Map<Long,List<Node>> map = mappingFunction.computeMapping(data, topo);
135 |     long end = System.nanoTime();
136 |     System.out.println("mapping time: " + (end - begin)/1000000 + " ms");
137 |     TestUtils.analyzeMapping(rf, topo.getChildrenCount(Types.DATA_CENTER), data.size(),
138 |         topo.getAllLeafNodes().size(), map);
139 |   }
140 | }
141 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/integrated/RDFStabilityTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.integrated;
 17 | 
 18 | import static org.junit.Assert.assertEquals;
 19 | 
 20 | import java.util.Arrays;
 21 | import java.util.HashSet;
 22 | import java.util.List;
 23 | import java.util.Map;
 24 | import java.util.Set;
 25 | 
 26 | import org.junit.Test;
 27 | 
 28 | import com.twitter.crunch.MappingDiff;
 29 | import com.twitter.crunch.MappingDiff.Value;
 30 | import com.twitter.crunch.MappingFunction;
 31 | import com.twitter.crunch.Node;
 32 | import com.twitter.crunch.RDFMapping;
 33 | import com.twitter.crunch.RackIsolationPlacementRules;
 34 | import com.twitter.crunch.SimpleCRUSHMapping;
 35 | import com.twitter.crunch.TestUtils;
 36 | import com.twitter.crunch.Types;
 37 | 
 38 | public class RDFStabilityTest {
 39 |   @Test
 40 |   public void testStability() {
 41 |     doTestStability(8);
 42 |     doTestStability(32);
 43 |     doTestStability(128);
 44 |   }
 45 | 
 46 |   private void doTestStability(final int rdf) {
 47 |     Node topo = TestUtils.createLargeTree();
 48 |     final int nodeSize = topo.getAllLeafNodes().size();
 49 |     final int dcCount = topo.getChildrenCount(Types.DATA_CENTER);
 50 |     final int rf = 2;
 51 |     System.out.println("RDF = " + rdf);
 52 |     MappingFunction mappingFunction = new RDFMapping(rdf, rf, new RackIsolationPlacementRules(), 0.3d);
 53 |     List<Long> data = TestUtils.createData();
 54 | 
 55 |     Map<Long,List<Node>> before = mappingFunction.computeMapping(data, topo);
 56 |     verifyMapping(before, dcCount, rf, data.size());
 57 | 
 58 |     // make changes to the topology and compute the mapping again
 59 |     // reduce weight on one node to see if it gets less data
 60 |     Node removed = TestUtils.removeOneNode(topo);
 61 |     Map<Long,List<Node>> after = mappingFunction.computeMapping(data, topo);
 62 |     verifyMapping(after, dcCount, rf, data.size());
 63 | 
 64 |     // calculate the diff: vb -> its node movement
 65 |     Map<Long,List<Value<Node>>> diff = MappingDiff.calculateDiff(before, after);
 66 |     analyzeDiff(before, diff, rf, dcCount, nodeSize, removed);
 67 | 
 68 |     // calculate per-data replica-replacement counts: high values might indicate that data
 69 |     // completely swapped RDF groups
 70 |     int[] movementHistogram = new int[rf + 1];
 71 |     // for data not affected by the diff, increment 0
 72 |     for (Long input: before.keySet()) {
 73 |       if (!diff.containsKey(input))
 74 |         movementHistogram[0]++;
 75 |     }
 76 |     // for data affected by the diff, increment the appropriate movement count
 77 |     for (List<Value<Node>> value: diff.values()) {
 78 |       movementHistogram[value.size() / 2]++;
 79 |     }
 80 |     System.out.println("per-data movement histogram: " + Arrays.toString(movementHistogram));
 81 |   }
 82 | 
 83 |   private void verifyMapping(Map<Long,List<Node>> mapping, int dcCount, int rf, int dataSize) {
 84 |     assertEquals(dataSize, mapping.size());
 85 |     for (List<Node> nodes: mapping.values()) {
 86 |       // ensure there are no duplicates
 87 |       Set<Node> set = new HashSet<Node>(nodes);
 88 |       assertEquals(rf*dcCount, set.size());
 89 |     }
 90 |   }
 91 | 
 92 |   private void analyzeDiff(Map<Long,List<Node>> before, Map<Long,List<Value<Node>>> diff,
 93 |       final int rf, final int dcCount, final int nodeSize, Node removed) {
 94 |     final int beforeSize = before.size();
 95 |     // the same data may have moved in mulitple nodes (replicas)
 96 |     // need to count the actual movement
 97 |     int moves = 0;
 98 |     for (List<Value<Node>> l: diff.values()) {
 99 |       moves += l.size();
100 |     }
101 |     // it's a pretty good assumption that the moves are always pairs
102 |     moves /= 2;
103 |     System.out.println("number of data objects that moved: " + moves);
104 |     float relativeMovement = ((float)moves)/(beforeSize*rf*dcCount);
105 |     float multiplier = relativeMovement*nodeSize;
106 |     System.out.println("relative movement (%): " + relativeMovement*100);
107 |     System.out.println("movement multiplier: " + multiplier);
108 |     System.out.println("data objects moved per node (mean): " + ((float)moves)/nodeSize);
109 | 
110 |     // reverse the map and process it again
111 |     reverseDiff(before, diff, removed);
112 |   }
113 | 
114 |   private void reverseDiff(Map<Long,List<Node>> before, Map<Long,List<Value<Node>>> diff, Node removed) {
115 |     Map<Node,List<Value<Long>>> map = TestUtils.calculateReverseDiff(before, diff, removed);
116 | 
117 |     // identify the worst case number
118 |     int maxMoves = 0;
119 |     for (Map.Entry<Node,List<Value<Long>>> e: map.entrySet()) {
120 |       List<Value<Long>> list = e.getValue();
121 |       int send = 0;
122 |       int receive = 0;
123 |       for (Value<Long> v: list) {
124 |         switch (v.getDifferenceType()) {
125 |         case REMOVED:
126 |           send++;
127 |           break;
128 |         case ADDED:
129 |           receive++;
130 |           break;
131 |         }
132 |       }
133 |       if (send >= 50 || receive >= 50) {
134 |         System.err.println("node " + e.getKey() + " has movement of 50 or greater!");
135 |       }
136 |       maxMoves = Math.max(Math.max(send, receive), maxMoves);
137 |     }
138 |     System.out.println("data objects moved per node (max): " + maxMoves);
139 |   }
140 | 
141 |   @Test
142 |   public void testStabilityPlainCrush() {
143 |     System.out.println("testing stability using plain CRUSH");
144 |     Node topo = TestUtils.createLargeTree();
145 |     final int nodeSize = topo.getAllLeafNodes().size();
146 |     final int dcCount = topo.getChildrenCount(Types.DATA_CENTER);
147 |     final int rf = 2;
148 |     MappingFunction mappingFunction = new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules(), 0.3d);
149 |     List<Long> data = TestUtils.createData();
150 | 
151 |     Map<Long,List<Node>> before = mappingFunction.computeMapping(data, topo);
152 |     verifyMapping(before, dcCount, rf, data.size());
153 | 
154 |     // make changes to the topology and compute the mapping again
155 |     // reduce weight on one node to see if it gets less data
156 |     Node removed = TestUtils.removeOneNode(topo);
157 | 
158 |     Map<Long,List<Node>> after = mappingFunction.computeMapping(data, topo);
159 |     verifyMapping(after, dcCount, rf, data.size());
160 | 
161 |     // calculate the diff
162 |     Map<Long,List<Value<Node>>> diff = MappingDiff.calculateDiff(before, after);
163 |     analyzeDiff(before, diff, rf, dcCount, nodeSize, removed);
164 |   }
165 | }
166 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/integrated/SiblingBiasTest.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.integrated;
 17 | 
 18 | import java.util.ArrayList;
 19 | import java.util.HashMap;
 20 | import java.util.List;
 21 | import java.util.Map;
 22 | 
 23 | import org.junit.Test;
 24 | 
 25 | import com.twitter.crunch.MappingDiff;
 26 | import com.twitter.crunch.MappingDiff.Difference;
 27 | import com.twitter.crunch.MappingDiff.Value;
 28 | import com.twitter.crunch.MappingFunction;
 29 | import com.twitter.crunch.Node;
 30 | import com.twitter.crunch.Node.Selection;
 31 | import com.twitter.crunch.RackIsolationPlacementRules;
 32 | import com.twitter.crunch.SimpleCRUSHMapping;
 33 | import com.twitter.crunch.StorageSystemTypes;
 34 | import com.twitter.crunch.Types;
 35 | 
 36 | /**
 37 |  * Prints how data is distributed over a small topology before and after one leaf is removed,
 38 |  * and where the data of the removed leaf ended up. The scenarios only print their findings;
 39 |  * they contain no assertions.
 40 |  */
 41 | public class SiblingBiasTest {
 42 | 
 43 |   /** Runs the scenario on the two-level topology (racks, then disks). */
 44 |   @Test
 45 |   public void testMultiLevel() {
 46 |     doTest(createSmallTree(), 400);
 47 |   }
 48 | 
 49 |   /** Runs the scenario on the one-level topology (disks directly under the root). */
 50 |   @Test
 51 |   public void testOneLevel() {
 52 |     doTest(createFlatTree(), 400);
 53 |   }
 54 | 
 55 |   /**
 56 |    * Maps the data, removes one leaf, maps again, and prints both distributions followed by the
 57 |    * movement caused by the removal.
 58 |    */
 59 |   private void doTest(Node topo, int dataSize) {
 60 |     final List<Long> items = createData(dataSize);
 61 |     // RF = 1
 62 |     final MappingFunction crush = new SimpleCRUSHMapping(1, new RackIsolationPlacementRules());
 63 | 
 64 |     final Map<Long,List<Node>> original = crush.computeMapping(items, topo);
 65 |     analyzeDistribution(original);
 66 | 
 67 |     final Node failed = removeOneNode(topo);
 68 |     final Map<Long,List<Node>> recomputed = crush.computeMapping(items, topo);
 69 |     analyzeDistribution(recomputed);
 70 | 
 71 |     // let's figure out where the data that belonged to the removed node went
 72 |     analyzeMovement(original, recomputed, failed);
 73 |   }
 74 | 
 75 |   /** Prints every entry of the map as "key => value", one per line, in iteration order. */
 76 |   private <K,V> void printMap(Map<K,V> map) {
 77 |     for (Map.Entry<K,V> entry: map.entrySet()) {
 78 |       final K key = entry.getKey();
 79 |       final V value = entry.getValue();
 80 |       System.out.println(String.valueOf(key) + " => " + value);
 81 |     }
 82 |   }
 83 | 
 84 |   /** Counts how many data items each node holds and prints the counts. */
 85 |   private void analyzeDistribution(Map<Long,List<Node>> map) {
 86 |     final Map<Node,Integer> perNode = new HashMap<Node,Integer>();
 87 |     for (List<Node> replicas: map.values()) {
 88 |       for (Node node: replicas) {
 89 |         increment(perNode, node);
 90 |       }
 91 |     }
 92 |     printMap(perNode);
 93 |   }
 94 | 
 95 |   /** Adds one to the counter stored for the node, starting at one for an unseen node. */
 96 |   private void increment(Map<Node,Integer> counters, Node node) {
 97 |     final Integer current = counters.get(node);
 98 |     counters.put(node, current == null ? 1 : current + 1);
 99 |   }
100 | 
101 |   /** Builds a root with two racks of two disks each; every disk has weight 100. */
102 |   private Node createSmallTree() {
103 |     final int rackCount = 2;
104 |     final int disksPerRack = 2;
105 |     int nextId = 0;
106 | 
107 |     final Node root = new Node();
108 |     root.setName("root");
109 |     root.setId(nextId++);
110 |     root.setType(Types.ROOT);
111 |     root.setSelection(Selection.STRAW);
112 | 
113 |     final List<Node> racks = new ArrayList<Node>();
114 |     for (int rackNo = 1; rackNo <= rackCount; rackNo++) {
115 |       final Node rack = new Node();
116 |       rack.setName("rack" + rackNo);
117 |       rack.setId(nextId++);
118 |       rack.setType(StorageSystemTypes.RACK);
119 |       rack.setSelection(Selection.STRAW);
120 |       rack.setParent(root);
121 | 
122 |       final List<Node> disks = new ArrayList<Node>();
123 |       for (int diskNo = 1; diskNo <= disksPerRack; diskNo++) {
124 |         final Node disk = new Node();
125 |         disk.setName(rack.getName() + "hd" + diskNo);
126 |         disk.setId(nextId++);
127 |         disk.setType(StorageSystemTypes.DISK);
128 |         disk.setWeight(100);
129 |         disk.setParent(rack);
130 |         disks.add(disk);
131 |       }
132 |       rack.setChildren(disks);
133 |       racks.add(rack);
134 |     }
135 |     root.setChildren(racks);
136 |     return root;
137 |   }
138 | 
139 |   /** Builds a root whose children are four disks of weight 100, with no rack level. */
140 |   private Node createFlatTree() {
141 |     int nextId = 0;
142 | 
143 |     final Node root = new Node();
144 |     root.setName("root");
145 |     root.setId(nextId++);
146 |     root.setType(Types.ROOT);
147 |     root.setSelection(Selection.STRAW);
148 | 
149 |     // same disk names as in the two-level tree
150 |     final String[] diskNames = {"rack1hd1", "rack1hd2", "rack2hd1", "rack2hd2"};
151 |     final List<Node> disks = new ArrayList<Node>();
152 |     for (String diskName: diskNames) {
153 |       final Node disk = createNode(diskName, StorageSystemTypes.DISK, nextId++, 100, null);
154 |       disk.setParent(root);
155 |       disks.add(disk);
156 |     }
157 |     root.setChildren(disks);
158 |     return root;
159 |   }
160 | 
161 |   /** Creates a node and sets its name, type, id, weight and selection from the arguments. */
162 |   private Node createNode(String name, int type, long id, long weight, Selection selection) {
163 |     final Node created = new Node();
164 |     created.setName(name);
165 |     created.setType(type);
166 |     created.setId(id);
167 |     created.setWeight(weight);
168 |     created.setSelection(selection);
169 |     return created;
170 |   }
171 | 
172 |   /** Returns the list 1, 2, ..., size. */
173 |   private List<Long> createData(final int size) {
174 |     final List<Long> values = new ArrayList<Long>();
175 |     for (long value = 1; value <= size; value++) {
176 |       values.add(value);
177 |     }
178 |     return values;
179 |   }
180 | 
181 |   /**
182 |    * Follows the last child at every level; the leaf that is reached is removed from its
183 |    * parent's child list, reported on standard output and returned. A topology that is itself a
184 |    * leaf is returned untouched.
185 |    */
186 |   private Node removeOneNode(Node topo) {
187 |     Node current = topo;
188 |     while (!current.isLeaf()) {
189 |       final List<Node> siblings = current.getChildren();
190 |       final Node last = siblings.get(siblings.size()-1);
191 |       if (last.isLeaf()) {
192 |         siblings.remove(last);
193 |         System.out.println("marked " + last + " as failed");
194 |       }
195 |       current = last;
196 |     }
197 |     return current;
198 |   }
199 | 
200 |   /**
201 |    * For every data item whose diff mentions the removed node, prints each node the item was
202 |    * added to, and finally prints how many such items every destination received.
203 |    */
204 |   private void analyzeMovement(Map<Long,List<Node>> before, Map<Long,List<Node>> after, Node removed) {
205 |     final Map<Long,List<Value<Node>>> diff = MappingDiff.calculateDiff(before, after);
206 |     final Map<Node,Integer> received = new HashMap<Node,Integer>();
207 |     for (Map.Entry<Long,List<Value<Node>>> entry: diff.entrySet()) {
208 |       final List<Value<Node>> moves = entry.getValue();
209 |       if (!involves(moves, removed)) {
210 |         continue;
211 |       }
212 |       for (Value<Node> move: moves) {
213 |         if (move.getDifferenceType() != Difference.ADDED) {
214 |           continue;
215 |         }
216 |         final Node destination = move.get();
217 |         increment(received, destination);
218 |         System.out.println(entry.getKey() + " moved from " + removed.getName() + " to " + destination.getName());
219 |       }
220 |     }
221 |     printMap(received);
222 |   }
223 | 
224 |   /** Tells whether any of the moves refers to a node equal to the given one. */
225 |   private boolean involves(List<Value<Node>> moves, Node node) {
226 |     for (Value<Node> move: moves) {
227 |       if (move.get().equals(node)) {
228 |         return true;
229 |       }
230 |     }
231 |     return false;
232 |   }
233 | }
234 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/CalculateMovement.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools;
17 | 
18 | import java.util.*;
19 | 
20 | /**
21 |  * Command-line tool that counts the replica placements that changed between two exported
22 |  * bucket-to-node maps and prints that count.
23 |  */
24 | public class CalculateMovement {
25 | 
26 |     /**
27 |      * Counts, over every bucket of {@code before}, the nodes that are not listed for the same
28 |      * bucket in {@code after}, and prints the total to standard output without a trailing newline.
29 |      * A bucket that is missing from {@code after} contributes all of its former nodes instead of
30 |      * aborting the run with a {@link NullPointerException}.
31 |      *
32 |      * @param before bucket-to-nodes map read from the old map file
33 |      * @param after bucket-to-nodes map read from the new map file
34 |      */
35 |     private static void calTopologyChange(Map<Long, List<String>> before, Map<Long, List<String>> after) {
36 |         int moved = 0;
37 | 
38 |         for (Map.Entry<Long, List<String>> entry: before.entrySet()) {
39 |             List<String> beforeMap = entry.getValue();
40 |             List<String> afterMap = after.get(entry.getKey());
41 |             if (afterMap == null) {
42 |                 // the bucket is gone from the new map: none of its nodes stayed in place
43 |                 afterMap = Collections.emptyList();
44 |             }
45 |             for (String node: beforeMap) {
46 |                 if (!afterMap.contains(node))  moved++;
47 |             }
48 |         }
49 | 
50 |         System.out.print(String.format("%d", moved));
51 |     }
52 | 
53 |     /**
54 |      * Entry point. Expects exactly two arguments, the old and the new map file name; with any
55 |      * other number of arguments the usage text is printed and nothing is computed.
56 |      *
57 |      * @param args {@code old_map_filename new_map_filename}
58 |      * @throws Exception whatever importing either map file throws
59 |      */
60 |     public static void  main(String[] args) throws Exception {
61 |         if (args.length != 2) {
62 |             System.out.println("Usage: old_map_filename new_map_filename");
63 |             System.out.println("  moved");
64 |             return;
65 |         }
66 | 
67 |         String before = args[0];
68 |         String after = args[1];
69 | 
70 |         calTopologyChange(Utils.importMap(before), Utils.importMap(after));
71 |     }
72 | }
73 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/CreateBlobstoreMapping.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools;
 17 | 
 18 | import java.io.*;
 19 | import java.util.*;
 20 | 
 21 | import com.twitter.crunch.*;
 22 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer;
 23 | import com.twitter.crunch.tools.jsontopology.MappingParameters;
 24 | import org.codehaus.jackson.map.ObjectMapper;
 25 | import org.slf4j.Logger;
 26 | import org.slf4j.LoggerFactory;
 27 | 
 28 | public class CreateBlobstoreMapping {
 29 |   private static final Logger logger = LoggerFactory.getLogger(CreateBlobstoreMapping.class);
 30 | 
 31 |   private static List<Long> initializeVirtualBuckets(final int count) {
 32 |     List<Long> data = new ArrayList<Long>(count);
 33 |     for (long l = 1; l <= count; l++) {
 34 |       data.add(l);
 35 |     }
 36 | 
 37 |     return Collections.unmodifiableList(data);
 38 |   }
 39 | 
 40 |   /** Computes the bucket-to-node mapping with RDFMapping and exports its RDF map as "rdfmap_v1". */
 41 |   public static Map<Long, List<Node>> createNodeMapv1(MappingParameters params, Node root) throws InvalidTopologyException, IOException {
 42 |     final RDFMapping rdfMapping = new RDFMapping(params.getRdf(), params.getRf(),
 43 |         new RackIsolationPlacementRules(), params.getTargetBalance());
 44 | 
 45 |     // compute the mapping first, then read the RDF map back from the same mapping function
 46 |     final Map<Long, List<Node>> bucketToNodes =
 47 |         rdfMapping.computeMapping(initializeVirtualBuckets(params.getVirtualBucketCount()), root);
 48 | 
 49 |     // an IOException raised by the export is passed on to the caller
 50 |     final Map<String, List<String>> exported = rdfMapping.getNewRdfMap();
 51 |     Utils.exportRDFMap("rdfmap_v1", exported);
 52 | 
 53 |     return bucketToNodes;
 54 |   }
 55 | 
 56 |   public static Map<Long, List<Node>> createNodeMapv3(MappingParameters params, Node root, String oldFileName, String newFileName,
 57 |                                                       int rackDiversity, boolean trackCapacity, String migrationMapFileName) throws InvalidTopologyException {
 58 |     final List<Long> buckets = initializeVirtualBuckets(params.getVirtualBucketCount());
 59 | 
 60 |     Map<String, List<String>> currentMap;
 61 |     Map<String, List<String>> migrationMap = null;
 62 |     try {
 63 |       currentMap = Utils.importRDFMap(oldFileName);
 64 |     } catch (Exception e) {
 65 |       currentMap = new HashMap<String, List<String>>();
 66 |     }
 67 | 
 68 |     try {
 69 |       if (migrationMapFileName != null)
 70 |         migrationMap = Utils.importRDFMap(migrationMapFileName);
 71 |     } catch (Exception e) {
 72 |       migrationMap = null;
 73 |     }
 74 | 
 75 |     int rdfRange = (int)(params.getRdf() * 0.2);
 76 | 
 77 |     final StableRdfMapping mappingFunction = new StableRdfMapping(
 78 |       params.getRdf(),
 79 |       params.getRf(),
 80 |       new RackIsolationPlacementRules(),
 81 |       currentMap,
 82 |       params.getRdf() - rdfRange,
 83 |       params.getRdf() + rdfRange,
 84 |       params.getTargetBalance(),
 85 |       rackDiversity,
 86 |       trackCapacity,
 87 |       migrationMap);
 88 | 
 89 |     final Map<Long, List<Node>> mapping = mappingFunction.computeMapping(buckets, root);
 90 | 
 91 |     final Map<String, List<String>> newRdfMap = mappingFunction.getNewRdfMap();
 92 |     try {
 93 |       Utils.exportRDFMap(newFileName, newRdfMap);
 94 |     } catch (IOException e) {
 95 |       System.out.println(e.getMessage());
 96 |     }
 97 | 
 98 |     return mapping;
 99 |   }
100 | 
101 |   public static void  main(String[] args) throws Exception {
102 |     if (args.length < 4) {
103 |       System.out.println("Usage: version topology_json topology_params_json mapping_filename rdf target_balance rack_diversity track_capacity [new_rdf_filename] [old_rdf_filename] [migration_map]");
104 |       System.out.println("  version 1: Generate RDF Map using libcrunch");
105 |       System.out.println("  version 3: Generate RDF Map using stateful distribution");
106 |       return;
107 |     }
108 | 
109 |     final int version = Integer.parseInt(args[0]);
110 |     final String topologyJson = args[1];
111 |     final String topologyParamsJson = args[2];
112 |     final String fileName = args[3];
113 | 
114 |     final int rdf = Integer.parseInt(args[4]);
115 |     final double targetBalance = Double.parseDouble(args[5]);
116 | 
117 |     final int rackDiversity = Integer.parseInt(args[6]);
118 |     final boolean trackCapacity = Boolean.parseBoolean(args[7]);
119 |     final String newFileName = args[8];
120 |     final String oldFileName = args[9];
121 |     String migrationMapFileName = null;
122 |     if (args.length == 11) {
123 |       migrationMapFileName = args[10];
124 |     }
125 | 
126 |     JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer();
127 |     com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyJson));
128 |     Node root = topology.getRootNode();
129 | 
130 |     ObjectMapper mapper = new ObjectMapper();
131 |     MappingParameters params = mapper.readValue(new File(topologyParamsJson), MappingParameters.class);
132 | 
133 |     Map<Long, List<Node>> map = null;
134 |     switch(version) {
135 |       case 1:
136 |         map = createNodeMapv1(params, root);
137 |         break;
138 |       case 3:
139 |         params.setRdf(rdf);
140 |         params.setTargetBalance(targetBalance);
141 |         map = createNodeMapv3(params, root, oldFileName, newFileName, rackDiversity, trackCapacity, migrationMapFileName);
142 |         break;
143 |       default:
144 |         System.out.println("Wrong version");
145 |         break;
146 |     }
147 | 
148 |     // Dump map
149 |     Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
150 |     try {
151 |       for (Long bucket: new TreeSet<Long>(map.keySet())) {
152 |         out.append(bucket.toString());
153 |         List<Node> replicas = map.get(bucket);
154 |         for (Node replica: replicas) {
155 |           out.append(',');
156 |           out.append(replica.getName());
157 |         }
158 |         out.append('\n');
159 |       }
160 |     } finally {
161 |       out.flush();
162 |       out.close();
163 |     }
164 |   }
165 | }
166 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/CreateDataMapping.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools;
 17 | 
 18 | import java.io.*;
 19 | import java.util.*;
 20 | 
 21 | import com.twitter.crunch.*;
 22 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer;
 23 | import org.slf4j.Logger;
 24 | import org.slf4j.LoggerFactory;
 25 | import org.yaml.snakeyaml.Yaml;
 26 | import org.yaml.snakeyaml.constructor.Constructor;
 27 | 
 28 | public class CreateDataMapping {
 29 |     private static final Logger logger = LoggerFactory.getLogger(CreateDataMapping.class);
 30 | 
 31 |     private static List<Long> initializeVirtualBuckets(final int count) {
 32 |         List<Long> data = new ArrayList<Long>(count);
 33 |         for (long l = 1; l <= count; l++) {
 34 |             data.add(l);
 35 |         }
 36 | 
 37 |         return Collections.unmodifiableList(data);
 38 |     }
 39 | 
 40 |     public static Map<Long, List<Node>> createNodeMapv1(YamlTopologyFactory factory, Node root) throws InvalidTopologyException {
 41 |         final MappingFunction mappingFunction = new RDFMapping(
 42 |                 factory.replica_distribution_factor,
 43 |                 factory.replication_factor,
 44 |                 new RackIsolationPlacementRules(),
 45 |                 factory.target_balance_max);
 46 | 
 47 |         final List<Long> buckets = initializeVirtualBuckets(factory.number_of_buckets);
 48 |         final Map<Long, List<Node>> mapping = mappingFunction.computeMapping(buckets, root);
 49 |         return mapping;
 50 |     }
 51 | 
 52 |     public static Map<Long, List<Node>> createNodeMapv2(YamlTopologyFactory factory, Node root) throws InvalidTopologyException {
 53 |         final ProbingRDFMapping mappingFunction = new ProbingRDFMapping(
 54 |                 factory.replica_distribution_factor,
 55 |                 factory.replication_factor,
 56 |                 new RackIsolationPlacementRules(),
 57 |                 factory.weight_balance_tries,
 58 |                 factory.weight_balance_factor,
 59 |                 factory.history_count,
 60 |                 factory.sd_threshold,
 61 |                 factory.target_balance_max);
 62 | 
 63 |         final List<Long> buckets = initializeVirtualBuckets(factory.number_of_buckets);
 64 |         final Map<Long, List<Node>> mapping = mappingFunction.computeMapping(buckets, root);
 65 |         return mapping;
 66 |     }
 67 | 
 68 |     public static Map<Long, List<Node>> createNodeMapv3(YamlTopologyFactory factory, Node root, String oldFileName, String newFileName) throws InvalidTopologyException {
 69 |         final List<Long> buckets = initializeVirtualBuckets(factory.number_of_buckets);
 70 | 
 71 |         Map<String, List<String>> currentMap = null;
 72 |         try {
 73 |             currentMap = Utils.importRDFMap(oldFileName);
 74 |         } catch (Exception e) {
 75 |             currentMap = new HashMap<String, List<String>>();
 76 |         }
 77 |         final StableRdfMapping mappingFunction = new StableRdfMapping(
 78 |                 factory.replica_distribution_factor,
 79 |                 factory.replication_factor,
 80 |                 new RackIsolationPlacementRules(),
 81 |                 currentMap,
 82 |                 factory.replica_distribution_factor_min,
 83 |                 factory.replica_distribution_factor_max,
 84 |                 factory.target_balance_max,
 85 |                 8,
 86 |                 false);
 87 | 
 88 |         final Map<Long, List<Node>> mapping = mappingFunction.computeMapping(buckets, root);
 89 | 
 90 |         final Map<String, List<String>> newRdfMap = mappingFunction.getNewRdfMap();
 91 |         try {
 92 |             Utils.exportRDFMap(newFileName, newRdfMap);
 93 |         } catch (IOException e) {
 94 |             System.out.println(e.getMessage());
 95 |         }
 96 | 
 97 |         return mapping;
 98 |     }
 99 | 
100 |     public static void  main(String[] args) throws Exception {
101 |         if (args.length < 4) {
102 |             System.out.println("Usage: yaml version topology_yaml bucket_map_filename [old_rdf_filename] [new_rdf_filename]");
103 |             System.out.println("       json version topology_yaml topology_json bucket_map_filename [old_rdf_filename] [new_rdf_filename]");
104 |             System.out.println("  version 1: Generate RDF Map using libcrunch");
105 |             System.out.println("  version 2: Generate RDF Map using libcrunch with probing");
106 |             System.out.println("  version 3: Generate RDF Map using stateful distribution");
107 |             return;
108 |         }
109 | 
110 |         final int version = Integer.parseInt(args[1]);
111 |         final String topologyConfig = args[2];
112 | 
113 |         String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig))));
114 |         final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class));
115 |         final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents);
116 | 
117 |         int offset = 0;
118 |         Node root = null;
119 |         if (args[0].equalsIgnoreCase("yaml")) {
120 |             offset = 0;
121 |             root = topologyFactory.loadTopology();
122 |         } else {
123 |             offset = 1;
124 |             String topologyJson = args[3];
125 |             JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer();
126 |             com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyJson));
127 |             root = topology.getRootNode();
128 |         }
129 |         final String fileName = args[3 + offset];
130 | 
131 |         Map<Long, List<Node>> map = null;
132 |         switch(version) {
133 |             case 1:
134 |                 map = createNodeMapv1(topologyFactory, root);
135 |                 break;
136 |             case 2:
137 |                 map = createNodeMapv2(topologyFactory, root);
138 |                 break;
139 |             case 3:
140 |                 final String oldFileName = args[4 + offset];
141 |                 final String newFileName = args[5 + offset];
142 |                 map = createNodeMapv3(topologyFactory, root, oldFileName, newFileName);
143 |                 break;
144 |             default:
145 |                 System.out.println("Wrong version");
146 |                 break;
147 |         }
148 | 
149 |         // Dump map
150 |         Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
151 |         try {
152 |             for (Long bucket: new TreeSet<Long>(map.keySet())) {
153 |                 out.append(bucket.toString());
154 |                 List<Node> replicas = map.get(bucket);
155 |                 for (Node replica: replicas) {
156 |                     out.append(',');
157 |                     out.append(replica.getName());
158 |                 }
159 |                 out.append('\n');
160 |             }
161 |         } finally {
162 |             out.flush();
163 |             out.close();
164 |         }
165 |     }
166 | }
167 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/EvaluateMapping.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools;
 17 | 
 18 | import com.twitter.crunch.Node;
 19 | import com.twitter.crunch.MappingEvaluator;
 20 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer;
 21 | import org.yaml.snakeyaml.Yaml;
 22 | import org.yaml.snakeyaml.constructor.Constructor;
 23 | 
 24 | import java.io.File;
 25 | import java.io.FileInputStream;
 26 | import java.io.FileNotFoundException;
 27 | import java.util.*;
 28 | 
 29 | public class EvaluateMapping {
 30 |   private static void evaluateMap(Map<Long,List<String>> mapping, Map<String, Long> weight, Map<String, List<String>> rdfMap) {
 31 |     Map<String, Long> keyDistribution;
 32 |     double mean;
 33 |     double stdDev;
 34 |     Set<String> primaryNodes = null;
 35 | 
 36 |     if (rdfMap != null) primaryNodes = rdfMap.keySet();
 37 | 
 38 |     keyDistribution = new HashMap<String, Long>();
 39 |     for (Long key: mapping.keySet()) {
 40 |       for (String node: mapping.get(key)) {
 41 |         if (primaryNodes != null && !primaryNodes.contains(node)) {
 42 |           //System.out.println("RDF under min - " + node);
 43 |           continue;
 44 |         }
 45 | 
 46 |         if (keyDistribution.containsKey(node)) {
 47 |           final long count = keyDistribution.get(node) + 1;
 48 |           keyDistribution.put(node, count);
 49 |         } else {
 50 |           keyDistribution.put(node, (long)1);
 51 |         }
 52 |       }
 53 |     }
 54 | 
 55 |     for (String node : weight.keySet()) {
 56 |       if (primaryNodes != null && !primaryNodes.contains(node)) continue;
 57 | 
 58 |       if (!keyDistribution.containsKey(node)) keyDistribution.put(node, (long)0);
 59 |     }
 60 | 
 61 |     mean = MappingEvaluator.getWeightedMean(keyDistribution, weight);
 62 |     stdDev = MappingEvaluator.getWeightedStandardDeviation(keyDistribution, weight);
 63 |     Long min = Collections.min(keyDistribution.values());
 64 |     Long max = Collections.max(keyDistribution.values());
 65 |     final int replicaOnlyNodes = primaryNodes == null ? 0 : (weight.keySet().size() - primaryNodes.size());
 66 | 
 67 |     System.out.print(String.format("%d,%d,%.4f,%.4f,%d", min, max, mean, stdDev, replicaOnlyNodes));
 68 |   }
 69 | 
 70 |   public static void  main(String[] args) throws Exception {
 71 |     if (args.length < 3) {
 72 |       System.out.println("Usage: yaml|json topology_file map_filename rdfmap_filename");
 73 |       System.out.println("  min,max,mean,sd,replicaOnlyNodes");
 74 |       return;
 75 |     }
 76 | 
 77 |     final String topologyConfig = args[1];
 78 |     final String mapFileName = args[2];
 79 |     Map<String, List<String>> rdfMap = null;
 80 | 
 81 |     Node root = null;
 82 |     if (args[0].equalsIgnoreCase("yaml")) {
 83 |       String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig))));
 84 |       final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class));
 85 |       final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents);
 86 |       root = topologyFactory.loadTopology();
 87 |     } else {
 88 |       try {
 89 |         final String rdfMapFileName = args[3];
 90 |         rdfMap = Utils.importRDFMap(rdfMapFileName);
 91 |       } catch (FileNotFoundException ex) {
 92 |         // ignore this
 93 |       }
 94 |       JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer();
 95 |       com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyConfig));
 96 |       root = topology.getRootNode();
 97 |     }
 98 | 
 99 |     Map<Long, List<String>> map = Utils.importMap(mapFileName);
100 | 
101 |     final List<Node> allLeaves = root.getAllLeafNodes();
102 | 
103 |     Map<String, Long> definedWeight = new HashMap<String, Long>();
104 |     for (Node node : allLeaves) {
105 |       if (!node.isFailed() && node.getWeight() != 0) definedWeight.put(node.getName(), node.getWeight());
106 |       //if (node.getWeight() == 0) System.out.println("Weight 0 - " + node.getName());
107 |     }
108 | 
109 |     evaluateMap(map, definedWeight, rdfMap);
110 |   }
111 | }
112 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/EvaluateRDFMapping.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools;
17 | 
18 | import com.twitter.crunch.MappingEvaluator;
19 | import com.twitter.crunch.Node;
20 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer;
21 | import org.yaml.snakeyaml.Yaml;
22 | import org.yaml.snakeyaml.constructor.Constructor;
23 | 
24 | import java.io.File;
25 | import java.io.FileInputStream;
26 | import java.util.*;
27 | 
28 | public class EvaluateRDFMapping {
29 | 
30 |     private static void evaluateRDFMap(Map<String,List<String>> mapping, Map<String, Long> weight) {
31 |         Map<String, Long> keyDistribution;
32 |         double mean;
33 |         double stdDev;
34 | 
35 |         keyDistribution = new HashMap<String, Long>();
36 |         for (String key: mapping.keySet()) {
37 |             for (String node: mapping.get(key)) {
38 |                 if (keyDistribution.containsKey(node)) {
39 |                     final long count = keyDistribution.get(node) + 1;
40 |                     keyDistribution.put(node, count);
41 |                 } else {
42 |                     keyDistribution.put(node, (long)1);
43 |                 }
44 |             }
45 |         }
46 | 
47 |         for (String key: mapping.keySet()) {
48 |             List<String> replicaList = mapping.get(key);
49 |             Set<String> replicaSet = new HashSet<String>(replicaList);
50 |             if (replicaSet.size() != replicaList.size()) System.out.print("Duplicates found for " + key);
51 |         }
52 | 
53 |         for (String node : weight.keySet()) {
54 |             if (!keyDistribution.containsKey(node)) keyDistribution.put(node, (long)0);
55 |         }
56 | 
57 |         mean = MappingEvaluator.getWeightedMean(keyDistribution, weight);
58 |         stdDev = MappingEvaluator.getWeightedStandardDeviation(keyDistribution, weight);
59 |         Long min = Collections.min(keyDistribution.values());
60 |         Long max = Collections.max(keyDistribution.values());
61 | 
62 |         System.out.print(String.format("%d,%d,%.4f,%.4f", min, max, mean, stdDev));
63 |     }
64 | 
65 |     public static void  main(String[] args) throws Exception {
66 |         if (args.length != 3) {
67 |             System.out.println("Usage: yaml|json topology_file rdfmap_filename");
68 |             System.out.println("  min,max,mean,sd");
69 |             return;
70 |         }
71 | 
72 |         final String topologyConfig = args[1];
73 |         final String rdfMapFileName = args[2];
74 | 
75 |         Map<String, List<String>> map = Utils.importRDFMap(rdfMapFileName);
76 | 
77 |         Node root = null;
78 |         if (args[0].equalsIgnoreCase("yaml")) {
79 |             String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig))));
80 |             final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class));
81 |             final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents);
82 |             root = topologyFactory.loadTopology();
83 |         } else {
84 |             JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer();
85 |             com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyConfig));
86 |             root = topology.getRootNode();
87 |         }
88 | 
89 |         final List<Node> allLeaves = root.getAllLeafNodes();
90 | 
91 |         Map<String, Long> definedWeight = new HashMap<String, Long>();
92 |         for (Node node : allLeaves) {
93 |           if (!node.isFailed()) definedWeight.put(node.getName(), node.getWeight());
94 |         }
95 | 
96 |         evaluateRDFMap(map, definedWeight);
97 |     }
98 | }
99 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/InvalidTopologyException.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools;
17 | 
/**
 * Checked exception signalling that a topology description could not be turned into a usable
 * topology. Offers the four standard {@link Exception} constructor shapes.
 */
public class InvalidTopologyException extends Exception {
    /** Creates an exception without a detail message or cause. */
    public InvalidTopologyException() {
        super();
    }

    /**
     * Creates an exception carrying a detail message.
     *
     * @param message description of the problem
     */
    public InvalidTopologyException(String message) {
        super(message);
    }

    /**
     * Creates an exception carrying a detail message and the underlying cause.
     *
     * @param message description of the problem
     * @param cause   the exception that triggered this one
     */
    public InvalidTopologyException(String message, Throwable cause) {
        super(message, cause);
    }

    /**
     * Creates an exception wrapping the underlying cause.
     *
     * @param cause the exception that triggered this one
     */
    public InvalidTopologyException(Throwable cause) {
        super(cause);
    }
}
34 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/TopologyGenerator.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools;
17 | 
18 | import org.yaml.snakeyaml.Yaml;
19 | import org.yaml.snakeyaml.constructor.Constructor;
20 | 
21 | import java.io.FileInputStream;
22 | import java.io.PrintWriter;
23 | import java.util.ArrayList;
24 | 
25 | public class TopologyGenerator {
26 |     public static void  main(String[] args) throws Exception {
27 |         if (args.length != 4) {
28 |             System.out.println("Usage: topology.template.yaml node_count node_weight output_filename");
29 |             return;
30 |         }
31 | 
32 |         final String topologyTemplate = args[0];
33 |         final int nodeCount = Integer.parseInt(args[1]);
34 |         final long nodeWeight = Long.parseLong(args[2]);
35 |         final String fileName = args[3];
36 | 
37 |         String yamlContents = new String(Utils.slurp((new FileInputStream(topologyTemplate))));
38 |         final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class));
39 |         final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents);
40 | 
41 |         topologyFactory.machine_list = new ArrayList<YamlTopologyFactory.TopologyMachine>();
42 |         for (int i = 1; i <= nodeCount; i++) {
43 |             YamlTopologyFactory.TopologyMachine machine = new YamlTopologyFactory.TopologyMachine();
44 |             machine.name = String.format("smf1-%03d-01-sr1.prod.twitter.com", i);
45 |             machine.weight = nodeWeight;
46 |             topologyFactory.machine_list.add(machine);
47 |         }
48 | 
49 |         PrintWriter out = new PrintWriter(fileName);
50 |         out.print(yaml.dump(topologyFactory));
51 |         out.close();
52 |     }
53 | }
54 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/Utils.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools;
 17 | 
 18 | import java.io.*;
 19 | import java.util.*;
 20 | 
 21 | public class Utils {
 22 |     public static byte[] slurp(InputStream in) throws IOException {
 23 |         byte[] buf = new byte[Math.max(in.available(), 4096)];
 24 | 
 25 |         int sofar = 0;
 26 |         while (true) {
 27 |             if (sofar == buf.length) {
 28 |                 byte[] tmp = new byte[buf.length + 2];
 29 |                 System.arraycopy(buf, 0, tmp, 0, buf.length);
 30 |                 buf = tmp;
 31 |             }
 32 |             int read = in.read(buf, sofar, buf.length - sofar);
 33 |             if (read == -1) {
 34 |                 byte[] ret = new byte[sofar];
 35 |                 System.arraycopy(buf, 0, ret, 0, sofar);
 36 |                 return ret;
 37 |             }
 38 |             sofar += read;
 39 |         }
 40 |     }
 41 | 
 42 |     public static Map<Long, List<String>> importMap(String fileName) throws IOException {
 43 |         Scanner scanner = new Scanner(new FileInputStream(fileName), "UTF-8");
 44 |         Map<Long, List<String>> map = new HashMap<Long, List<String>>();
 45 |         try {
 46 |             while (scanner.hasNextLine()){
 47 |                 String line = scanner.nextLine();
 48 |                 if (line.contains(",")) {
 49 |                     String[] parts = line.split(",");
 50 |                     Long name = Long.parseLong(parts[0]);
 51 |                     String[] replicas = Arrays.copyOfRange(parts, 1, parts.length);
 52 |                     map.put(name, Arrays.asList(replicas));
 53 |                 }
 54 |             }
 55 |         }finally {
 56 |             scanner.close();
 57 |         }
 58 | 
 59 |         return map;
 60 |     }
 61 | 
 62 |     public static void exportRDFMap(String fileName, Map<String, List<String>> map) throws IOException {
 63 | 
 64 |         Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8");
 65 |         try {
 66 |             for (String node: map.keySet()) {
 67 |                 out.append(node);
 68 |                 List<String> replicas = map.get(node);
 69 |                 for (String replica: replicas) {
 70 |                     out.append(',');
 71 |                     out.append(replica);
 72 |                 }
 73 |                 out.append('\n');
 74 |             }
 75 |         } finally {
 76 |             out.flush();
 77 |             out.close();
 78 |         }
 79 |     }
 80 | 
 81 |     public static Map<String, List<String>> importRDFMap(String fileName) throws IOException {
 82 | 
 83 |         Scanner scanner = new Scanner(new FileInputStream(fileName), "UTF-8");
 84 |         Map<String, List<String>> map = new HashMap<String, List<String>>();
 85 |         try {
 86 |             while (scanner.hasNextLine()){
 87 |                 String line = scanner.nextLine();
 88 |                 if (line.contains(",")) {
 89 |                     String[] parts = line.split(",");
 90 |                     String name = parts[0];
 91 |                     String[] replicas = Arrays.copyOfRange(parts, 1, parts.length);
 92 |                     map.put(name, Arrays.asList(replicas));
 93 |                 }
 94 |             }
 95 |         }finally {
 96 |             scanner.close();
 97 |         }
 98 | 
 99 |         return map;
100 |     }
101 | }
102 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/YamlTopologyFactory.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools;
 17 | 
 18 | import com.twitter.crunch.*;
 19 | 
 20 | import java.util.*;
 21 | 
 22 | public class YamlTopologyFactory {
    // Public fields with defaults; the tools in this package obtain instances by loading a YAML
    // document with a SnakeYAML Constructor bound to this class, so the field names are part of
    // the file format.

    // Number of virtual buckets to map; bucket ids run from 1 to this value.
    public int number_of_buckets = 50000;
    // Passed as the RDF argument to RDFMapping, ProbingRDFMapping and StableRdfMapping.
    public int replica_distribution_factor = 6;
    // Passed as the RF argument to the same mapping functions.
    public int replication_factor = 3;
    // Lower and upper RDF bounds handed to StableRdfMapping (version 3 only).
    public int replica_distribution_factor_min = 5;
    public int replica_distribution_factor_max = 7;

    // Target balance handed to every mapping function.
    public double target_balance_max = 0;
    // NOTE(review): not read by any of the tools visible alongside this class - confirm usage.
    public boolean dump_detail_map = false;
    // The next four values are forwarded to ProbingRDFMapping (version 2 only).
    public int weight_balance_tries = 1;
    public double weight_balance_factor = 0.1;
    public int history_count = 10;
    public double sd_threshold = 0.05;

    // Machines making up the topology; TopologyGenerator replaces this list wholesale.
    public List<TopologyMachine> machine_list = null;
    /** One machine entry of the topology. */
    public static class TopologyMachine {
        // Host name; parseMachineName expects the part before the first dot to have four
        // dash-separated fields (dc-rack-subrack-#).
        public String name;
        // Weight assigned to the node created for this machine.
        public Long weight;
        // First dash-separated field of the host name; filled in by parseMachineName.
        public String datacenter;
        // Second dash-separated field of the host name; filled in by parseMachineName.
        public String rack;
    }
 43 | 
 44 |     private static void parseMachineName(TopologyMachine machine) throws InvalidTopologyException {
 45 |         String[] nameParts = machine.name.split("\\.");
 46 |         if (nameParts.length == 0) {
 47 |             throw new InvalidTopologyException("Machine name " + machine.name + " is not fully qualified domain name");
 48 |         }
 49 |         String machineName = nameParts[0];
 50 |         String[] parts = machineName.split("-");
 51 |         if (parts.length != 4) {
 52 |             throw new InvalidTopologyException("Machine name " + machineName + " is not in dc-rack-subrack-# format");
 53 |         }
 54 |         machine.datacenter = parts[0];
 55 |         machine.rack = parts[1];
 56 |     }
 57 | 
 58 |     private Node buildLibcrunchTree(Map<String, Map<String, Set<TopologyMachine>>> datacenters) {
 59 |         int id = 0;
 60 | 
 61 |         // Build the root
 62 |         Node libcrunchRoot = new Node();
 63 |         libcrunchRoot.setName("root");
 64 |         libcrunchRoot.setId(id++);
 65 |         libcrunchRoot.setType(Types.ROOT);
 66 |         libcrunchRoot.setSelection(Node.Selection.STRAW);
 67 | 
 68 |         List<Node> libcrunchDcs = new ArrayList<Node>();
 69 |         for (String datacenter : datacenters.keySet()) {
 70 |             Node libcrunchDc = new Node();
 71 |             libcrunchDc.setName(datacenter);
 72 |             libcrunchDc.setId(id++);
 73 |             libcrunchDc.setType(Types.DATA_CENTER);
 74 |             libcrunchDc.setSelection(Node.Selection.STRAW);
 75 |             libcrunchDc.setParent(libcrunchRoot);
 76 | 
 77 |             List<Node> libcrunchRacks = new ArrayList<Node>();
 78 |             for (String rack : datacenters.get(datacenter).keySet()) {
 79 |                 Node libcrunchRack = new Node();
 80 |                 libcrunchRack.setName(rack);
 81 |                 libcrunchRack.setId(id++);
 82 |                 libcrunchRack.setType(StorageSystemTypes.RACK);
 83 |                 libcrunchRack.setSelection(Node.Selection.STRAW);
 84 |                 libcrunchRack.setParent(libcrunchDc);
 85 | 
 86 |                 List<Node> libcrunchNodes = new ArrayList<Node>();
 87 |                 for (TopologyMachine machine : datacenters.get(datacenter).get(rack)) {
 88 |                     Node libcrunchNode = new Node();
 89 |                     libcrunchNode.setName(machine.name);
 90 |                     libcrunchNode.setWeight(machine.weight);
 91 |                     libcrunchNode.setId(id++);
 92 |                     libcrunchNode.setType(StorageSystemTypes.DISK);
 93 |                     libcrunchNode.setSelection(Node.Selection.STRAW);
 94 |                     libcrunchNode.setParent(libcrunchRack);
 95 |                     libcrunchNodes.add(libcrunchNode);
 96 |                 }
 97 |                 libcrunchRack.setChildren(libcrunchNodes);
 98 |                 libcrunchRacks.add(libcrunchRack);
 99 |             }
100 |             libcrunchDc.setChildren(libcrunchRacks);
101 |             libcrunchDcs.add(libcrunchDc);
102 |         }
103 |         libcrunchRoot.setChildren(libcrunchDcs);
104 | 
105 |         return libcrunchRoot;
106 |     }
107 | 
108 |     public Node loadTopology() throws InvalidTopologyException {
109 |         // Parse machine name to get datacenter and rack information
110 |         Map<String, Map<String, Set<TopologyMachine>>> datecenters = new HashMap<String, Map<String, Set<TopologyMachine>>>();
111 |         for(TopologyMachine machine: machine_list) {
112 |             parseMachineName(machine);
113 |             if (datecenters.containsKey(machine.datacenter)) {
114 |                 Map<String, Set<TopologyMachine>> racks = datecenters.get(machine.datacenter);
115 |                 if (racks.containsKey(machine.rack)) {
116 |                     Set<TopologyMachine> machines = racks.get(machine.rack);
117 |                     machines.add(machine);
118 |                 } else {
119 |                     Set<TopologyMachine> machines = new HashSet<TopologyMachine>();
120 |                     machines.add(machine);
121 |                     racks.put(machine.rack, machines);
122 |                 }
123 |             } else {
124 |                 Map<String, Set<TopologyMachine>> rack = new HashMap<String, Set<TopologyMachine>>();
125 |                 Set<TopologyMachine> machines = new HashSet<TopologyMachine>();
126 |                 machines.add(machine);
127 |                 rack.put(machine.rack, machines);
128 |                 datecenters.put(machine.datacenter, rack);
129 |             }
130 |         }
131 | 
132 |         return buildLibcrunchTree(datecenters);
133 |     }
134 | }
135 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopology.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
18 | import com.twitter.crunch.Node;
19 | 
20 | public class JsonTopology extends Topology {
21 |   public void setVersion(long version) {
22 |     this.version = version;
23 |   }
24 | 
25 |   public void setRootNode(Node root) {
26 |     this.root = root;
27 |   }
28 | }
29 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopologyDeserializer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
18 | import java.io.File;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | 
22 | import org.codehaus.jackson.map.ObjectMapper;
23 | 
24 | public final class JsonTopologyDeserializer implements TopologyDeserializer {
25 |   public Topology readTopology(InputStream is) throws IOException {
26 |     ObjectMapper mapper = new ObjectMapper();
27 |     return mapper.readValue(is, JsonTopology.class);
28 |   }
29 | 
30 |   public Topology readTopology(File file) throws IOException {
31 |     ObjectMapper mapper = new ObjectMapper();
32 |     return mapper.readValue(file, JsonTopology.class);
33 |   }
34 | 
35 |   public Topology readTopology(String string) throws IOException {
36 |     ObjectMapper mapper = new ObjectMapper();
37 |     return mapper.readValue(string, JsonTopology.class);
38 |   }
39 | }
40 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopologySerializer.java:
--------------------------------------------------------------------------------
  1 | /**
  2 |  * Copyright 2013 Twitter, Inc.
  3 |  * Licensed under the Apache License, Version 2.0 (the "License");
  4 |  * you may not use this file except in compliance with the License.
  5 |  * You may obtain a copy of the License at
  6 |  *
  7 |  * http://www.apache.org/licenses/LICENSE-2.0
  8 |  *
  9 |  * Unless required by applicable law or agreed to in writing, software
 10 |  * distributed under the License is distributed on an "AS IS" BASIS,
 11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12 |  * See the License for the specific language governing permissions and
 13 |  * limitations under the License.
 14 |  */
 15 | 
 16 | package com.twitter.crunch.tools.jsontopology;
 17 | 
 18 | import java.io.File;
 19 | import java.io.IOException;
 20 | import java.io.OutputStream;
 21 | import java.util.List;
 22 | import java.util.ListIterator;
 23 | 
 24 | import org.codehaus.jackson.JsonGenerator;
 25 | import org.codehaus.jackson.Version;
 26 | import org.codehaus.jackson.annotate.JsonIgnore;
 27 | import org.codehaus.jackson.map.JsonSerializer;
 28 | import org.codehaus.jackson.map.Module;
 29 | import org.codehaus.jackson.map.ObjectMapper;
 30 | import org.codehaus.jackson.map.ObjectWriter;
 31 | import org.codehaus.jackson.map.SerializationConfig;
 32 | import org.codehaus.jackson.map.SerializerProvider;
 33 | import org.codehaus.jackson.map.annotate.JsonSerialize;
 34 | import org.codehaus.jackson.map.introspect.BasicBeanDescription;
 35 | import org.codehaus.jackson.map.ser.BeanPropertyWriter;
 36 | import org.codehaus.jackson.map.ser.BeanSerializerModifier;
 37 | 
 38 | import com.twitter.crunch.Node;
 39 | import com.twitter.crunch.Selector;
 40 | 
 41 | public final class JsonTopologySerializer implements TopologySerializer {
 42 |   public void writeTopology(Topology topology, OutputStream os) throws IOException {
 43 |     getWriter().writeValue(os, topology);
 44 |   }
 45 | 
 46 |   public void writeTopology(Topology topology, String path) throws IOException {
 47 |     getWriter().writeValue(new File(path), topology);
 48 |   }
 49 | 
 50 |   private ObjectWriter getWriter() {
 51 |     ObjectMapper mapper = new ObjectMapper();
 52 | 
 53 |     // omit null fields from serialization
 54 |     mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_NULL);
 55 |     // exclude certain fields and getter methods from node serialization via mixin
 56 |     mapper.getSerializationConfig().addMixInAnnotations(Node.class, MixIn.class);
 57 |     // register the module that suppresses the failed property if false
 58 |     mapper.registerModule(new IsFailedSuppressor());
 59 | 
 60 |     return mapper.writer().withDefaultPrettyPrinter();
 61 |   }
 62 | 
 63 |   private abstract class MixIn {
 64 |     @JsonIgnore public abstract long getId();
 65 |     @JsonIgnore public abstract Node getParent();
 66 |     @JsonIgnore public abstract boolean isLeaf();
 67 |     @JsonIgnore public abstract Selector getSelector();
 68 |     @JsonIgnore public abstract List<Node> getAllLeafNodes();
 69 |     @JsonIgnore public abstract int getChildrenCount();
 70 |     @JsonIgnore public abstract Node getRoot();
 71 |   }
 72 | 
 73 |   private static class IsFailedSuppressor extends Module {
 74 |     public String getModuleName() {
 75 |       return "IsFailedSuppressor";
 76 |     }
 77 | 
 78 |     public Version version() {
 79 |       return new Version(1, 0, 0, null);
 80 |     }
 81 | 
 82 |     public void setupModule(SetupContext context) {
 83 |       context.addBeanSerializerModifier(new BeanSerializerModifier() {
 84 |         @Override
 85 |         public List<BeanPropertyWriter> changeProperties(SerializationConfig config,
 86 |           BasicBeanDescription beanDesc, List<BeanPropertyWriter> beanProperties) {
 87 |           ListIterator<BeanPropertyWriter> it = beanProperties.listIterator();
 88 |           while (it.hasNext()) {
 89 |             BeanPropertyWriter writer = it.next();
 90 |             // replace the bean writer with my own if it is for "failed"
 91 |             if (writer.getName().equals("failed")) {
 92 |               BeanPropertyWriter newWriter = new IsFailedWriter(writer);
 93 |               it.set(newWriter);
 94 |             }
 95 |           }
 96 |           return beanProperties;
 97 |         }
 98 |       });
 99 |     }
100 |   }
101 | 
102 |   private static class IsFailedWriter extends BeanPropertyWriter {
103 |     public IsFailedWriter(BeanPropertyWriter base) {
104 |       super(base);
105 |     }
106 | 
107 |     public IsFailedWriter(BeanPropertyWriter base, JsonSerializer<Object> ser) {
108 |       super(base, ser);
109 |     }
110 | 
111 |     @Override
112 |     public void serializeAsField(Object bean, JsonGenerator jgen, SerializerProvider prov)
113 |         throws Exception {
114 |       Object value = get(bean);
115 |       if (value instanceof Boolean) {
116 |         Boolean b = (Boolean)value;
117 |         if (!b.booleanValue()) {
118 |           // filter if "failed" is false
119 |           return;
120 |         }
121 |       }
122 |       super.serializeAsField(bean, jgen, prov);
123 |     }
124 | 
125 |     @Override
126 |     public BeanPropertyWriter withSerializer(JsonSerializer<Object> ser) {
127 |       return new IsFailedWriter(this, ser);
128 |     }
129 |   }
130 | }
131 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/MappingParameters.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
/**
 * Mutable bag of mapping parameters with plain getters and setters. The public
 * string constants are the key names used for each parameter and are also the
 * labels printed by {@link #toString()}.
 */
public class MappingParameters {
  // keys for mapping parameters
  public static final String RF = "rf";
  public static final String RDF = "rdf";
  public static final String TARGET_BALANCE = "target_balance";
  public static final String VIRTUAL_BUCKET_COUNT = "virtual_bucket_count";
  public static final String USE_CRUSH_MAPPING = "use_crush_mapping";

  private volatile int rf;
  private volatile int rdf;
  private volatile double targetBalance;
  private volatile int virtualBucketCount;
  private volatile boolean useCrushMapping;

  /** Creates an instance in which every parameter has its Java default value. */
  public MappingParameters() {}

  /** Copy constructor: takes over every parameter value from {@code params}. */
  public MappingParameters(MappingParameters params) {
    this.rf = params.rf;
    this.rdf = params.rdf;
    this.targetBalance = params.targetBalance;
    this.virtualBucketCount = params.virtualBucketCount;
    this.useCrushMapping = params.useCrushMapping;
  }

  public int getRf() {
    return this.rf;
  }

  public void setRf(int rf) {
    this.rf = rf;
  }

  public int getRdf() {
    return this.rdf;
  }

  public void setRdf(int rdf) {
    this.rdf = rdf;
  }

  public double getTargetBalance() {
    return this.targetBalance;
  }

  public void setTargetBalance(double targetBalance) {
    this.targetBalance = targetBalance;
  }

  public int getVirtualBucketCount() {
    return this.virtualBucketCount;
  }

  public void setVirtualBucketCount(int virtualBucketCount) {
    this.virtualBucketCount = virtualBucketCount;
  }

  public boolean isUseCrushMapping() {
    return this.useCrushMapping;
  }

  public void setUseCrushMapping(boolean useCrushMapping) {
    this.useCrushMapping = useCrushMapping;
  }

  /** Renders all parameters as {@code (key=value, key=value, ...)}. */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("(");
    text.append(RF).append("=").append(rf).append(", ");
    text.append(RDF).append("=").append(rdf).append(", ");
    text.append(TARGET_BALANCE).append("=").append(targetBalance).append(", ");
    text.append(VIRTUAL_BUCKET_COUNT).append("=").append(virtualBucketCount).append(", ");
    text.append(USE_CRUSH_MAPPING).append("=").append(useCrushMapping);
    return text.append(")").toString();
  }
}
89 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/Topology.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
18 | import com.twitter.crunch.Node;
19 | 
20 | public class Topology {
21 |   protected volatile Node root;
22 |   protected volatile long version;
23 | 
24 |   public Topology() {}
25 | 
26 |   public Topology(Node root, long version) {
27 |     this.root = root;
28 |     this.version = version;
29 |   }
30 | 
31 |   public long getVersion() {
32 |     return version;
33 |   }
34 | 
35 |   public Node getRootNode() {
36 |     return root;
37 |   }
38 | }
39 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/TopologyDeserializer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
18 | import java.io.File;
19 | import java.io.IOException;
20 | import java.io.InputStream;
21 | 
22 | public interface TopologyDeserializer {
23 |   Topology readTopology(InputStream is) throws IOException;
24 |   Topology readTopology(File file) throws IOException;
25 |   Topology readTopology(String string) throws IOException;
26 | }
27 | 


--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/tools/jsontopology/TopologySerializer.java:
--------------------------------------------------------------------------------
 1 | /**
 2 |  * Copyright 2013 Twitter, Inc.
 3 |  * Licensed under the Apache License, Version 2.0 (the "License");
 4 |  * you may not use this file except in compliance with the License.
 5 |  * You may obtain a copy of the License at
 6 |  *
 7 |  * http://www.apache.org/licenses/LICENSE-2.0
 8 |  *
 9 |  * Unless required by applicable law or agreed to in writing, software
10 |  * distributed under the License is distributed on an "AS IS" BASIS,
11 |  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 |  * See the License for the specific language governing permissions and
13 |  * limitations under the License.
14 |  */
15 | 
16 | package com.twitter.crunch.tools.jsontopology;
17 | 
18 | import java.io.IOException;
19 | import java.io.OutputStream;
20 | 
21 | public interface TopologySerializer {
22 |   void writeTopology(Topology topology, OutputStream os) throws IOException;
23 |   void writeTopology(Topology topology, String path) throws IOException;
24 | }
25 | 


--------------------------------------------------------------------------------
/src/test/resources/logback-test.xml:
--------------------------------------------------------------------------------
 1 | <configuration>
 2 |   <appender name="FILE" class="ch.qos.logback.core.FileAppender">
 3 |     <file>libcrunch.log</file>
 4 |     <!-- encoders are assigned the type
 5 |          ch.qos.logback.classic.encoder.PatternLayoutEncoder by default -->
 6 |     <encoder>
 7 |       <pattern>%d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n</pattern>
 8 |     </encoder>
 9 |   </appender>
10 | 
11 | <!--   <logger name="com.twitter.crunch.CRUSHPlacementAlgorithm" level="DEBUG" /> -->
12 | <!--   <logger name="com.twitter.crunch.RDFMapping" level="DEBUG" /> -->
13 | <!--   <logger name="com.twitter.crunch.StrawSelector" level="DEBUG" /> -->
14 | 
15 |   <root level="info">
16 |     <appender-ref ref="FILE" />
17 |   </root>
18 | </configuration>
19 | 


--------------------------------------------------------------------------------
/src/test/resources/topology.json:
--------------------------------------------------------------------------------
  1 | {
  2 | 	"name": "my-cluster",
  3 | 	"type": 0,
  4 | 	"selection": "STRAW",
  5 | 	"children": [
  6 | 		{
  7 | 			"name": "dc1",
  8 | 			"type": 1,
  9 | 			"selection": "STRAW",
 10 | 			"children": [
 11 | 				{
 12 | 					"name": "dc1rack1",
 13 | 					"type": 2,
 14 | 					"selection": "STRAW",
 15 | 					"children": [
 16 | 						{
 17 | 							"name": "dc1rack1node1", 
 18 | 							"type": 5,
 19 | 							"weight": 100
 20 | 						},
 21 | 						{
 22 | 							"name": "dc1rack1node2", 
 23 | 							"type": 5,
 24 | 							"weight": 100
 25 | 						}
 26 | 					]
 27 | 				},
 28 | 				{
 29 | 					"name": "dc1rack2",
 30 | 					"type": 2,
 31 | 					"selection": "STRAW",
 32 | 					"children": [
 33 | 						{
 34 | 							"name": "dc1rack2node1", 
 35 | 							"type": 5,
 36 | 							"weight": 100
 37 | 						},
 38 | 						{
 39 | 							"name": "dc1rack2node2", 
 40 | 							"type": 5,
 41 | 							"weight": 50
 42 | 						}
 43 | 					]
 44 | 				},
 45 | 				{
 46 | 					"name": "dc1rack3",
 47 | 					"type": 2,
 48 | 					"selection": "STRAW",
 49 | 					"children": [
 50 | 						{
 51 | 							"name": "dc1rack3node1", 
 52 | 							"type": 5,
 53 | 							"weight": 100
 54 | 						},
 55 | 						{
 56 | 							"name": "dc1rack3node2", 
 57 | 							"type": 5,
 58 | 							"weight": 100
 59 | 						}
 60 | 					]
 61 | 				},
 62 | 				{
 63 | 					"name": "dc1rack4",
 64 | 					"type": 2,
 65 | 					"selection": "STRAW",
 66 | 					"children": [
 67 | 						{
 68 | 							"name": "dc1rack4node1", 
 69 | 							"type": 5,
 70 | 							"weight": 100
 71 | 						},
 72 | 						{
 73 | 							"name": "dc1rack4node2", 
 74 | 							"type": 5,
 75 | 							"weight": 100
 76 | 						}
 77 | 					]
 78 | 				},
 79 | 				{
 80 | 					"name": "dc1rack5",
 81 | 					"type": 2,
 82 | 					"selection": "STRAW",
 83 | 					"children": [
 84 | 						{
 85 | 							"name": "dc1rack5node1", 
 86 | 							"type": 5,
 87 | 							"weight": 100
 88 | 						},
 89 | 						{
 90 | 							"name": "dc1rack5node2", 
 91 | 							"type": 5,
 92 | 							"weight": 100
 93 | 						}
 94 | 					]
 95 | 				},
 96 | 				{
 97 | 					"name": "dc1rack6",
 98 | 					"type": 2,
 99 | 					"selection": "STRAW",
100 | 					"children": [
101 | 						{
102 | 							"name": "dc1rack6node1", 
103 | 							"type": 5,
104 | 							"weight": 100
105 | 						},
106 | 						{
107 | 							"name": "dc1rack6node2", 
108 | 							"type": 5,
109 | 							"weight": 100
110 | 						}
111 | 					]
112 | 				}
113 | 			]
114 | 		},
115 | 		{
116 | 			"name": "dc2",
117 | 			"type": 1,
118 | 			"selection": "STRAW",
119 | 			"children": [
120 | 				{
121 | 					"name": "dc2rack1",
122 | 					"type": 2,
123 | 					"selection": "STRAW",
124 | 					"children": [
125 | 						{
126 | 							"name": "dc2rack1node1", 
127 | 							"type": 5,
128 | 							"weight": 100
129 | 						},
130 | 						{
131 | 							"name": "dc2rack1node2", 
132 | 							"type": 5,
133 | 							"weight": 75
134 | 						}
135 | 					]
136 | 				},
137 | 				{
138 | 					"name": "dc2rack2",
139 | 					"type": 2,
140 | 					"selection": "STRAW",
141 | 					"children": [
142 | 						{
143 | 							"name": "dc2rack2node1", 
144 | 							"type": 5,
145 | 							"weight": 25
146 | 						},
147 | 						{
148 | 							"name": "dc2rack2node2", 
149 | 							"type": 5,
150 | 							"weight": 25
151 | 						}
152 | 					]
153 | 				}
154 | 			]
155 | 		}
156 | 	]
157 | }


--------------------------------------------------------------------------------
/src/test/resources/topology.template.yaml:
--------------------------------------------------------------------------------
 1 | target_balance_max: 0
 2 | dump_detail_map: yes
 3 | weight_balance_tries: 200
 4 | weight_balance_factor: 0.1
 5 | number_of_buckets: 50000
 6 | replica_distribution_factor: 7
 7 | replication_factor: 3
 8 | replica_distribution_factor_min: 5
 9 | replica_distribution_factor_max: 7
10 | machine_list:


--------------------------------------------------------------------------------