├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── eval ├── evalresult.py ├── runtask.sh ├── test.sh └── testblob.py ├── pom.xml └── src ├── main └── java │ └── com │ └── twitter │ └── crunch │ ├── AssignmentTracker.java │ ├── AssignmentTrackerFactory.java │ ├── AssignmentTrackerImpl.java │ ├── BaseRackIsolationPlacementRules.java │ ├── CRUSHPlacementAlgorithm.java │ ├── ConsistentHashingSelector.java │ ├── Crunch.java │ ├── JenkinsHash.java │ ├── MappingDiff.java │ ├── MappingEvaluator.java │ ├── MappingException.java │ ├── MappingFunction.java │ ├── MessageDigestHash.java │ ├── MultiInputHash.java │ ├── NoOpAssignmentTracker.java │ ├── Node.java │ ├── NodeFilter.java │ ├── PlacementAlgorithm.java │ ├── PlacementRules.java │ ├── ProbingRDFMapping.java │ ├── RDFCRUSHMapping.java │ ├── RDFMapping.java │ ├── RackBasedTypes.java │ ├── RackIsolationPlacementRules.java │ ├── RandomSelector.java │ ├── Selector.java │ ├── SimpleCRUSHMapping.java │ ├── StableRdfMapping.java │ ├── StorageSystemTypes.java │ ├── StrawSelector.java │ ├── Types.java │ └── Utils.java └── test ├── java └── com │ └── twitter │ └── crunch │ ├── AssignmentTrackerImplTest.java │ ├── BaseSelectionTest.java │ ├── ConsistentHashingSelectionTest.java │ ├── CrunchTest.java │ ├── CrunchTestSuite.java │ ├── MappingDiffTest.java │ ├── MessageDigestHashTest.java │ ├── NodeTest.java │ ├── RandomSelectionTest.java │ ├── StrawSelectionTest.java │ ├── TestUtils.java │ ├── Topology.java │ ├── integrated │ ├── RDFBalanceTest.java │ ├── RDFStabilityTest.java │ └── SiblingBiasTest.java │ └── tools │ ├── CalculateMovement.java │ ├── CreateBlobstoreMapping.java │ ├── CreateDataMapping.java │ ├── EvaluateMapping.java │ ├── EvaluateRDFMapping.java │ ├── InvalidTopologyException.java │ ├── TopologyGenerator.java │ ├── Utils.java │ ├── YamlTopologyFactory.java │ └── jsontopology │ ├── JsonTopology.java │ ├── JsonTopologyDeserializer.java │ ├── JsonTopologySerializer.java │ ├── 
MappingParameters.java │ ├── Topology.java │ ├── TopologyDeserializer.java │ └── TopologySerializer.java └── resources ├── logback-test.xml ├── mapping.txt ├── topology.json └── topology.template.yaml /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | *.iml 3 | *.log 4 | target/ 5 | .settings 6 | .metadata/ 7 | .cache 8 | .classpath 9 | .classpath.txt 10 | .project 11 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: java 2 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | == HEAD 2 | 3 | == 1.0.0 (June 19, 2013) 4 | 5 | * Initial public release. 6 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing to Libcrunch 2 | 3 | Looking to contribute something to libcrunch? Here's how you can help. 4 | 5 | ## Bug reports 6 | 7 | A bug is a _demonstrable problem_ that is caused by the code in the 8 | repository. Good bug reports are extremely helpful - thank you! 9 | 10 | Guidelines for bug reports: 11 | 12 | 1. **Use the GitHub issue search** — check if the issue has already been 13 | reported. 14 | 15 | 2. **Check if the issue has been fixed** — try to reproduce it using the 16 | latest `master` or development branch in the repository. 17 | 18 | 3. **Isolate the problem** — ideally create a reduced test 19 | case and a live example. 20 | 21 | 4. Please try to be as detailed as possible in your report. Include specific 22 | information about the environment - operating system and version, Java 23 | version, version of libcrunch - and steps required to reproduce the issue. 
24 | 25 | 26 | ## Feature requests & contribution enquiries 27 | 28 | Feature requests are welcome. But take a moment to find out whether your idea 29 | fits with the scope and aims of the project. It's up to *you* to make a strong 30 | case for the inclusion of your feature. Please provide as much detail and 31 | context as possible. 32 | 33 | Contribution enquiries should take place before any significant pull request, 34 | otherwise you risk spending a lot of time working on something that we might 35 | have good reasons for rejecting. 36 | 37 | 38 | ## Pull requests 39 | 40 | Good pull requests - patches, improvements, new features - are a fantastic 41 | help. They should remain focused in scope and avoid containing unrelated 42 | commits. 43 | 44 | Make sure to adhere to the coding conventions used throughout the codebase 45 | (indentation, accurate comments, etc.) and any other requirements (such as test 46 | coverage). 47 | 48 | Please follow this process; it's the best way to get your work included in the 49 | project: 50 | 51 | 1. Create a new topic branch to contain your feature, change, or fix: 52 | 53 | 2. Commit your changes in logical chunks. Provide clear and explanatory commit 54 | messages. Use git's [interactive rebase](https://help.github.com/articles/interactive-rebase) 55 | feature to tidy up your commits before making them public. 56 | 57 | 3. Locally merge (or rebase) the upstream development branch into your topic branch: 58 | 59 | 4. Push your topic branch up to your fork: 60 | 61 | 5. [Open a Pull Request](http://help.github.com/send-pull-requests/) with a 62 | clear title and description. 
63 | 64 | ## License 65 | 66 | By contributing your code, 67 | 68 | you agree to license your contribution under the terms of the Apache License 2.0 69 | https://github.com/twitter/libcrunch/blob/master/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # STATUS 2 | 3 | Twitter is no longer maintaining this project or responding to issues or PRs. 4 | 5 | # libcrunch [![Build Status](https://travis-ci.org/twitter/libcrunch.png?branch=master)](https://travis-ci.org/twitter/libcrunch) 6 | Libcrunch is a lightweight mapping framework that maps data objects to a number of nodes, subject to user-specified constraints. 7 | 8 | The libcrunch implementation was heavily inspired by the paper on the [CRUSH algorithm](http://www.ssrc.ucsc.edu/Papers/weil-sc06.pdf). 9 | 10 | ## Features 11 | * flexible cluster topology definition 12 | * define your placement rules 13 | * supports replication factor (RF) and replica distribution factor (RDF) 14 | * balanced distribution of data that reflects weights 15 | * stability against topology changes 16 | * supports target balancing 17 | 18 | ## Getting Started 19 | The latest libcrunch artifacts are published to maven central. You can include libcrunch in your project by adding the following to your maven pom.xml file: 20 | 21 | ```xml 22 | <dependencies> 23 | <dependency> 24 | <groupId>com.twitter</groupId> 25 | <artifactId>libcrunch</artifactId> 26 | <version>1.0.0</version> 27 | </dependency> 28 | </dependencies> 29 | ``` 30 | 31 | ### Quickstart 32 | Creating and using the libcrunch mapping function is pretty straightforward. Once you define your data and the inputs to the mapping function, you get the mapping result via the computeMapping method. 
For example, to use the RDF mapping, 33 | 34 | ```java 35 | // set up the input to the mapping function 36 | PlacementRules rules = createPlacementRules(); 37 | 38 | // instantiate the mapping function 39 | MappingFunction mappingFunction = new RDFMapping(rdf, rf, rules, targetBalance); 40 | 41 | // prepare your data 42 | List data = prepareYourDataIds(); 43 | // set up the topology 44 | Node root = createTopology(); 45 | 46 | // compute the mapping 47 | Map> mapping = mappingFunction.computeMapping(data, root); 48 | ``` 49 | 50 | ## Problems? 51 | 52 | If you find any issues please [report them](https://github.com/twitter/libcrunch/issues) or better, 53 | send a [pull request](https://github.com/twitter/libcrunch/pulls). 54 | 55 | ## Authors: 56 | * Jerry Xu 57 | * Peter Schuller 58 | * Sangjin Lee 59 | 60 | ## License 61 | Copyright 2013 Twitter, Inc. 62 | 63 | Licensed under the Apache License, Version 2.0: http://www.apache.org/licenses/LICENSE-2.0 64 | -------------------------------------------------------------------------------- /eval/evalresult.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2013 Twitter, Inc. 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
import argparse

import glob
import csv
import re


def evaluateMappings(result_files, start_point, print_count, output_filename):
    """Summarise per-scenario result CSV files into a single CSV file.

    For every path in ``result_files`` whose name matches
    ``<dir>/rdf-<a>-rd-<b>-tb-<c>.csv`` the first ``start_point`` rows are
    skipped; over the remaining rows column 5 is summed as an int and
    column 3 is summed as a float. One line ``a,b,c,<int sum>,<float sum>``
    is appended to the summary per matching file, and the summary is written
    to ``output_filename``.

    Paths that do not match the pattern are reported on stdout and skipped.
    (Previously the function printed the message and then crashed on
    ``m.group`` because ``m`` was None.) Files with fewer than
    ``start_point`` rows contribute zero sums instead of raising
    StopIteration.

    :param result_files: iterable of result file paths
    :param start_point: number of leading rows to skip in each file
    :param print_count: if true, print the number of rows summed per file
    :param output_filename: path of the summary file to write
    """
    summary_lines = []
    for result_file in result_files:
        m = re.match(r'.*/rdf-(.*)-rd-(.*)-tb-(.*)\.csv', result_file)
        if not m:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("Cannot parse " + result_file)
            continue

        moves = 0
        std = 0
        c = 0
        with open(result_file, 'r') as result:
            result_reader = csv.reader(result, delimiter=',')
            for _ in range(start_point):
                # next() with a default tolerates files shorter than start_point.
                if next(result_reader, None) is None:
                    break
            for row in result_reader:
                moves += int(row[5])
                std += float(row[3])
                c += 1

        # NOTE(review): indentation was lost in the flattened source; printing
        # the count once per file (after the row loop) is assumed - confirm.
        if print_count:
            print(c)

        summary_lines.append(
            m.group(1) + "," + m.group(2) + "," + m.group(3) + ","
            + str(moves) + "," + str(std) + "\n")

    # The context manager closes the file even if the write fails.
    with open(output_filename, 'w') as f:
        f.write("".join(summary_lines))


def main():
    """Parse the command line and summarise every ``*.csv`` under ``-t``.

    Options: ``-t`` result directory (required), ``-o`` output file name
    (required), ``-s`` number of leading rows to skip (default 1), ``-c``
    print the per-file row count.
    """
    # parse the commandline arguments
    parser = argparse.ArgumentParser(description='Evaluate mapping files for topologies from Blobstore')
    parser.add_argument("-t", dest='result_path', type=str, required=True, help='path for the result files')
    parser.add_argument("-o", dest='output_filename', type=str, required=True, help='output file name')
    parser.add_argument("-s", dest='start_point', type=int, required=False, default=1, help='starting point for the calculation')
    parser.add_argument("-c", dest='print_count', action="store_true", required=False, default=False, help='print count')

    args = parser.parse_args()

    # collect the result files; sorted so the summary order is deterministic
    result_files = glob.glob(args.result_path + "/*.csv")

    evaluateMappings(sorted(result_files), args.start_point, args.print_count, args.output_filename)


if __name__ == '__main__':
    main()
-------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | java -Dfile.encoding=UTF-8 -Xms64m -Xmx512m -classpath \ 4 | ../target/test-classes:\ 5 | ../target/classes:\ 6 | $HOME/.m2/repository/com/google/guava/guava/12.0.1/guava-12.0.1.jar:\ 7 | $HOME/.m2/repository/com/google/code/findbugs/jsr305/1.3.9/jsr305-1.3.9.jar:\ 8 | $HOME/.m2/repository/junit/junit/4.10/junit-4.10.jar:\ 9 | $HOME/.m2/repository/org/hamcrest/hamcrest-core/1.1/hamcrest-core-1.1.jar:\ 10 | $HOME/.m2/repository/org/codehaus/jackson/jackson-mapper-asl/1.9.4/jackson-mapper-asl-1.9.4.jar:\ 11 | $HOME/.m2/repository/org/codehaus/jackson/jackson-core-asl/1.9.4/jackson-core-asl-1.9.4.jar:\ 12 | $HOME/.m2/repository/log4j/log4j/1.2.16/log4j-1.2.16.jar:\ 13 | $HOME/.m2/repository/org/slf4j/slf4j-api/1.6.4/slf4j-api-1.6.4.jar:\ 14 | $HOME/.m2/repository/org/mockito/mockito-core/1.9.0/mockito-core-1.9.0.jar:\ 15 | $HOME/.m2/repository/org/objenesis/objenesis/1.0/objenesis-1.0.jar:\ 16 | $HOME/.m2/repository/ch/qos/logback/logback-core/1.0.1/logback-core-1.0.1.jar:\ 17 | $HOME/.m2/repository/org/yaml/snakeyaml/1.10/snakeyaml-1.10.jar:\ 18 | $HOME/.m2/repository/ch/qos/logback/logback-classic/1.0.1/logback-classic-1.0.1.jar \ 19 | com.twitter.crunch.tools.$@ 20 | -------------------------------------------------------------------------------- /eval/test.sh: -------------------------------------------------------------------------------- 1 | template=$1 2 | node_count_start=$2 3 | node_count_inc=$3 4 | topology_count=$4 5 | node_weight=$5 6 | 7 | let node_count_max=node_count_start+node_count_inc*topology_count 8 | 9 | echo Generating the topology files... 10 | for i in $(seq $node_count_start $node_count_inc $node_count_max) 11 | do 12 | bash runtask.sh TopologyGenerator $template $i $node_weight topology${i}.yaml 13 | done 14 | 15 | echo Creating mappings... 
16 | for i in $(seq 1 2 3) 17 | do 18 | for j in $(seq $node_count_start $node_count_inc $node_count_max) 19 | do 20 | let k=j-node_count_inc 21 | bash runtask.sh CreateDataMapping yaml ${i} topology${j}.yaml map${i}${j}.csv rdfmap${k}.csv rdfmap${j}.csv 22 | done 23 | done 24 | 25 | echo Evaluate mappings... 26 | for i in $(seq 1 2 3) 27 | do 28 | echo version ${i}: 29 | for j in $(seq $node_count_start $node_count_inc $node_count_max) 30 | do 31 | echo -n topology${j}.yaml, 32 | let k=j-node_count_inc 33 | bash runtask.sh EvaluateMapping yaml topology${j}.yaml map${i}${j}.csv 34 | echo -n , 35 | if [ -e map${i}${k}.csv ] 36 | then 37 | bash runtask.sh CalculateMovement map${i}${k}.csv map${i}${j}.csv 38 | else 39 | echo -n ,, 40 | fi 41 | echo 42 | done 43 | done 44 | -------------------------------------------------------------------------------- /eval/testblob.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright 2013 Twitter, Inc. 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 
14 | """ 15 | 16 | import argparse 17 | import glob 18 | import os 19 | import logging 20 | import subprocess 21 | import shutil 22 | 23 | 24 | def runProcess(exe): 25 | logging.info(exe) 26 | return subprocess.check_output(exe) 27 | 28 | 29 | def evaluateMapping(versions, topology_files, output_dir): 30 | output = "" 31 | for version in versions: 32 | for i, topology_file in enumerate(topology_files): 33 | topology_file_name = os.path.basename(topology_file) 34 | map_file = output_dir + "/map-" + str(version) + "-" + topology_file_name 35 | 36 | rdf_file_new = output_dir + "/rdfmap-" + topology_file_name 37 | 38 | eval_command_line = "EvaluateMapping json " + topology_file + " " + map_file + " " + rdf_file_new 39 | print eval_command_line 40 | map_output = runProcess(['./runtask.sh', eval_command_line]) 41 | 42 | if i != 0: 43 | old_map_file = output_dir + "/map-" + str(version) + "-" + os.path.basename(topology_files[i-1]) 44 | calc_command_line = "CalculateMovement " + " " + old_map_file + " " + map_file 45 | map_output = map_output + "," + runProcess(['./runtask.sh', calc_command_line]) 46 | 47 | map_output += '\n' 48 | print map_output 49 | output += map_output 50 | 51 | return output 52 | 53 | 54 | def generateMapping(versions, topology_files, output_dir, rdf, target_balance, rack_diversity, track_capacity): 55 | for version in versions: 56 | for i, topology_file in enumerate(topology_files): 57 | topology_file_name = os.path.basename(topology_file) 58 | params_file = os.path.dirname(topology_file) + "/" + "params_" + topology_file_name 59 | map_file = output_dir + "/map-" + str(version) + "-" + topology_file_name 60 | rdf_file_new = output_dir + "/rdfmap-" + topology_file_name 61 | command_line = "CreateBlobstoreMapping " + str(version) + " " + topology_file + " " + params_file 62 | command_line = command_line + " " + map_file + " " + str(rdf) + " " + str(target_balance) 63 | 64 | if i != 0: 65 | rdf_file_old = output_dir + "/rdfmap-" + 
os.path.basename(topology_files[i-1]) 66 | else: 67 | rdf_file_old = "null" 68 | 69 | command_line = command_line + " " + str(rack_diversity) + " " + track_capacity 70 | command_line = command_line + " " + rdf_file_new + " " + rdf_file_old 71 | 72 | logging.info(command_line) 73 | print command_line 74 | subprocess.call(['./runtask.sh', command_line]) 75 | 76 | if not os.path.isfile(map_file): 77 | return False 78 | return True 79 | 80 | 81 | def compareMappings(topology_files, evaluate, output_dir): 82 | rdf_min = 8 83 | rdf_max = 88 84 | tb_min = 0.05 85 | tb_max = 0.15 86 | rd_min = 3 87 | rd_max = 8 88 | rdf = rdf_min 89 | while rdf <= rdf_max: 90 | rd = rd_min 91 | rdf_rd = int(rdf/rd) + 1 92 | while rd <= rd_max: 93 | if ((int(rdf/rd) + 1) == rdf_rd and rd != rd_min): 94 | rd += 1 95 | continue 96 | else: 97 | rdf_rd = int(rdf/rd) + 1 98 | tb = tb_min 99 | while tb <= tb_max: 100 | scenario_name = output_dir + "/" + "rdf-" + str(rdf) + "-rd-" + str(rd) + "-tb-" + str(tb) 101 | if (evaluate): 102 | print "Evaluating mappings " + scenario_name 103 | output = evaluateMapping("3", topology_files, scenario_name) 104 | f = open(scenario_name + ".csv", 'w') 105 | f.write(output) 106 | f.close() 107 | else: 108 | if not os.path.exists(scenario_name): 109 | os.makedirs(scenario_name) 110 | print "Generating mappings " + scenario_name 111 | result = generateMapping("3", topology_files, scenario_name, rdf, tb, rdf_rd, "false") 112 | if not result: 113 | print "Failed to converge on scenario: " + scenario_name 114 | shutil.rmtree(scenario_name) 115 | tb += 0.02 116 | rd += 1 117 | rdf += 8 118 | 119 | 120 | def main(): 121 | # parse the commandline arguments 122 | parser = argparse.ArgumentParser(description='Generate mapping files for topologies from Blobstore') 123 | parser.add_argument("-t", dest='topology_path', type=str, required=True, help='path for the topology files') 124 | parser.add_argument("-o", dest='output_dir', type=str, default="./", required=False, 
help='output location') 125 | 126 | parser.add_argument("-s", dest='single_mapping', action="store_true", default=False, required=False, help='calculate single map') 127 | parser.add_argument("-v", dest='algo_version', type=int, default=3, required=False, help='version of algorithm') 128 | parser.add_argument("-r", dest='rack_diversity', type=str, default="8", required=False, help='rack diversity') 129 | parser.add_argument("-c", dest='track_capacity', type=str, default="false", required=False, help='track replica capacity') 130 | parser.add_argument("-b", dest='target_balance', type=str, default="0.25", required=False, help='target balance') 131 | parser.add_argument("-d", dest='rdf', type=str, default="10", required=False, help='rdf') 132 | 133 | parser.add_argument("-g", dest='skip_generate', action="store_true", default=False, required=False, help='skip generating maps') 134 | parser.add_argument("-e", dest='skip_evaluate', action="store_true", default=False, required=False, help='skip evaluating maps') 135 | args = parser.parse_args() 136 | 137 | logging.basicConfig(filename="testblob.log", level=logging.INFO) 138 | 139 | # read topology files 140 | topology_files = glob.glob(args.topology_path + "/topology_*") 141 | 142 | if not args.skip_generate: 143 | print "Generating mappings..." 144 | if args.single_mapping: 145 | versions = [args.algo_version] 146 | generateMapping(versions, sorted(topology_files), args.output_dir, args.rdf, args.target_balance, args.rack_diversity, args.track_capacity) 147 | else: 148 | compareMappings(sorted(topology_files), False, args.output_dir) 149 | if not args.skip_evaluate: 150 | print "Evaluate mappings..." 
151 | if args.single_mapping: 152 | versions = [args.algo_version] 153 | output = evaluateMapping(versions, sorted(topology_files), args.output_dir) 154 | f = open(args.output_dir + "/result.csv", 'w') 155 | f.write(output) 156 | f.close() 157 | else: 158 | compareMappings(sorted(topology_files), True, args.output_dir) 159 | 160 | 161 | if __name__ == '__main__': 162 | main() 163 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 4.0.0 3 | com.twitter 4 | libcrunch 5 | libcrunch 6 | 1.0.4-SNAPSHOT 7 | http://github.com/twitter/libcrunch 8 | A lightweight mapping framework that maps data objects to a number of nodes, subject to constraints 9 | 10 | 11 | 12 | com.google.guava 13 | guava 14 | 12.0.1 15 | compile 16 | 17 | 18 | junit 19 | junit 20 | 4.10 21 | test 22 | 23 | 24 | org.codehaus.jackson 25 | jackson-mapper-asl 26 | 1.9.4 27 | test 28 | 29 | 30 | org.codehaus.jackson 31 | jackson-core-asl 32 | 1.9.4 33 | test 34 | 35 | 36 | org.slf4j 37 | slf4j-api 38 | 1.6.4 39 | 40 | 41 | org.mockito 42 | mockito-core 43 | 1.9.0 44 | test 45 | 46 | 47 | ch.qos.logback 48 | logback-core 49 | 1.0.1 50 | test 51 | 52 | 53 | ch.qos.logback 54 | logback-classic 55 | 1.0.1 56 | test 57 | 58 | 59 | org.yaml 60 | snakeyaml 61 | 1.10 62 | test 63 | 64 | 65 | 66 | 67 | scm:git:git@github.com:twitter/libcrunch.git 68 | scm:git:git@github.com:twitter/libcrunch.git 69 | scm:git:git@github.com:twitter/libcrunch.git 70 | 71 | 72 | 73 | 74 | The Apache Software License, Version 2.0 75 | http://www.apache.org/licenses/LICENSE-2.0.txt 76 | 77 | 78 | 79 | 80 | 81 | Sangjin Lee 82 | sjlee@twitter.com 83 | 84 | 85 | Jerry Xu 86 | jxu@twitter.com 87 | 88 | 89 | 90 | 91 | 92 | 93 | sonatype-nexus-snapshots 94 | Sonatype OSS 95 | https://oss.sonatype.org/content/repositories/snapshots 96 | 97 | 98 | sonatype-nexus-staging 99 | Nexus Release Repository 100 | 
https://oss.sonatype.org/service/local/staging/deploy/maven2/ 101 | 102 | 103 | 104 | 105 | 106 | sonatype-nexus-snapshots 107 | https://oss.sonatype.org/content/repositories/snapshots 108 | 109 | false 110 | 111 | 112 | true 113 | 114 | 115 | 116 | 117 | 118 | UTF-8 119 | 120 | 121 | 122 | 123 | 124 | 125 | org.apache.maven.plugins 126 | maven-release-plugin 127 | 2.1 128 | 129 | forked-path 130 | false 131 | -Psonatype-oss-release 132 | 133 | 134 | 135 | 136 | 137 | 138 | maven-compiler-plugin 139 | 2.5.1 140 | 141 | 1.6 142 | 1.6 143 | true 144 | true 145 | true 146 | 147 | 148 | 149 | org.apache.maven.plugins 150 | maven-surefire-plugin 151 | 2.12 152 | 153 | -Xmx1024m 154 | false 155 | 156 | **/CrunchTestSuite.java 157 | 158 | 159 | 160 | 161 | org.apache.maven.plugins 162 | maven-source-plugin 163 | 2.2 164 | 165 | 166 | attach-sources 167 | 168 | jar 169 | 170 | 171 | 172 | 173 | 174 | 175 | 176 | 177 | sonatype-oss-release 178 | 179 | 180 | 181 | org.apache.maven.plugins 182 | maven-source-plugin 183 | 2.1.2 184 | 185 | 186 | attach-sources 187 | 188 | jar-no-fork 189 | 190 | 191 | 192 | 193 | 194 | org.apache.maven.plugins 195 | maven-javadoc-plugin 196 | 2.7 197 | 198 | 199 | attach-javadocs 200 | 201 | jar 202 | 203 | 204 | 205 | 206 | 207 | org.apache.maven.plugins 208 | maven-gpg-plugin 209 | 1.1 210 | 211 | 212 | sign-artifacts 213 | verify 214 | 215 | sign 216 | 217 | 218 | 219 | 220 | 221 | 222 | 223 | 224 | 225 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/AssignmentTracker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | /** 19 | * Tracker that keeps track of data assignment during the course of mapping generation, and rejects 20 | * assignments based on the target balance parameter. 21 | *
22 | * It is important to note that this keeps track of the assignment status, and therefore is 23 | * stateful. One object needs to be created and retained for the duration of the mapping generation. 24 | */ 25 | interface AssignmentTracker { 26 | /** 27 | * Tracks assignment of this particular node. Assignment tracking happens essentially with the 28 | * leaf nodes. When a leaf node is positively selected, the assignment of the leaf node is 29 | * recorded, and any parent node whose type assignment is being tracked for is also tracked at 30 | * that point. 31 | * 32 | * @return whether the particular node is tracked directly. 33 | */ 34 | boolean trackAssignment(Node node); 35 | 36 | /** 37 | * Returns whether the node should be rejected due to high assignment against the target balance. 38 | * The determination of whether to reject it is a function of the current data assignment level of 39 | * the node. The exact nature of how the selection is rejected is an implementation detail. The 40 | * only guaranteed behavior is the node will be rejected 100% of the time if it reaches the 41 | * assignment level specified by the target balance. 42 | * 43 | */ 44 | boolean rejectAssignment(Node node); 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/AssignmentTrackerFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | /** 19 | * Factory class that provides a single static factory method to create an assignment tracker 20 | * instance. 21 | */ 22 | class AssignmentTrackerFactory { 23 | /** 24 | * Factory method that creates an assignment tracker instance. If target balance is not a positive 25 | * number, a no-op instance will be returned. 26 | * 27 | * @param rootNode the root node under which nodes will have assignments tracked 28 | * @param dataSize the size of the data objects; this is used to come up with the mean and max 29 | * assignments 30 | * @param targetBalance the expected target balance in relative percentages; e.g. 0.3 (30%). It 31 | * means that this target will be used to control and curb over-assignment to nodes. Note that 32 | * this is a target, and some small over-assignment may still occur if it becomes difficult to 33 | * meet this target. Must be positive. 34 | * @return newly created assignment tracker instance 35 | */ 36 | public static AssignmentTracker create(Node rootNode, int dataSize, double targetBalance) { 37 | if (rootNode != null && dataSize > 0 && targetBalance > 0.0d) { 38 | return new AssignmentTrackerImpl(rootNode, dataSize, targetBalance); 39 | } 40 | return new NoOpAssignmentTracker(); // do not track 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/BaseRackIsolationPlacementRules.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashSet; 20 | import java.util.List; 21 | import java.util.Set; 22 | 23 | import com.google.common.base.Predicate; 24 | import com.google.common.base.Predicates; 25 | 26 | /** 27 | * Based on a topology based on racks, prescribes rack isolation placement rules. Specific 28 | * implementations should mix in their specific types based on the {@link RackBasedTypes} and define 29 | * the end type. 30 | */ 31 | public abstract class BaseRackIsolationPlacementRules implements PlacementRules, RackBasedTypes { 32 | /** 33 | * In case we get less than full return values from the placement algorithm, we retry by changing 34 | * the input to the placement algorithm. This should converge pretty rapidly under normal 35 | * circumstances. However, if it fails to converge after a certain number of tries, we throw a 36 | * MappingException to indicate the failure. 37 | */ 38 | private static final int CONVERGENCE_LIMIT = 20; 39 | 40 | private final MultiInputHash hashFunction = new JenkinsHash(); 41 | 42 | /** 43 | * Enforce rack isolation. The caller will either get the expected number of selected nodes as a 44 | * result, or an exception will be thrown. 45 | * 46 | * @return the number of selected nodes with the rack isolation placement rules enforced. 
The size 47 | * will match the input count 48 | * @throws MappingException if it is unable to find the mapping that satisfies all constraints 49 | */ 50 | public List select(Node topNode, long data, int n, PlacementAlgorithm pa) 51 | throws MappingException { 52 | List nodes = new ArrayList(n); 53 | Set selectedRacks = new HashSet(); 54 | long input = data; 55 | int count = n; 56 | int tries = 0; 57 | while (nodes.size() < n) { 58 | doSelect(topNode, input, count, pa, nodes, selectedRacks); 59 | count = n - nodes.size(); 60 | if (count > 0) { // still not all picked 61 | input = hash(input); // hash the input to create a different data value 62 | tries++; 63 | if (tries >= CONVERGENCE_LIMIT) { 64 | throw new MappingException(String.format("could not fulfill all selection after %d tries", 65 | tries)); 66 | } 67 | } 68 | } 69 | return nodes; 70 | } 71 | 72 | private void doSelect(Node topNode, long input, int count, PlacementAlgorithm pa, 73 | List selectedNodes, Set selectedRacks) { 74 | // pick (count) racks avoiding the racks already picked 75 | List racks = pa.select(topNode, input, count, RACK, getRackPredicate(selectedRacks)); 76 | // add the racks to the selected racks 77 | selectedRacks.addAll(racks); 78 | // pick one end node 79 | for (Node rack: racks) { 80 | List endNode = pa.select(rack, input, 1, getEndNodeType()); 81 | selectedNodes.addAll(endNode); 82 | } 83 | } 84 | 85 | /** 86 | * Use the predicate to reject already selected racks. 87 | */ 88 | private Predicate getRackPredicate(Set selectedRacks) { 89 | return Predicates.not(Predicates.in(selectedRacks)); 90 | } 91 | 92 | /** 93 | * Do a simple hashing of the original data. 94 | */ 95 | private long hash(long data) { 96 | return hashFunction.hash(data); 97 | } 98 | 99 | /** 100 | * Rejects the replica if they share the rack. 
101 | */ 102 | public boolean acceptReplica(Node primary, Node replica) { 103 | Node primaryRack = primary.findParent(RACK); 104 | Node replicaRack = replica.findParent(RACK); 105 | return primaryRack.getId() != replicaRack.getId(); 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/CRUSHPlacementAlgorithm.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashMap; 20 | import java.util.HashSet; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.Set; 24 | 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | import com.google.common.base.Predicate; 29 | import com.google.common.base.Predicates; 30 | 31 | /** 32 | * The transcription of the CRUSH placement algorithm from the Weil paper. This is a fairly simple 33 | * adaptation, but a couple of important changes have been made to work with the crunch mapping. 34 | */ 35 | public class CRUSHPlacementAlgorithm implements PlacementAlgorithm { 36 | /** 37 | * In case the select() method fails to select after looping back to the origin of selection after 38 | * so many tries, we stop the search. 
This constant denotes the maximum number of retries after 39 | * looping back to the origin. It is expected that in most cases the selection will either succeed 40 | * with a small number of tries, or it will never succeed. So a reasonably large number to 41 | * distinguish these two cases should be sufficient. 42 | */ 43 | private static final int MAX_LOOPBACK_COUNT = 50; 44 | private static final Logger logger = LoggerFactory.getLogger(CRUSHPlacementAlgorithm.class); 45 | 46 | private final boolean keepOffset; 47 | private final Map roundOffset; 48 | private final AssignmentTracker assignmentTracker; 49 | 50 | /** 51 | * Creates the crush placement object. 52 | */ 53 | public CRUSHPlacementAlgorithm() { 54 | this(false); 55 | } 56 | 57 | /** 58 | * Creates the crush placement algorithm with the indication whether the round offset should be 59 | * kept for the duration of this object for successive selection of the same input. 60 | */ 61 | public CRUSHPlacementAlgorithm(boolean keepOffset) { 62 | this(keepOffset, null); 63 | } 64 | 65 | /** 66 | * Creates the crush placement algorithm object with the assignment tracking. 67 | */ 68 | public CRUSHPlacementAlgorithm(AssignmentTracker assignmentTracker) { 69 | this(false, assignmentTracker); 70 | } 71 | 72 | // TODO consider better constructors for these options 73 | public CRUSHPlacementAlgorithm(boolean keepOffset, AssignmentTracker assignmentTracker) { 74 | this.keepOffset = keepOffset; 75 | roundOffset = keepOffset ? new HashMap() : null; 76 | this.assignmentTracker = assignmentTracker; 77 | } 78 | 79 | /** 80 | * Returns a list of (count) nodes of the desired type. If the count is more than the number of 81 | * available nodes, an exception is thrown. Note that it is possible for this method to return a 82 | * list whose size is smaller than the requested size (count) if it is unable to select all the 83 | * nodes for any reason. 
Callers should check the size of the returned list and take action if 84 | * needed. 85 | * 86 | */ 87 | public List select(Node parent, long input, int count, int type) { 88 | return select(parent, input, count, type, Predicates.alwaysTrue()); 89 | } 90 | 91 | public List select(Node parent, long input, int count, int type, 92 | Predicate nodePredicate) { 93 | int childCount = parent.getChildrenCount(type); 94 | if (childCount < count) { 95 | throw new IllegalArgumentException(count + " nodes of type " + type + 96 | " were requested but the tree has only " + childCount + " nodes!"); 97 | } 98 | 99 | List selected = new ArrayList(count); 100 | // use the index stored in the map 101 | Integer offset; 102 | if (keepOffset) { 103 | offset = roundOffset.get(input); 104 | if (offset == null) { 105 | offset = 0; 106 | roundOffset.put(input, offset); 107 | } 108 | } else { 109 | offset = 0; 110 | } 111 | 112 | int rPrime = 0; 113 | for (int r = 1; r <= count; r++) { 114 | int failure = 0; 115 | // number of times we had to loop back to the origin 116 | int loopbackCount = 0; 117 | boolean escape = false; 118 | boolean retryOrigin; 119 | Node out = null; 120 | do { 121 | retryOrigin = false; // initialize at the outset 122 | Node in = parent; 123 | Set rejected = new HashSet(); 124 | boolean retryNode; 125 | do { 126 | retryNode = false; // initialize at the outset 127 | rPrime = r + offset + failure; 128 | logger.trace("{}.select({}, {})", new Object[] {in, input, rPrime}); 129 | out = in.select(input, rPrime); 130 | if (out.getType() != type) { 131 | logger.trace("selected output {} for data {} didn't match the type {}: walking down " + 132 | "the hierarchy...", new Object[] {out, input, type}); 133 | in = out; // walk down the hierarchy 134 | retryNode = true; // stay within the node and walk down the tree 135 | } else { // type matches 136 | boolean predicateRejected = !nodePredicate.apply(out); 137 | if (selected.contains(out) || predicateRejected) { 138 | if 
(predicateRejected) { 139 | logger.trace("{} was rejected by the node predicate for data {}: rejecting and " + 140 | "increasing rPrime", out, input); 141 | rejected.add(out); 142 | } else { // already selected 143 | logger.trace("{} was already selected for data {}: rejecting and increasing rPrime", 144 | out, input); 145 | } 146 | 147 | // we need to see if we have selected all possible nodes from this parent, in which 148 | // case we should loop back to the origin and start over 149 | if (allChildNodesEliminated(in, selected, rejected)) { 150 | logger.trace("all child nodes of {} have been eliminated", in); 151 | if (loopbackCount == MAX_LOOPBACK_COUNT) { 152 | // we looped back the maximum times we specified; we give up search, and exit 153 | escape = true; 154 | break; 155 | } 156 | loopbackCount++; 157 | logger.trace("looping back to the original parent node ({})", parent); 158 | retryOrigin = true; 159 | } else { 160 | retryNode = true; // go back and reselect on the same parent 161 | } 162 | failure++; 163 | } else if (nodeIsOut(out)) { 164 | logger.trace("{} is marked as out (failed or over the maximum assignment) for data " + 165 | "{}! 
looping back to the original parent node", out, input); 166 | failure++; 167 | if (loopbackCount == MAX_LOOPBACK_COUNT) { 168 | // we looped back the maximum times we specified; we give up search, and exit 169 | escape = true; 170 | break; 171 | } 172 | loopbackCount++; 173 | // re-selection on the same parent is detrimental in case of node failure: loop back 174 | // to the origin 175 | retryOrigin = true; 176 | } else { 177 | // we got a successful selection 178 | break; 179 | } 180 | } 181 | } while (retryNode); 182 | } while (retryOrigin); 183 | 184 | if (escape) { 185 | // cannot find a node under this parent; return a smaller set than was intended 186 | logger.debug("we could not select a node for data {} under parent {}; a smaller data set " + 187 | "than is requested will be returned", input, parent); 188 | continue; 189 | } 190 | 191 | logger.trace("{} was selected for data {}", out, input); 192 | selected.add(out); 193 | // track the assignment 194 | if (assignmentTracker != null) { 195 | assignmentTracker.trackAssignment(out); 196 | } 197 | } 198 | if (keepOffset) { 199 | roundOffset.put(input, rPrime); 200 | } 201 | return selected; 202 | } 203 | 204 | 205 | private boolean nodeIsOut(Node node) { 206 | if (node.isLeaf() && node.isFailed()) { 207 | return true; 208 | } 209 | if (assignmentTracker != null) { 210 | return assignmentTracker.rejectAssignment(node); 211 | } 212 | return false; 213 | } 214 | 215 | /** 216 | * Examines the immediate child nodes of the given parent node, and sees if all of the children 217 | * that can be selected (i.e. not failed) are already selected. This is used to determine whether 218 | * this parent node should no longer be used in the selection. 
219 | */ 220 | private boolean allChildNodesEliminated(Node parent, List selected, Set rejected) { 221 | List children = parent.getChildren(); 222 | if (children != null) { 223 | for (Node child: children) { 224 | if (!nodeIsOut(child) && !selected.contains(child) && !rejected.contains(child)) { 225 | return false; 226 | } 227 | } 228 | } 229 | return true; 230 | } 231 | } 232 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/ConsistentHashingSelector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.nio.ByteBuffer; 19 | import java.security.MessageDigest; 20 | import java.security.NoSuchAlgorithmException; 21 | import java.util.ArrayList; 22 | import java.util.Collections; 23 | import java.util.HashMap; 24 | import java.util.List; 25 | import java.util.Map; 26 | 27 | /** 28 | * Simple implementation of selection based on consistent hashing. 
29 | */ 30 | class ConsistentHashingSelector implements Selector { 31 | public static final long DEFAULT_MAX_TOKENS_PER_NODE = 500; 32 | 33 | private final MessageDigest md; 34 | private final List tokenList; 35 | private final Map tokenMap; 36 | 37 | public ConsistentHashingSelector(Node node) { 38 | if (node.isLeaf()) { 39 | throw new IllegalArgumentException(); 40 | } 41 | try { 42 | md = MessageDigest.getInstance("SHA-1"); 43 | } catch (NoSuchAlgorithmException ignore) { 44 | throw new IllegalArgumentException(ignore); 45 | } 46 | 47 | final long maxTokensPerNode = DEFAULT_MAX_TOKENS_PER_NODE; 48 | // long totalWeight = 0; 49 | long maxWeight = 0; 50 | List nodes = node.getChildren(); 51 | for (Node n: nodes) { 52 | // totalWeight += n.getWeight(); 53 | maxWeight = Math.max(n.getWeight(), maxWeight); 54 | } 55 | 56 | MessageDigest ringMd; 57 | try { 58 | ringMd = MessageDigest.getInstance("SHA-1"); 59 | } catch (NoSuchAlgorithmException ignore) { 60 | throw new IllegalArgumentException(ignore); 61 | } 62 | tokenMap = new HashMap(); 63 | for (Node n: nodes) { 64 | long tokenCount = maxTokensPerNode*n.getWeight()/maxWeight; 65 | byte[] h = null; 66 | for (int i = 0; i < tokenCount; i++) { 67 | byte[] input = (h == null) ? 
n.getName().getBytes() : h; 68 | h = ringMd.digest(input); 69 | long token = Utils.bstrTo32bit(h); 70 | if (!tokenMap.containsKey(token)) { 71 | tokenMap.put(token, n); 72 | } 73 | } 74 | } 75 | 76 | tokenList = new ArrayList(tokenMap.keySet()); 77 | Collections.sort(tokenList); 78 | } 79 | 80 | public Node select(long input, long round) { 81 | byte[] b = longToBytes(input, round); 82 | byte[] h = md.digest(b); 83 | long token = Utils.bstrTo32bit(h); 84 | return tokenMap.get(findSuccessorToken(token)); 85 | } 86 | 87 | private byte[] longToBytes(long a, long b) { 88 | ByteBuffer buf = ByteBuffer.allocate(8*2).putLong(a).putLong(b); 89 | return buf.array(); 90 | } 91 | 92 | private long findSuccessorToken(long token) { 93 | int i = Collections.binarySearch(tokenList, token); 94 | if (i < 0) { 95 | i = -1 - i; 96 | } 97 | // [sjlee] why? 98 | if (i == tokenList.size()) { 99 | i = 0; 100 | } 101 | return tokenList.get(i); 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/Crunch.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.security.MessageDigest; 19 | import java.security.NoSuchAlgorithmException; 20 | import java.util.ArrayList; 21 | import java.util.List; 22 | 23 | 24 | public class Crunch { 25 | private final MessageDigest md; 26 | 27 | public Crunch() { 28 | try { 29 | md = MessageDigest.getInstance("SHA-1"); 30 | } catch (NoSuchAlgorithmException ignore) { 31 | throw new IllegalArgumentException(ignore); 32 | } 33 | } 34 | 35 | /** 36 | * Creates a "crunched" tree from the topological tree input. It is assumed that the topological 37 | * tree begins with a root node with the right root type. 38 | *
39 | * As a result of this operation, a copy with the following properties is created: 40 | *
    41 | *
  • name, type, and selection properties are copied from the topological nodes
  • 42 | *
  • id's are assigned as a SHA-1 hash of the node name
  • 43 | *
  • both children and parent properties are set
  • 44 | *
  • weights are assigned as sums of child weights
  • 45 | *
  • the selector objects are created
  • 46 | *
47 | * No modifications are done on the original topological nodes. 48 | */ 49 | public Node makeCrunch(Node topology) { 50 | if (topology.getType() != Types.ROOT) { 51 | throw new IllegalArgumentException("the root node is not the ROOT type!"); 52 | } 53 | 54 | return makeCrunchNode(topology); 55 | } 56 | 57 | private Node makeCrunchNode(Node topologicalNode) { 58 | // copy the intrinsic properties: id, weights, relationship, and selectors will be set 59 | Node node = new Node(topologicalNode); 60 | // assign the id from the name hash 61 | node.setId(computeId(node)); 62 | if (!topologicalNode.isLeaf()) { 63 | List newChildren = new ArrayList(); 64 | List children = topologicalNode.getChildren(); 65 | for (Node child: children) { 66 | // depth-first traversal 67 | Node newChild = makeCrunchNode(child); 68 | // set the child-parent relationship 69 | newChildren.add(newChild); 70 | newChild.setParent(node); 71 | } 72 | node.setChildren(newChildren); 73 | 74 | // weights and selector should be set after all lower nodes are crunched 75 | computeWeightAndSelector(node); 76 | } 77 | return node; 78 | } 79 | 80 | private long computeId(Node node) { 81 | byte[] h = md.digest(node.getName().getBytes()); 82 | // TODO see if this is adequate as a unique id: I suspect it is... 83 | return Utils.bstrTo32bit(h); 84 | } 85 | 86 | /** 87 | * Performs modifications, and reassigns certain properties on the tree in place. The input is 88 | * assumed to be a properly "crunched" tree. This is mainly to aid creating the "mini-tree" for 89 | * the data selection in the RDF mapping. 90 | *
91 | * The only properties that are recalculated are the weights and the selectors. 92 | */ 93 | public void recrunch(Node node) { 94 | if (!node.isLeaf()) { 95 | for (Node child: node.getChildren()) { 96 | recrunch(child); 97 | } 98 | 99 | computeWeightAndSelector(node); 100 | } 101 | } 102 | 103 | private void computeWeightAndSelector(Node node) { 104 | // set the weight after all its children are already "crunched" 105 | int weight = 0; 106 | for (Node child: node.getChildren()) { 107 | weight += child.getWeight(); 108 | } 109 | node.setWeight(weight); 110 | // set the selector 111 | node.setSelector(pickSelector(node)); 112 | } 113 | 114 | private Selector pickSelector(Node node) { 115 | switch (node.getSelection()) { 116 | case CONSISTENT_HASHING: 117 | return new ConsistentHashingSelector(node); 118 | case STRAW: 119 | return new StrawSelector(node); 120 | default: 121 | throw new IllegalArgumentException("unrecognized type!"); 122 | } 123 | } 124 | } 125 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/JenkinsHash.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | public class JenkinsHash implements MultiInputHash { 19 | // max value to limit it to 4 bytes 20 | private static final long MAX_VALUE = 0xFFFFFFFFL; 21 | private static final long CRUSH_HASH_SEED = 1315423911L; 22 | 23 | /** 24 | * Convert a byte into a long value without making it negative. 25 | */ 26 | private static long byteToLong(byte b) { 27 | long val = b & 0x7F; 28 | if ((b & 0x80) != 0) { 29 | val += 128; 30 | } 31 | return val; 32 | } 33 | 34 | /** 35 | * Do addition and turn into 4 bytes. 36 | */ 37 | private static long add(long val, long add) { 38 | return (val + add) & MAX_VALUE; 39 | } 40 | 41 | /** 42 | * Do subtraction and turn into 4 bytes. 43 | */ 44 | private static long subtract(long val, long subtract) { 45 | return (val - subtract) & MAX_VALUE; 46 | } 47 | 48 | /** 49 | * Left shift val by shift bits and turn in 4 bytes. 50 | */ 51 | private static long xor(long val, long xor) { 52 | return (val ^ xor) & MAX_VALUE; 53 | } 54 | 55 | /** 56 | * Left shift val by shift bits. Cut down to 4 bytes. 57 | */ 58 | private static long leftShift(long val, int shift) { 59 | return (val << shift) & MAX_VALUE; 60 | } 61 | 62 | /** 63 | * Convert 4 bytes from the buffer at offset into a long value. 64 | */ 65 | private static long fourByteToLong(byte[] bytes, int offset) { 66 | return (byteToLong(bytes[offset + 0]) 67 | + (byteToLong(bytes[offset + 1]) << 8) 68 | + (byteToLong(bytes[offset + 2]) << 16) 69 | + (byteToLong(bytes[offset + 3]) << 24)); 70 | } 71 | 72 | /** 73 | * Mix up the values in the hash function. 
74 | */ 75 | private static Triple hashMix(Triple t) { 76 | long a = t.a; long b = t.b; long c = t.c; 77 | a = subtract(a, b); a = subtract(a, c); a = xor(a, c >> 13); 78 | b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 8)); 79 | c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 13)); 80 | a = subtract(a, b); a = subtract(a, c); a = xor(a, (c >> 12)); 81 | b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 16)); 82 | c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 5)); 83 | a = subtract(a, b); a = subtract(a, c); a = xor(a, (c >> 3)); 84 | b = subtract(b, c); b = subtract(b, a); b = xor(b, leftShift(a, 10)); 85 | c = subtract(c, a); c = subtract(c, b); c = xor(c, (b >> 15)); 86 | return new Triple(a, b, c); 87 | } 88 | 89 | private static class Triple { 90 | long a; 91 | long b; 92 | long c; 93 | 94 | public Triple(long a, long b, long c) { 95 | this.a = a; this.b = b; this.c = c; 96 | } 97 | } 98 | 99 | public long hash(long a) { 100 | long hash = xor(CRUSH_HASH_SEED, a); 101 | long b = a; 102 | long x = 231232L; 103 | long y = 1232L; 104 | Triple val = hashMix(new Triple(b, x, hash)); 105 | b = val.a; x = val.b; hash = val.c; 106 | val = hashMix(new Triple(y, a, hash)); 107 | hash = val.c; 108 | return hash; 109 | } 110 | 111 | public long hash(long a, long b) { 112 | long hash = xor(xor(CRUSH_HASH_SEED, a), b); 113 | long x = 231232L; 114 | long y = 1232L; 115 | Triple val = hashMix(new Triple(a, b, hash)); 116 | a = val.a; b = val.b; hash = val.c; 117 | val = hashMix(new Triple(x, a, hash)); 118 | x = val.a; a = val.b; hash = val.c; 119 | val = hashMix(new Triple(b, y, hash)); 120 | hash = val.c; 121 | return hash; 122 | } 123 | 124 | public long hash(long a, long b, long c) { 125 | long hash = xor(xor(xor(CRUSH_HASH_SEED, a), b), c); 126 | long x = 231232L; 127 | long y = 1232L; 128 | Triple val = hashMix(new Triple(a, b, hash)); 129 | a = val.a; b = val.b; hash = val.c; 130 | val = hashMix(new Triple(c, x, hash)); 
131 | c = val.a; x = val.b; hash = val.c; 132 | val = hashMix(new Triple(y, a, hash)); 133 | y = val.a; a = val.b; hash = val.c; 134 | val = hashMix(new Triple(b, x, hash)); 135 | b = val.a; x = val.b; hash = val.c; 136 | val = hashMix(new Triple(y, c, hash)); 137 | hash = val.c; 138 | return hash; 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/MappingDiff.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Collections; 20 | import java.util.HashMap; 21 | import java.util.HashSet; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Set; 25 | 26 | /** 27 | * Utility class that computes the diffs between two mappings. The diff is based on the 28 | * equals contract. It also indicates whether the particular change is addition or 29 | * removal. 30 | */ 31 | public class MappingDiff { 32 | /** 33 | * Returns the difference between the two mappings. It only contains the keys with which there are 34 | * any differences. The value lists are neither null nor empty. There is no particular ordering in 35 | * the value returned, so one should not rely on the ordering of values. 
/**
 * Utility class that computes the diffs between two mappings. The diff is based on the
 * <code>equals</code> contract. It also indicates whether the particular change is an addition or
 * a removal.
 */
public class MappingDiff {
  /**
   * Returns the difference between the two mappings. It only contains the keys with which there are
   * any differences. The value lists are neither null nor empty. There is no particular ordering in
   * the value returned, so one should not rely on the ordering of values.
   * <p>
   * A key that is mapped to null is treated like a key that is not mapped at all, on either side.
   *
   * @param before the mapping before the change
   * @param after the mapping after the change
   * @return for each key whose values differ, the values that were added or removed
   */
  public static <K, V> Map<K, List<Value<V>>> calculateDiff(Map<K, List<V>> before,
      Map<K, List<V>> after) {
    Map<K, List<Value<V>>> result = new HashMap<K, List<Value<V>>>();
    // first pass: every key known to "before", compared against "after"
    for (K key: before.keySet()) {
      List<Value<V>> diff = calculateDiff(before.get(key), after.get(key));
      if (!diff.isEmpty()) {
        result.put(key, diff);
      }
    }
    // second pass: keys that are mapped in "after" but not in "before"
    Set<K> addedKeys = new HashSet<K>(after.keySet());
    addedKeys.removeAll(before.keySet());
    for (K key: addedKeys) {
      // everything under such a key is an addition
      List<V> list = after.get(key);
      // a null list carries no values, just like an empty one
      if (list != null && !list.isEmpty()) {
        result.put(key, wrapList(list, Difference.ADDED));
      }
    }
    return result;
  }

  /**
   * Returns the values that differ between the two lists. If one list is null, every value of the
   * other list is reported (as added or removed). If both are null, an empty list is returned.
   */
  private static <V> List<Value<V>> calculateDiff(List<V> before, List<V> after) {
    if (before == null && after == null) {
      return Collections.emptyList();
    }
    if (before == null) {
      return wrapList(after, Difference.ADDED);
    }
    if (after == null) {
      return wrapList(before, Difference.REMOVED);
    }
    // neither list is null: removals are reported first, then additions
    List<Value<V>> result = new ArrayList<Value<V>>();
    for (V v: before) {
      if (!after.contains(v)) {
        result.add(new Value<V>(v, Difference.REMOVED));
      }
    }
    for (V v: after) {
      if (!before.contains(v)) {
        result.add(new Value<V>(v, Difference.ADDED));
      }
    }
    return result;
  }

  /**
   * Tags every value of the list with the given difference type.
   */
  private static <V> List<Value<V>> wrapList(List<V> list, Difference diff) {
    List<Value<V>> result = new ArrayList<Value<V>>();
    for (V v: list) {
      result.add(new Value<V>(v, diff));
    }
    return result;
  }

  /**
   * A value together with the kind of change it went through.
   */
  public static class Value<V> {
    private final V value;
    private final Difference diff;

    public Value(V value, Difference diff) {
      this.value = value;
      this.diff = diff;
    }

    public V get() {
      return value;
    }

    public Difference getDifferenceType() {
      return diff;
    }
  }

  public enum Difference { ADDED, REMOVED }
}
/**
 * Statistics helpers for evaluating how evenly a mapping distributes data.
 */
public class MappingEvaluator {

  /**
   * Returns the arithmetic mean of the values, or NaN for an empty list.
   */
  public static double getMean(List<? extends Number> distribution) {
    double sum = 0;
    for (Number value: distribution) {
      sum += value.doubleValue();
    }
    return sum/distribution.size();
  }

  /**
   * Returns the population standard deviation of the values (the squared deviations are divided
   * by the number of values), or NaN for an empty list.
   */
  public static double getStandardDeviation(List<? extends Number> distribution) {
    double mean = getMean(distribution);

    double squaredDeviations = 0;
    for (Number value: distribution) {
      double deviation = mean - value.doubleValue();
      squaredDeviations += deviation*deviation;
    }

    return Math.sqrt(squaredDeviations/distribution.size());
  }

  /**
   * Returns the mean of the distribution values, each weighted by the weight stored under the same
   * key. The division is carried out in floating point, so the fractional part is kept.
   *
   * @param distribution the value per node
   * @param weight the weight per node; must have an entry for every key of distribution
   * @throws ArithmeticException if the weights of the distribution keys add up to zero
   */
  public static double getWeightedMean(Map<String, ? extends Number> distribution,
      Map<String, ? extends Number> weight) {
    assert(distribution.size() == weight.size());
    double weightedSum = 0;
    double totalWeight = 0;

    for (Map.Entry<String, ? extends Number> entry: distribution.entrySet()) {
      double w = weight.get(entry.getKey()).doubleValue();
      weightedSum += entry.getValue().doubleValue() * w;
      totalWeight += w;
    }

    if (totalWeight == 0) {
      // a mean over a total weight of zero is undefined
      throw new ArithmeticException("the total weight is zero");
    }
    return weightedSum/totalWeight;
  }

  /**
   * Returns the weighted standard deviation of the distribution:
   * sqrt(sum(w * (x - mean)^2) / ((m - 1) * sum(w) / m)), where m is the number of non-zero
   * weights. With a single non-zero weight the denominator is zero and the result is not a finite
   * number.
   *
   * @param distribution the value per node
   * @param weight the weight per node; must have an entry for every key of distribution
   * @throws ArithmeticException if the weights of the distribution keys add up to zero
   */
  public static double getWeightedStandardDeviation(Map<String, ? extends Number> distribution,
      Map<String, ? extends Number> weight) {
    assert(distribution.size() == weight.size());
    double mean = getWeightedMean(distribution, weight);
    double weightedSquares = 0;
    double totalWeight = 0;
    int nonZeroWeights = 0;

    for (Map.Entry<String, ? extends Number> entry: distribution.entrySet()) {
      double w = weight.get(entry.getKey()).doubleValue();
      weightedSquares += w * Math.pow(entry.getValue().doubleValue() - mean, 2);
    }

    for (Number w: weight.values()) {
      totalWeight += w.doubleValue();
      if (w.doubleValue() != 0) nonZeroWeights++;
    }

    return Math.sqrt(weightedSquares/((nonZeroWeights - 1)*totalWeight/nonZeroWeights));
  }
}
/**
 * Unchecked exception raised when no mapping can be generated for the provided input. It signals
 * a severe failure.
 */
public class MappingException extends RuntimeException {
  private static final long serialVersionUID = 2099244230202336093L;

  /**
   * @param msg the description of the failure
   */
  public MappingException(String msg) {
    super(msg);
  }
}
23 | */ 24 | public interface MappingFunction { 25 | Map> computeMapping(List data, Node topology); 26 | } 27 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/MessageDigestHash.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.nio.ByteBuffer; 19 | import java.security.MessageDigest; 20 | import java.security.NoSuchAlgorithmException; 21 | 22 | public class MessageDigestHash implements MultiInputHash { 23 | private final String algorithm; 24 | private final MessageDigest md; 25 | 26 | public MessageDigestHash(String algorithm) { 27 | this.algorithm = algorithm; 28 | try { 29 | md = MessageDigest.getInstance(algorithm); 30 | } catch (NoSuchAlgorithmException e) { 31 | throw new IllegalArgumentException("invalid algorithm passed in", e); 32 | } 33 | } 34 | 35 | public String getAlgorithm() { 36 | return algorithm; 37 | } 38 | 39 | public long hash(long a) { 40 | ByteBuffer buf = ByteBuffer.allocate(8).putLong(a); 41 | return hashFromBuffer(buf); 42 | } 43 | 44 | public long hash(long a, long b) { 45 | ByteBuffer buf = ByteBuffer.allocate(8*2). 
46 | putLong(a).putLong(b); 47 | return hashFromBuffer(buf); 48 | } 49 | 50 | public long hash(long a, long b, long c) { 51 | ByteBuffer buf = ByteBuffer.allocate(8*3). 52 | putLong(a).putLong(b).putLong(c); 53 | return hashFromBuffer(buf); 54 | } 55 | 56 | private long hashFromBuffer(ByteBuffer buf) { 57 | byte[] result = md.digest(buf.array()); 58 | return Utils.bstrTo32bit(result); 59 | } 60 | } 61 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/MultiInputHash.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | public interface MultiInputHash { 19 | long hash(long a); 20 | long hash(long a, long b); 21 | long hash(long a, long b, long c); 22 | } 23 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/NoOpAssignmentTracker.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | /** 19 | * Implementation of the assignment tracker that simply does not track. It is used when assignment 20 | * tracking is disabled (i.e. target balance is not used). 21 | */ 22 | class NoOpAssignmentTracker implements AssignmentTracker { 23 | /** 24 | * No tracking. 25 | */ 26 | public boolean trackAssignment(Node node) { 27 | return false; 28 | } 29 | 30 | /** 31 | * No rejection. 32 | */ 33 | public boolean rejectAssignment(Node node) { 34 | return false; 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/Node.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.ArrayList; 19 | import java.util.List; 20 | 21 | public class Node implements Comparable { 22 | public enum Selection { STRAW, CONSISTENT_HASHING } 23 | 24 | private String name; 25 | private int type; 26 | private long id; 27 | private long weight; 28 | private Selection selection; 29 | 30 | private List children; 31 | private Node parent; 32 | 33 | private Selector selector; 34 | 35 | private boolean failed; 36 | 37 | public Node() {} 38 | 39 | public Node(Node node) { 40 | this.name = node.name; 41 | this.type = node.type; 42 | this.id = node.id; 43 | this.weight = node.weight; 44 | this.selection = node.selection; 45 | this.failed = node.failed; 46 | } 47 | 48 | public String getName() { 49 | return name; 50 | } 51 | 52 | public void setName(String name) { 53 | this.name = name; 54 | } 55 | 56 | public int getType() { 57 | return type; 58 | } 59 | 60 | public void setType(int type) { 61 | this.type = type; 62 | } 63 | 64 | public long getId() { 65 | return id; 66 | } 67 | 68 | public void setId(long id) { 69 | this.id = id; 70 | } 71 | 72 | public long getWeight() { 73 | return weight; 74 | } 75 | 76 | public void setWeight(long weight) { 77 | this.weight = weight; 78 | } 79 | 80 | public Selection getSelection() { 81 | return selection; 82 | } 83 | 84 | public void setSelection(Selection selection) { 85 | this.selection = selection; 86 | } 87 | 88 | 89 | public boolean isFailed() { 90 | return failed; 91 | } 92 | 93 | public void setFailed(boolean failed) { 94 | if (!isLeaf()) { 95 | throw new UnsupportedOperationException("you cannot set failed on a non-leaf!"); 96 | } 97 | this.failed = failed; 98 | } 99 | 100 | public List getChildren() { 101 | return children; 102 | } 103 | 104 | public void setChildren(List children) { 105 | this.children = children; 106 | } 107 | 108 | public boolean isLeaf() { 109 | return children == null || children.isEmpty(); 110 | } 111 | 112 | public Node 
getParent() { 113 | return parent; 114 | } 115 | 116 | public void setParent(Node parent) { 117 | this.parent = parent; 118 | } 119 | 120 | public Selector getSelector() { 121 | return selector; 122 | } 123 | 124 | public void setSelector(Selector selector) { 125 | this.selector = selector; 126 | } 127 | 128 | /** 129 | * Uses the selection algorithm that is assigned to the node and return the selected node. 130 | */ 131 | public Node select(long input, long round) { 132 | return selector.select(input, round); 133 | } 134 | 135 | /** 136 | * Returns all leaf nodes that belong in the tree. Returns itself if this node is a leaf. As with 137 | * most other methods in this class, the nodes are added via depth-first traversal. 138 | */ 139 | public List getAllLeafNodes() { 140 | // TODO optimize for performance (cache) 141 | List nodes = new ArrayList(); 142 | if (isLeaf()) { 143 | nodes.add(this); 144 | } else { 145 | for (Node child: children) { 146 | nodes.addAll(child.getAllLeafNodes()); 147 | } 148 | } 149 | return nodes; 150 | } 151 | 152 | /** 153 | * Returns all child nodes that match the type. Returns itself if this node matches it. If no 154 | * child matches the type, an empty list is returned. 155 | */ 156 | public List findChildren(int type) { 157 | List nodes = new ArrayList(); 158 | if (this.type == type) { 159 | nodes.add(this); 160 | } else if (!isLeaf()) { 161 | for (Node child: children) { 162 | nodes.addAll(child.findChildren(type)); 163 | } 164 | } 165 | return nodes; 166 | } 167 | 168 | /** 169 | * Returns the number of all child nodes that match the type. Returns 1 if this node matches it. 170 | * Returns 0 if no child matches the type. 
171 | */ 172 | public int getChildrenCount(int type) { 173 | int count = 0; 174 | if (this.type == type) { 175 | count++; 176 | } else if (!isLeaf()) { 177 | for (Node child: children) { 178 | count += child.getChildrenCount(type); 179 | } 180 | } 181 | return count; 182 | } 183 | 184 | /** 185 | * Finds a parent that matches the given type. If the node itself matches it, it is returned. If 186 | * there is no matching parent in the hierarchy, null is returned. 187 | */ 188 | public Node findParent(int type) { 189 | Node node = this; 190 | while (node != null) { 191 | if (node.type == type) { 192 | return node; 193 | } 194 | node = node.parent; // keep walking up the tree 195 | } 196 | return null; // no match was found 197 | } 198 | 199 | /** 200 | * Returns the top-most ("root") node from this node. If this node itself does not have a parent, 201 | * returns itself. 202 | */ 203 | public Node getRoot() { 204 | Node node = this; 205 | while (node.parent != null) { 206 | node = node.parent; 207 | } 208 | return node; 209 | } 210 | 211 | @Override 212 | public String toString() { 213 | return name + ":" + id; 214 | } 215 | 216 | @Override 217 | public int hashCode() { 218 | return name.hashCode(); 219 | } 220 | 221 | @Override 222 | public boolean equals(Object obj) { 223 | if (obj == this) { 224 | return true; 225 | } 226 | if (!(obj instanceof Node)) { 227 | return false; 228 | } 229 | Node that = (Node)obj; 230 | return name.equals(that.name); 231 | } 232 | 233 | public int compareTo(Node o) { 234 | return name.compareTo(o.name); 235 | } 236 | } 237 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/NodeFilter.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | public interface NodeFilter { 19 | boolean accept(Node node); 20 | } 21 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/PlacementAlgorithm.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.List; 19 | 20 | import com.google.common.base.Predicate; 21 | 22 | /** 23 | * Encapsulation of the algorithm that selects a number of child nodes in the topology to place the 24 | * data based on the data input as well as the node properties such as the selection algorithm, 25 | * the weight, and the type. It is orthogonal to the placement rules, and is used as a building 26 | * block operations to create placement rules. 
27 | */ 28 | public interface PlacementAlgorithm { 29 | /** 30 | * Returns a list of nodes of the desired type. If the count is more than the number of available 31 | * nodes, an exception is thrown. 32 | * 33 | * @return a list of nodes 34 | */ 35 | List select(Node parent, long input, int count, int type); 36 | 37 | /** 38 | * Returns a list of nodes that have the matching type and pass the predicate. If the count is 39 | * more than the number of available nodes, an exception is thrown. 40 | * 41 | * @return a list of nodes 42 | */ 43 | List select(Node parent, long input, int count, int type, Predicate pred); 44 | } 45 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/PlacementRules.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.List; 19 | 20 | /** 21 | * A way to express the placement rules for the crunch/CRUSH mapping. Placement rules are often 22 | * combined with a more specific topology (i.e. type definitions). In general, it should only 23 | * express the prescription of how a number of end nodes should be selected, and should not rely on 24 | * specific data, the top node from which the selection begins, or the placement algorithm. 
25 | */ 26 | public interface PlacementRules { 27 | /** 28 | * Describes how a number of end nodes should be selected from the top node. 29 | */ 30 | List select(Node topNode, long data, int n, PlacementAlgorithm pa); 31 | /** 32 | * Returns the types values that the placement rules use. 33 | */ 34 | int getEndNodeType(); 35 | /** 36 | * Given a node, returns whether the replica end node is acceptable. It must be consistent with 37 | * the selection prescribed in the select methods. 38 | */ 39 | boolean acceptReplica(Node primary, Node replica); 40 | } 41 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/RDFCRUSHMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.*; 19 | 20 | public class RDFCRUSHMapping { 21 | private final int rf; 22 | private final PlacementRules rules; 23 | private final double targetBalance; 24 | 25 | private final Crunch cruncher = new Crunch(); 26 | 27 | public RDFCRUSHMapping(int rf, PlacementRules rules, double targetBalance) { 28 | if (rf < 1) { 29 | throw new IllegalArgumentException("RF must be positive"); 30 | } 31 | this.rf = rf; 32 | this.rules = rules; 33 | this.targetBalance = targetBalance; 34 | } 35 | 36 | /** 37 | * Given the list of data objects (as expressed as long values) and the processed topology, 38 | * returns the mapping from data objects to lists of end nodes onto which the data may be stored. 39 | */ 40 | public Map> createMapping(List data, Node crunchedRoot, Map> rdfMap) { 41 | // sort the data to ensure data is used in the same order 42 | List sorted = new ArrayList(data); 43 | Collections.sort(sorted); 44 | 45 | Map> map = new HashMap>(sorted.size()); 46 | // performance optimization 47 | // we create mini-trees to select the replicas; instead of creating the mini-trees every time, 48 | // we cache the result 49 | Map miniTreeCache = new HashMap(); 50 | List datacenters = crunchedRoot.findChildren(Types.DATA_CENTER); 51 | // iterate on all datacenters 52 | for (Node datacenter: datacenters) { 53 | AssignmentTracker tracker = AssignmentTrackerFactory.create(datacenter, rf*sorted.size(), targetBalance); 54 | PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(tracker); 55 | 56 | for (Long l: sorted) { // ~ N 57 | List selected = pickNodes(l, datacenter, pa, rdfMap, miniTreeCache); 58 | List nodes = map.get(l); 59 | if (nodes == null) { 60 | nodes = new ArrayList(rf*datacenters.size()); 61 | map.put(l, nodes); 62 | } 63 | nodes.addAll(selected); 64 | } 65 | } 66 | return map; 67 | } 68 | 69 | private List pickNodes(long data, Node datacenter, PlacementAlgorithm pa, 70 | Map> rdfMap, Map miniTreeCache) { 
71 | List nodes = new ArrayList(rf); 72 | // get the primary node 73 | Node primary = pa.select(datacenter, data, 1, rules.getEndNodeType()).get(0); 74 | nodes.add(primary); 75 | 76 | // obtain the "mini-tree" 77 | Node miniTree = miniTreeCache.get(primary); 78 | if (miniTree == null) { 79 | // we haven't seen this primary yet 80 | // get the (RF-1) secondary nodes 81 | List secondaries = rdfMap.get(primary); 82 | // construct the "mini-tree" 83 | miniTree = makeMiniTree(secondaries); 84 | miniTreeCache.put(primary, miniTree); 85 | } 86 | // select (RF-1) nodes from the mini-tree using the placement rules 87 | List selected = rules.select(miniTree, data, rf-1, pa); 88 | nodes.addAll(selected); 89 | return nodes; 90 | } 91 | 92 | private Node makeMiniTree(List nodes) { 93 | // this is used to look up parents nodes that are already created 94 | Map lookup = new HashMap(); 95 | Node root = null; 96 | for (Node node: nodes) { // ~ RDF 97 | // create a copy for this purpose 98 | Node newNode = new Node(node); 99 | root = handleParent(node, newNode, lookup); 100 | } 101 | 102 | // crunch 103 | crunchNode(root); 104 | return root; 105 | } 106 | 107 | /** 108 | * Recursively handles all the parents. Returns the root node as a result. 
109 | */ 110 | private Node handleParent(Node node, Node newNode, Map lookup) { 111 | Node parent = node.getParent(); 112 | if (parent == null) { 113 | // root node: return it 114 | return newNode; 115 | } 116 | 117 | // process the parent 118 | Node newParent = lookup.get(parent.getId()); 119 | if (newParent != null) { // it is already mapped 120 | // set the relationship 121 | setRelationship(newNode, newParent); 122 | // we do not need to walk further because it is already processed 123 | // simply return the root 124 | return newParent.getRoot(); 125 | } else { 126 | // this is the first time we are seeing this node: we need to walk up the tree 127 | // create a copy 128 | newParent = new Node(parent); 129 | // add it to the lookup map 130 | lookup.put(newParent.getId(), newParent); 131 | // set the relationship 132 | setRelationship(newNode, newParent); 133 | // recurse for its parent 134 | return handleParent(parent, newParent, lookup); 135 | } 136 | } 137 | 138 | private void setRelationship(Node newNode, Node newParent) { 139 | newNode.setParent(newParent); 140 | List childList = newParent.getChildren(); 141 | if (childList == null) { 142 | childList = new ArrayList(); 143 | newParent.setChildren(childList); 144 | } 145 | childList.add(newNode); 146 | } 147 | 148 | private void crunchNode(Node root) { 149 | cruncher.recrunch(root); 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/RDFMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.ArrayList; 19 | import java.util.Collections; 20 | import java.util.HashMap; 21 | import java.util.List; 22 | import java.util.Map; 23 | 24 | import org.slf4j.Logger; 25 | import org.slf4j.LoggerFactory; 26 | 27 | /** 28 | * The mapping function based on implementing RDF (replica distribution factor), RF (replication 29 | * factor), and a multi-level topology and placement rules. 30 | *
31 | * Both parameters are interpreted as per-datacenter; i.e. if RF = 2, you will have two replicas per 32 | * datacenter. 33 | */ 34 | 35 | public class RDFMapping implements MappingFunction { 36 | private static final Logger logger = LoggerFactory.getLogger(RDFMapping.class); 37 | 38 | private final int rdf; 39 | private final int rf; 40 | private final PlacementRules rules; 41 | 42 | private final boolean bidirectional; 43 | private final boolean handleOverload; 44 | private final double targetBalance; 45 | 46 | private final Crunch cruncher = new Crunch(); 47 | 48 | private Map> rdfMap; 49 | 50 | public RDFMapping(int rdf, int rf, PlacementRules rules) { 51 | this(rdf, rf, rules, false, false, 0.0d); // bi-di and overload handling are off by default 52 | } 53 | 54 | public RDFMapping(int rdf, int rf, PlacementRules rules, double targetBalance) { 55 | this(rdf, rf, rules, false, false, targetBalance); 56 | } 57 | 58 | public RDFMapping(int rdf, int rf, PlacementRules rules, boolean bidirectional) { 59 | this(rdf, rf, rules, bidirectional, false, 0.0d); // overload handling is off by default 60 | } 61 | 62 | private RDFMapping(int rdf, int rf, PlacementRules rules, boolean bidirectional, 63 | boolean handleOverload, double targetBalance) { 64 | if (rf < 1) { 65 | throw new IllegalArgumentException("RF must be positive"); 66 | } 67 | if (rdf < rf) { 68 | throw new IllegalArgumentException("RDF must be equal to or greater than RF"); 69 | } 70 | this.rdf = rdf; 71 | this.rf = rf; 72 | this.rules = rules; 73 | this.bidirectional = bidirectional; 74 | this.handleOverload = handleOverload; 75 | this.targetBalance = targetBalance; 76 | } 77 | 78 | /** 79 | * Given the topology and the list of data as represented by long values, and the placement rules 80 | * specified by the RDF mapping object, produces the mapping from data to list of nodes. 
81 | */ 82 | public Map> computeMapping(List data, Node topology) { 83 | Node crunched = cruncher.makeCrunch(topology); 84 | long begin = System.nanoTime(); 85 | rdfMap = createRDFMapping(crunched); 86 | long end = System.nanoTime(); 87 | logger.info("time taken to create the RDF mapping: {} ms", (end-begin)/1000000L); 88 | begin = System.nanoTime(); 89 | RDFCRUSHMapping crushMapping = new RDFCRUSHMapping(rf, rules, targetBalance); 90 | Map> map = crushMapping.createMapping(data, crunched, rdfMap); 91 | end = System.nanoTime(); 92 | logger.info("time taken to create mapping: {} ms", (end-begin)/1000000L); 93 | return map; 94 | } 95 | 96 | /** 97 | * Given the processed topology, returns the mapping from end nodes to lists of secondary end 98 | * nodes that are allowed to store the replicas. This mapping uses the same CRUSH algorithm as the 99 | * basic placement algorithm. 100 | */ 101 | public Map> createRDFMapping(Node crunchedRoot) { 102 | // iterate on all datacenters 103 | List datacenters = crunchedRoot.findChildren(Types.DATA_CENTER); 104 | Map> map = new HashMap>(crunchedRoot.getAllLeafNodes().size()); 105 | for (Node datacenter: datacenters) { 106 | createRDFMappingPerDC(datacenter, map); 107 | } 108 | return map; 109 | } 110 | 111 | public Map> getNewRdfMap() { 112 | Map> map = new HashMap>(); 113 | 114 | for (Map.Entry> entry: rdfMap.entrySet()) { 115 | List nodeList = new ArrayList(); 116 | for (Node node: entry.getValue()) { 117 | nodeList.add(node.getName()); 118 | } 119 | map.put(entry.getKey().getName(), nodeList); 120 | } 121 | 122 | return map; 123 | } 124 | 125 | private Map> createRDFMappingPerDC(Node datacenter, Map> map) { 126 | final List allLeaves = datacenter.getAllLeafNodes(); 127 | final int endNodeSize = allLeaves.size(); 128 | final int totalMapping = endNodeSize*(rdf-1); 129 | 130 | // use a placement algorithm object for this run and keep track of successive rounds 131 | PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(true); 132 | 
133 | // create the quota so we avoid overloading nodes 134 | Map quota = handleOverload ? createQuota(allLeaves) : null; 135 | int mapped = 0; 136 | 137 | while (mapped < totalMapping) { 138 | for (Node primary: allLeaves) { // <~ n 139 | List secondaries = map.get(primary); 140 | if (secondaries == null) { 141 | secondaries = new ArrayList(rdf-1); 142 | map.put(primary, secondaries); 143 | } 144 | // if it is already filled, we don't need to look at it 145 | if (secondaries.size() < rdf-1) { 146 | // CRUSH selection of nodes using the primary's id 147 | Node selected = pa.select(datacenter, primary.getId(), 1, rules.getEndNodeType()).get(0); 148 | 149 | if (handleOverload) { 150 | // pass through a number of filters to reject the selection 151 | int currentQuota = quota.get(selected); 152 | if (currentQuota == 0) { // we have used all the quota for this node 153 | logger.trace("rejecting {} because it is fully committed.", selected); 154 | continue; 155 | } 156 | } 157 | // first run it through placement rules' acceptance 158 | if (!rules.acceptReplica(primary, selected)) { 159 | // reject and move onto the next primary 160 | logger.trace("rejecting {} for {} from placement rules: we're at {} %", 161 | new Object[] {selected, primary, ((float)mapped)*100/totalMapping}); 162 | continue; 163 | } 164 | 165 | // for bi-di, we need to reject the mapping if the secondary is full too 166 | if (bidirectional) { 167 | List other = map.get(selected); 168 | if (other == null) { 169 | other = new ArrayList(rdf-1); 170 | map.put(selected, other); 171 | } else if (other.size() >= rdf-1) { 172 | // reject and move onto the next primary 173 | logger.trace("rejecting {} for {} because secondary is fully mapped already.", 174 | selected, primary); 175 | continue; 176 | } 177 | // make sure it's not selected already 178 | if (secondaries.contains(selected) || other.contains(primary)) { // these are really one and the same 179 | logger.trace("secondary {} is already mapped for {}", 
selected, primary); 180 | continue; 181 | } 182 | logger.trace("accepting {} for {}", selected, primary); 183 | // accept the node pair 184 | secondaries.add(selected); 185 | other.add(primary); 186 | mapped += 2; 187 | if (handleOverload) { 188 | // make sure to decrement the quota 189 | decrementQuota(selected, quota); 190 | decrementQuota(primary, quota); 191 | } 192 | } else { // uni-directional 193 | // make sure it's not selected already 194 | if (secondaries.contains(selected)) { 195 | logger.trace("secondary {} is already mapped for {}", selected, primary); 196 | continue; 197 | } 198 | logger.trace("accepting {} for {}", selected, primary); 199 | secondaries.add(selected); 200 | mapped++; 201 | if (handleOverload) { 202 | // make sure to decrement the quota 203 | decrementQuota(selected, quota); 204 | } 205 | } 206 | } 207 | } 208 | } 209 | return map; 210 | } 211 | 212 | private Map createQuota(List nodes) { 213 | Map quota = new HashMap(); 214 | final int headroom = 1; 215 | final int size = nodes.size(); 216 | long totalWeight = 0; 217 | for (Node node: nodes) { 218 | totalWeight += node.getWeight(); 219 | } 220 | for (Node node: nodes) { 221 | int value = (int)(node.getWeight()*(rdf-1)*size/totalWeight) + headroom; 222 | // logger.debug("quota for {}: {}", node, value); 223 | quota.put(node, value); 224 | } 225 | return quota; 226 | } 227 | 228 | private void decrementQuota(Node node, Map quota) { 229 | int value = quota.get(node); 230 | quota.put(node, --value); 231 | } 232 | } 233 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/RackBasedTypes.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | /** 19 | * Type system that is based on racks. The first three types are defined as ROOT, DATA_CENTER, and 20 | * RACK. It can be extended to describe more layers of topologies. It is used by 21 | * {@link BaseRackIsolationPlacementRules}. 22 | * 23 | * @see BaseRackIsolationPlacementRules 24 | * 25 | */ 26 | public interface RackBasedTypes extends Types { 27 | int RACK = 2; 28 | } 29 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/RackIsolationPlacementRules.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | 19 | /** 20 | * Rack isolation placement rules based on storage system types with disks as end nodes. 21 | *
22 | * This is provided as a typical concrete implementation for certain types of topologies. One should 23 | * define their own types and extend the {@link BaskRackIsolationPlacementRules} to suit their 24 | * needs. 25 | * 26 | * @see StorageSystemTypes 27 | */ 28 | public class RackIsolationPlacementRules extends BaseRackIsolationPlacementRules 29 | implements StorageSystemTypes { 30 | public int getEndNodeType() { 31 | return DISK; 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/RandomSelector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.util.List; 19 | import java.util.Random; 20 | 21 | /** 22 | * Implementation of deterministic-but-random selection: identical series of calls will result 23 | * in identical selections. This is strictly for test purposes, and should not be used for any 24 | * real selection. 
25 | */ 26 | class RandomSelector implements Selector { 27 | private final Random rng = new Random(42); 28 | private final Node node; 29 | 30 | public RandomSelector(Node node) { 31 | if (node.isLeaf()) { 32 | throw new IllegalArgumentException("count is larger than the number of nodes!"); 33 | } 34 | this.node = node; 35 | } 36 | 37 | public Node select(long input, long round) { 38 | List children = node.getChildren(); 39 | final int length = children.size(); 40 | if (length == 1) { 41 | return children.get(0); 42 | } 43 | 44 | // compute the sum of weights 45 | int totalWeight = 0; 46 | for (Node n: children) { 47 | totalWeight += n.getWeight(); 48 | } 49 | // random number 50 | int draw = rng.nextInt(totalWeight); 51 | // pick a node based on the random number 52 | int sum = 0; 53 | for (Node n: children) { 54 | sum += n.getWeight(); 55 | if (draw < sum) { 56 | // have a match: make a copy 57 | return n; 58 | } 59 | } 60 | return null; 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/Selector.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
*/

package com.twitter.crunch;

/**
 * Object that encapsulates the algorithm (or "bucket type" in CRUSH terms) that, given a node,
 * selects from its immediate children. On instantiation, it may calculate certain properties and
 * attributes specific to the selection algorithm but independent of the data input, and maintain
 * that state. Those properties will be used as part of the selection input.
 */
public interface Selector {
  /**
   * Selects one node based on the input value and an additional round value.
   *
   * @param input the value to select for; StrawSelector hashes it together with each child's id,
   * while RandomSelector ignores it
   * @param round an additional value fed into the selection alongside the input (presumably a
   * retry counter that lets a caller obtain a different pick for the same input; confirm against
   * the callers)
   * @return the selected child node
   */
  Node select(long input, long round);
}

--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/SimpleCRUSHMapping.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.crunch;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Mapping function that computes a simple CRUSH mapping. By default, it accepts RF as the only
 * parameter to control the replication factor.
27 | */ 28 | public class SimpleCRUSHMapping implements MappingFunction { 29 | private final int rf; 30 | private final PlacementRules rules; 31 | private final double targetBalance; 32 | 33 | public SimpleCRUSHMapping(int rf, PlacementRules rules) { 34 | this(rf, rules, 0.0d); 35 | } 36 | 37 | public SimpleCRUSHMapping(int rf, PlacementRules rules, double targetBalance) { 38 | this.rf = rf; 39 | this.rules = rules; 40 | this.targetBalance = targetBalance; 41 | } 42 | 43 | public Map> computeMapping(List data, Node topology) { 44 | // sort the data to ensure data is used in the same order 45 | List sorted = new ArrayList(data); 46 | Collections.sort(sorted); 47 | 48 | Node crunch = new Crunch().makeCrunch(topology); 49 | Map> map = new HashMap>(sorted.size()); 50 | // iterate over datacenters 51 | List datacenters = crunch.findChildren(Types.DATA_CENTER); 52 | for (Node datacenter: datacenters) { 53 | AssignmentTracker tracker = 54 | AssignmentTrackerFactory.create(datacenter, rf*sorted.size(), targetBalance); 55 | PlacementAlgorithm pa = new CRUSHPlacementAlgorithm(tracker); 56 | 57 | for (Long l: sorted) { 58 | // apply the placement rules 59 | List selected = rules.select(datacenter, l, rf, pa); 60 | List nodes = map.get(l); 61 | if (nodes == null) { 62 | nodes = new ArrayList(rf*datacenters.size()); 63 | map.put(l, nodes); 64 | } 65 | nodes.addAll(selected); 66 | } 67 | } 68 | return map; 69 | } 70 | } 71 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/StorageSystemTypes.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.twitter.crunch;

/**
 * Describes types for a fairly typical storage system cluster. The constants below are added to
 * whatever {@link RackBasedTypes} already declares.
 *
 * @see RackIsolationPlacementRules
 */
public interface StorageSystemTypes extends RackBasedTypes {
  // NOTE(review): the numbering starts at 3, presumably continuing after the inherited type
  // constants (Types declares ROOT = 0 and DATA_CENTER = 1); confirm against RackBasedTypes
  int SUB_RACK = 3;
  int STORAGE_NODE = 4;
  // the end node type reported by RackIsolationPlacementRules
  int DISK = 5;
}

--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/StrawSelector.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.twitter.crunch;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Selection algorithm based on the "straw" bucket type as described in the CRUSH algorithm.
*/
class StrawSelector implements Selector {
  // maps each child to its "straw": a scaling factor stored as a fixed-point long (factor*0x10000)
  // that weightedScore() multiplies the child's hash with
  private final Map straws = new HashMap();
  private final MultiInputHash hashFunction;

  /**
   * Precomputes the straw of every child of the given node. For a leaf node no straws are
   * computed, and select() on such an instance throws IllegalStateException.
   *
   * @param node the parent whose children are the selection candidates
   */
  public StrawSelector(Node node) {
    if (!node.isLeaf()) {
      // create a map from the nodes to their values
      List sortedNodes = sortNodes(node.getChildren()); // do a reverse sort by weight

      // numLeft: starts at the number of children and is reduced by the size of each new weight
      //          group that is reached
      // straw:   the factor assigned to the children of the current weight group
      // wbelow:  running accumulator that feeds the ratio pbelow
      // lastw:   the weight of the previous weight group
      int numLeft = sortedNodes.size();
      float straw = 1.0f;
      float wbelow = 0.0f;
      float lastw = 0.0f;
      int i = 0;
      final int length = sortedNodes.size();
      while (i < length) {
        Node current = sortedNodes.get(i);
        // zero-weight children get a zero straw, so their score is always 0
        if (current.getWeight() == 0) {
          straws.put(current, 0L);
          i++;
          continue;
        }
        // record the current factor for this child in fixed point
        straws.put(current, (long)(straw*0x10000));
        i++;
        if (i == length) {
          break;
        }

        // children with the same weight as their predecessor share the same straw
        current = sortedNodes.get(i);
        Node previous = sortedNodes.get(i-1);
        if (current.getWeight() == previous.getWeight()) {
          continue;
        }
        // a new weight group starts at i: adjust the factor for it
        wbelow += (float)(previous.getWeight() - lastw)*numLeft;
        // take the members of the new group out of numLeft
        for (int j = i; j < length; j++) {
          if (sortedNodes.get(j).getWeight() == current.getWeight()) {
            numLeft--;
          } else {
            break;
          }
        }
        // NOTE(review): the list is sorted heaviest first, so the weight difference below is
        // negative and the factor shrinks for lighter groups; confirm that this ordering is the
        // intended one relative to the reference CRUSH straw calculation
        float wnext = (float)(numLeft * (current.getWeight() - previous.getWeight()));
        float pbelow = wbelow/(wbelow + wnext);
        straw *= Math.pow(1.0/pbelow, 1.0/numLeft);
        lastw = previous.getWeight();
      }
    }
    hashFunction = new JenkinsHash();
  }

  /**
   * Returns a new list that's sorted in the reverse order of the weight. The argument itself is
   * left untouched.
   */
  private List sortNodes(List nodes) {
    List ret = new ArrayList(nodes);
    sortNodesInPlace(ret);
    return ret;
  }

  /**
   * Sorts the list in place in the reverse order of the weight, i.e. heaviest first.
   */
  private void sortNodesInPlace(List nodes) {
    Collections.sort(nodes, new Comparator() {
      public int compare(Node n1, Node n2) {
        if (n2.getWeight() == n1.getWeight()) {
          return 0;
        }
        // sort by weight only in the reverse order: a heavier n2 puts n1 after it
        return (n2.getWeight() - n1.getWeight() > 0) ? 1 : -1;
      }
    });
  }

  /**
   * Scores every child with weightedScore() and returns the one with the highest score.
   *
   * @throws IllegalStateException if no child was selected, which is the case when there are no
   * straws (e.g. the selector was created for a leaf node)
   */
  public Node select(long input, long round) {
    Node selected = null;
    // start below zero so that a child whose score is 0 can still be selected
    long hiScore = -1;
    // the comparison is strict, so among equal scores the entry visited first wins; the visiting
    // order is the iteration order of the HashMap
    for (Map.Entry e: straws.entrySet()) {
      Node child = e.getKey();
      long straw = e.getValue();
      long score = weightedScore(child, straw, input, round);
      if (score > hiScore) {
        selected = child;
        hiScore = score;
      }
    }
    if (selected == null) {
      throw new IllegalStateException();
    }
    return selected;
  }

  /**
   * Hashes the input, the child's id and the round, keeps the lower 16 bits of the hash and
   * multiplies them by the child's straw.
   */
  private long weightedScore(Node child, long straw, long input, long round) {
    long hash = hashFunction.hash(input, child.getId(), round);
    hash = hash&0xffff;
    long weightedScore = hash*straw;
    return weightedScore;
  }
}

--------------------------------------------------------------------------------
/src/main/java/com/twitter/crunch/Types.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | public interface Types { 19 | int ROOT = 0; 20 | int DATA_CENTER = 1; 21 | } 22 | -------------------------------------------------------------------------------- /src/main/java/com/twitter/crunch/Utils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | 19 | public class Utils { 20 | public static long bstrTo32bit(byte[] bstr) { 21 | if (bstr.length < 4) { 22 | throw new IllegalArgumentException("hashed is less than 4 bytes!"); 23 | } 24 | // need to "simulate" unsigned int 25 | return (long)(((ord(bstr[0]) << 24) 26 | | (ord(bstr[1]) << 16) 27 | | (ord(bstr[2]) << 8) 28 | | (ord(bstr[3])))) 29 | & 0xffffffffL; 30 | } 31 | 32 | private static int ord(byte b) { 33 | return b & 0xff; 34 | } 35 | } 36 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/AssignmentTrackerImplTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import static org.junit.Assert.assertFalse; 19 | import static org.junit.Assert.assertTrue; 20 | import static org.mockito.Mockito.mock; 21 | import static org.mockito.Mockito.when; 22 | 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | import org.junit.Test; 27 | 28 | public class AssignmentTrackerImplTest { 29 | @Test 30 | public void testLowWatermark() { 31 | final int childType = 3; 32 | Node child = mockChildNode(childType); 33 | Node root = mockRootNode(childType, child); 34 | AssignmentTracker tracker = 35 | new AssignmentTrackerImpl(root, (int)(AssignmentTrackerImpl.LOW_WATERMARK-1), 0.25d); 36 | assertFalse(tracker.trackAssignment(child)); 37 | } 38 | 39 | private Node mockRootNode(final int childType, Node child) { 40 | Node root = mock(Node.class); 41 | List children = new ArrayList(); 42 | children.add(child); 43 | when(root.findChildren(childType)).thenReturn(children); 44 | return root; 45 | } 46 | 47 | private Node mockChildNode(final int childType) { 48 | Node child = mock(Node.class); 49 | when(child.getType()).thenReturn(childType); 50 | when(child.getWeight()).thenReturn(65536L); 51 | when(child.isLeaf()).thenReturn(true); 52 | return child; 53 | } 54 | 55 | @Test 56 | public void testDifferenceThreshold() { 57 | final int childType = 3; 58 | Node child = mockChildNode(childType); 59 | Node root = mockRootNode(childType, child); 60 | final int dataCount = 100; 61 | final double maxAllowed = 
((double)(AssignmentTrackerImpl.DIFFERENCE_THRESHOLD - 1))/dataCount; 62 | assertTrue(dataCount*maxAllowed < AssignmentTrackerImpl.DIFFERENCE_THRESHOLD); 63 | AssignmentTracker tracker = new AssignmentTrackerImpl(root, dataCount, maxAllowed); 64 | assertFalse(tracker.trackAssignment(child)); 65 | } 66 | 67 | @Test 68 | public void testRejectAssignment() { 69 | final int childType = 3; 70 | Node child = mockChildNode(childType); 71 | Node root = mockRootNode(childType, child); 72 | final int dataCount = 50; 73 | final double maxAllowed = 0.25d; 74 | final int max = (int)Math.ceil(dataCount*(1.0d + maxAllowed)); 75 | AssignmentTracker tracker = new AssignmentTrackerImpl(root, dataCount, maxAllowed); 76 | // no assignment: should not be rejected 77 | assertFalse(tracker.rejectAssignment(child)); 78 | 79 | // fill it up to max 80 | for (int i = 0; i < max; i++) { 81 | assertTrue(tracker.trackAssignment(child)); 82 | } 83 | assertTrue(tracker.rejectAssignment(child)); 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/BaseSelectionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.lang.reflect.Constructor; 19 | import java.lang.reflect.InvocationTargetException; 20 | import java.security.MessageDigest; 21 | import java.security.NoSuchAlgorithmException; 22 | import java.util.ArrayList; 23 | import java.util.Collections; 24 | import java.util.HashMap; 25 | import java.util.List; 26 | import java.util.Map; 27 | 28 | import com.twitter.crunch.MappingDiff.Value; 29 | import com.twitter.crunch.Node.Selection; 30 | 31 | public class BaseSelectionTest { 32 | protected void doTestBasic(Class type) { 33 | Node rack = createTree(); 34 | Selector selector = createSelector(type, rack); 35 | long input = getHashFromString("some random key"); 36 | Node selected = selector.select(input, 1); 37 | System.out.println(selected.getName()); 38 | selected = selector.select(input, 2); 39 | System.out.println(selected.getName()); 40 | } 41 | 42 | protected void doTestBalance(Class type, final int tries) { 43 | Node rack = createTree(); 44 | Selector selector = createSelector(type, rack); 45 | Map nodeCounts = new HashMap(); 46 | for (int i = 0; i < tries; i++) { 47 | long input = getHashFromString("key" + i); 48 | Node selected = selector.select(input, 1); 49 | String node = selected.getName(); 50 | Integer old = nodeCounts.get(node); 51 | int value = (old == null) ? 
1 : old.intValue()+1; 52 | nodeCounts.put(node, value); 53 | } 54 | 55 | for (Map.Entry e: nodeCounts.entrySet()) { 56 | System.out.println(e.getKey() + ": " + e.getValue()); 57 | } 58 | 59 | printDeviation(nodeCounts, tries); 60 | } 61 | 62 | private Node createTree() { 63 | Node rack = createNode("rack", StorageSystemTypes.RACK, 0, 0, null); 64 | List children = new ArrayList(); 65 | children.add(createNode("node1", StorageSystemTypes.DISK, 1, 100, null)); 66 | children.add(createNode("node2", StorageSystemTypes.DISK, 2, 50, null)); 67 | children.add(createNode("node3", StorageSystemTypes.DISK, 3, 100, null)); 68 | rack.setChildren(children); 69 | return rack; 70 | } 71 | 72 | private Node createNode(String name, int type, long id, long weight, Selection selection) { 73 | Node node = new Node(); 74 | node.setName(name); 75 | node.setType(type); 76 | node.setId(id); 77 | node.setWeight(weight); 78 | node.setSelection(selection); 79 | return node; 80 | } 81 | 82 | private Node createLargeTree() { 83 | Node rack = createNode("rack", StorageSystemTypes.RACK, 0, 0, null); 84 | List children = new ArrayList(); 85 | final int size = 1024; 86 | for (int i = 1; i <= size; i++) { 87 | children.add(createNode("node" + i, StorageSystemTypes.DISK, i, 100, null)); 88 | } 89 | rack.setChildren(children); 90 | return rack; 91 | } 92 | 93 | private long getHashFromString(String string) { 94 | MessageDigest md = null; 95 | try { 96 | md = MessageDigest.getInstance("MD5"); 97 | } catch (NoSuchAlgorithmException ignore) {} 98 | byte[] hash = md.digest(string.getBytes()); 99 | return Utils.bstrTo32bit(hash); 100 | } 101 | 102 | private Map getExpectedBalance(int tries) { 103 | Map map = new HashMap(); 104 | map.put("node1", (int)(tries*0.4)); 105 | map.put("node2", (int)(tries*0.2)); 106 | map.put("node3", (int)(tries*0.4)); 107 | return map; 108 | } 109 | 110 | private void printDeviation(Map actual, int tries) { 111 | // compute the deviation 112 | Map expected = 
getExpectedBalance(tries); 113 | double varianceSum = 0.0; 114 | int count = actual.size(); 115 | for (Map.Entry e: actual.entrySet()) { 116 | int expectedCount = expected.get(e.getKey()); 117 | int actualCount = e.getValue(); 118 | double diff = expectedCount - actualCount; 119 | varianceSum += diff*diff/expectedCount/expectedCount; 120 | } 121 | double deviation = Math.sqrt(varianceSum/count); 122 | System.out.println("relative deviation (%): " + deviation*100); 123 | } 124 | 125 | protected void doTestLargeTree(Class type) { 126 | Node rack = createLargeTree(); 127 | Selector selector = createSelector(type, rack); 128 | Map nodeCounts = new HashMap(); 129 | final int size = rack.getChildren().size(); 130 | final int tries = 1024*128; 131 | for (int i = 1; i <= tries; i++) { 132 | Node selected = selector.select((long)i, 1); 133 | String node = selected.getName(); 134 | Integer old = nodeCounts.get(node); 135 | int value = (old == null) ? 1 : old.intValue()+1; 136 | nodeCounts.put(node, value); 137 | } 138 | 139 | for (Map.Entry e: nodeCounts.entrySet()) { 140 | System.out.println(e.getKey() + ": " + e.getValue()); 141 | } 142 | 143 | // compute the deviation 144 | final int expectedCount = tries/size; 145 | double varianceSum = 0.0; 146 | int count = nodeCounts.size(); 147 | for (Integer actualCount: nodeCounts.values()) { 148 | double diff = expectedCount - actualCount; 149 | varianceSum += diff*diff/expectedCount/expectedCount; 150 | } 151 | double deviation = Math.sqrt(varianceSum/count); 152 | System.out.println("relative deviation (%): " + deviation*100); 153 | } 154 | 155 | private void analyzeDiff(Map> mapping1, 156 | Map> mapping2) { 157 | // compute the diff 158 | Map>> diff = MappingDiff.calculateDiff(mapping1, mapping2); 159 | System.out.println("number of data objects that moved: " + diff.size()); 160 | System.out.println("relative movement (%): " + ((float)diff.size())*100/mapping1.size()); 161 | } 162 | 163 | protected void doTestStability(Class 
type, boolean removal) { 164 | // first try: create the full tree 165 | Node rack = createLargeTree(); 166 | Selector selector = createSelector(type, rack); 167 | Map> mapping1 = new HashMap>(); 168 | final int tries = 1024*128; 169 | for (int i = 1; i <= tries; i++) { 170 | Node selected = selector.select((long)i, 1); 171 | String node = selected.getName(); 172 | mapping1.put(i, Collections.singletonList(node)); 173 | } 174 | 175 | // second try: remove a node 176 | List children = rack.getChildren(); 177 | if (removal) { 178 | children.remove(0); // remove the first node 179 | } else { // addition 180 | Node extra = createNode("node1000000", StorageSystemTypes.DISK, 1000000, 100, null); 181 | children.add(extra); // remove the first node 182 | } 183 | 184 | Selector selector2 = createSelector(type, rack); 185 | Map> mapping2 = new HashMap>(); 186 | for (int i = 1; i <= tries; i++) { 187 | Node selected = selector2.select((long)i, 1); 188 | String node = selected.getName(); 189 | mapping2.put(i, Collections.singletonList(node)); 190 | } 191 | 192 | analyzeDiff(mapping1, mapping2); 193 | } 194 | 195 | /** 196 | * We require a constructor with a single argument that takes the Node. 
197 | */ 198 | private Selector createSelector(Class type, Node rack) { 199 | try { 200 | Constructor ctr = type.getConstructor(Node.class); 201 | return ctr.newInstance(rack); 202 | } catch (NoSuchMethodException e) { // should not occur 203 | throw new IllegalArgumentException(e); 204 | } catch (InstantiationException e) { // should not occur 205 | throw new IllegalArgumentException(e); 206 | } catch (InvocationTargetException e) { // should not occur 207 | throw new IllegalArgumentException(e); 208 | } catch (IllegalAccessException e) { // should not occur 209 | throw new IllegalArgumentException(e); 210 | } 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/ConsistentHashingSelectionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import org.junit.Test; 19 | 20 | public class ConsistentHashingSelectionTest extends BaseSelectionTest { 21 | private final Class cls = ConsistentHashingSelector.class; 22 | 23 | @Test 24 | public void testLargeTree() { 25 | doTestLargeTree(cls); 26 | } 27 | 28 | @Test 29 | public void testStabilityOnRemoval() { 30 | doTestStability(cls, true); 31 | } 32 | 33 | @Test 34 | public void testStabilityOnAddition() { 35 | doTestStability(cls, false); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/CrunchTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertNotNull; 20 | import static org.junit.Assert.assertTrue; 21 | import static org.junit.Assert.fail; 22 | 23 | import java.util.Arrays; 24 | import java.util.List; 25 | 26 | import org.junit.Test; 27 | 28 | public class CrunchTest { 29 | @Test 30 | public void testMakeCrunch() { 31 | Node root = TestUtils.createSimpleTree(); 32 | Node crunched = new Crunch().makeCrunch(root); 33 | verifyNode(crunched); 34 | assertEquals(800, crunched.getWeight()); 35 | } 36 | 37 | private void verifyNode(Node node) { 38 | System.out.println(node + " => " + node.getWeight()); 39 | assertTrue(node.getWeight() > 0); 40 | if (!node.isLeaf()) { 41 | assertNotNull(node.getSelector()); 42 | for (Node child: node.getChildren()) { 43 | verifyNode(child); 44 | } 45 | } 46 | } 47 | 48 | /** 49 | * Creates a situation where some parent nodes have all of their children picked. The mapping 50 | * should still converge fast in this case. 
51 | */ 52 | @Test 53 | public void testSmallTopology() { 54 | Node root = TestUtils.createSimpleTree(); 55 | 56 | PlacementRules rules = new PlacementRules() { 57 | public List select(Node topCNode, long data, int count, PlacementAlgorithm pa) { 58 | return pa.select(topCNode, data, count, getEndNodeType()); 59 | } 60 | 61 | public int getEndNodeType() { return StorageSystemTypes.DISK; } 62 | 63 | public boolean acceptReplica(Node primary, Node replica) { 64 | return true; 65 | } 66 | }; 67 | 68 | new SimpleCRUSHMapping(3, rules).computeMapping(Arrays.asList(3L), root); 69 | assertTrue(true); 70 | } 71 | 72 | @Test 73 | public void testNonRootCrunch() { 74 | Node root = TestUtils.createSimpleTree(); 75 | // override the type to cause the exception 76 | root.setType(StorageSystemTypes.STORAGE_NODE); 77 | try { 78 | new Crunch().makeCrunch(root); 79 | fail("we shouldn't reach this line"); 80 | } catch (IllegalArgumentException e) { 81 | assertTrue(true); 82 | } 83 | } 84 | 85 | @Test 86 | public void testRecrunch() { 87 | Node root = TestUtils.createSimpleTree(); 88 | Node crunched = new Crunch().makeCrunch(root); 89 | // now remove a node from the tree and recrunch 90 | Node rack = crunched.getChildren().get(1).getChildren().get(1); 91 | assertEquals(StorageSystemTypes.RACK, rack.getType()); 92 | Node hd = rack.getChildren().get(1); 93 | assertEquals(StorageSystemTypes.DISK, hd.getType()); 94 | List children = rack.getChildren(); 95 | children.remove(hd); 96 | // recrunch 97 | new Crunch().recrunch(crunched); 98 | verifyNode(crunched); 99 | assertEquals(700, crunched.getWeight()); 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/CrunchTestSuite.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.twitter.crunch;

import org.junit.runner.RunWith;
import org.junit.runners.Suite;
import org.junit.runners.Suite.SuiteClasses;

import com.twitter.crunch.integrated.RDFBalanceTest;
import com.twitter.crunch.integrated.RDFStabilityTest;

/**
 * Test suite that exercises all crunch tests: the unit tests of this package plus the RDF balance
 * and stability tests from the integrated package.
 *
 * NOTE(review): the source tree also lists an integrated SiblingBiasTest, which is not part of
 * this suite; confirm whether that omission is intended.
 */
@RunWith(Suite.class)
@SuiteClasses({
  NodeTest.class,
  MessageDigestHashTest.class,
  MappingDiffTest.class,
  StrawSelectionTest.class,
  ConsistentHashingSelectionTest.class,
  RandomSelectionTest.class,
  CrunchTest.class,
  AssignmentTrackerImplTest.class,
  RDFBalanceTest.class,
  RDFStabilityTest.class
})
public class CrunchTestSuite {}
--------------------------------------------------------------------------------
/src/test/java/com/twitter/crunch/MappingDiffTest.java:
--------------------------------------------------------------------------------
/**
 * Copyright 2013 Twitter, Inc.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertNotNull; 20 | import static org.junit.Assert.assertSame; 21 | import static org.junit.Assert.fail; 22 | 23 | import java.util.Arrays; 24 | import java.util.Collections; 25 | import java.util.HashMap; 26 | import java.util.HashSet; 27 | import java.util.List; 28 | import java.util.Map; 29 | import java.util.Set; 30 | 31 | import org.junit.Test; 32 | 33 | import com.twitter.crunch.MappingDiff.Difference; 34 | import com.twitter.crunch.MappingDiff.Value; 35 | 36 | public class MappingDiffTest { 37 | 38 | @Test 39 | public void testSimpleDifferences() { 40 | Map> m1 = new HashMap>(); 41 | // 1 => [1, 2, 3] 42 | // 2 => [4, 5, 6] 43 | // 3 => [7, 8, 9] 44 | m1.put(1, Arrays.asList(1, 2, 3)); 45 | m1.put(2, Arrays.asList(4, 5, 6)); 46 | m1.put(3, Arrays.asList(7, 8, 9)); 47 | Map> m2 = new HashMap>(); 48 | // 1 => [1, 2, 4] 49 | // 2 => [3, 5, 6] 50 | // 3 => [7, 8, 9] 51 | m2.put(1, Arrays.asList(1, 2, 4)); 52 | m2.put(2, Arrays.asList(3, 5, 6)); 53 | m2.put(3, Arrays.asList(9, 7, 8)); 54 | // expect 55 | // 1 => [3, 4] 56 | // 2 => [3, 4] 57 | Map>> diff = MappingDiff.calculateDiff(m1, m2); 58 | assertEquals(2, diff.size()); 59 | Set keys = diff.keySet(); 60 | Set expectedKeys = new HashSet(); 61 | Collections.addAll(expectedKeys, 1, 2); 62 | assertEquals(expectedKeys, keys); 63 | List> d1 = diff.get(1); 64 | assertNotNull(d1); 65 | for (Value v: d1) { 66 | Difference type = 
v.getDifferenceType(); 67 | switch (v.get()) { 68 | case 3: 69 | assertSame(Difference.REMOVED, type); 70 | break; 71 | case 4: 72 | assertSame(Difference.ADDED, type); 73 | break; 74 | default: 75 | fail("we shouldn't be here!"); 76 | } 77 | } 78 | List> d2 = diff.get(2); 79 | assertNotNull(d2); 80 | for (Value v: d2) { 81 | Difference type = v.getDifferenceType(); 82 | switch (v.get()) { 83 | case 3: 84 | assertSame(Difference.ADDED, type); 85 | break; 86 | case 4: 87 | assertSame(Difference.REMOVED, type); 88 | break; 89 | default: 90 | fail("we shouldn't be here!"); 91 | } 92 | } 93 | } 94 | 95 | @Test 96 | public void testMoreDiff() { 97 | Map> m1 = new HashMap>(); 98 | // 1 => [1, 2, 3] 99 | // 2 => [4, 5, 6] 100 | m1.put(1, Arrays.asList(1, 2, 3)); 101 | m1.put(2, Arrays.asList(4, 5, 6)); 102 | Map> m2 = new HashMap>(); 103 | // 1 => [1, 2, 4] 104 | // 2 => [3, 5, 6] 105 | // 3 => [7, 8, 9] 106 | m2.put(1, Arrays.asList(1, 2, 3)); 107 | m2.put(2, Arrays.asList(4, 5, 6)); 108 | m2.put(3, Arrays.asList(9, 7, 8)); 109 | // expect 110 | // 3 => [7, 8, 9] 111 | Map>> diff = MappingDiff.calculateDiff(m1, m2); 112 | assertEquals(1, diff.size()); 113 | Set keys = diff.keySet(); 114 | Set expectedKeys = new HashSet(); 115 | expectedKeys.add(3); 116 | assertEquals(expectedKeys, keys); 117 | List> values = diff.get(3); 118 | assertNotNull(values); 119 | for (Value v: values) { 120 | Difference type = v.getDifferenceType(); 121 | switch (v.get()) { 122 | case 7: 123 | case 8: 124 | case 9: 125 | assertSame(Difference.ADDED, type); 126 | break; 127 | default: 128 | fail("we shouldn't be here!"); 129 | } 130 | } 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/MessageDigestHashTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 
3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import org.junit.Test; 19 | 20 | public class MessageDigestHashTest { 21 | @Test 22 | public void testHashLongLongLong() { 23 | long a = 1; 24 | long b = 2; 25 | long c = 3; 26 | MultiInputHash hf = new MessageDigestHash("SHA-1"); 27 | long val = hf.hash(a, b, c); 28 | System.out.println(val); 29 | a = 2; b = 2; c = 3; 30 | val = hf.hash(a, b, c); 31 | System.out.println(val); 32 | } 33 | 34 | } 35 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/NodeTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | import static org.junit.Assert.assertNull; 20 | import static org.junit.Assert.assertSame; 21 | import static org.junit.Assert.assertTrue; 22 | 23 | import java.util.ArrayList; 24 | import java.util.List; 25 | 26 | import org.junit.Test; 27 | 28 | import com.twitter.crunch.Node.Selection; 29 | 30 | public class NodeTest { 31 | @Test 32 | public void testCopyConstructor() { 33 | Node node = new Node(); 34 | // properties that should be equal 35 | final String foo = "foo"; 36 | final int id = 1234; 37 | final int type = StorageSystemTypes.RACK; 38 | final long weight = 100; 39 | final Selection selection = Selection.STRAW; 40 | 41 | node.setName(foo); 42 | node.setId(id); 43 | node.setType(type); 44 | node.setWeight(weight); 45 | node.setSelection(selection); 46 | 47 | // properties that should not be copied 48 | node.setChildren(new ArrayList()); 49 | node.setParent(new Node()); 50 | 51 | Node copy = new Node(node); 52 | assertEquals(node.getName(), copy.getName()); 53 | assertEquals(node.getId(), copy.getId()); 54 | assertEquals(node.getType(), copy.getType()); 55 | assertEquals(node.getWeight(), copy.getWeight()); 56 | assertEquals(node.getSelection(), copy.getSelection()); 57 | // ensure relationship is not copied 58 | assertNull(copy.getChildren()); 59 | assertNull(copy.getParent()); 60 | } 61 | 62 | @Test 63 | public void testIsLeaf() { 64 | Node node = new Node(); 65 | List children = new ArrayList(); 66 | children.add(new Node()); 67 | children.remove(0); 68 | node.setChildren(children); 69 | 70 | assertTrue(node.isLeaf()); 71 | } 72 | 73 | @Test 74 | public void testGetAllLeafNodes() { 75 | Node root = TestUtils.createSimpleTree(); 76 | List leaves = root.getAllLeafNodes(); 77 | assertEquals(8, leaves.size()); 78 | // test a leaf node itself 79 | Node leaf = leaves.get(0); 80 | List self = leaf.getAllLeafNodes(); 81 | assertEquals(1, self.size()); 82 | 
assertSame(leaf, self.get(0)); 83 | } 84 | 85 | @Test 86 | public void testFindChildren() { 87 | Node root = TestUtils.createSimpleTree(); 88 | List racks = root.findChildren(StorageSystemTypes.RACK); 89 | assertEquals(4, racks.size()); 90 | for (Node r: racks) { 91 | assertEquals(StorageSystemTypes.RACK, r.getType()); 92 | } 93 | // test the node itself 94 | Node rack = racks.get(0); 95 | List self = rack.findChildren(StorageSystemTypes.RACK); 96 | assertEquals(1, self.size()); 97 | assertSame(rack, self.get(0)); 98 | // test the no match 99 | List empty = rack.findChildren(Types.DATA_CENTER); 100 | assertTrue(empty.isEmpty()); 101 | } 102 | 103 | @Test 104 | public void testChildrenCount() { 105 | Node root = TestUtils.createSimpleTree(); 106 | int count = root.getChildrenCount(StorageSystemTypes.RACK); 107 | assertEquals(4, count); 108 | // test the node itself 109 | List racks = root.findChildren(StorageSystemTypes.RACK); 110 | Node rack = racks.get(0); 111 | count = rack.getChildrenCount(StorageSystemTypes.RACK); 112 | assertEquals(1, count); 113 | // test the no match 114 | count = rack.getChildrenCount(Types.DATA_CENTER); 115 | assertEquals(0, count); 116 | } 117 | 118 | @Test 119 | public void testFindParent() { 120 | Node root = TestUtils.createSimpleTree(); 121 | Node rack = root.findChildren(StorageSystemTypes.RACK).get(0); 122 | Node dc = rack.findParent(Types.DATA_CENTER); 123 | assertEquals(Types.DATA_CENTER, dc.getType()); 124 | Node self = rack.findParent(StorageSystemTypes.RACK); 125 | assertSame(rack, self); 126 | Node none = rack.findParent(StorageSystemTypes.STORAGE_NODE); 127 | assertNull(none); 128 | } 129 | 130 | @Test 131 | public void testGetRoot() { 132 | Node root = TestUtils.createSimpleTree(); 133 | Node hd = root.findChildren(StorageSystemTypes.DISK).get(0); 134 | Node ret = hd.getRoot(); 135 | assertSame(root, ret); 136 | } 137 | } 138 | -------------------------------------------------------------------------------- 
/src/test/java/com/twitter/crunch/RandomSelectionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import org.junit.Test; 19 | 20 | public class RandomSelectionTest extends BaseSelectionTest { 21 | private final Class cls = RandomSelector.class; 22 | 23 | @Test 24 | public void testLargeTree() { 25 | doTestLargeTree(cls); 26 | } 27 | 28 | @Test 29 | public void testStabilityOnRemoval() { 30 | doTestStability(cls, true); 31 | } 32 | 33 | @Test 34 | public void testStabilityOnAddition() { 35 | doTestStability(cls, false); 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/StrawSelectionTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import org.junit.Test; 19 | 20 | public class StrawSelectionTest extends BaseSelectionTest { 21 | private final Class cls = StrawSelector.class; 22 | 23 | @Test 24 | public void testBasic() { 25 | doTestBasic(cls); 26 | } 27 | 28 | @Test 29 | public void testBalance() { 30 | doTestBalance(cls, 1000); 31 | } 32 | 33 | @Test 34 | public void testLargeTree() { 35 | doTestLargeTree(cls); 36 | } 37 | 38 | @Test 39 | public void testStabilityOnRemoval() { 40 | doTestStability(cls, true); 41 | } 42 | 43 | @Test 44 | public void testStabilityOnAddition() { 45 | doTestStability(cls, false); 46 | } 47 | } 48 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/Topology.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch; 17 | 18 | import java.io.File; 19 | import java.io.IOException; 20 | 21 | import org.codehaus.jackson.map.ObjectMapper; 22 | 23 | public class Topology { 24 | public Node readTopology(String file) throws IOException { 25 | ObjectMapper mapper = new ObjectMapper(); 26 | Node root = mapper.readValue(new File(file), Node.class); 27 | return root; 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/integrated/RDFBalanceTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.integrated; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.HashSet; 21 | import java.util.List; 22 | import java.util.Map; 23 | import java.util.Set; 24 | 25 | import org.junit.Test; 26 | 27 | import com.twitter.crunch.Crunch; 28 | import com.twitter.crunch.MappingFunction; 29 | import com.twitter.crunch.Node; 30 | import com.twitter.crunch.RDFMapping; 31 | import com.twitter.crunch.RackIsolationPlacementRules; 32 | import com.twitter.crunch.SimpleCRUSHMapping; 33 | import com.twitter.crunch.TestUtils; 34 | import com.twitter.crunch.Types; 35 | 36 | public class RDFBalanceTest { 37 | @Test 38 | public void testRDFMapping() { 39 | Node topology = TestUtils.createLargeTree(); 40 | // reduce weight on one node to see if it gets less data 41 | Node smallNode = pickOneNode(topology); 42 | smallNode.setWeight(10); 43 | System.out.println("reduced weight on " + smallNode + " from 100 to 10."); 44 | Node crunch = new Crunch().makeCrunch(topology); 45 | final int rf = 2; 46 | final int rdf = 32; 47 | RDFMapping mappingFunction = new RDFMapping(rdf, rf, new RackIsolationPlacementRules()); 48 | Map> rdfMapping = mappingFunction.createRDFMapping(crunch); 49 | List leafNodes = crunch.getAllLeafNodes(); 50 | assertEquals(leafNodes.size(), rdfMapping.size()); 51 | for (Map.Entry> e: rdfMapping.entrySet()) { 52 | // ensure there are no duplicates 53 | List nodes = e.getValue(); 54 | Set set = new HashSet(nodes); 55 | assertEquals(nodes.size(), set.size()); 56 | } 57 | TestUtils.analyzeRDFMapping(rdfMapping); 58 | } 59 | 60 | private Node pickOneNode(Node topology) { 61 | Node node = topology; 62 | while (!node.isLeaf()) { 63 | List children = node.getChildren(); 64 | node = children.get(children.size()-1); 65 | } 66 | return node; 67 | } 68 | 69 | @Test 70 | public void testFullMapping() { 71 | doTestFullMapping(8); 72 | doTestFullMapping(32); 73 | doTestFullMapping(128); 74 | } 75 | 76 | @Test 77 | 
public void testFullMappingWithTargetBalance() { 78 | final double targetBalance = 0.3d; 79 | doTestFullMapping(8, targetBalance); 80 | doTestFullMapping(32, targetBalance); 81 | doTestFullMapping(128, targetBalance); 82 | } 83 | 84 | private void doTestFullMapping(final int rdf) { 85 | doTestFullMapping(rdf, 0.0f); 86 | } 87 | 88 | private void doTestFullMapping(final int rdf, final double targetBalance) { 89 | Node topology = TestUtils.createLargeTree(); 90 | final int rf = 2; 91 | System.out.print("RDF = " + rdf); 92 | if (targetBalance > 0.0d) { 93 | System.out.println(", target balance = " + targetBalance); 94 | } else { 95 | System.out.println(""); 96 | } 97 | MappingFunction mappingFunction = 98 | new RDFMapping(rdf, rf, new RackIsolationPlacementRules(), targetBalance); 99 | List data = TestUtils.createData(); 100 | 101 | long begin = System.nanoTime(); 102 | Map> mapping = mappingFunction.computeMapping(data, topology); 103 | long end = System.nanoTime(); 104 | System.out.println("mapping time: " + (end - begin)/1000000 + " ms"); 105 | TestUtils.analyzeMapping(rf, topology.getChildrenCount(Types.DATA_CENTER), data.size(), 106 | topology.getAllLeafNodes().size(), mapping); 107 | } 108 | 109 | @Test 110 | public void testPlainCrush() { 111 | System.out.println("testing distribution using plain CRUSH"); 112 | Node topo = TestUtils.createLargeTree(); 113 | final int rf = 2; 114 | MappingFunction mappingFunction = new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules()); 115 | List data = TestUtils.createData(); 116 | long begin = System.nanoTime(); 117 | Map> map = mappingFunction.computeMapping(data, topo); 118 | long end = System.nanoTime(); 119 | System.out.println("mapping time: " + (end - begin)/1000000 + " ms"); 120 | TestUtils.analyzeMapping(rf, topo.getChildrenCount(Types.DATA_CENTER), data.size(), 121 | topo.getAllLeafNodes().size(), map); 122 | } 123 | 124 | @Test 125 | public void testPlainCrushWithTargetBalance() { 126 | final double 
targetBalance = 0.3d; 127 | System.out.println("testing distribution using plain CRUSH with target balance of " + targetBalance); 128 | Node topo = TestUtils.createLargeTree(); 129 | final int rf = 2; 130 | MappingFunction mappingFunction = 131 | new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules(), targetBalance); 132 | List data = TestUtils.createData(); 133 | long begin = System.nanoTime(); 134 | Map> map = mappingFunction.computeMapping(data, topo); 135 | long end = System.nanoTime(); 136 | System.out.println("mapping time: " + (end - begin)/1000000 + " ms"); 137 | TestUtils.analyzeMapping(rf, topo.getChildrenCount(Types.DATA_CENTER), data.size(), 138 | topo.getAllLeafNodes().size(), map); 139 | } 140 | } 141 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/integrated/RDFStabilityTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.integrated; 17 | 18 | import static org.junit.Assert.assertEquals; 19 | 20 | import java.util.Arrays; 21 | import java.util.HashSet; 22 | import java.util.List; 23 | import java.util.Map; 24 | import java.util.Set; 25 | 26 | import org.junit.Test; 27 | 28 | import com.twitter.crunch.MappingDiff; 29 | import com.twitter.crunch.MappingDiff.Value; 30 | import com.twitter.crunch.MappingFunction; 31 | import com.twitter.crunch.Node; 32 | import com.twitter.crunch.RDFMapping; 33 | import com.twitter.crunch.RackIsolationPlacementRules; 34 | import com.twitter.crunch.SimpleCRUSHMapping; 35 | import com.twitter.crunch.TestUtils; 36 | import com.twitter.crunch.Types; 37 | 38 | public class RDFStabilityTest { 39 | @Test 40 | public void testStability() { 41 | doTestStability(8); 42 | doTestStability(32); 43 | doTestStability(128); 44 | } 45 | 46 | private void doTestStability(final int rdf) { 47 | Node topo = TestUtils.createLargeTree(); 48 | final int nodeSize = topo.getAllLeafNodes().size(); 49 | final int dcCount = topo.getChildrenCount(Types.DATA_CENTER); 50 | final int rf = 2; 51 | System.out.println("RDF = " + rdf); 52 | MappingFunction mappingFunction = new RDFMapping(rdf, rf, new RackIsolationPlacementRules(), 0.3d); 53 | List data = TestUtils.createData(); 54 | 55 | Map> before = mappingFunction.computeMapping(data, topo); 56 | verifyMapping(before, dcCount, rf, data.size()); 57 | 58 | // make changes to the topology and compute the mapping again 59 | // reduce weight on one node to see if it gets less data 60 | Node removed = TestUtils.removeOneNode(topo); 61 | Map> after = mappingFunction.computeMapping(data, topo); 62 | verifyMapping(after, dcCount, rf, data.size()); 63 | 64 | // calculate the diff: vb -> its node movement 65 | Map>> diff = MappingDiff.calculateDiff(before, after); 66 | analyzeDiff(before, diff, rf, dcCount, nodeSize, removed); 67 | 68 | // calculate per-data replica-replacement counts: high values 
might indicate that data 69 | // completely swapped RDF groups 70 | int[] movementHistogram = new int[rf + 1]; 71 | // for data not affected by the diff, increment 0 72 | for (Long input: before.keySet()) { 73 | if (!diff.containsKey(input)) 74 | movementHistogram[0]++; 75 | } 76 | // for data affected by the diff, increment the appropriate movement count 77 | for (List> value: diff.values()) { 78 | movementHistogram[value.size() / 2]++; 79 | } 80 | System.out.println("per-data movement histogram: " + Arrays.toString(movementHistogram)); 81 | } 82 | 83 | private void verifyMapping(Map> mapping, int dcCount, int rf, int dataSize) { 84 | assertEquals(dataSize, mapping.size()); 85 | for (List nodes: mapping.values()) { 86 | // ensure there are no duplicates 87 | Set set = new HashSet(nodes); 88 | assertEquals(rf*dcCount, set.size()); 89 | } 90 | } 91 | 92 | private void analyzeDiff(Map> before, Map>> diff, 93 | final int rf, final int dcCount, final int nodeSize, Node removed) { 94 | final int beforeSize = before.size(); 95 | // the same data may have moved in mulitple nodes (replicas) 96 | // need to count the actual movement 97 | int moves = 0; 98 | for (List> l: diff.values()) { 99 | moves += l.size(); 100 | } 101 | // it's a pretty good assumption that the moves are always pairs 102 | moves /= 2; 103 | System.out.println("number of data objects that moved: " + moves); 104 | float relativeMovement = ((float)moves)/(beforeSize*rf*dcCount); 105 | float multiplier = relativeMovement*nodeSize; 106 | System.out.println("relative movement (%): " + relativeMovement*100); 107 | System.out.println("movement multiplier: " + multiplier); 108 | System.out.println("data objects moved per node (mean): " + ((float)moves)/nodeSize); 109 | 110 | // reverse the map and process it again 111 | reverseDiff(before, diff, removed); 112 | } 113 | 114 | private void reverseDiff(Map> before, Map>> diff, Node removed) { 115 | Map>> map = TestUtils.calculateReverseDiff(before, diff, removed); 
116 | 117 | // identify the worst case number 118 | int maxMoves = 0; 119 | for (Map.Entry>> e: map.entrySet()) { 120 | List> list = e.getValue(); 121 | int send = 0; 122 | int receive = 0; 123 | for (Value v: list) { 124 | switch (v.getDifferenceType()) { 125 | case REMOVED: 126 | send++; 127 | break; 128 | case ADDED: 129 | receive++; 130 | break; 131 | } 132 | } 133 | if (send >= 50 || receive >= 50) { 134 | System.err.println("node " + e.getKey() + " has movement of 50 or greater!"); 135 | } 136 | maxMoves = Math.max(Math.max(send, receive), maxMoves); 137 | } 138 | System.out.println("data objects moved per node (max): " + maxMoves); 139 | } 140 | 141 | @Test 142 | public void testStabilityPlainCrush() { 143 | System.out.println("testing stability using plain CRUSH"); 144 | Node topo = TestUtils.createLargeTree(); 145 | final int nodeSize = topo.getAllLeafNodes().size(); 146 | final int dcCount = topo.getChildrenCount(Types.DATA_CENTER); 147 | final int rf = 2; 148 | MappingFunction mappingFunction = new SimpleCRUSHMapping(rf, new RackIsolationPlacementRules(), 0.3d); 149 | List data = TestUtils.createData(); 150 | 151 | Map> before = mappingFunction.computeMapping(data, topo); 152 | verifyMapping(before, dcCount, rf, data.size()); 153 | 154 | // make changes to the topology and compute the mapping again 155 | // reduce weight on one node to see if it gets less data 156 | Node removed = TestUtils.removeOneNode(topo); 157 | 158 | Map> after = mappingFunction.computeMapping(data, topo); 159 | verifyMapping(after, dcCount, rf, data.size()); 160 | 161 | // calculate the diff 162 | Map>> diff = MappingDiff.calculateDiff(before, after); 163 | analyzeDiff(before, diff, rf, dcCount, nodeSize, removed); 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/integrated/SiblingBiasTest.java: -------------------------------------------------------------------------------- 1 | /** 2 | * 
Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.integrated; 17 | 18 | import java.util.ArrayList; 19 | import java.util.HashMap; 20 | import java.util.List; 21 | import java.util.Map; 22 | 23 | import org.junit.Test; 24 | 25 | import com.twitter.crunch.MappingDiff; 26 | import com.twitter.crunch.MappingDiff.Difference; 27 | import com.twitter.crunch.MappingDiff.Value; 28 | import com.twitter.crunch.MappingFunction; 29 | import com.twitter.crunch.Node; 30 | import com.twitter.crunch.Node.Selection; 31 | import com.twitter.crunch.RackIsolationPlacementRules; 32 | import com.twitter.crunch.SimpleCRUSHMapping; 33 | import com.twitter.crunch.StorageSystemTypes; 34 | import com.twitter.crunch.Types; 35 | 36 | public class SiblingBiasTest { 37 | 38 | @Test 39 | public void testMultiLevel() { 40 | Node topo = createSmallTree(); 41 | doTest(topo, 400); 42 | } 43 | 44 | @Test 45 | public void testOneLevel() { 46 | Node topo = createFlatTree(); 47 | doTest(topo, 400); 48 | } 49 | 50 | private void doTest(Node topo, int dataSize) { 51 | List data = createData(dataSize); 52 | 53 | MappingFunction mappingFunction = new SimpleCRUSHMapping(1, new RackIsolationPlacementRules()); // RF = 1 54 | Map> before = mappingFunction.computeMapping(data, topo); 55 | analyzeDistribution(before); 56 | 57 | Node removed = removeOneNode(topo); 58 | Map> after = 
mappingFunction.computeMapping(data, topo); 59 | analyzeDistribution(after); 60 | // let's figure out where the data that belong in old node went 61 | analyzeMovement(before, after, removed); 62 | } 63 | 64 | private void printMap(Map map) { 65 | for (Map.Entry e: map.entrySet()) { 66 | System.out.println(e.getKey() + " => " + e.getValue()); 67 | } 68 | } 69 | 70 | private void analyzeDistribution(Map> map) { 71 | Map dist = new HashMap(); 72 | for (Map.Entry> e: map.entrySet()) { 73 | for (Node node: e.getValue()) { 74 | Integer count = dist.get(node); 75 | dist.put(node, 76 | count == null ? 1 : ++count); 77 | } 78 | } 79 | printMap(dist); 80 | } 81 | 82 | private Node createSmallTree() { 83 | final int rackCount = 2; 84 | final int hdCount = 2; 85 | 86 | int id = 0; 87 | // root 88 | Node root = new Node(); 89 | root.setName("root"); 90 | root.setId(id++); 91 | root.setType(Types.ROOT); 92 | root.setSelection(Selection.STRAW); 93 | // 2 racks 94 | List racks = new ArrayList(); 95 | for (int j = 1; j <= rackCount; j++) { 96 | Node rack = new Node(); 97 | racks.add(rack); 98 | rack.setName("rack" + j); 99 | rack.setId(id++); 100 | rack.setType(StorageSystemTypes.RACK); 101 | rack.setSelection(Selection.STRAW); 102 | rack.setParent(root); 103 | // 2 hds 104 | List hds = new ArrayList(); 105 | for (int k = 1; k <= hdCount; k++) { 106 | Node hd = new Node(); 107 | hds.add(hd); 108 | hd.setName(rack.getName() + "hd" + k); 109 | hd.setId(id++); 110 | hd.setType(StorageSystemTypes.DISK); 111 | hd.setWeight(100); 112 | hd.setParent(rack); 113 | } 114 | rack.setChildren(hds); 115 | } 116 | root.setChildren(racks); 117 | return root; 118 | } 119 | 120 | private Node createFlatTree() { 121 | int id = 0; 122 | // root 123 | Node root = new Node(); 124 | root.setName("root"); 125 | root.setId(id++); 126 | root.setType(Types.ROOT); 127 | root.setSelection(Selection.STRAW); 128 | // 4 hds 129 | List hds = new ArrayList(); 130 | Node hd = createNode("rack1hd1", 
StorageSystemTypes.DISK, id++, 100, null); 131 | hd.setParent(root); 132 | hds.add(hd); 133 | hd = createNode("rack1hd2", StorageSystemTypes.DISK, id++, 100, null); 134 | hd.setParent(root); 135 | hds.add(hd); 136 | hd = createNode("rack2hd1", StorageSystemTypes.DISK, id++, 100, null); 137 | hd.setParent(root); 138 | hds.add(hd); 139 | hd = createNode("rack2hd2", StorageSystemTypes.DISK, id++, 100, null); 140 | hd.setParent(root); 141 | hds.add(hd); 142 | root.setChildren(hds); 143 | return root; 144 | } 145 | 146 | private Node createNode(String name, int type, long id, long weight, Selection selection) { 147 | Node node = new Node(); 148 | node.setName(name); 149 | node.setType(type); 150 | node.setId(id); 151 | node.setWeight(weight); 152 | node.setSelection(selection); 153 | return node; 154 | } 155 | 156 | private List createData(final int size) { 157 | List data = new ArrayList(); 158 | for (int i = 1; i <= size; i++) { 159 | data.add((long)i); 160 | } 161 | return data; 162 | } 163 | 164 | private Node removeOneNode(Node topo) { 165 | Node node = topo; 166 | while (!node.isLeaf()) { 167 | List children = node.getChildren(); 168 | node = children.get(children.size()-1); 169 | if (node.isLeaf()) { 170 | children.remove(node); 171 | System.out.println("marked " + node + " as failed"); 172 | } 173 | } 174 | return node; 175 | } 176 | 177 | private void analyzeMovement(Map> before, Map> after, Node removed) { 178 | Map>> diff = MappingDiff.calculateDiff(before, after); 179 | Map distributed = new HashMap(); 180 | for (Map.Entry>> e: diff.entrySet()) { 181 | Long data = e.getKey(); 182 | List> moves = e.getValue(); 183 | boolean hit = false; 184 | for (Value v: moves) { 185 | Node node = v.get(); 186 | if (node.equals(removed)) { 187 | hit = true; 188 | break; 189 | } 190 | } 191 | if (hit) { 192 | for (Value v: moves) { 193 | if (v.getDifferenceType() == Difference.ADDED) { 194 | Node destination = v.get(); 195 | Integer count = distributed.get(destination); 196 
| distributed.put(destination, 197 | count == null ? 1 : ++count); 198 | System.out.println(data + " moved from " + removed.getName() + " to " + destination.getName()); 199 | } 200 | } 201 | } 202 | } 203 | printMap(distributed); 204 | } 205 | } 206 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/CalculateMovement.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import java.util.*; 19 | 20 | public class CalculateMovement { 21 | 22 | 23 | private static void calTopologyChange(Map> before, Map> after) { 24 | int moved = 0; 25 | 26 | for (Long bucket: before.keySet()) { 27 | List beforeMap = before.get(bucket); 28 | List afterMap = after.get(bucket); 29 | for (String node: beforeMap) { 30 | if (!afterMap.contains(node)) moved++; 31 | } 32 | } 33 | 34 | System.out.print(String.format("%d", moved)); 35 | } 36 | 37 | public static void main(String[] args) throws Exception { 38 | if (args.length != 2) { 39 | System.out.println("Usage: old_map_filename new_map_filename"); 40 | System.out.println(" moved"); 41 | return; 42 | } 43 | 44 | String before = args[0]; 45 | String after = args[1]; 46 | 47 | calTopologyChange(Utils.importMap(before), Utils.importMap(after)); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/CreateBlobstoreMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.twitter.crunch.*; 22 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer; 23 | import com.twitter.crunch.tools.jsontopology.MappingParameters; 24 | import org.codehaus.jackson.map.ObjectMapper; 25 | import org.slf4j.Logger; 26 | import org.slf4j.LoggerFactory; 27 | 28 | public class CreateBlobstoreMapping { 29 | private static final Logger logger = LoggerFactory.getLogger(CreateBlobstoreMapping.class); 30 | 31 | private static List initializeVirtualBuckets(final int count) { 32 | List data = new ArrayList(count); 33 | for (long l = 1; l <= count; l++) { 34 | data.add(l); 35 | } 36 | 37 | return Collections.unmodifiableList(data); 38 | } 39 | 40 | public static Map> createNodeMapv1(MappingParameters params, Node root) throws InvalidTopologyException, IOException { 41 | final RDFMapping mappingFunction = new RDFMapping( 42 | params.getRdf(), 43 | params.getRf(), 44 | new RackIsolationPlacementRules(), 45 | params.getTargetBalance()); 46 | 47 | final List buckets = initializeVirtualBuckets(params.getVirtualBucketCount()); 48 | final Map> mapping = mappingFunction.computeMapping(buckets, root); 49 | 50 | Map> rdfMap = mappingFunction.getNewRdfMap(); 51 | Utils.exportRDFMap("rdfmap_v1", rdfMap); 52 | 53 | return mapping; 54 | } 55 | 56 | public static Map> createNodeMapv3(MappingParameters params, Node root, String oldFileName, String newFileName, 57 | int rackDiversity, boolean trackCapacity, String migrationMapFileName) throws InvalidTopologyException { 58 | final List buckets = initializeVirtualBuckets(params.getVirtualBucketCount()); 59 | 60 | Map> currentMap; 61 | Map> migrationMap = null; 62 | try { 63 | currentMap = Utils.importRDFMap(oldFileName); 64 | } catch (Exception e) { 65 | currentMap = new HashMap>(); 66 | } 67 | 68 | try { 69 | if (migrationMapFileName != null) 70 | migrationMap = 
Utils.importRDFMap(migrationMapFileName); 71 | } catch (Exception e) { 72 | migrationMap = null; 73 | } 74 | 75 | int rdfRange = (int)(params.getRdf() * 0.2); 76 | 77 | final StableRdfMapping mappingFunction = new StableRdfMapping( 78 | params.getRdf(), 79 | params.getRf(), 80 | new RackIsolationPlacementRules(), 81 | currentMap, 82 | params.getRdf() - rdfRange, 83 | params.getRdf() + rdfRange, 84 | params.getTargetBalance(), 85 | rackDiversity, 86 | trackCapacity, 87 | migrationMap); 88 | 89 | final Map> mapping = mappingFunction.computeMapping(buckets, root); 90 | 91 | final Map> newRdfMap = mappingFunction.getNewRdfMap(); 92 | try { 93 | Utils.exportRDFMap(newFileName, newRdfMap); 94 | } catch (IOException e) { 95 | System.out.println(e.getMessage()); 96 | } 97 | 98 | return mapping; 99 | } 100 | 101 | public static void main(String[] args) throws Exception { 102 | if (args.length < 4) { 103 | System.out.println("Usage: version topology_json topology_params_json mapping_filename rdf target_balance rack_diversity track_capacity [new_rdf_filename] [old_rdf_filename] [migration_map]"); 104 | System.out.println(" version 1: Generate RDF Map using libcrunch"); 105 | System.out.println(" version 3: Generate RDF Map using stateful distribution"); 106 | return; 107 | } 108 | 109 | final int version = Integer.parseInt(args[0]); 110 | final String topologyJson = args[1]; 111 | final String topologyParamsJson = args[2]; 112 | final String fileName = args[3]; 113 | 114 | final int rdf = Integer.parseInt(args[4]); 115 | final double targetBalance = Double.parseDouble(args[5]); 116 | 117 | final int rackDiversity = Integer.parseInt(args[6]); 118 | final boolean trackCapacity = Boolean.parseBoolean(args[7]); 119 | final String newFileName = args[8]; 120 | final String oldFileName = args[9]; 121 | String migrationMapFileName = null; 122 | if (args.length == 11) { 123 | migrationMapFileName = args[10]; 124 | } 125 | 126 | JsonTopologyDeserializer deserializer = new 
JsonTopologyDeserializer(); 127 | com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyJson)); 128 | Node root = topology.getRootNode(); 129 | 130 | ObjectMapper mapper = new ObjectMapper(); 131 | MappingParameters params = mapper.readValue(new File(topologyParamsJson), MappingParameters.class); 132 | 133 | Map> map = null; 134 | switch(version) { 135 | case 1: 136 | map = createNodeMapv1(params, root); 137 | break; 138 | case 3: 139 | params.setRdf(rdf); 140 | params.setTargetBalance(targetBalance); 141 | map = createNodeMapv3(params, root, oldFileName, newFileName, rackDiversity, trackCapacity, migrationMapFileName); 142 | break; 143 | default: 144 | System.out.println("Wrong version"); 145 | break; 146 | } 147 | 148 | // Dump map 149 | Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"); 150 | try { 151 | for (Long bucket: new TreeSet(map.keySet())) { 152 | out.append(bucket.toString()); 153 | List replicas = map.get(bucket); 154 | for (Node replica: replicas) { 155 | out.append(','); 156 | out.append(replica.getName()); 157 | } 158 | out.append('\n'); 159 | } 160 | } finally { 161 | out.flush(); 162 | out.close(); 163 | } 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/CreateDataMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | import com.twitter.crunch.*; 22 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer; 23 | import org.slf4j.Logger; 24 | import org.slf4j.LoggerFactory; 25 | import org.yaml.snakeyaml.Yaml; 26 | import org.yaml.snakeyaml.constructor.Constructor; 27 | 28 | public class CreateDataMapping { 29 | private static final Logger logger = LoggerFactory.getLogger(CreateDataMapping.class); 30 | 31 | private static List initializeVirtualBuckets(final int count) { 32 | List data = new ArrayList(count); 33 | for (long l = 1; l <= count; l++) { 34 | data.add(l); 35 | } 36 | 37 | return Collections.unmodifiableList(data); 38 | } 39 | 40 | public static Map> createNodeMapv1(YamlTopologyFactory factory, Node root) throws InvalidTopologyException { 41 | final MappingFunction mappingFunction = new RDFMapping( 42 | factory.replica_distribution_factor, 43 | factory.replication_factor, 44 | new RackIsolationPlacementRules(), 45 | factory.target_balance_max); 46 | 47 | final List buckets = initializeVirtualBuckets(factory.number_of_buckets); 48 | final Map> mapping = mappingFunction.computeMapping(buckets, root); 49 | return mapping; 50 | } 51 | 52 | public static Map> createNodeMapv2(YamlTopologyFactory factory, Node root) throws InvalidTopologyException { 53 | final ProbingRDFMapping mappingFunction = new ProbingRDFMapping( 54 | factory.replica_distribution_factor, 55 | factory.replication_factor, 56 | new RackIsolationPlacementRules(), 57 | factory.weight_balance_tries, 58 | factory.weight_balance_factor, 59 | factory.history_count, 60 | factory.sd_threshold, 61 | factory.target_balance_max); 62 | 63 | final List buckets = initializeVirtualBuckets(factory.number_of_buckets); 64 | final Map> mapping = mappingFunction.computeMapping(buckets, 
root); 65 | return mapping; 66 | } 67 | 68 | public static Map> createNodeMapv3(YamlTopologyFactory factory, Node root, String oldFileName, String newFileName) throws InvalidTopologyException { 69 | final List buckets = initializeVirtualBuckets(factory.number_of_buckets); 70 | 71 | Map> currentMap = null; 72 | try { 73 | currentMap = Utils.importRDFMap(oldFileName); 74 | } catch (Exception e) { 75 | currentMap = new HashMap>(); 76 | } 77 | final StableRdfMapping mappingFunction = new StableRdfMapping( 78 | factory.replica_distribution_factor, 79 | factory.replication_factor, 80 | new RackIsolationPlacementRules(), 81 | currentMap, 82 | factory.replica_distribution_factor_min, 83 | factory.replica_distribution_factor_max, 84 | factory.target_balance_max, 85 | 8, 86 | false); 87 | 88 | final Map> mapping = mappingFunction.computeMapping(buckets, root); 89 | 90 | final Map> newRdfMap = mappingFunction.getNewRdfMap(); 91 | try { 92 | Utils.exportRDFMap(newFileName, newRdfMap); 93 | } catch (IOException e) { 94 | System.out.println(e.getMessage()); 95 | } 96 | 97 | return mapping; 98 | } 99 | 100 | public static void main(String[] args) throws Exception { 101 | if (args.length < 4) { 102 | System.out.println("Usage: yaml version topology_yaml bucket_map_filename [old_rdf_filename] [new_rdf_filename]"); 103 | System.out.println(" json version topology_yaml topology_json bucket_map_filename [old_rdf_filename] [new_rdf_filename]"); 104 | System.out.println(" version 1: Generate RDF Map using libcrunch"); 105 | System.out.println(" version 2: Generate RDF Map using libcrunch with probing"); 106 | System.out.println(" version 3: Generate RDF Map using stateful distribution"); 107 | return; 108 | } 109 | 110 | final int version = Integer.parseInt(args[1]); 111 | final String topologyConfig = args[2]; 112 | 113 | String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig)))); 114 | final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class)); 
115 | final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents); 116 | 117 | int offset = 0; 118 | Node root = null; 119 | if (args[0].equalsIgnoreCase("yaml")) { 120 | offset = 0; 121 | root = topologyFactory.loadTopology(); 122 | } else { 123 | offset = 1; 124 | String topologyJson = args[3]; 125 | JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer(); 126 | com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyJson)); 127 | root = topology.getRootNode(); 128 | } 129 | final String fileName = args[3 + offset]; 130 | 131 | Map> map = null; 132 | switch(version) { 133 | case 1: 134 | map = createNodeMapv1(topologyFactory, root); 135 | break; 136 | case 2: 137 | map = createNodeMapv2(topologyFactory, root); 138 | break; 139 | case 3: 140 | final String oldFileName = args[4 + offset]; 141 | final String newFileName = args[5 + offset]; 142 | map = createNodeMapv3(topologyFactory, root, oldFileName, newFileName); 143 | break; 144 | default: 145 | System.out.println("Wrong version"); 146 | break; 147 | } 148 | 149 | // Dump map 150 | Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"); 151 | try { 152 | for (Long bucket: new TreeSet(map.keySet())) { 153 | out.append(bucket.toString()); 154 | List replicas = map.get(bucket); 155 | for (Node replica: replicas) { 156 | out.append(','); 157 | out.append(replica.getName()); 158 | } 159 | out.append('\n'); 160 | } 161 | } finally { 162 | out.flush(); 163 | out.close(); 164 | } 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/EvaluateMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import com.twitter.crunch.Node; 19 | import com.twitter.crunch.MappingEvaluator; 20 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer; 21 | import org.yaml.snakeyaml.Yaml; 22 | import org.yaml.snakeyaml.constructor.Constructor; 23 | 24 | import java.io.File; 25 | import java.io.FileInputStream; 26 | import java.io.FileNotFoundException; 27 | import java.util.*; 28 | 29 | public class EvaluateMapping { 30 | private static void evaluateMap(Map> mapping, Map weight, Map> rdfMap) { 31 | Map keyDistribution; 32 | double mean; 33 | double stdDev; 34 | Set primaryNodes = null; 35 | 36 | if (rdfMap != null) primaryNodes = rdfMap.keySet(); 37 | 38 | keyDistribution = new HashMap(); 39 | for (Long key: mapping.keySet()) { 40 | for (String node: mapping.get(key)) { 41 | if (primaryNodes != null && !primaryNodes.contains(node)) { 42 | //System.out.println("RDF under min - " + node); 43 | continue; 44 | } 45 | 46 | if (keyDistribution.containsKey(node)) { 47 | final long count = keyDistribution.get(node) + 1; 48 | keyDistribution.put(node, count); 49 | } else { 50 | keyDistribution.put(node, (long)1); 51 | } 52 | } 53 | } 54 | 55 | for (String node : weight.keySet()) { 56 | if (primaryNodes != null && !primaryNodes.contains(node)) continue; 57 | 58 | if (!keyDistribution.containsKey(node)) keyDistribution.put(node, (long)0); 59 | } 60 | 61 | mean = MappingEvaluator.getWeightedMean(keyDistribution, weight); 62 | stdDev = 
MappingEvaluator.getWeightedStandardDeviation(keyDistribution, weight); 63 | Long min = Collections.min(keyDistribution.values()); 64 | Long max = Collections.max(keyDistribution.values()); 65 | final int replicaOnlyNodes = primaryNodes == null ? 0 : (weight.keySet().size() - primaryNodes.size()); 66 | 67 | System.out.print(String.format("%d,%d,%.4f,%.4f,%d", min, max, mean, stdDev, replicaOnlyNodes)); 68 | } 69 | 70 | public static void main(String[] args) throws Exception { 71 | if (args.length < 3) { 72 | System.out.println("Usage: yaml|json topology_file map_filename rdfmap_filename"); 73 | System.out.println(" min,max,mean,sd,replicaOnlyNodes"); 74 | return; 75 | } 76 | 77 | final String topologyConfig = args[1]; 78 | final String mapFileName = args[2]; 79 | Map> rdfMap = null; 80 | 81 | Node root = null; 82 | if (args[0].equalsIgnoreCase("yaml")) { 83 | String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig)))); 84 | final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class)); 85 | final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents); 86 | root = topologyFactory.loadTopology(); 87 | } else { 88 | try { 89 | final String rdfMapFileName = args[3]; 90 | rdfMap = Utils.importRDFMap(rdfMapFileName); 91 | } catch (FileNotFoundException ex) { 92 | // ignore this 93 | } 94 | JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer(); 95 | com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyConfig)); 96 | root = topology.getRootNode(); 97 | } 98 | 99 | Map> map = Utils.importMap(mapFileName); 100 | 101 | final List allLeaves = root.getAllLeafNodes(); 102 | 103 | Map definedWeight = new HashMap(); 104 | for (Node node : allLeaves) { 105 | if (!node.isFailed() && node.getWeight() != 0) definedWeight.put(node.getName(), node.getWeight()); 106 | //if (node.getWeight() == 0) System.out.println("Weight 0 - " + node.getName()); 107 | } 108 | 
109 | evaluateMap(map, definedWeight, rdfMap); 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/EvaluateRDFMapping.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import com.twitter.crunch.MappingEvaluator; 19 | import com.twitter.crunch.Node; 20 | import com.twitter.crunch.tools.jsontopology.JsonTopologyDeserializer; 21 | import org.yaml.snakeyaml.Yaml; 22 | import org.yaml.snakeyaml.constructor.Constructor; 23 | 24 | import java.io.File; 25 | import java.io.FileInputStream; 26 | import java.util.*; 27 | 28 | public class EvaluateRDFMapping { 29 | 30 | private static void evaluateRDFMap(Map> mapping, Map weight) { 31 | Map keyDistribution; 32 | double mean; 33 | double stdDev; 34 | 35 | keyDistribution = new HashMap(); 36 | for (String key: mapping.keySet()) { 37 | for (String node: mapping.get(key)) { 38 | if (keyDistribution.containsKey(node)) { 39 | final long count = keyDistribution.get(node) + 1; 40 | keyDistribution.put(node, count); 41 | } else { 42 | keyDistribution.put(node, (long)1); 43 | } 44 | } 45 | } 46 | 47 | for (String key: mapping.keySet()) { 48 | List replicaList = mapping.get(key); 49 | Set replicaSet = new HashSet(replicaList); 50 | 
if (replicaSet.size() != replicaList.size()) System.out.print("Duplicates found for " + key); 51 | } 52 | 53 | for (String node : weight.keySet()) { 54 | if (!keyDistribution.containsKey(node)) keyDistribution.put(node, (long)0); 55 | } 56 | 57 | mean = MappingEvaluator.getWeightedMean(keyDistribution, weight); 58 | stdDev = MappingEvaluator.getWeightedStandardDeviation(keyDistribution, weight); 59 | Long min = Collections.min(keyDistribution.values()); 60 | Long max = Collections.max(keyDistribution.values()); 61 | 62 | System.out.print(String.format("%d,%d,%.4f,%.4f", min, max, mean, stdDev)); 63 | } 64 | 65 | public static void main(String[] args) throws Exception { 66 | if (args.length != 3) { 67 | System.out.println("Usage: yaml|json topology_file rdfmap_filename"); 68 | System.out.println(" min,max,mean,sd"); 69 | return; 70 | } 71 | 72 | final String topologyConfig = args[1]; 73 | final String rdfMapFileName = args[2]; 74 | 75 | Map> map = Utils.importRDFMap(rdfMapFileName); 76 | 77 | Node root = null; 78 | if (args[0].equalsIgnoreCase("yaml")) { 79 | String yamlContents = new String(Utils.slurp((new FileInputStream(topologyConfig)))); 80 | final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class)); 81 | final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents); 82 | root = topologyFactory.loadTopology(); 83 | } else { 84 | JsonTopologyDeserializer deserializer = new JsonTopologyDeserializer(); 85 | com.twitter.crunch.tools.jsontopology.Topology topology = deserializer.readTopology(new File(topologyConfig)); 86 | root = topology.getRootNode(); 87 | } 88 | 89 | final List allLeaves = root.getAllLeafNodes(); 90 | 91 | Map definedWeight = new HashMap(); 92 | for (Node node : allLeaves) { 93 | if (!node.isFailed()) definedWeight.put(node.getName(), node.getWeight()); 94 | } 95 | 96 | evaluateRDFMap(map, definedWeight); 97 | } 98 | } 99 | -------------------------------------------------------------------------------- 
/**
 * Checked exception signalling that a topology description cannot be turned
 * into a node tree. Mirrors the four standard {@link Exception} constructors.
 */
public class InvalidTopologyException extends Exception {
  /** Creates an exception with neither a message nor a cause. */
  public InvalidTopologyException() {
    super();
  }

  /**
   * @param message description of what is wrong with the topology
   */
  public InvalidTopologyException(String message) {
    super(message);
  }

  /**
   * @param message description of what is wrong with the topology
   * @param cause   underlying failure
   */
  public InvalidTopologyException(String message, Throwable cause) {
    super(message, cause);
  }

  /**
   * @param cause underlying failure
   */
  public InvalidTopologyException(Throwable cause) {
    super(cause);
  }
}
12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import org.yaml.snakeyaml.Yaml; 19 | import org.yaml.snakeyaml.constructor.Constructor; 20 | 21 | import java.io.FileInputStream; 22 | import java.io.PrintWriter; 23 | import java.util.ArrayList; 24 | 25 | public class TopologyGenerator { 26 | public static void main(String[] args) throws Exception { 27 | if (args.length != 4) { 28 | System.out.println("Usage: topology.template.yaml node_count node_weight output_filename"); 29 | return; 30 | } 31 | 32 | final String topologyTemplate = args[0]; 33 | final int nodeCount = Integer.parseInt(args[1]); 34 | final long nodeWeight = Long.parseLong(args[2]); 35 | final String fileName = args[3]; 36 | 37 | String yamlContents = new String(Utils.slurp((new FileInputStream(topologyTemplate)))); 38 | final Yaml yaml = new Yaml(new Constructor(YamlTopologyFactory.class)); 39 | final YamlTopologyFactory topologyFactory = (YamlTopologyFactory)yaml.load(yamlContents); 40 | 41 | topologyFactory.machine_list = new ArrayList(); 42 | for (int i = 1; i <= nodeCount; i++) { 43 | YamlTopologyFactory.TopologyMachine machine = new YamlTopologyFactory.TopologyMachine(); 44 | machine.name = String.format("smf1-%03d-01-sr1.prod.twitter.com", i); 45 | machine.weight = nodeWeight; 46 | topologyFactory.machine_list.add(machine); 47 | } 48 | 49 | PrintWriter out = new PrintWriter(fileName); 50 | out.print(yaml.dump(topologyFactory)); 51 | out.close(); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/Utils.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import java.io.*; 19 | import java.util.*; 20 | 21 | public class Utils { 22 | public static byte[] slurp(InputStream in) throws IOException { 23 | byte[] buf = new byte[Math.max(in.available(), 4096)]; 24 | 25 | int sofar = 0; 26 | while (true) { 27 | if (sofar == buf.length) { 28 | byte[] tmp = new byte[buf.length + 2]; 29 | System.arraycopy(buf, 0, tmp, 0, buf.length); 30 | buf = tmp; 31 | } 32 | int read = in.read(buf, sofar, buf.length - sofar); 33 | if (read == -1) { 34 | byte[] ret = new byte[sofar]; 35 | System.arraycopy(buf, 0, ret, 0, sofar); 36 | return ret; 37 | } 38 | sofar += read; 39 | } 40 | } 41 | 42 | public static Map> importMap(String fileName) throws IOException { 43 | Scanner scanner = new Scanner(new FileInputStream(fileName), "UTF-8"); 44 | Map> map = new HashMap>(); 45 | try { 46 | while (scanner.hasNextLine()){ 47 | String line = scanner.nextLine(); 48 | if (line.contains(",")) { 49 | String[] parts = line.split(","); 50 | Long name = Long.parseLong(parts[0]); 51 | String[] replicas = Arrays.copyOfRange(parts, 1, parts.length); 52 | map.put(name, Arrays.asList(replicas)); 53 | } 54 | } 55 | }finally { 56 | scanner.close(); 57 | } 58 | 59 | return map; 60 | } 61 | 62 | public static void exportRDFMap(String fileName, Map> map) throws IOException { 63 | 64 | Writer out = new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8"); 65 | try { 66 | for (String node: map.keySet()) { 67 | out.append(node); 68 | List replicas = map.get(node); 
69 | for (String replica: replicas) { 70 | out.append(','); 71 | out.append(replica); 72 | } 73 | out.append('\n'); 74 | } 75 | } finally { 76 | out.flush(); 77 | out.close(); 78 | } 79 | } 80 | 81 | public static Map> importRDFMap(String fileName) throws IOException { 82 | 83 | Scanner scanner = new Scanner(new FileInputStream(fileName), "UTF-8"); 84 | Map> map = new HashMap>(); 85 | try { 86 | while (scanner.hasNextLine()){ 87 | String line = scanner.nextLine(); 88 | if (line.contains(",")) { 89 | String[] parts = line.split(","); 90 | String name = parts[0]; 91 | String[] replicas = Arrays.copyOfRange(parts, 1, parts.length); 92 | map.put(name, Arrays.asList(replicas)); 93 | } 94 | } 95 | }finally { 96 | scanner.close(); 97 | } 98 | 99 | return map; 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/YamlTopologyFactory.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.tools; 17 | 18 | import com.twitter.crunch.*; 19 | 20 | import java.util.*; 21 | 22 | public class YamlTopologyFactory { 23 | public int number_of_buckets = 50000; 24 | public int replica_distribution_factor = 6; 25 | public int replication_factor = 3; 26 | public int replica_distribution_factor_min = 5; 27 | public int replica_distribution_factor_max = 7; 28 | 29 | public double target_balance_max = 0; 30 | public boolean dump_detail_map = false; 31 | public int weight_balance_tries = 1; 32 | public double weight_balance_factor = 0.1; 33 | public int history_count = 10; 34 | public double sd_threshold = 0.05; 35 | 36 | public List machine_list = null; 37 | public static class TopologyMachine { 38 | public String name; 39 | public Long weight; 40 | public String datacenter; 41 | public String rack; 42 | } 43 | 44 | private static void parseMachineName(TopologyMachine machine) throws InvalidTopologyException { 45 | String[] nameParts = machine.name.split("\\."); 46 | if (nameParts.length == 0) { 47 | throw new InvalidTopologyException("Machine name " + machine.name + " is not fully qualified domain name"); 48 | } 49 | String machineName = nameParts[0]; 50 | String[] parts = machineName.split("-"); 51 | if (parts.length != 4) { 52 | throw new InvalidTopologyException("Machine name " + machineName + " is not in dc-rack-subrack-# format"); 53 | } 54 | machine.datacenter = parts[0]; 55 | machine.rack = parts[1]; 56 | } 57 | 58 | private Node buildLibcrunchTree(Map>> datacenters) { 59 | int id = 0; 60 | 61 | // Build the root 62 | Node libcrunchRoot = new Node(); 63 | libcrunchRoot.setName("root"); 64 | libcrunchRoot.setId(id++); 65 | libcrunchRoot.setType(Types.ROOT); 66 | libcrunchRoot.setSelection(Node.Selection.STRAW); 67 | 68 | List libcrunchDcs = new ArrayList(); 69 | for (String datacenter : datacenters.keySet()) { 70 | Node libcrunchDc = new Node(); 71 | libcrunchDc.setName(datacenter); 72 | libcrunchDc.setId(id++); 73 
| libcrunchDc.setType(Types.DATA_CENTER); 74 | libcrunchDc.setSelection(Node.Selection.STRAW); 75 | libcrunchDc.setParent(libcrunchRoot); 76 | 77 | List libcrunchRacks = new ArrayList(); 78 | for (String rack : datacenters.get(datacenter).keySet()) { 79 | Node libcrunchRack = new Node(); 80 | libcrunchRack.setName(rack); 81 | libcrunchRack.setId(id++); 82 | libcrunchRack.setType(StorageSystemTypes.RACK); 83 | libcrunchRack.setSelection(Node.Selection.STRAW); 84 | libcrunchRack.setParent(libcrunchDc); 85 | 86 | List libcrunchNodes = new ArrayList(); 87 | for (TopologyMachine machine : datacenters.get(datacenter).get(rack)) { 88 | Node libcrunchNode = new Node(); 89 | libcrunchNode.setName(machine.name); 90 | libcrunchNode.setWeight(machine.weight); 91 | libcrunchNode.setId(id++); 92 | libcrunchNode.setType(StorageSystemTypes.DISK); 93 | libcrunchNode.setSelection(Node.Selection.STRAW); 94 | libcrunchNode.setParent(libcrunchRack); 95 | libcrunchNodes.add(libcrunchNode); 96 | } 97 | libcrunchRack.setChildren(libcrunchNodes); 98 | libcrunchRacks.add(libcrunchRack); 99 | } 100 | libcrunchDc.setChildren(libcrunchRacks); 101 | libcrunchDcs.add(libcrunchDc); 102 | } 103 | libcrunchRoot.setChildren(libcrunchDcs); 104 | 105 | return libcrunchRoot; 106 | } 107 | 108 | public Node loadTopology() throws InvalidTopologyException { 109 | // Parse machine name to get datacenter and rack information 110 | Map>> datecenters = new HashMap>>(); 111 | for(TopologyMachine machine: machine_list) { 112 | parseMachineName(machine); 113 | if (datecenters.containsKey(machine.datacenter)) { 114 | Map> racks = datecenters.get(machine.datacenter); 115 | if (racks.containsKey(machine.rack)) { 116 | Set machines = racks.get(machine.rack); 117 | machines.add(machine); 118 | } else { 119 | Set machines = new HashSet(); 120 | machines.add(machine); 121 | racks.put(machine.rack, machines); 122 | } 123 | } else { 124 | Map> rack = new HashMap>(); 125 | Set machines = new HashSet(); 126 | 
machines.add(machine); 127 | rack.put(machine.rack, machines); 128 | datecenters.put(machine.datacenter, rack); 129 | } 130 | } 131 | 132 | return buildLibcrunchTree(datecenters); 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopology.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools.jsontopology; 17 | 18 | import com.twitter.crunch.Node; 19 | 20 | public class JsonTopology extends Topology { 21 | public void setVersion(long version) { 22 | this.version = version; 23 | } 24 | 25 | public void setRootNode(Node root) { 26 | this.root = root; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopologyDeserializer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools.jsontopology; 17 | 18 | import java.io.File; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | 22 | import org.codehaus.jackson.map.ObjectMapper; 23 | 24 | public final class JsonTopologyDeserializer implements TopologyDeserializer { 25 | public Topology readTopology(InputStream is) throws IOException { 26 | ObjectMapper mapper = new ObjectMapper(); 27 | return mapper.readValue(is, JsonTopology.class); 28 | } 29 | 30 | public Topology readTopology(File file) throws IOException { 31 | ObjectMapper mapper = new ObjectMapper(); 32 | return mapper.readValue(file, JsonTopology.class); 33 | } 34 | 35 | public Topology readTopology(String string) throws IOException { 36 | ObjectMapper mapper = new ObjectMapper(); 37 | return mapper.readValue(string, JsonTopology.class); 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/jsontopology/JsonTopologySerializer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools.jsontopology; 17 | 18 | import java.io.File; 19 | import java.io.IOException; 20 | import java.io.OutputStream; 21 | import java.util.List; 22 | import java.util.ListIterator; 23 | 24 | import org.codehaus.jackson.JsonGenerator; 25 | import org.codehaus.jackson.Version; 26 | import org.codehaus.jackson.annotate.JsonIgnore; 27 | import org.codehaus.jackson.map.JsonSerializer; 28 | import org.codehaus.jackson.map.Module; 29 | import org.codehaus.jackson.map.ObjectMapper; 30 | import org.codehaus.jackson.map.ObjectWriter; 31 | import org.codehaus.jackson.map.SerializationConfig; 32 | import org.codehaus.jackson.map.SerializerProvider; 33 | import org.codehaus.jackson.map.annotate.JsonSerialize; 34 | import org.codehaus.jackson.map.introspect.BasicBeanDescription; 35 | import org.codehaus.jackson.map.ser.BeanPropertyWriter; 36 | import org.codehaus.jackson.map.ser.BeanSerializerModifier; 37 | 38 | import com.twitter.crunch.Node; 39 | import com.twitter.crunch.Selector; 40 | 41 | public final class JsonTopologySerializer implements TopologySerializer { 42 | public void writeTopology(Topology topology, OutputStream os) throws IOException { 43 | getWriter().writeValue(os, topology); 44 | } 45 | 46 | public void writeTopology(Topology topology, String path) throws IOException { 47 | getWriter().writeValue(new File(path), topology); 48 | } 49 | 50 | private ObjectWriter getWriter() { 51 | ObjectMapper mapper = new ObjectMapper(); 52 | 53 | // omit null fields from serialization 54 | 
mapper.setSerializationInclusion(JsonSerialize.Inclusion.NON_NULL); 55 | // exclude certain fields and getter methods from node serialization via mixin 56 | mapper.getSerializationConfig().addMixInAnnotations(Node.class, MixIn.class); 57 | // register the module that suppresses the failed property if false 58 | mapper.registerModule(new IsFailedSuppressor()); 59 | 60 | return mapper.writer().withDefaultPrettyPrinter(); 61 | } 62 | 63 | private abstract class MixIn { 64 | @JsonIgnore public abstract long getId(); 65 | @JsonIgnore public abstract Node getParent(); 66 | @JsonIgnore public abstract boolean isLeaf(); 67 | @JsonIgnore public abstract Selector getSelector(); 68 | @JsonIgnore public abstract List getAllLeafNodes(); 69 | @JsonIgnore public abstract int getChildrenCount(); 70 | @JsonIgnore public abstract Node getRoot(); 71 | } 72 | 73 | private static class IsFailedSuppressor extends Module { 74 | public String getModuleName() { 75 | return "IsFailedSuppressor"; 76 | } 77 | 78 | public Version version() { 79 | return new Version(1, 0, 0, null); 80 | } 81 | 82 | public void setupModule(SetupContext context) { 83 | context.addBeanSerializerModifier(new BeanSerializerModifier() { 84 | @Override 85 | public List changeProperties(SerializationConfig config, 86 | BasicBeanDescription beanDesc, List beanProperties) { 87 | ListIterator it = beanProperties.listIterator(); 88 | while (it.hasNext()) { 89 | BeanPropertyWriter writer = it.next(); 90 | // replace the bean writer with my own if it is for "failed" 91 | if (writer.getName().equals("failed")) { 92 | BeanPropertyWriter newWriter = new IsFailedWriter(writer); 93 | it.set(newWriter); 94 | } 95 | } 96 | return beanProperties; 97 | } 98 | }); 99 | } 100 | } 101 | 102 | private static class IsFailedWriter extends BeanPropertyWriter { 103 | public IsFailedWriter(BeanPropertyWriter base) { 104 | super(base); 105 | } 106 | 107 | public IsFailedWriter(BeanPropertyWriter base, JsonSerializer ser) { 108 | super(base, 
ser); 109 | } 110 | 111 | @Override 112 | public void serializeAsField(Object bean, JsonGenerator jgen, SerializerProvider prov) 113 | throws Exception { 114 | Object value = get(bean); 115 | if (value instanceof Boolean) { 116 | Boolean b = (Boolean)value; 117 | if (!b.booleanValue()) { 118 | // filter if "failed" is false 119 | return; 120 | } 121 | } 122 | super.serializeAsField(bean, jgen, prov); 123 | } 124 | 125 | @Override 126 | public BeanPropertyWriter withSerializer(JsonSerializer ser) { 127 | return new IsFailedWriter(this, ser); 128 | } 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/jsontopology/MappingParameters.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
/**
 * Mutable bag of the numeric and boolean settings that describe a mapping, together with
 * the string keys used to label them.
 *
 * <p>Every field is {@code volatile}. NOTE(review): "rf" and "rdf" presumably stand for
 * replication factor and replica distribution factor - confirm with the callers.
 */
public class MappingParameters {
  // keys for mapping parameters
  public static final String RF = "rf";
  public static final String RDF = "rdf";
  public static final String TARGET_BALANCE = "target_balance";
  public static final String VIRTUAL_BUCKET_COUNT = "virtual_bucket_count";
  public static final String USE_CRUSH_MAPPING = "use_crush_mapping";

  private volatile int rf;
  private volatile int rdf;
  private volatile double targetBalance;
  private volatile int virtualBucketCount;
  private volatile boolean useCrushMapping;

  /** Creates an instance with every field at its Java default (0, 0.0, false). */
  public MappingParameters() {}

  /**
   * Copy constructor: takes a field-by-field snapshot of another instance.
   *
   * @param params the instance to copy from
   */
  public MappingParameters(MappingParameters params) {
    this.rf = params.rf;
    this.rdf = params.rdf;
    this.targetBalance = params.targetBalance;
    this.virtualBucketCount = params.virtualBucketCount;
    this.useCrushMapping = params.useCrushMapping;
  }

  /** @return the current "rf" value */
  public int getRf() {
    return rf;
  }

  /** @param rf the new "rf" value */
  public void setRf(int rf) {
    this.rf = rf;
  }

  /** @return the current "rdf" value */
  public int getRdf() {
    return rdf;
  }

  /** @param rdf the new "rdf" value */
  public void setRdf(int rdf) {
    this.rdf = rdf;
  }

  /** @return the current target balance */
  public double getTargetBalance() {
    return targetBalance;
  }

  /** @param targetBalance the new target balance */
  public void setTargetBalance(double targetBalance) {
    this.targetBalance = targetBalance;
  }

  /** @return the current virtual bucket count */
  public int getVirtualBucketCount() {
    return virtualBucketCount;
  }

  /** @param virtualBucketCount the new virtual bucket count */
  public void setVirtualBucketCount(int virtualBucketCount) {
    this.virtualBucketCount = virtualBucketCount;
  }

  /** @return whether the CRUSH mapping flag is set */
  public boolean isUseCrushMapping() {
    return useCrushMapping;
  }

  /** @param useCrushMapping the new value of the CRUSH mapping flag */
  public void setUseCrushMapping(boolean useCrushMapping) {
    this.useCrushMapping = useCrushMapping;
  }

  /**
   * Renders all parameters as {@code (key=value, key=value, ...)} in declaration order,
   * using the key constants of this class as labels.
   */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("(");
    text.append(RF).append("=").append(rf).append(", ");
    text.append(RDF).append("=").append(rdf).append(", ");
    text.append(TARGET_BALANCE).append("=").append(targetBalance).append(", ");
    text.append(VIRTUAL_BUCKET_COUNT).append("=").append(virtualBucketCount).append(", ");
    text.append(USE_CRUSH_MAPPING).append("=").append(useCrushMapping).append(")");
    return text.toString();
  }
}
5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 14 | */ 15 | 16 | package com.twitter.crunch.tools.jsontopology; 17 | 18 | import java.io.File; 19 | import java.io.IOException; 20 | import java.io.InputStream; 21 | 22 | public interface TopologyDeserializer { 23 | Topology readTopology(InputStream is) throws IOException; 24 | Topology readTopology(File file) throws IOException; 25 | Topology readTopology(String string) throws IOException; 26 | } 27 | -------------------------------------------------------------------------------- /src/test/java/com/twitter/crunch/tools/jsontopology/TopologySerializer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Twitter, Inc. 3 | * Licensed under the Apache License, Version 2.0 (the "License"); 4 | * you may not use this file except in compliance with the License. 5 | * You may obtain a copy of the License at 6 | * 7 | * http://www.apache.org/licenses/LICENSE-2.0 8 | * 9 | * Unless required by applicable law or agreed to in writing, software 10 | * distributed under the License is distributed on an "AS IS" BASIS, 11 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | * See the License for the specific language governing permissions and 13 | * limitations under the License. 
14 | */ 15 | 16 | package com.twitter.crunch.tools.jsontopology; 17 | 18 | import java.io.IOException; 19 | import java.io.OutputStream; 20 | 21 | public interface TopologySerializer { 22 | void writeTopology(Topology topology, OutputStream os) throws IOException; 23 | void writeTopology(Topology topology, String path) throws IOException; 24 | } 25 | -------------------------------------------------------------------------------- /src/test/resources/logback-test.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | libcrunch.log 4 | 6 | 7 | %d{HH:mm:ss.SSS} [%thread] %-5level %logger{36} - %msg%n 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | -------------------------------------------------------------------------------- /src/test/resources/topology.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "my-cluster", 3 | "type": 0, 4 | "selection": "STRAW", 5 | "children": [ 6 | { 7 | "name": "dc1", 8 | "type": 1, 9 | "selection": "STRAW", 10 | "children": [ 11 | { 12 | "name": "dc1rack1", 13 | "type": 2, 14 | "selection": "STRAW", 15 | "children": [ 16 | { 17 | "name": "dc1rack1node1", 18 | "type": 5, 19 | "weight": 100 20 | }, 21 | { 22 | "name": "dc1rack1node2", 23 | "type": 5, 24 | "weight": 100 25 | } 26 | ] 27 | }, 28 | { 29 | "name": "dc1rack2", 30 | "type": 2, 31 | "selection": "STRAW", 32 | "children": [ 33 | { 34 | "name": "dc1rack2node1", 35 | "type": 5, 36 | "weight": 100 37 | }, 38 | { 39 | "name": "dc1rack2node2", 40 | "type": 5, 41 | "weight": 50 42 | } 43 | ] 44 | }, 45 | { 46 | "name": "dc1rack3", 47 | "type": 2, 48 | "selection": "STRAW", 49 | "children": [ 50 | { 51 | "name": "dc1rack3node1", 52 | "type": 5, 53 | "weight": 100 54 | }, 55 | { 56 | "name": "dc1rack3node2", 57 | "type": 5, 58 | "weight": 100 59 | } 60 | ] 61 | }, 62 | { 63 | "name": "dc1rack4", 64 | "type": 2, 65 | "selection": "STRAW", 66 | "children": [ 67 | { 68 | "name": 
"dc1rack4node1", 69 | "type": 5, 70 | "weight": 100 71 | }, 72 | { 73 | "name": "dc1rack4node2", 74 | "type": 5, 75 | "weight": 100 76 | } 77 | ] 78 | }, 79 | { 80 | "name": "dc1rack5", 81 | "type": 2, 82 | "selection": "STRAW", 83 | "children": [ 84 | { 85 | "name": "dc1rack5node1", 86 | "type": 5, 87 | "weight": 100 88 | }, 89 | { 90 | "name": "dc1rack5node2", 91 | "type": 5, 92 | "weight": 100 93 | } 94 | ] 95 | }, 96 | { 97 | "name": "dc1rack6", 98 | "type": 2, 99 | "selection": "STRAW", 100 | "children": [ 101 | { 102 | "name": "dc1rack6node1", 103 | "type": 5, 104 | "weight": 100 105 | }, 106 | { 107 | "name": "dc1rack6node2", 108 | "type": 5, 109 | "weight": 100 110 | } 111 | ] 112 | } 113 | ] 114 | }, 115 | { 116 | "name": "dc2", 117 | "type": 1, 118 | "selection": "STRAW", 119 | "children": [ 120 | { 121 | "name": "dc2rack1", 122 | "type": 2, 123 | "selection": "STRAW", 124 | "children": [ 125 | { 126 | "name": "dc2rack1node1", 127 | "type": 5, 128 | "weight": 100 129 | }, 130 | { 131 | "name": "dc2rack1node2", 132 | "type": 5, 133 | "weight": 75 134 | } 135 | ] 136 | }, 137 | { 138 | "name": "dc2rack2", 139 | "type": 2, 140 | "selection": "STRAW", 141 | "children": [ 142 | { 143 | "name": "dc2rack2node1", 144 | "type": 5, 145 | "weight": 25 146 | }, 147 | { 148 | "name": "dc2rack2node2", 149 | "type": 5, 150 | "weight": 25 151 | } 152 | ] 153 | } 154 | ] 155 | } 156 | ] 157 | } -------------------------------------------------------------------------------- /src/test/resources/topology.template.yaml: -------------------------------------------------------------------------------- 1 | target_balance_max: 0 2 | dump_detail_map: yes 3 | weight_balance_tries: 200 4 | weight_balance_factor: 0.1 5 | number_of_buckets: 50000 6 | replica_distribution_factor: 7 7 | replication_factor: 3 8 | replica_distribution_factor_min: 5 9 | replica_distribution_factor_max: 7 10 | machine_list: --------------------------------------------------------------------------------