├── .classpath ├── .gitignore ├── .project ├── .settings └── org.eclipse.jdt.core.prefs ├── LICENSE.md ├── README.md ├── cardinput.txt ├── conf └── log4j.properties ├── scripts ├── mapper.pl ├── mapper.py ├── mapper.rb ├── reducer.pl ├── reducer.py └── reducer.rb └── src ├── CardDriver.java ├── CardMapper.java └── CardTotalReducer.java /.classpath: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 17 | 18 | 19 | 20 | 21 | 22 | 23 | 24 | 25 | 26 | 27 | 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 | 63 | 64 | 65 | 66 | 67 | 68 | 69 | 70 | 71 | 72 | 73 | 74 | 75 | 76 | 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | 88 | 89 | 90 | 91 | 92 | 93 | 94 | 95 | 96 | 97 | 98 | 99 | 100 | 101 | 102 | 103 | 104 | 105 | 106 | 107 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | bin/UnoDriver.class 2 | bin/UnoMapper.class 3 | bin/UnoTotalReducer.class 4 | bin 5 | -------------------------------------------------------------------------------- /.project: -------------------------------------------------------------------------------- 1 | 2 | 3 | Card 4 | 5 | 6 | 7 | 8 | 9 | org.eclipse.jdt.core.javabuilder 10 | 11 | 12 | 13 | 14 | 15 | org.eclipse.jdt.core.javanature 16 | 17 | 18 | -------------------------------------------------------------------------------- /.settings/org.eclipse.jdt.core.prefs: -------------------------------------------------------------------------------- 1 | eclipse.preferences.version=1 2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled 3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7 4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve 5 | org.eclipse.jdt.core.compiler.compliance=1.7 6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate 7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate 8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate 9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error 10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error 11 | org.eclipse.jdt.core.compiler.source=1.7 12 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, and 10 | distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright 13 | owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities 16 | that control, are controlled by, or are under common control with that entity. 17 | For the purposes of this definition, "control" means (i) the power, direct or 18 | indirect, to cause the direction or management of such entity, whether by 19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the 20 | outstanding shares, or (iii) beneficial ownership of such entity. 21 | 22 | "You" (or "Your") shall mean an individual or Legal Entity exercising 23 | permissions granted by this License. 24 | 25 | "Source" form shall mean the preferred form for making modifications, including 26 | but not limited to software source code, documentation source, and configuration 27 | files. 28 | 29 | "Object" form shall mean any form resulting from mechanical transformation or 30 | translation of a Source form, including but not limited to compiled object code, 31 | generated documentation, and conversions to other media types. 32 | 33 | "Work" shall mean the work of authorship, whether in Source or Object form, made 34 | available under the License, as indicated by a copyright notice that is included 35 | in or attached to the work (an example is provided in the Appendix below). 36 | 37 | "Derivative Works" shall mean any work, whether in Source or Object form, that 38 | is based on (or derived from) the Work and for which the editorial revisions, 39 | annotations, elaborations, or other modifications represent, as a whole, an 40 | original work of authorship. For the purposes of this License, Derivative Works 41 | shall not include works that remain separable from, or merely link (or bind by 42 | name) to the interfaces of, the Work and Derivative Works thereof. 43 | 44 | "Contribution" shall mean any work of authorship, including the original version 45 | of the Work and any modifications or additions to that Work or Derivative Works 46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work 47 | by the copyright owner or by an individual or Legal Entity authorized to submit 48 | on behalf of the copyright owner. For the purposes of this definition, 49 | "submitted" means any form of electronic, verbal, or written communication sent 50 | to the Licensor or its representatives, including but not limited to 51 | communication on electronic mailing lists, source code control systems, and 52 | issue tracking systems that are managed by, or on behalf of, the Licensor for 53 | the purpose of discussing and improving the Work, but excluding communication 54 | that is conspicuously marked or otherwise designated in writing by the copyright 55 | owner as "Not a Contribution." 56 | 57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf 58 | of whom a Contribution has been received by Licensor and subsequently 59 | incorporated within the Work. 60 | 61 | 2. Grant of Copyright License. 62 | 63 | Subject to the terms and conditions of this License, each Contributor hereby 64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 65 | irrevocable copyright license to reproduce, prepare Derivative Works of, 66 | publicly display, publicly perform, sublicense, and distribute the Work and such 67 | Derivative Works in Source or Object form. 68 | 69 | 3. Grant of Patent License. 70 | 71 | Subject to the terms and conditions of this License, each Contributor hereby 72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, 73 | irrevocable (except as stated in this section) patent license to make, have 74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where 75 | such license applies only to those patent claims licensable by such Contributor 76 | that are necessarily infringed by their Contribution(s) alone or by combination 77 | of their Contribution(s) with the Work to which such Contribution(s) was 78 | submitted. If You institute patent litigation against any entity (including a 79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a 80 | Contribution incorporated within the Work constitutes direct or contributory 81 | patent infringement, then any patent licenses granted to You under this License 82 | for that Work shall terminate as of the date such litigation is filed. 83 | 84 | 4. Redistribution. 85 | 86 | You may reproduce and distribute copies of the Work or Derivative Works thereof 87 | in any medium, with or without modifications, and in Source or Object form, 88 | provided that You meet the following conditions: 89 | 90 | You must give any other recipients of the Work or Derivative Works a copy of 91 | this License; and 92 | You must cause any modified files to carry prominent notices stating that You 93 | changed the files; and 94 | You must retain, in the Source form of any Derivative Works that You distribute, 95 | all copyright, patent, trademark, and attribution notices from the Source form 96 | of the Work, excluding those notices that do not pertain to any part of the 97 | Derivative Works; and 98 | If the Work includes a "NOTICE" text file as part of its distribution, then any 99 | Derivative Works that You distribute must include a readable copy of the 100 | attribution notices contained within such NOTICE file, excluding those notices 101 | that do not pertain to any part of the Derivative Works, in at least one of the 102 | following places: within a NOTICE text file distributed as part of the 103 | Derivative Works; within the Source form or documentation, if provided along 104 | with the Derivative Works; or, within a display generated by the Derivative 105 | Works, if and wherever such third-party notices normally appear. The contents of 106 | the NOTICE file are for informational purposes only and do not modify the 107 | License. You may add Your own attribution notices within Derivative Works that 108 | You distribute, alongside or as an addendum to the NOTICE text from the Work, 109 | provided that such additional attribution notices cannot be construed as 110 | modifying the License. 111 | You may add Your own copyright statement to Your modifications and may provide 112 | additional or different license terms and conditions for use, reproduction, or 113 | distribution of Your modifications, or for any such Derivative Works as a whole, 114 | provided Your use, reproduction, and distribution of the Work otherwise complies 115 | with the conditions stated in this License. 116 | 117 | 5. Submission of Contributions. 118 | 119 | Unless You explicitly state otherwise, any Contribution intentionally submitted 120 | for inclusion in the Work by You to the Licensor shall be under the terms and 121 | conditions of this License, without any additional terms or conditions. 122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of 123 | any separate license agreement you may have executed with Licensor regarding 124 | such Contributions. 125 | 126 | 6. Trademarks. 127 | 128 | This License does not grant permission to use the trade names, trademarks, 129 | service marks, or product names of the Licensor, except as required for 130 | reasonable and customary use in describing the origin of the Work and 131 | reproducing the content of the NOTICE file. 132 | 133 | 7. Disclaimer of Warranty. 134 | 135 | Unless required by applicable law or agreed to in writing, Licensor provides the 136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, 137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, 138 | including, without limitation, any warranties or conditions of TITLE, 139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are 140 | solely responsible for determining the appropriateness of using or 141 | redistributing the Work and assume any risks associated with Your exercise of 142 | permissions under this License. 143 | 144 | 8. Limitation of Liability. 145 | 146 | In no event and under no legal theory, whether in tort (including negligence), 147 | contract, or otherwise, unless required by applicable law (such as deliberate 148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be 149 | liable to You for damages, including any direct, indirect, special, incidental, 150 | or consequential damages of any character arising as a result of this License or 151 | out of the use or inability to use the Work (including but not limited to 152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or 153 | any and all other commercial damages or losses), even if such Contributor has 154 | been advised of the possibility of such damages. 155 | 156 | 9. Accepting Warranty or Additional Liability. 157 | 158 | While redistributing the Work or Derivative Works thereof, You may choose to 159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or 160 | other liability obligations and/or rights consistent with this License. However, 161 | in accepting such obligations, You may act only on Your own behalf and on Your 162 | sole responsibility, not on behalf of any other Contributor, and only if You 163 | agree to indemnify, defend, and hold each Contributor harmless for any liability 164 | incurred by, or claims asserted against, such Contributor by reason of your 165 | accepting any such warranty or additional liability. 166 | 167 | END OF TERMS AND CONDITIONS 168 | 169 | APPENDIX: How to apply the Apache License to your work 170 | 171 | To apply the Apache License to your work, attach the following boilerplate 172 | notice, with the fields enclosed by brackets "[]" replaced with your own 173 | identifying information. (Don't include the brackets!) The text should be 174 | enclosed in the appropriate comment syntax for the file format. We also 175 | recommend that a file or class name and description of purpose be included on 176 | the same "printed page" as the copyright notice for easier identification within 177 | third-party archives. 178 | 179 | Copyright 2013 Jesse Anderson 180 | 181 | Licensed under the Apache License, Version 2.0 (the "License"); 182 | you may not use this file except in compliance with the License. 183 | You may obtain a copy of the License at 184 | 185 | http://www.apache.org/licenses/LICENSE-2.0 186 | 187 | Unless required by applicable law or agreed to in writing, software 188 | distributed under the License is distributed on an "AS IS" BASIS, 189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 190 | See the License for the specific language governing permissions and 191 | limitations under the License. 192 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | Playing Card Example 2 | ========== 3 | 4 | Hadoop MapReduce example that uses regular playing cards to explain how mapping and reducing works. 5 | 6 | This is the example code for the first and third episodes of the [Hadoop MapReduce screencast](http://pragprog.com/screencasts/v-jamapr/processing-big-data-with-mapreduce). 7 | 8 | Licence 9 | ====== 10 | Copyright 2013 Jesse Anderson 11 | 12 | Licensed under the Apache License, Version 2.0 (the "License"); 13 | you may not use this file except in compliance with the License. 14 | You may obtain a copy of the License at 15 | 16 | http://www.apache.org/licenses/LICENSE-2.0 17 | 18 | Unless required by applicable law or agreed to in writing, software 19 | distributed under the License is distributed on an "AS IS" BASIS, 20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 21 | See the License for the specific language governing permissions and 22 | limitations under the License. 23 | -------------------------------------------------------------------------------- /cardinput.txt: -------------------------------------------------------------------------------- 1 | DIAMONDS 3 2 | sPADes Jack 3 | hearts 3 4 | hearts 2 5 | hearts Ace 6 | JOKER JOKER 7 | DIAMONDS Queen 8 | spades 3 9 | hearts 6 10 | spades 10 11 | cLUbs 8 12 | HeARts 7 13 | SpAdEs 4 14 | Diamonds 6 15 | Diamonds 3 16 | HEARTS 8 17 | diamonds 7 18 | CLUBs 4 19 | Diamonds 7 20 | Spades 9 21 | clubS 7 22 | SPADES 5 23 | diamonds 10 24 | DiamONDs King 25 | -------------------------------------------------------------------------------- /conf/log4j.properties: -------------------------------------------------------------------------------- 1 | # Autogenerated by Cloudera SCM on Tue Apr 10 13:04:56 CDT 2012 2 | # Define some default values that can be overridden by system properties 3 | hadoop.root.logger=INFO,DRFA,console 4 | hadoop.log.dir=. 5 | hadoop.log.file=hadoop.log 6 | 7 | # Define the root logger to the system property "hadoop.root.logger". 8 | log4j.rootLogger=${hadoop.root.logger}, EventCounter 9 | 10 | # Logging Threshold 11 | log4j.threshhold=ALL 12 | 13 | # 14 | # console 15 | # This is left here because hadoop scripts use it if the environment variable 16 | # HADOOP_ROOT_LOGGER is not set 17 | # 18 | 19 | log4j.appender.console=org.apache.log4j.ConsoleAppender 20 | log4j.appender.console.target=System.err 21 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 22 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n 23 | 24 | 25 | # 26 | # Daily Rolling File Appender 27 | # 28 | 29 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender 30 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file} 31 | 32 | # Rollver at midnight 33 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd 34 | 35 | # 30-day backup 36 | #log4j.appender.DRFA.MaxBackupIndex=30 37 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout 38 | 39 | # Pattern format: Date LogLevel LoggerName LogMessage 40 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n 41 | # Debugging Pattern format 42 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n 43 | 44 | #======= 45 | # security audit logging 46 | 47 | security.audit.logger=INFO, console 48 | log4j.category.SecurityLogger=${security.audit.logger} 49 | log4j.additivity.SecurityLogger=false 50 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender 51 | log4j.appender.DRFAS.File=${hadoop.log.dir}/security/${hadoop.id.str}-auth.log 52 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout 53 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 54 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd 55 | 56 | # hdfs audit logging 57 | 58 | hdfs.audit.logger=INFO, console 59 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger} 60 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false 61 | log4j.appender.DRFAAUDIT=org.apache.log4j.DailyRollingFileAppender 62 | log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/audit/hdfs-audit.log 63 | log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout 64 | log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n 65 | log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd 66 | 67 | 68 | # 69 | # FSNamesystem Audit logging 70 | # All audit events are logged at INFO level 71 | # 72 | log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN 73 | 74 | # Jets3t library 75 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR 76 | 77 | # 78 | # Event Counter Appender 79 | # Sends counts of logging messages at different severity levels to Hadoop Metrics. 80 | # 81 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter 82 | 83 | -------------------------------------------------------------------------------- /scripts/mapper.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # To run in Hadoop cluster 18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.pl -reducer reducer.pl -file mapper.pl -file reducer.pl 19 | 20 | # To run without Hadoop for debugging purposes 21 | # hadoop fs -cat cardinput.txt | ./mapper.pl | sort | ./reducer.pl 22 | 23 | # Iterate through every line passed in to stdin 24 | while (<>) { 25 | chomp; 26 | 27 | if (/(.*) (\d+)/) { 28 | # Line matches regex, output to reducer 29 | print lc $1 . "\t$2\n"; 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /scripts/mapper.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # To run in Hadoop cluster 18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.py -reducer reducer.py -file mapper.py -file reducer.py 19 | 20 | # To run without Hadoop for debugging purposes 21 | # hadoop fs -cat cardinput.txt | ./mapper.py | sort | ./reducer.py 22 | import re 23 | import sys 24 | 25 | # Create regular expression to catch bad data 26 | cardinput = re.compile("(.*) (\d+)") 27 | 28 | # Iterate through every line passed in to stdin 29 | for input in sys.stdin.readlines(): 30 | match = cardinput.match(input) 31 | 32 | if match: 33 | # Line matches regex, output to reducer 34 | print match.group(1).lower() + '\t' + match.group(2) 35 | -------------------------------------------------------------------------------- /scripts/mapper.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # To run in Hadoop cluster 18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.rb -reducer reducer.rb -file mapper.rb -file reducer.rb 19 | 20 | # To run without Hadoop for debugging purposes 21 | # hadoop fs -cat cardinput.txt | ./mapper.rb | sort | ./reducer.rb 22 | 23 | # Iterate through every line passed in to stdin 24 | ARGF.each do |line| 25 | line = line.chomp 26 | 27 | if match = line.match(/(.*) (\d+)/) 28 | # Line matches regex, output to reducer 29 | key, value = match.captures 30 | 31 | puts key.downcase + "\t" + value 32 | end 33 | end 34 | -------------------------------------------------------------------------------- /scripts/reducer.pl: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env perl 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Initialize variable to keep track of sums for a suit 18 | my $cardsum = 0; 19 | 20 | # Start off with cardsuit as undef 21 | # We have to keep track of the key ourselves 22 | my $cardsuit = undef; 23 | 24 | # Iterate through every line passed in to stdin 25 | # Will be key followed by tab then value 26 | while (<>) { 27 | chomp; 28 | 29 | # Split the line to key and value based on the tab 30 | @parts = split(/\t/); 31 | 32 | if (length(@parts) != 1) { 33 | print "Too many parts " . length(@parts) . " Parts:" . @parts[0] . ":" . @parts[1]; 34 | next; 35 | } 36 | 37 | # Set the variables up to make them easier to read 38 | $newcardsuit = @parts[0]; 39 | $cardnum = @parts[1]; 40 | 41 | if (!defined($cardsuit)) { 42 | # cardsuit not set yet, set it because we keep track of the key 43 | $cardsuit = $newcardsuit; 44 | } 45 | 46 | if ($cardsuit ne $newcardsuit) { 47 | # New cardsuit came in, output the previous suit and sum 48 | print $cardsuit . "\t" . $cardsum . "\n"; 49 | # Set the new key because we keep track of the key 50 | $cardsuit = $newcardsuit; 51 | # Set the sum to 0 because there is a new key 52 | $cardsum = 0; 53 | } 54 | 55 | # Add the new card number to the existing card sum 56 | $cardsum = $cardsum + $cardnum; 57 | } 58 | 59 | if (defined($cardsuit)) { 60 | # If a cardsuit was found, output the last key's data before exit 61 | print $cardsuit . "\t" . $cardsum . "\n"; 62 | } 63 | -------------------------------------------------------------------------------- /scripts/reducer.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | import sys 18 | 19 | # Initialize variable to keep track of sums for a suit 20 | cardsum = 0 21 | 22 | # Start off with cardsuit as None 23 | # We have to keep track of the key ourselves 24 | cardsuit = None 25 | 26 | # Iterate through every line passed in to stdin 27 | # Will be key followed by tab then value 28 | for input in sys.stdin.readlines(): 29 | input = input.rstrip() 30 | 31 | # Split the line to key and value based on the tab 32 | parts = input.split("\t") 33 | 34 | if len(parts) != 2: 35 | continue 36 | 37 | # Set the variables up to make them easier to read 38 | newcardsuit=parts[0] 39 | cardnum=int(parts[1]) 40 | 41 | if not cardsuit: 42 | # cardsuit not set yet, set it because we keep track of the key 43 | cardsuit = newcardsuit 44 | 45 | if cardsuit != newcardsuit: 46 | # New cardsuit came in, output the previous suit and sum 47 | print cardsuit + "\t" + str(cardsum) 48 | # Set the new key because we keep track of the key 49 | cardsuit = newcardsuit; 50 | # Set the sum to 0 because there is a new key 51 | cardsum = 0 52 | 53 | # Add the new card number to the existing card sum 54 | cardsum = cardsum + cardnum 55 | 56 | if cardsuit != None: 57 | # If a cardsuit was found, output the last key's data before exit 58 | print cardsuit + "\t" + str(cardsum) 59 | -------------------------------------------------------------------------------- /scripts/reducer.rb: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env ruby 2 | 3 | # Copyright 2013 Jesse Anderson 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | # Initialize variable to keep track of sums for a suit 18 | cardsum = 0 19 | 20 | # Start off with cardsuit as nil 21 | # We have to keep track of the key ourselves 22 | cardsuit = nil 23 | 24 | # Iterate through every line passed in to stdin 25 | # Will be key followed by tab then value 26 | ARGF.each do |line| 27 | line = line.chomp 28 | 29 | # Split the line to key and value based on the tab 30 | parts = line.split("\t") 31 | 32 | if parts.length != 2 33 | next 34 | end 35 | 36 | # Set the variables up to make them easier to read 37 | newcardsuit=parts[0] 38 | cardnum=Integer(parts[1]) 39 | 40 | if cardsuit.nil? 41 | # cardsuit not set yet, set it because we keep track of the key 42 | cardsuit = newcardsuit 43 | end 44 | 45 | if cardsuit != newcardsuit 46 | # New cardsuit came in, output the previous suit and sum 47 | puts cardsuit + "\t" + cardsum.to_s() 48 | # Set the new key because we keep track of the key 49 | cardsuit = newcardsuit 50 | # Set the sum to 0 because there is a new key 51 | cardsum = 0 52 | end 53 | 54 | # Add the new card number to the existing card sum 55 | cardsum = cardsum + cardnum 56 | end 57 | 58 | if cardsuit.nil? == false: 59 | # If a cardsuit was found, output the last key's data before exit 60 | puts cardsuit + "\t" + cardsum.to_s() 61 | end 62 | -------------------------------------------------------------------------------- /src/CardDriver.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Jesse Anderson 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import org.apache.hadoop.conf.Configured; 18 | import org.apache.hadoop.fs.Path; 19 | import org.apache.hadoop.io.IntWritable; 20 | import org.apache.hadoop.io.Text; 21 | import org.apache.hadoop.mapreduce.Job; 22 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 23 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 24 | import org.apache.hadoop.util.Tool; 25 | import org.apache.hadoop.util.ToolRunner; 26 | 27 | public class CardDriver extends Configured implements Tool { 28 | 29 | @Override 30 | public int run(String[] args) throws Exception { 31 | String input, output; 32 | if (args.length == 2) { 33 | input = args[0]; 34 | output = args[1]; 35 | } else { 36 | System.err.println("Incorrect number of arguments. Expected: input output"); 37 | return -1; 38 | } 39 | 40 | Job job = new Job(getConf()); 41 | job.setJarByClass(CardDriver.class); 42 | job.setJobName(this.getClass().getName()); 43 | 44 | FileInputFormat.setInputPaths(job, new Path(input)); 45 | FileOutputFormat.setOutputPath(job, new Path(output)); 46 | 47 | job.setMapperClass(CardMapper.class); 48 | job.setReducerClass(CardTotalReducer.class); 49 | 50 | job.setMapOutputKeyClass(Text.class); 51 | job.setMapOutputValueClass(IntWritable.class); 52 | 53 | job.setOutputKeyClass(Text.class); 54 | job.setOutputValueClass(IntWritable.class); 55 | 56 | boolean success = job.waitForCompletion(true); 57 | return success ? 0 : 1; 58 | } 59 | 60 | public static void main(String[] args) throws Exception { 61 | CardDriver driver = new CardDriver(); 62 | int exitCode = ToolRunner.run(driver, args); 63 | System.exit(exitCode); 64 | } 65 | } -------------------------------------------------------------------------------- /src/CardMapper.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Jesse Anderson 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import java.io.IOException; 18 | import java.util.regex.Matcher; 19 | import java.util.regex.Pattern; 20 | 21 | import org.apache.hadoop.io.IntWritable; 22 | import org.apache.hadoop.io.LongWritable; 23 | import org.apache.hadoop.io.Text; 24 | import org.apache.hadoop.mapreduce.Mapper; 25 | 26 | public class CardMapper extends Mapper { 27 | 28 | private static Pattern inputPattern = Pattern.compile("(.*) (\\d*)"); 29 | 30 | @Override 31 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 32 | String inputLine = value.toString(); 33 | 34 | Matcher inputMatch = inputPattern.matcher(inputLine); 35 | 36 | // Use regex to throw out Jacks, Queens, Kings, Aces and Jokers 37 | if (inputMatch.matches()) { 38 | // Normalize inconsistent case for card suits 39 | String cardSuit = inputMatch.group(1).toLowerCase(); 40 | int cardValue = Integer.parseInt(inputMatch.group(2)); 41 | 42 | context.write(new Text(cardSuit), new IntWritable(cardValue)); 43 | } 44 | } 45 | } -------------------------------------------------------------------------------- /src/CardTotalReducer.java: -------------------------------------------------------------------------------- 1 | /** 2 | * Copyright 2013 Jesse Anderson 3 | * 4 | * Licensed under the Apache License, Version 2.0 (the "License"); 5 | * you may not use this file except in compliance with the License. 6 | * You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | * See the License for the specific language governing permissions and 14 | * limitations under the License. 15 | */ 16 | 17 | import java.io.IOException; 18 | 19 | import org.apache.hadoop.io.IntWritable; 20 | import org.apache.hadoop.io.Text; 21 | import org.apache.hadoop.mapreduce.Reducer; 22 | 23 | public class CardTotalReducer extends Reducer { 24 | 25 | @Override 26 | public void reduce(Text key, Iterable values, Context context) throws IOException, 27 | InterruptedException { 28 | int sum = 0; 29 | 30 | // Go through all values to sum up card values for a card suit 31 | for (IntWritable value : values) { 32 | sum += value.get(); 33 | } 34 | 35 | context.write(key, new IntWritable(sum)); 36 | } 37 | } 38 | --------------------------------------------------------------------------------