├── .classpath
├── .gitignore
├── .project
├── .settings
└── org.eclipse.jdt.core.prefs
├── LICENSE.md
├── README.md
├── cardinput.txt
├── conf
└── log4j.properties
├── scripts
├── mapper.pl
├── mapper.py
├── mapper.rb
├── reducer.pl
├── reducer.py
└── reducer.rb
└── src
├── CardDriver.java
├── CardMapper.java
└── CardTotalReducer.java
/.classpath:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
81 |
82 |
83 |
84 |
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | bin/UnoDriver.class
2 | bin/UnoMapper.class
3 | bin/UnoTotalReducer.class
4 | bin
5 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
1 |
2 |
3 | Card
4 |
5 |
6 |
7 |
8 |
9 | org.eclipse.jdt.core.javabuilder
10 |
11 |
12 |
13 |
14 |
15 | org.eclipse.jdt.core.javanature
16 |
17 |
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | eclipse.preferences.version=1
2 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
3 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.7
4 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
5 | org.eclipse.jdt.core.compiler.compliance=1.7
6 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
7 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
8 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
9 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
10 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
11 | org.eclipse.jdt.core.compiler.source=1.7
12 |
--------------------------------------------------------------------------------
/LICENSE.md:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction, and
10 | distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by the copyright
13 | owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all other entities
16 | that control, are controlled by, or are under common control with that entity.
17 | For the purposes of this definition, "control" means (i) the power, direct or
18 | indirect, to cause the direction or management of such entity, whether by
19 | contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the
20 | outstanding shares, or (iii) beneficial ownership of such entity.
21 |
22 | "You" (or "Your") shall mean an individual or Legal Entity exercising
23 | permissions granted by this License.
24 |
25 | "Source" form shall mean the preferred form for making modifications, including
26 | but not limited to software source code, documentation source, and configuration
27 | files.
28 |
29 | "Object" form shall mean any form resulting from mechanical transformation or
30 | translation of a Source form, including but not limited to compiled object code,
31 | generated documentation, and conversions to other media types.
32 |
33 | "Work" shall mean the work of authorship, whether in Source or Object form, made
34 | available under the License, as indicated by a copyright notice that is included
35 | in or attached to the work (an example is provided in the Appendix below).
36 |
37 | "Derivative Works" shall mean any work, whether in Source or Object form, that
38 | is based on (or derived from) the Work and for which the editorial revisions,
39 | annotations, elaborations, or other modifications represent, as a whole, an
40 | original work of authorship. For the purposes of this License, Derivative Works
41 | shall not include works that remain separable from, or merely link (or bind by
42 | name) to the interfaces of, the Work and Derivative Works thereof.
43 |
44 | "Contribution" shall mean any work of authorship, including the original version
45 | of the Work and any modifications or additions to that Work or Derivative Works
46 | thereof, that is intentionally submitted to Licensor for inclusion in the Work
47 | by the copyright owner or by an individual or Legal Entity authorized to submit
48 | on behalf of the copyright owner. For the purposes of this definition,
49 | "submitted" means any form of electronic, verbal, or written communication sent
50 | to the Licensor or its representatives, including but not limited to
51 | communication on electronic mailing lists, source code control systems, and
52 | issue tracking systems that are managed by, or on behalf of, the Licensor for
53 | the purpose of discussing and improving the Work, but excluding communication
54 | that is conspicuously marked or otherwise designated in writing by the copyright
55 | owner as "Not a Contribution."
56 |
57 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf
58 | of whom a Contribution has been received by Licensor and subsequently
59 | incorporated within the Work.
60 |
61 | 2. Grant of Copyright License.
62 |
63 | Subject to the terms and conditions of this License, each Contributor hereby
64 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
65 | irrevocable copyright license to reproduce, prepare Derivative Works of,
66 | publicly display, publicly perform, sublicense, and distribute the Work and such
67 | Derivative Works in Source or Object form.
68 |
69 | 3. Grant of Patent License.
70 |
71 | Subject to the terms and conditions of this License, each Contributor hereby
72 | grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free,
73 | irrevocable (except as stated in this section) patent license to make, have
74 | made, use, offer to sell, sell, import, and otherwise transfer the Work, where
75 | such license applies only to those patent claims licensable by such Contributor
76 | that are necessarily infringed by their Contribution(s) alone or by combination
77 | of their Contribution(s) with the Work to which such Contribution(s) was
78 | submitted. If You institute patent litigation against any entity (including a
79 | cross-claim or counterclaim in a lawsuit) alleging that the Work or a
80 | Contribution incorporated within the Work constitutes direct or contributory
81 | patent infringement, then any patent licenses granted to You under this License
82 | for that Work shall terminate as of the date such litigation is filed.
83 |
84 | 4. Redistribution.
85 |
86 | You may reproduce and distribute copies of the Work or Derivative Works thereof
87 | in any medium, with or without modifications, and in Source or Object form,
88 | provided that You meet the following conditions:
89 |
90 | You must give any other recipients of the Work or Derivative Works a copy of
91 | this License; and
92 | You must cause any modified files to carry prominent notices stating that You
93 | changed the files; and
94 | You must retain, in the Source form of any Derivative Works that You distribute,
95 | all copyright, patent, trademark, and attribution notices from the Source form
96 | of the Work, excluding those notices that do not pertain to any part of the
97 | Derivative Works; and
98 | If the Work includes a "NOTICE" text file as part of its distribution, then any
99 | Derivative Works that You distribute must include a readable copy of the
100 | attribution notices contained within such NOTICE file, excluding those notices
101 | that do not pertain to any part of the Derivative Works, in at least one of the
102 | following places: within a NOTICE text file distributed as part of the
103 | Derivative Works; within the Source form or documentation, if provided along
104 | with the Derivative Works; or, within a display generated by the Derivative
105 | Works, if and wherever such third-party notices normally appear. The contents of
106 | the NOTICE file are for informational purposes only and do not modify the
107 | License. You may add Your own attribution notices within Derivative Works that
108 | You distribute, alongside or as an addendum to the NOTICE text from the Work,
109 | provided that such additional attribution notices cannot be construed as
110 | modifying the License.
111 | You may add Your own copyright statement to Your modifications and may provide
112 | additional or different license terms and conditions for use, reproduction, or
113 | distribution of Your modifications, or for any such Derivative Works as a whole,
114 | provided Your use, reproduction, and distribution of the Work otherwise complies
115 | with the conditions stated in this License.
116 |
117 | 5. Submission of Contributions.
118 |
119 | Unless You explicitly state otherwise, any Contribution intentionally submitted
120 | for inclusion in the Work by You to the Licensor shall be under the terms and
121 | conditions of this License, without any additional terms or conditions.
122 | Notwithstanding the above, nothing herein shall supersede or modify the terms of
123 | any separate license agreement you may have executed with Licensor regarding
124 | such Contributions.
125 |
126 | 6. Trademarks.
127 |
128 | This License does not grant permission to use the trade names, trademarks,
129 | service marks, or product names of the Licensor, except as required for
130 | reasonable and customary use in describing the origin of the Work and
131 | reproducing the content of the NOTICE file.
132 |
133 | 7. Disclaimer of Warranty.
134 |
135 | Unless required by applicable law or agreed to in writing, Licensor provides the
136 | Work (and each Contributor provides its Contributions) on an "AS IS" BASIS,
137 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied,
138 | including, without limitation, any warranties or conditions of TITLE,
139 | NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are
140 | solely responsible for determining the appropriateness of using or
141 | redistributing the Work and assume any risks associated with Your exercise of
142 | permissions under this License.
143 |
144 | 8. Limitation of Liability.
145 |
146 | In no event and under no legal theory, whether in tort (including negligence),
147 | contract, or otherwise, unless required by applicable law (such as deliberate
148 | and grossly negligent acts) or agreed to in writing, shall any Contributor be
149 | liable to You for damages, including any direct, indirect, special, incidental,
150 | or consequential damages of any character arising as a result of this License or
151 | out of the use or inability to use the Work (including but not limited to
152 | damages for loss of goodwill, work stoppage, computer failure or malfunction, or
153 | any and all other commercial damages or losses), even if such Contributor has
154 | been advised of the possibility of such damages.
155 |
156 | 9. Accepting Warranty or Additional Liability.
157 |
158 | While redistributing the Work or Derivative Works thereof, You may choose to
159 | offer, and charge a fee for, acceptance of support, warranty, indemnity, or
160 | other liability obligations and/or rights consistent with this License. However,
161 | in accepting such obligations, You may act only on Your own behalf and on Your
162 | sole responsibility, not on behalf of any other Contributor, and only if You
163 | agree to indemnify, defend, and hold each Contributor harmless for any liability
164 | incurred by, or claims asserted against, such Contributor by reason of your
165 | accepting any such warranty or additional liability.
166 |
167 | END OF TERMS AND CONDITIONS
168 |
169 | APPENDIX: How to apply the Apache License to your work
170 |
171 | To apply the Apache License to your work, attach the following boilerplate
172 | notice, with the fields enclosed by brackets "[]" replaced with your own
173 | identifying information. (Don't include the brackets!) The text should be
174 | enclosed in the appropriate comment syntax for the file format. We also
175 | recommend that a file or class name and description of purpose be included on
176 | the same "printed page" as the copyright notice for easier identification within
177 | third-party archives.
178 |
179 | Copyright 2013 Jesse Anderson
180 |
181 | Licensed under the Apache License, Version 2.0 (the "License");
182 | you may not use this file except in compliance with the License.
183 | You may obtain a copy of the License at
184 |
185 | http://www.apache.org/licenses/LICENSE-2.0
186 |
187 | Unless required by applicable law or agreed to in writing, software
188 | distributed under the License is distributed on an "AS IS" BASIS,
189 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
190 | See the License for the specific language governing permissions and
191 | limitations under the License.
192 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | Playing Card Example
2 | ==========
3 |
4 | Hadoop MapReduce example that uses regular playing cards to explain how mapping and reducing works.
5 |
6 | This is the example code for the first and third episodes of the [Hadoop MapReduce screencast](http://pragprog.com/screencasts/v-jamapr/processing-big-data-with-mapreduce).
7 |
8 | License
9 | ======
10 | Copyright 2013 Jesse Anderson
11 |
12 | Licensed under the Apache License, Version 2.0 (the "License");
13 | you may not use this file except in compliance with the License.
14 | You may obtain a copy of the License at
15 |
16 | http://www.apache.org/licenses/LICENSE-2.0
17 |
18 | Unless required by applicable law or agreed to in writing, software
19 | distributed under the License is distributed on an "AS IS" BASIS,
20 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
21 | See the License for the specific language governing permissions and
22 | limitations under the License.
23 |
--------------------------------------------------------------------------------
/cardinput.txt:
--------------------------------------------------------------------------------
1 | DIAMONDS 3
2 | sPADes Jack
3 | hearts 3
4 | hearts 2
5 | hearts Ace
6 | JOKER JOKER
7 | DIAMONDS Queen
8 | spades 3
9 | hearts 6
10 | spades 10
11 | cLUbs 8
12 | HeARts 7
13 | SpAdEs 4
14 | Diamonds 6
15 | Diamonds 3
16 | HEARTS 8
17 | diamonds 7
18 | CLUBs 4
19 | Diamonds 7
20 | Spades 9
21 | clubS 7
22 | SPADES 5
23 | diamonds 10
24 | DiamONDs King
25 |
--------------------------------------------------------------------------------
/conf/log4j.properties:
--------------------------------------------------------------------------------
1 | # Autogenerated by Cloudera SCM on Tue Apr 10 13:04:56 CDT 2012
2 | # Define some default values that can be overridden by system properties
3 | hadoop.root.logger=INFO,DRFA,console
4 | hadoop.log.dir=.
5 | hadoop.log.file=hadoop.log
6 |
7 | # Define the root logger to the system property "hadoop.root.logger".
8 | log4j.rootLogger=${hadoop.root.logger}, EventCounter
9 |
10 | # Logging Threshold
11 | log4j.threshhold=ALL
12 |
13 | #
14 | # console
15 | # This is left here because hadoop scripts use it if the environment variable
16 | # HADOOP_ROOT_LOGGER is not set
17 | #
18 |
19 | log4j.appender.console=org.apache.log4j.ConsoleAppender
20 | log4j.appender.console.target=System.err
21 | log4j.appender.console.layout=org.apache.log4j.PatternLayout
22 | log4j.appender.console.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss} %p %c{2}: %m%n
23 |
24 |
25 | #
26 | # Daily Rolling File Appender
27 | #
28 |
29 | log4j.appender.DRFA=org.apache.log4j.DailyRollingFileAppender
30 | log4j.appender.DRFA.File=${hadoop.log.dir}/${hadoop.log.file}
31 |
32 | # Rollover at midnight
33 | log4j.appender.DRFA.DatePattern=.yyyy-MM-dd
34 |
35 | # 30-day backup
36 | #log4j.appender.DRFA.MaxBackupIndex=30
37 | log4j.appender.DRFA.layout=org.apache.log4j.PatternLayout
38 |
39 | # Pattern format: Date LogLevel LoggerName LogMessage
40 | log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %p %c: %m%n
41 | # Debugging Pattern format
42 | #log4j.appender.DRFA.layout.ConversionPattern=%d{ISO8601} %-5p %c{2} (%F:%M(%L)) - %m%n
43 |
44 | #=======
45 | # security audit logging
46 |
47 | security.audit.logger=INFO, console
48 | log4j.category.SecurityLogger=${security.audit.logger}
49 | log4j.additivity.SecurityLogger=false
50 | log4j.appender.DRFAS=org.apache.log4j.DailyRollingFileAppender
51 | log4j.appender.DRFAS.File=${hadoop.log.dir}/security/${hadoop.id.str}-auth.log
52 | log4j.appender.DRFAS.layout=org.apache.log4j.PatternLayout
53 | log4j.appender.DRFAS.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
54 | log4j.appender.DRFAS.DatePattern=.yyyy-MM-dd
55 |
56 | # hdfs audit logging
57 |
58 | hdfs.audit.logger=INFO, console
59 | log4j.logger.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=${hdfs.audit.logger}
60 | log4j.additivity.org.apache.hadoop.hdfs.server.namenode.FSNamesystem.audit=false
61 | log4j.appender.DRFAAUDIT=org.apache.log4j.DailyRollingFileAppender
62 | log4j.appender.DRFAAUDIT.File=${hadoop.log.dir}/audit/hdfs-audit.log
63 | log4j.appender.DRFAAUDIT.layout=org.apache.log4j.PatternLayout
64 | log4j.appender.DRFAAUDIT.layout.ConversionPattern=%d{ISO8601} %p %c{2}: %m%n
65 | log4j.appender.DRFAAUDIT.DatePattern=.yyyy-MM-dd
66 |
67 |
68 | #
69 | # FSNamesystem Audit logging
70 | # All audit events are logged at INFO level
71 | #
72 | log4j.logger.org.apache.hadoop.fs.FSNamesystem.audit=WARN
73 |
74 | # Jets3t library
75 | log4j.logger.org.jets3t.service.impl.rest.httpclient.RestS3Service=ERROR
76 |
77 | #
78 | # Event Counter Appender
79 | # Sends counts of logging messages at different severity levels to Hadoop Metrics.
80 | #
81 | log4j.appender.EventCounter=org.apache.hadoop.log.metrics.EventCounter
82 |
83 |
--------------------------------------------------------------------------------
/scripts/mapper.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # To run in Hadoop cluster
18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.pl -reducer reducer.pl -file mapper.pl -file reducer.pl
19 |
20 | # To run without Hadoop for debugging purposes
21 | # hadoop fs -cat cardinput.txt | ./mapper.pl | sort | ./reducer.pl
22 |
23 | # Iterate through every line passed in to stdin
# Read stdin (or any files named on the command line) one record at a time
while (my $record = <>) {
    chomp $record;

    # Records must contain "<text> <digits>"; anything else is not forwarded
    next unless $record =~ /(.*) (\d+)/;

    # Emit "<suit>\t<number>\n", lower-cased as a whole (lc applies to the
    # entire concatenation, exactly as the unparenthesised form parses)
    print lc($1 . "\t$2\n");
}
32 |
--------------------------------------------------------------------------------
/scripts/mapper.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # To run in Hadoop cluster
18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.py -reducer reducer.py -file mapper.py -file reducer.py
19 |
20 | # To run without Hadoop for debugging purposes
21 | # hadoop fs -cat cardinput.txt | ./mapper.py | sort | ./reducer.py
22 | import re
23 | import sys
24 |
# Regular expression used to filter out bad data: a line only matches when it
# contains some text, a single space, and then at least one digit.
# Raw string so that "\d" is a regex escape rather than a string escape.
cardinput = re.compile(r"(.*) (\d+)")


def main(stdin=None, stdout=None):
    """Map card lines to "<suit>\\t<number>" records for the reducer.

    Reads lines from ``stdin`` and, for every line matching ``cardinput``,
    writes the lower-cased first group, a tab, the digits and a newline to
    ``stdout``. Lines that do not match are dropped.

    Args:
        stdin: iterable of text lines; defaults to ``sys.stdin``.
        stdout: object with a ``write`` method; defaults to ``sys.stdout``.
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout

    # Iterate lazily instead of readlines() so the whole input is never
    # held in memory at once.
    for line in stdin:
        match = cardinput.match(line)

        if match:
            # Line matches regex, output to reducer. write() is used rather
            # than the print statement so the script runs on Python 2 and 3.
            stdout.write(match.group(1).lower() + "\t" + match.group(2) + "\n")


if __name__ == "__main__":
    main()
35 |
--------------------------------------------------------------------------------
/scripts/mapper.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # To run in Hadoop cluster
18 | # hadoop jar /usr/lib/hadoop-0.20-mapreduce/contrib/streaming/hadoop-streaming*.jar -input cardinput.txt -output output -mapper mapper.rb -reducer reducer.rb -file mapper.rb -file reducer.rb
19 |
20 | # To run without Hadoop for debugging purposes
21 | # hadoop fs -cat cardinput.txt | ./mapper.rb | sort | ./reducer.rb
22 |
23 | # Iterate through every line passed in to stdin
# Process every line from stdin (or from files given as arguments)
ARGF.each_line do |raw|
  record = raw.chomp

  # Only lines containing "<text> <digits>" are passed on to the reducer
  parsed = /(.*) (\d+)/.match(record)
  next if parsed.nil?

  suit, number = parsed.captures

  # Emit the lower-cased suit and the number separated by a tab
  puts "#{suit.downcase}\t#{number}"
end
34 |
--------------------------------------------------------------------------------
/scripts/reducer.pl:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env perl
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
# Running total of the card values seen for the suit currently being summed
my $cardsum = 0;

# Suit (key) currently being summed; undef until the first valid record.
# Input arrives as a flat stream of lines, so key changes are detected here
# and equal keys must arrive on consecutive lines.
my $cardsuit = undef;

# Iterate through every line passed in on stdin.
# Each line is expected to be: key, tab, value.
while (my $line = <>) {
    chomp $line;

    # Split the line into key and value on the tab
    my @parts = split(/\t/, $line);

    # scalar(@parts) is the number of fields. length(@parts) would instead be
    # the length of that count written as a string (1 for 0-9 fields), so it
    # cannot be used to validate the record.
    if (scalar(@parts) != 2) {
        # Report on STDERR so the diagnostic never mixes into reducer output
        print STDERR "Expected 2 tab-separated parts but found "
            . scalar(@parts) . ": " . $line . "\n";
        next;
    }

    # Name the fields to make the logic below easier to read
    my ($newcardsuit, $cardnum) = @parts;

    if (!defined($cardsuit)) {
        # First valid record: start tracking its key
        $cardsuit = $newcardsuit;
    }

    if ($cardsuit ne $newcardsuit) {
        # Key changed: output the finished suit and its sum
        print $cardsuit . "\t" . $cardsum . "\n";
        # Start tracking the new key with a fresh sum
        $cardsuit = $newcardsuit;
        $cardsum = 0;
    }

    # Add the new card number to the running sum for the current suit
    $cardsum += $cardnum;
}

if (defined($cardsuit)) {
    # At least one record was seen: output the last key's sum before exit
    print $cardsuit . "\t" . $cardsum . "\n";
}
63 |
--------------------------------------------------------------------------------
/scripts/reducer.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/python
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | import sys
18 |
def main(stdin=None, stdout=None):
    """Sum card values per suit from "<suit>\\t<number>" lines.

    Equal keys must arrive on consecutive lines: a total is written as soon
    as the key changes, and the final key's total is written at end of input.
    Lines that do not split into exactly two tab-separated parts are skipped.

    Args:
        stdin: iterable of text lines; defaults to ``sys.stdin``.
        stdout: object with a ``write`` method; defaults to ``sys.stdout``.

    Raises:
        ValueError: if the value part of a two-part line is not an integer.
    """
    if stdin is None:
        stdin = sys.stdin
    if stdout is None:
        stdout = sys.stdout

    # Running total for the suit currently being summed
    cardsum = 0

    # Suit (key) currently being summed; None until the first valid line.
    # We have to keep track of the key ourselves.
    cardsuit = None

    # Iterate lazily rather than readlines() so the input is streamed
    for line in stdin:
        # Split the line into key and value on the tab
        parts = line.rstrip().split("\t")

        if len(parts) != 2:
            continue

        newcardsuit = parts[0]
        cardnum = int(parts[1])

        # Compare against None explicitly: an empty-string key is a real key
        # and must not be mistaken for "no key seen yet".
        if cardsuit is None:
            cardsuit = newcardsuit

        if cardsuit != newcardsuit:
            # New suit came in, output the previous suit and its sum
            stdout.write(cardsuit + "\t" + str(cardsum) + "\n")
            # Start tracking the new key with a fresh sum
            cardsuit = newcardsuit
            cardsum = 0

        # Add the new card number to the running sum
        cardsum += cardnum

    if cardsuit is not None:
        # At least one record was seen: output the last key's sum
        stdout.write(cardsuit + "\t" + str(cardsum) + "\n")


if __name__ == "__main__":
    main()
59 |
--------------------------------------------------------------------------------
/scripts/reducer.rb:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env ruby
2 |
3 | # Copyright 2013 Jesse Anderson
4 | #
5 | # Licensed under the Apache License, Version 2.0 (the "License");
6 | # you may not use this file except in compliance with the License.
7 | # You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
# Running total of the card values seen for the suit currently being summed
cardsum = 0

# Suit (key) currently being summed; nil until the first valid record.
# We have to keep track of the key ourselves, so equal keys must arrive on
# consecutive lines.
cardsuit = nil

# Iterate through every line passed in on stdin.
# Each line is expected to be: key, tab, value.
ARGF.each do |line|
  line = line.chomp

  # Split the line into key and value on the tab
  parts = line.split("\t")

  # Skip records that are not exactly key + value
  next if parts.length != 2

  # Name the fields to make the logic below easier to read
  newcardsuit = parts[0]
  # Force base 10 so a value with a leading zero (e.g. "08") is read as a
  # decimal number instead of being interpreted as octal
  cardnum = Integer(parts[1], 10)

  if cardsuit.nil?
    # First valid record: start tracking its key
    cardsuit = newcardsuit
  end

  if cardsuit != newcardsuit
    # Key changed: output the finished suit and its sum
    puts cardsuit + "\t" + cardsum.to_s
    # Start tracking the new key with a fresh sum
    cardsuit = newcardsuit
    cardsum = 0
  end

  # Add the new card number to the running sum
  cardsum += cardnum
end

# No trailing colon after the condition: that form is a syntax error on
# Ruby 1.9 and later
unless cardsuit.nil?
  # At least one record was seen: output the last key's sum before exit
  puts cardsuit + "\t" + cardsum.to_s
end
62 |
--------------------------------------------------------------------------------
/src/CardDriver.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2013 Jesse Anderson
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import org.apache.hadoop.conf.Configured;
18 | import org.apache.hadoop.fs.Path;
19 | import org.apache.hadoop.io.IntWritable;
20 | import org.apache.hadoop.io.Text;
21 | import org.apache.hadoop.mapreduce.Job;
22 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
23 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
24 | import org.apache.hadoop.util.Tool;
25 | import org.apache.hadoop.util.ToolRunner;
26 |
27 | public class CardDriver extends Configured implements Tool {
28 |
29 | @Override
30 | public int run(String[] args) throws Exception {
31 | String input, output;
32 | if (args.length == 2) {
33 | input = args[0];
34 | output = args[1];
35 | } else {
36 | System.err.println("Incorrect number of arguments. Expected: input output");
37 | return -1;
38 | }
39 |
40 | Job job = new Job(getConf());
41 | job.setJarByClass(CardDriver.class);
42 | job.setJobName(this.getClass().getName());
43 |
44 | FileInputFormat.setInputPaths(job, new Path(input));
45 | FileOutputFormat.setOutputPath(job, new Path(output));
46 |
47 | job.setMapperClass(CardMapper.class);
48 | job.setReducerClass(CardTotalReducer.class);
49 |
50 | job.setMapOutputKeyClass(Text.class);
51 | job.setMapOutputValueClass(IntWritable.class);
52 |
53 | job.setOutputKeyClass(Text.class);
54 | job.setOutputValueClass(IntWritable.class);
55 |
56 | boolean success = job.waitForCompletion(true);
57 | return success ? 0 : 1;
58 | }
59 |
60 | public static void main(String[] args) throws Exception {
61 | CardDriver driver = new CardDriver();
62 | int exitCode = ToolRunner.run(driver, args);
63 | System.exit(exitCode);
64 | }
65 | }
--------------------------------------------------------------------------------
/src/CardMapper.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2013 Jesse Anderson
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import java.io.IOException;
18 | import java.util.regex.Matcher;
19 | import java.util.regex.Pattern;
20 |
21 | import org.apache.hadoop.io.IntWritable;
22 | import org.apache.hadoop.io.LongWritable;
23 | import org.apache.hadoop.io.Text;
24 | import org.apache.hadoop.mapreduce.Mapper;
25 |
26 | public class CardMapper extends Mapper {
27 |
28 | private static Pattern inputPattern = Pattern.compile("(.*) (\\d*)");
29 |
30 | @Override
31 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
32 | String inputLine = value.toString();
33 |
34 | Matcher inputMatch = inputPattern.matcher(inputLine);
35 |
36 | // Use regex to throw out Jacks, Queens, Kings, Aces and Jokers
37 | if (inputMatch.matches()) {
38 | // Normalize inconsistent case for card suits
39 | String cardSuit = inputMatch.group(1).toLowerCase();
40 | int cardValue = Integer.parseInt(inputMatch.group(2));
41 |
42 | context.write(new Text(cardSuit), new IntWritable(cardValue));
43 | }
44 | }
45 | }
--------------------------------------------------------------------------------
/src/CardTotalReducer.java:
--------------------------------------------------------------------------------
1 | /**
2 | * Copyright 2013 Jesse Anderson
3 | *
4 | * Licensed under the Apache License, Version 2.0 (the "License");
5 | * you may not use this file except in compliance with the License.
6 | * You may obtain a copy of the License at
7 | *
8 | * http://www.apache.org/licenses/LICENSE-2.0
9 | *
10 | * Unless required by applicable law or agreed to in writing, software
11 | * distributed under the License is distributed on an "AS IS" BASIS,
12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 | * See the License for the specific language governing permissions and
14 | * limitations under the License.
15 | */
16 |
17 | import java.io.IOException;
18 |
19 | import org.apache.hadoop.io.IntWritable;
20 | import org.apache.hadoop.io.Text;
21 | import org.apache.hadoop.mapreduce.Reducer;
22 |
23 | public class CardTotalReducer extends Reducer {
24 |
25 | @Override
26 | public void reduce(Text key, Iterable values, Context context) throws IOException,
27 | InterruptedException {
28 | int sum = 0;
29 |
30 | // Go through all values to sum up card values for a card suit
31 | for (IntWritable value : values) {
32 | sum += value.get();
33 | }
34 |
35 | context.write(key, new IntWritable(sum));
36 | }
37 | }
38 |
--------------------------------------------------------------------------------