├── .gitignore
├── .project
├── .settings
│   └── org.eclipse.jdt.core.prefs
├── .classpath
├── README.txt
└── src
    └── hbase_mapred1
        ├── PrintUserCount.java
        ├── Importer1.java
        └── FreqCounter1.java
/.gitignore:
--------------------------------------------------------------------------------
1 | *.class
2 | *.jar
3 |
4 | *.log
5 | *.out
6 | *~
7 |
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <projectDescription>
 3 |     <name>hbase-mapreduce</name>
 4 |     <comment></comment>
 5 |     <projects>
 6 |     </projects>
 7 |     <buildSpec>
 8 |         <buildCommand>
 9 |             <name>org.eclipse.jdt.core.javabuilder</name>
10 |             <arguments>
11 |             </arguments>
12 |         </buildCommand>
13 |     </buildSpec>
14 |     <natures>
15 |         <nature>org.eclipse.jdt.core.javanature</nature>
16 |     </natures>
17 | </projectDescription>
18 |
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
1 | #Mon Mar 22 22:55:41 PDT 2010
2 | eclipse.preferences.version=1
3 | org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
4 | org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
5 | org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
6 | org.eclipse.jdt.core.compiler.compliance=1.6
7 | org.eclipse.jdt.core.compiler.debug.lineNumber=generate
8 | org.eclipse.jdt.core.compiler.debug.localVariable=generate
9 | org.eclipse.jdt.core.compiler.debug.sourceFile=generate
10 | org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
11 | org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
12 | org.eclipse.jdt.core.compiler.source=1.6
13 |
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
 1 | <?xml version="1.0" encoding="UTF-8"?>
 2 | <classpath>
 3 |     <classpathentry kind="src" path="src"/>
 4 |     <classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
 5 |     <!-- assumption: the HBase/Hadoop jars are referenced through the
 6 |          HBASE_HOME classpath variable mentioned in README.txt, e.g.: -->
 7 |     <classpathentry kind="var" path="HBASE_HOME/hbase.jar"/>
 8 |     <classpathentry kind="output" path="classes"/>
 9 | </classpath>
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
 1 | For a detailed tutorial see:
 2 | http://sujee.net/tech/articles/hbase-map-reduce-freq-counter/
 3 |
 4 |
 5 | To compile the project:
 6 |
 7 | 1) open the project in Eclipse
 8 | 2) set the HBASE_HOME classpath variable to the hbase install dir
 9 |
10 |
11 | setup hbase tables:
12 | ---
13 | open the hbase shell
14 | $ hbase shell
15 | create 'access_logs', 'details'
16 | create 'summary_user', {NAME=>'details', VERSIONS=>1}
17 |
18 | 'access_logs' holds the raw logs. The row key is userID + counter (int + int).
19 | 'summary_user' holds the computed summary. The row key is userID (int).
20 |
21 | Running the map reduce job
22 | --
23 | 1) run 'FreqCounter1' directly from Eclipse, as a Java application
24 |
25 | 2) run on the cluster / command line
26 |    a) make a jar
27 |       jar cf freqCounter.jar -C classes .
28 |
29 |    b) hadoop jar freqCounter.jar hbase_mapred1.FreqCounter1
30 |       check job progress at the job tracker web UI : http://localhost:50030
31 |
32 |
--------------------------------------------------------------------------------
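
The two tables can also be created from Java instead of the hbase shell. A minimal sketch against the 0.20-era client API (the class name TableSetup is ours; the table and family names come from README.txt):

    package hbase_mapred1;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HColumnDescriptor;
    import org.apache.hadoop.hbase.HTableDescriptor;
    import org.apache.hadoop.hbase.client.HBaseAdmin;

    // hypothetical helper, equivalent to the two 'create' commands in README.txt
    public class TableSetup {
        public static void main(String[] args) throws Exception {
            HBaseAdmin admin = new HBaseAdmin(new HBaseConfiguration());

            // create 'access_logs', 'details'
            HTableDescriptor accessLogs = new HTableDescriptor("access_logs");
            accessLogs.addFamily(new HColumnDescriptor("details"));
            admin.createTable(accessLogs);

            // create 'summary_user', {NAME=>'details', VERSIONS=>1}
            HTableDescriptor summaryUser = new HTableDescriptor("summary_user");
            HColumnDescriptor details = new HColumnDescriptor("details");
            details.setMaxVersions(1);
            summaryUser.addFamily(details);
            admin.createTable(summaryUser);
        }
    }
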
/src/hbase_mapred1/PrintUserCount.java:
--------------------------------------------------------------------------------
 1 | package hbase_mapred1;
 2 |
 3 | import org.apache.hadoop.hbase.HBaseConfiguration;
 4 | import org.apache.hadoop.hbase.client.HTable;
 5 | import org.apache.hadoop.hbase.client.Result;
 6 | import org.apache.hadoop.hbase.client.ResultScanner;
 7 | import org.apache.hadoop.hbase.client.Scan;
 8 | import org.apache.hadoop.hbase.util.Bytes;
 9 |
10 | /**
11 |  * prints the per-user totals that FreqCounter1 wrote into 'summary_user'
12 |  */
13 | public class PrintUserCount {
14 |
15 |     public static void main(String[] args) throws Exception {
16 |
17 |         HBaseConfiguration conf = new HBaseConfiguration();
18 |         HTable htable = new HTable(conf, "summary_user");
19 |
20 |         Scan scan = new Scan();
21 |         ResultScanner scanner = htable.getScanner(scan);
22 |         Result r;
23 |         while ((r = scanner.next()) != null) {
24 |             // row key is the userID (int)
25 |             int userId = Bytes.toInt(r.getRow());
26 |             // details:total holds the count written by the reducer
27 |             byte[] totalValue = r.getValue(Bytes.toBytes("details"), Bytes.toBytes("total"));
28 |             int count = Bytes.toInt(totalValue);
29 |             System.out.println("key: " + userId + ", count: " + count);
30 |         }
31 |         scanner.close();
32 |         htable.close();
33 |     }
34 | }
--------------------------------------------------------------------------------
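
When only one user's total is needed, a point Get avoids scanning the whole table. A minimal sketch (the class name PrintOneUser and the hard-coded userId are ours, not part of the original project):

    package hbase_mapred1;

    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.client.Get;
    import org.apache.hadoop.hbase.client.HTable;
    import org.apache.hadoop.hbase.client.Result;
    import org.apache.hadoop.hbase.util.Bytes;

    // hypothetical companion to PrintUserCount: look up a single user's total
    public class PrintOneUser {
        public static void main(String[] args) throws Exception {
            int userId = 7; // any id in the range Importer1 generates (1..100)
            HTable htable = new HTable(new HBaseConfiguration(), "summary_user");
            Result r = htable.get(new Get(Bytes.toBytes(userId)));
            byte[] total = r.getValue(Bytes.toBytes("details"), Bytes.toBytes("total"));
            System.out.println("user " + userId + " count: "
                    + (total == null ? "(no row)" : Bytes.toInt(total)));
            htable.close();
        }
    }
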
/src/hbase_mapred1/Importer1.java:
--------------------------------------------------------------------------------
1 | package hbase_mapred1;
2 |
3 | import java.util.Random;
4 |
5 | import org.apache.hadoop.hbase.HBaseConfiguration;
6 | import org.apache.hadoop.hbase.client.HTable;
7 | import org.apache.hadoop.hbase.client.Put;
8 | import org.apache.hadoop.hbase.util.Bytes;
9 |
10 | /**
11 | * writes random access logs into hbase table
12 | *
13 | * userID_count => {
14 | * details => {
15 | * page
16 | * }
17 | * }
18 | *
19 | * @author sujee ==at== sujee.net
20 | *
21 | */
22 | public class Importer1 {
23 |
24 | public static void main(String[] args) throws Exception {
25 |
26 | String [] pages = {"/", "/a.html", "/b.html", "/c.html"};
27 |
28 | HBaseConfiguration hbaseConfig = new HBaseConfiguration();
29 | HTable htable = new HTable(hbaseConfig, "access_logs");
30 | htable.setAutoFlush(false); // buffer puts client-side; flush in batches for faster bulk import
31 | htable.setWriteBufferSize(1024 * 1024 * 12);
32 |
33 | int totalRecords = 100000;
34 | int maxID = totalRecords / 1000;
35 | Random rand = new Random();
36 | System.out.println("importing " + totalRecords + " records ....");
37 | for (int i=0; i < totalRecords; i++)
38 | {
39 | int userID = rand.nextInt(maxID) + 1;
40 | byte [] rowkey = Bytes.add(Bytes.toBytes(userID), Bytes.toBytes(i)); // composite key: userID (4 bytes) + counter (4 bytes)
41 | String randomPage = pages[rand.nextInt(pages.length)];
42 | Put put = new Put(rowkey);
43 | put.add(Bytes.toBytes("details"), Bytes.toBytes("page"), Bytes.toBytes(randomPage));
44 | htable.put(put);
45 | }
46 | htable.flushCommits();
47 | htable.close();
48 | System.out.println("done");
49 | }
50 | }
51 |
--------------------------------------------------------------------------------
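
The 8-byte composite row key is the easiest part to get wrong. This self-contained sketch (the class name KeyDemo is ours) shows how a key built the way Importer1 builds it splits back into its two ints, which is exactly what FreqCounter1's mapper relies on:

    package hbase_mapred1;

    import org.apache.hadoop.hbase.util.Bytes;

    // hypothetical demo: encode/decode the userID+counter row key used in access_logs
    public class KeyDemo {
        public static void main(String[] args) {
            int userID = 42, counter = 1001;

            // same construction as Importer1: 4 bytes of userID, then 4 bytes of counter
            byte[] rowkey = Bytes.add(Bytes.toBytes(userID), Bytes.toBytes(counter));

            // decode: the first SIZEOF_INT bytes are the userID, the rest is the counter
            System.out.println("userID  = " + Bytes.toInt(rowkey, 0));
            System.out.println("counter = " + Bytes.toInt(rowkey, Bytes.SIZEOF_INT));
        }
    }
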
/src/hbase_mapred1/FreqCounter1.java:
--------------------------------------------------------------------------------
1 | package hbase_mapred1;
2 |
3 | import java.io.IOException;
4 |
5 | import org.apache.hadoop.hbase.HBaseConfiguration;
6 | import org.apache.hadoop.hbase.client.Put;
7 | import org.apache.hadoop.hbase.client.Result;
8 | import org.apache.hadoop.hbase.client.Scan;
9 | import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
10 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
11 | import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
12 | import org.apache.hadoop.hbase.mapreduce.TableMapper;
13 | import org.apache.hadoop.hbase.mapreduce.TableReducer;
14 | import org.apache.hadoop.hbase.util.Bytes;
15 | import org.apache.hadoop.io.IntWritable;
16 | import org.apache.hadoop.mapreduce.Job;
17 |
18 | /**
19 | * counts the number of access_logs records per userID
20 | *
21 | * @author sujee ==at== sujee.net
22 | *
23 | */
24 | public class FreqCounter1 {
25 |
26 | static class Mapper1 extends TableMapper<ImmutableBytesWritable, IntWritable> {
27 |
28 | private int numRecords = 0;
29 | private static final IntWritable one = new IntWritable(1);
30 |
31 | @Override
32 | public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
33 | // extract userKey from the compositeKey (userId + counter)
34 | ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get(), 0, Bytes.SIZEOF_INT);
35 | try {
36 | context.write(userKey, one);
37 | } catch (InterruptedException e) {
38 | throw new IOException(e);
39 | }
40 | numRecords++;
41 | if ((numRecords % 10000) == 0) {
42 | context.setStatus("mapper processed " + numRecords + " records so far");
43 | }
44 | }
45 | }
46 |
47 | public static class Reducer1 extends TableReducer<ImmutableBytesWritable, IntWritable, ImmutableBytesWritable> {
48 |
49 | public void reduce(ImmutableBytesWritable key, Iterable<IntWritable> values, Context context)
50 | throws IOException, InterruptedException {
51 | int sum = 0;
52 | for (IntWritable val : values) {
53 | sum += val.get();
54 | }
55 |
56 | Put put = new Put(key.get());
57 | put.add(Bytes.toBytes("details"), Bytes.toBytes("total"), Bytes.toBytes(sum));
58 | System.out.println(String.format("stats : key : %d, count : %d", Bytes.toInt(key.get()), sum));
59 | context.write(key, put);
60 | }
61 | }
62 |
63 | public static void main(String[] args) throws Exception {
64 | HBaseConfiguration conf = new HBaseConfiguration();
65 | Job job = new Job(conf, "Hbase_FreqCounter1");
66 | job.setJarByClass(FreqCounter1.class);
67 | Scan scan = new Scan();
68 | String columns = "details"; // comma separated list of columns/families to scan
69 | scan.addColumns(columns);
70 | scan.setFilter(new FirstKeyOnlyFilter());
71 | TableMapReduceUtil.initTableMapperJob("access_logs", scan, Mapper1.class, ImmutableBytesWritable.class,
72 | IntWritable.class, job);
73 | TableMapReduceUtil.initTableReducerJob("summary_user", Reducer1.class, job);
74 | System.exit(job.waitForCompletion(true) ? 0 : 1);
75 | }
76 | }
77 |
--------------------------------------------------------------------------------
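
For larger tables the job's full scan can be tuned with scanner caching, which fetches rows in batches per RPC round-trip instead of one at a time. A sketch of the scan setup with caching added, as a drop-in for the corresponding lines in FreqCounter1.main() (the value 500 is an arbitrary assumption, not from the original project, and assumes your HBase version supports Scan.setCaching):

    // uses the same imports as FreqCounter1
    Scan scan = new Scan();
    scan.addColumns("details");               // scan only the 'details' family
    scan.setFilter(new FirstKeyOnlyFilter()); // one KeyValue per row is enough to count
    scan.setCaching(500);                     // assumption: 500 rows per RPC round-trip
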