├── .gitignore
├── .project
├── .settings
│   └── org.eclipse.jdt.core.prefs
├── .classpath
├── README.txt
└── src
    └── hbase_mapred1
        ├── PrintUserCount.java
        ├── Importer1.java
        └── FreqCounter1.java

/.gitignore:
--------------------------------------------------------------------------------
*.class
*.jar

*.log
*.out
*~
--------------------------------------------------------------------------------
/.project:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<projectDescription>
	<name>hbase-mapreduce</name>
	<comment></comment>
	<projects>
	</projects>
	<buildSpec>
		<buildCommand>
			<name>org.eclipse.jdt.core.javabuilder</name>
			<arguments>
			</arguments>
		</buildCommand>
	</buildSpec>
	<natures>
		<nature>org.eclipse.jdt.core.javanature</nature>
	</natures>
</projectDescription>
--------------------------------------------------------------------------------
/.settings/org.eclipse.jdt.core.prefs:
--------------------------------------------------------------------------------
#Mon Mar 22 22:55:41 PDT 2010
eclipse.preferences.version=1
org.eclipse.jdt.core.compiler.codegen.inlineJsrBytecode=enabled
org.eclipse.jdt.core.compiler.codegen.targetPlatform=1.6
org.eclipse.jdt.core.compiler.codegen.unusedLocal=preserve
org.eclipse.jdt.core.compiler.compliance=1.6
org.eclipse.jdt.core.compiler.debug.lineNumber=generate
org.eclipse.jdt.core.compiler.debug.localVariable=generate
org.eclipse.jdt.core.compiler.debug.sourceFile=generate
org.eclipse.jdt.core.compiler.problem.assertIdentifier=error
org.eclipse.jdt.core.compiler.problem.enumIdentifier=error
org.eclipse.jdt.core.compiler.source=1.6
--------------------------------------------------------------------------------
/.classpath:
--------------------------------------------------------------------------------
<?xml version="1.0" encoding="UTF-8"?>
<classpath>
	<classpathentry kind="src" path="src"/>
	<classpathentry kind="con" path="org.eclipse.jdt.launching.JRE_CONTAINER"/>
	<classpathentry kind="output" path="classes"/>
</classpath>
--------------------------------------------------------------------------------
/README.txt:
--------------------------------------------------------------------------------
For the detailed tutorial, see:
http://sujee.net/tech/articles/hbase-map-reduce-freq-counter/


To compile the project:

1) open it in Eclipse
2) set the HBASE_HOME classpath variable to the hbase install dir


Setup hbase tables:
---
open the hbase shell:
$ hbase shell
create 'access_logs', 'details'
create 'summary_user', {NAME=>'details', VERSIONS=>1}

'access_logs' holds the 'raw' logs. The key is userID + counter (int + int)
'summary_user' holds the computed summary. The key is userID (int)

Import test data:
---
run 'Importer1' from Eclipse, as a Java application
(it writes 100,000 random access-log rows into 'access_logs')

Running map reduce:
---
1) run 'FreqCounter1' directly from Eclipse, as a Java application

2) or run it on a cluster / command line:
   a) make a jar:
      jar cf freqCounter.jar -C classes .
   b) run it:
      hadoop jar freqCounter.jar hbase_mapred1.FreqCounter1
      check progress at the job tracker web UI: http://localhost:50030
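
Verifying results (optional sanity check; assumes the job above completed and the
tables were created as shown):
---
1) in the hbase shell:
     scan 'summary_user'
2) or run 'PrintUserCount' (included in this project) from Eclipse, as a Java
   application; it scans 'summary_user' and prints each userID with its count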
--------------------------------------------------------------------------------
/src/hbase_mapred1/PrintUserCount.java:
--------------------------------------------------------------------------------
package hbase_mapred1;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * prints the userID -> count summary computed by FreqCounter1,
 * by scanning the 'summary_user' table
 */
public class PrintUserCount {

    public static void main(String[] args) throws Exception {

        HBaseConfiguration conf = new HBaseConfiguration();
        HTable htable = new HTable(conf, "summary_user");

        Scan scan = new Scan();
        ResultScanner scanner = htable.getScanner(scan);
        Result r;
        while ((r = scanner.next()) != null) {
            // the row key is the userID (int)
            byte[] key = r.getRow();
            int userId = Bytes.toInt(key);
            // details:total holds the count written by the reducer
            byte[] totalValue = r.getValue(Bytes.toBytes("details"), Bytes.toBytes("total"));
            int count = Bytes.toInt(totalValue);

            System.out.println("key: " + userId + ", count: " + count);
        }
        scanner.close();
        htable.close();
    }
}
--------------------------------------------------------------------------------
/src/hbase_mapred1/Importer1.java:
--------------------------------------------------------------------------------
package hbase_mapred1;

import java.util.Random;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

/**
 * writes random access logs into the hbase table
 *
 * userID_count => {
 *   details => {
 *     page
 *   }
 * }
 *
 * @author sujee ==at== sujee.net
 */
public class Importer1 {

    public static void main(String[] args) throws Exception {

        String[] pages = { "/", "/a.html", "/b.html", "/c.html" };

        HBaseConfiguration hbaseConfig = new HBaseConfiguration();
        HTable htable = new HTable(hbaseConfig, "access_logs");
        // batch the puts client-side for faster bulk loading
        htable.setAutoFlush(false);
        htable.setWriteBufferSize(1024 * 1024 * 12);

        int totalRecords = 100000;
        int maxID = totalRecords / 1000;
        Random rand = new Random();
        System.out.println("importing " + totalRecords + " records ....");
        for (int i = 0; i < totalRecords; i++) {
            int userID = rand.nextInt(maxID) + 1;
            // composite row key: userID (4 bytes) followed by a counter (4 bytes)
            byte[] rowkey = Bytes.add(Bytes.toBytes(userID), Bytes.toBytes(i));
            String randomPage = pages[rand.nextInt(pages.length)];
            Put put = new Put(rowkey);
            put.add(Bytes.toBytes("details"), Bytes.toBytes("page"), Bytes.toBytes(randomPage));
            htable.put(put);
        }
        htable.flushCommits();
        htable.close();
        System.out.println("done");
    }
}
--------------------------------------------------------------------------------
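A note on the row key design used above: Importer1 packs the userID and a running
counter into a single row key, and the mapper in FreqCounter1 (next file) recovers
the userID by reading back the first four bytes. Below is a minimal standalone
sketch of that round trip using HBase's Bytes utility; the class name RowKeyDemo
is made up for illustration and is not part of this project.

package hbase_mapred1;

import org.apache.hadoop.hbase.util.Bytes;

// illustration only: builds and decomposes the composite row key
// (userID + counter) the same way Importer1 and Mapper1 do
public class RowKeyDemo {

    public static void main(String[] args) {
        int userID = 42;
        int counter = 7;

        // encode: 4 bytes of userID followed by 4 bytes of counter (as in Importer1)
        byte[] rowkey = Bytes.add(Bytes.toBytes(userID), Bytes.toBytes(counter));

        // decode: the userID is the first SIZEOF_INT bytes (as in Mapper1)
        int decodedUserID = Bytes.toInt(rowkey, 0);
        int decodedCounter = Bytes.toInt(rowkey, Bytes.SIZEOF_INT);

        System.out.println("userID=" + decodedUserID + ", counter=" + decodedCounter);
    }
}
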
/src/hbase_mapred1/FreqCounter1.java:
--------------------------------------------------------------------------------
package hbase_mapred1;

import java.io.IOException;

import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;

/**
 * counts the number of rows (hits) per userID in 'access_logs'
 * and writes the totals into 'summary_user'
 *
 * @author sujee ==at== sujee.net
 */
public class FreqCounter1 {

    static class Mapper1 extends TableMapper<ImmutableBytesWritable, IntWritable> {

        private int numRecords = 0;
        private static final IntWritable one = new IntWritable(1);

        @Override
        public void map(ImmutableBytesWritable row, Result values, Context context) throws IOException {
            // extract the userKey from the composite row key (userId + counter)
            ImmutableBytesWritable userKey = new ImmutableBytesWritable(row.get(), 0, Bytes.SIZEOF_INT);
            try {
                context.write(userKey, one);
            } catch (InterruptedException e) {
                throw new IOException(e);
            }
            numRecords++;
            if ((numRecords % 10000) == 0) {
                context.setStatus("mapper processed " + numRecords + " records so far");
            }
        }
    }

    public static class Reducer1 extends TableReducer<ImmutableBytesWritable, IntWritable, ImmutableBytesWritable> {

        public void reduce(ImmutableBytesWritable key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable val : values) {
                sum += val.get();
            }

            // write the total into 'summary_user' under details:total
            Put put = new Put(key.get());
            put.add(Bytes.toBytes("details"), Bytes.toBytes("total"), Bytes.toBytes(sum));
            System.out.println(String.format("stats : key : %d, count : %d", Bytes.toInt(key.get()), sum));
            context.write(key, put);
        }
    }

    public static void main(String[] args) throws Exception {
        HBaseConfiguration conf = new HBaseConfiguration();
        Job job = new Job(conf, "Hbase_FreqCounter1");
        job.setJarByClass(FreqCounter1.class);
        Scan scan = new Scan();
        String columns = "details"; // comma separated
        scan.addColumns(columns);
        // only the first KV of each row is needed to count it
        scan.setFilter(new FirstKeyOnlyFilter());
        TableMapReduceUtil.initTableMapperJob("access_logs", scan, Mapper1.class, ImmutableBytesWritable.class,
                IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("summary_user", Reducer1.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
--------------------------------------------------------------------------------
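A closing note: this project targets the HBase 0.20 / Hadoop 0.20 APIs current when
it was written (2010), so several calls above (new HBaseConfiguration(),
Scan.addColumns(String), new Job(...)) were deprecated in later releases. As a rough
sketch only -- untested against this repo, and assuming an HBase 0.98+ / Hadoop 2
client on the classpath; the class name FreqCounter1Modern is made up for
illustration -- the same job wiring would look like this:

package hbase_mapred1;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;

// sketch only: FreqCounter1.main() rewritten against the newer client API
public class FreqCounter1Modern {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();      // replaces new HBaseConfiguration()
        Job job = Job.getInstance(conf, "Hbase_FreqCounter1"); // replaces new Job(conf, ...)
        job.setJarByClass(FreqCounter1.class);

        Scan scan = new Scan();
        scan.addFamily(Bytes.toBytes("details"));              // replaces deprecated addColumns(String)
        scan.setFilter(new FirstKeyOnlyFilter());              // one KV per row is enough to count it

        TableMapReduceUtil.initTableMapperJob("access_logs", scan, FreqCounter1.Mapper1.class,
                ImmutableBytesWritable.class, IntWritable.class, job);
        TableMapReduceUtil.initTableReducerJob("summary_user", FreqCounter1.Reducer1.class, job);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}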