├── .gitignore
├── src
│   ├── main
│   │   ├── java
│   │   │   ├── HBaseIA
│   │   │   │   └── TwitBase
│   │   │   │       ├── coprocessors
│   │   │   │       │   ├── RelationCountProtocol.java
│   │   │   │       │   ├── RelationCountImpl.java
│   │   │   │       │   └── FollowsObserver.java
│   │   │   │       ├── model
│   │   │   │       │   ├── Twit.java
│   │   │   │       │   ├── Relation.java
│   │   │   │       │   └── User.java
│   │   │   │       ├── filters
│   │   │   │       │   ├── PasswordStrengthFilterExample.java
│   │   │   │       │   └── PasswordStrengthFilter.java
│   │   │   │       ├── UsersTool.java
│   │   │   │       ├── LoadUsers.java
│   │   │   │       ├── TwitsTool.java
│   │   │   │       ├── LoadTwits.java
│   │   │   │       ├── RelationsTool.java
│   │   │   │       ├── mapreduce
│   │   │   │       │   ├── CountShakespeare.java
│   │   │   │       │   ├── TimeSpent.java
│   │   │   │       │   └── HamletTagger.java
│   │   │   │       ├── InitTables.java
│   │   │   │       └── hbase
│   │   │   │           ├── TwitsDAO.java
│   │   │   │           ├── UsersDAO.java
│   │   │   │           └── RelationsDAO.java
│   │   │   └── utils
│   │   │       ├── Md5Utils.java
│   │   │       ├── LoadUtils.java
│   │   │       └── TablePreSplitter.java
│   │   └── resources
│   │       ├── log4j.properties
│   │       ├── assemblies
│   │       │   └── filtered-jar-with-deps.xml
│   │       └── dict
│   │           ├── README
│   │           └── propernames
│   └── test
│       └── resource
│           └── listing 3.3.txt
├── sample data
│   └── timespent.txt
├── bin
│   ├── TwitBase.jrb
│   ├── init_twitbase.sh
│   └── launcher
├── README.md
└── pom.xml
/.gitignore:
--------------------------------------------------------------------------------
1 | target
2 | .classpath
3 | .project
4 | .settings
5 | **/part-*
6 | **/_SUCCESS
7 | **/.*.crc
--------------------------------------------------------------------------------
/src/main/java/HBaseIA/TwitBase/coprocessors/RelationCountProtocol.java:
--------------------------------------------------------------------------------
1 | package HBaseIA.TwitBase.coprocessors;
2 | 
3 | import java.io.IOException;
4 | 
5 | import org.apache.hadoop.hbase.ipc.CoprocessorProtocol;
6 | 
7 | public interface RelationCountProtocol extends CoprocessorProtocol {
8 |   public long followedByCount(String userId) throws IOException;
9 | }
10 | 
--------------------------------------------------------------------------------
/src/main/java/HBaseIA/TwitBase/model/Twit.java:
--------------------------------------------------------------------------------
1 | package HBaseIA.TwitBase.model;
2 | 
3 | import org.joda.time.DateTime;
4 | 
5 | public abstract class Twit {
6 | 
7 |   public String user;
8 |   public DateTime dt;
9 |   public String text;
10 | 
11 |   @Override
12 |   public String toString() {
13 |     return String.format(
14 |       "<Twit: %s, %s, %s>",
15 |       user, dt, text);
16 |   }
17 | }
18 | 
--------------------------------------------------------------------------------
/src/main/java/HBaseIA/TwitBase/model/Relation.java:
--------------------------------------------------------------------------------
1 | package HBaseIA.TwitBase.model;
2 | 
3 | public abstract class Relation {
4 | 
5 |   public String relation;
6 |   public String from;
7 |   public String to;
8 | 
9 |   @Override
10 |   public String toString() {
11 |     return String.format(
12 |       "<Relation: %s %s %s>",
13 |       from,
14 |       relation,
15 |       to);
16 |   }
17 | }
18 | 
--------------------------------------------------------------------------------
/src/main/java/HBaseIA/TwitBase/model/User.java:
--------------------------------------------------------------------------------
1 | package HBaseIA.TwitBase.model;
2 | 
3 | public abstract class User {
4 | 
5 |   public String user;
6 |   public String name;
7 |   public String email;
8 |   public String password;
9 |   public long tweetCount;
10 | 
11 |   @Override
12 |   public String toString() {
13 |     return String.format(
14 |       "<User: %s, %s, %s, %d>",
15 |       user, name, email, tweetCount);
16 |   }
17 }
18 | 
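The RelationCountProtocol endpoint above only does something once a client invokes it against the followedBy table. Below is a minimal client-side sketch, assuming the HBase 0.92-era HTableInterface.coprocessorExec() API this project is built against. The package and class name (HBaseIA.TwitBase.examples.RelationCountClientSketch) and the unbounded null start/stop keys are illustrative only; in the project itself the call is presumably wrapped by RelationsDAO.followedByCount rather than made directly like this.

package HBaseIA.TwitBase.examples;   // hypothetical package, for illustration only

import java.io.IOException;
import java.util.Map;

import org.apache.hadoop.hbase.client.HTableInterface;
import org.apache.hadoop.hbase.client.HTablePool;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.util.Bytes;

import HBaseIA.TwitBase.coprocessors.RelationCountProtocol;

public class RelationCountClientSketch {

  public static void main(String[] args) throws Throwable {
    final String userId = args[0];

    HTablePool pool = new HTablePool();
    HTableInterface followedBy = pool.getTable(Bytes.toBytes("followedBy"));

    // Run the endpoint on every region of the followedBy table
    // (null start/stop keys) and collect one partial count per region.
    Map<byte[], Long> perRegion = followedBy.coprocessorExec(
        RelationCountProtocol.class,
        null, null,
        new Batch.Call<RelationCountProtocol, Long>() {
          public Long call(RelationCountProtocol endpoint) throws IOException {
            return endpoint.followedByCount(userId);
          }
        });

    // Each region returns its own partial count; sum them on the client.
    long sum = 0;
    for (Long partial : perRegion.values()) {
      sum += (partial == null ? 0L : partial.longValue());
    }
    System.out.println(userId + " has " + sum + " followers.");

    followedBy.close();
  }
}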
-------------------------------------------------------------------------------- /src/main/resources/log4j.properties: -------------------------------------------------------------------------------- 1 | log4j.rootLogger=INFO, console 2 | 3 | log4j.appender.console=org.apache.log4j.ConsoleAppender 4 | log4j.appender.console.target=System.err 5 | log4j.appender.console.layout=org.apache.log4j.PatternLayout 6 | log4j.appender.console.layout.ConversionPattern=%d{HH:mm:ss} %p %c{2}: %m%n 7 | 8 | # we care primarily for TwitBase cli utils and Hadoop MR output. 9 | log4j.logger.TwitBase=INFO 10 | log4j.logger.org.apache.hadoop.mapred=INFO 11 | log4j.logger.org.apache.hadoop.hbase=WARN 12 | log4j.logger.org.apache=WARN 13 | -------------------------------------------------------------------------------- /sample data/timespent.txt: -------------------------------------------------------------------------------- 1 | 01/01/2011 18:00 user1 load_page1 3s 2 | 01/01/2011 18:01 user1 load_page2 5s 3 | 01/01/2011 18:01 user2 load_page1 2s 4 | 01/01/2011 18:01 user3 load_page1 3s 5 | 01/01/2011 18:04 user4 load_page3 10s 6 | 01/01/2011 18:05 user1 load_page3 5s 7 | 01/01/2011 18:05 user3 load_page5 3s 8 | 01/01/2011 18:06 user4 load_page4 6s 9 | 01/01/2011 18:06 user1 purchase 5s 10 | 01/01/2011 18:10 user4 purchase 8s 11 | 01/01/2011 18:10 user1 confirm 9s 12 | 01/01/2011 18:10 user4 confirm 11s 13 | 01/01/2011 18:11 user1 load_page3 3s 14 | -------------------------------------------------------------------------------- /src/test/resource/listing 3.3.txt: -------------------------------------------------------------------------------- 1 | 01/01/2011 18:00 user1 load_page1 3s 2 | 01/01/2011 18:01 user1 load_page2 5s 3 | 01/01/2011 18:01 user2 load_page1 2s 4 | 01/01/2011 18:01 user3 load_page1 3s 5 | 01/01/2011 18:04 user4 load_page3 10s 6 | 01/01/2011 18:05 user1 load_page3 5s 7 | 01/01/2011 18:05 user3 load_page5 3s 8 | 01/01/2011 18:06 user4 load_page4 6s 9 | 01/01/2011 18:06 user1 purchase 5s 10 | 01/01/2011 18:10 user4 purchase 8s 11 | 01/01/2011 18:10 user1 confirm 9s 12 | 01/01/2011 18:10 user4 confirm 11s 13 | 01/01/2011 18:11 user1 load_page3 3s 14 | -------------------------------------------------------------------------------- /src/main/java/utils/Md5Utils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import java.security.MessageDigest; 4 | import java.security.NoSuchAlgorithmException; 5 | 6 | import org.apache.hadoop.hbase.util.Bytes; 7 | 8 | public class Md5Utils { 9 | 10 | public static final int MD5_LENGTH = 16; // bytes 11 | 12 | public static byte[] md5sum(String s) { 13 | MessageDigest d; 14 | try { 15 | d = MessageDigest.getInstance("MD5"); 16 | } catch (NoSuchAlgorithmException e) { 17 | throw new RuntimeException("MD5 algorithm not available!", e); 18 | } 19 | 20 | return d.digest(Bytes.toBytes(s)); 21 | } 22 | 23 | } 24 | -------------------------------------------------------------------------------- /bin/TwitBase.jrb: -------------------------------------------------------------------------------- 1 | def list_users() 2 | users_table = @hbase.table('users', @formatter) 3 | scan = {"COLUMNS" => ['info:user', 'info:name', 'info:email']} 4 | results = {} 5 | users_table.scan(scan) do |row,col| 6 | results[row] ||= {} 7 | m = /^.*info:(.*), t.*value=(.*)$/.match(col) 8 | results[row][m[1]] = m[2] if m 9 | end 10 | 11 | results.each do |row,vals| 12 | puts "" % [vals['user'], vals['name'], vals['email']] 13 | end 14 
| end 15 | 16 | def main(args) 17 | if args.length == 0 || args[0] == 'help' 18 | puts <&2 'HBASE_HOME not set. using hbase on $PATH' 7 | HBASE_CLI=$(which hbase) 8 | } 9 | 10 | TWITS_TABLE=${TWITS_TABLE-'twits'} 11 | TWITS_FAM=${TWITS_FAM-'twits'} 12 | USERS_TABLE=${USERS_TABLE-'users'} 13 | USERS_FAM=${USERS_FAM-'info'} 14 | FOLLOWS_TABLE=${FOLLOWS_TABLE-'follows'} 15 | FOLLOWS_FAM=${FOLLOWS_FAM-'f'} 16 | FOLLOWEDBY_TABLE=${FOLLOWED_TABLE-'followedBy'} 17 | FOLLOWEDBY_FAM=${FOLLOWED_FAM-'f'} 18 | 19 | exec "$HBASE_CLI" shell < '$TWITS_FAM', VERSIONS => 1} 22 | 23 | create '$USERS_TABLE', 24 | {NAME => '$USERS_FAM'} 25 | 26 | create '$FOLLOWS_TABLE', 27 | {NAME => '$FOLLOWS_FAM', VERSIONS => 1} 28 | 29 | create '$FOLLOWEDBY_TABLE', 30 | {NAME => '$FOLLOWEDBY_FAM', VERSIONS => 1} 31 | EOF 32 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/filters/PasswordStrengthFilterExample.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.filters; 2 | 3 | import HBaseIA.TwitBase.hbase.UsersDAO; 4 | 5 | import org.apache.hadoop.hbase.client.*; 6 | import org.apache.hadoop.hbase.filter.*; 7 | import java.io.IOException; 8 | 9 | 10 | public class PasswordStrengthFilterExample { 11 | 12 | public static void main (String[] args) { 13 | try { 14 | HTable t = new HTable(UsersDAO.TABLE_NAME); 15 | Scan scan = new Scan(); 16 | scan.addColumn(UsersDAO.INFO_FAM, UsersDAO.PASS_COL); 17 | scan.addColumn(UsersDAO.INFO_FAM, UsersDAO.NAME_COL); 18 | scan.addColumn(UsersDAO.INFO_FAM, UsersDAO.EMAIL_COL); 19 | Filter f = new PasswordStrengthFilter(4); 20 | scan.setFilter(f); 21 | ResultScanner rs = t.getScanner(scan); 22 | for (Result r : rs) { 23 | System.out.println(r); 24 | } 25 | 26 | } catch (IOException e) { 27 | e.printStackTrace(); 28 | } 29 | 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/main/resources/assemblies/filtered-jar-with-deps.xml: -------------------------------------------------------------------------------- 1 | 5 | filtered-jar-with-deps 6 | 7 | jar 8 | 9 | false 10 | 11 | 12 | / 13 | true 14 | 16 | 17 | 18 | META-INF/LICENSE 19 | 20 | 21 | true 22 | compile 23 | 24 | 25 | 26 | -------------------------------------------------------------------------------- /src/main/java/utils/LoadUtils.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import java.io.BufferedReader; 4 | import java.io.IOException; 5 | import java.io.InputStream; 6 | import java.io.InputStreamReader; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import HBaseIA.TwitBase.LoadUsers; 11 | 12 | public class LoadUtils { 13 | 14 | public static final String WORDS_PATH = "/dict/web2"; 15 | public static final String NAMES_PATH = "/dict/propernames"; 16 | 17 | public static List readResource(String path) throws IOException { 18 | List lines = new ArrayList(); 19 | String line; 20 | InputStream s = LoadUsers.class.getResourceAsStream(path); 21 | BufferedReader reader = new BufferedReader(new InputStreamReader(s)); 22 | while ((line = reader.readLine()) != null) { 23 | lines.add(line); 24 | } 25 | s.close(); 26 | return lines; 27 | } 28 | 29 | public static int randInt(int max) { 30 | return (int)Math.floor(Math.random() * max); 31 | } 32 | 33 | public static String randNth(List words) { 34 | int val = randInt(words.size()); 35 | return words.get(val); 36 | } 37 | } 38 | 
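Md5Utils and LoadUtils above are the helpers that the HBase-facing code leans on; Md5Utils in particular supplies the fixed-width, evenly distributed row-key prefixes used by the DAOs later in this listing. The short sketch below, with a hypothetical RowKeyDemo class and arbitrary example values, only illustrates that key layout: it mirrors what TwitsDAO.mkRowKey does (md5(user) followed by the negated timestamp, so a user's twits sort newest-first) rather than adding anything new.

package utils;

import org.apache.hadoop.hbase.util.Bytes;
import org.joda.time.DateTime;

// Hypothetical demo class -- the same layout is produced by TwitsDAO.mkRowKey.
public class RowKeyDemo {

  public static void main(String[] args) {
    String user = "SomeUser";                       // arbitrary example values
    DateTime dt = new DateTime(2012, 6, 1, 12, 0, 0, 0);

    // md5(user) gives every user a fixed-width, evenly distributed key prefix.
    byte[] userHash = Md5Utils.md5sum(user);

    // Negated epoch millis make a user's twits sort newest-first.
    byte[] timestamp = Bytes.toBytes(-1 * dt.getMillis());

    byte[] rowKey = new byte[Md5Utils.MD5_LENGTH + Bytes.SIZEOF_LONG];
    int offset = Bytes.putBytes(rowKey, 0, userHash, 0, userHash.length);
    Bytes.putBytes(rowKey, offset, timestamp, 0, timestamp.length);

    // toStringBinary is a convenient way to eyeball the resulting 24-byte key.
    System.out.println(Bytes.toStringBinary(rowKey));
  }
}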
-------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/filters/PasswordStrengthFilter.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.filters; 2 | 3 | import HBaseIA.TwitBase.hbase.UsersDAO; 4 | 5 | import org.apache.hadoop.hbase.KeyValue; 6 | import org.apache.hadoop.hbase.filter.FilterBase; 7 | import org.apache.hadoop.hbase.util.Bytes; 8 | 9 | import java.io.DataInput; 10 | import java.io.DataOutput; 11 | import java.io.IOException; 12 | 13 | public class PasswordStrengthFilter extends FilterBase { 14 | private int len; 15 | private boolean filterRow = false; 16 | 17 | public PasswordStrengthFilter() { 18 | super(); 19 | } 20 | 21 | public PasswordStrengthFilter(int len) { 22 | this.len = len; 23 | } 24 | 25 | public ReturnCode filterKeyValue(KeyValue v) { 26 | if (Bytes.toString(v.getQualifier()).equals(Bytes.toString(UsersDAO.PASS_COL))) { 27 | if(v.getValueLength() >= len) 28 | this.filterRow = true; 29 | return ReturnCode.SKIP; 30 | } 31 | return ReturnCode.INCLUDE; 32 | } 33 | 34 | public boolean filterRow() { 35 | return this.filterRow; 36 | } 37 | 38 | public void reset() { 39 | this.filterRow = false; 40 | } 41 | 42 | public void write(DataOutput out) throws IOException { 43 | out.writeInt(len); 44 | } 45 | 46 | public void readFields(DataInput in) throws IOException { 47 | this.len = in.readInt(); 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/coprocessors/RelationCountImpl.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.coprocessors; 2 | 3 | import static HBaseIA.TwitBase.hbase.RelationsDAO.FROM; 4 | import static HBaseIA.TwitBase.hbase.RelationsDAO.RELATION_FAM; 5 | 6 | import java.io.IOException; 7 | import java.util.ArrayList; 8 | import java.util.List; 9 | 10 | import org.apache.hadoop.hbase.KeyValue; 11 | import org.apache.hadoop.hbase.client.Scan; 12 | import org.apache.hadoop.hbase.coprocessor.BaseEndpointCoprocessor; 13 | import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; 14 | import org.apache.hadoop.hbase.filter.PrefixFilter; 15 | import org.apache.hadoop.hbase.regionserver.InternalScanner; 16 | 17 | import utils.Md5Utils; 18 | 19 | public class RelationCountImpl 20 | extends BaseEndpointCoprocessor implements RelationCountProtocol { 21 | 22 | @Override 23 | public long followedByCount(String userId) throws IOException { 24 | byte[] startkey = Md5Utils.md5sum(userId); 25 | Scan scan = new Scan(startkey); 26 | scan.setFilter(new PrefixFilter(startkey)); 27 | scan.addColumn(RELATION_FAM, FROM); 28 | scan.setMaxVersions(1); 29 | 30 | RegionCoprocessorEnvironment env 31 | = (RegionCoprocessorEnvironment)getEnvironment(); 32 | InternalScanner scanner = env.getRegion().getScanner(scan); 33 | 34 | long sum = 0; 35 | List results = new ArrayList(); 36 | boolean hasMore = false; 37 | do { 38 | hasMore = scanner.next(results); 39 | sum += results.size(); 40 | results.clear(); 41 | } while (hasMore); 42 | scanner.close(); 43 | return sum; 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/UsersTool.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import 
org.apache.hadoop.hbase.client.HTablePool; 7 | import org.apache.log4j.Logger; 8 | 9 | import HBaseIA.TwitBase.hbase.UsersDAO; 10 | import HBaseIA.TwitBase.model.User; 11 | 12 | public class UsersTool { 13 | 14 | private static final Logger log = Logger.getLogger(UsersTool.class); 15 | 16 | public static final String usage = 17 | "usertool action ...\n" + 18 | " help - print this message and exit.\n" + 19 | " add user name email password - add a new user.\n" + 20 | " get user - retrieve a specific user.\n" + 21 | " list - list all installed users.\n"; 22 | 23 | public static void main(String[] args) throws IOException { 24 | if (args.length == 0 || "help".equals(args[0])) { 25 | System.out.println(usage); 26 | System.exit(0); 27 | } 28 | 29 | HTablePool pool = new HTablePool(); 30 | UsersDAO dao = new UsersDAO(pool); 31 | 32 | if ("get".equals(args[0])) { 33 | log.debug(String.format("Getting user %s", args[1])); 34 | User u = dao.getUser(args[1]); 35 | System.out.println(u); 36 | } 37 | 38 | if ("add".equals(args[0])) { 39 | log.debug("Adding user..."); 40 | dao.addUser(args[1], args[2], args[3], args[4]); 41 | User u = dao.getUser(args[1]); 42 | System.out.println("Successfully added user " + u); 43 | } 44 | 45 | if ("list".equals(args[0])) { 46 | List users = dao.getUsers(); 47 | log.info(String.format("Found %s users.", users.size())); 48 | for(User u : users) { 49 | System.out.println(u); 50 | } 51 | } 52 | 53 | pool.closeTablePool(UsersDAO.TABLE_NAME); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/LoadUsers.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import org.apache.hadoop.hbase.client.HTablePool; 7 | 8 | import utils.LoadUtils; 9 | 10 | import HBaseIA.TwitBase.hbase.UsersDAO; 11 | 12 | public class LoadUsers { 13 | 14 | public static final String usage = 15 | "loadusers count\n" + 16 | " help - print this message and exit.\n" + 17 | " count - add count random TwitBase users.\n"; 18 | 19 | private static String randName(List names) { 20 | String name = LoadUtils.randNth(names) + " "; 21 | name += LoadUtils.randNth(names); 22 | return name; 23 | } 24 | 25 | private static String randUser(String name) { 26 | return String.format("%s%2d", name.substring(5), LoadUtils.randInt(100)); 27 | } 28 | 29 | private static String randEmail(String user, List words) { 30 | return String.format("%s@%s.com", user, LoadUtils.randNth(words)); 31 | } 32 | 33 | public static void main(String[] args) throws IOException { 34 | if (args.length == 0 || "help".equals(args[0])) { 35 | System.out.println(usage); 36 | System.exit(0); 37 | } 38 | 39 | HTablePool pool = new HTablePool(); 40 | UsersDAO dao = new UsersDAO(pool); 41 | 42 | int count = Integer.parseInt(args[0]); 43 | List names = LoadUtils.readResource(LoadUtils.NAMES_PATH); 44 | List words = LoadUtils.readResource(LoadUtils.WORDS_PATH); 45 | 46 | for (int i = 0; i < count; i++) { 47 | String name = randName(names); 48 | String user = randUser(name); 49 | String email = randEmail(user, words); 50 | dao.addUser(user, name, email, "abc123"); 51 | } 52 | 53 | pool.closeTablePool(UsersDAO.TABLE_NAME); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/TwitsTool.java: -------------------------------------------------------------------------------- 1 
| package HBaseIA.TwitBase; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import org.apache.hadoop.hbase.client.HTablePool; 7 | import org.apache.log4j.Logger; 8 | import org.joda.time.DateTime; 9 | 10 | import HBaseIA.TwitBase.hbase.TwitsDAO; 11 | import HBaseIA.TwitBase.hbase.UsersDAO; 12 | import HBaseIA.TwitBase.model.Twit; 13 | 14 | public class TwitsTool { 15 | 16 | private static final Logger log = Logger.getLogger(TwitsTool.class); 17 | 18 | public static final String usage = 19 | "twitstool action ...\n" + 20 | " help - print this message and exit.\n" + 21 | " post user text - post a new twit on user's behalf.\n" + 22 | " list user - list all twits for the specified user.\n"; 23 | 24 | public static void main(String[] args) throws IOException { 25 | if (args.length == 0 || "help".equals(args[0])) { 26 | System.out.println(usage); 27 | System.exit(0); 28 | } 29 | 30 | HTablePool pool = new HTablePool(); 31 | TwitsDAO twitsDao = new TwitsDAO(pool); 32 | UsersDAO usersDao = new UsersDAO(pool); 33 | 34 | if ("post".equals(args[0])) { 35 | DateTime now = new DateTime(); 36 | log.debug(String.format("Posting twit at ...", now)); 37 | twitsDao.postTwit(args[1], now, args[2]); 38 | Twit t = twitsDao.getTwit(args[1], now); 39 | usersDao.incTweetCount(args[1]); 40 | System.out.println("Successfully posted " + t); 41 | } 42 | 43 | if ("list".equals(args[0])) { 44 | List twits = twitsDao.list(args[1]); 45 | log.info(String.format("Found %s twits.", twits.size())); 46 | for(Twit t : twits) { 47 | System.out.println(t); 48 | } 49 | } 50 | 51 | pool.closeTablePool(TwitsDAO.TABLE_NAME); 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /bin/launcher: -------------------------------------------------------------------------------- 1 | #! 
/usr/bin/env bash 2 | 3 | if [ $# = 0 ]; then 4 | echo "Usage: launcher " 5 | echo "where is an option from one of the following:" 6 | echo "" 7 | echo "TwitBase tools" 8 | echo " init : Create tables for TwitBase" 9 | echo " twits : Tool for managing twits" 10 | echo " users : Tool for managing users" 11 | echo " loadusers : Tool for loading random users" 12 | echo " loadtwits : Tool for loading random twits" 13 | echo "" 14 | echo "MapReduce jobs" 15 | echo " timespent : Run the TimeSpent log processing MapReduce job" 16 | echo " countshakespeare : Run the Shakespearean counter MapReduce job" 17 | echo " hamlettagger : Run the Hamlet tagging MapReduce job" 18 | echo "" 19 | echo "Utils" 20 | echo " tablepresplitter : Create a pre-split table" 21 | exit 1 22 | fi 23 | 24 | COMMAND=$1 25 | 26 | # Choose class to run 27 | if [ "$COMMAND" = "init" ] ; then 28 | CLASS='HBaseIA.TwitBase.InitTables' 29 | elif [ "$COMMAND" = "twits" ] ; then 30 | CLASS='HBaseIA.TwitBase.TwitsTool' 31 | elif [ "$COMMAND" = "users" ] ; then 32 | CLASS='HBaseIA.TwitBase.UsersTool' 33 | elif [ "$COMMAND" = "loadusers" ] ; then 34 | CLASS='HBaseIA.TwitBase.LoadUsers' 35 | elif [ "$COMMAND" = "loadtwits" ] ; then 36 | CLASS='HBaseIA.TwitBase.LoadTwits' 37 | elif [ "$COMMAND" = "timespent" ] ; then 38 | CLASS='HBaseIA.TwitBase.mapreduce.TimeSpent' 39 | elif [ "$COMMAND" = "countshakespeare" ] ; then 40 | CLASS='HBaseIA.TwitBase.mapreduce.CountShakespeare' 41 | elif [ "$COMMAND" = "hamlettagger" ] ; then 42 | CLASS='HBaseIA.TwitBase.mapreduce.HamletTagger' 43 | elif [ "$COMMAND" = "tablepresplitter" ] ; then 44 | CLASS='utils.TablePreSplitter' 45 | else 46 | CLASS=$COMMAND 47 | fi 48 | 49 | shift 50 | java -cp target/twitbase-1.0.0.jar $CLASS "$@" 51 | -------------------------------------------------------------------------------- /src/main/resources/dict/README: -------------------------------------------------------------------------------- 1 | # @(#)README 8.1 (Berkeley) 6/5/93 2 | # $FreeBSD: src/share/dict/README,v 1.13.34.1.4.1 2010/06/14 02:09:06 kensmith Exp $ 3 | 4 | WEB ---- (introduction provided by jaw@riacs) ------------------------- 5 | 6 | Welcome to web2 (Webster's Second International) all 234,936 words worth. 7 | The 1934 copyright has lapsed, according to the supplier. The 8 | supplemental 'web2a' list contains hyphenated terms as well as assorted 9 | noun and adverbial phrases. The wordlist makes a dandy 'grep' victim. 10 | 11 | -- James A. Woods {ihnp4,hplabs}!ames!jaw (or jaw@riacs) 12 | 13 | Dictionaries for other languages, e.g. Afrikaans, American, Aussie, 14 | Chinese, Croatian, Czech, Danish, Dutch, Esperanto, Finnish, French, 15 | German, Hindi, Hungarian, Italian, Japanese, Latin, Norwegian, Polish, 16 | Russian, Spanish, Swahili, Swedish, Yiddish, are available 17 | at ftp://ftp.ox.ac.uk/pub/wordlists. 18 | 19 | Country names are stored in the file /usr/share/misc/iso3166. 20 | 21 | 22 | FreeBSD Maintenance Notes --------------------------------------------- 23 | 24 | Note that FreeBSD is not maintaining a historical document, we're 25 | maintaining a list of current [American] English spellings. 26 | 27 | A few words have been removed because their spellings have depreciated. 
28 | This list of words includes: 29 | corelation (and its derivatives) "correlation" is the preferred spelling 30 | freen typographical error in original file 31 | freend archaic spelling no longer in use; 32 | masks common typo in modern text 33 | 34 | -- 35 | 36 | A list of technical terms has been added in the file 'freebsd'. This 37 | word list contains FreeBSD/Unix lexicon that is used by the system 38 | documentation. It makes a great ispell(1) personal dictionary to 39 | supplement the standard English language dictionary. 40 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/coprocessors/FollowsObserver.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.coprocessors; 2 | 3 | import static HBaseIA.TwitBase.hbase.RelationsDAO.FOLLOWS_TABLE_NAME; 4 | import static HBaseIA.TwitBase.hbase.RelationsDAO.FROM; 5 | import static HBaseIA.TwitBase.hbase.RelationsDAO.RELATION_FAM; 6 | import static HBaseIA.TwitBase.hbase.RelationsDAO.TO; 7 | 8 | import java.io.IOException; 9 | 10 | import org.apache.hadoop.hbase.CoprocessorEnvironment; 11 | import org.apache.hadoop.hbase.KeyValue; 12 | import org.apache.hadoop.hbase.client.HTablePool; 13 | import org.apache.hadoop.hbase.client.Put; 14 | import org.apache.hadoop.hbase.coprocessor.BaseRegionObserver; 15 | import org.apache.hadoop.hbase.coprocessor.ObserverContext; 16 | import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment; 17 | import org.apache.hadoop.hbase.regionserver.wal.WALEdit; 18 | import org.apache.hadoop.hbase.util.Bytes; 19 | 20 | import HBaseIA.TwitBase.hbase.RelationsDAO; 21 | 22 | public class FollowsObserver extends BaseRegionObserver { 23 | 24 | private HTablePool pool = null; 25 | 26 | @Override 27 | public void start(CoprocessorEnvironment env) throws IOException { 28 | pool = new HTablePool(env.getConfiguration(), Integer.MAX_VALUE); 29 | } 30 | 31 | @Override 32 | public void stop(CoprocessorEnvironment env) throws IOException { 33 | pool.close(); 34 | } 35 | 36 | @Override 37 | public void postPut( 38 | final ObserverContext e, 39 | final Put put, 40 | final WALEdit edit, 41 | final boolean writeToWAL) 42 | throws IOException { 43 | 44 | byte[] table 45 | = e.getEnvironment().getRegion().getRegionInfo().getTableName(); 46 | if (!Bytes.equals(table, FOLLOWS_TABLE_NAME)) 47 | return; 48 | 49 | KeyValue kv = put.get(RELATION_FAM, FROM).get(0); 50 | String from = Bytes.toString(kv.getValue()); 51 | kv = put.get(RELATION_FAM, TO).get(0); 52 | String to = Bytes.toString(kv.getValue()); 53 | 54 | RelationsDAO relations = new RelationsDAO(pool); 55 | relations.addFollowedBy(to, from); 56 | } 57 | } 58 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/LoadTwits.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase; 2 | 3 | import java.io.IOException; 4 | import java.util.List; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.client.HBaseAdmin; 9 | import org.apache.hadoop.hbase.client.HTablePool; 10 | import org.joda.time.DateTime; 11 | 12 | import utils.LoadUtils; 13 | import HBaseIA.TwitBase.hbase.TwitsDAO; 14 | import HBaseIA.TwitBase.hbase.UsersDAO; 15 | import HBaseIA.TwitBase.model.User; 16 | 17 | public class LoadTwits { 18 | 19 | public static final String usage = 
20 | "loadtwits count\n" + 21 | " help - print this message and exit.\n" + 22 | " count - add count random twits to all TwitBase users.\n"; 23 | 24 | private static String randTwit(List words) { 25 | String twit = ""; 26 | for (int i = 0; i < 12; i++) { 27 | twit += LoadUtils.randNth(words) + " "; 28 | } 29 | return twit; 30 | } 31 | 32 | private static DateTime randDT() { 33 | int year = 2010 + LoadUtils.randInt(5); 34 | int month = 1 + LoadUtils.randInt(12); 35 | int day = 1 + LoadUtils.randInt(28); 36 | return new DateTime(year, month, day, 0, 0, 0, 0); 37 | } 38 | 39 | public static void main(String[] args) throws IOException { 40 | if (args.length == 0 || "help".equals(args[0])) { 41 | System.out.println(usage); 42 | System.exit(0); 43 | } 44 | 45 | Configuration conf = HBaseConfiguration.create(); 46 | HBaseAdmin admin = new HBaseAdmin(conf); 47 | 48 | if (!admin.tableExists(UsersDAO.TABLE_NAME) || 49 | !admin.tableExists(TwitsDAO.TABLE_NAME)) { 50 | System.out.println("Please use the InitTables utility to create " + 51 | "destination tables first."); 52 | System.exit(0); 53 | } 54 | 55 | HTablePool pool = new HTablePool(conf, Integer.MAX_VALUE); 56 | UsersDAO users = new UsersDAO(pool); 57 | TwitsDAO twits = new TwitsDAO(pool); 58 | 59 | int count = Integer.parseInt(args[0]); 60 | List words = LoadUtils.readResource(LoadUtils.WORDS_PATH); 61 | 62 | for(User u : users.getUsers()) { 63 | for (int i = 0; i < count; i++) { 64 | twits.postTwit(u.user, randDT(), randTwit(words)); 65 | } 66 | } 67 | 68 | pool.closeTablePool(UsersDAO.TABLE_NAME); 69 | } 70 | 71 | } 72 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/RelationsTool.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | 6 | import org.apache.hadoop.hbase.client.HTablePool; 7 | import org.apache.log4j.Logger; 8 | 9 | import HBaseIA.TwitBase.hbase.RelationsDAO; 10 | import HBaseIA.TwitBase.model.Relation; 11 | 12 | public class RelationsTool { 13 | 14 | private static final Logger log = Logger.getLogger(UsersTool.class); 15 | 16 | public static final String usage = 17 | "relationstool action ...\n" + 18 | " help - print this message and exit.\n" + 19 | " follows fromId toId - add a new relationship where from follows to.\n" + 20 | " list follows userId - list everyone userId follows.\n" + 21 | " list followedBy userId - list everyone who follows userId.\n" + 22 | " followedByScan userId - count users' followers using a client-side scanner" + 23 | " followedByCoproc userId - count users' followers using the Endpoint coprocessor"; 24 | 25 | public static void main(String[] args) throws Throwable { 26 | if (args.length == 0 || "help".equals(args[0])) { 27 | System.out.println(usage); 28 | System.exit(0); 29 | } 30 | 31 | HTablePool pool = new HTablePool(); 32 | RelationsDAO dao = new RelationsDAO(pool); 33 | 34 | if ("follows".equals(args[0])) { 35 | log.debug(String.format("Adding follower %s -> %s", args[1], args[2])); 36 | dao.addFollows(args[1], args[2]); 37 | System.out.println("Successfully added relationship"); 38 | } 39 | 40 | if ("list".equals(args[0])) { 41 | List results = new ArrayList(); 42 | if (args[1].equals("follows")) 43 | results.addAll(dao.listFollows(args[2])); 44 | else if (args[1].equals("followedBy")) 45 | results.addAll(dao.listFollowedBy(args[2])); 46 | 47 | if (results.isEmpty()) 48 | System.out.println("No 
relations found."); 49 | for (Relation r : results) { 50 | System.out.println(r); 51 | } 52 | } 53 | 54 | if ("followedByScan".equals(args[0])) { 55 | long count = dao.followedByCountScan(args[1]); 56 | System.out.println(String.format("%s has %s followers.", args[1], count)); 57 | } 58 | 59 | if ("followedByCoproc".equals(args[0])) { 60 | long count = dao.followedByCount(args[1]); 61 | System.out.println(String.format("%s has %s followers.", args[1], count)); 62 | } 63 | 64 | pool.closeTablePool(RelationsDAO.FOLLOWS_TABLE_NAME); 65 | pool.closeTablePool(RelationsDAO.FOLLOWED_TABLE_NAME); 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HBase In Action: TwitBase 2 | 3 | [http://www.manning.com/dimidukkhurana][0] 4 | 5 | ## Compiling the project 6 | 7 | Code is managed by maven. Be sure to install maven on your platform 8 | before running these commands. Also be aware that HBase is not yet 9 | supported on the OpenJDK platform, the default JVM installed on most 10 | modern Linux distributions. You'll want to install the Oracle (Sun) 11 | Java 6 runtime and make sure it's configured on your `$PATH` before 12 | you continue. Again, on Ubuntu, you may find the [`oab-java6`][1] 13 | utility to be of use. 14 | 15 | To build a self-contained jar: 16 | 17 | $ mvn package 18 | 19 | The jar created using this by default will allow you to interact with 20 | HBase running in standalone mode on your local machine. If you want 21 | to interact with a remote (possibly fully distributed) HBase 22 | deployment, you can put your `hbase-site.xml` file in the 23 | `src/main/resources` directory before compiling the jar. 24 | 25 | ## Using TwitBase 26 | 27 | We have provided a launcher script to run TwitBase and the utilities 28 | that the HBaseIA project comes with. 29 | 30 | $ bin/launcher 31 | 32 | Just run the launcher without any arguments and it'll print out the 33 | usage information. 34 | 35 | TwitBase applications can also be run using java directly: 36 | 37 | $ java -cp target/twitbase-1.0.0.jar [options...] 38 | 39 | Utilities for interacting with TwitBase include: 40 | 41 | - `HBaseIA.TwitBase.InitTables` : create TwitBase tables 42 | - `HBaseIA.TwitBase.TwitsTool` : tool for managing Twits 43 | - `HBaseIA.TwitBase.UsersTool` : tool for managing Users 44 | - `HBaseIA.TwitBase.LoadUsers` : tool for loading random Users 45 | - `HBaseIA.TwitBase.LoadTwits` : tool for loading random Twits 46 | 47 | The following MapReduce jobs can be launched the same way: 48 | 49 | - `HBaseIA.TwitBase.mapreduce.TimeSpent` : run TimeSpent log 50 | processing MR job 51 | - `HBaseIA.TwitBase.mapreduce.CountShakespeare` : run 52 | Shakespearean counter MR job 53 | - `HBaseIA.TwitBase.mapreduce.HamletTagger` : run 54 | hamlet-tagging MR job 55 | 56 | ## Other utilities and scripts 57 | 58 | The following utilities are available for you to play with: 59 | 60 | - `utils.TablePreSplitter` : create pre-split table 61 | 62 | ## License 63 | 64 | Copyright (C) 2012 Nick Dimiduk, Amandeep Khurana 65 | 66 | Distributed under the [Apache License, version 2.0][2], the same as HBase. 
67 | 68 | [0]: http://www.manning.com/dimidukkhurana 69 | [1]: https://github.com/flexiondotorg/oab-java6 70 | [2]: http://www.apache.org/licenses/LICENSE-2.0.html 71 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/mapreduce/CountShakespeare.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.mapreduce; 2 | 3 | import java.util.Random; 4 | 5 | import org.apache.hadoop.conf.Configuration; 6 | import org.apache.hadoop.hbase.HBaseConfiguration; 7 | import org.apache.hadoop.hbase.client.Result; 8 | import org.apache.hadoop.hbase.client.Scan; 9 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 10 | import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 11 | import org.apache.hadoop.hbase.mapreduce.TableMapper; 12 | import org.apache.hadoop.hbase.util.Bytes; 13 | import org.apache.hadoop.io.LongWritable; 14 | import org.apache.hadoop.io.Text; 15 | import org.apache.hadoop.mapreduce.Job; 16 | import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat; 17 | 18 | import HBaseIA.TwitBase.hbase.TwitsDAO; 19 | 20 | public class CountShakespeare { 21 | 22 | public static class Map 23 | extends TableMapper { 24 | 25 | public static enum Counters {ROWS, SHAKESPEAREAN}; 26 | private Random rand; 27 | 28 | /** 29 | * Determines if the message pertains to Shakespeare. 30 | */ 31 | private boolean containsShakespear(String msg) { 32 | return rand.nextBoolean(); 33 | } 34 | 35 | @Override 36 | protected void setup(Context context) { 37 | rand = new Random(System.currentTimeMillis()); 38 | } 39 | 40 | @Override 41 | protected void map( 42 | ImmutableBytesWritable rowkey, 43 | Result result, 44 | Context context) { 45 | byte[] b = result.getColumnLatest( 46 | TwitsDAO.TWITS_FAM, 47 | TwitsDAO.TWIT_COL).getValue(); 48 | if (b == null) return; 49 | 50 | String msg = Bytes.toString(b); 51 | if (msg.isEmpty()) return; 52 | 53 | context.getCounter(Counters.ROWS).increment(1); 54 | if (containsShakespear(msg)) 55 | context.getCounter(Counters.SHAKESPEAREAN).increment(1); 56 | } 57 | } 58 | 59 | public static void main(String[] args) throws Exception { 60 | Configuration conf = HBaseConfiguration.create(); 61 | Job job = new Job(conf, "TwitBase Shakespeare counter"); 62 | job.setJarByClass(CountShakespeare.class); 63 | 64 | Scan scan = new Scan(); 65 | scan.addColumn(TwitsDAO.TWITS_FAM, TwitsDAO.TWIT_COL); 66 | TableMapReduceUtil.initTableMapperJob( 67 | Bytes.toString(TwitsDAO.TABLE_NAME), 68 | scan, 69 | Map.class, 70 | ImmutableBytesWritable.class, 71 | Result.class, 72 | job); 73 | 74 | job.setOutputFormatClass(NullOutputFormat.class); 75 | job.setNumReduceTasks(0); 76 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 77 | } 78 | } 79 | -------------------------------------------------------------------------------- /src/main/java/utils/TablePreSplitter.java: -------------------------------------------------------------------------------- 1 | package utils; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.hbase.*; 5 | import org.apache.hadoop.hbase.client.HBaseAdmin; 6 | import org.apache.hadoop.hbase.util.Bytes; 7 | 8 | import java.io.IOException; 9 | import java.util.ArrayList; 10 | import java.util.List; 11 | 12 | public class TablePreSplitter { 13 | 14 | Configuration conf; 15 | 16 | public static final String usage = 17 | "tablepresplit commands ...\n" + 18 | " help - print this message and exit.\n" + 19 | " create - create a new table with the provided start key, end key and number of splits.\n" + 20 | " syntax: create \n" ; 21 | 22 | public TablePreSplitter() { 23 | conf = HBaseConfiguration.create(); 24 | } 25 | 26 | 27 | public boolean createPreSplitTable(String tableName, String family, String startKey, String endKey, int splits) throws IOException { 28 | List l = new ArrayList(); 29 | l.add(family); 30 | return createPreSplitTable(tableName, l, startKey, endKey, splits); 31 | } 32 | 33 | public boolean createPreSplitTable(String tableName, List families, String startKey, String endKey, int splits) throws IOException { 34 | HBaseAdmin admin = new HBaseAdmin(conf); 35 | byte[] table = Bytes.toBytes(tableName); 36 | byte[] start = Bytes.toBytes(startKey); 37 | byte[] end = Bytes.toBytes(endKey); 38 | HTableDescriptor desc = new HTableDescriptor(table); 39 | for(String f : families) { 40 | HColumnDescriptor col = new HColumnDescriptor(f); 41 | desc.addFamily(col); 42 | } 43 | admin.createTable(desc, start, end, splits); 44 | return admin.tableExists(table) && admin.getTableRegions(table).size()==splits; 45 | } 46 | 47 | public static void main(String[] args) { 48 | if (args.length == 0 || "help".equals(args[0])) { 49 | System.out.println(usage); 50 | System.exit(0); 51 | } 52 | 53 | if ("create".equals(args[0])) { 54 | TablePreSplitter mysplitter = new TablePreSplitter(); 55 | String tableName = args[1]; 56 | String familyName = args[2]; 57 | String startKey = args[3]; 58 | String endKey = args[4]; 59 | int numOfSplits = Integer.parseInt(args[5]); 60 | try { 61 | boolean status = mysplitter.createPreSplitTable(tableName, familyName, startKey, endKey, numOfSplits); 62 | if(status) 63 | System.out.println("Table created successfully."); 64 | else 65 | System.out.println("Table creation failed."); 66 | } catch (IOException e) { 67 | e.printStackTrace(); 68 | } 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 6 | 4.0.0 7 | HBaseIA 8 | twitbase 9 | 1.0.0 10 | TwitBase 11 | http://www.manning.com/dimidukkhurana/ 12 | TwitBase is a running example used throughout HBase In Action 13 | 14 | 15 | UTF-8 16 | 17 | 18 | 19 | 20 | 21 | org.apache.maven.plugins 22 | maven-compiler-plugin 23 | 2.5.1 24 | 25 | 1.6 26 | 1.6 27 | 28 | 29 | 30 | maven-assembly-plugin 31 | 2.3 32 | 33 | 34 | filtered-jar-with-deps 35 | package 36 | 37 | single 38 | 39 | 40 | src/main/resources/assemblies/filtered-jar-with-deps.xml 41 | false 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | apache release 52 | https://repository.apache.org/content/repositories/releases/ 53 | 54 | 55 | 56 | 57 | 58 | org.apache.hadoop 59 | hadoop-core 60 | 
1.0.3 61 | 62 | 63 | org.apache.hbase 64 | hbase 65 | 0.92.1 66 | 67 | 68 | maven-release-plugin 69 | org.apache.maven.plugins 70 | 71 | 72 | 73 | 75 | 76 | commons-io 77 | commons-io 78 | 2.1 79 | 80 | 81 | 82 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/mapreduce/TimeSpent.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.mapreduce; 2 | 3 | import java.io.IOException; 4 | import org.apache.hadoop.conf.Configuration; 5 | import org.apache.hadoop.fs.FileSystem; 6 | import org.apache.hadoop.fs.Path; 7 | import org.apache.hadoop.io.LongWritable; 8 | import org.apache.hadoop.io.Text; 9 | import org.apache.hadoop.mapreduce.Job; 10 | import org.apache.hadoop.mapreduce.Mapper; 11 | import org.apache.hadoop.mapreduce.Reducer; 12 | import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 13 | import org.apache.hadoop.mapreduce.lib.input.TextInputFormat; 14 | import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 15 | import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat; 16 | 17 | public class TimeSpent { 18 | 19 | public static class Map extends Mapper { 20 | 21 | private static final String splitRE = "\\W+"; 22 | private Text user = new Text(); 23 | private LongWritable time = new LongWritable(); 24 | 25 | public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException { 26 | String line = value.toString(); 27 | String[] splits = line.split(splitRE); 28 | if(null == splits || splits.length < 8) 29 | return; 30 | 31 | user.set(splits[5]); 32 | time.set(new Long(splits[7].substring(0, splits[7].length()-1))); 33 | context.write(user, time); 34 | } 35 | } 36 | 37 | public static class Reduce extends Reducer { 38 | 39 | public void reduce(Text key, Iterable values, Context context) throws IOException, InterruptedException { 40 | long sum = 0; 41 | for(LongWritable time : values) { 42 | sum += time.get(); 43 | } 44 | context.write(key, new LongWritable(sum)); 45 | } 46 | } 47 | 48 | public static void main(String[] args) throws Exception { 49 | if (args.length != 2) { 50 | String usage = 51 | "TimeSpent is the log processing example app used in " + 52 | "Chapter 03 to demonstrate a MapReduce application.\n" + 53 | "Usage:\n" + 54 | " TimeSpent path/to/input path/to/output\n"; 55 | System.out.print(usage); 56 | System.exit(1); 57 | } 58 | 59 | Path inputPath = new Path(args[0]); 60 | Path outputPath = new Path(args[1]); 61 | 62 | Configuration conf = new Configuration(); 63 | Job job = new Job(conf, "TimeSpent"); 64 | job.setOutputKeyClass(Text.class); 65 | job.setOutputValueClass(LongWritable.class); 66 | job.setMapperClass(Map.class); 67 | job.setCombinerClass(Reduce.class); 68 | job.setReducerClass(Reduce.class); 69 | job.setInputFormatClass(TextInputFormat.class); 70 | job.setOutputFormatClass(TextOutputFormat.class); 71 | FileInputFormat.addInputPath(job, inputPath); 72 | FileOutputFormat.setOutputPath(job, outputPath); 73 | 74 | FileSystem fs = outputPath.getFileSystem(conf); 75 | if (fs.exists(outputPath)) { 76 | System.out.println("Deleting output path before proceeding."); 77 | fs.delete(outputPath, true); 78 | } 79 | 80 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 81 | 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/mapreduce/HamletTagger.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.mapreduce; 2 | 3 | import java.util.Iterator; 4 | import java.util.Random; 5 | 6 | import org.apache.hadoop.conf.Configuration; 7 | import org.apache.hadoop.hbase.HBaseConfiguration; 8 | import org.apache.hadoop.hbase.client.Put; 9 | import org.apache.hadoop.hbase.client.Result; 10 | import org.apache.hadoop.hbase.client.Scan; 11 | import org.apache.hadoop.hbase.io.ImmutableBytesWritable; 12 | import org.apache.hadoop.hbase.mapreduce.IdentityTableReducer; 13 | import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil; 14 | import org.apache.hadoop.hbase.mapreduce.TableMapper; 15 | import org.apache.hadoop.hbase.mapreduce.TableReducer; 16 | import org.apache.hadoop.hbase.util.Bytes; 17 | import org.apache.hadoop.mapreduce.Job; 18 | 19 | import HBaseIA.TwitBase.hbase.TwitsDAO; 20 | import HBaseIA.TwitBase.hbase.UsersDAO; 21 | 22 | public class HamletTagger { 23 | 24 | public static class Map 25 | extends TableMapper { 26 | 27 | public static enum Counters {HAMLET_TAGS}; 28 | private Random rand; 29 | 30 | private boolean mentionsHamlet(String msg) { 31 | return rand.nextBoolean(); 32 | } 33 | 34 | protected void setup(Context context) { 35 | rand = new Random(System.currentTimeMillis()); 36 | } 37 | 38 | protected void map( 39 | ImmutableBytesWritable rowkey, 40 | Result result, 41 | Context context) { 42 | byte[] b = result.getColumnLatest( 43 | TwitsDAO.TWITS_FAM, 44 | TwitsDAO.TWIT_COL).getValue(); 45 | String msg = Bytes.toString(b); 46 | b = result.getColumnLatest( 47 | TwitsDAO.TWITS_FAM, 48 | TwitsDAO.USER_COL).getValue(); 49 | String user = Bytes.toString(b); 50 | 51 | if (mentionsHamlet(msg)) { 52 | Put p = UsersDAO.mkPut( 53 | user, 54 | UsersDAO.INFO_FAM, 55 | UsersDAO.HAMLET_COL, 56 | Bytes.toBytes(true)); 57 | ImmutableBytesWritable outkey = 58 | new ImmutableBytesWritable(p.getRow()); 59 | try { 60 | context.write(outkey, p); 61 | context.getCounter(Counters.HAMLET_TAGS).increment(1); 62 | } catch (Exception e) { 63 | // gulp! 64 | } 65 | } 66 | } 67 | } 68 | 69 | public static class Reduce 70 | extends TableReducer< 71 | ImmutableBytesWritable, 72 | Put, 73 | ImmutableBytesWritable> { 74 | 75 | @Override 76 | protected void reduce( 77 | ImmutableBytesWritable rowkey, 78 | Iterable values, 79 | Context context) { 80 | Iterator i = values.iterator(); 81 | if (i.hasNext()) { 82 | try { 83 | context.write(rowkey, i.next()); 84 | } catch (Exception e) { 85 | // gulp! 86 | } 87 | } 88 | } 89 | } 90 | 91 | public static void main(String[] args) throws Exception { 92 | Configuration conf = HBaseConfiguration.create(); 93 | Job job = new Job(conf, "TwitBase Hamlet tagger"); 94 | job.setJarByClass(HamletTagger.class); 95 | 96 | Scan scan = new Scan(); 97 | scan.addColumn(TwitsDAO.TWITS_FAM, TwitsDAO.USER_COL); 98 | scan.addColumn(TwitsDAO.TWITS_FAM, TwitsDAO.TWIT_COL); 99 | TableMapReduceUtil.initTableMapperJob( 100 | Bytes.toString(TwitsDAO.TABLE_NAME), 101 | scan, 102 | Map.class, 103 | ImmutableBytesWritable.class, 104 | Put.class, 105 | job); 106 | TableMapReduceUtil.initTableReducerJob( 107 | Bytes.toString(UsersDAO.TABLE_NAME), 108 | IdentityTableReducer.class, 109 | job); 110 | 111 | job.setNumReduceTasks(0); 112 | System.exit(job.waitForCompletion(true) ? 
0 : 1); 113 | } 114 | } 115 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/InitTables.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase; 2 | 3 | import org.apache.hadoop.conf.Configuration; 4 | import org.apache.hadoop.hbase.HBaseConfiguration; 5 | import org.apache.hadoop.hbase.HColumnDescriptor; 6 | import org.apache.hadoop.hbase.HTableDescriptor; 7 | import org.apache.hadoop.hbase.client.HBaseAdmin; 8 | import org.apache.hadoop.hbase.util.Bytes; 9 | 10 | import HBaseIA.TwitBase.hbase.RelationsDAO; 11 | import HBaseIA.TwitBase.hbase.TwitsDAO; 12 | import HBaseIA.TwitBase.hbase.UsersDAO; 13 | 14 | public class InitTables { 15 | 16 | public static void main(String[] args) throws Exception { 17 | Configuration conf = HBaseConfiguration.create(); 18 | HBaseAdmin admin = new HBaseAdmin(conf); 19 | 20 | // first do no harm 21 | if (args.length > 0 && args[0].equalsIgnoreCase("-f")) { 22 | System.out.println("!!! dropping tables in..."); 23 | for (int i = 5; i > 0; i--) { 24 | System.out.println(i); 25 | Thread.sleep(1000); 26 | } 27 | 28 | if (admin.tableExists(UsersDAO.TABLE_NAME)) { 29 | System.out.printf("Deleting %s\n", Bytes.toString(UsersDAO.TABLE_NAME)); 30 | if (admin.isTableEnabled(UsersDAO.TABLE_NAME)) 31 | admin.disableTable(UsersDAO.TABLE_NAME); 32 | admin.deleteTable(UsersDAO.TABLE_NAME); 33 | } 34 | 35 | if (admin.tableExists(TwitsDAO.TABLE_NAME)) { 36 | System.out.printf("Deleting %s\n", Bytes.toString(TwitsDAO.TABLE_NAME)); 37 | if (admin.isTableEnabled(TwitsDAO.TABLE_NAME)) 38 | admin.disableTable(TwitsDAO.TABLE_NAME); 39 | admin.deleteTable(TwitsDAO.TABLE_NAME); 40 | } 41 | 42 | if (admin.tableExists(RelationsDAO.FOLLOWS_TABLE_NAME)) { 43 | System.out.printf("Deleting %s\n", Bytes.toString(RelationsDAO.FOLLOWS_TABLE_NAME)); 44 | if (admin.isTableEnabled(RelationsDAO.FOLLOWS_TABLE_NAME)) 45 | admin.disableTable(RelationsDAO.FOLLOWS_TABLE_NAME); 46 | admin.deleteTable(RelationsDAO.FOLLOWS_TABLE_NAME); 47 | } 48 | 49 | if (admin.tableExists(RelationsDAO.FOLLOWED_TABLE_NAME)) { 50 | System.out.printf("Deleting %s\n", Bytes.toString(RelationsDAO.FOLLOWED_TABLE_NAME)); 51 | if (admin.isTableEnabled(RelationsDAO.FOLLOWED_TABLE_NAME)) 52 | admin.disableTable(RelationsDAO.FOLLOWED_TABLE_NAME); 53 | admin.deleteTable(RelationsDAO.FOLLOWED_TABLE_NAME); 54 | } 55 | } 56 | 57 | if (admin.tableExists(UsersDAO.TABLE_NAME)) { 58 | System.out.println("User table already exists."); 59 | } else { 60 | System.out.println("Creating User table..."); 61 | HTableDescriptor desc = new HTableDescriptor(UsersDAO.TABLE_NAME); 62 | HColumnDescriptor c = new HColumnDescriptor(UsersDAO.INFO_FAM); 63 | desc.addFamily(c); 64 | admin.createTable(desc); 65 | System.out.println("User table created."); 66 | } 67 | 68 | if (admin.tableExists(TwitsDAO.TABLE_NAME)) { 69 | System.out.println("Twits table already exists."); 70 | } else { 71 | System.out.println("Creating Twits table..."); 72 | HTableDescriptor desc = new HTableDescriptor(TwitsDAO.TABLE_NAME); 73 | HColumnDescriptor c = new HColumnDescriptor(TwitsDAO.TWITS_FAM); 74 | c.setMaxVersions(1); 75 | desc.addFamily(c); 76 | admin.createTable(desc); 77 | System.out.println("Twits table created."); 78 | } 79 | 80 | if (admin.tableExists(RelationsDAO.FOLLOWS_TABLE_NAME)) { 81 | System.out.println("Follows table already exists."); 82 | } else { 83 | System.out.println("Creating Follows table..."); 84 | HTableDescriptor desc = new 
HTableDescriptor(RelationsDAO.FOLLOWS_TABLE_NAME); 85 | HColumnDescriptor c = new HColumnDescriptor(RelationsDAO.RELATION_FAM); 86 | c.setMaxVersions(1); 87 | desc.addFamily(c); 88 | admin.createTable(desc); 89 | System.out.println("Follows table created."); 90 | } 91 | 92 | if (admin.tableExists(RelationsDAO.FOLLOWED_TABLE_NAME)) { 93 | System.out.println("Followed table already exists."); 94 | } else { 95 | System.out.println("Creating Followed table..."); 96 | HTableDescriptor desc = new HTableDescriptor(RelationsDAO.FOLLOWED_TABLE_NAME); 97 | HColumnDescriptor c = new HColumnDescriptor(RelationsDAO.RELATION_FAM); 98 | c.setMaxVersions(1); 99 | desc.addFamily(c); 100 | admin.createTable(desc); 101 | System.out.println("Followed table created."); 102 | } 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/hbase/TwitsDAO.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | 8 | import org.apache.hadoop.hbase.client.Get; 9 | import org.apache.hadoop.hbase.client.HTableInterface; 10 | import org.apache.hadoop.hbase.client.HTablePool; 11 | import org.apache.hadoop.hbase.client.Put; 12 | import org.apache.hadoop.hbase.client.Result; 13 | import org.apache.hadoop.hbase.client.ResultScanner; 14 | import org.apache.hadoop.hbase.client.Scan; 15 | import org.apache.hadoop.hbase.util.Bytes; 16 | import org.apache.log4j.Logger; 17 | import org.joda.time.DateTime; 18 | 19 | import utils.Md5Utils; 20 | 21 | public class TwitsDAO { 22 | 23 | public static final byte[] TABLE_NAME = Bytes.toBytes("twits"); 24 | public static final byte[] TWITS_FAM = Bytes.toBytes("twits"); 25 | 26 | public static final byte[] USER_COL = Bytes.toBytes("user"); 27 | public static final byte[] TWIT_COL = Bytes.toBytes("twit"); 28 | private static final int longLength = 8; // bytes 29 | 30 | private HTablePool pool; 31 | 32 | private static final Logger log = Logger.getLogger(TwitsDAO.class); 33 | 34 | public TwitsDAO(HTablePool pool) { 35 | this.pool = pool; 36 | } 37 | 38 | private static byte[] mkRowKey(Twit t) { 39 | return mkRowKey(t.user, t.dt); 40 | } 41 | 42 | private static byte[] mkRowKey(String user, DateTime dt) { 43 | byte[] userHash = Md5Utils.md5sum(user); 44 | byte[] timestamp = Bytes.toBytes(-1 * dt.getMillis()); 45 | byte[] rowKey = new byte[Md5Utils.MD5_LENGTH + longLength]; 46 | 47 | int offset = 0; 48 | offset = Bytes.putBytes(rowKey, offset, userHash, 0, userHash.length); 49 | Bytes.putBytes(rowKey, offset, timestamp, 0, timestamp.length); 50 | return rowKey; 51 | } 52 | 53 | private static Put mkPut(Twit t) { 54 | Put p = new Put(mkRowKey(t)); 55 | p.add(TWITS_FAM, USER_COL, Bytes.toBytes(t.user)); 56 | p.add(TWITS_FAM, TWIT_COL, Bytes.toBytes(t.text)); 57 | return p; 58 | } 59 | 60 | private static Get mkGet(String user, DateTime dt) { 61 | Get g = new Get(mkRowKey(user, dt)); 62 | g.addColumn(TWITS_FAM, USER_COL); 63 | g.addColumn(TWITS_FAM, TWIT_COL); 64 | return g; 65 | } 66 | 67 | private static String to_str(byte[] xs) { 68 | StringBuilder sb = new StringBuilder(xs.length *2); 69 | for(byte b : xs) { 70 | sb.append(b).append(" "); 71 | } 72 | sb.deleteCharAt(sb.length() -1); 73 | return sb.toString(); 74 | } 75 | 76 | private static Scan mkScan(String user) { 77 | byte[] userHash = Md5Utils.md5sum(user); 78 | byte[] 
startRow = Bytes.padTail(userHash, longLength); // 212d...866f00... 79 | byte[] stopRow = Bytes.padTail(userHash, longLength); 80 | stopRow[Md5Utils.MD5_LENGTH-1]++; // 212d...867000... 81 | 82 | log.debug("Scan starting at: '" + to_str(startRow) + "'"); 83 | log.debug("Scan stopping at: '" + to_str(stopRow) + "'"); 84 | 85 | Scan s = new Scan(startRow, stopRow); 86 | s.addColumn(TWITS_FAM, USER_COL); 87 | s.addColumn(TWITS_FAM, TWIT_COL); 88 | return s; 89 | } 90 | 91 | public void postTwit(String user, DateTime dt, String text) throws IOException { 92 | 93 | HTableInterface twits = pool.getTable(TABLE_NAME); 94 | 95 | Put p = mkPut(new Twit(user, dt, text)); 96 | twits.put(p); 97 | 98 | twits.close(); 99 | } 100 | 101 | public HBaseIA.TwitBase.model.Twit getTwit(String user, DateTime dt) throws IOException { 102 | 103 | HTableInterface twits = pool.getTable(TABLE_NAME); 104 | 105 | Get g = mkGet(user, dt); 106 | Result result = twits.get(g); 107 | if (result.isEmpty()) 108 | return null; 109 | 110 | Twit t = new Twit(result); 111 | twits.close(); 112 | return t; 113 | } 114 | 115 | public List list(String user) throws IOException { 116 | 117 | HTableInterface twits = pool.getTable(TABLE_NAME); 118 | 119 | ResultScanner results = twits.getScanner(mkScan(user)); 120 | List ret = new ArrayList(); 121 | for(Result r : results) { 122 | ret.add(new Twit(r)); 123 | } 124 | 125 | twits.close(); 126 | return ret; 127 | } 128 | 129 | private static class Twit extends HBaseIA.TwitBase.model.Twit { 130 | 131 | private Twit(Result r) { 132 | this( 133 | r.getColumnLatest(TWITS_FAM, USER_COL).getValue(), 134 | Arrays.copyOfRange(r.getRow(), Md5Utils.MD5_LENGTH, Md5Utils.MD5_LENGTH + longLength), 135 | r.getColumnLatest(TWITS_FAM, TWIT_COL).getValue()); 136 | } 137 | 138 | private Twit(byte[] user, byte[] dt, byte[] text) { 139 | this( 140 | Bytes.toString(user), 141 | new DateTime(-1 * Bytes.toLong(dt)), 142 | Bytes.toString(text)); 143 | } 144 | 145 | private Twit(String user, DateTime dt, String text) { 146 | this.user = user; 147 | this.dt = dt; 148 | this.text = text; 149 | } 150 | } 151 | } 152 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/hbase/UsersDAO.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import org.apache.hadoop.hbase.client.Delete; 8 | import org.apache.hadoop.hbase.client.Get; 9 | import org.apache.hadoop.hbase.client.HTableInterface; 10 | import org.apache.hadoop.hbase.client.HTablePool; 11 | import org.apache.hadoop.hbase.client.Put; 12 | import org.apache.hadoop.hbase.client.Result; 13 | import org.apache.hadoop.hbase.client.ResultScanner; 14 | import org.apache.hadoop.hbase.client.Scan; 15 | import org.apache.hadoop.hbase.util.Bytes; 16 | import org.apache.log4j.Logger; 17 | 18 | public class UsersDAO { 19 | 20 | public static final byte[] TABLE_NAME = Bytes.toBytes("users"); 21 | public static final byte[] INFO_FAM = Bytes.toBytes("info"); 22 | 23 | public static final byte[] USER_COL = Bytes.toBytes("user"); 24 | public static final byte[] NAME_COL = Bytes.toBytes("name"); 25 | public static final byte[] EMAIL_COL = Bytes.toBytes("email"); 26 | public static final byte[] PASS_COL = Bytes.toBytes("password"); 27 | public static final byte[] TWEETS_COL = Bytes.toBytes("tweet_count"); 28 | 29 | public static final byte[] HAMLET_COL = 
Bytes.toBytes("hamlet_tag"); 30 | 31 | private static final Logger log = Logger.getLogger(UsersDAO.class); 32 | 33 | private HTablePool pool; 34 | 35 | public UsersDAO(HTablePool pool) { 36 | this.pool = pool; 37 | } 38 | 39 | private static Get mkGet(String user) throws IOException { 40 | log.debug(String.format("Creating Get for %s", user)); 41 | 42 | Get g = new Get(Bytes.toBytes(user)); 43 | g.addFamily(INFO_FAM); 44 | return g; 45 | } 46 | 47 | private static Put mkPut(User u) { 48 | log.debug(String.format("Creating Put for %s", u)); 49 | 50 | Put p = new Put(Bytes.toBytes(u.user)); 51 | p.add(INFO_FAM, USER_COL, Bytes.toBytes(u.user)); 52 | p.add(INFO_FAM, NAME_COL, Bytes.toBytes(u.name)); 53 | p.add(INFO_FAM, EMAIL_COL, Bytes.toBytes(u.email)); 54 | p.add(INFO_FAM, PASS_COL, Bytes.toBytes(u.password)); 55 | return p; 56 | } 57 | 58 | public static Put mkPut(String username, 59 | byte[] fam, 60 | byte[] qual, 61 | byte[] val) { 62 | Put p = new Put(Bytes.toBytes(username)); 63 | p.add(fam, qual, val); 64 | return p; 65 | } 66 | 67 | private static Delete mkDel(String user) { 68 | log.debug(String.format("Creating Delete for %s", user)); 69 | 70 | Delete d = new Delete(Bytes.toBytes(user)); 71 | return d; 72 | } 73 | 74 | private static Scan mkScan() { 75 | Scan s = new Scan(); 76 | s.addFamily(INFO_FAM); 77 | return s; 78 | } 79 | 80 | public void addUser(String user, 81 | String name, 82 | String email, 83 | String password) 84 | throws IOException { 85 | 86 | HTableInterface users = pool.getTable(TABLE_NAME); 87 | 88 | Put p = mkPut(new User(user, name, email, password)); 89 | users.put(p); 90 | 91 | users.close(); 92 | } 93 | 94 | public HBaseIA.TwitBase.model.User getUser(String user) 95 | throws IOException { 96 | HTableInterface users = pool.getTable(TABLE_NAME); 97 | 98 | Get g = mkGet(user); 99 | Result result = users.get(g); 100 | if (result.isEmpty()) { 101 | log.info(String.format("user %s not found.", user)); 102 | return null; 103 | } 104 | 105 | User u = new User(result); 106 | users.close(); 107 | return u; 108 | } 109 | 110 | public void deleteUser(String user) throws IOException { 111 | HTableInterface users = pool.getTable(TABLE_NAME); 112 | 113 | Delete d = mkDel(user); 114 | users.delete(d); 115 | 116 | users.close(); 117 | } 118 | 119 | public List getUsers() 120 | throws IOException { 121 | HTableInterface users = pool.getTable(TABLE_NAME); 122 | 123 | ResultScanner results = users.getScanner(mkScan()); 124 | ArrayList ret 125 | = new ArrayList(); 126 | for(Result r : results) { 127 | ret.add(new User(r)); 128 | } 129 | 130 | users.close(); 131 | return ret; 132 | } 133 | 134 | public long incTweetCount(String user) throws IOException { 135 | HTableInterface users = pool.getTable(TABLE_NAME); 136 | 137 | long ret = users.incrementColumnValue(Bytes.toBytes(user), 138 | INFO_FAM, 139 | TWEETS_COL, 140 | 1L); 141 | 142 | users.close(); 143 | return ret; 144 | } 145 | 146 | private static class User 147 | extends HBaseIA.TwitBase.model.User { 148 | private User(Result r) { 149 | this(r.getValue(INFO_FAM, USER_COL), 150 | r.getValue(INFO_FAM, NAME_COL), 151 | r.getValue(INFO_FAM, EMAIL_COL), 152 | r.getValue(INFO_FAM, PASS_COL), 153 | r.getValue(INFO_FAM, TWEETS_COL) == null 154 | ? 
Bytes.toBytes(0L) 155 | : r.getValue(INFO_FAM, TWEETS_COL)); 156 | } 157 | 158 | private User(byte[] user, 159 | byte[] name, 160 | byte[] email, 161 | byte[] password, 162 | byte[] tweetCount) { 163 | this(Bytes.toString(user), 164 | Bytes.toString(name), 165 | Bytes.toString(email), 166 | Bytes.toString(password)); 167 | this.tweetCount = Bytes.toLong(tweetCount); 168 | } 169 | 170 | private User(String user, 171 | String name, 172 | String email, 173 | String password) { 174 | this.user = user; 175 | this.name = name; 176 | this.email = email; 177 | this.password = password; 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/main/java/HBaseIA/TwitBase/hbase/RelationsDAO.java: -------------------------------------------------------------------------------- 1 | package HBaseIA.TwitBase.hbase; 2 | 3 | import java.io.IOException; 4 | import java.util.ArrayList; 5 | import java.util.Arrays; 6 | import java.util.List; 7 | import java.util.Map; 8 | 9 | import org.apache.hadoop.hbase.KeyValue; 10 | import org.apache.hadoop.hbase.client.HTableInterface; 11 | import org.apache.hadoop.hbase.client.HTablePool; 12 | import org.apache.hadoop.hbase.client.Put; 13 | import org.apache.hadoop.hbase.client.Result; 14 | import org.apache.hadoop.hbase.client.ResultScanner; 15 | import org.apache.hadoop.hbase.client.Scan; 16 | import org.apache.hadoop.hbase.client.coprocessor.Batch; 17 | import org.apache.hadoop.hbase.util.Bytes; 18 | 19 | import utils.Md5Utils; 20 | import HBaseIA.TwitBase.coprocessors.RelationCountProtocol; 21 | 22 | public class RelationsDAO { 23 | 24 | // md5(id_from)md5(id_to) -> 'f':id_to=name_to 25 | // md5(id_from)md5(id_to) -> 'f':'to'=id_to, 'f':'from'=id_from 26 | 27 | public static final byte[] FOLLOWS_TABLE_NAME = Bytes.toBytes("follows"); 28 | public static final byte[] FOLLOWED_TABLE_NAME = Bytes.toBytes("followedBy"); 29 | public static final byte[] RELATION_FAM = Bytes.toBytes("f"); 30 | public static final byte[] FROM = Bytes.toBytes("from"); 31 | public static final byte[] TO = Bytes.toBytes("to"); 32 | 33 | private static final int KEY_WIDTH = 2 * Md5Utils.MD5_LENGTH; 34 | 35 | private HTablePool pool; 36 | 37 | public RelationsDAO(HTablePool pool) { 38 | this.pool = pool; 39 | } 40 | 41 | public static byte[] mkRowKey(String a) { 42 | byte[] ahash = Md5Utils.md5sum(a); 43 | byte[] rowkey = new byte[KEY_WIDTH]; 44 | 45 | Bytes.putBytes(rowkey, 0, ahash, 0, ahash.length); 46 | return rowkey; 47 | } 48 | 49 | public static byte[] mkRowKey(String a, String b) { 50 | byte[] ahash = Md5Utils.md5sum(a); 51 | byte[] bhash = Md5Utils.md5sum(b); 52 | byte[] rowkey = new byte[KEY_WIDTH]; 53 | 54 | int offset = 0; 55 | offset = Bytes.putBytes(rowkey, offset, ahash, 0, ahash.length); 56 | Bytes.putBytes(rowkey, offset, bhash, 0, bhash.length); 57 | return rowkey; 58 | } 59 | 60 | public static byte[][] splitRowkey(byte[] rowkey) { 61 | byte[][] result = new byte[2][]; 62 | 63 | result[0] = Arrays.copyOfRange(rowkey, 0, Md5Utils.MD5_LENGTH); 64 | result[1] = Arrays.copyOfRange(rowkey, Md5Utils.MD5_LENGTH, KEY_WIDTH); 65 | return result; 66 | } 67 | 68 | public void addFollows(String fromId, String toId) throws IOException { 69 | addRelation(FOLLOWS_TABLE_NAME, fromId, toId); 70 | } 71 | 72 | public void addFollowedBy(String fromId, String toId) throws IOException { 73 | addRelation(FOLLOWED_TABLE_NAME, fromId, toId); 74 | } 75 | 76 | public void addRelation(byte[] table, String fromId, String toId) throws IOException { 
77 | 78 | HTableInterface t = pool.getTable(table); 79 | 80 | Put p = new Put(mkRowKey(fromId, toId)); 81 | p.add(RELATION_FAM, FROM, Bytes.toBytes(fromId)); 82 | p.add(RELATION_FAM, TO, Bytes.toBytes(toId)); 83 | t.put(p); 84 | 85 | t.close(); 86 | } 87 | 88 | public List<HBaseIA.TwitBase.model.Relation> listFollows(String fromId) throws IOException { 89 | return listRelations(FOLLOWS_TABLE_NAME, fromId); 90 | } 91 | 92 | public List<HBaseIA.TwitBase.model.Relation> listFollowedBy(String fromId) throws IOException { 93 | return listRelations(FOLLOWED_TABLE_NAME, fromId); 94 | } 95 | 96 | public List<HBaseIA.TwitBase.model.Relation> listRelations(byte[] table, String fromId) throws IOException { 97 | 98 | HTableInterface t = pool.getTable(table); 99 | String rel = (Bytes.equals(table, FOLLOWS_TABLE_NAME)) ? "->" : "<-"; 100 | 101 | byte[] startKey = mkRowKey(fromId); 102 | byte[] endKey = Arrays.copyOf(startKey, startKey.length); 103 | endKey[Md5Utils.MD5_LENGTH-1]++; 104 | Scan scan = new Scan(startKey, endKey); 105 | scan.addColumn(RELATION_FAM, TO); 106 | scan.setMaxVersions(1); 107 | 108 | ResultScanner results = t.getScanner(scan); 109 | List<HBaseIA.TwitBase.model.Relation> ret 110 | = new ArrayList<HBaseIA.TwitBase.model.Relation>(); 111 | for (Result r : results) { 112 | KeyValue kv = r.getColumnLatest(RELATION_FAM, TO); 113 | String toId = Bytes.toString(kv.getValue()); 114 | ret.add(new Relation(rel, fromId, toId)); 115 | } 116 | 117 | t.close(); 118 | return ret; 119 | } 120 | 121 | @SuppressWarnings("unused") 122 | public long followedByCountScan (String user) throws IOException { 123 | HTableInterface followed = pool.getTable(FOLLOWED_TABLE_NAME); 124 | 125 | final byte[] startKey = Md5Utils.md5sum(user); 126 | final byte[] endKey = Arrays.copyOf(startKey, startKey.length); 127 | endKey[endKey.length-1]++; 128 | Scan scan = new Scan(startKey, endKey); 129 | scan.setMaxVersions(1); 130 | 131 | long sum = 0; 132 | ResultScanner rs = followed.getScanner(scan); 133 | for(Result r : rs) { 134 | sum++; 135 | } 136 | return sum; 137 | } 138 | 139 | public long followedByCount (final String userId) throws Throwable { 140 | HTableInterface followed = pool.getTable(FOLLOWED_TABLE_NAME); 141 | 142 | final byte[] startKey = Md5Utils.md5sum(userId); 143 | final byte[] endKey = Arrays.copyOf(startKey, startKey.length); 144 | endKey[endKey.length-1]++; 145 | 146 | Batch.Call<RelationCountProtocol, Long> callable = 147 | new Batch.Call<RelationCountProtocol, Long>() { 148 | @Override 149 | public Long call(RelationCountProtocol instance) 150 | throws IOException { 151 | return instance.followedByCount(userId); 152 | } 153 | }; 154 | 155 | Map<byte[], Long> results = 156 | followed.coprocessorExec( 157 | RelationCountProtocol.class, 158 | startKey, 159 | endKey, 160 | callable); 161 | 162 | long sum = 0; 163 | for(Map.Entry<byte[], Long> e : results.entrySet()) { 164 | sum += e.getValue().longValue(); 165 | } 166 | return sum; 167 | } 168 | 169 | private static class Relation extends HBaseIA.TwitBase.model.Relation { 170 | 171 | private Relation(String relation, String from, String to) { 172 | this.relation = relation; 173 | this.from = from; 174 | this.to = to; 175 | } 176 | } 177 | } 178 | -------------------------------------------------------------------------------- /src/main/resources/dict/propernames: -------------------------------------------------------------------------------- 1 | Aaron 2 | Adam 3 | Adlai 4 | Adrian 5 | Agatha 6 | Ahmed 7 | Ahmet 8 | Aimee 9 | Al 10 | Alain 11 | Alan 12 | Alastair 13 | Albert 14 | Alberto 15 | Alejandro 16 | Alex 17 | Alexander 18 | Alexis 19 | Alf 20 | Alfred 21 | Alison 22 | Allan 23 | Allen 24 | Alvin 25 | Amanda 26 | Amarth 27 | Amedeo 28 | Ami 29 | Amigo 30 | Amir 31 | Amos 32 | Amy 33 | 
Anatole 34 | Anatoly 35 | Anderson 36 | Andre 37 | Andrea 38 | Andreas 39 | Andrew 40 | Andries 41 | Andy 42 | Angela 43 | Angus 44 | Anita 45 | Ann 46 | Anna 47 | Annard 48 | Anne 49 | Annie 50 | Anthony 51 | Anton 52 | Antonella 53 | Antonio 54 | Antony 55 | Archie 56 | Ariel 57 | Arlene 58 | Arne 59 | Arnold 60 | Art 61 | Arthur 62 | Audrey 63 | Avery 64 | Axel 65 | Barbara 66 | Barbra 67 | Barney 68 | Barrett 69 | Barrio 70 | Barry 71 | Bart 72 | Barton 73 | Bea 74 | Beckie 75 | Becky 76 | Belinda 77 | Ben 78 | Benjamin 79 | Benson 80 | Bernard 81 | Bernie 82 | Bert 83 | Bertrand 84 | Beth 85 | Betsy 86 | Betty 87 | Beverly 88 | Bill 89 | Billie 90 | Billy 91 | Bjorne 92 | Blaine 93 | Blair 94 | Blake 95 | Blayne 96 | Bob 97 | Bobbie 98 | Bobby 99 | Bonnie 100 | Boyce 101 | Boyd 102 | Brad 103 | Bradford 104 | Bradley 105 | Brandi 106 | Brandon 107 | Brandy 108 | Brenda 109 | Brendan 110 | Brender 111 | Brent 112 | Bret 113 | Brett 114 | Brian 115 | Briggs 116 | Brodie 117 | Brooke 118 | Bruce 119 | Bruno 120 | Bryan 121 | Bryce 122 | Bucky 123 | Bud 124 | Butler 125 | Byron 126 | Caleb 127 | Calvin 128 | Carisa 129 | Carl 130 | Carlo 131 | Carlos 132 | Carol 133 | Carole 134 | Caroline 135 | Carolyn 136 | Carsten 137 | Carter 138 | Cary 139 | Case 140 | Casey 141 | Casper 142 | Catherine 143 | Cathrin 144 | Cathryn 145 | Cathy 146 | Cecilia 147 | Celeste 148 | Celia 149 | Charleen 150 | Charlene 151 | Charles 152 | Charley 153 | Charlie 154 | Chet 155 | Chip 156 | Chris 157 | Christian 158 | Christie 159 | Christina 160 | Christofer 161 | Christophe 162 | Christopher 163 | Chuck 164 | Cindie 165 | Cindy 166 | Claire 167 | Clara 168 | Clare 169 | Clarence 170 | Clarissa 171 | Clark 172 | Claude 173 | Claudia 174 | Claudio 175 | Clay 176 | Clayton 177 | Clem 178 | Cliff 179 | Clifford 180 | Clyde 181 | Cole 182 | Coleen 183 | Colin 184 | Collin 185 | Connie 186 | Conrad 187 | Corey 188 | Cory 189 | Courtney 190 | Craig 191 | Cris 192 | Cristi 193 | Cristina 194 | Cristopher 195 | Curt 196 | Curtis 197 | Cynthia 198 | Cyrus 199 | Dale 200 | Dalton 201 | Damon 202 | Damone 203 | Dan 204 | Dana 205 | Dani 206 | Daniel 207 | Daniele 208 | Danielle 209 | Dannie 210 | Danny 211 | Darci 212 | Daren 213 | Darin 214 | Darrell 215 | Darren 216 | Darryl 217 | Daryl 218 | Dave 219 | David 220 | Dawn 221 | Dawson 222 | Dean 223 | Deb 224 | Debbie 225 | Debi 226 | Deborah 227 | Deirdre 228 | Del 229 | Delbert 230 | Denis 231 | Dennis 232 | Derek 233 | Devon 234 | Dewey 235 | Diana 236 | Diane 237 | Dick 238 | Dieter 239 | Dimetry 240 | Dimitry 241 | Dion 242 | Dirk 243 | Dominic 244 | Dominick 245 | Don 246 | Donal 247 | Donald 248 | Donn 249 | Donna 250 | Donne 251 | Donnie 252 | Donovan 253 | Dori 254 | Dorian 255 | Dorothy 256 | Dory 257 | Doug 258 | Douglas 259 | Doyle 260 | Drew 261 | Duane 262 | Duke 263 | Duncan 264 | Dustin 265 | Dwayne 266 | Dwight 267 | Dylan 268 | Earl 269 | Earle 270 | Earnie 271 | Ed 272 | Eddie 273 | Eddy 274 | Edgar 275 | Edith 276 | Edmond 277 | Edmund 278 | Eduardo 279 | Edward 280 | Edwin 281 | Eileen 282 | Elaine 283 | Eli 284 | Elias 285 | Elijah 286 | Eliot 287 | Elisabeth 288 | Elizabeth 289 | Ellen 290 | Elliot 291 | Elliott 292 | Elric 293 | Elsa 294 | Elvis 295 | Elwood 296 | Emil 297 | Emily 298 | Emma 299 | Emmett 300 | Eric 301 | Erick 302 | Erik 303 | Ernest 304 | Ernie 305 | Ernst 306 | Erwin 307 | Ethan 308 | Eugene 309 | Eva 310 | Evan 311 | Evelyn 312 | Everett 313 | Farouk 314 | Fay 315 | Felix 316 | Fletcher 317 | Floria 318 | Florian 319 | Floyd 
320 | Frances 321 | Francis 322 | Francisco 323 | Francois 324 | Frank 325 | Franklin 326 | Fred 327 | Frederic 328 | Frederick 329 | Fritz 330 | Gabriel 331 | Gail 332 | Gale 333 | Galen 334 | Gary 335 | Gene 336 | Geoff 337 | Geoffrey 338 | George 339 | Gerald 340 | Gerard 341 | Gideon 342 | Gigi 343 | Gil 344 | Giles 345 | Gill 346 | Gilles 347 | Ginny 348 | Giovanni 349 | Glen 350 | Glenn 351 | Glynn 352 | Gordon 353 | Grace 354 | Graeme 355 | Graham 356 | Grant 357 | Granville 358 | Greg 359 | Gregg 360 | Gregge 361 | Gregor 362 | Gregory 363 | Gretchen 364 | Griff 365 | Guido 366 | Guillermo 367 | Gunnar 368 | Gunter 369 | Guy 370 | Gypsy 371 | Hal 372 | Hamilton 373 | Hank 374 | Hans 375 | Harmon 376 | Harold 377 | Harris 378 | Harry 379 | Hartmann 380 | Harv 381 | Harvey 382 | Hazel 383 | Heather 384 | Hector 385 | Heidi 386 | Hein 387 | Heinrich 388 | Heinz 389 | Helen 390 | Helge 391 | Henry 392 | Herb 393 | Herbert 394 | Herman 395 | Herve 396 | Hienz 397 | Hilda 398 | Hillary 399 | Hillel 400 | Himawan 401 | Hirofumi 402 | Hirotoshi 403 | Hiroyuki 404 | Hitoshi 405 | Hohn 406 | Holly 407 | Hon 408 | Honzo 409 | Horst 410 | Hotta 411 | Howard 412 | Hsi 413 | Hsuan 414 | Huashi 415 | Hubert 416 | Huey 417 | Hugh 418 | Hughes 419 | Hui 420 | Hume 421 | Hunter 422 | Hurf 423 | Hwa 424 | Hy 425 | Ian 426 | Ilya 427 | Ima 428 | Indra 429 | Ira 430 | Irfan 431 | Irvin 432 | Irving 433 | Irwin 434 | Isaac 435 | Isabelle 436 | Isidore 437 | Israel 438 | Izchak 439 | Izumi 440 | Izzy 441 | Jack 442 | Jackye 443 | Jacob 444 | Jacobson 445 | Jacques 446 | Jagath 447 | Jaime 448 | Jakob 449 | James 450 | Jamie 451 | Jan 452 | Jane 453 | Janet 454 | Janice 455 | Janos 456 | Jared 457 | Jarl 458 | Jarmo 459 | Jarvis 460 | Jason 461 | Jay 462 | Jayant 463 | Jayesh 464 | Jean 465 | Jean-Christophe 466 | Jean-Pierre 467 | Jeanette 468 | Jeanne 469 | Jeannette 470 | Jeannie 471 | Jeany 472 | Jef 473 | Jeff 474 | Jeffery 475 | Jeffie 476 | Jeffrey 477 | Jelske 478 | Jem 479 | Jenine 480 | Jennie 481 | Jennifer 482 | Jerald 483 | Jeremy 484 | Jerome 485 | Jerrie 486 | Jerry 487 | Jesper 488 | Jess 489 | Jesse 490 | Jesus 491 | Ji 492 | Jianyun 493 | Jill 494 | Jim 495 | Jimmy 496 | Jin 497 | Jinchao 498 | Jingbai 499 | Jinny 500 | Jiri 501 | Jisheng 502 | Jitendra 503 | Joachim 504 | Joanne 505 | Jochen 506 | Jock 507 | Joe 508 | Joel 509 | Johan 510 | Johann 511 | John 512 | Johnathan 513 | Johnnie 514 | Johnny 515 | Jon 516 | Jonathan 517 | Jones 518 | Jong 519 | Joni 520 | Joon 521 | Jordan 522 | Jorge 523 | Jos 524 | Jose 525 | Joseph 526 | Josh 527 | Joshua 528 | Josip 529 | Joubert 530 | Joyce 531 | Juan 532 | Judge 533 | Judith 534 | Judy 535 | Juergen 536 | Juha 537 | Julia 538 | Julian 539 | Juliane 540 | Julianto 541 | Julie 542 | Juliet 543 | Julius 544 | Jun 545 | June 546 | Jurevis 547 | Juri 548 | Jussi 549 | Justin 550 | Jwahar 551 | Kaj 552 | Kamel 553 | Kamiya 554 | Kanthan 555 | Karen 556 | Kari 557 | Karl 558 | Kate 559 | Kathleen 560 | Kathryn 561 | Kathy 562 | Kay 563 | Kayvan 564 | Kazuhiro 565 | Kee 566 | Kees 567 | Keith 568 | Kelly 569 | Kelvin 570 | Kemal 571 | Ken 572 | Kenn 573 | Kenneth 574 | Kent 575 | Kenton 576 | Kerri 577 | Kerry 578 | Kevan 579 | Kevin 580 | Kevyn 581 | Kieran 582 | Kiki 583 | Kikki 584 | Kim 585 | Kimberly 586 | Kimmo 587 | Kinch 588 | King 589 | Kirk 590 | Kirsten 591 | Kit 592 | Kitty 593 | Klaudia 594 | Klaus 595 | Knapper 596 | Knudsen 597 | Knut 598 | Knute 599 | Kolkka 600 | Konrad 601 | Konstantinos 602 | Kory 603 | Kris 604 | Kristen 605 | 
Kristi 606 | Kristian 607 | Kristin 608 | Kriton 609 | Krzysztof 610 | Kuldip 611 | Kurt 612 | Kusum 613 | Kyle 614 | Kylo 615 | Kyu 616 | Kyung 617 | Lana 618 | Lance 619 | Lanny 620 | Lar 621 | Larry 622 | Lars 623 | Laura 624 | Laurel 625 | Laurence 626 | Laurent 627 | Laurianne 628 | Laurie 629 | Lawrence 630 | Lea 631 | Leads 632 | Lee 633 | Leif 634 | Leigh 635 | Leila 636 | Leith 637 | Len 638 | Lenny 639 | Lenora 640 | Leo 641 | Leon 642 | Leonard 643 | Leora 644 | Les 645 | Leslie 646 | Lester 647 | Leung 648 | Lewis 649 | Lex 650 | Liber 651 | Lievaart 652 | Lila 653 | Lin 654 | Linda 655 | Linder 656 | Lindsay 657 | Lindsey 658 | Linley 659 | Lisa 660 | List 661 | Liyuan 662 | Liz 663 | Liza 664 | Lloyd 665 | Lois 666 | Lonhyn 667 | Lord 668 | Loren 669 | Lorenzo 670 | Lori 671 | Lorien 672 | Lorraine 673 | Lou 674 | Louie 675 | Louiqa 676 | Louis 677 | Louise 678 | Loukas 679 | Lowell 680 | Loyd 681 | Luc 682 | Lucifer 683 | Lucius 684 | Lui 685 | Luis 686 | Lukas 687 | Luke 688 | Lum 689 | Lyndon 690 | Lynn 691 | Lynne 692 | Lynnette 693 | Maarten 694 | Mac 695 | Magnus 696 | Mah 697 | Mahesh 698 | Mahmoud 699 | Major 700 | Malaclypse 701 | Malcolm 702 | Malloy 703 | Malus 704 | Manavendra 705 | Manjeri 706 | Mann 707 | Manny 708 | Manolis 709 | Manuel 710 | Mara 711 | Marc 712 | Marcel 713 | Marci 714 | Marcia 715 | Marco 716 | Marcos 717 | Marek 718 | Margaret 719 | Margie 720 | Margot 721 | Marguerite 722 | Maria 723 | Marian 724 | Marie 725 | Marilyn 726 | Mario 727 | Marion 728 | Mariou 729 | Mark 730 | Markus 731 | Marla 732 | Marlena 733 | Marnix 734 | Marsh 735 | Marsha 736 | Marshall 737 | Martha 738 | Martin 739 | Marty 740 | Martyn 741 | Marvin 742 | Mary 743 | Masanao 744 | Masanobu 745 | Mason 746 | Mat 747 | Mats 748 | Matt 749 | Matthew 750 | Matthias 751 | Matthieu 752 | Matti 753 | Maureen 754 | Maurice 755 | Max 756 | Mayo 757 | Mechael 758 | Meehan 759 | Meeks 760 | Mehrdad 761 | Melinda 762 | Merat 763 | Merril 764 | Merton 765 | Metin 766 | Micah 767 | Michael 768 | Micheal 769 | Michel 770 | Michelle 771 | Michiel 772 | Mick 773 | Mickey 774 | Micky 775 | Miek 776 | Mikael 777 | Mike 778 | Mikey 779 | Miki 780 | Miles 781 | Milner 782 | Milo 783 | Miltos 784 | Miriam 785 | Miriamne 786 | Mitch 787 | Mitchell 788 | Moe 789 | Mohammad 790 | Molly 791 | Mongo 792 | Monica 793 | Monty 794 | Moore 795 | Moran 796 | Morgan 797 | Morris 798 | Morton 799 | Moses 800 | Mosur 801 | Mott 802 | Murat 803 | Murph 804 | Murray 805 | Murthy 806 | Mwa 807 | Myrick 808 | Myron 809 | Mysore 810 | Nadeem 811 | Naim 812 | Nancy 813 | Nanda 814 | Naomi 815 | Naoto 816 | Naren 817 | Narendra 818 | Naresh 819 | Nate 820 | Nathan 821 | Nathaniel 822 | Natraj 823 | Neal 824 | Ned 825 | Neil 826 | Nelken 827 | Neville 828 | Nguyen 829 | Nhan 830 | Niall 831 | Nichael 832 | Nicholas 833 | Nici 834 | Nick 835 | Nicolas 836 | Nicolette 837 | Nicolo 838 | Niels 839 | Nigel 840 | Nikolai 841 | Nils 842 | Ning 843 | Ninja 844 | No 845 | Noam 846 | Noemi 847 | Nora 848 | Norbert 849 | Norm 850 | Norma 851 | Norman 852 | Nou 853 | Novo 854 | Novorolsky 855 | Ofer 856 | Olaf 857 | Old 858 | Ole 859 | Oleg 860 | Oliver 861 | Olivier 862 | Olof 863 | Olson 864 | Omar 865 | Orville 866 | Oscar 867 | Oskar 868 | Owen 869 | Ozan 870 | Pablo 871 | Page 872 | Pam 873 | Pamela 874 | Panacea 875 | Pandora 876 | Panos 877 | Pantelis 878 | Panzer 879 | Paola 880 | Part 881 | Pascal 882 | Pat 883 | Patrice 884 | Patricia 885 | Patricio 886 | Patrick 887 | Patty 888 | Paul 889 | Paula 890 | Pedro 891 | 
Peggy 892 | Penny 893 | Per 894 | Perry 895 | Pete 896 | Peter 897 | Petr 898 | Phil 899 | Philip 900 | Philippe 901 | Phill 902 | Phillip 903 | Phiroze 904 | Pia 905 | Piercarlo 906 | Pierce 907 | Pierette 908 | Pierre 909 | Piet 910 | Piete 911 | Pieter 912 | Pilar 913 | Pilot 914 | Pim 915 | Ping 916 | Piotr 917 | Pitawas 918 | Plastic 919 | Po 920 | Polly 921 | Pontus 922 | Pradeep 923 | Prakash 924 | Pratap 925 | Pratapwant 926 | Pratt 927 | Pravin 928 | Presley 929 | Pria 930 | Price 931 | Raanan 932 | Rabin 933 | Radek 934 | Rafael 935 | Rafik 936 | Raghu 937 | Ragnar 938 | Rahul 939 | Raif 940 | Rainer 941 | Raj 942 | Raja 943 | Rajarshi 944 | Rajeev 945 | Rajendra 946 | Rajesh 947 | Rajiv 948 | Rakhal 949 | Ralf 950 | Ralph 951 | Ram 952 | Ramadoss 953 | Raman 954 | Ramanan 955 | Ramesh 956 | Ramiro 957 | Ramneek 958 | Ramon 959 | Ramsey 960 | Rand 961 | Randal 962 | Randall 963 | Randell 964 | Randolph 965 | Randy 966 | Ranjit 967 | Raphael 968 | Rathnakumar 969 | Raul 970 | Ravi 971 | Ravindran 972 | Ravindranath 973 | Ray 974 | Rayan 975 | Raymond 976 | Real 977 | Rebecca 978 | Rees 979 | Reid 980 | Reiner 981 | Reinhard 982 | Renu 983 | Revised 984 | Rex 985 | Rhonda 986 | Ric 987 | Ricardo 988 | Rich 989 | Richard 990 | Rick 991 | Ricky 992 | Rik 993 | Ritalynne 994 | Ritchey 995 | Ro 996 | Rob 997 | Robbin 998 | Robert 999 | Roberta 1000 | Roberto 1001 | Robin 1002 | Rod 1003 | Rodent 1004 | Roderick 1005 | Rodger 1006 | Rodney 1007 | Roger 1008 | Rogue 1009 | Roland 1010 | Rolf 1011 | Rolfe 1012 | Romain 1013 | Roman 1014 | Ron 1015 | Ronald 1016 | Ronni 1017 | Root 1018 | Ross 1019 | Roxana 1020 | Roxane 1021 | Roxanne 1022 | Roxie 1023 | Roy 1024 | Rudolf 1025 | Rudolph 1026 | Rudy 1027 | Rupert 1028 | Russ 1029 | Russell 1030 | Rusty 1031 | Ruth 1032 | Saad 1033 | Sabrina 1034 | Saify 1035 | Saiid 1036 | Sal 1037 | Sally 1038 | Sam 1039 | Samir 1040 | Samuel 1041 | Sanand 1042 | Sanche 1043 | Sandeep 1044 | Sandip 1045 | Sandra 1046 | Sandy 1047 | Sanford 1048 | Sangho 1049 | Sanity 1050 | Sanjay 1051 | Sanjeev 1052 | Sanjib 1053 | Santa 1054 | Saqib 1055 | Sarah 1056 | Sassan 1057 | Saul 1058 | Saumya 1059 | Scot 1060 | Scott 1061 | Sean 1062 | Sedat 1063 | Sedovic 1064 | Seenu 1065 | Sehyo 1066 | Sekar 1067 | Serdar 1068 | Sergeant 1069 | Sergei 1070 | Sergio 1071 | Sergiu 1072 | Seth 1073 | Seymour 1074 | Shadow 1075 | Shahid 1076 | Shai 1077 | Shakil 1078 | Shamim 1079 | Shane 1080 | Shankar 1081 | Shannon 1082 | Sharada 1083 | Sharan 1084 | Shari 1085 | Sharon 1086 | Shatter 1087 | Shaw 1088 | Shawn 1089 | Shean 1090 | Sheila 1091 | Shel 1092 | Sherman 1093 | Sherri 1094 | Shirley 1095 | Sho 1096 | Shutoku 1097 | Shuvra 1098 | Shyam 1099 | Sid 1100 | Sidney 1101 | Siegurd 1102 | Sigurd 1103 | Simon 1104 | Siping 1105 | Sir 1106 | Sjaak 1107 | Sjouke 1108 | Skeeter 1109 | Skef 1110 | Skip 1111 | Slartibartfast 1112 | Socorrito 1113 | Sofia 1114 | Sofoklis 1115 | Son 1116 | Sonja 1117 | Sonny 1118 | Soohong 1119 | Sorrel 1120 | Space 1121 | Spass 1122 | Spencer 1123 | Spike 1124 | Spock 1125 | Spudboy 1126 | Spy 1127 | Spyros 1128 | Sri 1129 | Sridhar 1130 | Sridharan 1131 | Srikanth 1132 | Srinivas 1133 | Srinivasan 1134 | Sriram 1135 | Srivatsan 1136 | Ssi 1137 | Stacey 1138 | Stacy 1139 | Stagger 1140 | Stan 1141 | Stanislaw 1142 | Stanley 1143 | Stanly 1144 | Starbuck 1145 | Steen 1146 | Stefan 1147 | Stephan 1148 | Stephanie 1149 | Stephe 1150 | Stephen 1151 | Stevan 1152 | Steve 1153 | Steven 1154 | Stewart 1155 | Straka 1156 | Stu 1157 | Stuart 1158 | Subra 
1159 | Sue 1160 | Sugih 1161 | Sumitro 1162 | Sundar 1163 | Sundaresan 1164 | Sunil 1165 | Suresh 1166 | Surya 1167 | Susan 1168 | Susanne 1169 | Susumu 1170 | Suu 1171 | Suwandi 1172 | Suyog 1173 | Suzan 1174 | Suzanne 1175 | Svante 1176 | Swamy 1177 | Syd 1178 | Syed 1179 | Sylvan 1180 | Syun 1181 | Tad 1182 | Tahsin 1183 | Tai 1184 | Tait 1185 | Takao 1186 | Takayuki 1187 | Takeuchi 1188 | Tal 1189 | Tammy 1190 | Tanaka 1191 | Tandy 1192 | Tanya 1193 | Tao 1194 | Tareq 1195 | Tarmi 1196 | Taurus 1197 | Ted 1198 | Teresa 1199 | Teri 1200 | Teriann 1201 | Terrance 1202 | Terrence 1203 | Terri 1204 | Terry 1205 | Teruyuki 1206 | Thad 1207 | Tharen 1208 | The 1209 | Theo 1210 | Theodore 1211 | Thierry 1212 | Think 1213 | Thomas 1214 | Those 1215 | Thuan 1216 | Ti 1217 | Tiefenthal 1218 | Tigger 1219 | Tim 1220 | Timo 1221 | Timothy 1222 | Tobias 1223 | Toby 1224 | Todd 1225 | Toerless 1226 | Toft 1227 | Tolerant 1228 | Tollefsen 1229 | Tom 1230 | Tomas 1231 | Tommy 1232 | Tony 1233 | Tor 1234 | Torsten 1235 | Toufic 1236 | Tovah 1237 | Tracey 1238 | Tracy 1239 | Tran 1240 | Travis 1241 | Trent 1242 | Trevor 1243 | Trey 1244 | Triantaphyllos 1245 | Tricia 1246 | Troy 1247 | Trying 1248 | Tuan 1249 | Tuna 1250 | Turkeer 1251 | Tyler 1252 | Uri 1253 | Urs 1254 | Vadim 1255 | Val 1256 | Valentin 1257 | Valeria 1258 | Valerie 1259 | Van 1260 | Vance 1261 | Varda 1262 | Vassos 1263 | Vaughn 1264 | Venkata 1265 | Vern 1266 | Vernon 1267 | Vic 1268 | Vice 1269 | Vick 1270 | Vicki 1271 | Vickie 1272 | Vicky 1273 | Victor 1274 | Victoria 1275 | Vidhyanath 1276 | Vijay 1277 | Vilhelm 1278 | Vince 1279 | Vincent 1280 | Vincenzo 1281 | Vinod 1282 | Vishal 1283 | Vistlik 1284 | Vivek 1285 | Vladimir 1286 | Vladislav 1287 | Wade 1288 | Walt 1289 | Walter 1290 | Warren 1291 | Wayne 1292 | Wendell 1293 | Wendi 1294 | Wendy 1295 | Werner 1296 | Wes 1297 | Will 1298 | William 1299 | Willie 1300 | Wilmer 1301 | Wilson 1302 | Win 1303 | Winnie 1304 | Winston 1305 | Wolf 1306 | Wolfgang 1307 | Woody 1308 | Yvonne 1309 | --------------------------------------------------------------------------------
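The DAO classes above are only exercised indirectly by the CLI tools elsewhere in the repository (UsersTool, TwitsTool, RelationsTool). As a rough illustration of how they compose, here is a minimal sketch against the HTablePool-based client API used throughout TwitBase. It assumes the tables have already been created (see InitTables) and that an hbase-site.xml on the classpath points at a reachable cluster; the class name UsageSketch and the sample values are hypothetical and are not part of this repository.

// Hypothetical usage sketch -- not part of the repository.
// Assumes InitTables has been run and hbase-site.xml is on the classpath.
package HBaseIA.TwitBase;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTablePool;
import org.joda.time.DateTime;

import HBaseIA.TwitBase.hbase.RelationsDAO;
import HBaseIA.TwitBase.hbase.TwitsDAO;
import HBaseIA.TwitBase.hbase.UsersDAO;

public class UsageSketch {

  public static void main(String[] args) throws Throwable {
    Configuration conf = HBaseConfiguration.create();
    HTablePool pool = new HTablePool(conf, 5);

    UsersDAO users = new UsersDAO(pool);
    TwitsDAO twits = new TwitsDAO(pool);
    RelationsDAO relations = new RelationsDAO(pool);

    // users table: row key is the user id; the info family holds
    // user, name, email and password columns.
    users.addUser("TheRealMT", "Mark Twain", "samuel@clemens.org", "abc123");
    users.addUser("HRogers", "Henry Rogers", "henry@standardoil.com", "hunter2");

    // twits table: row key is md5(user) + negated timestamp, so list()
    // returns a user's twits newest-first.
    twits.postTwit("TheRealMT", new DateTime(),
        "Reports of my death are greatly exaggerated.");

    // relations are maintained in both directions, one table each.
    relations.addFollows("HRogers", "TheRealMT");
    relations.addFollowedBy("TheRealMT", "HRogers");

    System.out.println(users.getUser("TheRealMT"));
    System.out.println(twits.list("TheRealMT"));

    // client-side count over the followedBy table; followedByCount(...)
    // pushes the same work to the RelationCountProtocol coprocessor
    // when it is deployed on the region servers.
    System.out.println(relations.followedByCountScan("TheRealMT"));
  }
}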