├── .gitignore ├── img ├── magic_view_example.png ├── lambda_architecture.png └── simplified_lambda_architecture.png ├── pom.xml ├── src └── main │ └── java │ ├── flickr │ ├── SimplifiedLambdaDemo │ │ └── SimplifiedLambdaDemo.java │ └── SimplifiedLambda │ │ └── SimplifiedLambda.java │ └── utility │ └── MockHTable.java ├── test └── java │ └── flickr │ └── SimplifiedLambda │ └── SimplifiedLambdaTest.java ├── LICENSE.txt └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea 2 | *.iml 3 | target 4 | flickr-simplified-lambda.iml -------------------------------------------------------------------------------- /img/magic_view_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YahooArchive/simplified-lambda/HEAD/img/magic_view_example.png -------------------------------------------------------------------------------- /img/lambda_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YahooArchive/simplified-lambda/HEAD/img/lambda_architecture.png -------------------------------------------------------------------------------- /img/simplified_lambda_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/YahooArchive/simplified-lambda/HEAD/img/simplified_lambda_architecture.png -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | flickr-streamlined-lambda 8 | flickr-streamlined-lambda 9 | 0.1 10 | 11 | 12 | 13 | central 14 | Maven Repository Switchboard 15 | default 16 | http://repo1.maven.org/maven2 17 | 18 | false 19 | 20 | 21 | 22 | 23 | 24 | 25 | org.testng 26 | testng 27 | 6.8.7 28 | 29 | 30 | org.apache.hbase 31 | hbase-common 32 | 
0.98.7-hadoop2 33 | 34 | 35 | org.apache.hbase 36 | hbase-it 37 | 0.98.7-hadoop2 38 | 39 | 40 | 41 | 42 | test/java 43 | 44 | 45 | org.apache.maven.plugins 46 | maven-compiler-plugin 47 | 2.3.2 48 | 49 | 1.7 50 | 1.7 51 | 52 | 53 | 54 | org.apache.maven.plugins 55 | maven-surefire-plugin 56 | 2.13 57 | 58 | 59 | firefox 60 | 61 | 62 | 63 | 64 | org.codehaus.mojo 65 | exec-maven-plugin 66 | 1.2.1 67 | 68 | flickr.SimplifiedLambdaDemo.SimplifiedLambdaDemo 69 | 70 | 71 | 72 | 73 | 74 | 75 | -------------------------------------------------------------------------------- /src/main/java/flickr/SimplifiedLambdaDemo/SimplifiedLambdaDemo.java: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Yahoo Inc. 2 | // Licensed under the terms of the Apache 2.0 License 3 | // See LICENSE.txt file in the project root folder for License terms. 4 | 5 | package flickr.SimplifiedLambdaDemo; 6 | 7 | import flickr.SimplifiedLambda.SimplifiedLambda; 8 | import utility.MockHTable; 9 | import org.apache.hadoop.hbase.client.HTableInterface; 10 | 11 | /** 12 | * Created by bjoshi on 8/18/15. 
13 | */ 14 | public class SimplifiedLambdaDemo { 15 | // mvn exec:java -Dexec.mainClass="flickr.SimplifiedLambdaDemo.SimplifiedLambdaDemo" 16 | public static void main(final String[] args) throws Exception { 17 | printHeader(); 18 | 19 | MockHTable lambdaTable = new MockHTable("lambdaTable"); 20 | lambdaTable.addColumnFamily(new String(SimplifiedLambda.FAMILY)); 21 | 22 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 23 | 24 | System.out.println("\nInitial table:"); 25 | System.out.print(simplifiedLambda.dumpTable()); 26 | 27 | System.out.println("\nPush bulk rows:"); 28 | simplifiedLambda.pushBulkEntry("rowA", "bulk"); 29 | simplifiedLambda.pushBulkEntry("rowB", "bulk"); 30 | simplifiedLambda.pushBulkEntry("rowC", "bulk"); 31 | simplifiedLambda.cleaner(); 32 | System.out.print(simplifiedLambda.dumpTable()); 33 | 34 | System.out.println("\nOverride rows A,B with realtime:"); 35 | simplifiedLambda.pushRealtimeEntry("rowA", "rtOvrd"); 36 | simplifiedLambda.pushRealtimeEntry("rowB", "rtOvrd"); 37 | System.out.print(simplifiedLambda.dumpTable()); 38 | 39 | System.out.println("\nClean table:"); 40 | simplifiedLambda.cleaner(); 41 | System.out.print(simplifiedLambda.dumpTable()); 42 | 43 | System.out.println("\nOverride rows B,C with realtime:"); 44 | simplifiedLambda.pushRealtimeEntry("rowB", "rtOvrd2"); 45 | simplifiedLambda.pushRealtimeEntry("rowC", "rtOvrd2"); 46 | System.out.print(simplifiedLambda.dumpTable()); 47 | 48 | System.out.println("\nOverride all rows with bulk:"); 49 | simplifiedLambda.pushBulkEntry("rowA", "bulkOvr"); 50 | simplifiedLambda.pushBulkEntry("rowB", "bulkOvr"); 51 | simplifiedLambda.cleaner(); 52 | System.out.print(simplifiedLambda.dumpTable()); 53 | 54 | } 55 | 56 | 57 | static private void printHeader() { 58 | System.out.println(" _"); 59 | System.out.println(" . - ` : ` '.' `` . - '` ` ."); 60 | System.out.println(" ' ,gi$@$q pggq pggq . ' pggq"); 61 | System.out.println(" + j@@@P*\\7 @@@@ @@@@ _ : @@@@ ! ._ , . 
_ - ."); 62 | System.out.println(" . . @@@K @@@@ ; -` `_,_ ` . @@@@ ;/ ` _,,_ `"); 63 | System.out.println(" ; pgg@@@@gggq @@@@ @@@@ .' ,iS@@@@@Si @@@@ .6@@@P' !!!! j!!!!7 ;"); 64 | System.out.println(" @@@@@@@@@@@ @@@@ @@@@ ` j@@@P*\"*+Y7 @@@@ .6@@@P !!!!47*\"*+;"); 65 | System.out.println(" `_ @@@@ @@@@ @@@@ .@@@7 . ` @@@@.6@@@P ` !!!!; . '"); 66 | System.out.println(" . @@@@ ' @@@@ @@@@ :@@@! !: @@@@7@@@K `; !!!! ' ` '"); 67 | System.out.println(" @@@@ . @@@@ @@@@ `%@@@. . @@@@`7@@@b . !!!! :"); 68 | System.out.println(" ! @@@@ @@@@ @@@@ \\@@@$+,,+4b @@@@ `7@@@b !!!!"); 69 | System.out.println(" @@@@ : @@@@ @@@@ `7%S@@hX!P' @@@@ `7@@@b !!!! ."); 70 | System.out.println(" : \"\"\"\" \"\"\"\" \"\"\"\" :. `^\"^` \"\"\"\" `\"\"\"\"\" ''''"); 71 | System.out.println(" ` - . . _._ ` _._ _ . -"); 72 | System.out.println(" , ` ,glllllllllg, `-: ' .~ . . . ~. `"); 73 | System.out.println(" ,jlllllllllllllllp, .!' .+. . . . . . .+. `."); 74 | System.out.println(" ` jllllllllllllllllllll ` +. . . . . . . . .+ ."); 75 | System.out.println(" . jllllllllllllllllllllll . . . . . . . . . . ."); 76 | System.out.println(" .l@@@@@@@lllllllllllllll. j. . . . . . . :::::::l `"); 77 | System.out.println(" ; ;@@@@@@@@@@@@@@@@@@@lllll :. . :::::::::::::::::: ;"); 78 | System.out.println(" :l@@@@@@@@@@@@@@@@@@@@@l; ::::::::::::::::::::::;"); 79 | System.out.println(" ` Y@@@@@@@@@@@@@@@@@@@@@P ::::::::::::::::::::: '"); 80 | System.out.println(" - Y@@@@@@@@@@@@@@@@@@@P . ::::::::::::::::::: ."); 81 | System.out.println(" `*@@@@@@@@@@@@@@@*` ` ` `:::::::::::::::`"); 82 | System.out.println(" `. `*%@@@@@@@%*` . ` `+:::::::::+` '"); 83 | System.out.println(" . ``` _ ' - . ``` -"); 84 | System.out.println(" ` ' ` ' `"); 85 | System.out.println(" You're reading. We're hiring. 
"); 86 | System.out.println(" https://www.flickr.com/jobs/"); 87 | System.out.println(""); 88 | System.out.println("Simplified Lambda Example"); 89 | System.out.println("========================="); 90 | } 91 | 92 | 93 | } 94 | -------------------------------------------------------------------------------- /test/java/flickr/SimplifiedLambda/SimplifiedLambdaTest.java: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Yahoo Inc. 2 | // Licensed under the terms of the Apache 2.0 License 3 | // See LICENSE.txt file in the project root folder for License terms. 4 | 5 | package flickr.SimplifiedLambda; 6 | 7 | import utility.MockHTable; 8 | 9 | import org.testng.annotations.Test; 10 | 11 | import java.io.IOException; 12 | import java.util.logging.Logger; 13 | 14 | import static org.testng.Assert.assertEquals; 15 | import static org.testng.Assert.assertNotEquals; 16 | import static org.testng.Assert.assertTrue; 17 | 18 | /** 19 | * Created by bjoshi on 8/17/15. 
20 | */ 21 | public class SimplifiedLambdaTest { 22 | private static Logger LOG = Logger.getLogger(SimplifiedLambda.class.getName()); 23 | 24 | private MockHTable createLambdaTable() { 25 | MockHTable lambdaTable = new MockHTable("lambdaTable"); 26 | lambdaTable.addColumnFamily(new String(SimplifiedLambda.FAMILY)); 27 | return lambdaTable; 28 | } 29 | 30 | @Test 31 | public void testTableCreate() throws Exception { 32 | MockHTable lambdaTable = createLambdaTable(); 33 | 34 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 35 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 36 | } 37 | 38 | @Test 39 | public void testPushAndGetData() throws Exception { 40 | MockHTable lambdaTable = createLambdaTable(); 41 | 42 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 43 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 44 | 45 | simplifiedLambda.pushBulkEntry("bulk","0"); 46 | simplifiedLambda.pushRealtimeEntry("realtime", "1"); 47 | 48 | assertEquals(simplifiedLambda.getItemForKey("bulk", SimplifiedLambda.BULK_COLUMN),"0"); 49 | assertEquals(simplifiedLambda.getItemForKey("realtime", SimplifiedLambda.REALTIME_COLUMN),"1"); 50 | 51 | try { 52 | assertNotEquals(simplifiedLambda.getItemForKey("realtime", SimplifiedLambda.BULK_COLUMN), "0"); 53 | } catch (IOException e) { 54 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 55 | } 56 | 57 | try { 58 | assertNotEquals(simplifiedLambda.getItemForKey("bulk", SimplifiedLambda.REALTIME_COLUMN), "1"); 59 | } catch (IOException e) { 60 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 61 | } 62 | } 63 | 64 | @Test 65 | public void testTimestamps() throws Exception { 66 | MockHTable lambdaTable = createLambdaTable(); 67 | 68 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 69 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 70 | 71 | 
simplifiedLambda.pushBulkEntry("bulk","0"); 72 | simplifiedLambda.pushRealtimeEntry("realtime", "1"); 73 | 74 | long bulkTimestamp = simplifiedLambda.getTimestampForKey("bulk", SimplifiedLambda.BULK_COLUMN); 75 | long realtimeTimestamp = simplifiedLambda.getTimestampForKey("realtime", SimplifiedLambda.REALTIME_COLUMN); 76 | 77 | assertTrue(realtimeTimestamp>bulkTimestamp); 78 | } 79 | 80 | @Test 81 | public void testBulkOnly() throws Exception { 82 | MockHTable lambdaTable = createLambdaTable(); 83 | 84 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 85 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 86 | 87 | simplifiedLambda.pushBulkEntry("bulk","0"); 88 | 89 | assertEquals(simplifiedLambda.combiner("bulk"), "0"); 90 | 91 | simplifiedLambda.cleaner(); 92 | 93 | assertEquals(simplifiedLambda.combiner("bulk"), "0"); 94 | assertEquals(simplifiedLambda.getItemForKey("bulk", SimplifiedLambda.BULK_COLUMN), "0"); 95 | try { 96 | assertNotEquals(simplifiedLambda.getItemForKey("bulk", SimplifiedLambda.REALTIME_COLUMN), "0"); 97 | } catch (IOException e) { 98 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 99 | } 100 | } 101 | 102 | @Test 103 | public void testRealtimeOnly() throws Exception { 104 | MockHTable lambdaTable = createLambdaTable(); 105 | 106 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 107 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 108 | 109 | simplifiedLambda.pushRealtimeEntry("realtime", "0"); 110 | 111 | assertEquals(simplifiedLambda.combiner("realtime"), "0"); 112 | 113 | simplifiedLambda.cleaner(); 114 | 115 | assertEquals(simplifiedLambda.combiner("realtime"), "0"); 116 | 117 | // check that realtime is pushed to bulk after cleaner 118 | assertEquals(simplifiedLambda.getItemForKey("realtime", SimplifiedLambda.BULK_COLUMN), "0"); 119 | 120 | // assert that realtime column is pushed to bulk and is deleted 121 | try { 122 | 
assertNotEquals(simplifiedLambda.getItemForKey("realtime", SimplifiedLambda.REALTIME_COLUMN), "0"); 123 | } catch (IOException e) { 124 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 125 | } 126 | } 127 | 128 | @Test 129 | public void testBulkBeforeRealtime() throws Exception { 130 | MockHTable lambdaTable = createLambdaTable(); 131 | 132 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 133 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 134 | 135 | simplifiedLambda.pushBulkEntry("common","0"); 136 | simplifiedLambda.pushRealtimeEntry("common", "1"); 137 | 138 | assertEquals(simplifiedLambda.combiner("common"), "1"); 139 | 140 | simplifiedLambda.cleaner(); 141 | 142 | // check that realtime is pushed to bulk after cleaner 143 | assertEquals(simplifiedLambda.getItemForKey("common", SimplifiedLambda.BULK_COLUMN), "1"); 144 | 145 | // assert that realtime column is pushed to bulk and is deleted 146 | try { 147 | assertNotEquals(simplifiedLambda.getItemForKey("common", SimplifiedLambda.REALTIME_COLUMN), "1"); 148 | } catch (IOException e) { 149 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 150 | } 151 | } 152 | 153 | @Test 154 | public void testRealtimeBeforeBulk() throws Exception { 155 | MockHTable lambdaTable = createLambdaTable(); 156 | 157 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 158 | assertTrue(simplifiedLambda instanceof SimplifiedLambda); 159 | 160 | simplifiedLambda.pushRealtimeEntry("common", "1"); 161 | simplifiedLambda.pushBulkEntry("common","0"); 162 | 163 | try { 164 | assertEquals(simplifiedLambda.combiner("common"), "1"); 165 | } catch (IOException e) { 166 | assertEquals(e.toString(), "java.io.IOException: Bulk timestamp newer than realtime: shouldn't happen!"); 167 | } 168 | simplifiedLambda.cleaner(); 169 | 170 | // bulk timestamp is newer, so check that it's intact 171 | 
assertEquals(simplifiedLambda.getItemForKey("common", SimplifiedLambda.BULK_COLUMN), "0"); 172 | 173 | // assert that realtime column is deleted 174 | try { 175 | assertNotEquals(simplifiedLambda.getItemForKey("common", SimplifiedLambda.REALTIME_COLUMN), "1"); 176 | } catch (IOException e) { 177 | assertEquals(e.toString(), "java.io.IOException: Cannot retrieve string from hbase"); 178 | } 179 | } 180 | } 181 | -------------------------------------------------------------------------------- /src/main/java/flickr/SimplifiedLambda/SimplifiedLambda.java: -------------------------------------------------------------------------------- 1 | // Copyright 2015 Yahoo Inc. 2 | // Licensed under the terms of the Apache 2.0 License 3 | // See LICENSE.txt file in the project root folder for License terms. 4 | 5 | package flickr.SimplifiedLambda; 6 | 7 | import org.apache.hadoop.hbase.Cell; 8 | import org.apache.hadoop.hbase.CellUtil; 9 | import org.apache.hadoop.hbase.client.*; 10 | import org.apache.hadoop.hbase.util.Bytes; 11 | 12 | import java.io.IOException; 13 | 14 | /** 15 | * Created by bjoshi on 8/17/15. 
16 | */ 17 | public class SimplifiedLambda { 18 | public final static byte[] FAMILY = Bytes.toBytes("f"); 19 | public final static byte[] REALTIME_COLUMN = Bytes.toBytes("REALTIME"); 20 | public final static byte[] BULK_COLUMN = Bytes.toBytes("BULK"); 21 | 22 | HTableInterface lambdaTable; 23 | public SimplifiedLambda(HTableInterface _lambdaTable) 24 | { 25 | this.lambdaTable = _lambdaTable; 26 | } 27 | 28 | static private byte[] stringToBytes(String input) { return Bytes.toBytes(input); } 29 | static private byte[] getEntityKey(String entity) { return stringToBytes(entity); } 30 | 31 | public void pushBulkEntry(String key, String value) throws IOException { 32 | byte[] entityKey = this.getEntityKey(key); 33 | 34 | Put insertOp = new Put(entityKey); 35 | insertOp.add(FAMILY, BULK_COLUMN, stringToBytes(value)); 36 | this.lambdaTable.put(insertOp); 37 | } 38 | 39 | public void pushRealtimeEntry(String key, String value) throws IOException { 40 | byte[] entityKey = this.getEntityKey(key); 41 | 42 | Put insertOp = new Put(entityKey); 43 | insertOp.add(FAMILY, REALTIME_COLUMN, stringToBytes(value)); 44 | this.lambdaTable.put(insertOp); 45 | } 46 | 47 | private Cell getCell(String key, byte[] column) throws IOException { 48 | byte[] rowKey = this.getEntityKey(key); 49 | 50 | Get entityRowGet = new Get(rowKey); 51 | Result result = this.lambdaTable.get(entityRowGet); 52 | 53 | Cell cell = result.getColumnLatestCell(FAMILY, column); 54 | return cell; 55 | } 56 | 57 | private String getValueFromCell(Cell cell) { 58 | return Bytes.toString(CellUtil.cloneValue(cell)); 59 | } 60 | 61 | public String getItemForKey(String key, byte[] column) throws IOException { 62 | Cell cell = getCell(key, column); 63 | 64 | if (cell != null) { 65 | return getValueFromCell(cell); 66 | } else { 67 | throw new IOException("Cannot retrieve string from hbase"); 68 | } 69 | } 70 | 71 | public long getTimestampForKey(String key, byte[] column) throws IOException { 72 | Cell cell = getCell(key, 
column); 73 | 74 | if (cell != null) { 75 | return cell.getTimestamp(); 76 | } else { 77 | throw new IOException("Cannot retrieve string from hbase"); 78 | } 79 | } 80 | 81 | public void cleaner() throws IOException { 82 | // algorithm: 83 | // iterate over all rows 84 | // foreach row: 85 | // if hasRealtime && !hasBulk: 86 | // move realtime to bulk 87 | // else if hasRealtime && hasBulk: 88 | // if bulkTimestamp>realtimeTimestamp: 89 | // delete realtimeTimestamp 90 | // else: 91 | // move realtime to bulk 92 | // 93 | 94 | Scan scan = new Scan(); 95 | scan.addFamily(FAMILY); 96 | ResultScanner resultScanner = this.lambdaTable.getScanner(scan); 97 | 98 | for (Result result : resultScanner) { 99 | if (result.isEmpty()) 100 | continue; 101 | String key = Bytes.toString(result.getRow()); 102 | boolean hasBulk = false; 103 | boolean hasRealtime = false; 104 | 105 | Cell bulkCell = getCell(key, BULK_COLUMN); 106 | if (bulkCell != null) 107 | hasBulk = true; 108 | 109 | Cell realtimeCell = getCell(key, REALTIME_COLUMN); 110 | if (realtimeCell != null) 111 | hasRealtime = true; 112 | 113 | byte[] rowKey = this.getEntityKey(key); 114 | 115 | if (hasRealtime && !hasBulk) { 116 | // move realtime to bulk 117 | byte[] realtimeValue = CellUtil.cloneValue(realtimeCell); 118 | Put insertOp = new Put(rowKey); 119 | insertOp.add(FAMILY, BULK_COLUMN, realtimeValue); 120 | Delete deleteOp = new Delete(rowKey); 121 | deleteOp.deleteColumn(FAMILY, REALTIME_COLUMN); 122 | 123 | this.lambdaTable.put(insertOp); 124 | this.lambdaTable.delete(deleteOp); 125 | } else if (hasRealtime && hasBulk) { 126 | long bulkTimestamp = bulkCell.getTimestamp(); 127 | long realtimeTimestamp = realtimeCell.getTimestamp(); 128 | if (bulkTimestamp > realtimeTimestamp ) { 129 | //delete realtimeTimestamp 130 | Delete deleteOp = new Delete(rowKey); 131 | deleteOp.deleteColumn(FAMILY, REALTIME_COLUMN); 132 | this.lambdaTable.delete(deleteOp); 133 | } else { 134 | // move realtime to bulk 135 | byte[] 
realtimeValue = CellUtil.cloneValue(realtimeCell); 136 | Put insertOp = new Put(rowKey); 137 | insertOp.add(FAMILY, BULK_COLUMN, realtimeValue); 138 | Delete deleteOp = new Delete(rowKey); 139 | deleteOp.deleteColumn(FAMILY, REALTIME_COLUMN); 140 | this.lambdaTable.put(insertOp); 141 | this.lambdaTable.delete(deleteOp); 142 | } 143 | } 144 | } 145 | } 146 | 147 | public String combiner(String key) throws IOException { 148 | boolean hasBulk = false; 149 | boolean hasRealtime = false; 150 | 151 | Cell bulkCell = getCell(key, BULK_COLUMN); 152 | if (bulkCell != null) 153 | hasBulk = true; 154 | 155 | Cell realtimeCell = getCell(key, REALTIME_COLUMN); 156 | if (realtimeCell != null) 157 | hasRealtime = true; 158 | 159 | if (!hasBulk && !hasRealtime) { 160 | throw new IOException("Cannot retrieve string from hbase"); 161 | } else if (hasBulk && !hasRealtime) { 162 | return getValueFromCell(bulkCell); 163 | } else if (!hasBulk && hasRealtime) { 164 | return getValueFromCell(realtimeCell); 165 | } else { 166 | long bulkTimestamp = bulkCell.getTimestamp(); 167 | long realtimeTimestamp = realtimeCell.getTimestamp(); 168 | if (realtimeTimestamp > bulkTimestamp) { 169 | return getValueFromCell(realtimeCell); 170 | } else { 171 | throw new IOException("Bulk timestamp newer than realtime: shouldn't happen!"); 172 | } 173 | } 174 | } 175 | 176 | public String dumpTable() throws IOException { 177 | String res = "key\tbulk\trealtime\tcombined\n"; 178 | res += "........................................\n"; 179 | 180 | Scan scan = new Scan(); 181 | scan.addFamily(FAMILY); 182 | ResultScanner resultScanner = this.lambdaTable.getScanner(scan); 183 | 184 | for (Result result : resultScanner) { 185 | if (result.isEmpty()) 186 | continue; 187 | String key = Bytes.toString(result.getRow()); 188 | boolean hasBulk = false; 189 | boolean hasRealtime = false; 190 | 191 | Cell bulkCell = getCell(key, BULK_COLUMN); 192 | if (bulkCell != null) 193 | hasBulk = true; 194 | 195 | Cell realtimeCell = 
getCell(key, REALTIME_COLUMN); 196 | if (realtimeCell != null) 197 | hasRealtime = true; 198 | 199 | res += key; 200 | res += '\t'; 201 | if (hasBulk) { 202 | res += getValueFromCell(bulkCell); 203 | } else { 204 | res += "None"; 205 | } 206 | res += '\t'; 207 | if (hasRealtime) { 208 | res += getValueFromCell(realtimeCell); 209 | } else { 210 | res += "None"; 211 | } 212 | res += "\t\t"; 213 | 214 | String combinedResult = combiner(key); 215 | res += combinedResult; 216 | res += '\n'; 217 | } 218 | return res; 219 | } 220 | } 221 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Apache License 2 | 3 | Version 2.0, January 2004 4 | 5 | http://www.apache.org/licenses/ 6 | 7 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 8 | 9 | 1. Definitions. 10 | 11 | "License" shall mean the terms and conditions for use, reproduction, and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all other entities that control, are controlled by, or are under common control with that entity. For the purposes of this definition, "control" means (i) the power, direct or indirect, to cause the direction or management of such entity, whether by contract or otherwise, or (ii) ownership of fifty percent (50%) or more of the outstanding shares, or (iii) beneficial ownership of such entity. 16 | 17 | "You" (or "Your") shall mean an individual or Legal Entity exercising permissions granted by this License. 18 | 19 | "Source" form shall mean the preferred form for making modifications, including but not limited to software source code, documentation source, and configuration files. 
20 | 21 | "Object" form shall mean any form resulting from mechanical transformation or translation of a Source form, including but not limited to compiled object code, generated documentation, and conversions to other media types. 22 | 23 | "Work" shall mean the work of authorship, whether in Source or Object form, made available under the License, as indicated by a copyright notice that is included in or attached to the work (an example is provided in the Appendix below). 24 | 25 | "Derivative Works" shall mean any work, whether in Source or Object form, that is based on (or derived from) the Work and for which the editorial revisions, annotations, elaborations, or other modifications represent, as a whole, an original work of authorship. For the purposes of this License, Derivative Works shall not include works that remain separable from, or merely link (or bind by name) to the interfaces of, the Work and Derivative Works thereof. 26 | 27 | "Contribution" shall mean any work of authorship, including the original version of the Work and any modifications or additions to that Work or Derivative Works thereof, that is intentionally submitted to Licensor for inclusion in the Work by the copyright owner or by an individual or Legal Entity authorized to submit on behalf of the copyright owner. For the purposes of this definition, "submitted" means any form of electronic, verbal, or written communication sent to the Licensor or its representatives, including but not limited to communication on electronic mailing lists, source code control systems, and issue tracking systems that are managed by, or on behalf of, the Licensor for the purpose of discussing and improving the Work, but excluding communication that is conspicuously marked or otherwise designated in writing by the copyright owner as "Not a Contribution." 
28 | 29 | "Contributor" shall mean Licensor and any individual or Legal Entity on behalf of whom a Contribution has been received by Licensor and subsequently incorporated within the Work. 30 | 31 | 2. Grant of Copyright License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable copyright license to reproduce, prepare Derivative Works of, publicly display, publicly perform, sublicense, and distribute the Work and such Derivative Works in Source or Object form. 32 | 33 | 3. Grant of Patent License. Subject to the terms and conditions of this License, each Contributor hereby grants to You a perpetual, worldwide, non-exclusive, no-charge, royalty-free, irrevocable (except as stated in this section) patent license to make, have made, use, offer to sell, sell, import, and otherwise transfer the Work, where such license applies only to those patent claims licensable by such Contributor that are necessarily infringed by their Contribution(s) alone or by combination of their Contribution(s) with the Work to which such Contribution(s) was submitted. If You institute patent litigation against any entity (including a cross-claim or counterclaim in a lawsuit) alleging that the Work or a Contribution incorporated within the Work constitutes direct or contributory patent infringement, then any patent licenses granted to You under this License for that Work shall terminate as of the date such litigation is filed. 34 | 35 | 4. Redistribution. 
You may reproduce and distribute copies of the Work or Derivative Works thereof in any medium, with or without modifications, and in Source or Object form, provided that You meet the following conditions: 36 | 37 | You must give any other recipients of the Work or Derivative Works a copy of this License; and 38 | You must cause any modified files to carry prominent notices stating that You changed the files; and 39 | You must retain, in the Source form of any Derivative Works that You distribute, all copyright, patent, trademark, and attribution notices from the Source form of the Work, excluding those notices that do not pertain to any part of the Derivative Works; and 40 | If the Work includes a "NOTICE" text file as part of its distribution, then any Derivative Works that You distribute must include a readable copy of the attribution notices contained within such NOTICE file, excluding those notices that do not pertain to any part of the Derivative Works, in at least one of the following places: within a NOTICE text file distributed as part of the Derivative Works; within the Source form or documentation, if provided along with the Derivative Works; or, within a display generated by the Derivative Works, if and wherever such third-party notices normally appear. The contents of the NOTICE file are for informational purposes only and do not modify the License. You may add Your own attribution notices within Derivative Works that You distribute, alongside or as an addendum to the NOTICE text from the Work, provided that such additional attribution notices cannot be construed as modifying the License. 
41 | 42 | You may add Your own copyright statement to Your modifications and may provide additional or different license terms and conditions for use, reproduction, or distribution of Your modifications, or for any such Derivative Works as a whole, provided Your use, reproduction, and distribution of the Work otherwise complies with the conditions stated in this License. 43 | 5. Submission of Contributions. Unless You explicitly state otherwise, any Contribution intentionally submitted for inclusion in the Work by You to the Licensor shall be under the terms and conditions of this License, without any additional terms or conditions. Notwithstanding the above, nothing herein shall supersede or modify the terms of any separate license agreement you may have executed with Licensor regarding such Contributions. 44 | 45 | 6. Trademarks. This License does not grant permission to use the trade names, trademarks, service marks, or product names of the Licensor, except as required for reasonable and customary use in describing the origin of the Work and reproducing the content of the NOTICE file. 46 | 47 | 7. Disclaimer of Warranty. Unless required by applicable law or agreed to in writing, Licensor provides the Work (and each Contributor provides its Contributions) on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied, including, without limitation, any warranties or conditions of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A PARTICULAR PURPOSE. You are solely responsible for determining the appropriateness of using or redistributing the Work and assume any risks associated with Your exercise of permissions under this License. 48 | 49 | 8. Limitation of Liability. 
In no event and under no legal theory, whether in tort (including negligence), contract, or otherwise, unless required by applicable law (such as deliberate and grossly negligent acts) or agreed to in writing, shall any Contributor be liable to You for damages, including any direct, indirect, special, incidental, or consequential damages of any character arising as a result of this License or out of the use or inability to use the Work (including but not limited to damages for loss of goodwill, work stoppage, computer failure or malfunction, or any and all other commercial damages or losses), even if such Contributor has been advised of the possibility of such damages. 50 | 51 | 9. Accepting Warranty or Additional Liability. While redistributing the Work or Derivative Works thereof, You may choose to offer, and charge a fee for, acceptance of support, warranty, indemnity, or other liability obligations and/or rights consistent with this License. However, in accepting such obligations, You may act only on Your own behalf and on Your sole responsibility, not on behalf of any other Contributor, and only if You agree to indemnify, defend, and hold each Contributor harmless for any liability incurred by, or claims asserted against, such Contributor by reason of your accepting any such warranty or additional liability. 52 | 53 | END OF TERMS AND CONDITIONS -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Simplified Lambda Example 2 | 3 | This repository is a working example of Flickr's Simplified Lambda Architecture. The architecture is used to fuse bulk and realtime data for Flickr's Magic View at a gigantic scale using HBase. 
4 | 5 | ## Introduction 6 | Flickr’s Magic View takes the hassle out of organizing your own photos by applying our cutting-edge, computer-vision technology to automatically categorize photos in your photostream and present them in a seamless view based on the content in the photos. This all happens in real-time - as soon as a photo is uploaded, it is categorized and placed into the Magic View. 7 | 8 | ![Magic View Example](/img/magic_view_example.png?raw=true "Magic View Example") 9 | 10 | ### The Challenge 11 | Our computational footprint made it easy to create per-user Magic View categories for over 12 billion images on Flickr; however, we also needed to combine this with updating the categories with the tens of millions of tags generated from photos as they are uploaded in real-time. Ideally, the system has to allow us to efficiently but separately manage the bulk and real-time data that only computes the final state when requested. We turned to Yahoo’s Hadoop stack to find a way to build this at the massive scale we needed. 12 | 13 | ### Our Solution 14 | Powered by Apache HBase, we developed a new scheme to fuse results from bulk and real-time aggregations. Using a single table in HBase, we are able to independently update and manage the bulk and real-time data in the system while always being able to provide a consistent, correct result. 15 | 16 | We believe that this solution is a novel simplification of what is sometimes known as Lambda Architecture. We improve it by simplifying some of its complicated components, making maintenance and development easier. 17 | 18 | ## Lambda Architecture 19 | ### Existing approach 20 | We’ll start with Nathan Marz’s book, ‘Big Data’, which proposes the database concept of ‘Lambda Architecture’.
In his analysis, he states that a database query can be represented as a function - Query - which operates on all the data: 21 | 22 | ``` 23 | result = Query(data) 24 | ``` 25 | The core of the Lambda architecture allows for separately maintained real-time and bulk databases. Minimizing the number of sacrifices needed to be made but maintaining the goal of operating on all available data, the equation is now expressed as: 26 | 27 | ``` 28 | result = Combiner(Query(real-time data) + Query(bulk data)) 29 | ``` 30 | 31 | ![Classic Lambda Architecture](/img/lambda_architecture.png?raw=true "Classic Lambda Architecture") 32 | 33 | This equation is shown graphically in the figure above. The real-time and bulk compute subsystem write to independent databases, which could be totally different systems. When dealing with a high volume of realtime data, the operational advantage here can be significant - there’s no need to have the expense of combining it with bulk data every time an event comes in. 34 | 35 | Concerns around this approach center on the complicated nature of the Combiner function - there is the developer and systems cost from the need to maintain two separate databases, the differing latencies of querying both sides and then the mechanics of merging the result. 36 | 37 | ### Our Approach 38 | We addressed the complications of the Combiner by instead using a single database to store the real-time and bulk data. A Combiner is still required to compute a final result: 39 | 40 | ``` 41 | result = Combiner(Query(data)) 42 | ``` 43 | 44 | ![Simplified Lambda Architecture](/img/simplified_lambda_architecture.png?raw=true "Simplified Lambda Architecture") 45 | 46 | How was this achieved? We implement our simplified Lambda architecture in HBase by giving each row two sets of columns - real-time and bulk - which are managed independently by the real-time subsystem (Storm and Java) and the bulk compute subsystem (Pig Latin and Oozie). 
It’s worth noting that FiloDb takes a similar approach - but since we only require the latest version of the data, our implementation is simpler. 47 | 48 | The combiner stage is abstracted into a single Java process running on its own hardware which computes on the data in HBase and pushes the photostream tag aggregations to a cache for serving. 49 | 50 | ### The Combiner and Cleaner 51 | When reading a single row of data from HBase, we need to combine the data from the real-time and the bulk columns. If only the bulk or real-time data exists, then selecting the data is obvious. If both bulk and realtime data exists, we always pick real-time. This seems reasonable, but causes a subtle problem. 52 | 53 | Let’s say a photo's computer vision tags are added via real-time compute - there is no bulk data. Later on, we recompute all available photos using a new version of the computer vision tagging, and load this data (including this photo) via a bulk load. Even though the newer data exists in the bulk column, we can’t get to it because the combiner will only read the real-time column. We solve this by running the Cleaner process on all the data in HBase after we do a bulk load. 54 | 55 | The Cleaner simply visits each row and sees if the HBase timestamp for the real-time data is older than the bulk load. If it is, then we delete the real-time data for that row since it’s already captured in the bulk columns. This way the results of the bulk compute aren’t ‘published’ until the cleaner has run. 56 | 57 | ## Acknowledgements 58 | Thanks to the entire Flickr Magic View team for helping out and to Nathan Marz for kindly reviewing this work.
59 | 60 | ## Running the example 61 | 62 | Once you've checked out the repository, you can run the demo using: 63 | ``` 64 | mvn package; mvn exec:java -Dexec.mainClass="flickr.SimplifiedLambdaDemo.SimplifiedLambdaDemo" 65 | ``` 66 | To run the tests, simply run: 67 | ``` 68 | mvn package; mvn test 69 | ``` 70 | 71 | ## Worked Example 72 | Let's walk through a specific example that is used in the demo. 73 | 74 | ### Initial table 75 | Let's set up an in-memory simulation of HBase using MockHTable: 76 | 77 | ``` 78 | MockHTable lambdaTable = new MockHTable("lambdaTable"); 79 | lambdaTable.addColumnFamily(new String(SimplifiedLambda.FAMILY)); 80 | ``` 81 | 82 | Then we'll pass it to SimplifiedLambda - our central class which implements our Simplified Lambda architecture - to manage. 83 | ``` 84 | SimplifiedLambda simplifiedLambda = new SimplifiedLambda(lambdaTable); 85 | ``` 86 | 87 | Let's dump the initial state of the table: 88 | ``` 89 | System.out.println("\nInitial table:"); 90 | System.out.print(simplifiedLambda.dumpTable()); 91 | ``` 92 | 93 | Output: 94 | ``` 95 | key bulk realtime combined 96 | ........................................ 97 | ``` 98 | 99 | The columns here are: 100 | key - the row key 101 | bulk - the value in the bulk column 102 | realtime - the value in the realtime column 103 | combined - the final value that SimplifiedLambda will choose between realtime or bulk 104 | 105 | ### Push bulk rows 106 | Let's push some data to the table via the bulk load mechanism. We have to call the cleaner after every time we run the bulk load to keep the system in a consistent state: 107 | 108 | ``` 109 | simplifiedLambda.pushBulkEntry("rowA", "bulk"); 110 | simplifiedLambda.pushBulkEntry("rowB", "bulk"); 111 | simplifiedLambda.pushBulkEntry("rowC", "bulk"); 112 | simplifiedLambda.cleaner(); 113 | ``` 114 | 115 | The table now looks like: 116 | ``` 117 | key bulk realtime combined 118 | ........................................
119 | rowA bulk None bulk 120 | rowB bulk None bulk 121 | rowC bulk None bulk 122 | ``` 123 | 124 | There's only bulk data available so the combiner only chooses data from the bulk column. 125 | 126 | ### Override rows A,B with realtime 127 | 128 | Now let's take rowA and rowB and push realtime data to them: 129 | 130 | ``` 131 | simplifiedLambda.pushRealtimeEntry("rowA", "rtOvrd"); 132 | simplifiedLambda.pushRealtimeEntry("rowB", "rtOvrd"); 133 | ``` 134 | 135 | The table now looks like: 136 | 137 | ``` 138 | key bulk realtime combined 139 | ........................................ 140 | rowA bulk rtOvrd rtOvrd 141 | rowB bulk rtOvrd rtOvrd 142 | rowC bulk None bulk 143 | ``` 144 | Here for rowA and rowB, there is now realtime data available so the combiner will automatically choose it. 145 | 146 | ### Clean table 147 | 148 | For rowA and rowB, we can take the realtime data and 'publish' it to the bulk columns since it overrides the bulk columns: 149 | 150 | ``` 151 | simplifiedLambda.cleaner(); 152 | ``` 153 | 154 | Now the table state is: 155 | 156 | ``` 157 | key bulk realtime combined 158 | ........................................ 159 | rowA rtOvrd None rtOvrd 160 | rowB rtOvrd None rtOvrd 161 | rowC bulk None bulk 162 | ``` 163 | This isn't actually necessary after a realtime update - it's an illustration of how the cleaner works in this scenario. 164 | 165 | ### Override rows B,C with realtime 166 | 167 | Let's do another realtime override, now on rowB and rowC: 168 | 169 | ``` 170 | simplifiedLambda.pushRealtimeEntry("rowB", "rtOvrd2"); 171 | simplifiedLambda.pushRealtimeEntry("rowC", "rtOvrd2"); 172 | ``` 173 | 174 | Now the table looks like: 175 | 176 | ``` 177 | key bulk realtime combined 178 | ........................................ 179 | rowA rtOvrd None rtOvrd 180 | rowB rtOvrd rtOvrd2 rtOvrd2 181 | rowC bulk rtOvrd2 rtOvrd2 182 | ``` 183 | 184 | Again: the realtime columns override the bulk data.
185 | 186 | ### Override rows with bulk: 187 | 188 | Lastly, lets push some bulk data after the realtime updates - to rowA and rowB. Since the bulk data is newer than the realtime updates for these rows, the bulk data takes precedence over the realtime data that is there. 189 | 190 | ``` 191 | simplifiedLambda.pushBulkEntry("rowA", "bulkOvr"); 192 | simplifiedLambda.pushBulkEntry("rowB", "bulkOvr"); 193 | simplifiedLambda.cleaner(); 194 | ``` 195 | 196 | The result: 197 | ``` 198 | key bulk realtime combined 199 | ........................................ 200 | rowA bulkOvr None bulkOvr 201 | rowB bulkOvr None bulkOvr 202 | rowC rtOvrd2 None rtOvrd2 203 | ``` 204 | # Copyright and License 205 | Copyright 2015 Yahoo Inc. 206 | Licensed under the terms of the Apache 2.0 License 207 | See LICENSE.txt file in the project root folder for License terms. 208 | -------------------------------------------------------------------------------- /src/main/java/utility/MockHTable.java: -------------------------------------------------------------------------------- 1 | package utility; 2 | 3 | /** 4 | * This file is licensed to you under the Apache License, Version 2.0 (the 5 | * "License"); you may not use this file except in compliance with the 6 | * License. You may obtain a copy of the License at 7 | * 8 | * http://www.apache.org/licenses/LICENSE-2.0 9 | * 10 | * Unless required by applicable law or agreed to in writing, software 11 | * distributed under the License is distributed on an "AS IS" BASIS, 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
*/

import com.google.protobuf.Descriptors;
import com.google.protobuf.Message;
import com.google.protobuf.Service;
import com.google.protobuf.ServiceException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.Filter;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.*;
import java.util.logging.Logger;

/**
 * utility.MockHTable - an in-memory stand-in for HBase's {@link HTableInterface},
 * so the demo and unit tests can run without a live HBase cluster.
 *
 * original utility.MockHTable (by agaoglu) : https://gist.github.com/agaoglu/613217#file_mock_h_table.java
 * upgraded by bhautikj 20150327
 * upgraded by bhautikj 20150901
 *
 * Data layout: row -&gt; family -&gt; qualifier -&gt; timestamp -&gt; value, with every
 * byte[]-keyed level ordered by {@link Bytes#BYTES_COMPARATOR} so iteration
 * order matches a real HBase table. Operations with no reasonable in-memory
 * equivalent throw RuntimeException.
 *
 * NOTE(review): the generic type parameters in this file were stripped by an
 * HTML export; they have been reconstructed from the HBase 0.98
 * HTableInterface signatures - verify against the upstream gist.
 * NOTE(review): not thread-safe; the backing TreeMaps are mutated without
 * synchronization.
 */
public class MockHTable implements HTableInterface {
    private static final Logger LOG = Logger.getLogger(MockHTable.class.getName());

    // Name reported by getTableName().
    private final String tableName;
    // Declared column families; put() rejects writes to any other family.
    private final List<String> columnFamilies = new ArrayList<>();

    // row -> family -> qualifier -> timestamp -> value
    private NavigableMap<byte[], NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>> data
            = new TreeMap<>(Bytes.BYTES_COMPARATOR);

    /** Convenience overload: all timestamps, newest first, capped at maxVersions. */
    private static List<KeyValue> toKeyValue(byte[] row, NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> rowdata, int maxVersions) {
        return toKeyValue(row, rowdata, 0, Long.MAX_VALUE, maxVersions);
    }

    public MockHTable(String tableName) {
        this.tableName = tableName;
    }

    public MockHTable(String tableName, String... columnFamilies) {
        this.tableName = tableName;
        this.columnFamilies.addAll(Arrays.asList(columnFamilies));
    }

    /** Registers an additional column family after construction. */
    public void addColumnFamily(String columnFamily) {
        this.columnFamilies.add(columnFamily);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public byte[] getTableName() {
        return tableName.getBytes();
    }

    public TableName getName() {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Configuration getConfiguration() {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public HTableDescriptor getTableDescriptor() throws IOException {
        HTableDescriptor table = new HTableDescriptor(tableName);
        for (String columnFamily : columnFamilies) {
            table.addFamily(new HColumnDescriptor(columnFamily));
        }
        return table;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void mutateRow(RowMutations rm) throws IOException {
        this.sleeper();
        // currently only support Put and Delete
        for (Mutation mutation : rm.getMutations()) {
            if (mutation instanceof Put) {
                put((Put) mutation);
            } else if (mutation instanceof Delete) {
                delete((Delete) mutation);
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Result append(Append append) throws IOException {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * Flattens one row of the nested map into KeyValues, newest version first,
     * keeping only versions whose timestamp lies in [timestampStart, timestampEnd]
     * and at most maxVersions entries per column. Note: versionsAdded counts
     * every version visited (in range or not), matching the original behaviour.
     */
    private static List<KeyValue> toKeyValue(byte[] row, NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> rowdata, long timestampStart, long timestampEnd, int maxVersions) {
        List<KeyValue> ret = new ArrayList<>();
        for (byte[] family : rowdata.keySet()) {
            for (byte[] qualifier : rowdata.get(family).keySet()) {
                int versionsAdded = 0;
                for (Map.Entry<Long, byte[]> tsToVal : rowdata.get(family).get(qualifier).descendingMap().entrySet()) {
                    if (versionsAdded++ == maxVersions)
                        break;
                    Long timestamp = tsToVal.getKey();
                    if (timestamp < timestampStart)
                        continue;
                    if (timestamp > timestampEnd)
                        continue;
                    ret.add(new KeyValue(row, family, qualifier, timestamp, tsToVal.getValue()));
                }
            }
        }
        return ret;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean exists(Get get) throws IOException {
        Result result = get(get);
        return result != null && !result.isEmpty();
    }

    public Boolean[] exists(List<Get> gets) throws IOException {
        Boolean[] rv = new Boolean[gets.size()];
        for (int i = 0; i < gets.size(); i++) {
            rv[i] = exists(gets.get(i));
        }
        return rv;
    }

    /**
     * {@inheritDoc}
     *
     * Fix over the original mock: results are copied into the caller-supplied
     * array. Previously the parameter was merely reassigned, so the caller's
     * array was never populated.
     */
    @Override
    public void batch(List<? extends Row> actions, Object[] results) throws IOException, InterruptedException {
        Object[] batched = batch(actions);
        System.arraycopy(batched, 0, results, 0, Math.min(batched.length, results.length));
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Object[] batch(List<? extends Row> actions) throws IOException, InterruptedException {
        Object[] results = new Object[actions.size()]; // same size
        for (int i = 0; i < actions.size(); i++) {
            Row r = actions.get(i);
            if (r instanceof Delete) {
                delete((Delete) r);
                results[i] = new Result();
            }
            if (r instanceof Put) {
                put((Put) r);
                results[i] = new Result();
            }
            if (r instanceof Get) {
                results[i] = get((Get) r);
            }
            if (r instanceof Increment) {
                results[i] = increment((Increment) r);
            }
            if (r instanceof Append) {
                results[i] = append((Append) r);
            }
        }
        return results;
    }

    /**
     * {@inheritDoc}
     *
     * When explicit families/qualifiers are requested, only the newest version
     * of each column is returned; otherwise up to get.getMaxVersions() per
     * column. Any Get filter is applied afterwards.
     */
    @Override
    public Result get(Get get) throws IOException {
        this.sleeper();
        if (!data.containsKey(get.getRow()))
            return new Result();
        byte[] row = get.getRow();
        List<KeyValue> kvs = new ArrayList<>();
        if (!get.hasFamilies()) {
            kvs = toKeyValue(row, data.get(row), get.getMaxVersions());
        } else {
            for (byte[] family : get.getFamilyMap().keySet()) {
                if (data.get(row).get(family) == null)
                    continue;
                NavigableSet<byte[]> qualifiers = get.getFamilyMap().get(family);
                if (qualifiers == null || qualifiers.isEmpty())
                    qualifiers = data.get(row).get(family).navigableKeySet();
                for (byte[] qualifier : qualifiers) {
                    if (qualifier == null)
                        qualifier = "".getBytes();
                    if (!data.get(row).containsKey(family) ||
                            !data.get(row).get(family).containsKey(qualifier) ||
                            data.get(row).get(family).get(qualifier).isEmpty())
                        continue;
                    Map.Entry<Long, byte[]> timestampAndValue = data.get(row).get(family).get(qualifier).lastEntry();
                    kvs.add(new KeyValue(row, family, qualifier, timestampAndValue.getKey(), timestampAndValue.getValue()));
                }
            }
        }
        Filter filter = get.getFilter();
        if (filter != null) {
            kvs = filter(filter, kvs);
        }

        return new Result(kvs);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Result[] get(List<Get> gets) throws IOException {
        List<Result> results = new ArrayList<>();
        for (Get g : gets) {
            results.add(get(g));
        }
        return results.toArray(new Result[results.size()]);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Result getRowOrBefore(byte[] row, byte[] family) throws IOException {
        // FIXME: implement
        return null;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ResultScanner getScanner(Scan scan) throws IOException {
        final List<Result> ret = new ArrayList<>();
        byte[] st = scan.getStartRow();
        byte[] sp = scan.getStopRow();
        Filter filter = scan.getFilter();

        for (byte[] row : data.keySet()) {
            // if row is equal to startRow emit it. When startRow (inclusive) and
            // stopRow (exclusive) is the same, it should not be excluded which would
            // happen w/o this control.
            if (st != null && st.length > 0 &&
                    Bytes.BYTES_COMPARATOR.compare(st, row) != 0) {
                // if row is before startRow do not emit, pass to next row
                if (st != null && st.length > 0 &&
                        Bytes.BYTES_COMPARATOR.compare(st, row) > 0)
                    continue;
                // if row is equal to stopRow or after it do not emit, stop iteration
                if (sp != null && sp.length > 0 &&
                        Bytes.BYTES_COMPARATOR.compare(sp, row) <= 0)
                    break;
            }

            List<KeyValue> kvs;
            if (!scan.hasFamilies()) {
                kvs = toKeyValue(row, data.get(row), scan.getTimeRange().getMin(), scan.getTimeRange().getMax(), scan.getMaxVersions());
            } else {
                kvs = new ArrayList<>();
                for (byte[] family : scan.getFamilyMap().keySet()) {
                    if (data.get(row).get(family) == null)
                        continue;
                    NavigableSet<byte[]> qualifiers = scan.getFamilyMap().get(family);
                    if (qualifiers == null || qualifiers.isEmpty())
                        qualifiers = data.get(row).get(family).navigableKeySet();
                    for (byte[] qualifier : qualifiers) {
                        if (data.get(row).get(family).get(qualifier) == null)
                            continue;
                        for (Long timestamp : data.get(row).get(family).get(qualifier).descendingKeySet()) {
                            if (timestamp < scan.getTimeRange().getMin())
                                continue;
                            if (timestamp > scan.getTimeRange().getMax())
                                continue;
                            byte[] value = data.get(row).get(family).get(qualifier).get(timestamp);
                            kvs.add(new KeyValue(row, family, qualifier, timestamp, value));
                            // NOTE(review): this caps the TOTAL KeyValues for the row at
                            // maxVersions rather than versions-per-column; looks like a
                            // latent bug but kept as-is to preserve behaviour.
                            if (kvs.size() == scan.getMaxVersions()) {
                                break;
                            }
                        }
                    }
                }
            }
            if (filter != null) {
                kvs = filter(filter, kvs);
                // Check for early out optimization
                if (filter.filterAllRemaining()) {
                    break;
                }
            }
            if (!kvs.isEmpty()) {
                ret.add(new Result(kvs));
            }
        }

        return new ResultScanner() {
            private final Iterator<Result> iterator = ret.iterator();

            public Iterator<Result> iterator() {
                return iterator;
            }

            public Result[] next(int nbRows) throws IOException {
                ArrayList<Result> resultSets = new ArrayList<>(nbRows);
                for (int i = 0; i < nbRows; i++) {
                    Result next = next();
                    if (next != null) {
                        resultSets.add(next);
                    } else {
                        break;
                    }
                }
                return resultSets.toArray(new Result[resultSets.size()]);
            }

            public Result next() throws IOException {
                try {
                    return iterator().next();
                } catch (NoSuchElementException e) {
                    return null;
                }
            }

            public void close() {
            }
        };
    }

    /**
     * Follows the logical flow through the filter methods for a single row.
     *
     * Note. Filter flow for a single row. Adapted from
     * "HBase: The Definitive Guide" (p. 163) by Lars George, 2011.
     * See Figure 4-2 on p. 163.
     *
     * @param filter HBase filter.
     * @param kvs List of a row's KeyValues
     * @return List of KeyValues that were not filtered.
     */
    private List<KeyValue> filter(Filter filter, List<KeyValue> kvs) throws IOException {
        filter.reset();

        List<KeyValue> tmp = new ArrayList<>(kvs.size());
        tmp.addAll(kvs);

        boolean filteredOnRowKey = false;
        List<KeyValue> nkvs = new ArrayList<>(tmp.size());
        for (KeyValue kv : tmp) {
            if (filter.filterRowKey(kv.getBuffer(), kv.getRowOffset(), kv.getRowLength())) {
                filteredOnRowKey = true;
                break;
            }
            Filter.ReturnCode filterResult = filter.filterKeyValue(kv);
            if (filterResult == Filter.ReturnCode.INCLUDE) {
                nkvs.add(kv);
            } else if (filterResult == Filter.ReturnCode.NEXT_ROW) {
                break;
            }
            // NEXT_COL / SKIP fall through to the next KeyValue. The next-key
            // hint (a file-system IO optimization on a real cluster) is ignored.
        }
        if (filter.hasFilterRow() && !filteredOnRowKey) {
            filter.filterRow(nkvs);
        }
        if (filter.filterRow() || filteredOnRowKey) {
            nkvs.clear();
        }
        return nkvs;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ResultScanner getScanner(byte[] family) throws IOException {
        Scan scan = new Scan();
        scan.addFamily(family);
        return getScanner(scan);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public ResultScanner getScanner(byte[] family, byte[] qualifier) throws IOException {
        Scan scan = new Scan();
        scan.addColumn(family, qualifier);
        return getScanner(scan);
    }

    /** Returns map.get(key), inserting newObject first when the key is absent. */
    private <K, V> V forceFind(NavigableMap<K, V> map, K key, V newObject) {
        V data = map.get(key);
        if (data == null) {
            data = newObject;
            map.put(key, data);
        }
        return data;
    }

    /**
     * Short pause before each mutation/read - presumably so successive writes
     * get distinct millisecond timestamps (the Cleaner compares HBase
     * timestamps); TODO confirm against the demo's expectations.
     *
     * Fix over the original mock (which swallowed InterruptedException):
     * the interrupt flag is restored so callers can observe it.
     */
    private void sleeper() {
        try {
            Thread.sleep(10);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }

    /**
     * {@inheritDoc}
     *
     * @throws RuntimeException if the Put targets a family not declared on this table.
     */
    @Override
    public void put(Put put) throws IOException {
        this.sleeper();
        byte[] row = put.getRow();
        NavigableMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>> rowData =
                forceFind(data, row, new TreeMap<byte[], NavigableMap<byte[], NavigableMap<Long, byte[]>>>(Bytes.BYTES_COMPARATOR));
        for (byte[] family : put.getFamilyMap().keySet()) {
            if (!columnFamilies.contains(new String(family))) {
                throw new RuntimeException("Not Exists columnFamily : " + new String(family));
            }
            NavigableMap<byte[], NavigableMap<Long, byte[]>> familyData =
                    forceFind(rowData, family, new TreeMap<byte[], NavigableMap<Long, byte[]>>(Bytes.BYTES_COMPARATOR));
            for (KeyValue kv : put.getFamilyMap().get(family)) {
                // stamp "latest" cells with the current wall clock, like a region server
                kv.updateLatestStamp(Bytes.toBytes(System.currentTimeMillis()));
                byte[] qualifier = kv.getQualifier();
                NavigableMap<Long, byte[]> qualifierData =
                        forceFind(familyData, qualifier, new TreeMap<Long, byte[]>());
                qualifierData.put(kv.getTimestamp(), kv.getValue());
            }
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void put(List<Put> puts) throws IOException {
        for (Put put : puts) {
            put(put);
        }
    }

    /**
     * check-and-mutate predicate: a null/empty value means "the cell must be
     * absent"; otherwise the newest version of the cell must equal value.
     */
    private boolean check(byte[] row, byte[] family, byte[] qualifier, byte[] value) {
        if (value == null || value.length == 0)
            return !data.containsKey(row) ||
                    !data.get(row).containsKey(family) ||
                    !data.get(row).get(family).containsKey(qualifier);
        else
            return data.containsKey(row) &&
                    data.get(row).containsKey(family) &&
                    data.get(row).get(family).containsKey(qualifier) &&
                    !data.get(row).get(family).get(qualifier).isEmpty() &&
                    Arrays.equals(data.get(row).get(family).get(qualifier).lastEntry().getValue(), value);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean checkAndPut(byte[] row, byte[] family, byte[] qualifier, byte[] value, Put put) throws IOException {
        if (check(row, family, qualifier, value)) {
            put(put);
            return true;
        }
        return false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean checkAndMutate(byte[] row,
                                  byte[] family,
                                  byte[] qualifier,
                                  CompareOp compareOp,
                                  byte[] value,
                                  RowMutations mutation)
            throws IOException {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * {@inheritDoc}
     *
     * A Delete with no families removes the whole row; an empty family entry
     * removes that family; otherwise only the named qualifiers are removed.
     * Families and rows left empty are pruned afterwards.
     */
    @Override
    public void delete(Delete delete) throws IOException {
        this.sleeper();
        byte[] row = delete.getRow();
        if (data.get(row) == null)
            return;
        if (delete.getFamilyMap().size() == 0) {
            data.remove(row);
            return;
        }
        for (byte[] family : delete.getFamilyMap().keySet()) {
            if (data.get(row).get(family) == null)
                continue;
            if (delete.getFamilyMap().get(family).isEmpty()) {
                data.get(row).remove(family);
                continue;
            }
            for (KeyValue kv : delete.getFamilyMap().get(family)) {
                data.get(row).get(kv.getFamily()).remove(kv.getQualifier());
            }
            if (data.get(row).get(family).isEmpty()) {
                data.get(row).remove(family);
            }
        }
        if (data.get(row).isEmpty()) {
            data.remove(row);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void delete(List<Delete> deletes) throws IOException {
        for (Delete delete : deletes) {
            delete(delete);
        }
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean checkAndDelete(byte[] row, byte[] family, byte[] qualifier, byte[] value, Delete delete) throws IOException {
        this.sleeper();
        if (check(row, family, qualifier, value)) {
            delete(delete);
            return true;
        }
        return false;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Result increment(Increment increment) throws IOException {
        this.sleeper();
        List<KeyValue> kvs = new ArrayList<>();
        Map<byte[], NavigableMap<byte[], Long>> famToVal = increment.getFamilyMapOfLongs();
        for (Map.Entry<byte[], NavigableMap<byte[], Long>> ef : famToVal.entrySet()) {
            byte[] family = ef.getKey();
            NavigableMap<byte[], Long> qToVal = ef.getValue();
            for (Map.Entry<byte[], Long> eq : qToVal.entrySet()) {
                long newValue = incrementColumnValue(increment.getRow(), family, eq.getKey(), eq.getValue());
                kvs.add(new KeyValue(increment.getRow(), family, eq.getKey(), Bytes.toBytes(newValue)));
            }
        }
        return new Result(kvs);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, long amount) throws IOException {
        return incrementColumnValue(row, family, qualifier, amount, true);
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, long amount, boolean writeToWAL) throws IOException {
        this.sleeper();
        if (check(row, family, qualifier, null)) {
            // cell absent: initialize it to amount
            Put put = new Put(row);
            put.add(family, qualifier, Bytes.toBytes(amount));
            put(put);
            return amount;
        }
        long newValue = Bytes.toLong(data.get(row).get(family).get(qualifier).lastEntry().getValue()) + amount;
        data.get(row).get(family).get(qualifier).put(System.currentTimeMillis(),
                Bytes.toBytes(newValue));
        return newValue;
    }

    @Override
    public long incrementColumnValue(byte[] row, byte[] family, byte[] qualifier, long amount, Durability durability) throws IOException {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public boolean isAutoFlush() {
        return true;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void flushCommits() throws IOException {
        // no-op: everything is written synchronously in memory
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void close() throws IOException {
        // no-op: nothing to release
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public CoprocessorRpcChannel coprocessorService(byte[] row) {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    @Override
    public <T extends Service, R> Map<byte[], R> coprocessorService(Class<T> service, byte[] startKey, byte[] endKey, Batch.Call<T, R> callable) throws ServiceException, Throwable {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    @Override
    public <T extends Service, R> void coprocessorService(Class<T> service, byte[] startKey, byte[] endKey, Batch.Call<T, R> callable, Batch.Callback<R> callback) throws ServiceException, Throwable {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    @Override
    public <R extends Message> Map<byte[], R> batchCoprocessorService(Descriptors.MethodDescriptor methodDescriptor, Message request, byte[] startKey, byte[] endKey, R responsePrototype) throws ServiceException, Throwable {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    @Override
    public <R extends Message> void batchCoprocessorService(Descriptors.MethodDescriptor methodDescriptor, Message request, byte[] startKey, byte[] endKey, R responsePrototype, Batch.Callback<R> callback) throws ServiceException, Throwable {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    @Override
    public <R> void batchCallback(List<? extends Row> actions, Object[] results, Batch.Callback<R> callback) throws IOException, InterruptedException {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /** @deprecated */
    @Deprecated
    @Override
    public <R> Object[] batchCallback(List<? extends Row> actions, Batch.Callback<R> callback) throws IOException, InterruptedException {
        throw new RuntimeException(this.getClass() + " does NOT implement this method.");
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void setAutoFlush(boolean autoFlush) {
        // no-op: the mock always behaves as auto-flush (see isAutoFlush)
    }

    @Override
    public void setAutoFlushTo(boolean autoFlush) {
        // no-op
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void setAutoFlush(boolean autoFlush, boolean clearBufferOnFail) {
        // no-op
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public long getWriteBufferSize() {
        return 0;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void setWriteBufferSize(long writeBufferSize) throws IOException {
        // no-op
    }

}