├── .gitignore ├── test ├── clojure-hbase-util-test.clj └── clojure-hbase-test.clj ├── project.clj ├── LICENSE ├── src └── com │ └── compass │ └── hbase │ ├── admin_utils.clj │ ├── admin.clj │ ├── filters.clj │ ├── schema.clj │ └── client.clj └── README.markdown /.gitignore: -------------------------------------------------------------------------------- 1 | pom-generated.xml 2 | Manifest.txt 3 | clojure-hbase.jar 4 | lib 5 | classes 6 | /*.jar 7 | /pom.xml 8 | /.lein-deps-sum 9 | /.nrepl-port 10 | /target/ 11 | /.lein-repl-history 12 | -------------------------------------------------------------------------------- /test/clojure-hbase-util-test.clj: -------------------------------------------------------------------------------- 1 | (ns clojure-hbase-util-test 2 | (:refer-clojure :rename {get map-get}) 3 | (:use clojure.test 4 | com.davidsantiago.clojure-hbase.util) 5 | (:import org.apache.hadoop.hbase.util.Bytes)) 6 | 7 | (deftest check-simple-converters 8 | (is (= "test" (as-str (Bytes/toBytes "test")))) 9 | (is (= :test (as-kw (Bytes/toBytes "test")))) 10 | (is (= 'test (as-sym (Bytes/toBytes "test"))))) 11 | 12 | (deftest check-obj-converters 13 | (is (= '(1 2 3) (as-obj (to-bytes '(1 2 3))))) 14 | (is (= [1 2 3]) (as-obj (to-bytes [1 2 3]))) 15 | (is (= {:test '(1 2 3)} (as-obj (to-bytes {:test '(1 2 3)}))))) -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject com.compasslabs/clojure-hbase-schemas "1.0.1.1" 2 | :description "A convenient Clojure interface to HBase." 
3 | :license "See LICENSE file" 4 | :url "http://github.com/compasslabs/clojure-hbase-schemas" 5 | :dependencies [[org.clojure/clojure "1.6.0"] 6 | [org.apache.hbase/hbase-client "1.0.1.1"] 7 | [org.apache.hadoop/hadoop-hdfs "2.4.1"] 8 | [clj-time "0.7.0"] 9 | [clj-serializer "0.1.3"] 10 | [org.clojure/data.json "0.2.6"] 11 | [log4j/log4j "1.2.17" :exclusions [javax.mail/mail 12 | javax.jms/jms 13 | com.sun.jdmk/jmxtools 14 | com.sun.jmx/jmxri]] 15 | ] 16 | ) 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2011, Compass Labs, Inc. 2 | 3 | All rights reserved. 4 | 5 | Redistribution and use in source and binary forms, with or without 6 | modification, are permitted provided that the following conditions are 7 | met: 8 | 9 | Redistributions of source code must retain the above copyright 10 | notice, this list of conditions and the following disclaimer. 11 | 12 | Redistributions in binary form must reproduce the above 13 | copyright notice, this list of conditions and the following 14 | disclaimer in the documentation and/or other materials 15 | provided with the distribution. 16 | 17 | Neither the name of Compass Labs, Inc. nor the names of any 18 | contributors may be used to endorse or promote products 19 | derived from this software without specific prior written 20 | permission. 21 | 22 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 23 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 24 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 25 | A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT 26 | HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 27 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 28 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 29 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 30 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 31 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 32 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -------------------------------------------------------------------------------- /src/com/compass/hbase/admin_utils.clj: -------------------------------------------------------------------------------- 1 | (ns com.compass.hbase.admin-utils 2 | (:refer-clojure :rename {get map-get}) 3 | (:import [org.apache.hadoop.hbase.util Bytes])) 4 | 5 | ;; Utility function 6 | 7 | (defn partition-query 8 | "Given a query sequence and a command argnum map (each keyword in map 9 | mapped to how many arguments that item expects), this function returns 10 | a sequence of sequences; each sub-sequence is just a single command, 11 | command keyword followed by args." 12 | [query cmd-argnum-map] 13 | (loop [result [] 14 | remaining-commands query] 15 | (let [kw (first remaining-commands)] 16 | (if (nil? kw) 17 | result 18 | (let [[a-cmd rest-cmds] (split-at (inc (map-get cmd-argnum-map kw 1)) 19 | remaining-commands)] 20 | (recur (conj result a-cmd) rest-cmds)))))) 21 | 22 | ;; Some default conversions 23 | 24 | (defmulti to-bytes-impl 25 | "Converts its argument into an array of bytes. By default, uses HBase's 26 | Bytes/toBytes and does nothing to byte arrays. Since it is a multimethod 27 | you can redefine it to create your own serialization routines for new types." 
28 | class) 29 | (defmethod to-bytes-impl (Class/forName "[B") 30 | [arg] 31 | arg) 32 | (defmethod to-bytes-impl clojure.lang.Keyword 33 | [arg] 34 | (Bytes/toBytes (name arg))) 35 | (defmethod to-bytes-impl clojure.lang.Symbol 36 | [arg] 37 | (Bytes/toBytes (name arg))) 38 | (defmethod to-bytes-impl clojure.lang.IPersistentList 39 | [arg] 40 | (Bytes/toBytes (binding [*print-dup* false] (pr-str arg)))) 41 | (defmethod to-bytes-impl clojure.lang.IPersistentVector 42 | [arg] 43 | (Bytes/toBytes (binding [*print-dup* false] (pr-str arg)))) 44 | (defmethod to-bytes-impl clojure.lang.IPersistentMap 45 | [arg] 46 | (Bytes/toBytes (binding [*print-dup* false] (pr-str arg)))) 47 | (defmethod to-bytes-impl :default 48 | [arg] 49 | (Bytes/toBytes arg)) 50 | 51 | (defn to-bytes 52 | "Converts its argument to an array of bytes using the to-bytes-impl 53 | multimethod. We can't type hint a multimethod, so we type hint this 54 | shell function and calls all over this module don't need reflection." 55 | {:tag (Class/forName "[B")} 56 | [arg] 57 | (to-bytes-impl arg)) 58 | -------------------------------------------------------------------------------- /test/clojure-hbase-test.clj: -------------------------------------------------------------------------------- 1 | (ns clojure-hbase-test 2 | (:refer-clojure :rename {get map-get}) 3 | (:use clojure.test 4 | [com.davidsantiago.clojure-hbase] 5 | [com.davidsantiago.clojure-hbase.admin :exclude [flush]]) 6 | (:import [org.apache.hadoop.hbase.util Bytes] 7 | [java.util UUID])) 8 | 9 | ;; This file creates a table to do all its work in, and requires an already- 10 | ;; configured running instance of HBase. Obviously, make sure this is not a 11 | ;; production version of HBase you're testing on. 
12 | 13 | (def test-tbl-name (str "clojure-hbase-test-db" (UUID/randomUUID))) 14 | (defn setup-tbl [] (create-table (table-descriptor test-tbl-name))) 15 | (defn remove-tbl [] 16 | (disable-table test-tbl-name) 17 | (delete-table test-tbl-name)) 18 | 19 | (defmacro as-test [& body] 20 | `(do 21 | (try 22 | (setup-tbl) 23 | ~@body 24 | (finally 25 | (remove-tbl))))) 26 | 27 | (deftest create-delete-table 28 | (as-test 29 | (is (.contains (map #(Bytes/toString (.getName %)) (list-tables)) 30 | test-tbl-name) 31 | "The table was created at the beginning of the as-test.")) 32 | (is (not (.contains (map #(Bytes/toString (.getName %)) (list-tables)) 33 | test-tbl-name)) 34 | "Now that we are out of the as-test, the table doesn't exist.")) 35 | 36 | (deftest add-delete-CF 37 | (let [cf-name "test-cf-name"] 38 | (as-test 39 | (disable-table test-tbl-name) 40 | (add-column-family test-tbl-name (column-descriptor cf-name)) 41 | (is (= (.getNameAsString (.getFamily 42 | (get-table-descriptor test-tbl-name) 43 | (to-bytes cf-name))) 44 | cf-name) 45 | "Created a new column family and retrieved its column descriptor.") 46 | (delete-column-family test-tbl-name cf-name) 47 | (is (= nil (.getFamily (get-table-descriptor test-tbl-name) 48 | (to-bytes cf-name))) 49 | "Deleted the column family successfully.")))) 50 | 51 | (deftest get-put-delete 52 | (let [cf-name "test-cf-name" 53 | row "testrow" 54 | value "testval"] 55 | (as-test 56 | (disable-table test-tbl-name) 57 | (add-column-family test-tbl-name (column-descriptor cf-name)) 58 | (enable-table test-tbl-name) 59 | (with-table [test-tbl (table test-tbl-name)] 60 | (put test-tbl row :value [cf-name :testqual value]) 61 | (is (= value (Bytes/toString (last (first 62 | (as-vector 63 | (get test-tbl row :column 64 | [cf-name :testqual])))))) 65 | "Successfully executed Put and Get.") 66 | (delete test-tbl row :column [cf-name :testqual]) 67 | (is (= '() (as-vector (get test-tbl row :column 68 | [cf-name :testqual]))) 69 | 
"Successfully executed Delete of the Put."))))) 70 | 71 | (def scan-row-values (sort-by #(first %) 72 | (for [k (range 10000)] 73 | [(str (UUID/randomUUID)) 74 | (str (UUID/randomUUID))]))) 75 | 76 | (deftest scan-check 77 | (let [cf-name "test-cf-name"] 78 | (as-test 79 | (disable-table test-tbl-name) 80 | (add-column-family test-tbl-name (column-descriptor cf-name)) 81 | (enable-table test-tbl-name) 82 | (with-table [test-tbl (table test-tbl-name)] 83 | (doseq [[key value] scan-row-values] 84 | (put test-tbl key :value [cf-name :value value])) 85 | (is (= true 86 | (reduce #(and %1 %2) 87 | (with-scanner [scan-results (scan test-tbl)] 88 | (map #(= (first %1) 89 | (Bytes/toString (.getRow %2))) 90 | scan-row-values (seq scan-results)))))))))) 91 | 92 | (deftest as-map-test 93 | (let [cf-name "test-cf-name" 94 | qual "testqual" 95 | row "testrow" 96 | value "testval"] 97 | (as-test 98 | (disable-table test-tbl-name) 99 | (add-column-family test-tbl-name (column-descriptor cf-name)) 100 | (enable-table test-tbl-name) 101 | (with-table [test-tbl (table test-tbl-name)] 102 | (put test-tbl row :time-stamp 1 :value [cf-name qual value]) 103 | (is (= {cf-name {qual {"1" value}}} 104 | (as-map (get test-tbl row) 105 | :map-family #(Bytes/toString %) 106 | :map-qualifier #(Bytes/toString %) 107 | :map-timestamp str 108 | :map-value #(Bytes/toString %))) 109 | "as-map works.") 110 | (is (= {cf-name {qual value}} 111 | (latest-as-map (get test-tbl row) 112 | :map-family #(Bytes/toString %) 113 | :map-qualifier #(Bytes/toString %) 114 | :map-value #(Bytes/toString %))) 115 | "latest-as-map works."))))) -------------------------------------------------------------------------------- /src/com/compass/hbase/admin.clj: -------------------------------------------------------------------------------- 1 | (ns com.compass.hbase.admin 2 | (:refer-clojure :rename {get map-get} :exclude [flush]) 3 | (:use com.compass.hbase.admin-utils) 4 | (:import [org.apache.hadoop.hbase 
HBaseConfiguration HConstants 5 | HTableDescriptor HColumnDescriptor] 6 | [org.apache.hadoop.hbase.client HBaseAdmin] 7 | [org.apache.hadoop.hbase.util Bytes] 8 | [org.apache.hadoop.hbase.io.compress Compression])) 9 | 10 | (def ^:dynamic *admin* nil) 11 | 12 | (defn get-admin 13 | "Enforce a lazy create-once policy" 14 | [] 15 | (if-let [admin *admin*] admin 16 | (alter-var-root #'*admin* (fn [old] 17 | (if old old 18 | (HBaseAdmin. (HBaseConfiguration.))))))) 19 | 20 | ;; 21 | ;; HColumnDescriptor 22 | ;; 23 | 24 | (def column-desc-argnums 25 | "This maps each get command to its number of arguments, for helping us 26 | partition the command sequence." 27 | {:block-cache-enabled 1 ;; :block-cache-enabled 28 | :block-size 1 ;; :block-size 29 | :bloom-filter 1 ;; :bloom-filter 30 | :compression-type 1 ;; :compression-type 31 | :in-memory 1 ;; :in-memory 32 | :map-file-index-interval 1 ;; :map-file-index-interval 33 | :max-versions 1 ;; :max-versions 34 | :time-to-live 1});; :time-to-live 35 | 36 | (defn column-descriptor 37 | [family-name & args] 38 | (let [specs (partition-query args column-desc-argnums) 39 | cd (HColumnDescriptor. (to-bytes family-name))] 40 | (doseq [spec specs] 41 | (condp = (first spec) 42 | :block-cache-enabled (.setBlockCacheEnabled cd (second spec)) 43 | :block-size (.setBlockSize cd (second spec)) 44 | :bloom-filter (.setBloomFilterType cd (second spec)) 45 | :compression-type (.setCompressionType cd (second spec)) 46 | :in-memory (.setInMemory cd (second spec)) 47 | :map-file-index-interval (.setMapFileIndexInterval cd (second spec)) 48 | :max-versions (.setMaxVersions cd (second spec)) 49 | :time-to-live (.setTimeToLive cd (second spec)))) 50 | cd)) 51 | 52 | ;; 53 | ;; HTableDescriptor 54 | ;; 55 | 56 | (def table-desc-argnums 57 | "This maps each get command to its number of arguments, for helping us 58 | partition the command sequence." 
59 | {:max-file-size 1 ;; :max-file-size 60 | :mem-store-flush-size 1 ;; :mem-store-flush-size 61 | :read-only 1 ;; :read-only 62 | :family 1}) ;; :family 63 | 64 | (defn table-descriptor 65 | [table-name & args] 66 | (let [specs (partition-query args table-desc-argnums) 67 | td (HTableDescriptor. (to-bytes table-name))] 68 | (doseq [spec specs] 69 | (condp = (first spec) 70 | :max-file-size (.setMaxFileSize td (second spec)) 71 | :mem-store-flush-size (.setMemStoreFlushSize td (second spec)) 72 | :read-only (.setReadOnly td (second spec)) 73 | :family (.addFamily td (second spec)))) 74 | td)) 75 | 76 | 77 | ;; 78 | ;; HBaseAdmin 79 | ;; 80 | 81 | (defn add-column-family 82 | [table-name column-descriptor] 83 | (.addColumn (get-admin) (to-bytes table-name) column-descriptor)) 84 | 85 | (defn hbase-available? 86 | [] 87 | (HBaseAdmin/checkHBaseAvailable (HBaseConfiguration.))) 88 | 89 | (defn compact 90 | [table-or-region-name] 91 | (.compact (get-admin) (to-bytes table-or-region-name))) 92 | 93 | (defn create-table 94 | ([table-descriptor] 95 | (.createTable (get-admin) table-descriptor)) 96 | ([table-descriptor start end regions] 97 | (.createTable (get-admin) table-descriptor start end regions))) 98 | 99 | (defn create-table-async 100 | [table-descriptor] 101 | (.createTableAsync (get-admin) table-descriptor)) 102 | 103 | (defn delete-column-family 104 | [table-name column-name] 105 | (.deleteColumn (get-admin) (to-bytes table-name) (to-bytes column-name))) 106 | 107 | (defn delete-table 108 | [table-name] 109 | (.deleteTable (get-admin) (to-bytes table-name))) 110 | 111 | (defn disable-table 112 | [table-name] 113 | (.disableTable (get-admin) (to-bytes table-name))) 114 | 115 | (defn enable-table 116 | [table-name] 117 | (.enableTable (get-admin) (to-bytes table-name))) 118 | 119 | (defn flush 120 | [table-or-region-name] 121 | (.flush (get-admin) (to-bytes table-or-region-name))) 122 | 123 | (defn cluster-status 124 | [] 125 | (.getClusterStatus (get-admin))) 
126 | 127 | (defn get-connection 128 | [] 129 | (.getConnection (get-admin))) 130 | 131 | (defn get-master 132 | [] 133 | (.getMaster (get-admin))) 134 | 135 | (defn get-table-descriptor 136 | [table-name] 137 | (.getTableDescriptor (get-admin) (to-bytes table-name))) 138 | 139 | (defn master-running? 140 | [] 141 | (.isMasterRunning (get-admin))) 142 | 143 | (defn table-available? 144 | [table-name] 145 | (.isTableAvailable (get-admin) (to-bytes table-name))) 146 | 147 | (defn table-disabled? 148 | [table-name] 149 | (.isTableDisabled (get-admin) (to-bytes table-name))) 150 | 151 | (defn table-enabled? 152 | [table-name] 153 | (.isTableEnabled (get-admin) (to-bytes table-name))) 154 | 155 | (defn list-tables 156 | [] 157 | (seq (.listTables (get-admin)))) 158 | 159 | (defn major-compact 160 | [table-or-region-name] 161 | (.majorCompact (get-admin) (to-bytes table-or-region-name))) 162 | 163 | (defn modify-column-family 164 | [table-name column-name column-descriptor] 165 | (.modifyColumn (get-admin) (to-bytes table-name) (to-bytes column-name) 166 | column-descriptor)) 167 | 168 | (defn modify-table 169 | [table-name table-descriptor] 170 | (.modifyTable (get-admin) (to-bytes table-name) table-descriptor)) 171 | 172 | (defn shutdown 173 | [] 174 | (.shutdown (get-admin))) 175 | 176 | (defn split 177 | [table-or-region-name] 178 | (.split (get-admin) (to-bytes table-or-region-name))) 179 | 180 | (defn table-exists? 181 | [table-name] 182 | (.tableExists (get-admin) (to-bytes table-name))) 183 | -------------------------------------------------------------------------------- /README.markdown: -------------------------------------------------------------------------------- 1 | # clojure-hbase-schemas 2 | 3 | Clojure-HBase-Schemas is a simple library for accessing HBase from 4 | Clojure. 
The library was inspired by David Santiago's 5 | [clojure-hbase](http://github.com/davidsantiago/clojure-hbase) and 6 | lifts support for HTable admin functions directly from his library. 7 | 8 | Releases are maintained on clojars. The latest release is: 9 | 10 | com.compasslabs/clojure-hbase-schemas "0.90.4" 11 | 12 | ## Description 13 | 14 | Two main facilities are introduced: schemas and constraints. Schemas 15 | are type templates that dictate data encoding/decoding for HBase 16 | operations. Constraints result in method calls on Gets and Scans as 17 | well as passing appropriate sets of filter objects to the Get/Scan 18 | operation. 19 | 20 | (require '[com.compass.hbase.client :as client]) 21 | (require '[com.compass.hbase.filters :as f]) 22 | 23 | ### Schemas 24 | 25 | Define a schema for a table called users with two column families, 26 | userinfo and friends. The first seq after the table name is metadata. 27 | :key-type and :value-type determines the default data type for 28 | qualifiers and values in any column family not already defined in the 29 | schema. The :row-type must be defined. The remainder of the 30 | definition consists of type specification for the other families where 31 | not covered by the defaults. The framework has global defaults of a 32 | String row type, a Keyword qualifier type and a JSON value type. 33 | 34 | Families are specified using two formats: a map with key, value and 35 | specific qualifier exceptions or a seq of two values, the key and 36 | value types. If one or more are not specified, the table or global 37 | defaults will be used. 38 | 39 | (define-schema :users [:key-type :string 40 | :value-type :json-key 41 | :row-type :long] 42 | :userinfo {:value-type :json-key 43 | :exceptions {:age :int 44 | :name :string}} 45 | :friends [:long :bool] 46 | :votes [:keyword :long]) 47 | 48 | Legal types are :bool, :int, :long, :string, :symbol, :keyword, :ser, 49 | :json and :json-key. 
:ser uses the clj-serializer library to 50 | ser/deser clojure data structures and :json and :json-key store data 51 | as JSON binary strings and deserialize to string-based clojure maps or 52 | keyword-based clojure maps using the default clojure JSON library. 53 | 54 | ### Client API 55 | 56 | Put then Get all values for row ID 100. The Get procedure looks up a 57 | schema in a global registry (configured by define-schema) for the 58 | table named :users. Gets and scans return a "family map" for each row 59 | that consists of a dictionary of family names to maps where each 60 | map consists of the keys and values for that family. 61 | 62 | (client/put :users 100 {:userinfo {:name "Test User" :id "21412"} :votes {:up 2 :down 4}}) 63 | (client/get :users 100) => {:userinfo {:name "Test User" :id "21412"} :votes {:up 2 :down 4}} 64 | 65 | Increment only returns the modified fields othewise it works the same as put. 66 | 67 | (client/increment :users 100 {:votes {:up 1 :down -2}}) => [100 {:votes {:up 3, :down 2}}] 68 | 69 | Additional commands are straightforward 70 | 71 | (client/del :users 100) => fmap 72 | (client/get-multi :users [100 101 102]) => [fmap fmap fmap] 73 | (client/put-multi :users [[100 fmap] [200 fmap]]) 74 | (client/scan (fn [id fmap] fmap) :users) => [fmap, fmap, ...] 75 | (client/do-scan (fn [id fmap] fmap) :users) => [fmap, fmap, ...] 76 | (client/raw-scan (fn [id fmap] fmap) :users) => [ResultSet, ...] 77 | 78 | 79 | ### Constraints 80 | 81 | The Get, Increment and Scan commands above all accept constraint objects which 82 | are used to restrict the rows, families, qualifiers, values and 83 | timestamps returned from a query. The new API provides a relatively 84 | primitive, but nicely composable mini-language for expressing these 85 | constraints in terms of filters and return value restrictions. 86 | Moreover, it is fairly easy to use constraints in map-reduce job 87 | configuration also. 
If you're interested in this in the context of 88 | map-reduce, check out the discussion of [steps and flows in clojure-hadoop](http://ianeslick.com/higher-level-composition-in-clojure-hadoop-st). 89 | 90 | Constraints are simply a clojure record that documents the various 91 | constraints you've composed together. When a Get or Scan command is 92 | executed, the constraints are converted into the specific method calls 93 | or filter objects necessary to satisfy them. 94 | 95 | (f/constraints) will create an empty constraint object. 96 | 97 | The Constraint protocol supports three methods: 98 | 99 | * (project type data) 100 | * (filter type comparison value) 101 | * (page size) 102 | 103 | For example, to get users restricted to the :userinfo family 104 | 105 | (client/get :users <id> (-> (f/constraints) 106 | (f/project :families [:userinfo]))) 107 | 108 | To return the userinfo data for all users with a name starting with 109 | "a", the constraint expression is. 110 | 111 | (client/scan (fn [a b] b) :users 112 | (-> (f/constraints) 113 | (f/project :families [:userinfo]) 114 | (f/filter :qualifier [:prefix :<] [:userinfo :name "b"])) 115 | 116 | Similar to ClojureQL, constraints can be made and are not materialized until 117 | the get or scan command is actually started, meaning we can store 118 | constraints in vars or have functions that define a set of constraints 119 | and then compose them later. There are also two convenience functions for 120 | composing these higher order constraint expressions. 121 | 122 | (make-constraints expr1 expr2 ...) and 123 | (add-constraints constraints expr1 expr2 ...) 
124 | 125 | So we can now easily define appropriate variables and functions 126 | 127 | (def userinfo (make-constraints 128 | (f/project :families [:userinfo]))) 129 | 130 | (defn filter-user-name-prefix [c comp prefix] 131 | (add-constraints c (f/filter :qualifier [:prefix comp] [:userinfo :name prefix]))) 132 | 133 | And then apply them interactively or programmatically to perform scans. 134 | 135 | (client/scan (fn [a b] b) :users (filter-user-name-prefix userinfo :< "b")) 136 | 137 | The currently support projection types include: 138 | 139 | * :families - Restrict results to one or more families 140 | * :columns - Restrict row results to a matching family + qualifier 141 | * :row-range - Restrict scan to a range of row values (f/project :row-range [low high]) 142 | * :timestamp - Only return values for the given long timestamp 143 | * :timerange - Return values for the given low / high timestamps 144 | * :max-versions - The maximum number of versions of any qualifier+value to return 145 | 146 | It is fairly trivial to add new projections or filters; please feel 147 | free to send patches. 148 | 149 | Two utility functions make dealing with time ranges easier, (timestamp 150 | ref), (timestamp-now) and (timestamp-ago reference type amount). 151 | timestamp-ago takes a reference timestamp and returns a long value 152 | according to type {:minutes | :hours | :days} and a number. Arguments 153 | to timestamp and timerange use the timestamp function to interpret 154 | arguments. This makes it easy then to say things like: 155 | 156 | Scan from two days ago until now: 157 | 158 | (f/project constraints :timerange [[:days 2] :now]) 159 | 160 | Or from 1 month before ref, a long-valued reference timestamp. 161 | 162 | (f/project constraints :timerange [[ref :months 1] ref]) 163 | 164 | Filter expressions all include a comparison expression. 
Typically 165 | you'll use :=, but you can use a variety of comparison types {:binary 166 | | :prefix | :substr | :regex } and the usual boolean comparitors. 167 | 168 | Beware that filters don't limit the scan row, so a row filter will 169 | test every row and only return those that pass the test, but if you're 170 | doing a scan operation, this will touch every row in the table which 171 | can take quite a bit of time. 172 | 173 | Filter types include: 174 | 175 | * (f/filter :row <compare> <value>) - Filter rows by value comparison 176 | * (f/filter :qualifier <compare> [<family> <name>]) - Passes all qualifier names in the given family where (<compare> qualifier <name>) is true 177 | * (f/filter :column <compare> [<family> <qualifier> <value>]) - Pass all columns where the value comparison is true 178 | * (f/filter :cell <compare> [<value> <type>]) - Pass all qualifier-value pairs where the value matches <value>. 179 | * (f/filter :keys-only <ignored>) - Only return the qualifiers, no values 180 | * (f/filter :first-kv-only <ignored> - Only return the first qualifier-value pair (good for getting matching rows without returning much data 181 | * (f/filter :limit <size>) - Only return <size> rows using PageFilter. 182 | 183 | There are some compositional semantics missing, such as ignoring rows 184 | where certain columns don't match, rather than filtering just 185 | key-value pairs. This will be addressed in a later revision. 
186 | 187 | ## License 188 | 189 | BSD 190 | -------------------------------------------------------------------------------- /src/com/compass/hbase/filters.clj: -------------------------------------------------------------------------------- 1 | (ns com.compass.hbase.filters 2 | (:refer-clojure :exclude [filter]) 3 | (:use com.compass.hbase.schema) 4 | (:require [clj-time.core :as time]) 5 | (:import 6 | org.apache.hadoop.hbase.util.Bytes 7 | [org.apache.hadoop.hbase.client Get Scan] 8 | [org.apache.hadoop.hbase.filter 9 | ;; Base classes 10 | Filter 11 | CompareFilter 12 | CompareFilter$CompareOp 13 | ;; Comparators 14 | BinaryComparator 15 | BinaryPrefixComparator 16 | RegexStringComparator 17 | SubstringComparator 18 | ;; Filters 19 | ColumnCountGetFilter 20 | ColumnPaginationFilter 21 | ColumnPrefixFilter 22 | DependentColumnFilter 23 | FamilyFilter 24 | FilterList 25 | FilterList$Operator 26 | FirstKeyOnlyFilter 27 | InclusiveStopFilter 28 | KeyOnlyFilter 29 | PageFilter 30 | PrefixFilter 31 | QualifierFilter 32 | RowFilter 33 | SingleColumnValueExcludeFilter 34 | SingleColumnValueFilter 35 | SkipFilter 36 | TimestampsFilter 37 | ValueFilter 38 | WhileMatchFilter])) 39 | 40 | ;; 41 | ;; Provide a generic framework for defining Get, Scan and Map-Reduce 42 | ;; constraints over HBase data. 
43 | ;; 44 | ;; Intended to capture server side constraints in a single filter object 45 | ;; 46 | 47 | (defprotocol ConstraintSet 48 | "Manipulate a set of HBase constraints for a Get,Scan or M-R interface" 49 | (filter [this type compare value-spec] [this type arg] 50 | "Filter out rows or columns based on contents") 51 | (page [this size] 52 | "Returns a set of results of size 'size'") 53 | (project [this type values] 54 | "Restrict the nature of a given row result")) 55 | 56 | (def project-specifiers 57 | #{:families :row-range :columns :timestamp :timerange :max-versions}) 58 | 59 | (defrecord HBaseConstraints [projections filters page all-versions] 60 | ConstraintSet 61 | (project [#^HBaseConstraints this specifier values] 62 | (if (= specifier :all-versions) 63 | (assoc this :all-versions true) 64 | (do (assert (contains? project-specifiers specifier)) 65 | (assoc-in this [:projections specifier] values)))) 66 | (filter [#^HBaseConstraints this type compare value-spec] 67 | (assoc this :filters (conj (:filters this) [type [compare value-spec]]))) 68 | (filter [#^HBaseConstraints this type arg] 69 | (assoc this :filters (conj (:filters this) [type arg]))) 70 | (page [#^HBaseConstraints this size] 71 | (assoc this :page size))) 72 | 73 | (defmethod print-method HBaseConstraints [this writer] 74 | (.write writer (format "#" 75 | (count (:projections this)) 76 | (count (:filters this))))) 77 | 78 | (defn constraints [] 79 | (HBaseConstraints. nil nil nil nil)) 80 | 81 | ;; 82 | ;; Apply constraints 83 | ;; 84 | 85 | (defn timestamp-now [] 86 | (.getMillis (time/now))) 87 | 88 | (defn timestamp-ago 89 | "Return a Unix timestamp from a reference time object 90 | minus a number of :hours, :minutes or :days" 91 | ([reference type amount] 92 | (assert (keyword? 
type)) 93 | (.getMillis 94 | (cond (= type :hours) 95 | (time/minus reference (time/hours amount)) 96 | (= type :minutes) 97 | (time/minus reference (time/minutes amount)) 98 | (= type :days) 99 | (time/minus reference (time/days amount)) 100 | true 101 | (throw (java.lang.Error. "Unrecognized argument to timestamp-ago"))))) 102 | ([type amount] 103 | (timestamp-ago (time/now) type amount))) 104 | 105 | (defn timestamp [reference] 106 | (cond (number? reference) (long reference) 107 | (= reference :now) (timestamp-now) 108 | (sequential? reference) (apply timestamp-ago reference))) 109 | 110 | ;; Selecting result subsets 111 | 112 | (defmulti apply-project (fn [op schema [select values]] select)) 113 | 114 | (defmethod apply-project :families 115 | [op schema [select values]] 116 | (doseq [family values] 117 | (.addFamily op (encode-family schema family)))) 118 | 119 | (defmethod apply-project :columns 120 | [op schema [select values]] 121 | (doseq [spec values] 122 | (if (sequential? spec) 123 | (.addColumn op 124 | (encode-family schema (first spec)) 125 | (encode-column schema (first spec) (second spec))) 126 | (.addColumn op 127 | (encode-column schema nil spec))))) 128 | 129 | (defmethod apply-project :row-range 130 | [op schema [select [min-value max-value]]] 131 | (doto op 132 | (.setStartRow (encode-row schema min-value)) 133 | (.setStopRow (encode-row schema max-value)))) 134 | 135 | (defmethod apply-project :timestamp 136 | [op schema [select value]] 137 | (.setTimeStamp op (timestamp value))) 138 | 139 | (defmethod apply-project :timerange 140 | [op schema [select [start end]]] 141 | (.setTimeRange op (timestamp start) (timestamp end))) 142 | 143 | (defmethod apply-project :max-versions 144 | [op schema [select values]] 145 | (.setMaxVersions op (int values))) 146 | 147 | ;; 148 | ;; Create filters and comparators to filter 149 | ;; rows based on columns, values, etc. 
;;
;;
;; FILTERS: [ELEMENT COMPARITOR VALUE-SPEC] | [ELEMENT VALUE-SPEC]
;; ELEMENT: :row | :qualifier | :column | :cell
;; COMPARITOR: [COMP-TYPE COMPARISON] | Comparison
;; COMPARISON: =,>,<,<=,>=,not=
;; COMP-TYPE: :binary | :prefix | :regex | :substr
;; VALUE-SPEC: | [ ] | [ ]

(comment
  ;; Get All rows with ID > 2 with :text qualifiers and message usernames
  ;; starting in IE, return only :message and :test families where :message
  ;; families have :text2 or :text fields present
  (-> (constraints)
      (filter :row [:binary >] 2)
      (filter :qualifier := [:messages :content])
      (filter :column [:prefix :=] [:message :username "ie"])
      (project :families [:message :test])
      (project :columns [[:message :text2] [:message :text]])))

(defn lookup-compare
  "Translate a comparison spec into an HBase CompareOp. Accepts the bare
  comparison or a [comp-type comparison] vector, in which case only the
  comparison element is consulted. Keywords and bare symbols both match.
  NOTE(review): the quoted forms like '= expand to (quote =) list literals
  in `case` and so never match a plain symbol; they are redundant but kept
  to preserve the original matching behavior exactly."
  [sym]
  (let [comparison (if (vector? sym) (second sym) sym)]
    (case comparison
      (:= = '=) CompareFilter$CompareOp/EQUAL
      (:> > '>) CompareFilter$CompareOp/GREATER
      (:>= >= '>=) CompareFilter$CompareOp/GREATER_OR_EQUAL
      (:<= <= '<=) CompareFilter$CompareOp/LESS_OR_EQUAL
      (:< < '<) CompareFilter$CompareOp/LESS
      (:not= not= 'not=) CompareFilter$CompareOp/NOT_EQUAL
      (:nop nop 'nop) CompareFilter$CompareOp/NO_OP)))

(defn as-comparator
  "Construct the HBase comparator implied by a [comp-type comparison] spec.
  A bare (non-vector) spec falls through to a plain BinaryComparator.
  :regex and :substr comparators want strings, so byte values are converted."
  [compare value]
  (if-not (vector? compare)
    (BinaryComparator. value)
    (case (first compare)
      :binary (BinaryComparator. value)
      :prefix (BinaryPrefixComparator. value)
      :regex (RegexStringComparator. (if (string? value) value (Bytes/toString value)))
      :substr (SubstringComparator. (if (string? value) value (Bytes/toString value))))))

;; Dispatch on the head keyword of a [type & args] filter spec.
(defmulti make-filter (fn [schema [type & args]] type))

(defmethod make-filter :default
  [schema [type & rest]]
  (println "Unrecognized filter option " type)
  (throw (java.lang.Error. "Bad option")))

;; Compare the (schema-encoded) row key against a value.
(defmethod make-filter :row
  [schema [type [compare row-value]]]
  (RowFilter. (lookup-compare compare)
              (as-comparator compare (encode-row schema row-value))))

;; Keep rows strictly between min-value and max-value (exclusive bounds).
(defmethod make-filter :row-range
  [schema [type [min-value max-value]]]
  (FilterList. FilterList$Operator/MUST_PASS_ALL
               [(RowFilter. (lookup-compare :>)
                            (as-comparator :> (encode-row schema min-value)))
                (RowFilter. (lookup-compare :<)
                            (as-comparator :< (encode-row schema max-value)))]))

;; Compare column qualifiers (encoded per the family's key-type).
(defmethod make-filter :qualifier
  [schema [type [compare [family qual-value]]]]
  (QualifierFilter. (lookup-compare compare)
                    (as-comparator compare (encode-column schema family qual-value))))

;; Filter on a single column's value. :regex/:substr compare against the raw
;; value; everything else against the schema-encoded cell bytes. Rows missing
;; the column are dropped by default (override via :filter-missing false).
(defmethod make-filter :column
  [schema [type [compare [family qualifier value & {:keys [filter-missing]
                                                    :or {filter-missing true}}]]]]
  (doto (SingleColumnValueFilter.
         (encode-family schema family)
         (encode-column schema family qualifier)
         (lookup-compare compare)
         (cond
           (or (= (first compare) :regex) (= (first compare) :substr))
           (as-comparator compare value)
           :else
           (as-comparator compare (encode-cell schema family qualifier value))))
    (.setFilterIfMissing filter-missing)))

;; Compare any cell value; `encoding` is currently ignored.
(defmethod make-filter :cell
  [schema [type [compare [value encoding]]]]
  (ValueFilter. (lookup-compare compare)
                (as-comparator compare value))) ;;(encode-value value encoding))))

;; Return keys only (no cell values) to minimize transfer.
(defmethod make-filter :keys-only
  [schema [type [rest]]]
  (KeyOnlyFilter.))

;; Return only the first key/value of each row.
(defmethod make-filter :first-kv-only
  [schema [type empty]]
  (FirstKeyOnlyFilter.))

;; Page the scan: at most `size` rows per client round trip.
(defmethod make-filter :limit
  [schema [type size]]
  (PageFilter. (long size)))

;; Conjunction of sub-filter specs.
(defmethod make-filter :all
  [schema [type flist]]
  (let [built (map (partial make-filter schema) flist)]
    (FilterList. FilterList$Operator/MUST_PASS_ALL built)))

;; Disjunction of sub-filter specs.
(defmethod make-filter :or
  [schema [type flist]]
  (let [built (map (partial make-filter schema) flist)]
    (FilterList. FilterList$Operator/MUST_PASS_ONE built)))

;;
;; Turn constraint specs into HBase objects
;;

(defn filter-list
  "Wrap `list` of Filter objects into a FilterList combined with AND (:all)
  or OR (:any) semantics. Returns nil for any other combine-op (note: `list`
  shadows clojure.core/list inside this fn)."
  [list combine-op]
  (cond
    (= combine-op :all)
    (FilterList. FilterList$Operator/MUST_PASS_ALL list)
    (= combine-op :any)
    (FilterList. FilterList$Operator/MUST_PASS_ONE list)))

(defn constrain-op
  "Apply an HBaseConstraints record to a Get/Scan op: install its filters
  (AND-combined) and each of its projections, then return the op."
  [op schema #^HBaseConstraints c]
  (let [flist (map (partial make-filter schema) (:filters c))]
    (assert (every? #(not (nil? %)) flist))
    (.setFilter op (filter-list flist :all))
    (doseq [projection (:projections c)]
      (apply-project op schema projection))
    op))

;; (defn combine-filters
;;   [schema combine-op & filters]
;;   (let [flist (map (partial make-filter schema) filters)]
;;     (when (= combine-op :or)
;;       (FilterList. FilterList$Operator/MUST_PASS_ONE flist))
;;     (when (= combine-op :and)
;;       (FilterList. FilterList$Operator/MUST_PASS_ALL flist))))

--------------------------------------------------------------------------------
/src/com/compass/hbase/schema.clj:
--------------------------------------------------------------------------------
(ns com.compass.hbase.schema
  (:import org.apache.hadoop.hbase.util.Bytes)
  (:require [clojure.stacktrace]
            ;; [clj-serializer.core :as ser]
            [clj-time.core :as time]
            [clojure.data.json :as json]))

;;
;; Schema-based translation between HBase byte representations
;; and clojure representations. Remove visibility into icky
;; HBase Java interface.
;;
;; Simplifying assumptions:
;; - Tables and families are clojure keywords, strings in the DB
;; - Qualifiers can be anything, but default to strings (not keywords)
;; - Values, of course, can be anything and default to strings
;; - Timestamps are Java longs
;; - Exceptions map specific keys to a value type for deser.
;;   this only works for column families where all qualifiers have the same type
;;   because they have to be decoded prior to the lookup

(comment
  (def define-schema test-table [:row-type :long
                                 :key-type :string
                                 :value-type :json-key]
    :family1 {:key-type :long
              :value-type :string
              :exceptions {(long 0) :json
                           (long 1) :double}}
    :family2 {:value-type :json}))

;; A schema names a table and carries per-family type specs plus table-level
;; metadata defaults (:row-type, :key-type, :value-type).
(defrecord hbase-schema [name metadata families])

;; FIX: the original format string was just "#", which printed every schema
;; as a bare `#` and dropped the schema name. Include the name so printed
;; schemas are identifiable at the REPL and in logs.
(defmethod print-method hbase-schema [schema writer]
  (.write writer (format "#<hbase-schema %s>" (:name schema))))

(defn make-schema
  "Families is a map of family names to type definitions"
  [name families metadata]
  (hbase-schema. name metadata families))

;;
;; Schema accessors
;;

(def row-default :string)
(def qualifier-default :keyword)
(def value-default :json)
(def valid-types [:bool :int :long :string :symbol :keyword :ser :json :json-key])

(defn check-schema
  "Throw unless `schema` looks like a schema record (a map with :metadata)."
  [schema]
  (when (not (and (map? schema) (:metadata schema)))
    (throw (java.lang.Error. (format "Invalid schema: %s" schema)))))

(defn- schema-metadata
  "Get metadata for a schema"
  [schema name]
  (check-schema schema)
  ((:metadata schema) name))

(defn- schema-family
  "Return the family's schema"
  [schema family]
  (check-schema schema)
  ((:families schema) family))

(defn- qualifier-type
  "Return the specified type of the qualifier. All qualifiers must have the
  same serialization type. Precedence: family :key-type, then table-level
  :key-type metadata, then the global default."
  [schema family]
  (check-schema schema)
  (or (:key-type (schema-family schema family))
      (schema-metadata schema :key-type)
      qualifier-default))

(defn- family-value-type
  "Value type for `qualifier` in a family spec, honoring per-key :exceptions."
  [family qualifier]
  (or (and (:exceptions family)
           ((:exceptions family) qualifier))
      (:value-type family)))

(defn- value-type
  "Value type for family/qualifier: family spec, then table-level metadata,
  then the global default."
  [schema family qualifier]
  (check-schema schema)
  (or (family-value-type (schema-family schema family) qualifier)
      (schema-metadata schema :value-type)
      value-default))

(defn row-type
  "The schema's declared row-key type."
  [schema]
  (check-schema schema)
  (schema-metadata schema :row-type))

;;
;; Define and cache schemas for convenience
;;

(defonce schemas (atom nil))
(defn- put-schema* [orig name schema] (assoc orig name schema))
(defn put-schema [name schema] (swap! schemas put-schema* name schema))
(defn get-schema [name]
  (when-let [recs @schemas]
    (recs (keyword name))))

(defn- matching-type? [rtype vtype]
  (case rtype
    :string (string? vtype)
    :keyword (keyword? vtype)
    :symbol (symbol? vtype)
    :bool (or (= vtype true) (= vtype false))
    (:int :long) (integer? vtype)
    (:float :double) (float? vtype)
    (:json :json-key :ser :raw) true
    true))

(defn- valid-exception-map?
  "Validate a family's :exceptions map against its declared key-type.
  Returns true when valid or absent; throws on inconsistency."
  [type emap]
  (cond (nil? emap) true
        (nil? type)
        (throw (java.lang.Error. "Families with exceptions must specify key-type"))
        (not (every? (partial matching-type? type) (keys emap)))
        (throw (java.lang.Error. "Exception keys must match family key-type"))))

(defn- canonical-family-spec
  "Reduce step: validate one [family spec] pair and assoc its canonical
  {:key-type ... :value-type ...} form into `fams`."
  [fams [fam fspec]]
  (assert (or (keyword? fam) (string? fam)))
  (cond (map? fspec)
        (do (assert (every? #{:key-type :value-type :exceptions} (keys fspec)))
            (valid-exception-map? (:key-type fspec) (:exceptions fspec))
            (assoc fams fam fspec))
        (and (sequential? fspec) (= (count fspec) 2))
        (assoc fams
          fam {:key-type (first fspec)
               :value-type (second fspec)})
        true (throw (java.lang.Error.
                     (str "Unknown family " fspec " for family " fam)))))

(defn canonical-families [spec]
  (reduce canonical-family-spec (hash-map) (partition 2 spec)))

(defmacro define-schema
  "A convenience macro for systems to use"
  [table-name [& metadata] & family-defs]
  (let [table-name (name table-name)]
    `(put-schema '~(keyword table-name)
                 (make-schema
                  ~(str table-name)
                  ~(canonical-families family-defs)
                  ~(assoc (apply hash-map metadata)
                     :table (keyword table-name))))))

(define-schema :schemas [:row-type :keyword
                         :key-type :string
                         :value-type :json-key])

;;
;; Schema-guided encoding for HBase
;;

(defmulti encode-value
  "Encode clojure values according to schema definition. Reasonable conversions
  are supported for strings (e.g. symbols->strings)"
  (fn [value type] type))

;; Primitives
(defmethod encode-value :keyword [arg typ] (Bytes/toBytes (name arg)))
(defmethod encode-value :symbol [arg typ] (Bytes/toBytes (name arg)))
(defmethod encode-value :string [arg typ]
  (assert (or (symbol? arg) (keyword? arg) (string? arg))
          (str "Wrong value: type='" (type arg) "', val='" arg "', typ='" typ "'"))
  (Bytes/toBytes (name arg)))
(defmethod encode-value :bool [arg typ] (Bytes/toBytes (boolean arg)))
(defmethod encode-value :long [arg typ] (Bytes/toBytes (long arg)))
(defmethod encode-value :int [arg typ] (Bytes/toBytes (int arg)))
(defmethod encode-value :float [arg typ] (Bytes/toBytes (float arg)))
(defmethod encode-value :double [arg typ] (Bytes/toBytes (double arg)))
(defmethod encode-value :raw [arg typ] arg)

;; Aggregates
;;(defmethod encode-value :ser [arg type] (ser/serialize arg))
(defmethod encode-value :json [arg type] (Bytes/toBytes (json/json-str arg)))
(defmethod encode-value :json-key [arg type] (Bytes/toBytes (json/json-str arg)))

;; FIX: the default method declared arity [arg], but encode-value is always
;; invoked with two args ([value type]); an unknown type therefore died with
;; an ArityException instead of the intended assertion failure.
(defmethod encode-value :default [arg typ] (assert false))

;;
;; Schema-guided encoding
;;

(defn encode-row [schema row]
  (encode-value row (row-type schema)))

(defn encode-family [schema family]
  (encode-value family :string))

(defn encode-column [schema family column]
  (encode-value column (qualifier-type schema family)))

(defn encode-cell [schema family column value]
  (encode-value value (value-type schema family column)))

;;
;; Schema-guided decoding for HBase
;;

(defmulti decode-value
  "Decode byte sequences according to type specification"
  (fn [data type] type))

;; Primitive types
(defmethod decode-value :string [bytes type] (Bytes/toString bytes))
;; FIX: clojure.core/intern requires a namespace argument ((intern ns sym)),
;; so the original (intern (Bytes/toString bytes)) always threw. `symbol` is
;; the correct constructor for a symbol from a string.
(defmethod decode-value :symbol [bytes type] (symbol (Bytes/toString bytes)))
(defmethod decode-value :keyword [bytes type] (keyword (Bytes/toString bytes)))
(defmethod decode-value :long [bytes type] (Bytes/toLong bytes))
(defmethod decode-value :int [bytes type] (Bytes/toInt bytes))
(defmethod decode-value :bool [bytes type] (Bytes/toBoolean bytes))
(defmethod decode-value :float [bytes type] (Bytes/toFloat bytes))
(defmethod decode-value :double [bytes type] (Bytes/toDouble bytes))
(defmethod decode-value :raw [bytes type] bytes)

;; Aggregate data methods
;;(defmethod decode-value :ser [bytes type] (ser/deserialize bytes nil))
(defmethod decode-value :json [bytes type] (json/read-json (Bytes/toString bytes) nil))
(defmethod decode-value :json-key [bytes type] (json/read-json (Bytes/toString bytes) true))

(defmacro with-robust-decode
  "Evaluate body; on any Throwable, print the stack trace and a diagnostic
  tagged with `type` and the result's row, and yield nil instead of throwing."
  [[type result] & body]
  `(try
     ~@body
     (catch java.lang.Throwable e#
       (clojure.stacktrace/print-throwable e#)
       (println "Can't decode " ~type " for row " (.getRow ~result))
       nil)))

(defn decode-row [schema result]
  (with-robust-decode [:row result]
    (decode-value (.getRow result) (row-type schema))))

(defn decode-all
  "Given an HBase Result object, decode all the versions such that for each
  family and column there is a map of timestamp and values for historical versions"
  [schema result]
  (assert schema)
  (if (or (not result) (.isEmpty result))
    (do (println "Empty results") nil)
    [(with-robust-decode [:row result]
       (decode-row schema result))
     (loop [kvs (.raw result)
            kv-map {}]
       (if-let [kv (first kvs)]
         (let [family (with-robust-decode [:keyword result]
                        (decode-value (.getFamily kv) :keyword))
               qualifier (with-robust-decode [:qualifier result]
                           (decode-value (.getQualifier kv)
                                         (qualifier-type schema family)))
               timestamp (.getTimestamp kv)
               ;; cleaned up: the original wrapped this in a redundant inner
               ;; let whose binding was never used.
               value (with-robust-decode [:keyword result]
                       (decode-value (.getValue kv)
                                     (value-type schema family qualifier)))]
           (recur (next kvs)
                  (assoc-in kv-map [family qualifier timestamp] value)))
         kv-map))]))

(defn decode-latest
  "Given an HBase Result object, decode the latest versions of all the
  available columns"
  [schema result]
  (assert schema)
  (if (or (not result) (.isEmpty result))
    nil
    [(with-robust-decode [:row result]
       (decode-row schema result))
     ;; First pass: collect the [family qualifier] byte-array pairs present.
     ;; (local renamed from `keys`, which shadowed clojure.core/keys)
     (loop [remaining-kvs (seq (.raw result))
            seen #{}]
       (if-let [kv (first remaining-kvs)]
         (let [family (.getFamily kv)
               qualifier (.getQualifier kv)]
           (recur (next remaining-kvs)
                  (conj seen [family qualifier])))
         ;; At this point, we have a duplicate-less list of [f q] keys.
         ;; Go back through, pulling the latest values for these keys.
         (loop [remaining-keys seen
                kv-map {}]
           (if-let [[family qualifier] (first remaining-keys)]
             (let [keyfam (decode-value family :keyword)
                   qual (with-robust-decode [:qualifier result]
                          (decode-value qualifier (qualifier-type schema keyfam)))]
               (recur (next remaining-keys)
                      (assoc-in kv-map [keyfam qual]
                                (let [value (.getValue result family qualifier)]
                                  (with-robust-decode [:cell result]
                                    (decode-value value (value-type schema keyfam qual)))))))
             kv-map))))]))

--------------------------------------------------------------------------------
/src/com/compass/hbase/client.clj:
--------------------------------------------------------------------------------
(ns com.compass.hbase.client
  (:refer-clojure :exclude [get])
  (:use com.compass.hbase.schema)
  (:require [com.compass.hbase.filters :as f])
  (:import org.apache.hadoop.hbase.util.Bytes
           org.apache.hadoop.hbase.HBaseConfiguration
           org.apache.hadoop.conf.Configuration
           [org.apache.hadoop.hbase.client HTablePool HTable HTablePool$PooledHTable
            Get Put Increment Delete Scan HConnectionManager]
           [java.util.concurrent ThreadPoolExecutor ArrayBlockingQueue TimeUnit]))

;; ====================================
;; Connections
;; ====================================
(def ^:dynamic *configuration* (HBaseConfiguration/create))

(defn- get-connection []
  (HConnectionManager/getConnection *configuration*))

;; ====================================
;; Tables
;; ====================================

(def db
  "This holds the HTablePool reference for all users. Users never have to see
  this, and the HBase API does not appear to me to allow configuration in code
  nor the use of multiple databases simultaneously (configuration is driven by
  the XML config files). So we just hide this detail from the user.)"
  (atom nil))

;; FIX: the original (swap! db (fn [_] (HTablePool.))) unconditionally replaced
;; any existing pool, so two racing callers could each install (and abandon) a
;; pool. Keep an existing pool when one is already present.
(defn table-pool []
  (or @db
      (swap! db (fn [existing] (or existing (HTablePool.))))))

(defn table
  "Gets an HTable from the open HTablePool by name."
  [table-name]
  (io!
   (.getTable (table-pool) (encode-value table-name :string))))

(defn as-table
  "Coerce `ref` to an HTable: pass HTable/PooledHTable through unchanged,
  otherwise look it up by name in the pool."
  [ref]
  (if (or (= (type ref) HTable)
          (= (type ref) HTablePool$PooledHTable))
    ref
    (table (name ref))))

(defn- release-table [table]
  (io! (.putTable (table-pool) table)))

;; FIX: release the table in a finally block so it is returned to the pool
;; even when the body throws; the original leaked the pooled table on error.
(defmacro with-table
  "Bind `var` to the resolved table for `expr`, run body, and always return
  the table to the pool afterwards."
  [[var expr] & body]
  `(let [~var (as-table ~expr)]
     (try
       (do ~@body)
       (finally
         (release-table ~var)))))

(defn- table-schema [table]
  (with-table [table table]
    (get-schema (decode-value (.getTableName table) :string))))


;; ==================================
;; SINGLE ROW GET OPERATIONS
;; ==================================

(defn make-get
  "Build a Get for `row`, applying `constraints` (filters/projections)
  when provided."
  ([table schema row constraints]
     (if constraints
       (-> (Get. (encode-row schema row))
           (f/constrain-op schema constraints))
       (Get. (encode-row schema row)))))

(defn get
  "Get primitive row elements"
  ([table row constraints]
     (with-table [table table]
       (let [schema (table-schema table)
             g (make-get table schema row constraints)]
         (io! (if (:all-versions constraints)
                (decode-all schema (.get table g))
                (decode-latest schema (.get table g)))))))
  ([table row]
     (get table row nil)))

;; ==================================
;; SINGLE ROW PUT OPERATIONS
;; ==================================

;; Add one schema-encoded family/column/value triple to a Put.
(defn- put-add [#^Put put schema family col value]
  (.add put
        (encode-family schema family)
        (encode-column schema family col)
        (encode-cell schema family col value)))

(defn make-put
  "Build a Put for `row` from either a {:family {:col val}} nested map or a
  sequence of [family col val] triples."
  [schema row values]
  (let [rowbytes (encode-row schema row)
        put (new Put rowbytes)]
    (if (map? values)
      (doseq [[family cols] values]
        (doseq [[col value] cols]
          (put-add put schema family col value)))
      (doseq [[family col value] values]
        (put-add put schema family col value)))
    put))

(defn put
  "Put data into a row using a value map or a vector sequence:
  of vectors. Value maps are {:family {:column value :column value}}
  and vector inputs are [[family column value] [family column value]]
  No options are currently supported, but are maintained for future
  improvements."
  [table row values & opts]
  (with-table [table table]
    (let [schema (table-schema table) ;; NOTE: or from schema argument
          p (make-put schema row values)]
      (io! (.put table p)))))

(defn put-one [table row family column value]
  (put table row [[family column value]]))

;; =========================================
;; SINGLE ROW DELETES
;; =========================================

(defn make-del [schema row]
  (Delete. (encode-row schema row)))

(defn add-del-family [del schema family]
  (doto del
    (.deleteFamily (encode-family schema family))))

(defn add-del-column [del schema family column]
  (doto del
    (.deleteColumns (encode-family schema family)
                    (encode-column schema family column))))

(defn do-del [table del]
  (with-table [table table]
    (io! (.delete table del))))

(defn del
  "Directly delete all or part of a row (all versions)"
  ([table row]
     (with-table [table table]
       (let [schema (table-schema table)]
         (do-del table (make-del schema row)))))
  ([table row family]
     (with-table [table table]
       (let [schema (table-schema table)]
         (do-del table (-> (make-del schema row)
                           (add-del-family schema family))))))
  ([table row family column]
     (with-table [table table]
       (let [schema (table-schema table)]
         (do-del table (-> (make-del schema row)
                           (add-del-column schema family column)))))))

;; =========================================
;; Increment operation

;; (c/define-schema :items [:defaults [:keyword :long]
;;                          :row-type :integer]
;;   :counters [:keyword :long])
;; (client/increment :items 100 {:counters {:downvote -2 upvote: 1}})
;; =========================================

;; Add one schema-encoded family/column/amount triple to an Increment.
(defn- increment-add [#^Increment increment schema family col value]
  (.addColumn increment
              (encode-family schema family)
              (encode-column schema family col)
              (long value)))

(defn make-increment
  "Build an Increment for `row` from either a {:family {:col amount}} nested
  map or a sequence of [family col amount] triples."
  [schema row values]
  (let [rowbytes (encode-row schema row)
        increment (new Increment rowbytes)]
    (if (map? values)
      (doseq [[family cols] values]
        (doseq [[col value] cols]
          (increment-add increment schema family col value)))
      (doseq [[family col value] values]
        (increment-add increment schema family col value)))
    increment))

(defn increment
  "Increment data into a row using a value map or a vector sequence:
  of vectors. Value maps are {:family {:column value :column value}}
  and vector in increment are [[family column value] [family column value]]"
  ([table row values constraints]
     (with-table [table table]
       (let [schema (table-schema table)
             i (make-increment schema row values)]
         (io! (if (:all-versions constraints)
                (decode-all schema (.increment table i))
                (decode-latest schema (.increment table i)))))))
  ([table row values]
     (increment table row values nil)))

(defn increment-one [table row family column value]
  (increment table row [[family column value]]))

;; =========================================
;; Multi Row Get / Put operations
;; =========================================

(def multi-action-executor (atom nil))

;; FIX: the original swap! installed a freshly-constructed executor
;; unconditionally ((fn [a e] e)), so a race could replace a live executor and
;; leak its threads. Keep the existing executor when one is present.
;; NOTE(review): on a swap! retry the constructor may run more than once; the
;; discarded executor has no started threads, so this is harmless.
(defn- get-action-executor []
  (or @multi-action-executor
      (swap! multi-action-executor
             (fn [existing]
               (or existing
                   (let [start-pool 10
                         max-pool 20
                         keepalive 60
                         queue (ArrayBlockingQueue. 40)]
                     (ThreadPoolExecutor. start-pool max-pool keepalive
                                          TimeUnit/SECONDS queue)))))))

(defn process-batch
  "Low level execution of batch commands"
  [table actions]
  (let [results (make-array java.lang.Object (count actions))]
    (let [exec (get-action-executor)]
      (println (format "Executor: complete:%s active:%s (core:%s cursize:%s qsize:%s)"
                       (.getCompletedTaskCount exec) (.getActiveCount exec)
                       (.getCorePoolSize exec) (.getPoolSize exec)
                       (.size (.getQueue exec))))
      (io! (.processBatch (get-connection)
                          actions
                          (encode-value table :string)
                          exec
                          results)))
    results))

(defn make-puts
  "Like make put, but accepts vectors of arguments to "
  [schema records]
  (doall
   (map #(make-put schema (first %) (second %))
        records)))

(defn put-multi
  "Give a table reference and a sequence of value vectors of the form
  [[row ] [row ]] where = {:family {:col } ...} |
  [[family col value] [family col value] ...]. Performs a single
  batch of actions using a fixed thread pool."
  [table records]
  (let [schema (table-schema table)
        puts (make-puts schema records)]
    ;; FIX: added doall for consistency with get-multi, so decoding happens
    ;; here rather than lazily after the batch context is gone.
    (doall
     (map (partial decode-latest schema)
          (process-batch table puts)))))

(defn make-gets
  "Build Gets for `records`; each record is either a bare row or a
  [row constraints] pair. The second arity applies `constraints` to all."
  ([table schema records]
     (map (fn [rec]
            (if (sequential? rec)
              (make-get table schema (first rec) (second rec))
              (make-get table schema rec nil)))
          records))
  ([table schema records constraints]
     (map (fn [rec]
            (make-get table schema rec constraints))
          records)))

(defn get-multi
  "Similar to put multi, except the input records are [[row ] ...]
  where is a flat list of keyvalue pairs suitable to pass to make-get"
  ([table records]
     (let [schema (table-schema table)
           gets (make-gets table schema records)]
       (doall
        (map (partial decode-latest schema)
             (process-batch table gets)))))
  ([table records common-constraints]
     (let [schema (table-schema table)
           gets (make-gets table schema records common-constraints)]
       (doall
        (map (partial decode-latest schema)
             (process-batch table gets))))))


;; ==================================
;; Scanning
;; ==================================

;; Scan ranges of tables
;; 1) Map over a set of elements
;; 2) Return all the elements
;; 3) Procedural filtering elements

(def cache-block-size 100)

(defn make-scan [schema constraints]
  (let [scan (Scan.)]
    (.setCaching scan cache-block-size)
    (f/constrain-op scan schema constraints)))

(defn scan
  "Apply filter/processing function fn to all entries in the
  table as constrained by the optional filter object. fn receives
  the decoded row and a family map of values for that row"
  ([fn table constraints all?]
     (with-table [table table]
       (let [schema (table-schema table)
             scan (make-scan schema constraints)
             scanner (io! (.getScanner table scan))]
         ;; FIX: close the scanner in finally so it is not leaked when fn or
         ;; decoding throws (original closed it only on the success path).
         (try
           (let [decoder (if all? decode-all decode-latest)]
             (doall
              (keep #(apply fn (decoder schema %))
                    scanner)))
           (finally
             (.close scanner))))))
  ([fn table constraints]
     (scan fn table constraints nil))
  ([fn table]
     (scan fn table (f/constraints) nil)))

(defn do-scan
  "Do scan will run a function over the returned results without
  collecting them (presumably for side effects)"
  ([fn table constraints]
     (with-table [table table]
       (let [schema (table-schema table)
             scan (make-scan schema constraints)
             scanner (io! (.getScanner table scan))]
         ;; FIX: close the scanner even if fn throws.
         (try
           (doseq [result scanner]
             (apply fn (decode-latest schema result)))
           (finally
             (.close scanner)))
         nil))))


(defn raw-scan
  "This function collects the scan results without decoding
  The function can filter results by returning nil"
  ([fn table constraints]
     (with-table [table table]
       (let [schema (table-schema table)
             scan (make-scan schema constraints)
             scanner (io! (.getScanner table scan))]
         ;; FIX: close the scanner even if fn throws.
         (try
           (doall
            (keep fn scanner))
           (finally
             (.close scanner)))))))

;; All de-referencing actions must take
;; place within a scanner context
;; (defn record-scanner
;;   "Like duck read-lines; a lazy sequence that reads hbase records
;;   until it reaches the end and closes the scanner"
;;   [table & filter]
;;   (let [read-record (fn this [^HTable$ClientScanner scan]
;;                       (lazy-seq
;;                        (if-let [record (.next scan)]
;;                          (cons (translate-result record) (this scan))
;;                          (.close scan))))]
;;     (read-record
;;      (hbase/scanner (as-table table)
;;                     (make-scan filter)))))


--------------------------------------------------------------------------------