├── lib
│   └── cassandra-composite-type-0.0.1.jar
├── .gitignore
├── src
│   ├── test
│   │   ├── resources
│   │   │   ├── log4j.properties
│   │   │   └── cassandra.yaml
│   │   └── java
│   │       └── indexedcollections
│   │           └── IndexTest.java
│   └── main
│       └── java
│           └── indexedcollections
│               └── IndexedCollections.java
├── README
└── pom.xml
/lib/cassandra-composite-type-0.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edanuff/CassandraIndexedCollections/HEAD/lib/cassandra-composite-type-0.0.1.jar
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.DS_Store
3 | hector.iml
4 | releases
5 | target
6 | tmp
7 | bin
8 | .classpath
9 | .project
10 | .settings
11 | out
12 | *.svn
13 | *.ipr
14 | *.iws
15 | DS_Store
16 |
17 | /.DS_Store
--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 |
17 | # for production, you should probably set the root to INFO
18 | # and the pattern to %c instead of %l. (%l is slower.)
19 |
20 | # output messages into a rolling log file as well as stdout
21 | log4j.rootLogger=INFO,stdout
22 |
23 | # stdout
24 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
25 | #log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout
26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
27 | log4j.appender.stdout.layout.ConversionPattern=%d %p (%t) [%c] - %m%n
28 |
29 | log4j.category.org.apache=ERROR, stdout
30 |
--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Indexed Collections for Cassandra
2 |
3 | NOTE: This has been updated to use the new built-in composite types in Cassandra 0.8.1
4 |
5 | You will need to use Hector 0.8.0-2-SNAPSHOT or later and Cassandra 0.8.1 or later.
6 |
7 | This is an implementation of the indexing technique described here:
8 |
9 | http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html
10 |
11 | The original article describes the use of a custom composite column comparator. A
12 | version of this comparator has recently been added to the latest version of Cassandra,
13 | meaning that it's no longer necessary to install anything on the Cassandra instance
14 | to handle composite types.
15 |
16 | This is a simplified version of a more complex indexing scheme that's been in use
17 | for some time now as part of a large project. However, this simplified implementation was
18 | largely created from scratch and hasn't been extensively tested.
19 |
20 | Unlike Cassandra's built-in secondary indexes, this indexing is completely
21 | dynamic: new indexes can be created at any time, and index value types don't
22 | have to be predefined, which makes it easier to use with JSON data.
23 |
24 | One advantage of this indexing technique is that it combines relationships with mini-indexes,
25 | so that a user can be friends with 5 other users and you can then search that
26 | user's friend list. In this example, the user is what is referred to in the
27 | code as the "container" and the other users are "items".
28 |
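A minimal sketch of the API (hypothetical entity UUIDs; ko, ue, and se are
the Keyspace and the UUID/String serializers, set up as in IndexTest):

    ContainerCollection<UUID> employees =
            new ContainerCollection<UUID>(companyId, "employees");

    IndexedCollections.addItemToCollection(ko, employees, employeeId,
            IndexedCollections.defaultCFSet, ue);

    IndexedCollections.setItemColumn(ko, employeeId, "name", "fred",
            Collections.singleton(employees),
            IndexedCollections.defaultCFSet, ue, se, se, ue);

    List<UUID> matches = IndexedCollections.searchContainer(ko, employees,
            "name", "fred", null, 100, false,
            IndexedCollections.defaultCFSet, ue, ue, se);
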
29 | For examples of use, look at the class IndexTest.
30 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
3 | <modelVersion>4.0.0</modelVersion>
4 | <groupId>cassandra-indexed-collections</groupId>
5 | <artifactId>cassandra-indexed-collections</artifactId>
6 | <version>0.0.1</version>
7 | <name>CassandraIndexedCollections</name>
8 | <build>
9 | <plugins>
10 | <plugin>
11 | <groupId>org.apache.maven.plugins</groupId>
12 | <artifactId>maven-surefire-plugin</artifactId>
13 | <version>2.6</version>
14 | <configuration>
15 | <systemPropertyVariables>
16 | <storage-config>${basedir}/src/test/conf</storage-config>
17 | </systemPropertyVariables>
18 | <forkMode>always</forkMode>
19 | <argLine>-Xmx512M -Xms512M</argLine>
20 | </configuration>
21 | </plugin>
22 | <plugin>
23 | <groupId>org.apache.maven.plugins</groupId>
24 | <artifactId>maven-compiler-plugin</artifactId>
25 | <version>2.3.2</version>
26 | <configuration>
27 | <source>1.6</source>
28 | <target>1.6</target>
29 | <optimize>true</optimize>
30 | <debug>true</debug>
31 | <showDeprecation>true</showDeprecation>
32 | <showWarnings>true</showWarnings>
33 | </configuration>
34 | </plugin>
35 | <plugin>
36 | <groupId>org.apache.maven.plugins</groupId>
37 | <artifactId>maven-install-plugin</artifactId>
38 | <version>2.3.1</version>
39 | <executions>
40 | <execution>
41 | <id>install cassandra-composite-types</id>
42 | <phase>initialize</phase>
43 | <goals>
44 | <goal>install-file</goal>
45 | </goals>
46 | <configuration>
47 | <packaging>jar</packaging>
48 | <groupId>cassandra-composite-type</groupId>
49 | <artifactId>cassandra-composite-type</artifactId>
50 | <version>0.0.1</version>
51 | <file>${basedir}/lib/cassandra-composite-type-0.0.1.jar</file>
52 | </configuration>
53 | </execution>
54 | </executions>
55 | </plugin>
56 | </plugins>
57 | </build>
58 | <dependencies>
59 | <dependency>
60 | <groupId>org.apache.cassandra</groupId>
61 | <artifactId>cassandra-all</artifactId>
62 | <version>0.8.1</version>
63 | <type>jar</type>
64 | </dependency>
65 | <dependency>
66 | <groupId>org.apache.cassandra</groupId>
67 | <artifactId>cassandra-javautils</artifactId>
68 | <version>0.7.0</version>
69 | <scope>test</scope>
70 | </dependency>
71 | <dependency>
72 | <groupId>me.prettyprint</groupId>
73 | <artifactId>hector-core</artifactId>
74 | <version>0.8.0-2-SNAPSHOT</version>
75 | </dependency>
76 | <dependency>
77 | <groupId>com.github.stephenc.eaio-uuid</groupId>
78 | <artifactId>uuid</artifactId>
79 | <version>3.2.0</version>
80 | </dependency>
81 | <dependency>
82 | <groupId>junit</groupId>
83 | <artifactId>junit</artifactId>
84 | <version>4.8.1</version>
85 | <scope>test</scope>
86 | </dependency>
87 | <dependency>
88 | <groupId>log4j</groupId>
89 | <artifactId>log4j</artifactId>
90 | <version>1.2.14</version>
91 | </dependency>
92 | <dependency>
93 | <groupId>org.slf4j</groupId>
94 | <artifactId>slf4j-api</artifactId>
95 | <version>1.6.1</version>
96 | </dependency>
97 | <dependency>
98 | <groupId>org.slf4j</groupId>
99 | <artifactId>slf4j-log4j12</artifactId>
100 | <version>1.6.1</version>
101 | </dependency>
102 | <dependency>
103 | <groupId>org.perf4j</groupId>
104 | <artifactId>perf4j</artifactId>
105 | <version>0.9.12</version>
106 | </dependency>
107 | </dependencies>
108 | <repositories>
109 | <repository>
110 | <id>codehaus</id>
111 | <name>codehaus</name>
112 | <url>http://repository.codehaus.org/</url>
113 | </repository>
114 | <repository>
115 | <id>nexus-snapshots</id>
116 | <name>Sonatype Nexus Snapshots</name>
117 | <url>http://oss.sonatype.org/content/repositories/snapshots</url>
118 | <snapshots>
119 | <enabled>true</enabled>
120 | </snapshots>
121 | </repository>
122 | </repositories>
123 | </project>
--------------------------------------------------------------------------------
/src/test/java/indexedcollections/IndexTest.java:
--------------------------------------------------------------------------------
1 | package indexedcollections;
2 |
3 | import static me.prettyprint.hector.api.beans.DynamicComposite.DEFAULT_DYNAMIC_COMPOSITE_ALIASES;
4 | import static me.prettyprint.hector.api.ddl.ComparatorType.DYNAMICCOMPOSITETYPE;
5 | import static me.prettyprint.hector.api.factory.HFactory.createColumn;
6 | import static me.prettyprint.hector.api.factory.HFactory.createKeyspace;
7 | import static me.prettyprint.hector.api.factory.HFactory.createMutator;
8 | import static me.prettyprint.hector.api.factory.HFactory.getOrCreateCluster;
9 | import static org.junit.Assert.assertEquals;
10 | import static org.junit.Assert.assertTrue;
11 | import indexedcollections.IndexedCollections.ContainerCollection;
12 |
13 | import java.io.IOException;
14 | import java.util.ArrayList;
15 | import java.util.LinkedHashSet;
16 | import java.util.List;
17 | import java.util.Set;
18 | import java.util.UUID;
19 |
20 | import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
21 | import me.prettyprint.cassandra.serializers.BytesArraySerializer;
22 | import me.prettyprint.cassandra.serializers.DynamicCompositeSerializer;
23 | import me.prettyprint.cassandra.serializers.LongSerializer;
24 | import me.prettyprint.cassandra.serializers.StringSerializer;
25 | import me.prettyprint.cassandra.serializers.UUIDSerializer;
26 | import me.prettyprint.cassandra.service.ThriftCfDef;
27 | import me.prettyprint.cassandra.service.ThriftKsDef;
28 | import me.prettyprint.cassandra.testutils.EmbeddedServerHelper;
29 | import me.prettyprint.hector.api.Cluster;
30 | import me.prettyprint.hector.api.Keyspace;
31 | import me.prettyprint.hector.api.Serializer;
32 |
33 | import org.apache.cassandra.config.ConfigurationException;
34 | import org.apache.cassandra.db.marshal.BytesType;
35 | import org.apache.cassandra.db.marshal.TimeUUIDType;
36 | import org.apache.cassandra.thrift.CfDef;
37 | import org.apache.cassandra.thrift.KsDef;
38 | import org.apache.log4j.Logger;
39 | import org.apache.thrift.transport.TTransportException;
40 | import org.junit.AfterClass;
41 | import org.junit.BeforeClass;
42 | import org.junit.Test;
43 |
44 | /**
45 | * Example class showing usage of IndexedCollections.
46 | */
47 | public class IndexTest {
48 |
49 | private static final Logger logger = Logger.getLogger(IndexTest.class
50 | .getName());
51 |
52 | public static final String KEYSPACE = "Keyspace";
53 |
54 | public static final StringSerializer se = new StringSerializer();
55 | public static final ByteBufferSerializer be = new ByteBufferSerializer();
56 | public static final DynamicCompositeSerializer ce = new DynamicCompositeSerializer();
57 | public static final UUIDSerializer ue = new UUIDSerializer();
58 | public static final LongSerializer le = new LongSerializer();
59 | public static final BytesArraySerializer bae = new BytesArraySerializer();
60 |
61 | static EmbeddedServerHelper embedded;
62 |
63 | static Cluster cluster;
64 | static Keyspace ko;
65 |
66 | @Test
67 | public void testIndexes() throws IOException, TTransportException,
68 | InterruptedException, ConfigurationException {
69 |
70 | // Create a container entity
71 |
72 | UUID g1 = createEntity("company");
73 |
74 | ContainerCollection<UUID> container = new ContainerCollection<UUID>(g1,
75 | "employees");
76 | Set<ContainerCollection<UUID>> containers = new LinkedHashSet<ContainerCollection<UUID>>();
77 | containers.add(container);
78 |
79 | // Create a set of items to add to the container
80 |
81 | UUID e1 = createEntity("employee");
82 | UUID e2 = createEntity("employee");
83 | UUID e3 = createEntity("employee");
84 |
85 | // Create container/item relationship
86 |
87 | addEntityToCollection(container, e1);
88 | addEntityToCollection(container, e2);
89 | addEntityToCollection(container, e3);
90 |
91 | // Check the entities in the container
92 |
93 | List<UUID> entities = getEntitiesInCollection(container);
94 | assertEquals(3, entities.size());
95 |
96 | // Set name column values
97 |
98 | setEntityColumn(e1, "name", "bob", containers, se);
99 |
100 | setEntityColumn(e2, "name", "fred", containers, se);
101 |
102 | setEntityColumn(e3, "name", "bill", containers, se);
103 |
104 | // Do an exact match search for name column
105 |
106 | logger.info("SELECT WHERE name = 'fred'");
107 |
108 | List<UUID> results = searchContainer(container, "name", "fred");
109 |
110 | logger.info(results.size() + " results found");
111 |
112 | assertEquals(1, results.size());
113 | assertTrue(results.get(0).equals(e2));
114 |
115 | logger.info("Result found is " + results.get(0));
116 |
117 | // Change the value of a name column and make sure the old value is no
118 | // longer in the index
119 |
120 | setEntityColumn(e2, "name", "steve", containers, se);
121 |
122 | logger.info("SELECT WHERE name = 'fred'");
123 |
124 | results = searchContainer(container, "name", "fred");
125 |
126 | logger.info(results.size() + " results found");
127 |
128 | assertEquals(0, results.size());
129 |
130 | // Do a range search
131 |
132 | logger.info("SELECT WHERE name >= 'bill' AND name < 'c'");
133 |
134 | results = searchContainer(container, "name", "bill", "c", false);
135 |
136 | logger.info(results.size() + " results found");
137 |
138 | assertEquals(2, results.size());
139 |
140 | // Set column values for height
141 |
142 | setEntityColumn(e1, "height", (long) 5, containers, le);
143 |
144 | setEntityColumn(e2, "height", (long) 6, containers, le);
145 |
146 | setEntityColumn(e3, "height", (long) 7, containers, le);
147 |
148 | // Do an numeric exact match search for height
149 |
150 | logger.info("SELECT WHERE height = 6");
151 |
152 | results = searchContainer(container, "height", 6);
153 |
154 | logger.info(results.size() + " results found");
155 |
156 | assertEquals(1, results.size());
157 |
158 | // Do a numeric range search for height
159 |
160 | logger.info("SELECT WHERE height >= 6 AND height < 10");
161 |
162 | results = searchContainer(container, "height", 6, 10, false);
163 |
164 | logger.info(results.size() + " results found");
165 |
166 | assertEquals(2, results.size());
167 |
168 | // Change a numeric column value and make sure it's no longer in the
169 | // index
170 |
171 | setEntityColumn(e3, "height", (long) 5, containers, le);
172 |
173 | results = searchContainer(container, "height", 6, 10, false);
174 |
175 | logger.info(results.size() + " results found");
176 |
177 | assertEquals(1, results.size());
178 |
179 | // Set byte values in columns
180 |
181 | setEntityColumn(e1, "bytes", new byte[] { 1, 2, 3 }, containers, bae);
182 |
183 | setEntityColumn(e2, "bytes", new byte[] { 1, 2, 4 }, containers, bae);
184 |
185 | setEntityColumn(e3, "bytes", new byte[] { 1, 2, 5 }, containers, bae);
186 |
187 | // Do a byte array exact match search
188 |
189 | results = searchContainer(container, "bytes", new byte[] { 1, 2, 4 });
190 |
191 | logger.info(results.size() + " results found");
192 |
193 | assertEquals(1, results.size());
194 |
195 | // Do a byte array range search
196 |
197 | results = searchContainer(container, "bytes", new byte[] { 1, 2, 4 },
198 | new byte[] { 10 }, false);
199 |
200 | logger.info(results.size() + " results found");
201 |
202 | assertEquals(2, results.size());
203 |
204 | // Store some text columns
205 |
206 | setEntityColumn(e1, "location", "san francisco", containers, se);
207 |
208 | setEntityColumn(e2, "location", "san diego", containers, se);
209 |
210 | setEntityColumn(e3, "location", "santa clara", containers, se);
211 |
212 | // Do a range search exclusive on the same value for start and end and
213 | // make sure we get 0 results
214 |
215 | results = searchContainer(container, "location", "san francisco",
216 | "san francisco", false);
217 |
218 | logger.info(results.size() + " results found");
219 |
220 | assertEquals(0, results.size());
221 |
222 | // Do a range search inclusive on the same value for start and end and
223 | // make sure we get 1 result
224 |
225 | results = searchContainer(container, "location", "san francisco",
226 | "san francisco", true);
227 |
228 | logger.info(results.size() + " results found");
229 |
230 | assertEquals(1, results.size());
231 |
232 | }
233 |
234 | @BeforeClass
235 | public static void setup() throws TTransportException, IOException,
236 | InterruptedException, ConfigurationException {
237 | embedded = new EmbeddedServerHelper();
238 | embedded.setup();
239 |
240 | cluster = getOrCreateCluster("MyCluster", "127.0.0.1:9170");
241 | ko = createKeyspace(KEYSPACE, cluster);
242 |
243 | ArrayList<CfDef> cfDefList = new ArrayList<CfDef>(2);
244 |
245 | setupColumnFamilies(cfDefList);
246 |
247 | makeKeyspace(cluster, KEYSPACE,
248 | "org.apache.cassandra.locator.SimpleStrategy", 1, cfDefList);
249 |
250 | }
251 |
252 | @AfterClass
253 | public static void teardown() throws IOException {
254 | EmbeddedServerHelper.teardown();
255 | embedded = null;
256 | }
257 |
258 | /**
259 | * Create the four required column families for values and indexes.
260 | *
261 | * @param cfDefList
262 | */
263 | public static void setupColumnFamilies(List<CfDef> cfDefList) {
264 |
265 | createCF(IndexedCollections.DEFAULT_ITEM_CF,
266 | BytesType.class.getSimpleName(), cfDefList);
267 |
268 | createCF(IndexedCollections.DEFAULT_COLLECTION_CF,
269 | TimeUUIDType.class.getSimpleName(), cfDefList);
270 |
271 | createCF(IndexedCollections.DEFAULT_COLLECTION_INDEX_CF,
272 | DYNAMICCOMPOSITETYPE.getTypeName()
273 | + DEFAULT_DYNAMIC_COMPOSITE_ALIASES, cfDefList);
274 |
275 | createCF(IndexedCollections.DEFAULT_ITEM_INDEX_ENTRIES,
276 | DYNAMICCOMPOSITETYPE.getTypeName()
277 | + DEFAULT_DYNAMIC_COMPOSITE_ALIASES, cfDefList);
278 |
279 | }
280 |
281 | public static void createCF(String name, String comparator_type,
282 | List<CfDef> cfDefList) {
283 | cfDefList.add(new CfDef(KEYSPACE, name)
284 | .setComparator_type(comparator_type).setKey_cache_size(0)
285 | .setRow_cache_size(0).setGc_grace_seconds(86400));
286 | }
287 |
288 | public static void makeKeyspace(Cluster cluster, String name,
289 | String strategy, int replicationFactor, List<CfDef> cfDefList) {
290 |
291 | if (cfDefList == null) {
292 | cfDefList = new ArrayList<CfDef>();
293 | }
294 |
295 | try {
296 | KsDef ksDef = new KsDef(name, strategy, cfDefList);
297 | cluster.addKeyspace(new ThriftKsDef(ksDef));
298 | return;
299 | } catch (Throwable e) {
300 | logger.error("Exception while creating keyspace, " + name
301 | + " - probably already exists", e);
302 | }
303 |
304 | for (CfDef cfDef : cfDefList) {
305 | try {
306 | cluster.addColumnFamily(new ThriftCfDef(cfDef));
307 | } catch (Throwable e) {
308 | logger.error("Exception while creating CF, " + cfDef.getName()
309 | + " - probably already exists", e);
310 | }
311 | }
312 | }
313 |
314 | public static java.util.UUID newTimeUUID() {
315 | com.eaio.uuid.UUID eaioUUID = new com.eaio.uuid.UUID();
316 | return new UUID(eaioUUID.time, eaioUUID.clockSeqAndNode);
317 | }
318 |
319 | /*
320 | * Convenience methods for wrapping IndexedCollections methods
321 | */
322 |
323 | public UUID createEntity(String type) {
324 | UUID id = newTimeUUID();
325 | createMutator(ko, ue).insert(id, IndexedCollections.DEFAULT_ITEM_CF,
326 | createColumn("type", type, se, se));
327 | return id;
328 | }
329 |
330 | public void addEntityToCollection(ContainerCollection<UUID> container,
331 | UUID itemEntity) {
332 | IndexedCollections.addItemToCollection(ko, container, itemEntity,
333 | IndexedCollections.defaultCFSet, ue);
334 | }
335 |
336 | public List<UUID> getEntitiesInCollection(
337 | ContainerCollection<UUID> container) {
338 | return IndexedCollections.getItemsInCollection(ko, container,
339 | IndexedCollections.defaultCFSet, ue);
340 | }
341 |
342 | public static <V> void setEntityColumn(UUID itemEntity, String columnName,
343 | V columnValue, Set<ContainerCollection<UUID>> containers,
344 | Serializer<V> valueSerializer) {
345 | IndexedCollections.setItemColumn(ko, itemEntity, columnName,
346 | columnValue, containers, IndexedCollections.defaultCFSet, ue,
347 | se, valueSerializer, ue);
348 | }
349 |
350 | public static List<UUID> searchContainer(
351 | ContainerCollection<UUID> container, String columnName,
352 | Object searchValue) {
353 |
354 | return IndexedCollections.searchContainer(ko, container, columnName,
355 | searchValue, null, 100, false, IndexedCollections.defaultCFSet,
356 | ue, ue, se);
357 | }
358 |
359 | public static List<UUID> searchContainer(
360 | ContainerCollection<UUID> container, String columnName,
361 | Object startValue, Object endValue, boolean inclusive) {
362 |
363 | return IndexedCollections.searchContainer(ko, container, columnName,
364 | startValue, endValue, inclusive, null, 100, false,
365 | IndexedCollections.defaultCFSet, ue, ue, se);
366 | }
367 |
368 | }
369 |
--------------------------------------------------------------------------------
/src/test/resources/cassandra.yaml:
--------------------------------------------------------------------------------
1 | # Cassandra storage config YAML
2 |
3 | # NOTE:
4 | # See http://wiki.apache.org/cassandra/StorageConfiguration for
5 | # full explanations of configuration directives
6 | # /NOTE
7 |
8 | # The name of the cluster. This is mainly used to prevent machines in
9 | # one logical cluster from joining another.
10 | cluster_name: 'Test Cluster'
11 |
12 | # You should always specify InitialToken when setting up a production
13 | # cluster for the first time, and often when adding capacity later.
14 | # The principle is that each node should be given an equal slice of
15 | # the token ring; see http://wiki.apache.org/cassandra/Operations
16 | # for more details.
17 | #
18 | # If blank, Cassandra will request a token bisecting the range of
19 | # the heaviest-loaded existing node. If there is no load information
20 | # available, such as is the case with a new cluster, it will pick
21 | # a random token, which will lead to hot spots.
22 | initial_token:
23 |
24 | # Set to true to make new [non-seed] nodes automatically migrate data
25 | # to themselves from the pre-existing nodes in the cluster. Defaults
26 | # to false because you can only bootstrap N machines at a time from
27 | # an existing cluster of N, so if you are bringing up a cluster of
28 | # 10 machines with 3 seeds you would have to do it in stages. Leaving
29 | # this off for the initial start simplifies that.
30 | auto_bootstrap: false
31 |
32 | # See http://wiki.apache.org/cassandra/HintedHandoff
33 | hinted_handoff_enabled: true
34 | # this defines the maximum amount of time a dead host will have hints
35 | # generated. After it has been dead this long, hints will be dropped.
36 | max_hint_window_in_ms: 3600000 # one hour
37 | # Sleep this long after delivering each row or row fragment
38 | hinted_handoff_throttle_delay_in_ms: 50
39 |
40 | # authentication backend, implementing IAuthenticator; used to identify users
41 | authenticator: org.apache.cassandra.auth.AllowAllAuthenticator
42 |
43 | # authorization backend, implementing IAuthority; used to limit access/provide permissions
44 | authority: org.apache.cassandra.auth.AllowAllAuthority
45 |
46 | # The partitioner is responsible for distributing rows (by key) across
47 | # nodes in the cluster. Any IPartitioner may be used, including your
48 | # own as long as it is on the classpath. Out of the box, Cassandra
49 | # provides org.apache.cassandra.dht.RandomPartitioner
50 | # org.apache.cassandra.dht.ByteOrderedPartitioner,
51 | # org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated),
52 | # and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner
53 | # (deprecated).
54 | #
55 | # - RandomPartitioner distributes rows across the cluster evenly by md5.
56 | # When in doubt, this is the best option.
57 | # - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows
58 | # scanning rows in key order, but the ordering can generate hot spots
59 | # for sequential insertion workloads.
60 | # - OrderPreservingPartitioner is an obsolete form of BOP that stores
61 | # keys in a less-efficient format and only works with keys that are
62 | # UTF8-encoded Strings.
63 | # - CollatingOPP collates according to EN,US rules rather than lexical byte
64 | # ordering. Use this as an example if you need custom collation.
65 | #
66 | # See http://wiki.apache.org/cassandra/Operations for more on
67 | # partitioners and token selection.
68 | partitioner: org.apache.cassandra.dht.RandomPartitioner
69 |
70 | # directories where Cassandra should store data on disk.
71 | data_file_directories:
72 | - ./tmp/data
73 |
74 | # commit log
75 | commitlog_directory: ./tmp/commitlog
76 |
77 | # saved caches
78 | saved_caches_directory: ./tmp/saved_caches
79 |
80 | # Size to allow commitlog to grow to before creating a new segment
81 | commitlog_rotation_threshold_in_mb: 128
82 |
83 | # commitlog_sync may be either "periodic" or "batch."
84 | # When in batch mode, Cassandra won't ack writes until the commit log
85 | # has been fsynced to disk. It will wait up to
86 | # CommitLogSyncBatchWindowInMS milliseconds for other writes, before
87 | # performing the sync.
88 | commitlog_sync: periodic
89 |
90 | # the other option is "periodic" where writes may be acked immediately
91 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms
92 | # milliseconds.
93 | commitlog_sync_period_in_ms: 10000
94 |
95 | # any class that implements the SeedProvider interface and has a constructor that takes a Map of
96 | # parameters will do.
97 | seed_provider:
98 | # Addresses of hosts that are deemed contact points.
99 | # Cassandra nodes use this list of hosts to find each other and learn
100 | # the topology of the ring. You must change this if you are running
101 | # multiple nodes!
102 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider
103 | parameters:
104 | # seeds is actually a comma-delimited list of addresses.
105 | - seeds: "127.0.0.1"
106 |
107 | # emergency pressure valve: each time heap usage after a full (CMS)
108 | # garbage collection is above this fraction of the max, Cassandra will
109 | # flush the largest memtables.
110 | #
111 | # Set to 1.0 to disable. Setting this lower than
112 | # CMSInitiatingOccupancyFraction is not likely to be useful.
113 | #
114 | # RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY:
115 | # it is most effective under light to moderate load, or read-heavy
116 | # workloads; under truly massive write load, it will often be too
117 | # little, too late.
118 | flush_largest_memtables_at: 0.75
119 |
120 | # emergency pressure valve #2: the first time heap usage after a full
121 | # (CMS) garbage collection is above this fraction of the max,
122 | # Cassandra will reduce cache maximum _capacity_ to the given fraction
123 | # of the current _size_. Should usually be set substantially above
124 | # flush_largest_memtables_at, since that will have less long-term
125 | # impact on the system.
126 | #
127 | # Set to 1.0 to disable. Setting this lower than
128 | # CMSInitiatingOccupancyFraction is not likely to be useful.
129 | reduce_cache_sizes_at: 0.85
130 | reduce_cache_capacity_to: 0.6
131 |
132 | # For workloads with more data than can fit in memory, Cassandra's
133 | # bottleneck will be reads that need to fetch data from
134 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in
135 | # order to allow the operations to enqueue low enough in the stack
136 | # that the OS and drives can reorder them.
137 | #
138 | # On the other hand, since writes are almost never IO bound, the ideal
139 | # number of "concurrent_writes" is dependent on the number of cores in
140 | # your system; (8 * number_of_cores) is a good rule of thumb.
141 | concurrent_reads: 32
142 | concurrent_writes: 32
143 |
144 | # Total memory to use for memtables. Cassandra will flush the largest
145 | # memtable when this much memory is used. Prefer using this to
146 | # the older, per-ColumnFamily memtable flush thresholds.
147 | # If omitted, Cassandra will set it to 1/3 of the heap.
148 | # If set to 0, only the old flush thresholds are used.
149 | # memtable_total_space_in_mb: 2048
150 |
151 | # This sets the number of memtable flush writer threads. These will
152 | # be blocked by disk io, and each one will hold a memtable in memory
153 | # while blocked. If you have a large heap and many data directories,
154 | # you can increase this value for better flush performance.
155 | # By default this will be set to the number of data directories defined.
156 | #memtable_flush_writers: 1
157 |
158 | # the number of full memtables to allow pending flush, that is,
159 | # waiting for a writer thread. At a minimum, this should be set to
160 | # the maximum number of secondary indexes created on a single CF.
161 | memtable_flush_queue_size: 4
162 |
163 | # Buffer size to use when performing contiguous column slices.
164 | # Increase this to the size of the column slices you typically perform
165 | sliced_buffer_size_in_kb: 64
166 |
167 | # TCP port, for commands and data
168 | storage_port: 7000
169 |
170 | # Address to bind to and tell other Cassandra nodes to connect to. You
171 | # _must_ change this if you want multiple nodes to be able to
172 | # communicate!
173 | #
174 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This
175 | # will always do the Right Thing *if* the node is properly configured
176 | # (hostname, name resolution, etc), and the Right Thing is to use the
177 | # address associated with the hostname (it might not be).
178 | #
179 | # Setting this to 0.0.0.0 is always wrong.
180 | listen_address: localhost
181 |
182 | # The address to bind the Thrift RPC service to -- clients connect
183 | # here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if
184 | # you want Thrift to listen on all interfaces.
185 | #
186 | # Leaving this blank has the same effect it does for ListenAddress,
187 | # (i.e. it will be based on the configured hostname of the node).
188 | rpc_address: localhost
189 | # port for Thrift to listen for clients on
190 | rpc_port: 9170
191 |
192 | # enable or disable keepalive on rpc connections
193 | rpc_keepalive: true
194 |
195 | # Cassandra uses thread-per-client for client RPC. This can
196 | # be expensive in memory used for thread stack for a large
197 | # enough number of clients. (Hence, connection pooling is
198 | # very, very strongly recommended.)
199 | #
200 | # Uncomment rpc_min|max|thread to set request pool size.
201 | # You would primarily set max as a safeguard against misbehaved
202 | # clients; if you do hit the max, Cassandra will block until
203 | # one disconnects before accepting more. The defaults are
204 | # min of 16 and max unlimited.
205 | #
206 | # rpc_min_threads: 16
207 | # rpc_max_threads: 2048
208 |
209 | # uncomment to set socket buffer sizes on rpc connections
210 | # rpc_send_buff_size_in_bytes:
211 | # rpc_recv_buff_size_in_bytes:
212 |
213 | # Frame size for thrift (maximum field length).
214 | # 0 disables TFramedTransport in favor of TSocket. This option
215 | # is deprecated; we strongly recommend using Framed mode.
216 | thrift_framed_transport_size_in_mb: 15
217 |
218 | # The max length of a thrift message, including all fields and
219 | # internal thrift overhead.
220 | thrift_max_message_length_in_mb: 16
221 |
222 | # Set to true to have Cassandra create a hard link to each sstable
223 | # flushed or streamed locally in a backups/ subdirectory of the
224 | # Keyspace data. Removing these links is the operator's
225 | # responsibility.
226 | incremental_backups: false
227 |
228 | # Whether or not to take a snapshot before each compaction. Be
229 | # careful using this option, since Cassandra won't clean up the
230 | # snapshots for you. Mostly useful if you're paranoid when there
231 | # is a data format change.
232 | snapshot_before_compaction: false
233 |
234 | # change this to increase the compaction thread's priority. In java, 1 is the
235 | # lowest priority and that is our default.
236 | # compaction_thread_priority: 1
237 |
238 | # Add column indexes to a row after its contents reach this size.
239 | # Increase if your column values are large, or if you have a very large
240 | # number of columns. The competing causes are, Cassandra has to
241 | # deserialize this much of the row to read a single column, so you want
242 | # it to be small - at least if you do many partial-row reads - but all
243 | # the index data is read for each access, so you don't want to generate
244 | # that wastefully either.
245 | column_index_size_in_kb: 64
246 |
247 | # Size limit for rows being compacted in memory. Larger rows will spill
248 | # over to disk and use a slower two-pass compaction process. A message
249 | # will be logged specifying the row key.
250 | in_memory_compaction_limit_in_mb: 64
251 |
252 | # Number of compaction threads. This defaults to the number of processors,
253 | # enabling multiple compactions to execute at once. Using more than one
254 | # thread is highly recommended to preserve read performance in a mixed
255 | # read/write workload, as this keeps sstables from accumulating during long
256 | # running compactions. The default is usually fine and if you experience
257 | # problems with compaction running too slowly or too fast, you should look at
258 | # compaction_throughput_mb_per_sec first.
259 | # Uncomment to make compaction mono-threaded.
260 | #concurrent_compactors: 1
261 |
262 | # Throttles compaction to the given total throughput across the entire
263 | # system. The faster you insert data, the faster you need to compact in
264 | # order to keep the sstable count down, but in general, setting this to
265 | # 16 to 32 times the rate you are inserting data is more than sufficient.
266 | # Setting this to 0 disables throttling.
267 | compaction_throughput_mb_per_sec: 16
268 |
269 | # Track cached row keys during compaction, and re-cache their new
270 | # positions in the compacted sstable. Disable if you use really large
271 | # key caches.
272 | compaction_preheat_key_cache: true
273 |
274 | # Time to wait for a reply from other nodes before failing the command
275 | rpc_timeout_in_ms: 10000
276 |
277 | # phi value that must be reached for a host to be marked down.
278 | # most users should never need to adjust this.
279 | # phi_convict_threshold: 8
280 |
281 | # endpoint_snitch -- Set this to a class that implements
282 | # IEndpointSnitch, which will let Cassandra know enough
283 | # about your network topology to route requests efficiently.
284 | # Out of the box, Cassandra provides
285 | # - org.apache.cassandra.locator.SimpleSnitch:
286 | # Treats Strategy order as proximity. This improves cache locality
287 | # when disabling read repair, which can further improve throughput.
288 | # - org.apache.cassandra.locator.RackInferringSnitch:
289 | # Proximity is determined by rack and data center, which are
290 | # assumed to correspond to the 3rd and 2nd octet of each node's
291 | # IP address, respectively
292 | # org.apache.cassandra.locator.PropertyFileSnitch:
293 | # - Proximity is determined by rack and data center, which are
294 | # explicitly configured in cassandra-topology.properties.
295 | endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch
296 |
297 | # dynamic_snitch -- This boolean controls whether the above snitch is
298 | # wrapped with a dynamic snitch, which will monitor read latencies
299 | # and avoid reading from hosts that have slowed (due to compaction,
300 | # for instance)
301 | dynamic_snitch: true
302 | # controls how often to perform the more expensive part of host score
303 | # calculation
304 | dynamic_snitch_update_interval_in_ms: 100
305 | # controls how often to reset all host scores, allowing a bad host to
306 | # possibly recover
307 | dynamic_snitch_reset_interval_in_ms: 600000
308 | # if set greater than zero and read_repair_chance is < 1.0, this will allow
309 | # 'pinning' of replicas to hosts in order to increase cache capacity.
310 | # The badness threshold will control how much worse the pinned host has to be
311 | # before the dynamic snitch will prefer other replicas over it. This is
312 | # expressed as a double which represents a percentage. Thus, a value of
313 | # 0.2 means Cassandra would continue to prefer the static snitch values
314 | # until the pinned host was 20% worse than the fastest.
315 | dynamic_snitch_badness_threshold: 0.0
316 |
317 | # request_scheduler -- Set this to a class that implements
318 | # RequestScheduler, which will schedule incoming client requests
319 | # according to the specific policy. This is useful for multi-tenancy
320 | # with a single Cassandra cluster.
321 | # NOTE: This is specifically for requests from the client and does
322 | # not affect inter node communication.
323 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place
324 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of
325 | # client requests to a node with a separate queue for each
326 | # request_scheduler_id. The scheduler is further customized by
327 | # request_scheduler_options as described below.
328 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler
329 |
330 | # Scheduler Options vary based on the type of scheduler
331 | # NoScheduler - Has no options
332 | # RoundRobin
333 | # - throttle_limit -- The throttle_limit is the number of in-flight
334 | # requests per client. Requests beyond
335 | # that limit are queued up until
336 | # running requests can complete.
337 | # The value of 80 here is twice the number of
338 | # concurrent_reads + concurrent_writes.
339 | # - default_weight -- default_weight is optional and allows for
340 | # overriding the default which is 1.
341 | # - weights -- Weights are optional and will default to 1 or the
342 | # overridden default_weight. The weight translates into how
343 | # many requests are handled during each turn of the
344 | # RoundRobin, based on the scheduler id.
345 | #
346 | # request_scheduler_options:
347 | # throttle_limit: 80
348 | # default_weight: 5
349 | # weights:
350 | # Keyspace1: 1
351 | # Keyspace2: 5
352 |
353 | # request_scheduler_id -- An identifier based on which to perform
354 | # the request scheduling. Currently the only valid option is keyspace.
355 | # request_scheduler_id: keyspace
356 |
357 | # The Index Interval determines how large the sampling of row keys
358 | # is for a given SSTable. The larger the sampling, the more effective
359 | # the index is at the cost of space.
360 | index_interval: 128
361 |
362 | # Enable or disable inter-node encryption
363 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that
364 | # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher
365 | # suite for authentication, key exchange and encryption of the actual data transfers.
366 | # NOTE: No custom encryption options are enabled at the moment
367 | # The available internode options are : all, none
368 | #
369 | # The passwords used in these options must match the passwords used when generating
370 | # the keystore and truststore. For instructions on generating these files, see:
371 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore
372 | encryption_options:
373 | internode_encryption: none
374 | keystore: conf/.keystore
375 | keystore_password: cassandra
376 | truststore: conf/.truststore
377 | truststore_password: cassandra
378 |
--------------------------------------------------------------------------------
/src/main/java/indexedcollections/IndexedCollections.java:
--------------------------------------------------------------------------------
1 | package indexedcollections;
2 |
3 | /*
4 | *
5 | * Licensed to the Apache Software Foundation (ASF) under one
6 | * or more contributor license agreements. See the NOTICE file
7 | * distributed with this work for additional information
8 | * regarding copyright ownership. The ASF licenses this file
9 | * to you under the Apache License, Version 2.0 (the
10 | * "License"); you may not use this file except in compliance
11 | * with the License. You may obtain a copy of the License at
12 | *
13 | * http://www.apache.org/licenses/LICENSE-2.0
14 | *
15 | * Unless required by applicable law or agreed to in writing,
16 | * software distributed under the License is distributed on an
17 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 | * KIND, either express or implied. See the License for the
19 | * specific language governing permissions and limitations
20 | * under the License.
21 | *
22 | */
23 |
24 | import static me.prettyprint.hector.api.factory.HFactory.createColumn;
25 | import static me.prettyprint.hector.api.factory.HFactory.createMutator;
26 | import static me.prettyprint.hector.api.factory.HFactory.createSliceQuery;
27 |
28 | import java.math.BigInteger;
29 | import java.nio.ByteBuffer;
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | import java.util.Set;
33 | import java.util.UUID;
34 |
35 | import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
36 | import me.prettyprint.cassandra.serializers.BytesArraySerializer;
37 | import me.prettyprint.cassandra.serializers.DynamicCompositeSerializer;
38 | import me.prettyprint.cassandra.serializers.LongSerializer;
39 | import me.prettyprint.cassandra.serializers.SerializerTypeInferer;
40 | import me.prettyprint.cassandra.serializers.StringSerializer;
41 | import me.prettyprint.cassandra.serializers.TypeInferringSerializer;
42 | import me.prettyprint.cassandra.serializers.UUIDSerializer;
43 | import me.prettyprint.hector.api.Keyspace;
44 | import me.prettyprint.hector.api.Serializer;
45 | import me.prettyprint.hector.api.beans.AbstractComposite;
46 | import me.prettyprint.hector.api.beans.AbstractComposite.Component;
47 | import me.prettyprint.hector.api.beans.ColumnSlice;
48 | import me.prettyprint.hector.api.beans.DynamicComposite;
49 | import me.prettyprint.hector.api.beans.HColumn;
50 | import me.prettyprint.hector.api.factory.HFactory;
51 | import me.prettyprint.hector.api.mutation.Mutator;
52 | import me.prettyprint.hector.api.query.QueryResult;
53 | import me.prettyprint.hector.api.query.SliceQuery;
54 |
55 | import org.apache.log4j.Logger;
56 |
57 | /**
58 | * Simple indexing library using composite types
59 | * (https://github.com/edanuff/CassandraCompositeType) to implement indexed
60 | * collections in Cassandra.
61 | *
62 | * See http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html for a
63 | * detailed discussion of the technique used here.
64 | *
65 | * @author Ed Anuff
66 | * @see <a href=
67 | * "http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html">Secondary
68 | * indexes in Cassandra</a>
69 | * @see "org.apache.cassandra.db.marshal.CompositeType"
70 | *
71 | */
72 | public class IndexedCollections {
73 |
74 | private static final Logger logger = Logger
75 | .getLogger(IndexedCollections.class.getName());
76 |
77 | public static final String DEFAULT_ITEM_CF = "Item";
78 | public static final String DEFAULT_COLLECTION_CF = "Collection";
79 | public static final String DEFAULT_ITEM_INDEX_ENTRIES = "Item_Index_Entries";
80 | public static final String DEFAULT_COLLECTION_INDEX_CF = "Collection_Index";
81 |
82 | public static final byte VALUE_CODE_BYTES = 0;
83 | public static final byte VALUE_CODE_UTF8 = 1;
84 | public static final byte VALUE_CODE_UUID = 2;
85 | public static final byte VALUE_CODE_INT = 3;
86 | public static final byte VALUE_CODE_MAX = 127;
87 |
88 | public static final int DEFAULT_COUNT = 100;
89 | public static final int ALL_COUNT = 100000;
90 |
91 | public static final CollectionCFSet defaultCFSet = new CollectionCFSet();
92 |
93 | public static final StringSerializer se = new StringSerializer();
94 | public static final ByteBufferSerializer be = new ByteBufferSerializer();
95 | public static final BytesArraySerializer bae = new BytesArraySerializer();
96 | public static final DynamicCompositeSerializer ce = new DynamicCompositeSerializer();
97 | public static final LongSerializer le = new LongSerializer();
98 | public static final UUIDSerializer ue = new UUIDSerializer();
99 |
100 | public static UUID newTimeUUID() {
101 | com.eaio.uuid.UUID eaioUUID = new com.eaio.uuid.UUID();
102 | return new UUID(eaioUUID.time, eaioUUID.clockSeqAndNode);
103 | }
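// Index writes are stamped with a fresh time-based UUID; setItemColumn
// reads these back from the entries CF to find and delete stale index
// columns when a value changes.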
104 |
105 | /**
106 | * Convert values to be indexed into types that can be compared by
107 | * Cassandra: UTF8Type, UUIDType, IntegerType, and BytesType
108 | *
109 | * @param value
110 | * @return value transformed into String, UUID, BigInteger, or ByteBuffer
111 | */
112 | public static Object getIndexableValue(Object value) {
113 |
114 | if (value == null) {
115 | return null;
116 | }
117 |
118 | // Strings, UUIDs, and BigIntegers map to Cassandra
119 | // UTF8Type, UUIDType, and IntegerType
120 | if ((value instanceof String) || (value instanceof UUID)
121 | || (value instanceof BigInteger)) {
122 | return value;
123 | }
124 |
125 | // For any numeric values, turn them into a long
126 | // and make them BigIntegers for IntegerType
127 | if (value instanceof Number) {
128 | return BigInteger.valueOf(((Number) value).longValue());
129 | }
130 |
131 | // Anything else, we're going to have to use BytesType
132 | return TypeInferringSerializer.get().toByteBuffer(value);
133 | }
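// For illustration (hypothetical values):
// getIndexableValue("fred") -> "fred" (compared as UTF8Type)
// getIndexableValue(6) -> BigInteger.valueOf(6) (compared as IntegerType)
// getIndexableValue(new byte[] { 1, 2 }) -> ByteBuffer (compared as BytesType)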
134 |
135 | /**
136 | * The Cassandra DynamicCompositeType will complain if it is asked to
137 | * compare component values of two different types. The way to prevent this
138 | * and still allow indexes to store different dynamic values is to have a
139 | * value code component that precedes the actual indexed value component in
140 | * the composite. The DynamicCompositeType will first compare the two
141 | * components holding the value codes, and if they don't match, it won't
142 | * compare the next pair of components, so the DynamicCompositeType never
143 | * throws an error.
144 | *
145 | * @param value
146 | * @return value code
147 | */
148 | public static int getIndexableValueCode(Object value) {
149 | if (value instanceof String) {
150 | return VALUE_CODE_UTF8;
151 | } else if (value instanceof UUID) {
152 | return VALUE_CODE_UUID;
153 | } else if (value instanceof Number) {
154 | return VALUE_CODE_INT;
155 | } else {
156 | return VALUE_CODE_BYTES;
157 | }
158 | }
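// For illustration: composite(VALUE_CODE_UTF8, "fred", ...) and
// composite(VALUE_CODE_INT, BigInteger(6), ...) differ in their leading
// value code, so DynamicCompositeType orders them by that code alone and
// never compares the String against the BigInteger.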
159 |
160 | private static <IK> void addIndexInsertion(Mutator<ByteBuffer> batch,
161 | CollectionCFSet cf, String columnIndexKey, IK itemKey,
162 | Object columnValue, UUID ts_uuid, long timestamp) {
163 |
164 | logger.info("UPDATE " + cf.getIndex() + " SET composite("
165 | + getIndexableValueCode(columnValue) + ", "
166 | + getIndexableValue(columnValue) + ", " + itemKey + ", "
167 | + ts_uuid + ") = null WHERE KEY = " + columnIndexKey);
168 |
169 | DynamicComposite indexComposite = new DynamicComposite(
170 | getIndexableValueCode(columnValue),
171 | getIndexableValue(columnValue), itemKey, ts_uuid);
172 |
173 | batch.addInsertion(se.toByteBuffer(columnIndexKey), cf.getIndex(),
174 | HFactory.createColumn(indexComposite, new byte[0], timestamp,
175 | ce, bae));
176 |
177 | }
178 |
179 | private static <IK> void addIndexDeletion(Mutator<ByteBuffer> batch,
180 | CollectionCFSet cf, String columnIndexKey, IK itemKey,
181 | Object columnValue, UUID prev_timestamp, long timestamp) {
182 |
183 | logger.info("DELETE composite(" + getIndexableValueCode(columnValue)
184 | + ", " + getIndexableValue(columnValue) + ", " + itemKey + ", "
185 | + prev_timestamp + ") FROM " + cf.getIndex() + " WHERE KEY = "
186 | + columnIndexKey);
187 |
188 | DynamicComposite indexComposite = new DynamicComposite(
189 | getIndexableValueCode(columnValue),
190 | getIndexableValue(columnValue), itemKey, prev_timestamp);
191 |
192 | batch.addDeletion(se.toByteBuffer(columnIndexKey), cf.getIndex(),
193 | indexComposite, ce, timestamp);
194 | }
195 |
196 | private static <IK> void addEntriesInsertion(Mutator<ByteBuffer> batch,
197 | CollectionCFSet cf, IK itemKey, Object columnName,
198 | Object columnValue, UUID ts_uuid, Serializer<IK> itemKeySerializer,
199 | long timestamp) {
200 |
201 | logger.info("UPDATE " + cf.getEntries() + " SET composite("
202 | + columnName + ", " + ts_uuid + ") = composite(" + columnValue
203 | + ") WHERE KEY = " + itemKey);
204 |
205 | batch.addInsertion(itemKeySerializer.toByteBuffer(itemKey), cf
206 | .getEntries(), HFactory.createColumn(new DynamicComposite(
207 | columnName, ts_uuid), new DynamicComposite(columnValue),
208 | timestamp, ce, ce));
209 | }
210 |
211 | private static <IK> void addEntriesDeletion(Mutator<ByteBuffer> batch,
212 | CollectionCFSet cf, IK itemKey, DynamicComposite columnName,
213 | Object columnValue, UUID prev_timestamp,
214 | Serializer<IK> itemKeySerializer, long timestamp) {
215 |
216 | logger.info("DELETE composite(" + columnName + ", " + prev_timestamp
217 | + ") FROM " + cf.getEntries() + " WHERE KEY = " + itemKey);
218 |
219 | batch.addDeletion(itemKeySerializer.toByteBuffer(itemKey),
220 | cf.getEntries(), columnName, ce, timestamp);
221 |
222 | }
223 |
224 | /**
225 | * Sets the item column value for an item contained in a set of collections.
226 | *
227 | * @param <CK>
228 | * the container's key type
229 | * @param <IK>
230 | * the item's key type
231 | * @param <N>
232 | * the item's column name type
233 | * @param <V>
234 | * the item's column value type
235 | * @param ko
236 | * the keyspace operator
237 | * @param itemKey
238 | * the item row key
239 | * @param columnName
240 | * the name of the column to set
241 | * @param columnValue
242 | * the value to set the column to
243 | * @param containers
244 | * the set of containers the item is in
245 | * @param cf
246 | * the column families to use
247 | * @param itemKeySerializer
248 | * the item key serializer
249 | * @param nameSerializer
250 | * the column name serializer
251 | * @param valueSerializer
252 | * the column value serializer
253 | * @param containerKeySerializer
254 | * the container key serializer
255 | */
256 | public static <CK, IK, N, V> void setItemColumn(Keyspace ko, IK itemKey,
257 | N columnName, V columnValue,
258 | Set<ContainerCollection<CK>> containers, CollectionCFSet cf,
259 | Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer,
260 | Serializer<V> valueSerializer, Serializer<CK> containerKeySerializer) {
261 |
262 | logger.info("SET " + columnName + " = '" + columnValue + "' FOR ITEM "
263 | + itemKey);
264 |
265 | long timestamp = HFactory.createClock();
266 | Mutator<ByteBuffer> batch = createMutator(ko, be);
267 | UUID ts_uuid = newTimeUUID();
268 |
269 | // Get all known previous index entries for this item's
270 | // indexed column from the item's index entry list
271 |
272 | SliceQuery<IK, DynamicComposite, DynamicComposite> q = createSliceQuery(
273 | ko, itemKeySerializer, ce, ce);
274 | q.setColumnFamily(cf.getEntries());
275 | q.setKey(itemKey);
276 | q.setRange(new DynamicComposite(columnName, new UUID(0, 0)),
277 | new DynamicComposite(columnName, new UUID(Long.MAX_VALUE
278 | | Long.MIN_VALUE, Long.MAX_VALUE | Long.MIN_VALUE)),
279 | false, ALL_COUNT);
280 | QueryResult<ColumnSlice<DynamicComposite, DynamicComposite>> r = q
281 | .execute();
282 | ColumnSlice<DynamicComposite, DynamicComposite> slice = r.get();
283 | List<HColumn<DynamicComposite, DynamicComposite>> entries = slice
284 | .getColumns();
285 |
286 | logger.info(entries.size() + " previous values for " + columnName
287 | + " found in index for removal");
288 |
289 | // Delete all previous index entries from the item's index entry list
290 |
291 | for (HColumn<DynamicComposite, DynamicComposite> entry : entries) {
292 | UUID prev_timestamp = entry.getName().get(1, ue);
293 | Object prev_value = entry.getValue().get(0);
294 |
295 | addEntriesDeletion(batch, cf, itemKey, entry.getName(), prev_value,
296 | prev_timestamp, itemKeySerializer, timestamp);
297 | }
298 |
299 | // Add the new index entry to the item's index entry list
300 |
301 | if (columnValue != null) {
302 | addEntriesInsertion(batch, cf, itemKey, columnName, columnValue,
303 | ts_uuid, itemKeySerializer, timestamp);
304 | }
305 |
306 | for (ContainerCollection<CK> container : containers) {
307 |
308 | String columnIndexKey = container.getKey() + ":"
309 | + columnName.toString();
310 |
311 | // Delete all previous index entries from the container's
312 | // index
313 |
314 | for (HColumn<DynamicComposite, DynamicComposite> entry : entries) {
315 | UUID prev_timestamp = entry.getName().get(1, ue);
316 | Object prev_value = entry.getValue().get(0);
317 |
318 | addIndexDeletion(batch, cf, columnIndexKey, itemKey,
319 | prev_value, prev_timestamp, timestamp);
320 |
321 | }
322 |
323 | // Add the new index entry into the container's index
324 |
325 | if (columnValue != null) {
326 | addIndexInsertion(batch, cf, columnIndexKey, itemKey,
327 | columnValue, ts_uuid, timestamp);
328 | }
329 |
330 | }
331 |
332 | // Store the new column value into the item
333 | // If new value is null, delete the value instead
334 |
335 | if (columnValue != null) {
336 |
337 | logger.info("UPDATE " + cf.getItem() + " SET " + columnName + " = "
338 | + columnValue + " WHERE KEY = " + itemKey);
339 | batch.addInsertion(itemKeySerializer.toByteBuffer(itemKey), cf
340 | .getItem(), HFactory.createColumn(columnName, columnValue,
341 | timestamp, nameSerializer, valueSerializer));
342 | } else {
343 | logger.info("DELETE " + columnName + " FROM " + cf.getItem()
344 | + " WHERE KEY = " + itemKey);
345 | batch.addDeletion(itemKeySerializer.toByteBuffer(itemKey),
346 | cf.getItem(), columnName, nameSerializer, timestamp);
347 | }
348 |
349 | batch.execute();
350 |
351 | }
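// A hypothetical call, with ue/se as declared above:
//
// setItemColumn(ko, itemId, "name", "fred", containers,
// defaultCFSet, ue, se, se, ue);
//
// writes the column to the Item CF, records the (name, timeuuid) entry in
// Item_Index_Entries, and adds one index column per container to
// Collection_Index.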
352 |
353 | /**
354 | * Search container.
355 | *
356 | * @param <IK>
357 | * the item's key type
358 | * @param <CK>
359 | * the container's key type
360 | * @param <N>
361 | * the item's column name type
362 | * @param ko
363 | * the keyspace operator
364 | * @param container
365 | * the ContainerCollection (container key and collection name)
366 | * @param columnName
367 | * the item's column name
368 | * @param searchValue
369 | * the exact value for the specified column
370 | * @param startResult
371 | * the start result row key
372 | * @param count
373 | * the number of row keys to return
374 | * @param reversed
375 | * search in reverse order
376 | * @param cf
377 | * the column family set
378 | * @param containerKeySerializer
379 | * the container key serializer
380 | * @param itemKeySerializer
381 | * the item key serializer
382 | * @param nameSerializer
383 | * the column name serializer
384 | * @return the list of row keys for items whose column value matches
385 | */
386 | public static <CK, IK, N> List<IK> searchContainer(Keyspace ko,
387 | ContainerCollection<CK> container, N columnName,
388 | Object searchValue, IK startResult, int count, boolean reversed,
389 | CollectionCFSet cf, Serializer<CK> containerKeySerializer,
390 | Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer) {
391 |
392 | return searchContainer(ko, container, columnName, searchValue,
393 | searchValue, true, startResult, count, reversed, cf,
394 | containerKeySerializer, itemKeySerializer, nameSerializer);
395 | }
396 |
397 | /**
398 | * Search container.
399 | *
400 | * @param <IK>
401 | * the item's key type
402 | * @param <CK>
403 | * the container's key type
404 | * @param <N>
405 | * the item's column name type
406 | * @param ko
407 | * the keyspace operator
408 | * @param container
409 | * the ContainerCollection (container key and collection name)
410 | * @param columnName
411 | * the item's column name
412 | * @param startValue
413 | * the start value for the specified column (inclusive)
414 | * @param endValue
415 | * the end value for the specified column
416 | * @param inclusive
417 | * whether the end value for the specified column is inclusive
418 | * @param startResult
419 | * the start result row key
420 | * @param count
421 | * the number of row keys to return
422 | * @param reversed
423 | * search in reverse order
424 | * @param cf
425 | * the column family set
426 | * @param containerKeySerializer
427 | * the container key serializer
428 | * @param itemKeySerializer
429 | * the item key serializer
430 | * @param nameSerializer
431 | * the column name serializer
432 | * @return the list of row keys for items whose column value matches
433 | */
434 | @SuppressWarnings("unchecked")
435 | public static <CK, IK, N> List<IK> searchContainer(Keyspace ko,
436 | ContainerCollection<CK> container, N columnName, Object startValue,
437 | Object endValue, boolean inclusive, IK startResult, int count,
438 | boolean reversed, CollectionCFSet cf,
439 | Serializer<CK> containerKeySerializer,
440 | Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer) {
441 | List<IK> items = new ArrayList<IK>();
442 |
443 | String columnIndexKey = container.getKey() + ":"
444 | + columnName.toString();
445 |
446 | if (count == 0) {
447 | count = DEFAULT_COUNT;
448 | }
449 |
450 | SliceQuery<ByteBuffer, DynamicComposite, ByteBuffer> q = createSliceQuery(
451 | ko, be, ce, be);
452 | q.setColumnFamily(cf.getIndex());
453 | q.setKey(se.toByteBuffer(columnIndexKey));
454 |
455 | DynamicComposite start = null;
456 |
457 | if (startValue == null) {
458 | if (startResult != null) {
459 | start = new DynamicComposite(VALUE_CODE_BYTES, new byte[0],
460 | startResult);
461 | } else {
462 | start = new DynamicComposite(VALUE_CODE_BYTES, new byte[0]);
463 | }
464 | } else if (startResult != null) {
465 | start = new DynamicComposite(getIndexableValueCode(startValue),
466 | getIndexableValue(startValue), startResult);
467 | } else {
468 | start = new DynamicComposite(getIndexableValueCode(startValue),
469 | getIndexableValue(startValue));
470 | }
471 |
472 | DynamicComposite finish = null;
473 |
474 | if (endValue != null) {
475 | finish = new DynamicComposite(getIndexableValueCode(endValue),
476 | getIndexableValue(endValue));
477 | if (inclusive) {
478 | @SuppressWarnings("rawtypes")
479 | Component c = finish.getComponent(1);
480 | finish.setComponent(1, c.getValue(), c.getSerializer(),
481 | c.getComparator(),
482 | AbstractComposite.ComponentEquality.GREATER_THAN_EQUAL);
483 | }
484 | }
485 |
486 | q.setRange(start, finish, reversed, count);
487 | QueryResult<ColumnSlice<DynamicComposite, ByteBuffer>> r = q.execute();
488 | ColumnSlice<DynamicComposite, ByteBuffer> slice = r.get();
489 | List<HColumn<DynamicComposite, ByteBuffer>> results = slice
490 | .getColumns();
491 |
492 | if (results != null) {
493 | for (HColumn<DynamicComposite, ByteBuffer> result : results) {
494 | Object value = result.getName().get(1);
495 | logger.info("Value found: " + value);
496 |
497 | IK key = result.getName().get(2, itemKeySerializer);
498 | if (key != null) {
499 | items.add(key);
500 | }
501 | }
502 | }
503 |
504 | return items;
505 | }
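// A hypothetical range query: all items in a container whose "name" is
// between "bill" and "c" (end inclusive), using ue/se as declared above:
//
// List<UUID> keys = searchContainer(ko, container, "name", "bill", "c",
// true, null, 100, false, defaultCFSet, ue, ue, se);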
506 |
507 | /**
508 | * Adds the item to collection.
509 | *
510 | * @param <CK>
511 | * the container's key type
512 | * @param <IK>
513 | * the item's key type
514 | * @param ko
515 | * the keyspace operator
516 | * @param container
517 | * the ContainerCollection (container key and collection name)
518 | * @param itemKey
519 | * the item's row key
520 | * @param cf
521 | * the column families to use
524 | * @param itemKeySerializer
525 | * the item key serializer
526 | */
527 | public static <CK, IK> void addItemToCollection(Keyspace ko,
528 | ContainerCollection<CK> container, IK itemKey, CollectionCFSet cf,
529 | Serializer<IK> itemKeySerializer) {
530 |
531 | createMutator(ko, se).insert(
532 | container.getKey(),
533 | cf.getItems(),
534 | createColumn(itemKey, HFactory.createClock(),
535 | itemKeySerializer, le));
536 |
537 | }
538 |
539 | public static <CK, IK> List<IK> getItemsInCollection(Keyspace ko,
540 | ContainerCollection<CK> container, CollectionCFSet cf,
541 | Serializer<IK> itemKeySerializer) {
542 | List<IK> keys = new ArrayList<IK>();
543 | SliceQuery<String, IK, ByteBuffer> q = createSliceQuery(ko, se,
544 | itemKeySerializer, be);
545 | q.setColumnFamily(cf.getItems());
546 | q.setKey(container.getKey());
547 | q.setRange(null, null, false, ALL_COUNT);
548 | QueryResult<ColumnSlice<IK, ByteBuffer>> r = q.execute();
549 | ColumnSlice<IK, ByteBuffer> slice = r.get();
550 | List<HColumn<IK, ByteBuffer>> results = slice.getColumns();
551 | for (HColumn<IK, ByteBuffer> column : results) {
552 | keys.add(column.getName());
553 | }
554 | return keys;
555 | }
556 |
557 | @SuppressWarnings("unchecked")
558 | public static <T, K> T getAsType(K obj, Serializer<T> st) {
559 | Serializer<K> so = SerializerTypeInferer.getSerializer(obj);
560 | if (so == null) {
561 | return null;
562 | }
563 | if (so.getClass().equals(st.getClass())) {
564 | return (T) obj;
565 | }
566 | return st.fromByteBuffer(so.toByteBuffer(obj));
567 | }
568 |
569 | /**
570 | * CollectionCFSet contains the names of the four column families needed to
571 | * implement indexed collections. Default CF names are provided, but can be
572 | * anything that makes sense for the application.
573 | */
574 | public static class CollectionCFSet {
575 |
576 | private String item = DEFAULT_ITEM_CF;
577 | private String items = DEFAULT_COLLECTION_CF;
578 | private String index = DEFAULT_COLLECTION_INDEX_CF;
579 | private String entries = DEFAULT_ITEM_INDEX_ENTRIES;
580 |
581 | public CollectionCFSet() {
582 | }
583 |
584 | public CollectionCFSet(String item, String items, String index,
585 | String entries) {
586 | this.item = item;
587 | this.items = items;
588 | this.index = index;
589 | this.entries = entries;
590 | }
591 |
592 | public String getItem() {
593 | return item;
594 | }
595 |
596 | public void setItem(String item) {
597 | this.item = item;
598 | }
599 |
600 | public String getItems() {
601 | return items;
602 | }
603 |
604 | public void setItems(String items) {
605 | this.items = items;
606 | }
607 |
608 | public String getIndex() {
609 | return index;
610 | }
611 |
612 | public void setIndex(String index) {
613 | this.index = index;
614 | }
615 |
616 | public String getEntries() {
617 | return entries;
618 | }
619 |
620 | public void setEntries(String entries) {
621 | this.entries = entries;
622 | }
623 | }
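// A hypothetical custom set, in constructor order (item, items, index,
// entries):
//
// CollectionCFSet cfs = new CollectionCFSet("Users", "User_Collections",
// "User_Collection_Index", "User_Index_Entries");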
624 |
625 | /**
626 | * ContainerCollection represents the containing entity's key and collection
627 | * name. The assumption is that an entity can have multiple collections,
628 | * each with its own name.
629 | *
630 | * @param <CK>
631 | * the container's row key type
632 | */
633 | public static class ContainerCollection<CK> {
634 | private CK ownerKey;
635 | private String collectionName;
636 |
637 | public ContainerCollection(CK ownerKey, String collectionName) {
638 | this.ownerKey = ownerKey;
639 | this.collectionName = collectionName;
640 | }
641 |
642 | public CK getOwnerKey() {
643 | return ownerKey;
644 | }
645 |
646 | public void setOwnerKey(CK ownerKey) {
647 | this.ownerKey = ownerKey;
648 | }
649 |
650 | public String getCollectionName() {
651 | return collectionName;
652 | }
653 |
654 | public void setCollectionName(String collectionName) {
655 | this.collectionName = collectionName;
656 | }
657 |
658 | public String getKey() {
659 | return ownerKey + ":" + collectionName;
660 | }
661 |
662 | @Override
663 | public int hashCode() {
664 | final int prime = 31;
665 | int result = 1;
666 | result = prime
667 | * result
668 | + ((collectionName == null) ? 0 : collectionName.hashCode());
669 | result = prime * result
670 | + ((ownerKey == null) ? 0 : ownerKey.hashCode());
671 | return result;
672 | }
673 |
674 | @Override
675 | public boolean equals(Object obj) {
676 | if (this == obj) {
677 | return true;
678 | }
679 | if (obj == null) {
680 | return false;
681 | }
682 | if (getClass() != obj.getClass()) {
683 | return false;
684 | }
685 | @SuppressWarnings("rawtypes")
686 | ContainerCollection other = (ContainerCollection) obj;
687 | if (collectionName == null) {
688 | if (other.collectionName != null) {
689 | return false;
690 | }
691 | } else if (!collectionName.equals(other.collectionName)) {
692 | return false;
693 | }
694 | if (ownerKey == null) {
695 | if (other.ownerKey != null) {
696 | return false;
697 | }
698 | } else if (!ownerKey.equals(other.ownerKey)) {
699 | return false;
700 | }
701 | return true;
702 | }
703 | }
704 | }
705 |
--------------------------------------------------------------------------------