├── lib
│   └── cassandra-composite-type-0.0.1.jar
├── .gitignore
├── src
│   ├── test
│   │   ├── resources
│   │   │   ├── log4j.properties
│   │   │   └── cassandra.yaml
│   │   └── java
│   │       └── indexedcollections
│   │           └── IndexTest.java
│   └── main
│       └── java
│           └── indexedcollections
│               └── IndexedCollections.java
├── README
└── pom.xml

--------------------------------------------------------------------------------
/lib/cassandra-composite-type-0.0.1.jar:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/edanuff/CassandraIndexedCollections/HEAD/lib/cassandra-composite-type-0.0.1.jar

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea
2 | *.DS_Store
3 | hector.iml
4 | releases
5 | target
6 | tmp
7 | bin
8 | .classpath
9 | .project
10 | .settings
11 | out
12 | *.svn
13 | *.ipr
14 | *.iws
15 | DS_Store
16 | 
17 | /.DS_Store

--------------------------------------------------------------------------------
/src/test/resources/log4j.properties:
--------------------------------------------------------------------------------
1 | # Licensed to the Apache Software Foundation (ASF) under one
2 | # or more contributor license agreements. See the NOTICE file
3 | # distributed with this work for additional information
4 | # regarding copyright ownership. The ASF licenses this file
5 | # to you under the Apache License, Version 2.0 (the
6 | # "License"); you may not use this file except in compliance
7 | # with the License. You may obtain a copy of the License at
8 | #
9 | # http://www.apache.org/licenses/LICENSE-2.0
10 | #
11 | # Unless required by applicable law or agreed to in writing, software
12 | # distributed under the License is distributed on an "AS IS" BASIS,
13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 | # See the License for the specific language governing permissions and
15 | # limitations under the License.
16 | 
17 | # for production, you should probably set the root to INFO
18 | # and the pattern to %c instead of %l. (%l is slower.)
19 | 
20 | # output messages to stdout
21 | log4j.rootLogger=INFO,stdout
22 | 
23 | # stdout
24 | log4j.appender.stdout=org.apache.log4j.ConsoleAppender
25 | #log4j.appender.stdout.layout=org.apache.log4j.SimpleLayout
26 | log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
27 | log4j.appender.stdout.layout.ConversionPattern=%d %p (%t) [%c] - %m%n
28 | 
29 | log4j.category.org.apache=ERROR, stdout
30 | 

--------------------------------------------------------------------------------
/README:
--------------------------------------------------------------------------------
1 | Indexed Collections for Cassandra
2 | 
3 | NOTE: This has been updated to use the new built-in composite types in Cassandra 0.8.1
4 | 
5 | You will need to use Hector 0.8.0-2-SNAPSHOT or later and Cassandra 0.8.1 or later.
6 | 
7 | This is an implementation of the indexing technique described here:
8 | 
9 | http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html
10 | 
11 | The original article describes the use of a custom composite column comparator. A
12 | version of this comparator has recently been added to the latest version of Cassandra,
13 | meaning that it's no longer necessary to install anything on the Cassandra instance
14 | to handle composite types.
15 | 
16 | This is a simplified version of a more complex indexing scheme that's been in use
17 | for some time now as part of a large project. However, this simplified implementation was
18 | largely created from scratch and hasn't been extensively tested.
19 | 
20 | This indexing, as opposed to Cassandra's built-in secondary indexes, is completely
21 | dynamic. It's possible to create new indexes at any time, and the index value types don't
22 | have to be predefined, which makes it easier to use with JSON data.
23 | 
24 | One advantage of this indexing technique is that it combines relationships with mini-indexes,
25 | so that you can have a user who is friends with 5 other users and then be able to search the
26 | user's friend list. In this example, the user is what is referred to in the
27 | code as the "container" and the other users are "items".
28 | 
29 | For examples of use, look at the class IndexTest.
30 | 
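To make the container/item vocabulary concrete before the code, here is a minimal, hypothetical sketch of the friend-list example above, written against the API in IndexedCollections (see src/main below). It assumes an open Hector Keyspace named ko and the four default column families created the way IndexTest.setupColumnFamilies does; the identifiers user, friend, and ko are illustrative, not part of the repo.

    // A user (the container) owns a "friends" collection of other users (items).
    UUID user = IndexedCollections.newTimeUUID();
    UUID friend = IndexedCollections.newTimeUUID();
    ContainerCollection<UUID> friends = new ContainerCollection<UUID>(user, "friends");

    IndexedCollections.addItemToCollection(ko, friends, friend,
            IndexedCollections.defaultCFSet, IndexedCollections.ue);

    // Writing a column through setItemColumn also indexes it in every
    // collection that contains the item -- no index needs to be predefined.
    Set<ContainerCollection<UUID>> in = Collections.singleton(friends);
    IndexedCollections.setItemColumn(ko, friend, "name", "alice", in,
            IndexedCollections.defaultCFSet, IndexedCollections.ue,
            IndexedCollections.se, IndexedCollections.se, IndexedCollections.ue);

    // The friend list is now searchable by any indexed column.
    List<UUID> matches = IndexedCollections.searchContainer(ko, friends,
            "name", "alice", null, 100, false, IndexedCollections.defaultCFSet,
            IndexedCollections.ue, IndexedCollections.ue, IndexedCollections.se);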
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 | 
3 | 4.0.0
4 | cassandra-indexed-collections
5 | cassandra-indexed-collections
6 | 0.0.1
7 | CassandraIndexedCollections
8 | 
9 | 
10 | 
11 | org.apache.maven.plugins
12 | maven-surefire-plugin
13 | 2.6
14 | 
15 | 
16 | 
17 | ${basedir}/src/test/conf
18 | 
19 | always
20 | -Xmx512M -Xms512M
21 | 
22 | 
23 | 
24 | org.apache.maven.plugins
25 | maven-compiler-plugin
26 | 2.3.2
27 | 
28 | 1.6
29 | 1.6
30 | true
31 | true
32 | true
33 | true
34 | 
35 | 
36 | 
37 | org.apache.maven.plugins
38 | maven-install-plugin
39 | 2.3.1
40 | 
41 | 
42 | install cassandra-composite-types
43 | initialize
44 | 
45 | install-file
46 | 
47 | 
48 | jar
49 | cassandra-composite-type
50 | cassandra-composite-type
51 | 0.0.1
52 | ${basedir}/lib/cassandra-composite-type-0.0.1.jar
53 | 
54 | 
55 | 
56 | 
57 | 
58 | 
59 | 
60 | 
61 | org.apache.cassandra
62 | cassandra-all
63 | 0.8.1
64 | jar
65 | 
66 | 
67 | org.apache.cassandra
68 | cassandra-javautils
69 | 0.7.0
70 | test
71 | 
72 | 
73 | me.prettyprint
74 | hector-core
75 | 0.8.0-2-SNAPSHOT
76 | 
77 | 
78 | com.github.stephenc.eaio-uuid
79 | uuid
80 | 3.2.0
81 | 
82 | 
83 | junit
84 | junit
85 | 4.8.1
86 | test
87 | 
88 | 
89 | log4j
90 | log4j
91 | 1.2.14
92 | 
93 | 
94 | org.slf4j
95 | slf4j-api
96 | 1.6.1
97 | 
98 | 
99 | org.slf4j
100 | slf4j-log4j12
101 | 1.6.1
102 | 
103 | 
104 | org.perf4j
105 | perf4j
106 | 0.9.12
107 | 
108 | 
109 | 
110 | 
111 | codehaus
112 | codehaus
113 | http://repository.codehaus.org/
114 | 
115 | 
116 | nexus-snapshots
117 | Sonatype Nexus Snapshots
118 | http://oss.sonatype.org/content/repositories/snapshots
119 | 
120 | true
121 | 
122 | 
123 | 
124 | 

--------------------------------------------------------------------------------
/src/test/java/indexedcollections/IndexTest.java:
--------------------------------------------------------------------------------
1 | package indexedcollections;
2 | 
3 | import static me.prettyprint.hector.api.beans.DynamicComposite.DEFAULT_DYNAMIC_COMPOSITE_ALIASES;
4 | import static me.prettyprint.hector.api.ddl.ComparatorType.DYNAMICCOMPOSITETYPE;
5 | import static me.prettyprint.hector.api.factory.HFactory.createColumn;
6 | import static me.prettyprint.hector.api.factory.HFactory.createKeyspace;
7 | import static me.prettyprint.hector.api.factory.HFactory.createMutator;
8 | import static me.prettyprint.hector.api.factory.HFactory.getOrCreateCluster;
9 | import static org.junit.Assert.assertEquals;
10 | import static org.junit.Assert.assertTrue;
11 | import indexedcollections.IndexedCollections.ContainerCollection;
12 | 
13 | import java.io.IOException;
14 | import java.util.ArrayList;
15 | import java.util.LinkedHashSet;
16 | import java.util.List;
17 | import java.util.Set;
18 | import java.util.UUID;
19 | 
20 | import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
21 | import me.prettyprint.cassandra.serializers.BytesArraySerializer;
22 | import me.prettyprint.cassandra.serializers.DynamicCompositeSerializer;
23 | import me.prettyprint.cassandra.serializers.LongSerializer;
24 | import me.prettyprint.cassandra.serializers.StringSerializer;
25 | import me.prettyprint.cassandra.serializers.UUIDSerializer;
26 | import me.prettyprint.cassandra.service.ThriftCfDef;
27 | import me.prettyprint.cassandra.service.ThriftKsDef;
28 | import me.prettyprint.cassandra.testutils.EmbeddedServerHelper;
29 | import me.prettyprint.hector.api.Cluster;
30 | import me.prettyprint.hector.api.Keyspace;
31 | import me.prettyprint.hector.api.Serializer;
32 | 
33 | import org.apache.cassandra.config.ConfigurationException;
34 | import org.apache.cassandra.db.marshal.BytesType;
35 | import org.apache.cassandra.db.marshal.TimeUUIDType;
36 | import org.apache.cassandra.thrift.CfDef;
37 | import org.apache.cassandra.thrift.KsDef;
38 | import org.apache.log4j.Logger;
39 | import org.apache.thrift.transport.TTransportException;
40 | import org.junit.AfterClass;
41 | import org.junit.BeforeClass;
42 | import org.junit.Test;
43 | 
44 | /**
45 |  * Example class showing usage of IndexedCollections.
46 |  */
47 | public class IndexTest {
48 | 
49 |     private static final Logger logger = Logger.getLogger(IndexTest.class
50 |             .getName());
51 | 
52 |     public static final String KEYSPACE = "Keyspace";
53 | 
54 |     public static final StringSerializer se = new StringSerializer();
55 |     public static final ByteBufferSerializer be = new ByteBufferSerializer();
56 |     public static final DynamicCompositeSerializer ce = new DynamicCompositeSerializer();
57 |     public static final UUIDSerializer ue = new UUIDSerializer();
58 |     public static final LongSerializer le = new LongSerializer();
59 |     public static final BytesArraySerializer bae = new BytesArraySerializer();
60 | 
61 |     static EmbeddedServerHelper embedded;
62 | 
63 |     static Cluster cluster;
64 |     static Keyspace ko;
65 | 
66 |     @Test
67 |     public void testIndexes() throws IOException, TTransportException,
68 |             InterruptedException, ConfigurationException {
69 | 
70 |         // Create a container entity
71 | 
72 |         UUID g1 = createEntity("company");
73 | 
74 |         ContainerCollection<UUID> container = new ContainerCollection<UUID>(g1,
75 |                 "employees");
76 |         Set<ContainerCollection<UUID>> containers = new LinkedHashSet<ContainerCollection<UUID>>();
77 |         containers.add(container);
78 | 
79 |         // Create a set of items to add to the container
80 | 
81 |         UUID e1 = createEntity("employee");
82 |         UUID e2 = createEntity("employee");
83 |         UUID e3 = createEntity("employee");
84 | 
85 |         // Create container/item relationship
86 | 
87 |         addEntityToCollection(container, e1);
88 |         addEntityToCollection(container, e2);
89 |         addEntityToCollection(container, e3);
90 | 
91 |         // Check the entities in the container
92 | 
93 |         List<UUID> entities = getEntitiesInCollection(container);
94 |         assertEquals(3, entities.size());
95 | 
96 |         // Set name column values
97 | 
98 |         setEntityColumn(e1, "name", "bob", containers, se);
99 | 
100 |         setEntityColumn(e2, "name", "fred", containers, se);
101 | 
102 |         setEntityColumn(e3, "name", "bill", containers, se);
103 | 
104 |         // Do an exact match search for name column
105 | 
106 |         logger.info("SELECT WHERE name = 'fred'");
107 | 
108 |         List<UUID> results = searchContainer(container, "name", "fred");
109 | 
110 |         logger.info(results.size() + " results found");
111 | 
112 |         assertEquals(1, results.size());
113 |         assertTrue(results.get(0).equals(e2));
114 | 
115 |         logger.info("Result found is " + results.get(0));
116 | 
117 |         // Change the value of a name column and make sure the old value is no
118 |         // longer in the index
119 | 
120 |         setEntityColumn(e2, "name", "steve", containers, se);
121 | 
122 |         logger.info("SELECT WHERE name = 'fred'");
123 | 
124 |         results = searchContainer(container, "name", "fred");
125 | 
126 |         logger.info(results.size() + " results found");
127 | 
128 |         assertEquals(0, results.size());
129 | 
130 |         // Do a range search
131 | 
132 |         logger.info("SELECT WHERE name >= 'bill' AND name < 'c'");
133 | 
134 |         results = searchContainer(container, "name", "bill", "c", false);
135 | 
136 |         logger.info(results.size() + " results found");
137 | 
138 |         assertEquals(2, results.size());
139 | 
140 |         // Set column values for height
141 | 
142 |         setEntityColumn(e1, "height", (long) 5, containers, le);
143 | 
144 |         setEntityColumn(e2, "height", (long) 6, containers, le);
145 | 
146 |         setEntityColumn(e3, "height", (long) 7, containers, le);
147 | 
148 |         // Do a numeric exact match search for height
149 | 
150 |         logger.info("SELECT WHERE height = 6");
151 | 
152 |         results = searchContainer(container, "height", 6);
153 | 
154 |         logger.info(results.size() + " results found");
155 | 
156 |         assertEquals(1, results.size());
157 | 
158 |         // Do a numeric range search for height
159 | 
160 |         logger.info("SELECT WHERE height >= 6 AND height < 10");
161 | 
162 |         results = searchContainer(container, "height", 6, 10, false);
163 | 
164 |         logger.info(results.size() + " results found");
165 | 
166 |         assertEquals(2, results.size());
167 | 
168 |         // Change a numeric column value and make sure it's no longer in the
169 |         // index
170 | 
171 |         setEntityColumn(e3, "height", (long) 5, containers, le);
172 | 
173 |         results = searchContainer(container, "height", 6, 10, false);
174 | 
175 |         logger.info(results.size() + " results found");
176 | 
177 |         assertEquals(1, results.size());
178 | 
179 |         // Set byte values in columns
180 | 
181 |         setEntityColumn(e1, "bytes", new byte[] { 1, 2, 3 }, containers, bae);
182 | 
183 |         setEntityColumn(e2, "bytes", new byte[] { 1, 2, 4 }, containers, bae);
184 | 
185 |         setEntityColumn(e3, "bytes", new byte[] { 1, 2, 5 }, containers, bae);
186 | 
187 |         // Do a byte array exact match search
188 | 
189 |         results = searchContainer(container, "bytes", new byte[] { 1, 2, 4 });
190 | 
191 |         logger.info(results.size() + " results found");
192 | 
193 |         assertEquals(1, results.size());
194 | 
195 |         // Do a byte array range search
196 | 
197 |         results = searchContainer(container, "bytes", new byte[] { 1, 2, 4 },
198 |                 new byte[] { 10 }, false);
199 | 
200 |         logger.info(results.size() + " results found");
201 | 
202 |         assertEquals(2, results.size());
203 | 
204 |         // Store some text columns
205 | 
206 |         setEntityColumn(e1, "location", "san francisco", containers, se);
207 | 
208 |         setEntityColumn(e2, "location", "san diego", containers, se);
209 | 
210 |         setEntityColumn(e3, "location", "santa clara", containers, se);
211 | 
212 |         // Do a range search exclusive on the same value for start and end and
213 |         // make sure we get 0 results
214 | 
215 |         results = searchContainer(container, "location", "san francisco",
216 |                 "san francisco", false);
217 | 
218 |         logger.info(results.size() + " results found");
219 | 
220 |         assertEquals(0, results.size());
221 | 
222 |         // Do a range search inclusive on the same value for start and end and
223 |         // make sure we get 1 result
224 | 
"location", "san francisco", 226 | "san francisco", true); 227 | 228 | logger.info(results.size() + " results found"); 229 | 230 | assertEquals(1, results.size()); 231 | 232 | } 233 | 234 | @BeforeClass 235 | public static void setup() throws TTransportException, IOException, 236 | InterruptedException, ConfigurationException { 237 | embedded = new EmbeddedServerHelper(); 238 | embedded.setup(); 239 | 240 | cluster = getOrCreateCluster("MyCluster", "127.0.0.1:9170"); 241 | ko = createKeyspace(KEYSPACE, cluster); 242 | 243 | ArrayList cfDefList = new ArrayList(2); 244 | 245 | setupColumnFamilies(cfDefList); 246 | 247 | makeKeyspace(cluster, KEYSPACE, 248 | "org.apache.cassandra.locator.SimpleStrategy", 1, cfDefList); 249 | 250 | } 251 | 252 | @AfterClass 253 | public static void teardown() throws IOException { 254 | EmbeddedServerHelper.teardown(); 255 | embedded = null; 256 | } 257 | 258 | /** 259 | * Create the four required column families for values and indexes. 260 | * 261 | * @param cfDefList 262 | */ 263 | public static void setupColumnFamilies(List cfDefList) { 264 | 265 | createCF(IndexedCollections.DEFAULT_ITEM_CF, 266 | BytesType.class.getSimpleName(), cfDefList); 267 | 268 | createCF(IndexedCollections.DEFAULT_COLLECTION_CF, 269 | TimeUUIDType.class.getSimpleName(), cfDefList); 270 | 271 | createCF(IndexedCollections.DEFAULT_COLLECTION_INDEX_CF, 272 | DYNAMICCOMPOSITETYPE.getTypeName() 273 | + DEFAULT_DYNAMIC_COMPOSITE_ALIASES, cfDefList); 274 | 275 | createCF(IndexedCollections.DEFAULT_ITEM_INDEX_ENTRIES, 276 | DYNAMICCOMPOSITETYPE.getTypeName() 277 | + DEFAULT_DYNAMIC_COMPOSITE_ALIASES, cfDefList); 278 | 279 | } 280 | 281 | public static void createCF(String name, String comparator_type, 282 | List cfDefList) { 283 | cfDefList.add(new CfDef(KEYSPACE, name) 284 | .setComparator_type(comparator_type).setKey_cache_size(0) 285 | .setRow_cache_size(0).setGc_grace_seconds(86400)); 286 | } 287 | 288 | public static void makeKeyspace(Cluster cluster, String name, 289 | String strategy, int replicationFactor, List cfDefList) { 290 | 291 | if (cfDefList == null) { 292 | cfDefList = new ArrayList(); 293 | } 294 | 295 | try { 296 | KsDef ksDef = new KsDef(name, strategy, cfDefList); 297 | cluster.addKeyspace(new ThriftKsDef(ksDef)); 298 | return; 299 | } catch (Throwable e) { 300 | logger.error("Exception while creating keyspace, " + name 301 | + " - probably already exists", e); 302 | } 303 | 304 | for (CfDef cfDef : cfDefList) { 305 | try { 306 | cluster.addColumnFamily(new ThriftCfDef(cfDef)); 307 | } catch (Throwable e) { 308 | logger.error("Exception while creating CF, " + cfDef.getName() 309 | + " - probably already exists", e); 310 | } 311 | } 312 | } 313 | 314 | public static java.util.UUID newTimeUUID() { 315 | com.eaio.uuid.UUID eaioUUID = new com.eaio.uuid.UUID(); 316 | return new UUID(eaioUUID.time, eaioUUID.clockSeqAndNode); 317 | } 318 | 319 | /* 320 | * Convenience methods for wrapping IndexedCollections methods 321 | */ 322 | 323 | public UUID createEntity(String type) { 324 | UUID id = newTimeUUID(); 325 | createMutator(ko, ue).insert(id, IndexedCollections.DEFAULT_ITEM_CF, 326 | createColumn("type", type, se, se)); 327 | return id; 328 | } 329 | 330 | public void addEntityToCollection(ContainerCollection container, 331 | UUID itemEntity) { 332 | IndexedCollections.addItemToCollection(ko, container, itemEntity, 333 | IndexedCollections.defaultCFSet, ue); 334 | } 335 | 336 | public List getEntitiesInCollection( 337 | ContainerCollection container) { 338 | return 
--------------------------------------------------------------------------------
/src/test/resources/cassandra.yaml:
--------------------------------------------------------------------------------
1 | # Cassandra storage config YAML
2 | 
3 | # NOTE:
4 | #   See http://wiki.apache.org/cassandra/StorageConfiguration for
5 | #   full explanations of configuration directives
6 | # /NOTE
7 | 
8 | # The name of the cluster. This is mainly used to prevent machines in
9 | # one logical cluster from joining another.
10 | cluster_name: 'Test Cluster'
11 | 
12 | # You should always specify InitialToken when setting up a production
13 | # cluster for the first time, and often when adding capacity later.
14 | # The principle is that each node should be given an equal slice of
15 | # the token ring; see http://wiki.apache.org/cassandra/Operations
16 | # for more details.
17 | #
18 | # If blank, Cassandra will request a token bisecting the range of
19 | # the heaviest-loaded existing node. If there is no load information
20 | # available, such as is the case with a new cluster, it will pick
21 | # a random token, which will lead to hot spots.
22 | initial_token:
23 | 
24 | # Set to true to make new [non-seed] nodes automatically migrate data
25 | # to themselves from the pre-existing nodes in the cluster. Defaults
26 | # to false because you can only bootstrap N machines at a time from
27 | # an existing cluster of N, so if you are bringing up a cluster of
28 | # 10 machines with 3 seeds you would have to do it in stages. Leaving
29 | # this off for the initial start simplifies that.
30 | auto_bootstrap: false
31 | 
32 | # See http://wiki.apache.org/cassandra/HintedHandoff
33 | hinted_handoff_enabled: true
34 | # this defines the maximum amount of time a dead host will have hints
35 | # generated. After it has been dead this long, hints will be dropped.
36 | max_hint_window_in_ms: 3600000 # one hour 37 | # Sleep this long after delivering each row or row fragment 38 | hinted_handoff_throttle_delay_in_ms: 50 39 | 40 | # authentication backend, implementing IAuthenticator; used to identify users 41 | authenticator: org.apache.cassandra.auth.AllowAllAuthenticator 42 | 43 | # authorization backend, implementing IAuthority; used to limit access/provide permissions 44 | authority: org.apache.cassandra.auth.AllowAllAuthority 45 | 46 | # The partitioner is responsible for distributing rows (by key) across 47 | # nodes in the cluster. Any IPartitioner may be used, including your 48 | # own as long as it is on the classpath. Out of the box, Cassandra 49 | # provides org.apache.cassandra.dht.RandomPartitioner 50 | # org.apache.cassandra.dht.ByteOrderedPartitioner, 51 | # org.apache.cassandra.dht.OrderPreservingPartitioner (deprecated), 52 | # and org.apache.cassandra.dht.CollatingOrderPreservingPartitioner 53 | # (deprecated). 54 | # 55 | # - RandomPartitioner distributes rows across the cluster evenly by md5. 56 | # When in doubt, this is the best option. 57 | # - ByteOrderedPartitioner orders rows lexically by key bytes. BOP allows 58 | # scanning rows in key order, but the ordering can generate hot spots 59 | # for sequential insertion workloads. 60 | # - OrderPreservingPartitioner is an obsolete form of BOP, that stores 61 | # - keys in a less-efficient format and only works with keys that are 62 | # UTF8-encoded Strings. 63 | # - CollatingOPP colates according to EN,US rules rather than lexical byte 64 | # ordering. Use this as an example if you need custom collation. 65 | # 66 | # See http://wiki.apache.org/cassandra/Operations for more on 67 | # partitioners and token selection. 68 | partitioner: org.apache.cassandra.dht.RandomPartitioner 69 | 70 | # directories where Cassandra should store data on disk. 71 | data_file_directories: 72 | - ./tmp/data 73 | 74 | # commit log 75 | commitlog_directory: ./tmp/commitlog 76 | 77 | # saved caches 78 | saved_caches_directory: ./tmp/saved_caches 79 | 80 | # Size to allow commitlog to grow to before creating a new segment 81 | commitlog_rotation_threshold_in_mb: 128 82 | 83 | # commitlog_sync may be either "periodic" or "batch." 84 | # When in batch mode, Cassandra won't ack writes until the commit log 85 | # has been fsynced to disk. It will wait up to 86 | # CommitLogSyncBatchWindowInMS milliseconds for other writes, before 87 | # performing the sync. 88 | commitlog_sync: periodic 89 | 90 | # the other option is "periodic" where writes may be acked immediately 91 | # and the CommitLog is simply synced every commitlog_sync_period_in_ms 92 | # milliseconds. 93 | commitlog_sync_period_in_ms: 10000 94 | 95 | # any class that implements the SeedProvider interface and has a constructor that takes a Map of 96 | # parameters will do. 97 | seed_provider: 98 | # Addresses of hosts that are deemed contact points. 99 | # Cassandra nodes use this list of hosts to find each other and learn 100 | # the topology of the ring. You must change this if you are running 101 | # multiple nodes! 102 | - class_name: org.apache.cassandra.locator.SimpleSeedProvider 103 | parameters: 104 | # seeds is actually a comma-delimited list of addresses. 105 | - seeds: "127.0.0.1" 106 | 107 | # emergency pressure valve: each time heap usage after a full (CMS) 108 | # garbage collection is above this fraction of the max, Cassandra will 109 | # flush the largest memtables. 110 | # 111 | # Set to 1.0 to disable. 
Setting this lower than 112 | # CMSInitiatingOccupancyFraction is not likely to be useful. 113 | # 114 | # RELYING ON THIS AS YOUR PRIMARY TUNING MECHANISM WILL WORK POORLY: 115 | # it is most effective under light to moderate load, or read-heavy 116 | # workloads; under truly massive write load, it will often be too 117 | # little, too late. 118 | flush_largest_memtables_at: 0.75 119 | 120 | # emergency pressure valve #2: the first time heap usage after a full 121 | # (CMS) garbage collection is above this fraction of the max, 122 | # Cassandra will reduce cache maximum _capacity_ to the given fraction 123 | # of the current _size_. Should usually be set substantially above 124 | # flush_largest_memtables_at, since that will have less long-term 125 | # impact on the system. 126 | # 127 | # Set to 1.0 to disable. Setting this lower than 128 | # CMSInitiatingOccupancyFraction is not likely to be useful. 129 | reduce_cache_sizes_at: 0.85 130 | reduce_cache_capacity_to: 0.6 131 | 132 | # For workloads with more data than can fit in memory, Cassandra's 133 | # bottleneck will be reads that need to fetch data from 134 | # disk. "concurrent_reads" should be set to (16 * number_of_drives) in 135 | # order to allow the operations to enqueue low enough in the stack 136 | # that the OS and drives can reorder them. 137 | # 138 | # On the other hand, since writes are almost never IO bound, the ideal 139 | # number of "concurrent_writes" is dependent on the number of cores in 140 | # your system; (8 * number_of_cores) is a good rule of thumb. 141 | concurrent_reads: 32 142 | concurrent_writes: 32 143 | 144 | # Total memory to use for memtables. Cassandra will flush the largest 145 | # memtable when this much memory is used. Prefer using this to 146 | # the older, per-ColumnFamily memtable flush thresholds. 147 | # If omitted, Cassandra will set it to 1/3 of the heap. 148 | # If set to 0, only the old flush thresholds are used. 149 | # memtable_total_space_in_mb: 2048 150 | 151 | # This sets the amount of memtable flush writer threads. These will 152 | # be blocked by disk io, and each one will hold a memtable in memory 153 | # while blocked. If you have a large heap and many data directories, 154 | # you can increase this value for better flush performance. 155 | # By default this will be set to the amount of data directories defined. 156 | #memtable_flush_writers: 1 157 | 158 | # the number of full memtables to allow pending flush, that is, 159 | # waiting for a writer thread. At a minimum, this should be set to 160 | # the maximum number of secondary indexes created on a single CF. 161 | memtable_flush_queue_size: 4 162 | 163 | # Buffer size to use when performing contiguous column slices. 164 | # Increase this to the size of the column slices you typically perform 165 | sliced_buffer_size_in_kb: 64 166 | 167 | # TCP port, for commands and data 168 | storage_port: 7000 169 | 170 | # Address to bind to and tell other Cassandra nodes to connect to. You 171 | # _must_ change this if you want multiple nodes to be able to 172 | # communicate! 173 | # 174 | # Leaving it blank leaves it up to InetAddress.getLocalHost(). This 175 | # will always do the Right Thing *if* the node is properly configured 176 | # (hostname, name resolution, etc), and the Right Thing is to use the 177 | # address associated with the hostname (it might not be). 178 | # 179 | # Setting this to 0.0.0.0 is always wrong. 
180 | listen_address: localhost 181 | 182 | # The address to bind the Thrift RPC service to -- clients connect 183 | # here. Unlike ListenAddress above, you *can* specify 0.0.0.0 here if 184 | # you want Thrift to listen on all interfaces. 185 | # 186 | # Leaving this blank has the same effect it does for ListenAddress, 187 | # (i.e. it will be based on the configured hostname of the node). 188 | rpc_address: localhost 189 | # port for Thrift to listen for clients on 190 | rpc_port: 9170 191 | 192 | # enable or disable keepalive on rpc connections 193 | rpc_keepalive: true 194 | 195 | # Cassandra uses thread-per-client for client RPC. This can 196 | # be expensive in memory used for thread stack for a large 197 | # enough number of clients. (Hence, connection pooling is 198 | # very, very strongly recommended.) 199 | # 200 | # Uncomment rpc_min|max|thread to set request pool size. 201 | # You would primarily set max as a safeguard against misbehaved 202 | # clients; if you do hit the max, Cassandra will block until 203 | # one disconnects before accepting more. The defaults are 204 | # min of 16 and max unlimited. 205 | # 206 | # rpc_min_threads: 16 207 | # rpc_max_threads: 2048 208 | 209 | # uncomment to set socket buffer sizes on rpc connections 210 | # rpc_send_buff_size_in_bytes: 211 | # rpc_recv_buff_size_in_bytes: 212 | 213 | # Frame size for thrift (maximum field length). 214 | # 0 disables TFramedTransport in favor of TSocket. This option 215 | # is deprecated; we strongly recommend using Framed mode. 216 | thrift_framed_transport_size_in_mb: 15 217 | 218 | # The max length of a thrift message, including all fields and 219 | # internal thrift overhead. 220 | thrift_max_message_length_in_mb: 16 221 | 222 | # Set to true to have Cassandra create a hard link to each sstable 223 | # flushed or streamed locally in a backups/ subdirectory of the 224 | # Keyspace data. Removing these links is the operator's 225 | # responsibility. 226 | incremental_backups: false 227 | 228 | # Whether or not to take a snapshot before each compaction. Be 229 | # careful using this option, since Cassandra won't clean up the 230 | # snapshots for you. Mostly useful if you're paranoid when there 231 | # is a data format change. 232 | snapshot_before_compaction: false 233 | 234 | # change this to increase the compaction thread's priority. In java, 1 is the 235 | # lowest priority and that is our default. 236 | # compaction_thread_priority: 1 237 | 238 | # Add column indexes to a row after its contents reach this size. 239 | # Increase if your column values are large, or if you have a very large 240 | # number of columns. The competing causes are, Cassandra has to 241 | # deserialize this much of the row to read a single column, so you want 242 | # it to be small - at least if you do many partial-row reads - but all 243 | # the index data is read for each access, so you don't want to generate 244 | # that wastefully either. 245 | column_index_size_in_kb: 64 246 | 247 | # Size limit for rows being compacted in memory. Larger rows will spill 248 | # over to disk and use a slower two-pass compaction process. A message 249 | # will be logged specifying the row key. 250 | in_memory_compaction_limit_in_mb: 64 251 | 252 | # Number of compaction threads. This default to the number of processors, 253 | # enabling multiple compactions to execute at once. 
Using more than one 254 | # thread is highly recommended to preserve read performance in a mixed 255 | # read/write workload as this avoids sstables from accumulating during long 256 | # running compactions. The default is usually fine and if you experience 257 | # problems with compaction running too slowly or too fast, you should look at 258 | # compaction_throughput_mb_per_sec first. 259 | # Uncomment to make compaction mono-threaded. 260 | #concurrent_compactors: 1 261 | 262 | # Throttles compaction to the given total throughput across the entire 263 | # system. The faster you insert data, the faster you need to compact in 264 | # order to keep the sstable count down, but in general, setting this to 265 | # 16 to 32 times the rate you are inserting data is more than sufficient. 266 | # Setting this to 0 disables throttling. 267 | compaction_throughput_mb_per_sec: 16 268 | 269 | # Track cached row keys during compaction, and re-cache their new 270 | # positions in the compacted sstable. Disable if you use really large 271 | # key caches. 272 | compaction_preheat_key_cache: true 273 | 274 | # Time to wait for a reply from other nodes before failing the command 275 | rpc_timeout_in_ms: 10000 276 | 277 | # phi value that must be reached for a host to be marked down. 278 | # most users should never need to adjust this. 279 | # phi_convict_threshold: 8 280 | 281 | # endpoint_snitch -- Set this to a class that implements 282 | # IEndpointSnitch, which will let Cassandra know enough 283 | # about your network topology to route requests efficiently. 284 | # Out of the box, Cassandra provides 285 | # - org.apache.cassandra.locator.SimpleSnitch: 286 | # Treats Strategy order as proximity. This improves cache locality 287 | # when disabling read repair, which can further improve throughput. 288 | # - org.apache.cassandra.locator.RackInferringSnitch: 289 | # Proximity is determined by rack and data center, which are 290 | # assumed to correspond to the 3rd and 2nd octet of each node's 291 | # IP address, respectively 292 | # org.apache.cassandra.locator.PropertyFileSnitch: 293 | # - Proximity is determined by rack and data center, which are 294 | # explicitly configured in cassandra-topology.properties. 295 | endpoint_snitch: org.apache.cassandra.locator.SimpleSnitch 296 | 297 | # dynamic_snitch -- This boolean controls whether the above snitch is 298 | # wrapped with a dynamic snitch, which will monitor read latencies 299 | # and avoid reading from hosts that have slowed (due to compaction, 300 | # for instance) 301 | dynamic_snitch: true 302 | # controls how often to perform the more expensive part of host score 303 | # calculation 304 | dynamic_snitch_update_interval_in_ms: 100 305 | # controls how often to reset all host scores, allowing a bad host to 306 | # possibly recover 307 | dynamic_snitch_reset_interval_in_ms: 600000 308 | # if set greater than zero and read_repair_chance is < 1.0, this will allow 309 | # 'pinning' of replicas to hosts in order to increase cache capacity. 310 | # The badness threshold will control how much worse the pinned host has to be 311 | # before the dynamic snitch will prefer other replicas over it. This is 312 | # expressed as a double which represents a percentage. Thus, a value of 313 | # 0.2 means Cassandra would continue to prefer the static snitch values 314 | # until the pinned host was 20% worse than the fastest. 
315 | dynamic_snitch_badness_threshold: 0.0 316 | 317 | # request_scheduler -- Set this to a class that implements 318 | # RequestScheduler, which will schedule incoming client requests 319 | # according to the specific policy. This is useful for multi-tenancy 320 | # with a single Cassandra cluster. 321 | # NOTE: This is specifically for requests from the client and does 322 | # not affect inter node communication. 323 | # org.apache.cassandra.scheduler.NoScheduler - No scheduling takes place 324 | # org.apache.cassandra.scheduler.RoundRobinScheduler - Round robin of 325 | # client requests to a node with a separate queue for each 326 | # request_scheduler_id. The scheduler is further customized by 327 | # request_scheduler_options as described below. 328 | request_scheduler: org.apache.cassandra.scheduler.NoScheduler 329 | 330 | # Scheduler Options vary based on the type of scheduler 331 | # NoScheduler - Has no options 332 | # RoundRobin 333 | # - throttle_limit -- The throttle_limit is the number of in-flight 334 | # requests per client. Requests beyond 335 | # that limit are queued up until 336 | # running requests can complete. 337 | # The value of 80 here is twice the number of 338 | # concurrent_reads + concurrent_writes. 339 | # - default_weight -- default_weight is optional and allows for 340 | # overriding the default which is 1. 341 | # - weights -- Weights are optional and will default to 1 or the 342 | # overridden default_weight. The weight translates into how 343 | # many requests are handled during each turn of the 344 | # RoundRobin, based on the scheduler id. 345 | # 346 | # request_scheduler_options: 347 | # throttle_limit: 80 348 | # default_weight: 5 349 | # weights: 350 | # Keyspace1: 1 351 | # Keyspace2: 5 352 | 353 | # request_scheduler_id -- An identifer based on which to perform 354 | # the request scheduling. Currently the only valid option is keyspace. 355 | # request_scheduler_id: keyspace 356 | 357 | # The Index Interval determines how large the sampling of row keys 358 | # is for a given SSTable. The larger the sampling, the more effective 359 | # the index is at the cost of space. 360 | index_interval: 128 361 | 362 | # Enable or disable inter-node encryption 363 | # Default settings are TLS v1, RSA 1024-bit keys (it is imperative that 364 | # users generate their own keys) TLS_RSA_WITH_AES_128_CBC_SHA as the cipher 365 | # suite for authentication, key exchange and encryption of the actual data transfers. 366 | # NOTE: No custom encryption options are enabled at the moment 367 | # The available internode options are : all, none 368 | # 369 | # The passwords used in these options must match the passwords used when generating 370 | # the keystore and truststore. For instructions on generating these files, see: 371 | # http://download.oracle.com/javase/6/docs/technotes/guides/security/jsse/JSSERefGuide.html#CreateKeystore 372 | encryption_options: 373 | internode_encryption: none 374 | keystore: conf/.keystore 375 | keystore_password: cassandra 376 | truststore: conf/.truststore 377 | truststore_password: cassandra 378 | -------------------------------------------------------------------------------- /src/main/java/indexedcollections/IndexedCollections.java: -------------------------------------------------------------------------------- 1 | package indexedcollections; 2 | 3 | /* 4 | * 5 | * Licensed to the Apache Software Foundation (ASF) under one 6 | * or more contributor license agreements. 
See the NOTICE file
7 |  * distributed with this work for additional information
8 |  * regarding copyright ownership. The ASF licenses this file
9 |  * to you under the Apache License, Version 2.0 (the
10 |  * "License"); you may not use this file except in compliance
11 |  * with the License. You may obtain a copy of the License at
12 |  *
13 |  *   http://www.apache.org/licenses/LICENSE-2.0
14 |  *
15 |  * Unless required by applicable law or agreed to in writing,
16 |  * software distributed under the License is distributed on an
17 |  * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
18 |  * KIND, either express or implied. See the License for the
19 |  * specific language governing permissions and limitations
20 |  * under the License.
21 |  *
22 |  */
23 | 
24 | import static me.prettyprint.hector.api.factory.HFactory.createColumn;
25 | import static me.prettyprint.hector.api.factory.HFactory.createMutator;
26 | import static me.prettyprint.hector.api.factory.HFactory.createSliceQuery;
27 | 
28 | import java.math.BigInteger;
29 | import java.nio.ByteBuffer;
30 | import java.util.ArrayList;
31 | import java.util.List;
32 | import java.util.Set;
33 | import java.util.UUID;
34 | 
35 | import me.prettyprint.cassandra.serializers.ByteBufferSerializer;
36 | import me.prettyprint.cassandra.serializers.BytesArraySerializer;
37 | import me.prettyprint.cassandra.serializers.DynamicCompositeSerializer;
38 | import me.prettyprint.cassandra.serializers.LongSerializer;
39 | import me.prettyprint.cassandra.serializers.SerializerTypeInferer;
40 | import me.prettyprint.cassandra.serializers.StringSerializer;
41 | import me.prettyprint.cassandra.serializers.TypeInferringSerializer;
42 | import me.prettyprint.cassandra.serializers.UUIDSerializer;
43 | import me.prettyprint.hector.api.Keyspace;
44 | import me.prettyprint.hector.api.Serializer;
45 | import me.prettyprint.hector.api.beans.AbstractComposite;
46 | import me.prettyprint.hector.api.beans.AbstractComposite.Component;
47 | import me.prettyprint.hector.api.beans.ColumnSlice;
48 | import me.prettyprint.hector.api.beans.DynamicComposite;
49 | import me.prettyprint.hector.api.beans.HColumn;
50 | import me.prettyprint.hector.api.factory.HFactory;
51 | import me.prettyprint.hector.api.mutation.Mutator;
52 | import me.prettyprint.hector.api.query.QueryResult;
53 | import me.prettyprint.hector.api.query.SliceQuery;
54 | 
55 | import org.apache.log4j.Logger;
56 | 
57 | /**
58 |  * Simple indexing library using composite types
59 |  * (https://github.com/edanuff/CassandraCompositeType) to implement indexed
60 |  * collections in Cassandra.
61 |  *
62 |  * See http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html for a
63 |  * detailed discussion of the technique used here.
64 |  *
65 |  * @author Ed Anuff
66 |  * @see <a href=
67 |  *      "http://www.anuff.com/2010/07/secondary-indexes-in-cassandra.html">Secondary
68 |  *      indexes in Cassandra</a>
69 |  * @see "org.apache.cassandra.db.marshal.CompositeType"
70 |  *
71 |  */
72 | public class IndexedCollections {
73 | 
74 |     private static final Logger logger = Logger
75 |             .getLogger(IndexedCollections.class.getName());
76 | 
77 |     public static final String DEFAULT_ITEM_CF = "Item";
78 |     public static final String DEFAULT_COLLECTION_CF = "Collection";
79 |     public static final String DEFAULT_ITEM_INDEX_ENTRIES = "Item_Index_Entries";
80 |     public static final String DEFAULT_COLLECTION_INDEX_CF = "Collection_Index";
81 | 
82 |     public static final byte VALUE_CODE_BYTES = 0;
83 |     public static final byte VALUE_CODE_UTF8 = 1;
84 |     public static final byte VALUE_CODE_UUID = 2;
85 |     public static final byte VALUE_CODE_INT = 3;
86 |     public static final byte VALUE_CODE_MAX = 127;
87 | 
88 |     public static final int DEFAULT_COUNT = 100;
89 |     public static final int ALL_COUNT = 100000;
90 | 
91 |     public static final CollectionCFSet defaultCFSet = new CollectionCFSet();
92 | 
93 |     public static final StringSerializer se = new StringSerializer();
94 |     public static final ByteBufferSerializer be = new ByteBufferSerializer();
95 |     public static final BytesArraySerializer bae = new BytesArraySerializer();
96 |     public static final DynamicCompositeSerializer ce = new DynamicCompositeSerializer();
97 |     public static final LongSerializer le = new LongSerializer();
98 |     public static final UUIDSerializer ue = new UUIDSerializer();
99 | 
100 |     public static UUID newTimeUUID() {
101 |         com.eaio.uuid.UUID eaioUUID = new com.eaio.uuid.UUID();
102 |         return new UUID(eaioUUID.time, eaioUUID.clockSeqAndNode);
103 |     }
104 | 
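// A sketch of how the four column families above are laid out, as read from
// the code in this file and from IndexTest.setupColumnFamilies (descriptive
// summary only, not additional API):
//
//   Item ("Item", BytesType comparator):
//       itemKey -> { columnName : columnValue }            // the entity's own columns
//
//   Collection ("Collection", TimeUUIDType comparator):
//       "ownerKey:collection" -> { itemKey : clock }       // collection membership
//
//   Collection_Index ("Collection_Index", DynamicCompositeType):
//       "ownerKey:collection:columnName" ->
//           { (valueCode, value, itemKey, ts_uuid) : "" }  // scanned by searchContainer
//
//   Item_Index_Entries ("Item_Index_Entries", DynamicCompositeType):
//       itemKey -> { (columnName, ts_uuid) : value }       // lets setItemColumn find
//                                                          // stale entries to delete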
105 |     /**
106 |      * Convert values to be indexed into types that can be compared by
107 |      * Cassandra: UTF8Type, UUIDType, IntegerType, and BytesType
108 |      *
109 |      * @param value
110 |      * @return value transformed into String, UUID, BigInteger, or ByteBuffer
111 |      */
112 |     public static Object getIndexableValue(Object value) {
113 | 
114 |         if (value == null) {
115 |             return null;
116 |         }
117 | 
118 |         // Strings, UUIDs, and BigIntegers map to Cassandra
119 |         // UTF8Type, UUIDType, and IntegerType
120 |         if ((value instanceof String) || (value instanceof UUID)
121 |                 || (value instanceof BigInteger)) {
122 |             return value;
123 |         }
124 | 
125 |         // For any numeric values, turn them into a long
126 |         // and make them BigIntegers for IntegerType
127 |         if (value instanceof Number) {
128 |             return BigInteger.valueOf(((Number) value).longValue());
129 |         }
130 | 
131 |         // Anything else, we're going to have to use BytesType
132 |         return TypeInferringSerializer.get().toByteBuffer(value);
133 |     }
134 | 
135 |     /**
136 |      * The Cassandra DynamicCompositeType will complain if component values of
137 |      * two different types are attempted to be compared. The way to prevent this
138 |      * and still allow for indexes to store different dynamic values is to have
139 |      * a value code component that precedes the actual indexed value component
140 |      * in the composite. The DynamicCompositeType will first compare the two
141 |      * components holding the value codes, and if they don't match, then won't
142 |      * compare the next pair of components, avoiding the DynamicCompositeType
143 |      * throwing an error.
144 |      *
145 |      * @param value
146 |      * @return value code
147 |      */
148 |     public static int getIndexableValueCode(Object value) {
149 |         if (value instanceof String) {
150 |             return VALUE_CODE_UTF8;
151 |         } else if (value instanceof UUID) {
152 |             return VALUE_CODE_UUID;
153 |         } else if (value instanceof Number) {
154 |             return VALUE_CODE_INT;
155 |         } else {
156 |             return VALUE_CODE_BYTES;
157 |         }
158 |     }
159 | 
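// Why the value code matters -- an illustrative aside, not library code.
// Two index entries of different types, e.g.
//
//     new DynamicComposite(VALUE_CODE_INT, BigInteger.valueOf(42), itemA, tsA)
//     new DynamicComposite(VALUE_CODE_UTF8, "fred", itemB, tsB)
//
// first compare on the leading code component (3 vs. 1 here), so
// DynamicCompositeType never has to compare BigInteger.valueOf(42) against
// "fred", which would otherwise throw. A side effect is that entries cluster
// into type bands (bytes, then UTF8, then UUID, then integer values), and a
// range built by searchContainer stays inside the band of its search value
// because start and finish carry the same value code.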
160 |     private static <IK> void addIndexInsertion(Mutator<ByteBuffer> batch,
161 |             CollectionCFSet cf, String columnIndexKey, IK itemKey,
162 |             Object columnValue, UUID ts_uuid, long timestamp) {
163 | 
164 |         logger.info("UPDATE " + cf.getIndex() + " SET composite("
165 |                 + getIndexableValueCode(columnValue) + ", "
166 |                 + getIndexableValue(columnValue) + ", " + itemKey + ", "
167 |                 + ts_uuid + ") = null WHERE KEY = " + columnIndexKey);
168 | 
169 |         DynamicComposite indexComposite = new DynamicComposite(
170 |                 getIndexableValueCode(columnValue),
171 |                 getIndexableValue(columnValue), itemKey, ts_uuid);
172 | 
173 |         batch.addInsertion(se.toByteBuffer(columnIndexKey), cf.getIndex(),
174 |                 HFactory.createColumn(indexComposite, new byte[0], timestamp,
175 |                         ce, bae));
176 | 
177 |     }
178 | 
179 |     private static <IK> void addIndexDeletion(Mutator<ByteBuffer> batch,
180 |             CollectionCFSet cf, String columnIndexKey, IK itemKey,
181 |             Object columnValue, UUID prev_timestamp, long timestamp) {
182 | 
183 |         logger.info("DELETE composite(" + getIndexableValueCode(columnValue)
184 |                 + ", " + getIndexableValue(columnValue) + ", " + itemKey + ", "
185 |                 + prev_timestamp + ") FROM " + cf.getIndex() + " WHERE KEY = "
186 |                 + columnIndexKey);
187 | 
188 |         DynamicComposite indexComposite = new DynamicComposite(
189 |                 getIndexableValueCode(columnValue),
190 |                 getIndexableValue(columnValue), itemKey, prev_timestamp);
191 | 
192 |         batch.addDeletion(se.toByteBuffer(columnIndexKey), cf.getIndex(),
193 |                 indexComposite, ce, timestamp);
194 |     }
195 | 
196 |     private static <IK> void addEntriesInsertion(Mutator<ByteBuffer> batch,
197 |             CollectionCFSet cf, IK itemKey, Object columnName,
198 |             Object columnValue, UUID ts_uuid, Serializer<IK> itemKeySerializer,
199 |             long timestamp) {
200 | 
201 |         logger.info("UPDATE " + cf.getEntries() + " SET composite("
202 |                 + columnName + ", " + ts_uuid + ") = composite(" + columnValue
203 |                 + ") WHERE KEY = " + itemKey);
204 | 
205 |         batch.addInsertion(itemKeySerializer.toByteBuffer(itemKey), cf
206 |                 .getEntries(), HFactory.createColumn(new DynamicComposite(
207 |                 columnName, ts_uuid), new DynamicComposite(columnValue),
208 |                 timestamp, ce, ce));
209 |     }
210 | 
211 |     private static <IK> void addEntriesDeletion(Mutator<ByteBuffer> batch,
212 |             CollectionCFSet cf, IK itemKey, DynamicComposite columnName,
213 |             Object columnValue, UUID prev_timestamp,
214 |             Serializer<IK> itemKeySerializer, long timestamp) {
215 | 
216 |         logger.info("DELETE composite(" + columnName + ", " + prev_timestamp
217 |                 + ") FROM " + cf.getEntries() + " WHERE KEY = " + itemKey);
218 | 
219 |         batch.addDeletion(itemKeySerializer.toByteBuffer(itemKey),
220 |                 cf.getEntries(), columnName, ce, timestamp);
221 | 
222 |     }
223 | 
224 |     /**
225 |      * Sets the item column value for an item contained in a set of collections.
226 |      *
227 |      * @param <CK>
228 |      *            the container's key type
229 |      * @param <IK>
230 |      *            the item's key type
231 |      * @param <N>
232 |      *            the item's column name type
233 |      * @param <V>
234 |      *            the item's column value type
235 |      * @param ko
236 |      *            the keyspace operator
237 |      * @param itemKey
238 |      *            the item row key
239 |      * @param columnName
240 |      *            the name of the column to set
241 |      * @param columnValue
242 |      *            the value to set the column to
243 |      * @param containers
244 |      *            the set of containers the item is in
245 |      * @param cf
246 |      *            the column families to use
247 |      * @param itemKeySerializer
248 |      *            the item key serializer
249 |      * @param nameSerializer
250 |      *            the column name serializer
251 |      * @param valueSerializer
252 |      *            the column value serializer
253 |      * @param containerKeySerializer
254 |      *            the container key serializer
255 |      */
256 |     public static <CK, IK, N, V> void setItemColumn(Keyspace ko, IK itemKey,
257 |             N columnName, V columnValue,
258 |             Set<ContainerCollection<CK>> containers, CollectionCFSet cf,
259 |             Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer,
260 |             Serializer<V> valueSerializer, Serializer<CK> containerKeySerializer) {
261 | 
262 |         logger.info("SET " + columnName + " = '" + columnValue + "' FOR ITEM "
263 |                 + itemKey);
264 | 
265 |         long timestamp = HFactory.createClock();
266 |         Mutator<ByteBuffer> batch = createMutator(ko, be);
267 |         UUID ts_uuid = newTimeUUID();
268 | 
269 |         // Get all known previous index entries for this item's
270 |         // indexed column from the item's index entry list
271 | 
272 |         SliceQuery<IK, DynamicComposite, DynamicComposite> q = createSliceQuery(
273 |                 ko, itemKeySerializer, ce, ce);
274 |         q.setColumnFamily(cf.getEntries());
275 |         q.setKey(itemKey);
276 |         q.setRange(new DynamicComposite(columnName, new UUID(0, 0)),
277 |                 new DynamicComposite(columnName, new UUID(Long.MAX_VALUE
278 |                         | Long.MIN_VALUE, Long.MAX_VALUE | Long.MIN_VALUE)),
279 |                 false, ALL_COUNT); // (0,0)..(all bits set) spans every timeuuid
280 |         QueryResult<ColumnSlice<DynamicComposite, DynamicComposite>> r = q
281 |                 .execute();
282 |         ColumnSlice<DynamicComposite, DynamicComposite> slice = r.get();
283 |         List<HColumn<DynamicComposite, DynamicComposite>> entries = slice
284 |                 .getColumns();
285 | 
286 |         logger.info(entries.size() + " previous values for " + columnName
287 |                 + " found in index for removal");
288 | 
289 |         // Delete all previous index entries from the item's index entry list
290 | 
291 |         for (HColumn<DynamicComposite, DynamicComposite> entry : entries) {
292 |             UUID prev_timestamp = entry.getName().get(1, ue);
293 |             Object prev_value = entry.getValue().get(0);
294 | 
295 |             addEntriesDeletion(batch, cf, itemKey, entry.getName(), prev_value,
296 |                     prev_timestamp, itemKeySerializer, timestamp);
297 |         }
298 | 
299 |         // Add the new index entry to the item's index entry list
300 | 
301 |         if (columnValue != null) {
302 |             addEntriesInsertion(batch, cf, itemKey, columnName, columnValue,
303 |                     ts_uuid, itemKeySerializer, timestamp);
304 |         }
305 | 
306 |         for (ContainerCollection<CK> container : containers) {
307 | 
308 |             String columnIndexKey = container.getKey() + ":"
309 |                     + columnName.toString();
310 | 
311 |             // Delete all previous index entries from the container's
312 |             // index
313 | 
314 |             for (HColumn<DynamicComposite, DynamicComposite> entry : entries) {
315 |                 UUID prev_timestamp = entry.getName().get(1, ue);
316 |                 Object prev_value = entry.getValue().get(0);
317 | 
318 |                 addIndexDeletion(batch, cf, columnIndexKey, itemKey,
319 |                         prev_value, prev_timestamp, timestamp);
320 | 
321 |             }
322 | 
323 |             // Add the new index entry into the container's index
324 | 
325 |             if (columnValue != null) {
326 |                 addIndexInsertion(batch, cf, columnIndexKey, itemKey,
327 |                         columnValue, ts_uuid, timestamp);
328 |             }
329 | 
330 |         }
331 | 
332 |         // Store the new column value into the item
333 |         // If new value is null, delete the value instead
334 | 
335 |         if (columnValue != null) {
336 | 
337 |             logger.info("UPDATE " + cf.getItem() + " SET " + columnName + " = "
338 |                     + columnValue + " WHERE KEY = " + itemKey);
339 |             batch.addInsertion(itemKeySerializer.toByteBuffer(itemKey), cf
340 |                     .getItem(), HFactory.createColumn(columnName, columnValue,
341 |                     timestamp, nameSerializer, valueSerializer));
342 |         } else {
343 |             logger.info("DELETE " + columnName + " FROM " + cf.getItem()
344 |                     + " WHERE KEY = " + itemKey);
345 |             batch.addDeletion(itemKeySerializer.toByteBuffer(itemKey),
346 |                     cf.getItem(), columnName, nameSerializer, timestamp);
347 |         }
348 | 
349 |         batch.execute();
350 | 
351 |     }
352 | 
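// Example call (a sketch mirroring IndexTest.setEntityColumn): index the
// "name" column of an item in every collection that contains it, using the
// class's own static serializers.
//
//     IndexedCollections.setItemColumn(ko, itemId, "name", "fred",
//             containers, IndexedCollections.defaultCFSet, ue, se, se, ue);
//
// Passing null as columnValue deletes the column and removes its index
// entries; there is no separate un-index call.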
353 |     /**
354 |      * Search container.
355 |      *
356 |      * @param <IK>
357 |      *            the item's key type
358 |      * @param <CK>
359 |      *            the container's key type
360 |      * @param <N>
361 |      *            the item's column name type
362 |      * @param ko
363 |      *            the keyspace operator
364 |      * @param container
365 |      *            the ContainerCollection (container key and collection name)
366 |      * @param columnName
367 |      *            the item's column name
368 |      * @param searchValue
369 |      *            the exact value for the specified column
370 |      * @param startResult
371 |      *            the start result row key
372 |      * @param count
373 |      *            the number of row keys to return
374 |      * @param reversed
375 |      *            search in reverse order
376 |      * @param cf
377 |      *            the column family set
378 |      * @param containerKeySerializer
379 |      *            the container key serializer
380 |      * @param itemKeySerializer
381 |      *            the item key serializer
382 |      * @param nameSerializer
383 |      *            the column name serializer
384 |      * @return the list of row keys for items whose column value matches
385 |      */
386 |     public static <CK, IK, N> List<IK> searchContainer(Keyspace ko,
387 |             ContainerCollection<CK> container, N columnName,
388 |             Object searchValue, IK startResult, int count, boolean reversed,
389 |             CollectionCFSet cf, Serializer<CK> containerKeySerializer,
390 |             Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer) {
391 | 
392 |         return searchContainer(ko, container, columnName, searchValue,
393 |                 searchValue, true, startResult, count, reversed, cf,
394 |                 containerKeySerializer, itemKeySerializer, nameSerializer);
395 |     }
396 | 
397 |     /**
398 |      * Search container.
399 |      *
400 |      * @param <IK>
401 |      *            the item's key type
402 |      * @param <CK>
403 |      *            the container's key type
404 |      * @param <N>
405 |      *            the item's column name type
406 |      * @param ko
407 |      *            the keyspace operator
408 |      * @param container
409 |      *            the ContainerCollection (container key and collection name)
410 |      * @param columnName
411 |      *            the item's column name
412 |      * @param startValue
413 |      *            the start value for the specified column (inclusive)
414 |      * @param endValue
415 |      *            the end value for the specified column
416 |      * @param inclusive
417 |      *            whether end value for the specified column is inclusive
418 |      * @param startResult
419 |      *            the start result row key
420 |      * @param count
421 |      *            the number of row keys to return
422 |      * @param reversed
423 |      *            search in reverse order
424 |      * @param cf
425 |      *            the column family set
426 |      * @param containerKeySerializer
427 |      *            the container key serializer
428 |      * @param itemKeySerializer
429 |      *            the item key serializer
430 |      * @param nameSerializer
431 |      *            the column name serializer
432 |      * @return the list of row keys for items whose column value matches
433 |      */
434 |     @SuppressWarnings("unchecked")
435 |     public static <CK, IK, N> List<IK> searchContainer(Keyspace ko,
436 |             ContainerCollection<CK> container, N columnName, Object startValue,
437 |             Object endValue, boolean inclusive, IK startResult, int count,
438 |             boolean reversed, CollectionCFSet cf,
439 |             Serializer<CK> containerKeySerializer,
440 |             Serializer<IK> itemKeySerializer, Serializer<N> nameSerializer) {
441 |         List<IK> items = new ArrayList<IK>();
442 | 
443 |         String columnIndexKey = container.getKey() + ":"
444 |                 + columnName.toString();
445 | 
446 |         if (count == 0) {
447 |             count = DEFAULT_COUNT;
448 |         }
449 | 
450 |         SliceQuery<ByteBuffer, DynamicComposite, ByteBuffer> q = createSliceQuery(
451 |                 ko, be, ce, be);
452 |         q.setColumnFamily(cf.getIndex());
453 |         q.setKey(se.toByteBuffer(columnIndexKey));
454 | 
455 |         DynamicComposite start = null;
456 | 
457 |         if (startValue == null) {
458 |             if (startResult != null) {
459 |                 start = new DynamicComposite(VALUE_CODE_BYTES, new byte[0],
460 |                         startResult);
461 |             } else {
462 |                 start = new DynamicComposite(VALUE_CODE_BYTES, new byte[0]);
463 |             }
464 |         } else if (startResult != null) {
465 |             start = new DynamicComposite(getIndexableValueCode(startValue),
466 |                     getIndexableValue(startValue), startResult);
467 |         } else {
468 |             start = new DynamicComposite(getIndexableValueCode(startValue),
469 |                     getIndexableValue(startValue));
470 |         }
471 | 
472 |         DynamicComposite finish = null;
473 | 
474 |         if (endValue != null) {
475 |             finish = new DynamicComposite(getIndexableValueCode(endValue),
476 |                     getIndexableValue(endValue));
477 |             if (inclusive) {
478 |                 @SuppressWarnings("rawtypes")
479 |                 Component c = finish.getComponent(1);
480 |                 finish.setComponent(1, c.getValue(), c.getSerializer(),
481 |                         c.getComparator(),
482 |                         AbstractComposite.ComponentEquality.GREATER_THAN_EQUAL);
483 |             }
484 |         }
485 | 
486 |         q.setRange(start, finish, reversed, count);
487 |         QueryResult<ColumnSlice<DynamicComposite, ByteBuffer>> r = q.execute();
488 |         ColumnSlice<DynamicComposite, ByteBuffer> slice = r.get();
489 |         List<HColumn<DynamicComposite, ByteBuffer>> results = slice
490 |                 .getColumns();
491 | 
492 |         if (results != null) {
493 |             for (HColumn<DynamicComposite, ByteBuffer> result : results) {
494 |                 Object value = result.getName().get(1);
495 |                 logger.info("Value found: " + value);
496 | 
497 |                 IK key = result.getName().get(2, itemKeySerializer);
498 |                 if (key != null) {
499 |                     items.add(key);
500 |                 }
501 |             }
502 |         }
503 | 
504 |         return items;
505 |     }
506 | 
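// Note on the inclusive end range above (describing existing behavior, not
// new API): a plain finish composite excludes columns equal to endValue, so
// the code rebuilds the value component with
// ComponentEquality.GREATER_THAN_EQUAL, which makes the finish sort just
// past every composite whose value component equals endValue. That is what
// lets IndexTest's query from "san francisco" to "san francisco" return one
// row with inclusive=true and none with inclusive=false.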
507 |     /**
508 |      * Adds the item to collection.
509 |      *
510 |      * @param <CK>
511 |      *            the container's key type
512 |      * @param <IK>
513 |      *            the item's key type
514 |      * @param ko
515 |      *            the keyspace operator
516 |      * @param container
517 |      *            the ContainerCollection (container key and collection name)
518 |      * @param itemKey
519 |      *            the item's row key
520 |      * @param cf
521 |      *            the column families to use
522 |      * @param containerKeySerializer
523 |      *            the container key serializer
524 |      * @param itemKeySerializer
525 |      *            the item key serializer
526 |      */
527 |     public static <CK, IK> void addItemToCollection(Keyspace ko,
528 |             ContainerCollection<CK> container, IK itemKey, CollectionCFSet cf,
529 |             Serializer<IK> itemKeySerializer) {
530 | 
531 |         createMutator(ko, se).insert(
532 |                 container.getKey(),
533 |                 cf.getItems(),
534 |                 createColumn(itemKey, HFactory.createClock(),
535 |                         itemKeySerializer, le));
536 | 
537 |     }
538 | 
539 |     public static <CK, IK> List<IK> getItemsInCollection(Keyspace ko,
540 |             ContainerCollection<CK> container, CollectionCFSet cf,
541 |             Serializer<IK> itemKeySerializer) {
542 |         List<IK> keys = new ArrayList<IK>();
543 |         SliceQuery<String, IK, ByteBuffer> q = createSliceQuery(ko, se,
544 |                 itemKeySerializer, be);
545 |         q.setColumnFamily(cf.getItems());
546 |         q.setKey(container.getKey());
547 |         q.setRange(null, null, false, ALL_COUNT);
548 |         QueryResult<ColumnSlice<IK, ByteBuffer>> r = q.execute();
549 |         ColumnSlice<IK, ByteBuffer> slice = r.get();
550 |         List<HColumn<IK, ByteBuffer>> results = slice.getColumns();
551 |         for (HColumn<IK, ByteBuffer> column : results) {
552 |             keys.add(column.getName());
553 |         }
554 |         return keys;
555 |     }
556 | 
557 |     @SuppressWarnings("unchecked")
558 |     public static <T, K> T getAsType(K obj, Serializer<T> st) {
559 |         Serializer<K> so = SerializerTypeInferer.getSerializer(obj);
560 |         if (so == null) {
561 |             return null;
562 |         }
563 |         if (so.getClass().equals(st.getClass())) {
564 |             return (T) obj;
565 |         }
566 |         return st.fromByteBuffer(so.toByteBuffer(obj));
567 |     }
568 | 
569 |     /**
570 |      * CollectionCFSet contains the names of the four column families needed to
571 |      * implement indexed collections. Default CF names are provided, but can be
572 |      * anything that makes sense for the application.
573 |      */
574 |     public static class CollectionCFSet {
575 | 
576 |         private String item = DEFAULT_ITEM_CF;
577 |         private String items = DEFAULT_COLLECTION_CF;
578 |         private String index = DEFAULT_COLLECTION_INDEX_CF;
579 |         private String entries = DEFAULT_ITEM_INDEX_ENTRIES;
580 | 
581 |         public CollectionCFSet() {
582 |         }
583 | 
584 |         public CollectionCFSet(String item, String items, String index,
585 |                 String entries) {
586 |             this.item = item;
587 |             this.items = items;
588 |             this.index = index;
589 |             this.entries = entries;
590 |         }
591 | 
592 |         public String getItem() {
593 |             return item;
594 |         }
595 | 
596 |         public void setItem(String item) {
597 |             this.item = item;
598 |         }
599 | 
600 |         public String getItems() {
601 |             return items;
602 |         }
603 | 
604 |         public void setItems(String items) {
605 |             this.items = items;
606 |         }
607 | 
608 |         public String getIndex() {
609 |             return index;
610 |         }
611 | 
612 |         public void setIndex(String index) {
613 |             this.index = index;
614 |         }
615 | 
616 |         public String getEntries() {
617 |             return entries;
618 |         }
619 | 
620 |         public void setEntries(String entries) {
621 |             this.entries = entries;
622 |         }
623 |     }
624 | 
625 |     /**
626 |      * ContainerCollection represents the containing entity's key and collection
627 |      * name. The assumption is that an entity can have multiple collections,
628 |      * each with its own name.
629 |      *
630 |      * @param <CK>
631 |      *            the container's row key type
632 |      */
633 |     public static class ContainerCollection<CK> {
634 |         private CK ownerKey;
635 |         private String collectionName;
636 | 
637 |         public ContainerCollection(CK ownerKey, String collectionName) {
638 |             this.ownerKey = ownerKey;
639 |             this.collectionName = collectionName;
640 |         }
641 | 
642 |         public CK getOwnerKey() {
643 |             return ownerKey;
644 |         }
645 | 
646 |         public void setOwnerKey(CK ownerKey) {
647 |             this.ownerKey = ownerKey;
648 |         }
649 | 
650 |         public String getCollectionName() {
651 |             return collectionName;
652 |         }
653 | 
654 |         public void setCollectionName(String collectionName) {
655 |             this.collectionName = collectionName;
656 |         }
657 | 
658 |         public String getKey() {
659 |             return ownerKey + ":" + collectionName;
660 |         }
661 | 
662 |         @Override
663 |         public int hashCode() {
664 |             final int prime = 31;
665 |             int result = 1;
666 |             result = prime
667 |                     * result
668 |                     + ((collectionName == null) ? 0 : collectionName.hashCode());
669 |             result = prime * result
670 |                     + ((ownerKey == null) ? 0 : ownerKey.hashCode());
671 |             return result;
672 |         }
673 | 
674 |         @Override
675 |         public boolean equals(Object obj) {
676 |             if (this == obj) {
677 |                 return true;
678 |             }
679 |             if (obj == null) {
680 |                 return false;
681 |             }
682 |             if (getClass() != obj.getClass()) {
683 |                 return false;
684 |             }
685 |             @SuppressWarnings("rawtypes")
686 |             ContainerCollection other = (ContainerCollection) obj;
687 |             if (collectionName == null) {
688 |                 if (other.collectionName != null) {
689 |                     return false;
690 |                 }
691 |             } else if (!collectionName.equals(other.collectionName)) {
692 |                 return false;
693 |             }
694 |             if (ownerKey == null) {
695 |                 if (other.ownerKey != null) {
696 |                     return false;
697 |                 }
698 |             } else if (!ownerKey.equals(other.ownerKey)) {
699 |                 return false;
700 |             }
701 |             return true;
702 |         }
703 |     }
704 | }
705 | 
--------------------------------------------------------------------------------
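The defaultCFSet used throughout is simply a CollectionCFSet holding the four default column family names. If an application needs different names, the four-argument constructor covers it; a small sketch (the names here are illustrative, and the column families must still be created with the comparators shown in IndexTest.setupColumnFamilies):

    CollectionCFSet cfs = new CollectionCFSet(
            "Users",             // item CF (BytesType)
            "UserCollections",   // collection CF (TimeUUIDType)
            "UserIndexes",       // collection index CF (DynamicCompositeType)
            "UserIndexEntries"); // item index entries CF (DynamicCompositeType)

    IndexedCollections.addItemToCollection(ko, container, itemKey, cfs,
            IndexedCollections.ue);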