├── README.txt ├── pom.xml └── src ├── main └── java │ └── com │ └── jayway │ └── mongodb │ ├── AnalyzedDBObject.java │ └── MongoDatastore.java └── test └── java └── com └── jayway └── mongodb └── MongoAnalysisTest.java /README.txt: -------------------------------------------------------------------------------- 1 | Shows how to use mongo with lucene analyzers. 2 | 3 | http://blog.jayway.com/2010/11/14/full-text-search-with-mongodb-and-lucene-analyzers/ -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | 5 | com.jayway 6 | mongo-text-search 7 | 1.0-SNAPSHOT 8 | jar 9 | 10 | mongo-vs-lucene 11 | http://maven.apache.org 12 | 13 | 14 | UTF-8 15 | 16 | 17 | 18 | 19 | junit 20 | junit 21 | 4.8.1 22 | test 23 | 24 | 25 | org.mongodb 26 | mongo-java-driver 27 | 2.3 28 | 29 | 30 | org.apache.lucene 31 | lucene-core 32 | 3.0.1 33 | 34 | 35 | 36 | 37 | 38 | 39 | maven-compiler-plugin 40 | 41 | 1.5 42 | true 43 | 1.6 44 | 1.6 45 | 46 | 47 | 48 | 49 | 50 | -------------------------------------------------------------------------------- /src/main/java/com/jayway/mongodb/AnalyzedDBObject.java: -------------------------------------------------------------------------------- 1 | package com.jayway.mongodb; 2 | 3 | import java.io.IOException; 4 | import java.io.StringReader; 5 | import java.util.ArrayList; 6 | import java.util.List; 7 | 8 | import org.apache.lucene.analysis.Analyzer; 9 | import org.apache.lucene.analysis.TokenStream; 10 | import org.apache.lucene.analysis.tokenattributes.TermAttribute; 11 | 12 | import com.mongodb.BasicDBObject; 13 | 14 | public class AnalyzedDBObject extends BasicDBObject { 15 | 16 | private static final long serialVersionUID = -3375115283230844689L; 17 | 18 | public static enum Condition {ALL,IN} 19 | 20 | private Analyzer analyzer; 21 | 22 | public AnalyzedDBObject(Analyzer analyzer) { 23 | this.analyzer = analyzer; 24 | } 25 | 26 | public AnalyzedDBObject createQuery(String name,String text) throws IOException { 27 | return createQuery(name, text, Condition.ALL); 28 | } 29 | 30 | public AnalyzedDBObject createQuery(String name,String text,Condition condition) throws IOException { 31 | List tokens = tokenize(analyzer.tokenStream(name, new StringReader(text))); 32 | 33 | append(name,new BasicDBObject( 34 | String.format("$%s",condition.toString().toLowerCase()), 35 | tokens.toArray(new String[0]))); 36 | return this; 37 | } 38 | 39 | public AnalyzedDBObject appendAndAnalyzeFullText(String name, String text) 40 | throws IOException { 41 | append(name, 42 | tokenize(analyzer.tokenStream(name, new StringReader(text)))); 43 | return this; 44 | } 45 | 46 | private List tokenize(TokenStream stream) throws IOException { 47 | List tokens = new ArrayList(); 48 | TermAttribute term = (TermAttribute) stream 49 | .addAttribute(TermAttribute.class); 50 | while (stream.incrementToken()) { 51 | // Not sure if we somehow can use termBuffer() to get a char[] 52 | // so we do no have to create a new String for each term 53 | tokens.add(term.term()); 54 | } 55 | return tokens; 56 | } 57 | 58 | } 59 | -------------------------------------------------------------------------------- /src/main/java/com/jayway/mongodb/MongoDatastore.java: -------------------------------------------------------------------------------- 1 | package com.jayway.mongodb; 2 | 3 | import java.net.UnknownHostException; 4 | import java.util.ArrayList; 5 | import java.util.List; 6 | 7 | import com.mongodb.BasicDBObject; 8 | import com.mongodb.DB; 9 | import com.mongodb.DBCollection; 10 | import com.mongodb.DBCursor; 11 | import com.mongodb.DBObject; 12 | import com.mongodb.Mongo; 13 | import com.mongodb.MongoException; 14 | 15 | public class MongoDatastore { 16 | 17 | DB db; 18 | 19 | public MongoDatastore() throws UnknownHostException, MongoException { 20 | Mongo m = new Mongo( "localhost" , 27017 ); 21 | db = m.getDB( "mongotextsearch" ); 22 | } 23 | 24 | private DBCollection getCollection(String name) { 25 | return db.getCollection(name); 26 | } 27 | 28 | public void dropDatabase() { 29 | db.dropDatabase(); 30 | } 31 | 32 | public void ensureIndex(String col, String field) { 33 | getCollection(col).createIndex(new BasicDBObject(field, new Integer(1))); 34 | } 35 | 36 | public void save(DBObject object,String col) { 37 | getCollection(col).save(object); 38 | } 39 | 40 | public DBObject get(DBObject query,String col) { 41 | return getCollection(col).findOne(query); 42 | } 43 | 44 | public List find(DBObject query,String col) { 45 | 46 | DBCursor cursor = getCollection(col).find(query); 47 | 48 | List dbObjects = new ArrayList(); 49 | while(cursor.hasNext()) { 50 | dbObjects.add(cursor.next()); 51 | } 52 | return dbObjects; 53 | } 54 | 55 | public long cnt(String col) { 56 | return getCollection(col).count(); 57 | } 58 | 59 | } 60 | -------------------------------------------------------------------------------- /src/test/java/com/jayway/mongodb/MongoAnalysisTest.java: -------------------------------------------------------------------------------- 1 | package com.jayway.mongodb; 2 | 3 | import static junit.framework.Assert.assertEquals; 4 | import static junit.framework.Assert.assertNull; 5 | 6 | import java.io.IOException; 7 | import java.net.UnknownHostException; 8 | 9 | import org.apache.lucene.analysis.Analyzer; 10 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 11 | import org.apache.lucene.util.Version; 12 | import org.junit.BeforeClass; 13 | import org.junit.Test; 14 | 15 | import com.jayway.mongodb.AnalyzedDBObject.Condition; 16 | import com.mongodb.DBObject; 17 | import com.mongodb.MongoException; 18 | 19 | public class MongoAnalysisTest { 20 | 21 | 22 | static final String TEXT = "I Would like to use mongodb for full text search"; 23 | static final String COLLECTION_NAME = "article"; 24 | static final String INDEXED_FIELD = "indtext"; 25 | static final String TEXT_FIELD = "text"; 26 | static MongoDatastore mongo; 27 | 28 | static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30); 29 | 30 | @BeforeClass 31 | public static void setup() throws UnknownHostException, MongoException { 32 | mongo = new MongoDatastore(); 33 | mongo.dropDatabase(); 34 | } 35 | 36 | @Test 37 | public void verifySaveAndGetTestSearchWithAll() throws IOException { 38 | 39 | mongo.save( 40 | new AnalyzedDBObject(analyzer) 41 | .appendAndAnalyzeFullText(INDEXED_FIELD, TEXT) 42 | .append(TEXT_FIELD, TEXT), 43 | COLLECTION_NAME); 44 | 45 | DBObject result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,TEXT),COLLECTION_NAME); 46 | assertEquals(TEXT, result.get(TEXT_FIELD)); 47 | 48 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDB sEarch woulD TO"),COLLECTION_NAME); 49 | assertEquals(TEXT, result.get(TEXT_FIELD)); 50 | 51 | // In this query, only "search" matches 52 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDBs wouldd search "),COLLECTION_NAME); 53 | assertNull(result); 54 | } 55 | 56 | @Test 57 | public void verifySaveAndGetTestSearchWithAtLeastOne() throws IOException { 58 | 59 | mongo.save( 60 | new AnalyzedDBObject(analyzer) 61 | .appendAndAnalyzeFullText(INDEXED_FIELD, TEXT) 62 | .append(TEXT_FIELD, TEXT), 63 | COLLECTION_NAME); 64 | 65 | DBObject result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,TEXT,Condition.IN),COLLECTION_NAME); 66 | assertEquals(TEXT, result.get(TEXT_FIELD)); 67 | 68 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDB sEarch woulD",Condition.IN),COLLECTION_NAME); 69 | assertEquals(TEXT, result.get(TEXT_FIELD)); 70 | 71 | // In this query, only "search" matches 72 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDBs wouldd search ",Condition.IN),COLLECTION_NAME); 73 | assertEquals(TEXT, result.get(TEXT_FIELD)); 74 | } 75 | } 76 | --------------------------------------------------------------------------------