├── README.txt
├── pom.xml
└── src
├── main
└── java
│ └── com
│ └── jayway
│ └── mongodb
│ ├── AnalyzedDBObject.java
│ └── MongoDatastore.java
└── test
└── java
└── com
└── jayway
└── mongodb
└── MongoAnalysisTest.java
/README.txt:
--------------------------------------------------------------------------------
1 | Shows how to use MongoDB with Lucene analyzers.
2 |
3 | http://blog.jayway.com/2010/11/14/full-text-search-with-mongodb-and-lucene-analyzers/
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 |
5 | com.jayway
6 | mongo-text-search
7 | 1.0-SNAPSHOT
8 | jar
9 |
10 | mongo-vs-lucene
11 | http://maven.apache.org
12 |
13 |
14 | UTF-8
15 |
16 |
17 |
18 |
19 | junit
20 | junit
21 | 4.8.1
22 | test
23 |
24 |
25 | org.mongodb
26 | mongo-java-driver
27 | 2.3
28 |
29 |
30 | org.apache.lucene
31 | lucene-core
32 | 3.0.1
33 |
34 |
35 |
36 |
37 |
38 |
39 | maven-compiler-plugin
40 |
41 | 1.5
42 | true
43 | 1.6
44 | 1.6
45 |
46 |
47 |
48 |
49 |
50 |
--------------------------------------------------------------------------------
/src/main/java/com/jayway/mongodb/AnalyzedDBObject.java:
--------------------------------------------------------------------------------
1 | package com.jayway.mongodb;
2 |
3 | import java.io.IOException;
4 | import java.io.StringReader;
5 | import java.util.ArrayList;
6 | import java.util.List;
7 |
8 | import org.apache.lucene.analysis.Analyzer;
9 | import org.apache.lucene.analysis.TokenStream;
10 | import org.apache.lucene.analysis.tokenattributes.TermAttribute;
11 |
12 | import com.mongodb.BasicDBObject;
13 |
14 | public class AnalyzedDBObject extends BasicDBObject {
15 |
16 | private static final long serialVersionUID = -3375115283230844689L;
17 |
18 | public static enum Condition {ALL,IN}
19 |
20 | private Analyzer analyzer;
21 |
22 | public AnalyzedDBObject(Analyzer analyzer) {
23 | this.analyzer = analyzer;
24 | }
25 |
26 | public AnalyzedDBObject createQuery(String name,String text) throws IOException {
27 | return createQuery(name, text, Condition.ALL);
28 | }
29 |
30 | public AnalyzedDBObject createQuery(String name,String text,Condition condition) throws IOException {
31 | List tokens = tokenize(analyzer.tokenStream(name, new StringReader(text)));
32 |
33 | append(name,new BasicDBObject(
34 | String.format("$%s",condition.toString().toLowerCase()),
35 | tokens.toArray(new String[0])));
36 | return this;
37 | }
38 |
39 | public AnalyzedDBObject appendAndAnalyzeFullText(String name, String text)
40 | throws IOException {
41 | append(name,
42 | tokenize(analyzer.tokenStream(name, new StringReader(text))));
43 | return this;
44 | }
45 |
46 | private List tokenize(TokenStream stream) throws IOException {
47 | List tokens = new ArrayList();
48 | TermAttribute term = (TermAttribute) stream
49 | .addAttribute(TermAttribute.class);
50 | while (stream.incrementToken()) {
51 | // Not sure if we somehow can use termBuffer() to get a char[]
52 | // so we do no have to create a new String for each term
53 | tokens.add(term.term());
54 | }
55 | return tokens;
56 | }
57 |
58 | }
59 |
--------------------------------------------------------------------------------
/src/main/java/com/jayway/mongodb/MongoDatastore.java:
--------------------------------------------------------------------------------
1 | package com.jayway.mongodb;
2 |
3 | import java.net.UnknownHostException;
4 | import java.util.ArrayList;
5 | import java.util.List;
6 |
7 | import com.mongodb.BasicDBObject;
8 | import com.mongodb.DB;
9 | import com.mongodb.DBCollection;
10 | import com.mongodb.DBCursor;
11 | import com.mongodb.DBObject;
12 | import com.mongodb.Mongo;
13 | import com.mongodb.MongoException;
14 |
15 | public class MongoDatastore {
16 |
17 | DB db;
18 |
19 | public MongoDatastore() throws UnknownHostException, MongoException {
20 | Mongo m = new Mongo( "localhost" , 27017 );
21 | db = m.getDB( "mongotextsearch" );
22 | }
23 |
24 | private DBCollection getCollection(String name) {
25 | return db.getCollection(name);
26 | }
27 |
28 | public void dropDatabase() {
29 | db.dropDatabase();
30 | }
31 |
32 | public void ensureIndex(String col, String field) {
33 | getCollection(col).createIndex(new BasicDBObject(field, new Integer(1)));
34 | }
35 |
36 | public void save(DBObject object,String col) {
37 | getCollection(col).save(object);
38 | }
39 |
40 | public DBObject get(DBObject query,String col) {
41 | return getCollection(col).findOne(query);
42 | }
43 |
44 | public List find(DBObject query,String col) {
45 |
46 | DBCursor cursor = getCollection(col).find(query);
47 |
48 | List dbObjects = new ArrayList();
49 | while(cursor.hasNext()) {
50 | dbObjects.add(cursor.next());
51 | }
52 | return dbObjects;
53 | }
54 |
55 | public long cnt(String col) {
56 | return getCollection(col).count();
57 | }
58 |
59 | }
60 |
--------------------------------------------------------------------------------
/src/test/java/com/jayway/mongodb/MongoAnalysisTest.java:
--------------------------------------------------------------------------------
1 | package com.jayway.mongodb;
2 |
3 | import static junit.framework.Assert.assertEquals;
4 | import static junit.framework.Assert.assertNull;
5 |
6 | import java.io.IOException;
7 | import java.net.UnknownHostException;
8 |
9 | import org.apache.lucene.analysis.Analyzer;
10 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
11 | import org.apache.lucene.util.Version;
12 | import org.junit.BeforeClass;
13 | import org.junit.Test;
14 |
15 | import com.jayway.mongodb.AnalyzedDBObject.Condition;
16 | import com.mongodb.DBObject;
17 | import com.mongodb.MongoException;
18 |
19 | public class MongoAnalysisTest {
20 |
21 |
22 | static final String TEXT = "I Would like to use mongodb for full text search";
23 | static final String COLLECTION_NAME = "article";
24 | static final String INDEXED_FIELD = "indtext";
25 | static final String TEXT_FIELD = "text";
26 | static MongoDatastore mongo;
27 |
28 | static Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
29 |
30 | @BeforeClass
31 | public static void setup() throws UnknownHostException, MongoException {
32 | mongo = new MongoDatastore();
33 | mongo.dropDatabase();
34 | }
35 |
36 | @Test
37 | public void verifySaveAndGetTestSearchWithAll() throws IOException {
38 |
39 | mongo.save(
40 | new AnalyzedDBObject(analyzer)
41 | .appendAndAnalyzeFullText(INDEXED_FIELD, TEXT)
42 | .append(TEXT_FIELD, TEXT),
43 | COLLECTION_NAME);
44 |
45 | DBObject result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,TEXT),COLLECTION_NAME);
46 | assertEquals(TEXT, result.get(TEXT_FIELD));
47 |
48 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDB sEarch woulD TO"),COLLECTION_NAME);
49 | assertEquals(TEXT, result.get(TEXT_FIELD));
50 |
51 | // In this query, only "search" matches
52 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDBs wouldd search "),COLLECTION_NAME);
53 | assertNull(result);
54 | }
55 |
56 | @Test
57 | public void verifySaveAndGetTestSearchWithAtLeastOne() throws IOException {
58 |
59 | mongo.save(
60 | new AnalyzedDBObject(analyzer)
61 | .appendAndAnalyzeFullText(INDEXED_FIELD, TEXT)
62 | .append(TEXT_FIELD, TEXT),
63 | COLLECTION_NAME);
64 |
65 | DBObject result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,TEXT,Condition.IN),COLLECTION_NAME);
66 | assertEquals(TEXT, result.get(TEXT_FIELD));
67 |
68 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDB sEarch woulD",Condition.IN),COLLECTION_NAME);
69 | assertEquals(TEXT, result.get(TEXT_FIELD));
70 |
71 | // In this query, only "search" matches
72 | result = mongo.get(new AnalyzedDBObject(analyzer).createQuery(INDEXED_FIELD,"MonGoDBs wouldd search ",Condition.IN),COLLECTION_NAME);
73 | assertEquals(TEXT, result.get(TEXT_FIELD));
74 | }
75 | }
76 |
--------------------------------------------------------------------------------