├── .gitignore
├── README.md
└── lucene-s3-searcher
├── pom.xml
└── src
└── com
└── printlele
└── SearchFiles.java
/.gitignore:
--------------------------------------------------------------------------------
1 | .project
2 | .classpath
3 | org.eclipse.jdt.core.prefs
4 | lucene-s3-searcher/.settings
5 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # aws-lambda-s3-index-search
2 | This will allow you to maintain your Lucene index in S3 and run your Lucene search across an S3 bucket.
3 |
4 | The idea here is to make the complete search architecture serverless by keeping the Lucene index in S3, which provides unlimited cheap storage, and by leveraging the AWS Lambda serverless platform to keep requests cheap and scalable.
5 |
6 | It is not truly low-latency yet, as the first-time initialization of the IndexReader takes time. Hence we keep the reader hot by performing its initialization in the constructor. This makes the first run slower, but all subsequent runs can complete in milliseconds.
7 |
8 | As Lucene interfaces with files over Java NIO.2, this implementation is made possible by the com.upplication.s3fs package, which provides a NIO.2 implementation on top of an S3 file system.
9 |
10 | Future State
11 | - Indexing directly to S3, making both directions serverless: we are unable to do this yet, as S3 does not allow the atomic move of files that Lucene requires.
12 | - Keeping the IndexReader object somewhere in cache to allow fast load on first run
13 |
14 |
15 |
16 |
17 |
--------------------------------------------------------------------------------
/lucene-s3-searcher/pom.xml:
--------------------------------------------------------------------------------
1 |
3 | 4.0.0
4 | lucene-s3-searcher
5 | lucene-s3-searcher
6 | 0.0.1-SNAPSHOT
7 |
8 | src
9 |
10 |
11 | maven-compiler-plugin
12 | 3.5.1
13 |
14 | 1.8
15 | 1.8
16 |
17 |
18 |
19 |
20 | maven-assembly-plugin
21 |
22 |
23 | package
24 |
25 | single
26 |
27 |
28 |
29 |
30 |
31 | jar-with-dependencies
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 | org.apache.lucene
44 | lucene-core
45 | 7.3.0
46 |
47 |
48 |
49 | org.apache.lucene
50 | lucene-queryparser
51 | 7.3.0
52 |
53 |
54 | com.upplication
55 | s3fs
56 | 2.2.1
57 |
58 |
59 |
60 | com.amazonaws
61 | aws-lambda-java-core
62 | 1.2.0
63 |
64 |
65 |
66 |
67 |
68 |
--------------------------------------------------------------------------------
/lucene-s3-searcher/src/com/printlele/SearchFiles.java:
--------------------------------------------------------------------------------
1 | package com.printlele;
2 |
3 | import java.io.IOException;
4 | import java.net.URI;
5 | import java.nio.file.Path;
6 | import java.util.ArrayList;
7 | import java.util.HashMap;
8 | import java.util.List;
9 | import java.util.Map;
10 |
11 | import org.apache.lucene.analysis.Analyzer;
12 | import org.apache.lucene.analysis.standard.StandardAnalyzer;
13 | import org.apache.lucene.document.Document;
14 | import org.apache.lucene.index.DirectoryReader;
15 | import org.apache.lucene.index.IndexReader;
16 | import org.apache.lucene.queryparser.classic.ParseException;
17 | import org.apache.lucene.queryparser.classic.QueryParser;
18 | import org.apache.lucene.search.IndexSearcher;
19 | import org.apache.lucene.search.Query;
20 | import org.apache.lucene.search.TopDocs;
21 | import org.apache.lucene.store.FSDirectory;
22 |
23 | import com.amazonaws.services.lambda.runtime.Context;
24 | import com.amazonaws.services.lambda.runtime.RequestHandler;
25 | import com.upplication.s3fs.S3FileSystemProvider;
26 |
27 | /** Simple command-line based search demo. */
28 | public class SearchFiles implements RequestHandler> {
29 |
30 | IndexReader reader;
31 |
32 | public SearchFiles() {
33 | String index = "///";
34 |
35 | //########### OPTIONAL #############################
36 | // You can directly assign a role to your lambda function
37 | Map env = new HashMap<>();
38 | env.put(com.upplication.s3fs.AmazonS3Factory.ACCESS_KEY, "ACCESS_KEY");
39 | env.put(com.upplication.s3fs.AmazonS3Factory.SECRET_KEY, "SECRET_KEY");
40 | String endpoint = "s3://s3.amazonaws.com/";
41 |
42 | Path path = new S3FileSystemProvider().newFileSystem(URI.create(endpoint), env).getPath(index);
43 |
44 | try {
45 | reader = DirectoryReader.open(FSDirectory.open(path));
46 | } catch (IOException e) {
47 | e.printStackTrace();
48 | }
49 | }
50 |
51 | @Override
52 | protected void finalize() throws Throwable {
53 | reader.close();
54 | }
55 |
56 | @Override
57 | public List handleRequest(String searchTerm, Context context) {
58 |
59 | context.getLogger().log("Input: " + searchTerm);
60 |
61 | String field = "FIELD_TO_SEARCH";
62 | IndexSearcher searcher = new IndexSearcher(reader);
63 | Analyzer analyzer = new StandardAnalyzer();
64 | QueryParser parser = new QueryParser(field, analyzer);
65 |
66 | List list = new ArrayList<>();
67 |
68 | try {
69 | Query query = parser.parse(searchTerm);
70 | TopDocs results = searcher.search(query, 100);
71 |
72 | for (int i = 0; i < results.scoreDocs.length; i++) {
73 | int docId = results.scoreDocs[i].doc;
74 | Document d = searcher.doc(docId);
75 | list.add((i + 1) + ". " + d.get("path"));
76 | }
77 |
78 | } catch (IOException | ParseException e) {
79 | // TODO Auto-generated catch block
80 | e.printStackTrace();
81 | }
82 |
83 | return list;
84 | }
85 |
86 | public static void main(String[] args) {
87 | System.out.println(new SearchFiles().handleRequest("testString", null));
88 | }
89 |
90 | }
91 |
--------------------------------------------------------------------------------