├── .gitignore ├── README.md └── lucene-s3-searcher ├── pom.xml └── src └── com └── printlele └── SearchFiles.java /.gitignore: -------------------------------------------------------------------------------- 1 | .project 2 | .classpath 3 | org.eclipse.jdt.core.prefs 4 | lucene-s3-searcher/.settings 5 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # aws-lambda-s3-index-search 2 | This will allow you to maintain your Lucene index in S3 and run your Lucene search across an S3 bucket. 3 | 4 | The idea here is to make the complete search architecture serverless by keeping the Lucene index in S3, which provides us with unlimited cheap storage, and leveraging the AWS Lambda serverless platform to also keep requests cheap and scalable. 5 | 6 | Though it is not truly scalable yet, as the first-time initialization of the IndexReader takes time to load. Hence we keep the reader hot by performing its initialization in the constructor. This makes the first run slower, but all subsequent runs can complete in milliseconds. 7 | 8 | As Lucene interfaces with files over Java NIO.2, this implementation is made possible using the com.upplication.s3fs package, which provides an NIO.2 implementation on top of an S3 file system. 9 | 10 | Future State 11 | - Indexing directly to S3, making both ways serverless: We are unable to do this yet, as S3 does not allow the atomic move of files that is required by Lucene. 
12 | - Keeping the IndexReader object somewhere in cache to allow fast load on first run 13 | 14 | 15 | 16 | 17 | -------------------------------------------------------------------------------- /lucene-s3-searcher/pom.xml: -------------------------------------------------------------------------------- 1 | 3 | 4.0.0 4 | lucene-s3-searcher 5 | lucene-s3-searcher 6 | 0.0.1-SNAPSHOT 7 | 8 | src 9 | 10 | 11 | maven-compiler-plugin 12 | 3.5.1 13 | 14 | 1.8 15 | 1.8 16 | 17 | 18 | 19 | 20 | maven-assembly-plugin 21 | 22 | 23 | package 24 | 25 | single 26 | 27 | 28 | 29 | 30 | 31 | jar-with-dependencies 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | 40 | 41 | 42 | 43 | org.apache.lucene 44 | lucene-core 45 | 7.3.0 46 | 47 | 48 | 49 | org.apache.lucene 50 | lucene-queryparser 51 | 7.3.0 52 | 53 | 54 | com.upplication 55 | s3fs 56 | 2.2.1 57 | 58 | 59 | 60 | com.amazonaws 61 | aws-lambda-java-core 62 | 1.2.0 63 | 64 | 65 | 66 | 67 | 68 | -------------------------------------------------------------------------------- /lucene-s3-searcher/src/com/printlele/SearchFiles.java: -------------------------------------------------------------------------------- 1 | package com.printlele; 2 | 3 | import java.io.IOException; 4 | import java.net.URI; 5 | import java.nio.file.Path; 6 | import java.util.ArrayList; 7 | import java.util.HashMap; 8 | import java.util.List; 9 | import java.util.Map; 10 | 11 | import org.apache.lucene.analysis.Analyzer; 12 | import org.apache.lucene.analysis.standard.StandardAnalyzer; 13 | import org.apache.lucene.document.Document; 14 | import org.apache.lucene.index.DirectoryReader; 15 | import org.apache.lucene.index.IndexReader; 16 | import org.apache.lucene.queryparser.classic.ParseException; 17 | import org.apache.lucene.queryparser.classic.QueryParser; 18 | import org.apache.lucene.search.IndexSearcher; 19 | import org.apache.lucene.search.Query; 20 | import org.apache.lucene.search.TopDocs; 21 | import org.apache.lucene.store.FSDirectory; 22 | 23 | import 
com.amazonaws.services.lambda.runtime.Context; 24 | import com.amazonaws.services.lambda.runtime.RequestHandler; 25 | import com.upplication.s3fs.S3FileSystemProvider; 26 | 27 | /** Simple command-line based search demo. */ 28 | public class SearchFiles implements RequestHandler> { 29 | 30 | IndexReader reader; 31 | 32 | public SearchFiles() { 33 | String index = "///"; 34 | 35 | //########### OPTIONAL ############################# 36 | // You can directly assign a role to your lambda function 37 | Map env = new HashMap<>(); 38 | env.put(com.upplication.s3fs.AmazonS3Factory.ACCESS_KEY, "ACCESS_KEY"); 39 | env.put(com.upplication.s3fs.AmazonS3Factory.SECRET_KEY, "SECRET_KEY"); 40 | String endpoint = "s3://s3.amazonaws.com/"; 41 | 42 | Path path = new S3FileSystemProvider().newFileSystem(URI.create(endpoint), env).getPath(index); 43 | 44 | try { 45 | reader = DirectoryReader.open(FSDirectory.open(path)); 46 | } catch (IOException e) { 47 | e.printStackTrace(); 48 | } 49 | } 50 | 51 | @Override 52 | protected void finalize() throws Throwable { 53 | reader.close(); 54 | } 55 | 56 | @Override 57 | public List handleRequest(String searchTerm, Context context) { 58 | 59 | context.getLogger().log("Input: " + searchTerm); 60 | 61 | String field = "FIELD_TO_SEARCH"; 62 | IndexSearcher searcher = new IndexSearcher(reader); 63 | Analyzer analyzer = new StandardAnalyzer(); 64 | QueryParser parser = new QueryParser(field, analyzer); 65 | 66 | List list = new ArrayList<>(); 67 | 68 | try { 69 | Query query = parser.parse(searchTerm); 70 | TopDocs results = searcher.search(query, 100); 71 | 72 | for (int i = 0; i < results.scoreDocs.length; i++) { 73 | int docId = results.scoreDocs[i].doc; 74 | Document d = searcher.doc(docId); 75 | list.add((i + 1) + ". 
" + d.get("path")); 76 | } 77 | 78 | } catch (IOException | ParseException e) { 79 | // TODO Auto-generated catch block 80 | e.printStackTrace(); 81 | } 82 | 83 | return list; 84 | } 85 | 86 | public static void main(String[] args) { 87 | System.out.println(new SearchFiles().handleRequest("testString", null)); 88 | } 89 | 90 | } 91 | --------------------------------------------------------------------------------