├── .gitignore ├── .JuliaFormatter.toml ├── search.sh ├── Project.toml ├── search.py ├── app ├── go.mod ├── main.go ├── go.sum └── templates │ └── index.html ├── embed.py ├── README.md ├── rag.jl └── Manifest.toml /.gitignore: -------------------------------------------------------------------------------- 1 | bindata 2 | databases 3 | -------------------------------------------------------------------------------- /.JuliaFormatter.toml: -------------------------------------------------------------------------------- 1 | indent=2 2 | always_for_in=true 3 | margin=80 4 | -------------------------------------------------------------------------------- /search.sh: -------------------------------------------------------------------------------- 1 | curl -X POST \ 2 | http://0.0.0.0:8003/find_matches \ 3 | -H 'Content-Type: application/json' \ 4 | -d '{ 5 | "query": "What is the role of GLP-1 and GLP-1 agonists in losing excess weight?", 6 | "k": 5 7 | }' 8 | -------------------------------------------------------------------------------- /Project.toml: -------------------------------------------------------------------------------- 1 | [deps] 2 | DBInterface = "a10d1c49-ce27-4219-8d33-6db1a4562965" 3 | DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 4 | HTTP = "cd3eb016-35fb-5094-929b-558a96fad6f3" 5 | JSON = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 6 | JSON3 = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" 7 | SQLite = "0aa819cd-b072-5ff4-a722-6bc24af294d9" 8 | StaticArrays = "90137ffa-7385-5640-81b9-e52037218182" 9 | StructTypes = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" 10 | -------------------------------------------------------------------------------- /search.py: -------------------------------------------------------------------------------- 1 | import requests 2 | 3 | 4 | def find_matches(text, k=5): 5 | # rag.jl endpoint 6 | url = "http://localhost:8003/find_matches" 7 | payload = {"query": text, "k": k} 8 | response = requests.post(url, json=payload) 9 | if response.status_code == 200: 10 | return response.json() 11 | else: 12 | raise Exception(f"Error from RAG service: {response.text}") 13 | 14 | 15 | if __name__ == "__main__": 16 | text = "What is the role of GLP-1 and GLP-1 agonists in losing excess weight?" 17 | # text = "What are the biologies of TEAD?" 18 | 19 | matches = find_matches(text) 20 | 21 | for match in matches: 22 | print(f"ID: {match['pmid']}, Distance: {match['distance']}") 23 | print(f"Title: {match['title']}") 24 | print(f"Authors: {match['authors']}") 25 | print(f"Publication Year: {match['publication_year']}") 26 | print(f"Abstract: {match['abstract'][:200]}...") 27 | print() 28 | -------------------------------------------------------------------------------- /app/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/domluna/pubmedFastRAG/app 2 | 3 | go 1.22.0 4 | 5 | require github.com/gin-gonic/gin v1.10.0 6 | 7 | require ( 8 | github.com/bytedance/sonic v1.11.6 // indirect 9 | github.com/bytedance/sonic/loader v0.1.1 // indirect 10 | github.com/cloudwego/base64x v0.1.4 // indirect 11 | github.com/cloudwego/iasm v0.2.0 // indirect 12 | github.com/gabriel-vasile/mimetype v1.4.3 // indirect 13 | github.com/gin-contrib/sse v0.1.0 // indirect 14 | github.com/go-playground/locales v0.14.1 // indirect 15 | github.com/go-playground/universal-translator v0.18.1 // indirect 16 | github.com/go-playground/validator/v10 v10.20.0 // indirect 17 | github.com/goccy/go-json v0.10.2 // indirect 18 | github.com/json-iterator/go v1.1.12 // indirect 19 | github.com/klauspost/cpuid/v2 v2.2.7 // indirect 20 | github.com/leodido/go-urn v1.4.0 // indirect 21 | github.com/mattn/go-isatty v0.0.20 // indirect 22 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect 23 | github.com/modern-go/reflect2 v1.0.2 // indirect 24 | github.com/pelletier/go-toml/v2 v2.2.2 // indirect 25 | github.com/twitchyliquid64/golang-asm v0.15.1 // indirect 26 | github.com/ugorji/go/codec v1.2.12 // indirect 27 | golang.org/x/arch v0.8.0 // indirect 28 | golang.org/x/crypto v0.23.0 // indirect 29 | golang.org/x/net v0.25.0 // indirect 30 | golang.org/x/sys v0.20.0 // indirect 31 | golang.org/x/text v0.15.0 // indirect 32 | google.golang.org/protobuf v1.34.1 // indirect 33 | gopkg.in/yaml.v3 v3.0.1 // indirect 34 | ) 35 | -------------------------------------------------------------------------------- /app/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "bytes" 5 | "encoding/json" 6 | "io" 7 | "log" 8 | "net/http" 9 | 10 | "github.com/gin-gonic/gin" 11 | ) 12 | 13 | type RAGRequest struct { 14 | Query string `json:"query"` 15 | K int `json:"k"` 16 | } 17 | 18 | type RAGResponse struct { 19 | PMID int `json:"pmid"` 20 | Distance int `json:"distance"` 21 | Authors string `json:"authors"` 22 | Title string `json:"title"` 23 | Abstract string `json:"abstract"` 24 | PublicationYear int `json:"publication_year"` 25 | } 26 | 27 | func handleHome(c *gin.Context) { 28 | c.HTML(http.StatusOK, "index.html", nil) 29 | } 30 | 31 | // body is RAGRequest 32 | func handleSearch(c *gin.Context) { 33 | var req RAGRequest 34 | if err := c.ShouldBindJSON(&req); err != nil { 35 | log.Println(err) 36 | c.JSON(http.StatusBadRequest, gin.H{"error": "Invalid request body"}) 37 | return 38 | } 39 | if req.K > 100 { 40 | log.Printf("Request for k = %d truncated to 100", req.K) 41 | req.K = 100 42 | } 43 | 44 | body, err := json.Marshal(req) 45 | if err != nil { 46 | log.Println(err) 47 | c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to marshal request"}) 48 | return 49 | } 50 | 51 | apiResp, err := http.Post("http://0.0.0.0:8003/find_matches", "application/json", bytes.NewBuffer(body)) 52 | if err != nil { 53 | log.Println(err) 54 | c.JSON(http.StatusInternalServerError, gin.H{"error": err.Error()}) 55 | return 56 | } 57 | defer apiResp.Body.Close() 58 | 59 | respBody, err := io.ReadAll(apiResp.Body) 60 | if err != nil { 61 | log.Println(err) 62 | c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to read response"}) 63 | return 64 | } 65 | 66 | var data []RAGResponse 67 | if err := json.Unmarshal(respBody, &data); err != nil { 68 | log.Println(err) 69 | c.JSON(http.StatusInternalServerError, gin.H{"error": "Failed to parse response"}) 70 | return 71 | } 72 | 73 | c.JSON(http.StatusOK, data) 74 | } 75 | 76 | func main() { 77 | r := gin.Default() 78 | r.LoadHTMLGlob("templates/*") 79 | r.GET("/", handleHome) 80 | r.POST("/search", handleSearch) 81 | log.Fatal(r.Run(":8080")) 82 | } 83 | -------------------------------------------------------------------------------- /embed.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException 2 | from pydantic import BaseModel 3 | import numpy as np 4 | import torch 5 | from transformers import AutoTokenizer, AutoModel 6 | import torch.nn.functional as F 7 | import time 8 | import uvicorn 9 | import fire 10 | 11 | MATRYOSHKA_DIM = 512 12 | 13 | app = FastAPI() 14 | 15 | tokenizer = None 16 | model = None 17 | 18 | 19 | class TextInput(BaseModel): 20 | text: str 21 | 22 | 23 | def mean_pooling(model_output, attention_mask): 24 | token_embeddings = model_output[0] 25 | input_mask_expanded = ( 26 | attention_mask.unsqueeze(-1).expand(token_embeddings.size()).float() 27 | ) 28 | return torch.sum(token_embeddings * input_mask_expanded, 1) / torch.clamp( 29 | input_mask_expanded.sum(1), min=1e-9 30 | ) 31 | 32 | 33 | def embed_text(text: str) -> (np.ndarray, dict): 34 | global tokenizer, model 35 | timings = {} 36 | 37 | start_time = time.time() 38 | 39 | with torch.no_grad(): 40 | tokenize_start = time.time() 41 | inputs = tokenizer( 42 | text, 43 | return_tensors="pt", 44 | ).to(model.device) 45 | tokenize_end = time.time() 46 | timings["tokenization"] = tokenize_end - tokenize_start 47 | 48 | model_start = time.time() 49 | outputs = model(**inputs) 50 | model_end = time.time() 51 | timings["model_inference"] = model_end - model_start 52 | 53 | process_start = time.time() 54 | 55 | embeddings = mean_pooling(outputs, inputs["attention_mask"]) 56 | embeddings = F.layer_norm(embeddings, normalized_shape=(embeddings.shape[1],)) 57 | embeddings = embeddings[:, :MATRYOSHKA_DIM] 58 | embeddings = F.normalize(embeddings, p=2, dim=1).cpu().numpy().reshape(-1) 59 | 60 | process_end = time.time() 61 | timings["post_processing"] = process_end - process_start 62 | 63 | quantize_start = time.time() 64 | quantized_embeddings = np.packbits(embeddings > 0) 65 | quantize_end = time.time() 66 | timings["quantization"] = quantize_end - quantize_start 67 | 68 | total_time = time.time() - start_time 69 | timings["total"] = total_time 70 | 71 | return embeddings, quantized_embeddings, timings 72 | 73 | 74 | @app.post("/embed") 75 | async def embed(input: TextInput): 76 | try: 77 | embedding, binary_embeddings, timings = embed_text(input.text) 78 | print(timings) 79 | 80 | return { 81 | "embedding": embedding.tolist(), 82 | "binary_embedding": binary_embeddings.tolist(), 83 | } 84 | except Exception as e: 85 | raise HTTPException(status_code=500, detail=str(e)) 86 | 87 | 88 | def main(port: int = 8002, device: str = "cpu"): 89 | global tokenizer, model 90 | 91 | # Set the device 92 | device = torch.device( 93 | device if torch.cuda.is_available() and device == "cuda" else "cpu" 94 | ) 95 | print(f"Using device: {device}") 96 | 97 | # Load the tokenizer and model 98 | tokenizer = AutoTokenizer.from_pretrained( 99 | "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True 100 | ) 101 | model = AutoModel.from_pretrained( 102 | "nomic-ai/nomic-embed-text-v1.5", trust_remote_code=True 103 | ).to(device) 104 | model.eval() 105 | 106 | uvicorn.run(app, host="0.0.0.0", port=port) 107 | 108 | 109 | if __name__ == "__main__": 110 | fire.Fire(main) 111 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # pubmedFastRAG 2 | 3 | > TLDR: Using binary RAG search, we can perform exact searches on PubMed in approximately 100ms end-to-end. 4 | 5 | This builds off of https://github.com/kyunghyuncho/pubmed-vectors by leveraging extremely fast binary exact search. 6 | 7 | The original embeddings, which were ~110GB, are compressed to 512 dimensions instead of 768 through [MRL](https://blog.nomic.ai/posts/nomic-embed-matryoshka). The 512 float32 values are then quantized to binary, resulting in 64 uint8 values. These values are then reinterpreted as 8 uint64 values which can be compared against each other with a single SIMD operation. 8 | 9 | - `rag.jl` - This file contains the `RAGServer` which exposes an endpoint @ '0.0.0.0:8003/find_matches'. This endpoint accepts a POST request with a query (str) and k (int). `k` is the number of most relevant results to return. 10 | - `embed.py` - This file contains a server that takes a query and returns a list of 64 uint8 values. This is called internally by `RAGServer`. 11 | 12 | ## Get Started 13 | 14 | ```sh 15 | python embed.py 16 | 17 | INFO: Started server process [888699] 18 | INFO: Waiting for application startup. 19 | INFO: Application startup complete. 20 | INFO: Uvicorn running on http://0.0.0.0:8002 (Press CTRL+C to quit) 21 | ``` 22 | 23 | 24 | ```sh 25 | # start a julia REPL with max threads 26 | j --project=. -t auto 27 | ``` 28 | 29 | ```julia 30 | include("rag.jl") 31 | # You need to create the database using the original repo linked above. It will be ~34GB. 32 | rag = RAGServer("databases/pubmed_data.db") 33 | start_server(rag) 34 | ``` 35 | 36 | You need the binary data for RAG. If you're interested in them, here's the [Google Drive link](https://drive.google.com/file/d/1LuCaUcILQuQgkDm3_tWBWr4X7AQ518kX/view?usp=sharing). 37 | 38 | ```sh 39 | λ ~/code/pubmedRAG: ls -lh bindata/ 40 | total 2.5G 41 | -rw-rw-r-- 1 dom dom 2.2G Jun 23 13:22 data.bin 42 | -rw-rw-r-- 1 dom dom 279M Jun 23 00:44 ids.bin 43 | ``` 44 | 45 | ### Example query 1 46 | 47 | > query = "What is the role of GLP-1 and GLP-1 agonists in losing excess weight?" k = 5 48 | 49 | ID: 26961053, Distance: 86 50 | Title: [GLP-1 agonist supports weight loss]. 51 | Authors: Maria Weiß 52 | Publication Year: 2016 53 | Abstract: Abstract not found... 54 | 55 | ID: 37100640, Distance: 89 56 | Title: Glucagon-like peptide 1 receptor agonists in end-staged kidney disease and kidney transplantation: A narrative review. 57 | Authors: Kristin K Clemens, Jaclyn Ernst, Tayyab Khan, Sonja Reichert, Qasim Khan, Heather LaPier, Michael Chiu, Saverio Stranges, Gurleen Sahi, Fabio Castrillon-Ramirez, Louise Moist 58 | Publication Year: 2023 59 | Abstract: Glucagon-like peptide 1 receptor agonists (GLP-1RA) improve glycemic control and promote weight loss in type 2 diabetes (DM2) and obesity. We identified studies describing the metabolic benefits of GL... 60 | 61 | ID: 36321278, Distance: 89 62 | Title: Weight loss between glucagon-like peptide-1 receptor agonists and bariatric surgery in adults with obesity: A systematic review and meta-analysis. 63 | Authors: Shohinee Sarma, Patricia Palcu 64 | Publication Year: 2022 65 | Abstract: Glucagon-like peptide-1 (GLP-1) receptor agonists recently demonstrated 15% to 20% weight loss in adults with obesity, a range which has previously been achieved only with bariatric surgery. This syst... 66 | 67 | ID: 34160039, Distance: 90 68 | Title: Glucagon-Like Peptide-1 (GLP-1) Receptor Agonism and Exercise: An Effective Strategy to Maintain Diet-Induced Weight Loss. 69 | Authors: Leonarda Galiuto, Giovanna Liuzzo 70 | Publication Year: 2021 71 | Abstract: Abstract not found... 72 | 73 | ID: 35914933, Distance: 90 74 | Title: The role of GLP-1 receptor agonists in managing type 2 diabetes. 75 | Authors: Noura Nachawi, Pratibha Pr Rao, Vinni Makin 76 | Publication Year: 2022 77 | Abstract: Glucagon-like peptide-1 (GLP-1) receptor agonists improve glycemic control in patients with type 2 diabetes mellitus, have cardioprotective and renoprotective effects, and do not cause weight gain or ... 78 | 79 | 80 | ### Example query 2 81 | > query = "What are the biologies of TEAD?" k = 5 82 | 83 | ID: 27421669, Distance: 116 84 | Title: An evolutionary, structural and functional overview of the mammalian TEAD1 and TEAD2 transcription factors. 85 | Authors: André Landin-Malt, Ataaillah Benhaddou, Alain Zider, Domenico Flagiello 86 | Publication Year: 2016 87 | Abstract: TEAD proteins constitute a family of highly conserved transcription factors, characterized by a DNA-binding domain called the TEA domain and a protein-binding domain that permits association with tran... 88 | 89 | ID: 33611407, Distance: 116 90 | Title: Exploring TEAD2 as a drug target for therapeutic intervention of cancer: A multi-computational case study. 91 | Authors: Rajesh Pal, Amit Kumar, Gauri Misra 92 | Publication Year: 2021 93 | Abstract: Transcriptional enhanced associate domain (TEAD) is a family of transcription factors that plays a significant role during embryonic developmental processes, and its dysregulation is responsible for t... 94 | 95 | ID: 36063664, Distance: 117 96 | Title: A chemical perspective on the modulation of TEAD transcriptional activities: Recent progress, challenges, and opportunities. 97 | Authors: Jianfeng Lou, Yuhang Lu, Jing Cheng, Feilong Zhou, Ziqin Yan, Daizhou Zhang, Xiangjing Meng, Yujun Zhao 98 | Publication Year: 2022 99 | Abstract: TEADs are transcription factors and core downstream components of the Hippo pathway. Mutations of the Hippo pathway and/or dysregulation of YAP/TAZ culminate in aberrant transcriptional activities of ... 100 | 101 | ID: 28198677, Distance: 118 102 | Title: Decipher the ancestry of the plant-specific LBD gene family. 103 | Authors: Yimeng Kong, Peng Xu, Xinyun Jing, Longxian Chen, Laigeng Li, Xuan Li 104 | Publication Year: 2017 105 | Abstract: Lateral Organ Boundaries Domain (LBD) genes arise from charophyte algae and evolve essential functions in land plants in regulating organ development and secondary metabolism. Although diverse plant s... 106 | 107 | ID: 33352993, Distance: 120 108 | Title: Protein-Protein Interaction Disruptors of the YAP/TAZ-TEAD Transcriptional Complex. 109 | Authors: Ajaybabu V Pobbati, Brian P Rubin 110 | Publication Year: 2020 111 | Abstract: The identification of protein-protein interaction disruptors (PPIDs) that disrupt the YAP/TAZ-TEAD interaction has gained considerable momentum. Several studies have shown that YAP/TAZ are no longer o... 112 | 113 | ## Timings 114 | 115 | On my machine, after JIT compilation, here are the RAG timings (in seconds): 116 | 117 | ``` 118 | Time taken for RAG 0.09114909172058105 119 | Time taken for DB query 0.0003159046173095703 120 | Time taken for RAG 0.09531593322753906 121 | Time taken for DB query 0.00039505958557128906 122 | Time taken for RAG 0.08925509452819824 123 | Time taken for DB query 0.04568600654602051 124 | ``` 125 | 126 | For embeddings (in seconds): 127 | 128 | ``` 129 | {'tokenization': 0.016788721084594727, 'model_inference': 0.0098114013671875, 'post_processing': 0.00022363662719726562, 'quantization': 2.5272369384765625e-05, 'total': 0.026870012283325195} 130 | 'total': 0.026870012283325195 131 | ``` 132 | 133 | Using CPU or GPU doesn't make a noticeable difference. The RAG timings do not differ either. 134 | -------------------------------------------------------------------------------- /app/go.sum: -------------------------------------------------------------------------------- 1 | github.com/bytedance/sonic v1.11.6 h1:oUp34TzMlL+OY1OUWxHqsdkgC/Zfc85zGqw9siXjrc0= 2 | github.com/bytedance/sonic v1.11.6/go.mod h1:LysEHSvpvDySVdC2f87zGWf6CIKJcAvqab1ZaiQtds4= 3 | github.com/bytedance/sonic/loader v0.1.1 h1:c+e5Pt1k/cy5wMveRDyk2X4B9hF4g7an8N3zCYjJFNM= 4 | github.com/bytedance/sonic/loader v0.1.1/go.mod h1:ncP89zfokxS5LZrJxl5z0UJcsk4M4yY2JpfqGeCtNLU= 5 | github.com/cloudwego/base64x v0.1.4 h1:jwCgWpFanWmN8xoIUHa2rtzmkd5J2plF/dnLS6Xd/0Y= 6 | github.com/cloudwego/base64x v0.1.4/go.mod h1:0zlkT4Wn5C6NdauXdJRhSKRlJvmclQ1hhJgA0rcu/8w= 7 | github.com/cloudwego/iasm v0.2.0 h1:1KNIy1I1H9hNNFEEH3DVnI4UujN+1zjpuk6gwHLTssg= 8 | github.com/cloudwego/iasm v0.2.0/go.mod h1:8rXZaNYT2n95jn+zTI1sDr+IgcD2GVs0nlbbQPiEFhY= 9 | github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 10 | github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= 11 | github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= 12 | github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0= 13 | github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk= 14 | github.com/gin-contrib/sse v0.1.0 h1:Y/yl/+YNO8GZSjAhjMsSuLt29uWRFHdHYUb5lYOV9qE= 15 | github.com/gin-contrib/sse v0.1.0/go.mod h1:RHrZQHXnP2xjPF+u1gW/2HnVO7nvIa9PG3Gm+fLHvGI= 16 | github.com/gin-gonic/gin v1.10.0 h1:nTuyha1TYqgedzytsKYqna+DfLos46nTv2ygFy86HFU= 17 | github.com/gin-gonic/gin v1.10.0/go.mod h1:4PMNQiOhvDRa013RKVbsiNwoyezlm2rm0uX/T7kzp5Y= 18 | github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s= 19 | github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4= 20 | github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA= 21 | github.com/go-playground/locales v0.14.1/go.mod h1:hxrqLVvrK65+Rwrd5Fc6F2O76J/NuW9t0sjnWqG1slY= 22 | github.com/go-playground/universal-translator v0.18.1 h1:Bcnm0ZwsGyWbCzImXv+pAJnYK9S473LQFuzCbDbfSFY= 23 | github.com/go-playground/universal-translator v0.18.1/go.mod h1:xekY+UJKNuX9WP91TpwSH2VMlDf28Uj24BCp08ZFTUY= 24 | github.com/go-playground/validator/v10 v10.20.0 h1:K9ISHbSaI0lyB2eWMPJo+kOS/FBExVwjEviJTixqxL8= 25 | github.com/go-playground/validator/v10 v10.20.0/go.mod h1:dbuPbCMFw/DrkbEynArYaCwl3amGuJotoKCe95atGMM= 26 | github.com/goccy/go-json v0.10.2 h1:CrxCmQqYDkv1z7lO7Wbh2HN93uovUHgrECaO5ZrCXAU= 27 | github.com/goccy/go-json v0.10.2/go.mod h1:6MelG93GURQebXPDq3khkgXZkazVtN9CRI+MGFi0w8I= 28 | github.com/google/go-cmp v0.5.5 h1:Khx7svrCpmxxtHBq5j2mp/xVjsi8hQMfNLvJFAlrGgU= 29 | github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= 30 | github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= 31 | github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= 32 | github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= 33 | github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= 34 | github.com/klauspost/cpuid/v2 v2.2.7 h1:ZWSB3igEs+d0qvnxR/ZBzXVmxkgt8DdzP6m9pfuVLDM= 35 | github.com/klauspost/cpuid/v2 v2.2.7/go.mod h1:Lcz8mBdAVJIBVzewtcLocK12l3Y+JytZYpaMropDUws= 36 | github.com/knz/go-libedit v1.10.1/go.mod h1:MZTVkCWyz0oBc7JOWP3wNAzd002ZbM/5hgShxwh4x8M= 37 | github.com/leodido/go-urn v1.4.0 h1:WT9HwE9SGECu3lg4d/dIA+jxlljEa1/ffXKmRjqdmIQ= 38 | github.com/leodido/go-urn v1.4.0/go.mod h1:bvxc+MVxLKB4z00jd1z+Dvzr47oO32F/QSNjSBOlFxI= 39 | github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= 40 | github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= 41 | github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 42 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w8PVh93nsPXa1VrQ6jlwL5oN8l14QlcNfg= 43 | github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= 44 | github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= 45 | github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= 46 | github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= 47 | github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= 48 | github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= 49 | github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= 50 | github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= 51 | github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= 52 | github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= 53 | github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= 54 | github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= 55 | github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 56 | github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= 57 | github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= 58 | github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= 59 | github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= 60 | github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= 61 | github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= 62 | github.com/twitchyliquid64/golang-asm v0.15.1 h1:SU5vSMR7hnwNxj24w34ZyCi/FmDZTkS4MhqMhdFk5YI= 63 | github.com/twitchyliquid64/golang-asm v0.15.1/go.mod h1:a1lVb/DtPvCB8fslRZhAngC2+aY1QWCk3Cedj/Gdt08= 64 | github.com/ugorji/go/codec v1.2.12 h1:9LC83zGrHhuUA9l16C9AHXAqEV/2wBQ4nkvumAE65EE= 65 | github.com/ugorji/go/codec v1.2.12/go.mod h1:UNopzCgEMSXjBc6AOMqYvWC1ktqTAfzJZUZgYf6w6lg= 66 | golang.org/x/arch v0.0.0-20210923205945-b76863e36670/go.mod h1:5om86z9Hs0C8fWVUuoMHwpExlXzs5Tkyp9hOrfG7pp8= 67 | golang.org/x/arch v0.8.0 h1:3wRIsP3pM4yUptoR96otTUOXI367OS0+c9eeRi9doIc= 68 | golang.org/x/arch v0.8.0/go.mod h1:FEVrYAQjsQXMVJ1nsMoVVXPZg6p2JE2mx8psSWTDQys= 69 | golang.org/x/crypto v0.23.0 h1:dIJU/v2J8Mdglj/8rJ6UUOM3Zc9zLZxVZwwxMooUSAI= 70 | golang.org/x/crypto v0.23.0/go.mod h1:CKFgDieR+mRhux2Lsu27y0fO304Db0wZe70UKqHu0v8= 71 | golang.org/x/net v0.25.0 h1:d/OCCoBEUq33pjydKrGQhw7IlUPI2Oylr+8qLx49kac= 72 | golang.org/x/net v0.25.0/go.mod h1:JkAGAh7GEvH74S6FOH42FLoXpXbE/aqXSrIQjXgsiwM= 73 | golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 74 | golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= 75 | golang.org/x/sys v0.20.0 h1:Od9JTbYCk261bKm4M/mw7AklTlFYIa0bIp9BgSm1S8Y= 76 | golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= 77 | golang.org/x/text v0.15.0 h1:h1V/4gjBv8v9cjcR6+AR5+/cIYK5N/WAgiv4xlsEtAk= 78 | golang.org/x/text v0.15.0/go.mod h1:18ZOQIKpY8NJVqYksKHtTdi31H5itFRjB5/qKTNYzSU= 79 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543 h1:E7g+9GITq07hpfrRu66IVDexMakfv52eLZ2CXBWiKr4= 80 | golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= 81 | google.golang.org/protobuf v1.34.1 h1:9ddQBjfCyZPOHPUiPxpYESBLc+T8P3E+Vo4IbKZgFWg= 82 | google.golang.org/protobuf v1.34.1/go.mod h1:c6P6GXX6sHbq/GpV6MGZEdwhWPcYBgnhAHhKbcUYpos= 83 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= 84 | gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= 85 | gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 86 | gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= 87 | gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= 88 | nullprogram.com/x/optparse v1.0.0/go.mod h1:KdyPE+Igbe0jQUrVfMqDMeJQIJZEuyV7pjYmp6pbG50= 89 | rsc.io/pdf v0.1.1/go.mod h1:n8OzWcQ6Sp37PL01nO98y4iUCRdTGarVfzxY20ICaU4= 90 | -------------------------------------------------------------------------------- /rag.jl: -------------------------------------------------------------------------------- 1 | using Base.Threads 2 | using StaticArrays 3 | using JSON3 4 | using HTTP 5 | using SQLite 6 | using DataFrames 7 | using DBInterface 8 | 9 | struct RAGServer 10 | ids::Vector{Int64} 11 | data::Matrix{UInt64} 12 | db::SQLite.DB 13 | end 14 | 15 | function RAGServer(dbpath::AbstractString) 16 | db = SQLite.DB(dbpath) 17 | ids, data = load_data_from_bin() 18 | return RAGServer(ids, data, db) 19 | end 20 | 21 | 22 | function load_data_from_bin() 23 | open("bindata/data.bin", "r") do file 24 | num_columns = read(file, UInt64) 25 | num_rows = read(file, UInt64) 26 | data = Matrix{UInt64}(undef, num_rows, num_columns) 27 | read!(file, data) 28 | 29 | ids = Vector{Int64}(undef, num_columns) 30 | open("bindata/ids.bin", "r") do id_file 31 | read!(id_file, ids) 32 | end 33 | 34 | return ids, data 35 | end 36 | end 37 | 38 | function get_article_data(db::SQLite.DB, pmids::Vector{String}) 39 | placeholders = join(fill("?", length(pmids)), ",") 40 | query = "SELECT pmid, title, authors, abstract, publication_year FROM articles WHERE pmid IN ($placeholders)" 41 | 42 | df = DBInterface.execute(db, query, pmids) |> DataFrame 43 | 44 | article_data = Dict{String,Dict{String,Any}}() 45 | for row in eachrow(df) 46 | article_data[string(row.pmid)] = Dict( 47 | "pmid" => row.pmid, 48 | "title" => !ismissing(row.title) ? row.title : "Title not found", 49 | "authors" => 50 | !ismissing(row.authors) ? row.authors : "Authors not found", 51 | "abstract" => 52 | !ismissing(row.abstract) ? row.abstract : "Abstract not found", 53 | "publication_year" => 54 | !ismissing(row.publication_year) ? row.publication_year : 55 | "Year not found", 56 | ) 57 | end 58 | 59 | return article_data 60 | end 61 | 62 | struct EmbedResponse 63 | embedding::Vector{Float32} 64 | binary_embedding::Vector{UInt8} 65 | end 66 | 67 | struct FindMatchesRequest 68 | query::String 69 | k::Int 70 | end 71 | 72 | struct FindMatchesResponse 73 | pmid::Int 74 | distance::Int 75 | authors::String 76 | title::String 77 | abstract::String 78 | publication_year::Int 79 | end 80 | 81 | function start_server(rag::RAGServer; port::Int = 8003) 82 | router = HTTP.Router() 83 | 84 | HTTP.register!( 85 | router, 86 | "POST", 87 | "/find_matches", 88 | function (req) 89 | try 90 | body = try 91 | JSON3.read(String(req.body), FindMatchesRequest) 92 | catch 93 | return HTTP.Response(400, "invalid JSON body") 94 | end 95 | 96 | if body.k <= 0 || body.k > 100 97 | return HTTP.Response( 98 | 400, 99 | "'k' parameter must be a positive integer and <= 100.", 100 | ) 101 | end 102 | 103 | # Call embed service 104 | embed_response = try 105 | HTTP.post( 106 | "http://0.0.0.0:8002/embed", 107 | ["Content-Type" => "application/json"], 108 | JSON3.write(Dict("text" => body.query)), 109 | ) 110 | catch e 111 | return HTTP.Response(500, "Internal server error") 112 | end 113 | 114 | if embed_response.status != 200 115 | return HTTP.Response( 116 | 502, 117 | "Error from embed service: $(String(embed_response.body))", 118 | ) 119 | end 120 | 121 | embed_body = try 122 | JSON3.read(String(embed_response.body), EmbedResponse) 123 | catch e 124 | return HTTP.Response(500, "Internal server error") 125 | end 126 | 127 | if length(embed_body.binary_embedding) != 64 128 | return HTTP.Response( 129 | 502, 130 | "Embedded query must be an array of 64 UInt8 elements", 131 | ) 132 | end 133 | 134 | query_uint64 = 135 | SVector{8,UInt64}(reinterpret(UInt64, embed_body.binary_embedding)) 136 | t1 = time() 137 | results = k_closest_parallel(rag.data, query_uint64, body.k) 138 | t2 = time() 139 | println("Time taken for RAG $(t2 - t1)") 140 | 141 | # Fetch article data from SQLite 142 | pmids = [string(rag.ids[result.second]) for result in results] 143 | t1 = time() 144 | article_data = get_article_data(rag.db, pmids) 145 | t2 = time() 146 | println("Time taken for DB query $(t2 - t1)") 147 | 148 | response = FindMatchesResponse[] 149 | for r in results 150 | id = string(rag.ids[r.second]) 151 | !haskey(article_data, id) && continue 152 | a = article_data[id] 153 | push!( 154 | response, 155 | FindMatchesResponse( 156 | parse(Int, a["pmid"]), 157 | r.first, 158 | a["authors"], 159 | a["title"], 160 | a["abstract"], 161 | a["publication_year"], 162 | ), 163 | ) 164 | end 165 | 166 | return HTTP.Response(200, JSON3.write(response)) 167 | catch e 168 | @info "error" e 169 | return HTTP.Response(500, "Internal server error") 170 | end 171 | end, 172 | ) 173 | 174 | HTTP.serve(router, "0.0.0.0", port) 175 | end 176 | 177 | 178 | @inline function hamming_distance(s1::AbstractString, s2::AbstractString)::Int 179 | s = 0 180 | for (c1, c2) in zip(s1, s2) 181 | if c1 != c2 182 | s += 1 183 | end 184 | end 185 | s 186 | end 187 | 188 | @inline function hamming_distance(x1::T, x2::T)::Int where {T<:Integer} 189 | return Int(count_ones(x1 ⊻ x2)) 190 | end 191 | 192 | @inline function hamming_distance1( 193 | x1::AbstractArray{T}, 194 | x2::AbstractArray{T}, 195 | )::Int where {T<:Integer} 196 | s = 0 197 | for i in eachindex(x1, x2) 198 | s += hamming_distance(x1[i], x2[i]) 199 | end 200 | s 201 | end 202 | 203 | @inline function hamming_distance( 204 | x1::AbstractArray{T}, 205 | x2::AbstractArray{T}, 206 | )::Int where {T<:Integer} 207 | s = 0 208 | @inbounds @simd for i in eachindex(x1, x2) 209 | s += hamming_distance(x1[i], x2[i]) 210 | end 211 | s 212 | end 213 | 214 | 215 | mutable struct MaxHeap 216 | const data::Vector{Pair{Int,Int}} 217 | current_idx::Int # add pairs until current_idx > length(data) 218 | const k::Int 219 | 220 | function MaxHeap(k::Int) 221 | new(fill((typemax(Int) => -1), k), 1, k) 222 | end 223 | end 224 | 225 | function insert!(heap::MaxHeap, value::Pair{Int,Int}) 226 | if heap.current_idx <= heap.k 227 | heap.data[heap.current_idx] = value 228 | heap.current_idx += 1 229 | if heap.current_idx > heap.k 230 | makeheap!(heap) 231 | end 232 | elseif value.first < heap.data[1].first 233 | heap.data[1] = value 234 | heapify!(heap, 1) 235 | end 236 | end 237 | 238 | function makeheap!(heap::MaxHeap) 239 | for i in div(heap.k, 2):-1:1 240 | heapify!(heap, i) 241 | end 242 | end 243 | 244 | function heapify!(heap::MaxHeap, i::Int) 245 | left = 2 * i 246 | right = 2 * i + 1 247 | largest = i 248 | 249 | if left <= length(heap.data) && 250 | heap.data[left].first > heap.data[largest].first 251 | largest = left 252 | end 253 | 254 | if right <= length(heap.data) && 255 | heap.data[right].first > heap.data[largest].first 256 | largest = right 257 | end 258 | 259 | if largest != i 260 | heap.data[i], heap.data[largest] = heap.data[largest], heap.data[i] 261 | heapify!(heap, largest) 262 | end 263 | end 264 | 265 | function _k_closest( 266 | db::AbstractMatrix{T}, 267 | query::AbstractVector{T}, 268 | k::Int; 269 | startind::Int = 1, 270 | ) where {T<:Integer} 271 | heap = MaxHeap(k) 272 | @inbounds for i in 1:size(db, 2) 273 | d = hamming_distance(view(db, :, i), query) 274 | insert!(heap, d => startind + i - 1) 275 | end 276 | return heap.data 277 | end 278 | 279 | function k_closest( 280 | db::AbstractMatrix{T}, 281 | query::AbstractVector{T}, 282 | k::Int; 283 | startind::Int = 1, 284 | ) where {T<:Integer} 285 | data = _k_closest(db, query, k; startind = startind) 286 | return sort!(data, by = x -> x.first) 287 | end 288 | 289 | function k_closest_parallel( 290 | db::AbstractMatrix{T}, 291 | query::AbstractVector{T}, 292 | k::Int; 293 | t::Int = nthreads(), 294 | ) where {T<:Integer} 295 | n = size(db, 2) 296 | if n < 10_000 || t == 1 297 | return k_closest(db, query, k) 298 | end 299 | task_ranges = [(i:min(i + n ÷ t - 1, n)) for i in 1:n÷t:n] 300 | tasks = map(task_ranges) do r 301 | Threads.@spawn _k_closest(view(db, :, r), query, k; startind = r[1]) 302 | end 303 | results = fetch.(tasks) 304 | sort!(vcat(results...), by = x -> x.first)[1:k] 305 | end 306 | -------------------------------------------------------------------------------- /app/templates/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | Pubmed Paper RAG Search 8 | 184 | 185 | 186 | 187 | 188 |
189 |

Pubmed Paper RAG Search

190 |
191 | 193 |
194 | 195 |
196 | 199 | 201 | 202 | 203 |
204 |
205 |

206 |
207 |
208 |

Search Settings

209 |
210 | 211 | 212 | 5 213 |
214 |

215 | Adjust the slider to set the number of matching papers to return. A higher number will provide more results but 216 | may include less relevant papers. 217 |

218 |
219 |
Searching...
220 |
221 |
222 | 223 | 293 | 294 | 295 | 296 | -------------------------------------------------------------------------------- /Manifest.toml: -------------------------------------------------------------------------------- 1 | # This file is machine-generated - editing it directly is not advised 2 | 3 | julia_version = "1.11.0-beta2" 4 | manifest_format = "2.0" 5 | project_hash = "43bd58d8d718acc3882480925be1d160e7d5408e" 6 | 7 | [[deps.Artifacts]] 8 | uuid = "56f22d72-fd6d-98f1-02f0-08ddc0907c33" 9 | version = "1.11.0" 10 | 11 | [[deps.Base64]] 12 | uuid = "2a0f44e3-6c83-55bd-87e4-b1978d98bd5f" 13 | version = "1.11.0" 14 | 15 | [[deps.BitFlags]] 16 | git-tree-sha1 = "0691e34b3bb8be9307330f88d1a3c3f25466c24d" 17 | uuid = "d1d4a3ce-64b1-5f1a-9ba4-7e7e69966f35" 18 | version = "0.1.9" 19 | 20 | [[deps.CodecZlib]] 21 | deps = ["TranscodingStreams", "Zlib_jll"] 22 | git-tree-sha1 = "59939d8a997469ee05c4b4944560a820f9ba0d73" 23 | uuid = "944b1d66-785c-5afd-91f1-9de20f533193" 24 | version = "0.7.4" 25 | 26 | [[deps.Compat]] 27 | deps = ["TOML", "UUIDs"] 28 | git-tree-sha1 = "b1c55339b7c6c350ee89f2c1604299660525b248" 29 | uuid = "34da2185-b29b-5c13-b0c7-acf172513d20" 30 | version = "4.15.0" 31 | weakdeps = ["Dates", "LinearAlgebra"] 32 | 33 | [deps.Compat.extensions] 34 | CompatLinearAlgebraExt = "LinearAlgebra" 35 | 36 | [[deps.CompilerSupportLibraries_jll]] 37 | deps = ["Artifacts", "Libdl"] 38 | uuid = "e66e0078-7015-5450-92f7-15fbd957f2ae" 39 | version = "1.1.1+0" 40 | 41 | [[deps.ConcurrentUtilities]] 42 | deps = ["Serialization", "Sockets"] 43 | git-tree-sha1 = "6cbbd4d241d7e6579ab354737f4dd95ca43946e1" 44 | uuid = "f0e56b4a-5159-44fe-b623-3e5288b988bb" 45 | version = "2.4.1" 46 | 47 | [[deps.Crayons]] 48 | git-tree-sha1 = "249fe38abf76d48563e2f4556bebd215aa317e15" 49 | uuid = "a8cc5b0e-0ffa-5ad4-8c14-923d3ee1735f" 50 | version = "4.1.1" 51 | 52 | [[deps.DBInterface]] 53 | git-tree-sha1 = "a444404b3f94deaa43ca2a58e18153a82695282b" 54 | uuid = "a10d1c49-ce27-4219-8d33-6db1a4562965" 55 | version = "2.6.1" 56 | 57 | [[deps.DataAPI]] 58 | git-tree-sha1 = "abe83f3a2f1b857aac70ef8b269080af17764bbe" 59 | uuid = "9a962f9c-6df0-11e9-0e5d-c546b8b5ee8a" 60 | version = "1.16.0" 61 | 62 | [[deps.DataFrames]] 63 | deps = ["Compat", "DataAPI", "DataStructures", "Future", "InlineStrings", "InvertedIndices", "IteratorInterfaceExtensions", "LinearAlgebra", "Markdown", "Missings", "PooledArrays", "PrecompileTools", "PrettyTables", "Printf", "REPL", "Random", "Reexport", "SentinelArrays", "SortingAlgorithms", "Statistics", "TableTraits", "Tables", "Unicode"] 64 | git-tree-sha1 = "04c738083f29f86e62c8afc341f0967d8717bdb8" 65 | uuid = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0" 66 | version = "1.6.1" 67 | 68 | [[deps.DataStructures]] 69 | deps = ["Compat", "InteractiveUtils", "OrderedCollections"] 70 | git-tree-sha1 = "1d0a14036acb104d9e89698bd408f63ab58cdc82" 71 | uuid = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8" 72 | version = "0.18.20" 73 | 74 | [[deps.DataValueInterfaces]] 75 | git-tree-sha1 = "bfc1187b79289637fa0ef6d4436ebdfe6905cbd6" 76 | uuid = "e2d170a0-9d28-54be-80f0-106bbe20a464" 77 | version = "1.0.0" 78 | 79 | [[deps.Dates]] 80 | deps = ["Printf"] 81 | uuid = "ade2ca70-3891-5945-98fb-dc099432e06a" 82 | version = "1.11.0" 83 | 84 | [[deps.ExceptionUnwrapping]] 85 | deps = ["Test"] 86 | git-tree-sha1 = "dcb08a0d93ec0b1cdc4af184b26b591e9695423a" 87 | uuid = "460bff9d-24e4-43bc-9d9f-a8973cb893f4" 88 | version = "0.1.10" 89 | 90 | [[deps.Future]] 91 | deps = ["Random"] 92 | uuid = "9fa8497b-333b-5362-9e8d-4d0656e87820" 93 | version = "1.11.0" 94 | 95 | [[deps.HTTP]] 96 | deps = ["Base64", "CodecZlib", "ConcurrentUtilities", "Dates", "ExceptionUnwrapping", "Logging", "LoggingExtras", "MbedTLS", "NetworkOptions", "OpenSSL", "Random", "SimpleBufferStream", "Sockets", "URIs", "UUIDs"] 97 | git-tree-sha1 = "d1d712be3164d61d1fb98e7ce9bcbc6cc06b45ed" 98 | uuid = "cd3eb016-35fb-5094-929b-558a96fad6f3" 99 | version = "1.10.8" 100 | 101 | [[deps.InlineStrings]] 102 | deps = ["Parsers"] 103 | git-tree-sha1 = "86356004f30f8e737eff143d57d41bd580e437aa" 104 | uuid = "842dd82b-1e85-43dc-bf29-5d0ee9dffc48" 105 | version = "1.4.1" 106 | 107 | [deps.InlineStrings.extensions] 108 | ArrowTypesExt = "ArrowTypes" 109 | 110 | [deps.InlineStrings.weakdeps] 111 | ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" 112 | 113 | [[deps.InteractiveUtils]] 114 | deps = ["Markdown"] 115 | uuid = "b77e0a4c-d291-57a0-90e8-8db25a27a240" 116 | version = "1.11.0" 117 | 118 | [[deps.InvertedIndices]] 119 | git-tree-sha1 = "0dc7b50b8d436461be01300fd8cd45aa0274b038" 120 | uuid = "41ab1584-1d38-5bbf-9106-f11c6c58b48f" 121 | version = "1.3.0" 122 | 123 | [[deps.IteratorInterfaceExtensions]] 124 | git-tree-sha1 = "a3f24677c21f5bbe9d2a714f95dcd58337fb2856" 125 | uuid = "82899510-4779-5014-852e-03e436cf321d" 126 | version = "1.0.0" 127 | 128 | [[deps.JLLWrappers]] 129 | deps = ["Artifacts", "Preferences"] 130 | git-tree-sha1 = "7e5d6779a1e09a36db2a7b6cff50942a0a7d0fca" 131 | uuid = "692b3bcd-3c85-4b1f-b108-f13ce0eb3210" 132 | version = "1.5.0" 133 | 134 | [[deps.JSON]] 135 | deps = ["Dates", "Mmap", "Parsers", "Unicode"] 136 | git-tree-sha1 = "31e996f0a15c7b280ba9f76636b3ff9e2ae58c9a" 137 | uuid = "682c06a0-de6a-54ab-a142-c8b1cf79cde6" 138 | version = "0.21.4" 139 | 140 | [[deps.JSON3]] 141 | deps = ["Dates", "Mmap", "Parsers", "PrecompileTools", "StructTypes", "UUIDs"] 142 | git-tree-sha1 = "eb3edce0ed4fa32f75a0a11217433c31d56bd48b" 143 | uuid = "0f8b85d8-7281-11e9-16c2-39a750bddbf1" 144 | version = "1.14.0" 145 | 146 | [deps.JSON3.extensions] 147 | JSON3ArrowExt = ["ArrowTypes"] 148 | 149 | [deps.JSON3.weakdeps] 150 | ArrowTypes = "31f734f8-188a-4ce0-8406-c8a06bd891cd" 151 | 152 | [[deps.LaTeXStrings]] 153 | git-tree-sha1 = "50901ebc375ed41dbf8058da26f9de442febbbec" 154 | uuid = "b964fa9f-0449-5b57-a5c2-d3ea65f4040f" 155 | version = "1.3.1" 156 | 157 | [[deps.Libdl]] 158 | uuid = "8f399da3-3557-5675-b5ff-fb832c97cbdb" 159 | version = "1.11.0" 160 | 161 | [[deps.LinearAlgebra]] 162 | deps = ["Libdl", "OpenBLAS_jll", "libblastrampoline_jll"] 163 | uuid = "37e2e46d-f89d-539d-b4ee-838fcccc9c8e" 164 | version = "1.11.0" 165 | 166 | [[deps.Logging]] 167 | uuid = "56ddb016-857b-54e1-b83d-db4d58db5568" 168 | version = "1.11.0" 169 | 170 | [[deps.LoggingExtras]] 171 | deps = ["Dates", "Logging"] 172 | git-tree-sha1 = "c1dd6d7978c12545b4179fb6153b9250c96b0075" 173 | uuid = "e6f89c97-d47a-5376-807f-9c37f3926c36" 174 | version = "1.0.3" 175 | 176 | [[deps.Markdown]] 177 | deps = ["Base64"] 178 | uuid = "d6f4376e-aef5-505a-96c1-9c027394607a" 179 | version = "1.11.0" 180 | 181 | [[deps.MbedTLS]] 182 | deps = ["Dates", "MbedTLS_jll", "MozillaCACerts_jll", "NetworkOptions", "Random", "Sockets"] 183 | git-tree-sha1 = "c067a280ddc25f196b5e7df3877c6b226d390aaf" 184 | uuid = "739be429-bea8-5141-9913-cc70e7f3736d" 185 | version = "1.1.9" 186 | 187 | [[deps.MbedTLS_jll]] 188 | deps = ["Artifacts", "Libdl"] 189 | uuid = "c8ffd9c3-330d-5841-b78e-0817d7145fa1" 190 | version = "2.28.6+0" 191 | 192 | [[deps.Missings]] 193 | deps = ["DataAPI"] 194 | git-tree-sha1 = "ec4f7fbeab05d7747bdf98eb74d130a2a2ed298d" 195 | uuid = "e1d29d7a-bbdc-5cf2-9ac0-f12de2c33e28" 196 | version = "1.2.0" 197 | 198 | [[deps.Mmap]] 199 | uuid = "a63ad114-7e13-5084-954f-fe012c677804" 200 | version = "1.11.0" 201 | 202 | [[deps.MozillaCACerts_jll]] 203 | uuid = "14a3606d-f60d-562e-9121-12d972cd8159" 204 | version = "2023.12.12" 205 | 206 | [[deps.NetworkOptions]] 207 | uuid = "ca575930-c2e3-43a9-ace4-1e988b2c1908" 208 | version = "1.2.0" 209 | 210 | [[deps.OpenBLAS_jll]] 211 | deps = ["Artifacts", "CompilerSupportLibraries_jll", "Libdl"] 212 | uuid = "4536629a-c528-5b80-bd46-f80d51c5b363" 213 | version = "0.3.27+1" 214 | 215 | [[deps.OpenSSL]] 216 | deps = ["BitFlags", "Dates", "MozillaCACerts_jll", "OpenSSL_jll", "Sockets"] 217 | git-tree-sha1 = "38cb508d080d21dc1128f7fb04f20387ed4c0af4" 218 | uuid = "4d8831e6-92b7-49fb-bdf8-b643e874388c" 219 | version = "1.4.3" 220 | 221 | [[deps.OpenSSL_jll]] 222 | deps = ["Artifacts", "JLLWrappers", "Libdl"] 223 | git-tree-sha1 = "a028ee3cb5641cccc4c24e90c36b0a4f7707bdf5" 224 | uuid = "458c3c95-2e84-50aa-8efc-19380b2a3a95" 225 | version = "3.0.14+0" 226 | 227 | [[deps.OrderedCollections]] 228 | git-tree-sha1 = "dfdf5519f235516220579f949664f1bf44e741c5" 229 | uuid = "bac558e1-5e72-5ebc-8fee-abe8a469f55d" 230 | version = "1.6.3" 231 | 232 | [[deps.Parsers]] 233 | deps = ["Dates", "PrecompileTools", "UUIDs"] 234 | git-tree-sha1 = "8489905bcdbcfac64d1daa51ca07c0d8f0283821" 235 | uuid = "69de0a69-1ddd-5017-9359-2bf0b02dc9f0" 236 | version = "2.8.1" 237 | 238 | [[deps.PooledArrays]] 239 | deps = ["DataAPI", "Future"] 240 | git-tree-sha1 = "36d8b4b899628fb92c2749eb488d884a926614d3" 241 | uuid = "2dfb63ee-cc39-5dd5-95bd-886bf059d720" 242 | version = "1.4.3" 243 | 244 | [[deps.PrecompileTools]] 245 | deps = ["Preferences"] 246 | git-tree-sha1 = "5aa36f7049a63a1528fe8f7c3f2113413ffd4e1f" 247 | uuid = "aea7be01-6a6a-4083-8856-8a6e6704d82a" 248 | version = "1.2.1" 249 | 250 | [[deps.Preferences]] 251 | deps = ["TOML"] 252 | git-tree-sha1 = "9306f6085165d270f7e3db02af26a400d580f5c6" 253 | uuid = "21216c6a-2e73-6563-6e65-726566657250" 254 | version = "1.4.3" 255 | 256 | [[deps.PrettyTables]] 257 | deps = ["Crayons", "LaTeXStrings", "Markdown", "PrecompileTools", "Printf", "Reexport", "StringManipulation", "Tables"] 258 | git-tree-sha1 = "66b20dd35966a748321d3b2537c4584cf40387c7" 259 | uuid = "08abe8d2-0d0c-5749-adfa-8a2ac140af0d" 260 | version = "2.3.2" 261 | 262 | [[deps.Printf]] 263 | deps = ["Unicode"] 264 | uuid = "de0858da-6303-5e67-8744-51eddeeeb8d7" 265 | version = "1.11.0" 266 | 267 | [[deps.REPL]] 268 | deps = ["InteractiveUtils", "Markdown", "Sockets", "StyledStrings", "Unicode"] 269 | uuid = "3fa0cd96-eef1-5676-8a61-b3b8758bbffb" 270 | version = "1.11.0" 271 | 272 | [[deps.Random]] 273 | deps = ["SHA"] 274 | uuid = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c" 275 | version = "1.11.0" 276 | 277 | [[deps.Reexport]] 278 | git-tree-sha1 = "45e428421666073eab6f2da5c9d310d99bb12f9b" 279 | uuid = "189a3867-3050-52da-a836-e630ba90ab69" 280 | version = "1.2.2" 281 | 282 | [[deps.SHA]] 283 | uuid = "ea8e919c-243c-51af-8825-aaa63cd721ce" 284 | version = "0.7.0" 285 | 286 | [[deps.SQLite]] 287 | deps = ["DBInterface", "Random", "SQLite_jll", "Serialization", "Tables", "WeakRefStrings"] 288 | git-tree-sha1 = "38b82dbc52b7db40bea182688c7a1103d06948a4" 289 | uuid = "0aa819cd-b072-5ff4-a722-6bc24af294d9" 290 | version = "1.6.1" 291 | 292 | [[deps.SQLite_jll]] 293 | deps = ["Artifacts", "JLLWrappers", "Libdl", "Zlib_jll"] 294 | git-tree-sha1 = "004fffbe2711abdc7263a980bbb1af9620781dd9" 295 | uuid = "76ed43ae-9a5d-5a62-8c75-30186b810ce8" 296 | version = "3.45.3+0" 297 | 298 | [[deps.SentinelArrays]] 299 | deps = ["Dates", "Random"] 300 | git-tree-sha1 = "90b4f68892337554d31cdcdbe19e48989f26c7e6" 301 | uuid = "91c51154-3ec4-41a3-a24f-3f23e20d615c" 302 | version = "1.4.3" 303 | 304 | [[deps.Serialization]] 305 | uuid = "9e88b42a-f829-5b0c-bbe9-9e923198166b" 306 | version = "1.11.0" 307 | 308 | [[deps.SimpleBufferStream]] 309 | git-tree-sha1 = "874e8867b33a00e784c8a7e4b60afe9e037b74e1" 310 | uuid = "777ac1f9-54b0-4bf8-805c-2214025038e7" 311 | version = "1.1.0" 312 | 313 | [[deps.Sockets]] 314 | uuid = "6462fe0b-24de-5631-8697-dd941f90decc" 315 | version = "1.11.0" 316 | 317 | [[deps.SortingAlgorithms]] 318 | deps = ["DataStructures"] 319 | git-tree-sha1 = "66e0a8e672a0bdfca2c3f5937efb8538b9ddc085" 320 | uuid = "a2af1166-a08f-5f64-846c-94a0d3cef48c" 321 | version = "1.2.1" 322 | 323 | [[deps.StaticArrays]] 324 | deps = ["LinearAlgebra", "PrecompileTools", "Random", "StaticArraysCore"] 325 | git-tree-sha1 = "6e00379a24597be4ae1ee6b2d882e15392040132" 326 | uuid = "90137ffa-7385-5640-81b9-e52037218182" 327 | version = "1.9.5" 328 | 329 | [deps.StaticArrays.extensions] 330 | StaticArraysChainRulesCoreExt = "ChainRulesCore" 331 | StaticArraysStatisticsExt = "Statistics" 332 | 333 | [deps.StaticArrays.weakdeps] 334 | ChainRulesCore = "d360d2e6-b24c-11e9-a2a3-2a2ae2dbcce4" 335 | Statistics = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 336 | 337 | [[deps.StaticArraysCore]] 338 | git-tree-sha1 = "192954ef1208c7019899fbf8049e717f92959682" 339 | uuid = "1e83bf80-4336-4d27-bf5d-d5a4f845583c" 340 | version = "1.4.3" 341 | 342 | [[deps.Statistics]] 343 | deps = ["LinearAlgebra"] 344 | git-tree-sha1 = "ae3bb1eb3bba077cd276bc5cfc337cc65c3075c0" 345 | uuid = "10745b16-79ce-11e8-11f9-7d13ad32a3b2" 346 | version = "1.11.1" 347 | 348 | [deps.Statistics.extensions] 349 | SparseArraysExt = ["SparseArrays"] 350 | 351 | [deps.Statistics.weakdeps] 352 | SparseArrays = "2f01184e-e22b-5df5-ae63-d93ebab69eaf" 353 | 354 | [[deps.StringManipulation]] 355 | deps = ["PrecompileTools"] 356 | git-tree-sha1 = "a04cabe79c5f01f4d723cc6704070ada0b9d46d5" 357 | uuid = "892a3eda-7b42-436c-8928-eab12a02cf0e" 358 | version = "0.3.4" 359 | 360 | [[deps.StructTypes]] 361 | deps = ["Dates", "UUIDs"] 362 | git-tree-sha1 = "ca4bccb03acf9faaf4137a9abc1881ed1841aa70" 363 | uuid = "856f2bd8-1eba-4b0a-8007-ebc267875bd4" 364 | version = "1.10.0" 365 | 366 | [[deps.StyledStrings]] 367 | uuid = "f489334b-da3d-4c2e-b8f0-e476e12c162b" 368 | version = "1.11.0" 369 | 370 | [[deps.TOML]] 371 | deps = ["Dates"] 372 | uuid = "fa267f1f-6049-4f14-aa54-33bafae1ed76" 373 | version = "1.0.3" 374 | 375 | [[deps.TableTraits]] 376 | deps = ["IteratorInterfaceExtensions"] 377 | git-tree-sha1 = "c06b2f539df1c6efa794486abfb6ed2022561a39" 378 | uuid = "3783bdb8-4a98-5b6b-af9a-565f29a5fe9c" 379 | version = "1.0.1" 380 | 381 | [[deps.Tables]] 382 | deps = ["DataAPI", "DataValueInterfaces", "IteratorInterfaceExtensions", "LinearAlgebra", "OrderedCollections", "TableTraits"] 383 | git-tree-sha1 = "cb76cf677714c095e535e3501ac7954732aeea2d" 384 | uuid = "bd369af6-aec1-5ad0-b16a-f7cc5008161c" 385 | version = "1.11.1" 386 | 387 | [[deps.Test]] 388 | deps = ["InteractiveUtils", "Logging", "Random", "Serialization"] 389 | uuid = "8dfed614-e22c-5e08-85e1-65c5234f0b40" 390 | version = "1.11.0" 391 | 392 | [[deps.TranscodingStreams]] 393 | git-tree-sha1 = "a947ea21087caba0a798c5e494d0bb78e3a1a3a0" 394 | uuid = "3bb67fe8-82b1-5028-8e26-92a6c54297fa" 395 | version = "0.10.9" 396 | weakdeps = ["Random", "Test"] 397 | 398 | [deps.TranscodingStreams.extensions] 399 | TestExt = ["Test", "Random"] 400 | 401 | [[deps.URIs]] 402 | git-tree-sha1 = "67db6cc7b3821e19ebe75791a9dd19c9b1188f2b" 403 | uuid = "5c2747f8-b7ea-4ff2-ba2e-563bfd36b1d4" 404 | version = "1.5.1" 405 | 406 | [[deps.UUIDs]] 407 | deps = ["Random", "SHA"] 408 | uuid = "cf7118a7-6976-5b1a-9a39-7adc72f591a4" 409 | version = "1.11.0" 410 | 411 | [[deps.Unicode]] 412 | uuid = "4ec0a83e-493e-50e2-b9ac-8f72acf5a8f5" 413 | version = "1.11.0" 414 | 415 | [[deps.WeakRefStrings]] 416 | deps = ["DataAPI", "InlineStrings", "Parsers"] 417 | git-tree-sha1 = "b1be2855ed9ed8eac54e5caff2afcdb442d52c23" 418 | uuid = "ea10d353-3f73-51f8-a26c-33c1cb351aa5" 419 | version = "1.4.2" 420 | 421 | [[deps.Zlib_jll]] 422 | deps = ["Libdl"] 423 | uuid = "83775a58-1f1d-513f-b197-d71354ab007a" 424 | version = "1.2.13+1" 425 | 426 | [[deps.libblastrampoline_jll]] 427 | deps = ["Artifacts", "Libdl"] 428 | uuid = "8e850b90-86db-534c-a0d3-1478176c7d93" 429 | version = "5.8.0+1" 430 | --------------------------------------------------------------------------------