├── .gitignore ├── LICENSE ├── README.md ├── backend ├── .env.example ├── helpers │ ├── ClueWeb22Api.py │ ├── concurrent_fetch.py │ ├── embedding.py │ └── range_dictionary.py ├── rag │ ├── client.py │ └── server.py ├── ragviz.py ├── requirements.txt ├── search │ ├── clueweb │ │ ├── ranges │ │ │ ├── 0.tsv │ │ │ ├── 1.tsv │ │ │ ├── 10.tsv │ │ │ ├── 11.tsv │ │ │ ├── 12.tsv │ │ │ ├── 13.tsv │ │ │ ├── 14.tsv │ │ │ ├── 15.tsv │ │ │ ├── 16.tsv │ │ │ ├── 17.tsv │ │ │ ├── 18.tsv │ │ │ ├── 19.tsv │ │ │ ├── 2.tsv │ │ │ ├── 20.tsv │ │ │ ├── 21.tsv │ │ │ ├── 22.tsv │ │ │ ├── 23.tsv │ │ │ ├── 24.tsv │ │ │ ├── 25.tsv │ │ │ ├── 26.tsv │ │ │ ├── 27.tsv │ │ │ ├── 28.tsv │ │ │ ├── 29.tsv │ │ │ ├── 3.tsv │ │ │ ├── 30.tsv │ │ │ ├── 31.tsv │ │ │ ├── 32.tsv │ │ │ ├── 33.tsv │ │ │ ├── 34.tsv │ │ │ ├── 35.tsv │ │ │ ├── 36.tsv │ │ │ ├── 37.tsv │ │ │ ├── 38.tsv │ │ │ ├── 39.tsv │ │ │ ├── 4.tsv │ │ │ ├── 40.tsv │ │ │ ├── 41.tsv │ │ │ ├── 42.tsv │ │ │ ├── 43.tsv │ │ │ ├── 44.tsv │ │ │ ├── 45.tsv │ │ │ ├── 46.tsv │ │ │ ├── 5.tsv │ │ │ ├── 6.tsv │ │ │ ├── 7.tsv │ │ │ ├── 8.tsv │ │ │ ├── 9.tsv │ │ │ └── clueweb.tsv │ │ └── search.py │ ├── pile │ │ ├── ranges │ │ │ └── pile.tsv │ │ └── search.py │ └── search.py └── snippet │ ├── naive_first.py │ ├── sliding_window.py │ └── snippet.py └── frontend ├── .env.local.example ├── .eslintrc.json ├── next-env.d.ts ├── next.config.mjs ├── package-lock.json ├── package.json ├── postcss.config.js ├── public ├── android-chrome-192x192.png ├── android-chrome-512x512.png ├── apple-touch-icon.png ├── bg.svg ├── favicon-16x16.png ├── favicon-32x32.png ├── favicon.ico ├── ragviz-square.png └── site.webmanifest ├── src └── app │ ├── components │ ├── answer.tsx │ ├── footer.tsx │ ├── logo.tsx │ ├── popover.tsx │ ├── preset-query.tsx │ ├── relates.tsx │ ├── result.tsx │ ├── search.tsx │ ├── skeleton.tsx │ ├── sources.tsx │ ├── title.tsx │ └── wrapper.tsx │ ├── globals.css │ ├── interfaces │ ├── relate.ts │ └── source.ts │ ├── layout.tsx │ ├── page.tsx │ ├── search │ 
└── page.tsx │ └── utils │ ├── cn.ts │ ├── fetch-stream.ts │ ├── get-search-url.ts │ ├── parse-rag.ts │ └── parse-rewrite.ts ├── tailwind.config.ts └── tsconfig.json /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | .env 3 | .env.local 4 | node_modules/ 5 | .next/ 6 | ui/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Tevin Wang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # drawing

RAGViz

2 | 3 | RAGViz (Retrieval Augmented Generation Visualization) is a tool that visualizes both document and token-level attention on the retrieved context fed to the LLM to ground answer generation. 4 | 5 | - RAGViz provides an add/remove document functionality to compare the generated tokens when certain documents are not included in the context. 6 | - Combining both functionalities allows for a diagnosis of the effectiveness and influence of certain retrieved documents or sections of text on the LLM's answer generation. 7 | 8 | ### Demo Video 9 | A basic demonstration of RAGViz is available [here](https://www.youtube.com/embed/cTAbuTu6ur4?si=-uZ2AyNLx-5p8MZC). 10 | 11 | ### Configuration 12 | 13 | The following are the system configurations of our RAGViz demonstration: 14 | - The [Pile-CC](https://github.com/EleutherAI/pile-cc) English documents are used for retrieval 15 | - Documents are partitioned into 4 [DiskANN](https://github.com/microsoft/DiskANN/) indexes on separate nodes, each with ~20 million documents 16 | - Documents are embedded into feature vectors using [AnchorDR](https://github.com/yiqingxyq/AnchorDR). **To use [AnchorDR](https://github.com/yiqingxyq/AnchorDR) in RAGViz you must follow the installation instructions on the repo [here](https://github.com/yiqingxyq/AnchorDR) to ensure your Python environment is set up correctly. 
Do this after running `pip install -r backend/requirements.txt`.** 17 | - [LLaMa2](https://huggingface.co/docs/transformers/v4.34.0/en/model_doc/llama2) generation/attention output is done with [vLLM](https://github.com/vllm-project/vllm) and the [HuggingFace](https://huggingface.co/) transformers library 18 | - Frontend UI is adapted from [Lepton search engine](https://github.com/leptonai/search_with_lepton) 19 | 20 | ### Customization 21 | 22 | #### Snippets: 23 | You can modify the snippets used for context in RAG by adding a new file and class in `backend/snippet`, and adding it to `backend/ragviz.py` and `frontend/src/app/components/search.tsx`. We currently offer the following snippets: 24 | - Naive First: 25 | - Represent a document with its first 128 tokens 26 | - Sliding Window: 27 | - Compute inner product similarity between windows of 128 tokens and the query; use the most similar window to the query to represent a document 28 | 29 | #### Datasets: 30 | New datasets for retrieval can be added using a new file and class in `backend/search`, and modifying `backend/ragviz.py` accordingly. 31 | 32 | We have currently implemented the following datasets: 33 | - ClueWeb22-B English documents 34 | - Pile-CC dataset 35 | 36 | #### LLMs: 37 | Any model supported by the [HuggingFace](https://huggingface.co/) transformers library can be used as the LLM backbone. 38 | 39 | To apply vLLM for fast inference, the LLM backbone needs to be supported by vLLM. A list of vLLM-supported models is available [here](https://docs.vllm.ai/en/latest/models/supported_models.html). 40 | 41 | You can set the path of the model used for RAG inside of `backend/.env.example`. We used `meta-llama/Llama-2-7b-chat-hf` for the demo. 
42 | 43 | -------------------------------------------------------------------------------- /backend/.env.example: -------------------------------------------------------------------------------- 1 | PROJECT_DIR= 2 | API_KEY= 3 | BACKEND_ADDR= 4 | BACKEND_PORT= 5 | RAG_ADDR= 6 | RAG_PORT= 7 | RAG_MODEL= 8 | 9 | CLUEWEB_ADDR_0= 10 | CLUEWEB_PORT_0= 11 | CLUEWEB_ADDR_1= 12 | CLUEWEB_PORT_1= 13 | CLUEWEB_ADDR_2= 14 | CLUEWEB_PORT_2= 15 | CLUEWEB_ADDR_3= 16 | CLUEWEB_PORT_3= 17 | CLUEWEB_PATH= 18 | 19 | PILE_ADDR_0= 20 | PILE_PORT_0= 21 | PILE_ADDR_1= 22 | PILE_PORT_1= 23 | PILE_ADDR_2= 24 | PILE_PORT_2= 25 | PILE_ADDR_3= 26 | PILE_PORT_3= 27 | PILE_PATH= 28 | -------------------------------------------------------------------------------- /backend/helpers/ClueWeb22Api.py: -------------------------------------------------------------------------------- 1 | 2 | import os 3 | import gzip 4 | 5 | class ClueWeb22Api: 6 | 7 | def __init__(self, cw22id, cw22root_path): 8 | self.cw22id = cw22id 9 | self.cw22root_path = cw22root_path 10 | 11 | def get_base_filename_by_id(self, cw22id, cw22root_path, file_type='html'): 12 | html_path = self.cw22root_path + os.sep + file_type 13 | id_parts = cw22id.split('-') 14 | doc = int(id_parts[len(id_parts) - 1]) 15 | 16 | language = id_parts[1][:2] 17 | segment = id_parts[1][:4] 18 | directory = id_parts[1] 19 | base_path = html_path + os.sep + language + os.sep + segment + os.sep + directory + os.sep 20 | base_filename = base_path + id_parts[1] + '-' + id_parts[2] 21 | return base_filename 22 | 23 | def get_primary_node_ids(self, annotate_html): 24 | annotations = annotate_html.annotations 25 | primary_node_ids = [] 26 | for annotation in annotations: 27 | if annotation.type == AnnotateHtml.AnnotationType.Primary: 28 | primary_node_ids.append(int(annotation.nodeId)) 29 | primary_node_ids.sort() 30 | return primary_node_ids 31 | 32 | def get_html_from_warc(self): 33 | cw22id = self.cw22id 34 | cw22root_path = self.cw22root_path 35 | 
base_filename = self.get_base_filename_by_id(cw22id, cw22root_path) 36 | 37 | warc_path = base_filename + '.warc.gz' 38 | offset_path = base_filename + '.warc.offset' 39 | 40 | id_parts = cw22id.split('-') 41 | doc = int(id_parts[len(id_parts) - 1]) 42 | 43 | #Get html from warc using offset 44 | offset_length = len('{:010d}\n'.format(0, 0)) 45 | with open (warc_path,'rb') as f_warc: 46 | with open (offset_path, 'r') as f_offset: 47 | f_offset.seek(int(doc) * int(offset_length)) 48 | start_bytes = int (f_offset.read (offset_length).strip()) 49 | end_bytes = int (f_offset.read (offset_length).strip()) 50 | f_warc.seek(start_bytes) 51 | record = f_warc.read(end_bytes - start_bytes) 52 | record = gzip.decompress(record).decode('utf-8') 53 | 54 | #Remove the WARC header to get the htmlStr 55 | warc_header = '' 56 | for line in record.splitlines(): 57 | warc_header += line 58 | warc_header += '\r\n' 59 | if len(line.strip()) == 0: 60 | break 61 | record = record[len(warc_header):] 62 | 63 | return record 64 | 65 | def get_node_features(self): 66 | cw22id = self.cw22id 67 | cw22root_path = self.cw22root_path 68 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom') 69 | vdom_path = base_filename + '.zip' 70 | 71 | with zipfile.ZipFile(vdom_path, 'r') as z: 72 | doc_num = 0 73 | filename = cw22id + '.bin' 74 | with z.open(filename) as f: 75 | data = f.read() 76 | annotate_html = AnnotateHtml() 77 | annotate_html.ParseFromString(data) 78 | 79 | html_string = self.get_html_from_warc() 80 | api = AnnotateHtmlApi(annotate_html, init_nodes=False, html_string=html_string) 81 | vdom_features = api.get_all_node_features_no_offset() 82 | return vdom_features 83 | 84 | def get_node_features_with_text(self, is_primary=True): 85 | cw22id = self.cw22id 86 | cw22root_path = self.cw22root_path 87 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom') 88 | vdom_path = base_filename + '.zip' 89 | 90 | json_path = 
base_filename + '.json.gz' 91 | offset_path = base_filename + '.offset' 92 | 93 | id_parts = cw22id.split('-') 94 | doc = int(id_parts[len(id_parts) - 1]) 95 | 96 | nodes_and_features = [] 97 | with zipfile.ZipFile(vdom_path, 'r') as z: 98 | doc_num = 0 99 | filename = cw22id + '.bin' 100 | with z.open(filename) as f: 101 | data = f.read() 102 | annotate_html = AnnotateHtml() 103 | annotate_html.ParseFromString(data) 104 | 105 | html_string = self.get_html_from_warc() 106 | api = AnnotateHtmlApi(annotate_html, init_nodes=True, html_string=html_string) 107 | 108 | all_soup_nodes = api.soup.find_all() 109 | primary_node_ids = all_soup_nodes 110 | if is_primary: 111 | primary_node_ids = self.get_primary_node_ids(annotate_html) 112 | 113 | htmlnode_vdomfeatures = {} 114 | for htmlnode in all_soup_nodes: 115 | #print(htmlnode) 116 | node_text = htmlnode.text.strip() 117 | if 'data-dcnode-id' in htmlnode.attrs and len(node_text) > 0: 118 | nodeid = int(htmlnode.attrs['data-dcnode-id']) 119 | if nodeid in primary_node_ids: 120 | vdom_feature = api.all_nodes[nodeid].vdom_feature 121 | node_dict = {'id': nodeid, 'text':htmlnode.text, 'vdom_feature':vdom_feature} 122 | nodes_and_features.append(node_dict) 123 | #htmlnode_vdomfeatures[nodeid] = vdom_feature 124 | return nodes_and_features 125 | 126 | 127 | def get_primary_content_with_annotations(self): 128 | cw22id = self.cw22id 129 | cw22root_path = self.cw22root_path 130 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom') 131 | vdom_path = base_filename + '.zip' 132 | 133 | id_parts = cw22id.split('-') 134 | doc = int(id_parts[len(id_parts) - 1]) 135 | 136 | with zipfile.ZipFile(vdom_path, 'r') as z: 137 | doc_num = 0 138 | filename = cw22id + '.bin' 139 | with z.open(filename) as f: 140 | data = f.read() 141 | annotate_html = AnnotateHtml() 142 | annotate_html.ParseFromString(data) 143 | 144 | html_string = self.get_html_from_warc() 145 | api = AnnotateHtmlApi(annotate_html, 
init_nodes=True, html_string=html_string) 146 | primary_content_with_offset = api.get_primary_content_with_annotation_offset(get_binary_text=True) 147 | return primary_content_with_offset 148 | 149 | def get_json_record(self, record_type): 150 | cw22id = self.cw22id 151 | cw22root_path = self.cw22root_path 152 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type=record_type) 153 | 154 | json_path = base_filename + '.json.gz' 155 | offset_path = base_filename + '.offset' 156 | 157 | id_parts = cw22id.split('-') 158 | doc = int(id_parts[len(id_parts) - 1]) 159 | 160 | offset_length = len('{:010d}\n'.format(0, 0)) 161 | with open (json_path,'rb') as f_json: 162 | with open (offset_path, 'r') as f_offset: 163 | f_offset.seek(int(doc) * int(offset_length)) 164 | start_bytes = int (f_offset.read (offset_length).strip()) 165 | end_bytes = int (f_offset.read (offset_length).strip()) 166 | f_json.seek(start_bytes) 167 | record = f_json.read(end_bytes - start_bytes) 168 | record = gzip.decompress(record).decode('utf-8') 169 | return record 170 | 171 | 172 | def get_clean_text(self): 173 | record = self.get_json_record('txt') 174 | return record 175 | 176 | def get_inlinks(self): 177 | record = self.get_json_record('inlink') 178 | return record 179 | 180 | def get_outlinks(self): 181 | record = self.get_json_record('outlink') 182 | return record 183 | 184 | 185 | -------------------------------------------------------------------------------- /backend/helpers/concurrent_fetch.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import concurrent.futures 3 | import time 4 | 5 | def fetch(i, url, jsonquery): 6 | start_time = time.perf_counter() 7 | response = requests.post(url, json=jsonquery) 8 | end_time = time.perf_counter() 9 | elapsed_time = end_time - start_time 10 | print(f"ANNS TIME: {elapsed_time} seconds") 11 | return i, response.json() 12 | 13 | def fetch_all(urls, jsonquery): 14 | 
with concurrent.futures.ThreadPoolExecutor() as executor: 15 | # Submit requests for all URLs concurrently 16 | future_to_url = {executor.submit(fetch, i, url, jsonquery): url for i, url in urls} 17 | responses = [] 18 | for future in concurrent.futures.as_completed(future_to_url): 19 | url = future_to_url[future] 20 | try: 21 | data = future.result() 22 | responses.append(data) 23 | except Exception as exc: 24 | print(f"Error fetching data from {url}: {exc}") 25 | return responses -------------------------------------------------------------------------------- /backend/helpers/embedding.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import time 3 | 4 | def embedding_function(tokenizer, model, query): 5 | start_time = time.perf_counter() 6 | input_ids = tokenizer(query, return_tensors="pt").input_ids 7 | print(f"TOKEN_COUNT: {len(input_ids[0])}") 8 | decoder_input_ids = tokenizer(query, return_tensors="pt").input_ids 9 | 10 | # Forward pass through the model to obtain embeddings 11 | with torch.no_grad(): 12 | outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids) 13 | 14 | # Extract the embeddings 15 | embeddings = outputs.last_hidden_state # Last layer hidden states 16 | 17 | embeddings_np = embeddings.numpy() 18 | res = embeddings_np[0,0].tolist() 19 | end_time = time.perf_counter() 20 | elapsed_time = end_time - start_time 21 | print(f"EMBEDDING TIME: {elapsed_time} seconds") 22 | return res -------------------------------------------------------------------------------- /backend/helpers/range_dictionary.py: -------------------------------------------------------------------------------- 1 | def create_range_dictionary(file_path): 2 | range_list = [] 3 | with open(file_path, 'r') as file: 4 | for line in file: 5 | # Assuming each line contains two numbers separated by a tab 6 | start, end = map(int, line.strip().split()) 7 | # Append the range as a tuple to the list 8 | 
range_list.append((start, end)) 9 | return range_list 10 | 11 | def query_range_dictionary(range_list, query): 12 | for i, (start, end) in enumerate(range_list): 13 | if start <= query < end: 14 | return i, query - start 15 | return 0 -------------------------------------------------------------------------------- /backend/rag/client.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import os 3 | 4 | def rag_client(query: str, results): 5 | jsonquery = { 6 | "query": query, 7 | "docs": results 8 | } 9 | 10 | response = requests.post(f'http://{os.getenv("RAG_ADDR")}:{os.getenv("RAG_PORT")}/generate', json=jsonquery) 11 | for i, result in enumerate(results): 12 | result['attn'] = response.json()['attn'][i] 13 | result['nameTokens'] = response.json()['docs'][i]['name'] 14 | result['snippetTokens'] = response.json()['docs'][i]['snippet'] 15 | return {'docs': results, 'answer': response.json()['tokens']} -------------------------------------------------------------------------------- /backend/rag/server.py: -------------------------------------------------------------------------------- 1 | from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer # LlamaTokenizer, LlamaForCausalLM, 2 | import torch 3 | import numpy as np 4 | import os 5 | 6 | from fastapi import FastAPI, Query 7 | from pydantic import BaseModel 8 | 9 | from vllm import LLM, SamplingParams 10 | import torch 11 | 12 | import uvicorn 13 | import json 14 | import time 15 | 16 | def load_model(model_name, tp_size=1): 17 | if "minicpm" in model_name.lower(): 18 | llm = LLM( 19 | model_name, 20 | trust_remote_code=True, 21 | dtype='half', 22 | tensor_parallel_size=tp_size, 23 | device=torch.device("cuda:0"), 24 | gpu_memory_utilization=0.5 25 | ) 26 | else: 27 | llm = LLM(model_name, tensor_parallel_size=tp_size, device=torch.device("cuda:0"), gpu_memory_utilization=0.5) 28 | return llm 29 | 30 | 
os.environ["CUDA_VISIBLE_DEVICES"] = "0" 31 | 32 | model_path = os.getenv("RAG_MODEL") 33 | tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True) 34 | hf_model = AutoModelForCausalLM.from_pretrained( 35 | model_path, 36 | trust_remote_code=True, 37 | device_map="auto", 38 | quantization_config=BitsAndBytesConfig(load_in_4bit=True), 39 | max_memory={0: "12GB"} 40 | ) 41 | 42 | 43 | torch.cuda.manual_seed(42) 44 | torch.manual_seed(42) 45 | 46 | model_name = os.getenv("RAG_MODEL") 47 | model = load_model(model_name) 48 | 49 | app = FastAPI() 50 | 51 | def process_tokens(tokenized_docs): 52 | tokens = [] 53 | 54 | special_tokens = [] 55 | 56 | for i, token in enumerate(tokenized_docs): 57 | if token not in special_tokens: 58 | token = token.replace('▁', ' ') 59 | token = token.replace('<0x0A>', '\n') 60 | token = token.replace('Ġ', ' ') 61 | token = token.replace('Ċ', ' ') 62 | tokens.append(token) 63 | return tokens 64 | 65 | def vllm(model, query, docs, max_new_tokens=100, user_prompt=None, top_p=0.9, temperature=0.8): 66 | context = "" 67 | doc_starts = [] 68 | doc_tokens = [] 69 | input_len = 0 70 | context_ids = tokenizer("Context: ", return_tensors='pt')['input_ids'] 71 | input_ids = context_ids.clone() 72 | for i, doc in enumerate(docs): 73 | new_title = f"{doc['name']}: " 74 | title_ids = tokenizer(new_title, return_tensors='pt')['input_ids'][:, 1:] 75 | title_tokens = process_tokens(tokenizer.convert_ids_to_tokens(tokenizer(new_title)['input_ids']))[1:] 76 | title_start = input_ids.size(1) 77 | input_ids = torch.cat([input_ids, title_ids], dim=-1).clone() 78 | 79 | new_context = f"{doc['snippet']};" if i < len(docs) - 1 else f"{doc['snippet']}" 80 | context += new_context 81 | snippet_ids = tokenizer(new_context, return_tensors='pt')['input_ids'][:, 1:] 82 | snippet_tokens = process_tokens(tokenizer.convert_ids_to_tokens(tokenizer(new_context)['input_ids']))[1:] 83 | doc_starts.append((title_start, input_ids.size(1))) 84 | input_ids = 
torch.cat([input_ids, snippet_ids], dim=-1).clone() 85 | doc_tokens.append({'name': title_tokens, 'snippet': snippet_tokens}) 86 | query_ids = tokenizer("\n Question: {query} Answer in less than 100 tokens:", return_tensors='pt')['input_ids'] 87 | docs_len = input_ids.size(1) 88 | input_ids = torch.cat([input_ids, query_ids], dim=-1).clone() 89 | input_len = input_ids.size(1) 90 | 91 | sampling_param = SamplingParams(top_p=top_p, temperature=temperature, max_tokens=max_new_tokens) 92 | 93 | prompt = f"Context: {context}\n Question: {query} Answer in less than 100 tokens:" 94 | start_time = time.perf_counter() 95 | outputs = model.generate(prompt, sampling_params=sampling_param) 96 | end_time = time.perf_counter() 97 | elapsed_time = end_time - start_time 98 | print(f"VLLM CHAT COMPLETION TIME: {elapsed_time} seconds") 99 | print(outputs) 100 | 101 | return input_ids, doc_starts, docs_len, input_len, outputs[0].outputs[0].text, doc_tokens 102 | 103 | def format_attention(attention, layers=None, heads=None): 104 | if layers: 105 | attention = [attention[layer_index] for layer_index in layers] 106 | squeezed = [] 107 | for layer_attention in attention: 108 | # 1 x num_heads x seq_len x seq_len 109 | if len(layer_attention.shape) != 4: 110 | raise ValueError("The attention tensor does not have the correct number of dimensions. 
Make sure you set " 111 | "output_attentions=True when initializing your model.") 112 | layer_attention = layer_attention.squeeze(0) 113 | if heads: 114 | layer_attention = layer_attention[heads] 115 | squeezed.append(layer_attention) 116 | # num_layers x num_heads x seq_len x seq_len 117 | return torch.stack(squeezed) 118 | 119 | def num_layers(attention): 120 | return len(attention) 121 | 122 | def num_heads(attention): 123 | return attention[0][0].size(0) 124 | 125 | 126 | def hf(model, input_ids, doc_starts, docs_len, input_len, generated_text): 127 | generated_ids = tokenizer(generated_text, return_tensors='pt')['input_ids'] 128 | generated_tokens = tokenizer.convert_ids_to_tokens(tokenizer(generated_text)['input_ids']) 129 | output_ids = torch.cat([input_ids, generated_ids], dim=-1).clone() 130 | with torch.no_grad(): 131 | start_time = time.perf_counter() 132 | outputs = model(output_ids, output_attentions=True) 133 | end_time = time.perf_counter() 134 | elapsed_time = end_time - start_time 135 | print(f"ATTENTION FORWARD PASS TIME: {elapsed_time} seconds") 136 | attentions = outputs.attentions 137 | n_heads = num_heads(attentions) 138 | include_layers = list(range(num_layers(attentions))) 139 | include_heads = list(range(n_heads)) 140 | attention = format_attention(attentions, include_layers, include_heads) 141 | 142 | att_q = [] 143 | att_d = [[] for doc in doc_starts] 144 | att = torch.mean(attention, dim=[0,1]).numpy() 145 | 146 | for t_num in range(input_len, output_ids.shape[1]): 147 | for i, doc in enumerate(doc_starts): 148 | title_start, snippet_start = doc 149 | doc_end = doc_starts[i + 1][0] if i < len(doc_starts) - 1 else docs_len 150 | att_d[i].append({"name": att[t_num, title_start:snippet_start].tolist(), "snippet": att[t_num, snippet_start:doc_end].tolist(), "score": float(np.sum(att[t_num, title_start:doc_end]))}) # doc 151 | 152 | return att_d, generated_tokens 153 | 154 | def process_text(doc_tokens, tokenized_text, attn_d): 155 | 
processed_tokens = [] 156 | 157 | special_tokens = [''] 158 | 159 | attn = [[] for i in range (len(attn_d))] 160 | 161 | for i, token in enumerate(tokenized_text): 162 | if token not in special_tokens: 163 | token = token.replace('▁', ' ') 164 | token = token.replace('<0x0A>', '\n') 165 | token = token.replace('Ġ', ' ') 166 | token = token.replace('Ċ', ' ') 167 | processed_tokens.append(token) 168 | for j in range(len(attn)): 169 | attn[j].append(attn_d[j][i]) 170 | return {'docs': doc_tokens, 'tokens': processed_tokens, 'attn': attn} 171 | 172 | class RequestData(BaseModel): 173 | query: str 174 | docs: list 175 | 176 | @app.post("/generate") 177 | def generate(request_data: RequestData = None, max_new_tokens: int = Query(100), top_p: float = Query(0.9), temperature: float = Query(0.8)): 178 | if request_data: 179 | query = request_data.query 180 | docs = request_data.docs 181 | input_ids, doc_starts, docs_len, input_len, response_text, doc_tokens = vllm(model, query, docs) 182 | attn_d, tokenized_text = hf(hf_model, input_ids, doc_starts, docs_len, input_len, response_text) 183 | return process_text(doc_tokens, tokenized_text, attn_d) 184 | 185 | uvicorn.run(app, host="0.0.0.0", port=8080) 186 | -------------------------------------------------------------------------------- /backend/ragviz.py: -------------------------------------------------------------------------------- 1 | import json 2 | import re 3 | import os 4 | from fastapi import HTTPException, FastAPI, Request 5 | from fastapi.responses import JSONResponse 6 | import uvicorn 7 | import time 8 | 9 | from search.pile.search import PileSearch 10 | from snippet.naive_first import NaiveFirstSnippet 11 | from snippet.sliding_window import SlidingWindowSnippet 12 | from helpers.embedding import embedding_function 13 | from rag.client import rag_client 14 | 15 | from transformers import AutoTokenizer, AutoModel 16 | import torch 17 | 18 | if __name__ == "__main__": 19 | tokenizer = 
AutoTokenizer.from_pretrained("yiqingx/AnchorDR") 20 | model = AutoModel.from_pretrained("yiqingx/AnchorDR") 21 | pile_search = PileSearch() 22 | naive_first = NaiveFirstSnippet(tokenizer) 23 | sliding_window = SlidingWindowSnippet(tokenizer, model, 64, 128) 24 | app = FastAPI() 25 | 26 | @app.post("/query") 27 | def query_function(item: dict) -> JSONResponse: 28 | """ 29 | Query RAGViz and returns the response. 30 | 31 | The query can have the following fields: 32 | - query: the user query. 33 | """ 34 | start_time = time.perf_counter() 35 | query = item['query'] or _default_query 36 | # Basic attack protection: remove "[INST]" or "[/INST]" from the query 37 | query = re.sub(r"\[/?INST\]", "", query) 38 | k = int(item['k']) 39 | snippet_type = item['snippet'] 40 | 41 | embeddings = embedding_function(tokenizer, model, query) 42 | 43 | if snippet_type == "first": 44 | results = pile_search.get_search_results(embeddings, k, query, naive_first) 45 | else: 46 | results = pile_search.get_search_results(embeddings, k, query, sliding_window) 47 | 48 | rag_response = rag_client(query, results) 49 | 50 | res = JSONResponse(content=json.dumps(rag_response), media_type="application/json") 51 | end_time = time.perf_counter() 52 | elapsed_time = end_time - start_time 53 | print(f"TOTAL QUERY TIME: {elapsed_time} seconds") 54 | return res 55 | 56 | # Define your API keys 57 | API_KEYS = { 58 | "key": os.getenv("API_KEY"), 59 | } 60 | 61 | @app.middleware("http") 62 | async def check_api_key(request: Request, call_next): 63 | api_key = request.headers.get("X-API-Key") 64 | 65 | if api_key not in API_KEYS.values(): 66 | return JSONResponse(status_code=401, content={"error": "Invalid API key"}) 67 | 68 | response = await call_next(request) 69 | return response 70 | 71 | @app.post("/rewrite") 72 | async def rewrite(item: dict): 73 | return JSONResponse(content=json.dumps(rag_client(item['query'], item['results'])), media_type="application/json") 74 | 75 | if __name__ == "__main__": 
76 | uvicorn.run(app, host=os.getenv("BACKEND_ADDR"), port=int(os.getenv("BACKEND_PORT"))) -------------------------------------------------------------------------------- /backend/requirements.txt: -------------------------------------------------------------------------------- 1 | torch 2 | vllm 3 | transformers 4 | fastapi 5 | uvicorn 6 | requests 7 | pydantic 8 | numpy 9 | bitsandbytes 10 | accelerate>=0.26.0 11 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/0.tsv: -------------------------------------------------------------------------------- 1 | 0 19012 2 | 19012 37463 3 | 37463 57240 4 | 57240 76855 5 | 76855 96968 6 | 96968 115565 7 | 115565 135228 8 | 135228 155670 9 | 155670 175677 10 | 175677 196038 11 | 196038 216354 12 | 216354 234317 13 | 234317 253331 14 | 253331 273734 15 | 273734 293656 16 | 293656 313792 17 | 313792 333411 18 | 333411 350870 19 | 350870 369667 20 | 369667 386046 21 | 386046 406438 22 | 406438 425786 23 | 425786 445832 24 | 445832 465141 25 | 465141 484849 26 | 484849 505218 27 | 505218 520648 28 | 520648 540793 29 | 540793 558073 30 | 558073 578670 31 | 578670 595429 32 | 595429 615361 33 | 615361 635673 34 | 635673 651218 35 | 651218 671516 36 | 671516 691457 37 | 691457 709979 38 | 709979 729412 39 | 729412 748591 40 | 748591 768670 41 | 768670 786578 42 | 786578 806118 43 | 806118 825684 44 | 825684 845645 45 | 845645 865668 46 | 865668 883640 47 | 883640 903375 48 | 903375 923724 49 | 923724 943398 50 | 943398 963455 51 | 963455 983465 52 | 983465 1003057 53 | 1003057 1022567 54 | 1022567 1042576 55 | 1042576 1062877 56 | 1062877 1082720 57 | 1082720 1102066 58 | 1102066 1122329 59 | 1122329 1142586 60 | 1142586 1162950 61 | 1162950 1182935 62 | 1182935 1203077 63 | 1203077 1221442 64 | 1221442 1239195 65 | 1239195 1259987 66 | 1259987 1279874 67 | 1279874 1298295 68 | 1298295 1316572 69 | 1316572 1336185 70 | 1336185 1356965 71 | 1356965 1375004 72 | 1375004 
1395002 73 | 1395002 1415056 74 | 1415056 1434753 75 | 1434753 1454837 76 | 1454837 1472977 77 | 1472977 1487671 78 | 1487671 1506012 79 | 1506012 1521407 80 | 1521407 1541846 81 | 1541846 1562115 82 | 1562115 1581217 83 | 1581217 1601281 84 | 1601281 1621764 85 | 1621764 1641577 86 | 1641577 1661461 87 | 1661461 1680050 88 | 1680050 1693221 89 | 1693221 1713195 90 | 1713195 1732768 91 | 1732768 1753153 92 | 1753153 1773011 93 | 1773011 1793165 94 | 1793165 1810107 95 | 1810107 1829883 96 | 1829883 1847631 97 | 1847631 1868446 98 | 1868446 1888189 99 | 1888189 1905471 100 | 1905471 1923448 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/1.tsv: -------------------------------------------------------------------------------- 1 | 0 20172 2 | 20172 39185 3 | 39185 59373 4 | 59373 78740 5 | 78740 94505 6 | 94505 113927 7 | 113927 133601 8 | 133601 151967 9 | 151967 172446 10 | 172446 192310 11 | 192310 212688 12 | 212688 232873 13 | 232873 253138 14 | 253138 268508 15 | 268508 287466 16 | 287466 308018 17 | 308018 327566 18 | 327566 348471 19 | 348471 366581 20 | 366581 384899 21 | 384899 404953 22 | 404953 424021 23 | 424021 442112 24 | 442112 456806 25 | 456806 476318 26 | 476318 496071 27 | 496071 515828 28 | 515828 534934 29 | 534934 555480 30 | 555480 573941 31 | 573941 593592 32 | 593592 611658 33 | 611658 631334 34 | 631334 645720 35 | 645720 665435 36 | 665435 685940 37 | 685940 705703 38 | 705703 726047 39 | 726047 746528 40 | 746528 766084 41 | 766084 786116 42 | 786116 805644 43 | 805644 826164 44 | 826164 846727 45 | 846727 864257 46 | 864257 879716 47 | 879716 899229 48 | 899229 914478 49 | 914478 934095 50 | 934095 954131 51 | 954131 973703 52 | 973703 991094 53 | 991094 1006931 54 | 1006931 1027122 55 | 1027122 1047190 56 | 1047190 1068071 57 | 1068071 1084092 58 | 1084092 1103447 59 | 1103447 1124041 60 | 1124041 1143549 61 | 1143549 1163819 62 | 1163819 1183546 63 | 1183546 1204099 64 
| 1204099 1225031 65 | 1225031 1242509 66 | 1242509 1260496 67 | 1260496 1280529 68 | 1280529 1300793 69 | 1300793 1319686 70 | 1319686 1340503 71 | 1340503 1359088 72 | 1359088 1377255 73 | 1377255 1394468 74 | 1394468 1413615 75 | 1413615 1433939 76 | 1433939 1452225 77 | 1452225 1470812 78 | 1470812 1491158 79 | 1491158 1511395 80 | 1511395 1529854 81 | 1529854 1546221 82 | 1546221 1564971 83 | 1564971 1584271 84 | 1584271 1604502 85 | 1604502 1624656 86 | 1624656 1644133 87 | 1644133 1664251 88 | 1664251 1682014 89 | 1682014 1700641 90 | 1700641 1720798 91 | 1720798 1740891 92 | 1740891 1761059 93 | 1761059 1780395 94 | 1780395 1797726 95 | 1797726 1817565 96 | 1817565 1836878 97 | 1836878 1856935 98 | 1856935 1871731 99 | 1871731 1892547 100 | 1892547 1912593 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/10.tsv: -------------------------------------------------------------------------------- 1 | 0 18915 2 | 18915 38930 3 | 38930 57633 4 | 57633 77288 5 | 77288 96782 6 | 96782 116769 7 | 116769 135842 8 | 135842 151833 9 | 151833 171590 10 | 171590 190485 11 | 190485 208440 12 | 208440 227954 13 | 227954 246765 14 | 246765 266653 15 | 266653 286769 16 | 286769 306523 17 | 306523 326936 18 | 326936 347145 19 | 347145 367141 20 | 367141 386908 21 | 386908 406694 22 | 406694 426257 23 | 426257 444956 24 | 444956 464891 25 | 464891 483972 26 | 483972 501667 27 | 501667 521264 28 | 521264 540709 29 | 540709 559796 30 | 559796 580669 31 | 580669 599657 32 | 599657 617618 33 | 617618 637883 34 | 637883 655988 35 | 655988 673415 36 | 673415 693362 37 | 693362 711456 38 | 711456 731421 39 | 731421 749644 40 | 749644 769333 41 | 769333 787715 42 | 787715 807483 43 | 807483 826919 44 | 826919 847696 45 | 847696 863487 46 | 863487 879206 47 | 879206 898521 48 | 898521 917530 49 | 917530 937905 50 | 937905 953121 51 | 953121 972647 52 | 972647 992294 53 | 992294 1012273 54 | 1012273 1032676 55 | 1032676 
1050922 56 | 1050922 1071301 57 | 1071301 1088013 58 | 1088013 1108171 59 | 1108171 1124178 60 | 1124178 1144754 61 | 1144754 1165062 62 | 1165062 1182299 63 | 1182299 1201911 64 | 1201911 1219090 65 | 1219090 1238460 66 | 1238460 1256831 67 | 1256831 1272523 68 | 1272523 1287930 69 | 1287930 1308111 70 | 1308111 1327344 71 | 1327344 1338864 72 | 1338864 1357329 73 | 1357329 1377560 74 | 1377560 1396878 75 | 1396878 1416731 76 | 1416731 1436648 77 | 1436648 1457015 78 | 1457015 1468397 79 | 1468397 1488759 80 | 1488759 1508706 81 | 1508706 1528952 82 | 1528952 1544492 83 | 1544492 1562737 84 | 1562737 1581727 85 | 1581727 1600616 86 | 1600616 1613774 87 | 1613774 1633590 88 | 1633590 1653463 89 | 1653463 1671964 90 | 1671964 1691779 91 | 1691779 1711281 92 | 1711281 1726549 93 | 1726549 1744332 94 | 1744332 1762460 95 | 1762460 1779155 96 | 1779155 1799353 97 | 1799353 1813468 98 | 1813468 1831619 99 | 1831619 1851515 100 | 1851515 1870486 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/11.tsv: -------------------------------------------------------------------------------- 1 | 0 16024 2 | 16024 33179 3 | 33179 53533 4 | 53533 73636 5 | 73636 93710 6 | 93710 113203 7 | 113203 133104 8 | 133104 153398 9 | 153398 173272 10 | 173272 192052 11 | 192052 211909 12 | 211909 229078 13 | 229078 248924 14 | 248924 265990 15 | 265990 286418 16 | 286418 304378 17 | 304378 319288 18 | 319288 339149 19 | 339149 358899 20 | 358899 374443 21 | 374443 394500 22 | 394500 413311 23 | 413311 432215 24 | 432215 452046 25 | 452046 469857 26 | 469857 490044 27 | 490044 510088 28 | 510088 529191 29 | 529191 548491 30 | 548491 568915 31 | 568915 586225 32 | 586225 604832 33 | 604832 625715 34 | 625715 645947 35 | 645947 665984 36 | 665984 681737 37 | 681737 702162 38 | 702162 722731 39 | 722731 742884 40 | 742884 763013 41 | 763013 778923 42 | 778923 797520 43 | 797520 817926 44 | 817926 835903 45 | 835903 856335 46 | 
856335 876558 47 | 876558 896968 48 | 896968 917357 49 | 917357 937418 50 | 937418 955824 51 | 955824 974274 52 | 974274 993673 53 | 993673 1013411 54 | 1013411 1033906 55 | 1033906 1054570 56 | 1054570 1074308 57 | 1074308 1095028 58 | 1095028 1113189 59 | 1113189 1131237 60 | 1131237 1149739 61 | 1149739 1169111 62 | 1169111 1187516 63 | 1187516 1206936 64 | 1206936 1224771 65 | 1224771 1244744 66 | 1244744 1263023 67 | 1263023 1278791 68 | 1278791 1294171 69 | 1294171 1314556 70 | 1314556 1334297 71 | 1334297 1354155 72 | 1354155 1370417 73 | 1370417 1389901 74 | 1389901 1408537 75 | 1408537 1428616 76 | 1428616 1446956 77 | 1446956 1465463 78 | 1465463 1484247 79 | 1484247 1504474 80 | 1504474 1524135 81 | 1524135 1542776 82 | 1542776 1562466 83 | 1562466 1582697 84 | 1582697 1602753 85 | 1602753 1622314 86 | 1622314 1642309 87 | 1642309 1661954 88 | 1661954 1682689 89 | 1682689 1702685 90 | 1702685 1722016 91 | 1722016 1741866 92 | 1741866 1759752 93 | 1759752 1779609 94 | 1779609 1794852 95 | 1794852 1814505 96 | 1814505 1832397 97 | 1832397 1851822 98 | 1851822 1871251 99 | 1871251 1886369 100 | 1886369 1903582 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/12.tsv: -------------------------------------------------------------------------------- 1 | 0 20536 2 | 20536 40840 3 | 40840 60548 4 | 60548 80205 5 | 80205 99338 6 | 99338 119724 7 | 119724 139911 8 | 139911 158004 9 | 158004 175892 10 | 175892 194317 11 | 194317 214858 12 | 214858 232850 13 | 232850 253163 14 | 253163 273145 15 | 273145 289209 16 | 289209 309330 17 | 309330 329602 18 | 329602 342159 19 | 342159 362988 20 | 362988 380465 21 | 380465 400108 22 | 400108 418357 23 | 418357 438574 24 | 438574 458433 25 | 458433 475058 26 | 475058 494644 27 | 494644 514214 28 | 514214 533601 29 | 533601 553323 30 | 553323 572426 31 | 572426 591694 32 | 591694 611745 33 | 611745 631094 34 | 631094 651284 35 | 651284 667154 36 | 667154 
681416 37 | 681416 699932 38 | 699932 720826 39 | 720826 740316 40 | 740316 759890 41 | 759890 779884 42 | 779884 798616 43 | 798616 816742 44 | 816742 835093 45 | 835093 855669 46 | 855669 875573 47 | 875573 894956 48 | 894956 913287 49 | 913287 932164 50 | 932164 952199 51 | 952199 972537 52 | 972537 984791 53 | 984791 1000817 54 | 1000817 1020780 55 | 1020780 1040945 56 | 1040945 1059132 57 | 1059132 1077061 58 | 1077061 1096507 59 | 1096507 1116055 60 | 1116055 1135508 61 | 1135508 1153735 62 | 1153735 1173119 63 | 1173119 1191066 64 | 1191066 1207119 65 | 1207119 1227643 66 | 1227643 1247907 67 | 1247907 1267701 68 | 1267701 1285623 69 | 1285623 1301297 70 | 1301297 1321398 71 | 1321398 1339046 72 | 1339046 1358282 73 | 1358282 1374790 74 | 1374790 1393606 75 | 1393606 1408534 76 | 1408534 1426959 77 | 1426959 1444985 78 | 1444985 1464912 79 | 1464912 1484618 80 | 1484618 1504670 81 | 1504670 1525017 82 | 1525017 1541079 83 | 1541079 1561806 84 | 1561806 1577742 85 | 1577742 1595931 86 | 1595931 1616605 87 | 1616605 1636749 88 | 1636749 1653156 89 | 1653156 1673440 90 | 1673440 1691681 91 | 1691681 1710386 92 | 1710386 1726270 93 | 1726270 1745106 94 | 1745106 1765157 95 | 1765157 1785245 96 | 1785245 1803186 97 | 1803186 1823564 98 | 1823564 1843362 99 | 1843362 1862962 100 | 1862962 1878332 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/13.tsv: -------------------------------------------------------------------------------- 1 | 0 18477 2 | 18477 36925 3 | 36925 57160 4 | 57160 76301 5 | 76301 91513 6 | 91513 111444 7 | 111444 127133 8 | 127133 147337 9 | 147337 166938 10 | 166938 186406 11 | 186406 202185 12 | 202185 222697 13 | 222697 239694 14 | 239694 255448 15 | 255448 275720 16 | 275720 292777 17 | 292777 312527 18 | 312527 332685 19 | 332685 351199 20 | 351199 371303 21 | 371303 390458 22 | 390458 408704 23 | 408704 428413 24 | 428413 448335 25 | 448335 463713 26 | 463713 483280 27 
| 483280 499584 28 | 499584 519168 29 | 519168 539336 30 | 539336 559155 31 | 559155 579879 32 | 579879 599852 33 | 599852 619567 34 | 619567 639560 35 | 639560 659280 36 | 659280 678282 37 | 678282 693375 38 | 693375 711599 39 | 711599 732048 40 | 732048 752235 41 | 752235 772406 42 | 772406 788942 43 | 788942 807707 44 | 807707 826810 45 | 826810 846922 46 | 846922 862171 47 | 862171 882418 48 | 882418 899679 49 | 899679 916855 50 | 916855 935974 51 | 935974 950447 52 | 950447 966417 53 | 966417 986055 54 | 986055 1001423 55 | 1001423 1021234 56 | 1021234 1041206 57 | 1041206 1058160 58 | 1058160 1076241 59 | 1076241 1095889 60 | 1095889 1116194 61 | 1116194 1136235 62 | 1136235 1155809 63 | 1155809 1175535 64 | 1175535 1195643 65 | 1195643 1214935 66 | 1214935 1232794 67 | 1232794 1251003 68 | 1251003 1270848 69 | 1270848 1290887 70 | 1290887 1310356 71 | 1310356 1327676 72 | 1327676 1346640 73 | 1346640 1364944 74 | 1364944 1383140 75 | 1383140 1403661 76 | 1403661 1423435 77 | 1423435 1443779 78 | 1443779 1463036 79 | 1463036 1480634 80 | 1480634 1501048 81 | 1501048 1520693 82 | 1520693 1541257 83 | 1541257 1560607 84 | 1560607 1575126 85 | 1575126 1593798 86 | 1593798 1613497 87 | 1613497 1633329 88 | 1633329 1648986 89 | 1648986 1667398 90 | 1667398 1687086 91 | 1687086 1707648 92 | 1707648 1726843 93 | 1726843 1744877 94 | 1744877 1764451 95 | 1764451 1783182 96 | 1783182 1800606 97 | 1800606 1820685 98 | 1820685 1838317 99 | 1838317 1855872 100 | 1855872 1876437 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/14.tsv: -------------------------------------------------------------------------------- 1 | 0 17171 2 | 17171 37453 3 | 37453 57237 4 | 57237 74453 5 | 74453 95418 6 | 95418 115431 7 | 115431 135557 8 | 135557 155630 9 | 155630 176347 10 | 176347 195891 11 | 195891 215340 12 | 215340 234975 13 | 234975 255315 14 | 255315 274598 15 | 274598 290609 16 | 290609 310756 17 | 310756 
327014 18 | 327014 346715 19 | 346715 361873 20 | 361873 377175 21 | 377175 396160 22 | 396160 416433 23 | 416433 434434 24 | 434434 454360 25 | 454360 472755 26 | 472755 492025 27 | 492025 511505 28 | 511505 531427 29 | 531427 545345 30 | 545345 565005 31 | 565005 583063 32 | 583063 603126 33 | 603126 618644 34 | 618644 638889 35 | 638889 656169 36 | 656169 676698 37 | 676698 695743 38 | 695743 713996 39 | 713996 733091 40 | 733091 753197 41 | 753197 771465 42 | 771465 787286 43 | 787286 807280 44 | 807280 825284 45 | 825284 845592 46 | 845592 865833 47 | 865833 886037 48 | 886037 905855 49 | 905855 924810 50 | 924810 945696 51 | 945696 965634 52 | 965634 984873 53 | 984873 1004901 54 | 1004901 1023158 55 | 1023158 1042969 56 | 1042969 1062322 57 | 1062322 1082237 58 | 1082237 1102226 59 | 1102226 1120139 60 | 1120139 1138026 61 | 1138026 1157580 62 | 1157580 1178063 63 | 1178063 1195127 64 | 1195127 1215700 65 | 1215700 1234544 66 | 1234544 1255069 67 | 1255069 1274629 68 | 1274629 1292777 69 | 1292777 1312442 70 | 1312442 1331926 71 | 1331926 1352248 72 | 1352248 1370841 73 | 1370841 1390846 74 | 1390846 1409874 75 | 1409874 1428054 76 | 1428054 1447772 77 | 1447772 1463658 78 | 1463658 1471590 79 | 1471590 1492299 80 | 1492299 1508127 81 | 1508127 1527987 82 | 1527987 1548002 83 | 1548002 1567649 84 | 1567649 1587664 85 | 1587664 1605527 86 | 1605527 1625475 87 | 1625475 1643633 88 | 1643633 1662622 89 | 1662622 1682525 90 | 1682525 1698343 91 | 1698343 1717191 92 | 1717191 1736780 93 | 1736780 1756597 94 | 1756597 1777002 95 | 1777002 1796035 96 | 1796035 1812953 97 | 1812953 1832483 98 | 1832483 1852373 99 | 1852373 1871358 100 | 1871358 1890816 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/15.tsv: -------------------------------------------------------------------------------- 1 | 0 20150 2 | 20150 38649 3 | 38649 55632 4 | 55632 75858 5 | 75858 95567 6 | 95567 113880 7 | 113880 133996 8 
| 133996 154510 9 | 154510 174297 10 | 174297 194965 11 | 194965 212824 12 | 212824 231846 13 | 231846 251904 14 | 251904 272522 15 | 272522 290368 16 | 290368 310387 17 | 310387 329129 18 | 329129 347497 19 | 347497 366110 20 | 366110 382625 21 | 382625 401793 22 | 401793 421091 23 | 421091 439927 24 | 439927 458903 25 | 458903 477427 26 | 477427 497021 27 | 497021 516720 28 | 516720 534424 29 | 534424 554237 30 | 554237 574597 31 | 574597 594350 32 | 594350 614644 33 | 614644 633063 34 | 633063 653424 35 | 653424 671156 36 | 671156 690188 37 | 690188 705555 38 | 705555 725089 39 | 725089 744644 40 | 744644 763891 41 | 763891 784433 42 | 784433 805357 43 | 805357 824817 44 | 824817 842700 45 | 842700 862806 46 | 862806 879644 47 | 879644 900061 48 | 900061 919657 49 | 919657 940283 50 | 940283 958389 51 | 958389 978361 52 | 978361 995813 53 | 995813 1015724 54 | 1015724 1034742 55 | 1034742 1054589 56 | 1054589 1074631 57 | 1074631 1093770 58 | 1093770 1114066 59 | 1114066 1134715 60 | 1134715 1154309 61 | 1154309 1174379 62 | 1174379 1193292 63 | 1193292 1213484 64 | 1213484 1233904 65 | 1233904 1253231 66 | 1253231 1272934 67 | 1272934 1292028 68 | 1292028 1310493 69 | 1310493 1328700 70 | 1328700 1347260 71 | 1347260 1367363 72 | 1367363 1382775 73 | 1382775 1402762 74 | 1402762 1423020 75 | 1423020 1443844 76 | 1443844 1460710 77 | 1460710 1477771 78 | 1477771 1496146 79 | 1496146 1516330 80 | 1516330 1535981 81 | 1535981 1554400 82 | 1554400 1573099 83 | 1573099 1590259 84 | 1590259 1609716 85 | 1609716 1628591 86 | 1628591 1645149 87 | 1645149 1663540 88 | 1663540 1684039 89 | 1684039 1703972 90 | 1703972 1718626 91 | 1718626 1735850 92 | 1735850 1755127 93 | 1755127 1775039 94 | 1775039 1795423 95 | 1795423 1814617 96 | 1814617 1833233 97 | 1833233 1853661 98 | 1853661 1874164 99 | 1874164 1894537 100 | 1894537 1914849 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/16.tsv: 
-------------------------------------------------------------------------------- 1 | 0 20379 2 | 20379 39929 3 | 39929 55687 4 | 55687 73050 5 | 73050 91501 6 | 91501 110783 7 | 110783 130632 8 | 130632 151258 9 | 151258 171289 10 | 171289 190769 11 | 190769 205471 12 | 205471 224320 13 | 224320 243944 14 | 243944 263971 15 | 263971 283629 16 | 283629 304063 17 | 304063 323899 18 | 323899 341320 19 | 341320 358591 20 | 358591 378669 21 | 378669 398813 22 | 398813 416454 23 | 416454 434558 24 | 434558 454490 25 | 454490 474721 26 | 474721 494806 27 | 494806 514889 28 | 514889 534248 29 | 534248 549672 30 | 549672 569612 31 | 569612 589650 32 | 589650 609679 33 | 609679 629096 34 | 629096 647449 35 | 647449 668009 36 | 668009 687717 37 | 687717 707324 38 | 707324 725135 39 | 725135 744350 40 | 744350 764254 41 | 764254 784421 42 | 784421 804666 43 | 804666 825149 44 | 825149 844936 45 | 844936 865304 46 | 865304 884533 47 | 884533 903986 48 | 903986 923628 49 | 923628 939231 50 | 939231 959566 51 | 959566 974955 52 | 974955 995476 53 | 995476 1015918 54 | 1015918 1035549 55 | 1035549 1053884 56 | 1053884 1072730 57 | 1072730 1092794 58 | 1092794 1112198 59 | 1112198 1132273 60 | 1132273 1150722 61 | 1150722 1170515 62 | 1170515 1188866 63 | 1188866 1208087 64 | 1208087 1227432 65 | 1227432 1247845 66 | 1247845 1268335 67 | 1268335 1288410 68 | 1288410 1307366 69 | 1307366 1327810 70 | 1327810 1348064 71 | 1348064 1365013 72 | 1365013 1385158 73 | 1385158 1404742 74 | 1404742 1422744 75 | 1422744 1442906 76 | 1442906 1463026 77 | 1463026 1482445 78 | 1482445 1502931 79 | 1502931 1523642 80 | 1523642 1542506 81 | 1542506 1562128 82 | 1562128 1579686 83 | 1579686 1599788 84 | 1599788 1619282 85 | 1619282 1633864 86 | 1633864 1651034 87 | 1651034 1666928 88 | 1666928 1684863 89 | 1684863 1704155 90 | 1704155 1722938 91 | 1722938 1740731 92 | 1740731 1761093 93 | 1761093 1776673 94 | 1776673 1797260 95 | 1797260 1815826 96 | 1815826 1836010 97 | 1836010 1851699 98 | 
1851699 1872250 99 | 1872250 1889599 100 | 1889599 1909564 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/17.tsv: -------------------------------------------------------------------------------- 1 | 0 20744 2 | 20744 36653 3 | 36653 51678 4 | 51678 71588 5 | 71588 91312 6 | 91312 111482 7 | 111482 130023 8 | 130023 150088 9 | 150088 169283 10 | 169283 189283 11 | 189283 207065 12 | 207065 227255 13 | 227255 247335 14 | 247335 266819 15 | 266819 286229 16 | 286229 305021 17 | 305021 324576 18 | 324576 344701 19 | 344701 363040 20 | 363040 381727 21 | 381727 401634 22 | 401634 418714 23 | 418714 439136 24 | 439136 459250 25 | 459250 479907 26 | 479907 499706 27 | 499706 519620 28 | 519620 535628 29 | 535628 555431 30 | 555431 573902 31 | 573902 591267 32 | 591267 609832 33 | 609832 629144 34 | 629144 648803 35 | 648803 668685 36 | 668685 688967 37 | 688967 707898 38 | 707898 725302 39 | 725302 745357 40 | 745357 766352 41 | 766352 786244 42 | 786244 805879 43 | 805879 825563 44 | 825563 843981 45 | 843981 861910 46 | 861910 880420 47 | 880420 899833 48 | 899833 918180 49 | 918180 936099 50 | 936099 956403 51 | 956403 976472 52 | 976472 996117 53 | 996117 1015019 54 | 1015019 1031683 55 | 1031683 1051300 56 | 1051300 1064739 57 | 1064739 1080688 58 | 1080688 1098529 59 | 1098529 1116482 60 | 1116482 1136364 61 | 1136364 1156191 62 | 1156191 1176234 63 | 1176234 1193832 64 | 1193832 1212773 65 | 1212773 1231200 66 | 1231200 1245061 67 | 1245061 1264751 68 | 1264751 1276065 69 | 1276065 1294753 70 | 1294753 1314353 71 | 1314353 1333837 72 | 1333837 1354021 73 | 1354021 1368991 74 | 1368991 1388543 75 | 1388543 1405885 76 | 1405885 1425862 77 | 1425862 1445961 78 | 1445961 1465236 79 | 1465236 1485088 80 | 1485088 1504670 81 | 1504670 1522593 82 | 1522593 1540419 83 | 1540419 1557158 84 | 1557158 1576344 85 | 1576344 1594557 86 | 1594557 1614673 87 | 1614673 1634523 88 | 1634523 1654641 89 | 1654641 
1672511 90 | 1672511 1693444 91 | 1693444 1711272 92 | 1711272 1728607 93 | 1728607 1749425 94 | 1749425 1769513 95 | 1769513 1789709 96 | 1789709 1809075 97 | 1809075 1826031 98 | 1826031 1845888 99 | 1845888 1866021 100 | 1866021 1885979 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/18.tsv: -------------------------------------------------------------------------------- 1 | 0 20500 2 | 20500 40570 3 | 40570 58530 4 | 58530 76164 5 | 76164 95335 6 | 95335 109833 7 | 109833 128938 8 | 128938 145279 9 | 145279 165048 10 | 165048 185360 11 | 185360 203277 12 | 203277 221492 13 | 221492 239254 14 | 239254 259201 15 | 259201 279539 16 | 279539 300137 17 | 300137 319704 18 | 319704 338552 19 | 338552 359178 20 | 359178 378976 21 | 378976 399101 22 | 399101 418425 23 | 418425 436260 24 | 436260 456716 25 | 456716 471251 26 | 471251 489734 27 | 489734 509470 28 | 509470 525448 29 | 525448 545520 30 | 545520 566075 31 | 566075 586476 32 | 586476 606430 33 | 606430 625839 34 | 625839 645270 35 | 645270 663370 36 | 663370 683832 37 | 683832 704134 38 | 704134 721350 39 | 721350 742290 40 | 742290 762416 41 | 762416 783101 42 | 783101 801839 43 | 801839 822068 44 | 822068 841727 45 | 841727 861909 46 | 861909 881578 47 | 881578 901169 48 | 901169 921334 49 | 921334 939752 50 | 939752 959946 51 | 959946 975408 52 | 975408 993762 53 | 993762 1013061 54 | 1013061 1033662 55 | 1033662 1049007 56 | 1049007 1069248 57 | 1069248 1088312 58 | 1088312 1107997 59 | 1107997 1126494 60 | 1126494 1145405 61 | 1145405 1162927 62 | 1162927 1183548 63 | 1183548 1202668 64 | 1202668 1218405 65 | 1218405 1237146 66 | 1237146 1256006 67 | 1256006 1274064 68 | 1274064 1294552 69 | 1294552 1315086 70 | 1315086 1334910 71 | 1334910 1355105 72 | 1355105 1374733 73 | 1374733 1394985 74 | 1394985 1414824 75 | 1414824 1434025 76 | 1434025 1453133 77 | 1453133 1473006 78 | 1473006 1493393 79 | 1493393 1513756 80 | 1513756 1530974 
81 | 1530974 1550821 82 | 1550821 1570325 83 | 1570325 1589455 84 | 1589455 1608261 85 | 1608261 1627731 86 | 1627731 1647905 87 | 1647905 1666408 88 | 1666408 1686777 89 | 1686777 1706972 90 | 1706972 1727572 91 | 1727572 1747162 92 | 1747162 1767397 93 | 1767397 1787518 94 | 1787518 1805917 95 | 1805917 1825454 96 | 1825454 1844399 97 | 1844399 1862518 98 | 1862518 1877878 99 | 1877878 1893381 100 | 1893381 1911625 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/19.tsv: -------------------------------------------------------------------------------- 1 | 0 18590 2 | 18590 38258 3 | 38258 58601 4 | 58601 78086 5 | 78086 98015 6 | 98015 116214 7 | 116214 133956 8 | 133956 154652 9 | 154652 174572 10 | 174572 192321 11 | 192321 209734 12 | 209734 229406 13 | 229406 244700 14 | 244700 264434 15 | 264434 283913 16 | 283913 304538 17 | 304538 320597 18 | 320597 340944 19 | 340944 359541 20 | 359541 377293 21 | 377293 396831 22 | 396831 417451 23 | 417451 435693 24 | 435693 455326 25 | 455326 474872 26 | 474872 495017 27 | 495017 514855 28 | 514855 533005 29 | 533005 552956 30 | 552956 571114 31 | 571114 589296 32 | 589296 607995 33 | 607995 628060 34 | 628060 648663 35 | 648663 668958 36 | 668958 687381 37 | 687381 706929 38 | 706929 726253 39 | 726253 745853 40 | 745853 763527 41 | 763527 782798 42 | 782798 799947 43 | 799947 819968 44 | 819968 837852 45 | 837852 858432 46 | 858432 878205 47 | 878205 898664 48 | 898664 915950 49 | 915950 935405 50 | 935405 955480 51 | 955480 975167 52 | 975167 993204 53 | 993204 1013511 54 | 1013511 1033196 55 | 1033196 1052300 56 | 1052300 1072719 57 | 1072719 1092729 58 | 1092729 1112402 59 | 1112402 1130646 60 | 1130646 1150175 61 | 1150175 1170228 62 | 1170228 1188199 63 | 1188199 1208141 64 | 1208141 1227010 65 | 1227010 1245338 66 | 1245338 1264639 67 | 1264639 1285061 68 | 1285061 1302169 69 | 1302169 1322115 70 | 1322115 1339794 71 | 1339794 1358129 72 | 
1358129 1373533 73 | 1373533 1393240 74 | 1393240 1408777 75 | 1408777 1428887 76 | 1428887 1447362 77 | 1447362 1465741 78 | 1465741 1485197 79 | 1485197 1503885 80 | 1503885 1521994 81 | 1521994 1542464 82 | 1542464 1560491 83 | 1560491 1579713 84 | 1579713 1599147 85 | 1599147 1619193 86 | 1619193 1639442 87 | 1639442 1660506 88 | 1660506 1681273 89 | 1681273 1701107 90 | 1701107 1721003 91 | 1721003 1740989 92 | 1740989 1760636 93 | 1760636 1778404 94 | 1778404 1798290 95 | 1798290 1817921 96 | 1817921 1837832 97 | 1837832 1857760 98 | 1857760 1875117 99 | 1875117 1894736 100 | 1894736 1910192 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/2.tsv: -------------------------------------------------------------------------------- 1 | 0 18746 2 | 18746 34335 3 | 34335 52371 4 | 52371 72932 5 | 72932 84309 6 | 84309 104183 7 | 104183 123510 8 | 123510 139120 9 | 139120 157425 10 | 157425 178167 11 | 178167 196835 12 | 196835 215815 13 | 215815 236291 14 | 236291 255730 15 | 255730 276036 16 | 276036 295083 17 | 295083 309534 18 | 309534 327607 19 | 327607 346604 20 | 346604 366441 21 | 366441 386877 22 | 386877 404088 23 | 404088 422219 24 | 422219 442422 25 | 442422 462046 26 | 462046 481910 27 | 481910 502064 28 | 502064 519085 29 | 519085 538176 30 | 538176 556480 31 | 556480 572660 32 | 572660 588068 33 | 588068 607541 34 | 607541 627624 35 | 627624 648366 36 | 648366 668834 37 | 668834 689203 38 | 689203 709316 39 | 709316 728611 40 | 728611 748063 41 | 748063 768443 42 | 768443 788598 43 | 788598 805783 44 | 805783 826103 45 | 826103 846266 46 | 846266 865867 47 | 865867 886083 48 | 886083 905877 49 | 905877 925801 50 | 925801 943638 51 | 943638 963658 52 | 963658 982511 53 | 982511 998955 54 | 998955 1019487 55 | 1019487 1039418 56 | 1039418 1059503 57 | 1059503 1079256 58 | 1079256 1098724 59 | 1098724 1118532 60 | 1118532 1138759 61 | 1138759 1157959 62 | 1157959 1178102 63 | 1178102 
1196261 64 | 1196261 1216790 65 | 1216790 1236739 66 | 1236739 1256742 67 | 1256742 1277650 68 | 1277650 1295603 69 | 1295603 1316010 70 | 1316010 1335973 71 | 1335973 1355514 72 | 1355514 1375727 73 | 1375727 1395385 74 | 1395385 1415710 75 | 1415710 1433008 76 | 1433008 1450983 77 | 1450983 1471355 78 | 1471355 1488903 79 | 1488903 1509265 80 | 1509265 1529222 81 | 1529222 1549171 82 | 1549171 1565174 83 | 1565174 1579213 84 | 1579213 1597678 85 | 1597678 1617577 86 | 1617577 1637757 87 | 1637757 1655853 88 | 1655853 1675614 89 | 1675614 1694904 90 | 1694904 1715000 91 | 1715000 1731062 92 | 1731062 1747057 93 | 1747057 1767015 94 | 1767015 1784172 95 | 1784172 1803428 96 | 1803428 1822339 97 | 1822339 1841392 98 | 1841392 1860719 99 | 1860719 1880850 100 | 1880850 1900239 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/20.tsv: -------------------------------------------------------------------------------- 1 | 0 20388 2 | 20388 40927 3 | 40927 58350 4 | 58350 76675 5 | 76675 97031 6 | 97031 112425 7 | 112425 132611 8 | 132611 152331 9 | 152331 172323 10 | 172323 189609 11 | 189609 207650 12 | 207650 227614 13 | 227614 244582 14 | 244582 264405 15 | 264405 283633 16 | 283633 303890 17 | 303890 322280 18 | 322280 337986 19 | 337986 355418 20 | 355418 374147 21 | 374147 393049 22 | 393049 412479 23 | 412479 432614 24 | 432614 448279 25 | 448279 467667 26 | 467667 485637 27 | 485637 506095 28 | 506095 526481 29 | 526481 542151 30 | 542151 561488 31 | 561488 578269 32 | 578269 598925 33 | 598925 619332 34 | 619332 637370 35 | 637370 656997 36 | 656997 674952 37 | 674952 694552 38 | 694552 713724 39 | 713724 733390 40 | 733390 753565 41 | 753565 773481 42 | 773481 794008 43 | 794008 814365 44 | 814365 830536 45 | 830536 850227 46 | 850227 870144 47 | 870144 887454 48 | 887454 908229 49 | 908229 928145 50 | 928145 947765 51 | 947765 967292 52 | 967292 986536 53 | 986536 1006520 54 | 1006520 1026253 
55 | 1026253 1045890 56 | 1045890 1065621 57 | 1065621 1081205 58 | 1081205 1099102 59 | 1099102 1114808 60 | 1114808 1134165 61 | 1134165 1153763 62 | 1153763 1173643 63 | 1173643 1193448 64 | 1193448 1213681 65 | 1213681 1232111 66 | 1232111 1252285 67 | 1252285 1271921 68 | 1271921 1289750 69 | 1289750 1307226 70 | 1307226 1327094 71 | 1327094 1347045 72 | 1347045 1364235 73 | 1364235 1384582 74 | 1384582 1404801 75 | 1404801 1425815 76 | 1425815 1445269 77 | 1445269 1464920 78 | 1464920 1484899 79 | 1484899 1505502 80 | 1505502 1525739 81 | 1525739 1539379 82 | 1539379 1557674 83 | 1557674 1578291 84 | 1578291 1593749 85 | 1593749 1611220 86 | 1611220 1631269 87 | 1631269 1651897 88 | 1651897 1671978 89 | 1671978 1692448 90 | 1692448 1711337 91 | 1711337 1729961 92 | 1729961 1750010 93 | 1750010 1769962 94 | 1769962 1789846 95 | 1789846 1809981 96 | 1809981 1829416 97 | 1829416 1848119 98 | 1848119 1866350 99 | 1866350 1882764 100 | 1882764 1902856 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/21.tsv: -------------------------------------------------------------------------------- 1 | 0 15025 2 | 15025 34678 3 | 34678 54923 4 | 54923 73088 5 | 73088 93879 6 | 93879 112039 7 | 112039 125847 8 | 125847 145816 9 | 145816 160974 10 | 160974 181172 11 | 181172 201124 12 | 201124 217979 13 | 217979 237253 14 | 237253 251372 15 | 251372 269740 16 | 269740 289774 17 | 289774 309510 18 | 309510 328703 19 | 328703 347350 20 | 347350 366184 21 | 366184 386377 22 | 386377 402195 23 | 402195 420328 24 | 420328 439892 25 | 439892 458031 26 | 458031 477975 27 | 477975 498474 28 | 498474 518561 29 | 518561 536054 30 | 536054 556110 31 | 556110 576493 32 | 576493 596482 33 | 596482 616581 34 | 616581 636286 35 | 636286 654466 36 | 654466 675041 37 | 675041 695067 38 | 695067 714856 39 | 714856 733250 40 | 733250 753126 41 | 753126 773638 42 | 773638 791656 43 | 791656 810093 44 | 810093 827152 45 | 827152 
845188 46 | 845188 865334 47 | 865334 885041 48 | 885041 905026 49 | 905026 924798 50 | 924798 944614 51 | 944614 961187 52 | 961187 981423 53 | 981423 999390 54 | 999390 1019322 55 | 1019322 1038532 56 | 1038532 1057085 57 | 1057085 1076615 58 | 1076615 1092857 59 | 1092857 1112618 60 | 1112618 1132767 61 | 1132767 1153181 62 | 1153181 1172656 63 | 1172656 1188773 64 | 1188773 1204568 65 | 1204568 1224107 66 | 1224107 1244680 67 | 1244680 1264335 68 | 1264335 1281145 69 | 1281145 1300885 70 | 1300885 1321020 71 | 1321020 1338966 72 | 1338966 1352449 73 | 1352449 1372886 74 | 1372886 1393191 75 | 1393191 1413241 76 | 1413241 1433247 77 | 1433247 1451233 78 | 1451233 1470490 79 | 1470490 1490737 80 | 1490737 1510981 81 | 1510981 1529786 82 | 1529786 1546043 83 | 1546043 1565926 84 | 1565926 1585789 85 | 1585789 1603807 86 | 1603807 1619324 87 | 1619324 1640179 88 | 1640179 1658900 89 | 1658900 1678691 90 | 1678691 1699247 91 | 1699247 1716107 92 | 1716107 1737045 93 | 1737045 1757318 94 | 1757318 1776087 95 | 1776087 1795886 96 | 1795886 1815924 97 | 1815924 1836155 98 | 1836155 1856406 99 | 1856406 1875846 100 | 1875846 1894297 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/22.tsv: -------------------------------------------------------------------------------- 1 | 0 20462 2 | 20462 39466 3 | 39466 59799 4 | 59799 78618 5 | 78618 98590 6 | 98590 118748 7 | 118748 135664 8 | 135664 155341 9 | 155341 175027 10 | 175027 194095 11 | 194095 214114 12 | 214114 234623 13 | 234623 254909 14 | 254909 274587 15 | 274587 292342 16 | 292342 310385 17 | 310385 329738 18 | 329738 348685 19 | 348685 369260 20 | 369260 389110 21 | 389110 409677 22 | 409677 430170 23 | 430170 445767 24 | 445767 465159 25 | 465159 483074 26 | 483074 503762 27 | 503762 522724 28 | 522724 526951 29 | 526951 546118 30 | 546118 564611 31 | 564611 584237 32 | 584237 603005 33 | 603005 621899 34 | 621899 642476 35 | 642476 662846 36 | 
662846 680684 37 | 680684 700580 38 | 700580 720579 39 | 720579 740580 40 | 740580 760591 41 | 760591 781382 42 | 781382 802166 43 | 802166 821968 44 | 821968 841732 45 | 841732 861541 46 | 861541 881685 47 | 881685 899784 48 | 899784 919865 49 | 919865 937921 50 | 937921 957078 51 | 957078 977828 52 | 977828 997523 53 | 997523 1017949 54 | 1017949 1037406 55 | 1037406 1055837 56 | 1055837 1074827 57 | 1074827 1095104 58 | 1095104 1112716 59 | 1112716 1132832 60 | 1132832 1148164 61 | 1148164 1168338 62 | 1168338 1189009 63 | 1189009 1204695 64 | 1204695 1224709 65 | 1224709 1244136 66 | 1244136 1264078 67 | 1264078 1284764 68 | 1284764 1304557 69 | 1304557 1322364 70 | 1322364 1341337 71 | 1341337 1361775 72 | 1361775 1381090 73 | 1381090 1399273 74 | 1399273 1419639 75 | 1419639 1439766 76 | 1439766 1459382 77 | 1459382 1473862 78 | 1473862 1494219 79 | 1494219 1513845 80 | 1513845 1530947 81 | 1530947 1550972 82 | 1550972 1569996 83 | 1569996 1589305 84 | 1589305 1607459 85 | 1607459 1621106 86 | 1621106 1640356 87 | 1640356 1660012 88 | 1660012 1680278 89 | 1680278 1699701 90 | 1699701 1718449 91 | 1718449 1738351 92 | 1738351 1756472 93 | 1756472 1776229 94 | 1776229 1796865 95 | 1796865 1815558 96 | 1815558 1835435 97 | 1835435 1855747 98 | 1855747 1872123 99 | 1872123 1891747 100 | 1891747 1912478 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/23.tsv: -------------------------------------------------------------------------------- 1 | 0 14616 2 | 14616 30487 3 | 30487 49855 4 | 49855 69572 5 | 69572 90236 6 | 90236 109573 7 | 109573 129452 8 | 129452 150165 9 | 150165 170114 10 | 170114 190547 11 | 190547 210458 12 | 210458 230327 13 | 230327 245330 14 | 245330 264821 15 | 264821 285450 16 | 285450 305049 17 | 305049 323667 18 | 323667 343085 19 | 343085 361230 20 | 361230 378775 21 | 378775 398775 22 | 398775 416331 23 | 416331 436161 24 | 436161 456628 25 | 456628 477111 26 | 477111 
497438 27 | 497438 517762 28 | 517762 536905 29 | 536905 557252 30 | 557252 576792 31 | 576792 594642 32 | 594642 615089 33 | 615089 635248 34 | 635248 655310 35 | 655310 670782 36 | 670782 690905 37 | 690905 709305 38 | 709305 727793 39 | 727793 746487 40 | 746487 764563 41 | 764563 783994 42 | 783994 803288 43 | 803288 822745 44 | 822745 840521 45 | 840521 861047 46 | 861047 880203 47 | 880203 899962 48 | 899962 919572 49 | 919572 937658 50 | 937658 953835 51 | 953835 973603 52 | 973603 994183 53 | 994183 1013864 54 | 1013864 1034707 55 | 1034707 1055119 56 | 1055119 1075325 57 | 1075325 1094507 58 | 1094507 1113952 59 | 1113952 1134329 60 | 1134329 1154304 61 | 1154304 1171783 62 | 1171783 1191714 63 | 1191714 1210414 64 | 1210414 1231205 65 | 1231205 1249025 66 | 1249025 1269821 67 | 1269821 1289836 68 | 1289836 1304937 69 | 1304937 1323629 70 | 1323629 1342860 71 | 1342860 1362331 72 | 1362331 1382291 73 | 1382291 1401866 74 | 1401866 1420102 75 | 1420102 1439892 76 | 1439892 1459810 77 | 1459810 1480358 78 | 1480358 1500371 79 | 1500371 1519349 80 | 1519349 1539401 81 | 1539401 1555040 82 | 1555040 1574209 83 | 1574209 1594225 84 | 1594225 1611272 85 | 1611272 1628442 86 | 1628442 1648389 87 | 1648389 1669045 88 | 1669045 1687888 89 | 1687888 1705404 90 | 1705404 1723511 91 | 1723511 1743564 92 | 1743564 1763797 93 | 1763797 1782785 94 | 1782785 1801282 95 | 1801282 1820982 96 | 1820982 1840591 97 | 1840591 1857639 98 | 1857639 1877164 99 | 1877164 1897152 100 | 1897152 1915129 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/24.tsv: -------------------------------------------------------------------------------- 1 | 0 17930 2 | 17930 35716 3 | 35716 56326 4 | 56326 75352 5 | 75352 95486 6 | 95486 110649 7 | 110649 130196 8 | 130196 149150 9 | 149150 164584 10 | 164584 183470 11 | 183470 201619 12 | 201619 222197 13 | 222197 241773 14 | 241773 261569 15 | 261569 279223 16 | 279223 299519 17 
| 299519 319551 18 | 319551 338679 19 | 338679 358311 20 | 358311 378132 21 | 378132 397588 22 | 397588 415094 23 | 415094 433479 24 | 433479 451588 25 | 451588 471050 26 | 471050 491434 27 | 491434 511931 28 | 511931 531505 29 | 531505 551943 30 | 551943 566963 31 | 566963 584415 32 | 584415 599784 33 | 599784 618606 34 | 618606 638391 35 | 638391 657943 36 | 657943 676676 37 | 676676 695876 38 | 695876 715863 39 | 715863 731614 40 | 731614 749852 41 | 749852 769228 42 | 769228 786952 43 | 786952 806443 44 | 806443 822220 45 | 822220 842492 46 | 842492 862206 47 | 862206 882344 48 | 882344 902338 49 | 902338 917957 50 | 917957 937018 51 | 937018 956155 52 | 956155 975439 53 | 975439 994412 54 | 994412 1012845 55 | 1012845 1030333 56 | 1030333 1048581 57 | 1048581 1068236 58 | 1068236 1087652 59 | 1087652 1107504 60 | 1107504 1123900 61 | 1123900 1141764 62 | 1141764 1161956 63 | 1161956 1176730 64 | 1176730 1196520 65 | 1196520 1214995 66 | 1214995 1234122 67 | 1234122 1253715 68 | 1253715 1274109 69 | 1274109 1288735 70 | 1288735 1305838 71 | 1305838 1326049 72 | 1326049 1346532 73 | 1346532 1366608 74 | 1366608 1386939 75 | 1386939 1405926 76 | 1405926 1425669 77 | 1425669 1443577 78 | 1443577 1460273 79 | 1460273 1478940 80 | 1478940 1498503 81 | 1498503 1518071 82 | 1518071 1537418 83 | 1537418 1557680 84 | 1557680 1576004 85 | 1576004 1596177 86 | 1596177 1615280 87 | 1615280 1633830 88 | 1633830 1653365 89 | 1653365 1674078 90 | 1674078 1693843 91 | 1693843 1713687 92 | 1713687 1732437 93 | 1732437 1751853 94 | 1751853 1771785 95 | 1771785 1791937 96 | 1791937 1811648 97 | 1811648 1827072 98 | 1827072 1841075 99 | 1841075 1859096 100 | 1859096 1879114 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/25.tsv: -------------------------------------------------------------------------------- 1 | 0 18020 2 | 18020 36464 3 | 36464 56338 4 | 56338 75400 5 | 75400 94988 6 | 94988 114749 7 | 114749 
133522 8 | 133522 153640 9 | 153640 171741 10 | 171741 192023 11 | 192023 211443 12 | 211443 231060 13 | 231060 251140 14 | 251140 269635 15 | 269635 289806 16 | 289806 309633 17 | 309633 329435 18 | 329435 348299 19 | 348299 368499 20 | 368499 387120 21 | 387120 407762 22 | 407762 425084 23 | 425084 442719 24 | 442719 461700 25 | 461700 480713 26 | 480713 496287 27 | 496287 516555 28 | 516555 535804 29 | 535804 547967 30 | 547967 567972 31 | 567972 587437 32 | 587437 602115 33 | 602115 621994 34 | 621994 641864 35 | 641864 661018 36 | 661018 680724 37 | 680724 700369 38 | 700369 720033 39 | 720033 738908 40 | 738908 757450 41 | 757450 772874 42 | 772874 792824 43 | 792824 812514 44 | 812514 831370 45 | 831370 846835 46 | 846835 862491 47 | 862491 882219 48 | 882219 902926 49 | 902926 919565 50 | 919565 939223 51 | 939223 954693 52 | 954693 972494 53 | 972494 989417 54 | 989417 1009133 55 | 1009133 1029330 56 | 1029330 1049775 57 | 1049775 1067346 58 | 1067346 1086907 59 | 1086907 1106449 60 | 1106449 1126778 61 | 1126778 1146893 62 | 1146893 1166497 63 | 1166497 1180800 64 | 1180800 1200264 65 | 1200264 1214230 66 | 1214230 1234341 67 | 1234341 1254327 68 | 1254327 1269488 69 | 1269488 1289925 70 | 1289925 1309930 71 | 1309930 1330127 72 | 1330127 1348456 73 | 1348456 1366724 74 | 1366724 1386621 75 | 1386621 1404795 76 | 1404795 1424750 77 | 1424750 1441711 78 | 1441711 1461201 79 | 1461201 1481532 80 | 1481532 1502255 81 | 1502255 1520616 82 | 1520616 1540827 83 | 1540827 1560802 84 | 1560802 1580740 85 | 1580740 1599987 86 | 1599987 1615136 87 | 1615136 1630299 88 | 1630299 1650616 89 | 1650616 1670619 90 | 1670619 1689853 91 | 1689853 1707608 92 | 1707608 1727504 93 | 1727504 1741775 94 | 1741775 1760535 95 | 1760535 1779470 96 | 1779470 1799828 97 | 1799828 1819938 98 | 1819938 1838304 99 | 1838304 1856600 100 | 1856600 1877473 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/26.tsv: 
-------------------------------------------------------------------------------- 1 | 0 19837 2 | 19837 39673 3 | 39673 59545 4 | 59545 79441 5 | 79441 97366 6 | 97366 114427 7 | 114427 134177 8 | 134177 153048 9 | 153048 172965 10 | 172965 192624 11 | 192624 212880 12 | 212880 232975 13 | 232975 251610 14 | 251610 271894 15 | 271894 291888 16 | 291888 309931 17 | 309931 330072 18 | 330072 348408 19 | 348408 369125 20 | 369125 388694 21 | 388694 406185 22 | 406185 425530 23 | 425530 445339 24 | 445339 463912 25 | 463912 483961 26 | 483961 503485 27 | 503485 523605 28 | 523605 543495 29 | 543495 563756 30 | 563756 582714 31 | 582714 602565 32 | 602565 620734 33 | 620734 640062 34 | 640062 657889 35 | 657889 673643 36 | 673643 691866 37 | 691866 711405 38 | 711405 731115 39 | 731115 751063 40 | 751063 770533 41 | 770533 791075 42 | 791075 810445 43 | 810445 828617 44 | 828617 848864 45 | 848864 868718 46 | 868718 888632 47 | 888632 908469 48 | 908469 929276 49 | 929276 949831 50 | 949831 968785 51 | 968785 988226 52 | 988226 1005119 53 | 1005119 1025489 54 | 1025489 1045670 55 | 1045670 1062752 56 | 1062752 1081705 57 | 1081705 1101257 58 | 1101257 1121112 59 | 1121112 1141266 60 | 1141266 1160979 61 | 1160979 1181094 62 | 1181094 1200679 63 | 1200679 1221577 64 | 1221577 1241138 65 | 1241138 1260844 66 | 1260844 1280727 67 | 1280727 1300335 68 | 1300335 1320027 69 | 1320027 1338050 70 | 1338050 1358811 71 | 1358811 1378892 72 | 1378892 1398774 73 | 1398774 1419069 74 | 1419069 1439395 75 | 1439395 1457295 76 | 1457295 1477686 77 | 1477686 1498322 78 | 1498322 1518188 79 | 1518188 1537935 80 | 1537935 1555734 81 | 1555734 1576280 82 | 1576280 1591402 83 | 1591402 1610958 84 | 1610958 1630617 85 | 1630617 1645503 86 | 1645503 1665451 87 | 1665451 1685056 88 | 1685056 1704859 89 | 1704859 1724790 90 | 1724790 1740197 91 | 1740197 1759649 92 | 1759649 1777134 93 | 1777134 1797467 94 | 1797467 1817133 95 | 1817133 1836756 96 | 1836756 1857542 97 | 1857542 1877216 98 | 
1877216 1896417 99 | 1896417 1916474 100 | 1916474 1931203 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/27.tsv: -------------------------------------------------------------------------------- 1 | 0 17409 2 | 17409 33431 3 | 33431 53139 4 | 53139 73853 5 | 73853 88386 6 | 88386 101754 7 | 101754 121807 8 | 121807 137366 9 | 137366 157532 10 | 157532 176827 11 | 176827 192972 12 | 192972 213310 13 | 213310 233324 14 | 233324 249099 15 | 249099 267398 16 | 267398 287218 17 | 287218 307186 18 | 307186 327608 19 | 327608 348212 20 | 348212 367666 21 | 367666 383822 22 | 383822 401737 23 | 401737 417257 24 | 417257 434570 25 | 434570 449994 26 | 449994 466056 27 | 466056 486185 28 | 486185 505729 29 | 505729 526173 30 | 526173 541701 31 | 541701 561289 32 | 561289 581186 33 | 581186 602065 34 | 602065 620402 35 | 620402 640150 36 | 640150 660182 37 | 660182 680058 38 | 680058 700624 39 | 700624 719046 40 | 719046 739252 41 | 739252 758667 42 | 758667 778310 43 | 778310 797773 44 | 797773 815638 45 | 815638 832564 46 | 832564 850646 47 | 850646 868178 48 | 868178 885575 49 | 885575 905656 50 | 905656 925470 51 | 925470 944713 52 | 944713 964765 53 | 964765 983787 54 | 983787 1003301 55 | 1003301 1021319 56 | 1021319 1040353 57 | 1040353 1060271 58 | 1060271 1078603 59 | 1078603 1098622 60 | 1098622 1117371 61 | 1117371 1135729 62 | 1135729 1154935 63 | 1154935 1170574 64 | 1170574 1189030 65 | 1189030 1209345 66 | 1209345 1229509 67 | 1229509 1244380 68 | 1244380 1260662 69 | 1260662 1279025 70 | 1279025 1298944 71 | 1298944 1318705 72 | 1318705 1339217 73 | 1339217 1359520 74 | 1359520 1379325 75 | 1379325 1399608 76 | 1399608 1420199 77 | 1420199 1438991 78 | 1438991 1456979 79 | 1456979 1475046 80 | 1475046 1495148 81 | 1495148 1515323 82 | 1515323 1536245 83 | 1536245 1550643 84 | 1550643 1571295 85 | 1571295 1590479 86 | 1590479 1608176 87 | 1608176 1629025 88 | 1629025 1648834 89 | 1648834 
1669111 90 | 1669111 1688921 91 | 1688921 1709055 92 | 1709055 1727225 93 | 1727225 1746817 94 | 1746817 1765713 95 | 1765713 1783207 96 | 1783207 1802269 97 | 1802269 1822069 98 | 1822069 1842268 99 | 1842268 1863341 100 | 1863341 1878568 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/28.tsv: -------------------------------------------------------------------------------- 1 | 0 19609 2 | 19609 35238 3 | 35238 54720 4 | 54720 71094 5 | 71094 90601 6 | 90601 109072 7 | 109072 127287 8 | 127287 146784 9 | 146784 166725 10 | 166725 185195 11 | 185195 204336 12 | 204336 222097 13 | 222097 241734 14 | 241734 257191 15 | 257191 275220 16 | 275220 295848 17 | 295848 316499 18 | 316499 336713 19 | 336713 357164 20 | 357164 372979 21 | 372979 392967 22 | 392967 413157 23 | 413157 430343 24 | 430343 448624 25 | 448624 468500 26 | 468500 488897 27 | 488897 509091 28 | 509091 525846 29 | 525846 545656 30 | 545656 565074 31 | 565074 585093 32 | 585093 604786 33 | 604786 624005 34 | 624005 644258 35 | 644258 664630 36 | 664630 685297 37 | 685297 705694 38 | 705694 724855 39 | 724855 737192 40 | 737192 757673 41 | 757673 775742 42 | 775742 795511 43 | 795511 815454 44 | 815454 830066 45 | 830066 847840 46 | 847840 867883 47 | 867883 888155 48 | 888155 908036 49 | 908036 924849 50 | 924849 943777 51 | 943777 963648 52 | 963648 983937 53 | 983937 1003198 54 | 1003198 1022414 55 | 1022414 1042131 56 | 1042131 1061424 57 | 1061424 1082190 58 | 1082190 1102403 59 | 1102403 1122474 60 | 1122474 1141684 61 | 1141684 1161342 62 | 1161342 1181110 63 | 1181110 1196130 64 | 1196130 1216056 65 | 1216056 1236226 66 | 1236226 1255870 67 | 1255870 1271523 68 | 1271523 1291356 69 | 1291356 1311029 70 | 1311029 1331337 71 | 1331337 1350992 72 | 1350992 1370835 73 | 1370835 1391487 74 | 1391487 1411861 75 | 1411861 1431051 76 | 1431051 1451043 77 | 1451043 1471802 78 | 1471802 1489979 79 | 1489979 1509546 80 | 1509546 1529930 
81 | 1529930 1549716 82 | 1549716 1570099 83 | 1570099 1588095 84 | 1588095 1608042 85 | 1608042 1628440 86 | 1628440 1648599 87 | 1648599 1664020 88 | 1664020 1684268 89 | 1684268 1703685 90 | 1703685 1724174 91 | 1724174 1743652 92 | 1743652 1761341 93 | 1761341 1780758 94 | 1780758 1800864 95 | 1800864 1821112 96 | 1821112 1839567 97 | 1839567 1859413 98 | 1859413 1879182 99 | 1879182 1899541 100 | 1899541 1919497 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/29.tsv: -------------------------------------------------------------------------------- 1 | 0 20534 2 | 20534 40352 3 | 40352 60455 4 | 60455 78685 5 | 78685 99175 6 | 99175 119301 7 | 119301 139111 8 | 139111 159633 9 | 159633 178287 10 | 178287 193165 11 | 193165 213187 12 | 213187 230425 13 | 230425 247688 14 | 247688 267654 15 | 267654 287394 16 | 287394 304965 17 | 304965 324830 18 | 324830 343839 19 | 343839 363945 20 | 363945 383786 21 | 383786 402480 22 | 402480 423161 23 | 423161 442758 24 | 442758 458444 25 | 458444 478694 26 | 478694 498564 27 | 498564 519481 28 | 519481 539138 29 | 539138 559556 30 | 559556 577271 31 | 577271 595503 32 | 595503 615259 33 | 615259 635187 34 | 635187 654725 35 | 654725 674227 36 | 674227 692575 37 | 692575 710575 38 | 710575 730656 39 | 730656 748136 40 | 748136 768274 41 | 768274 788893 42 | 788893 807230 43 | 807230 823021 44 | 823021 842744 45 | 842744 860262 46 | 860262 879057 47 | 879057 894263 48 | 894263 911324 49 | 911324 931174 50 | 931174 949440 51 | 949440 969577 52 | 969577 989864 53 | 989864 1008063 54 | 1008063 1025645 55 | 1025645 1046107 56 | 1046107 1061862 57 | 1061862 1080060 58 | 1080060 1100025 59 | 1100025 1120260 60 | 1120260 1138433 61 | 1138433 1156443 62 | 1156443 1176375 63 | 1176375 1196012 64 | 1196012 1216413 65 | 1216413 1234845 66 | 1234845 1255220 67 | 1255220 1275963 68 | 1275963 1295453 69 | 1295453 1316135 70 | 1316135 1336383 71 | 1336383 1354615 72 | 
1354615 1374848 73 | 1374848 1394708 74 | 1394708 1414326 75 | 1414326 1434688 76 | 1434688 1451771 77 | 1451771 1471456 78 | 1471456 1488265 79 | 1488265 1508161 80 | 1508161 1528392 81 | 1528392 1546270 82 | 1546270 1566290 83 | 1566290 1585239 84 | 1585239 1605279 85 | 1605279 1625403 86 | 1625403 1645659 87 | 1645659 1663864 88 | 1663864 1683389 89 | 1683389 1703043 90 | 1703043 1723215 91 | 1723215 1737153 92 | 1737153 1757310 93 | 1757310 1774937 94 | 1774937 1795635 95 | 1795635 1816461 96 | 1816461 1834770 97 | 1834770 1852896 98 | 1852896 1872896 99 | 1872896 1892529 100 | 1892529 1910680 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/3.tsv: -------------------------------------------------------------------------------- 1 | 0 18687 2 | 18687 36837 3 | 36837 55662 4 | 55662 75306 5 | 75306 95642 6 | 95642 115597 7 | 115597 130823 8 | 130823 149105 9 | 149105 169826 10 | 169826 185787 11 | 185787 206429 12 | 206429 225188 13 | 225188 244726 14 | 244726 264937 15 | 264937 283874 16 | 283874 299921 17 | 299921 318266 18 | 318266 338729 19 | 338729 358824 20 | 358824 378693 21 | 378693 396565 22 | 396565 415373 23 | 415373 435739 24 | 435739 454031 25 | 454031 474017 26 | 474017 493842 27 | 493842 508877 28 | 508877 528496 29 | 528496 548708 30 | 548708 566891 31 | 566891 585444 32 | 585444 604829 33 | 604829 624783 34 | 624783 645343 35 | 645343 665541 36 | 665541 685934 37 | 685934 706469 38 | 706469 725882 39 | 725882 746118 40 | 746118 759917 41 | 759917 780052 42 | 780052 800348 43 | 800348 820787 44 | 820787 841444 45 | 841444 861734 46 | 861734 882137 47 | 882137 902481 48 | 902481 921650 49 | 921650 939742 50 | 939742 958798 51 | 958798 979669 52 | 979669 999421 53 | 999421 1015168 54 | 1015168 1034938 55 | 1034938 1055466 56 | 1055466 1074992 57 | 1074992 1095092 58 | 1095092 1114978 59 | 1114978 1134724 60 | 1134724 1155382 61 | 1155382 1175930 62 | 1175930 1191281 63 | 1191281 
1211115 64 | 1211115 1231140 65 | 1231140 1250692 66 | 1250692 1267700 67 | 1267700 1288145 68 | 1288145 1308075 69 | 1308075 1326536 70 | 1326536 1346684 71 | 1346684 1365047 72 | 1365047 1383448 73 | 1383448 1398867 74 | 1398867 1418921 75 | 1418921 1437553 76 | 1437553 1457732 77 | 1457732 1477924 78 | 1477924 1497789 79 | 1497789 1512926 80 | 1512926 1532441 81 | 1532441 1551179 82 | 1551179 1570859 83 | 1570859 1590962 84 | 1590962 1610294 85 | 1610294 1630167 86 | 1630167 1648490 87 | 1648490 1667885 88 | 1667885 1685138 89 | 1685138 1703861 90 | 1703861 1724328 91 | 1724328 1743774 92 | 1743774 1763625 93 | 1763625 1781088 94 | 1781088 1799688 95 | 1799688 1819198 96 | 1819198 1838927 97 | 1838927 1858339 98 | 1858339 1877793 99 | 1877793 1897422 100 | 1897422 1916250 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/30.tsv: -------------------------------------------------------------------------------- 1 | 0 19552 2 | 19552 39020 3 | 39020 57428 4 | 57428 73185 5 | 73185 93504 6 | 93504 114304 7 | 114304 134209 8 | 134209 154518 9 | 154518 170245 10 | 170245 190544 11 | 190544 208965 12 | 208965 229187 13 | 229187 249062 14 | 249062 267176 15 | 267176 287402 16 | 287402 307407 17 | 307407 326750 18 | 326750 346779 19 | 346779 363646 20 | 363646 382999 21 | 382999 397240 22 | 397240 415532 23 | 415532 435967 24 | 435967 455692 25 | 455692 475851 26 | 475851 495720 27 | 495720 513430 28 | 513430 533034 29 | 533034 549763 30 | 549763 569054 31 | 569054 588399 32 | 588399 608498 33 | 608498 628128 34 | 628128 645066 35 | 645066 665553 36 | 665553 683801 37 | 683801 701913 38 | 701913 719611 39 | 719611 738085 40 | 738085 758063 41 | 758063 776010 42 | 776010 796341 43 | 796341 815233 44 | 815233 835525 45 | 835525 855163 46 | 855163 876086 47 | 876086 896007 48 | 896007 915345 49 | 915345 930662 50 | 930662 946512 51 | 946512 965903 52 | 965903 985662 53 | 985662 1004876 54 | 1004876 1025354 
55 | 1025354 1045965 56 | 1045965 1063931 57 | 1063931 1084219 58 | 1084219 1104308 59 | 1104308 1123783 60 | 1123783 1144247 61 | 1144247 1164047 62 | 1164047 1183234 63 | 1183234 1203360 64 | 1203360 1223459 65 | 1223459 1243437 66 | 1243437 1263937 67 | 1263937 1283856 68 | 1283856 1303657 69 | 1303657 1319047 70 | 1319047 1337300 71 | 1337300 1357550 72 | 1357550 1377048 73 | 1377048 1397104 74 | 1397104 1416878 75 | 1416878 1437239 76 | 1437239 1451685 77 | 1451685 1467511 78 | 1467511 1486730 79 | 1486730 1503691 80 | 1503691 1523109 81 | 1523109 1543012 82 | 1543012 1562462 83 | 1562462 1582675 84 | 1582675 1600649 85 | 1600649 1619677 86 | 1619677 1635543 87 | 1635543 1654840 88 | 1654840 1675727 89 | 1675727 1695538 90 | 1695538 1710259 91 | 1710259 1725256 92 | 1725256 1745100 93 | 1745100 1765337 94 | 1765337 1783568 95 | 1783568 1803224 96 | 1803224 1822774 97 | 1822774 1843613 98 | 1843613 1863378 99 | 1863378 1880178 100 | 1880178 1900355 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/31.tsv: -------------------------------------------------------------------------------- 1 | 0 19493 2 | 19493 39997 3 | 39997 58851 4 | 58851 78900 5 | 78900 98984 6 | 98984 119130 7 | 119130 139516 8 | 139516 158322 9 | 158322 176072 10 | 176072 191973 11 | 191973 211714 12 | 211714 231271 13 | 231271 249939 14 | 249939 270067 15 | 270067 290263 16 | 290263 310263 17 | 310263 329925 18 | 329925 349755 19 | 349755 369173 20 | 369173 387337 21 | 387337 406821 22 | 406821 425964 23 | 425964 446039 24 | 446039 466705 25 | 466705 487418 26 | 487418 506026 27 | 506026 525459 28 | 525459 544375 29 | 544375 561885 30 | 561885 579741 31 | 579741 599996 32 | 599996 616637 33 | 616637 636617 34 | 636617 656864 35 | 656864 677248 36 | 677248 697577 37 | 697577 716069 38 | 716069 735747 39 | 735747 755266 40 | 755266 773746 41 | 773746 793594 42 | 793594 813086 43 | 813086 828646 44 | 828646 849135 45 | 849135 
866935 46 | 866935 883232 47 | 883232 903167 48 | 903167 924078 49 | 924078 941901 50 | 941901 961935 51 | 961935 982299 52 | 982299 1002548 53 | 1002548 1021855 54 | 1021855 1040073 55 | 1040073 1056317 56 | 1056317 1075587 57 | 1075587 1093809 58 | 1093809 1113817 59 | 1113817 1132122 60 | 1132122 1150395 61 | 1150395 1168169 62 | 1168169 1188752 63 | 1188752 1206954 64 | 1206954 1227000 65 | 1227000 1247109 66 | 1247109 1266746 67 | 1266746 1282293 68 | 1282293 1297876 69 | 1297876 1317586 70 | 1317586 1337281 71 | 1337281 1355643 72 | 1355643 1376188 73 | 1376188 1392968 74 | 1392968 1412800 75 | 1412800 1433160 76 | 1433160 1453405 77 | 1453405 1473036 78 | 1473036 1493161 79 | 1493161 1508702 80 | 1508702 1528808 81 | 1528808 1549201 82 | 1549201 1568880 83 | 1568880 1588708 84 | 1588708 1608423 85 | 1608423 1628463 86 | 1628463 1648928 87 | 1648928 1668224 88 | 1668224 1684930 89 | 1684930 1705396 90 | 1705396 1726065 91 | 1726065 1744568 92 | 1744568 1764941 93 | 1764941 1785214 94 | 1785214 1799346 95 | 1799346 1819649 96 | 1819649 1837438 97 | 1837438 1856634 98 | 1856634 1876662 99 | 1876662 1896489 100 | 1896489 1917141 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/32.tsv: -------------------------------------------------------------------------------- 1 | 0 19263 2 | 19263 39441 3 | 39441 58400 4 | 58400 78411 5 | 78411 98326 6 | 98326 117957 7 | 117957 137765 8 | 137765 156734 9 | 156734 175550 10 | 175550 191445 11 | 191445 211290 12 | 211290 231558 13 | 231558 250154 14 | 250154 270259 15 | 270259 290350 16 | 290350 308274 17 | 308274 323847 18 | 323847 342641 19 | 342641 363181 20 | 363181 383366 21 | 383366 403127 22 | 403127 423611 23 | 423611 437392 24 | 437392 456514 25 | 456514 476341 26 | 476341 496915 27 | 496915 517540 28 | 517540 534405 29 | 534405 550517 30 | 550517 570100 31 | 570100 589085 32 | 589085 606748 33 | 606748 626900 34 | 626900 646859 35 | 646859 667114 
36 | 667114 687163 37 | 687163 703148 38 | 703148 722699 39 | 722699 742878 40 | 742878 761000 41 | 761000 778198 42 | 778198 798429 43 | 798429 818698 44 | 818698 839105 45 | 839105 858160 46 | 858160 877772 47 | 877772 897499 48 | 897499 917967 49 | 917967 937644 50 | 937644 957408 51 | 957408 977425 52 | 977425 996010 53 | 996010 1016141 54 | 1016141 1036052 55 | 1036052 1053996 56 | 1053996 1074118 57 | 1074118 1089725 58 | 1089725 1104891 59 | 1104891 1120769 60 | 1120769 1139231 61 | 1139231 1159525 62 | 1159525 1179111 63 | 1179111 1198142 64 | 1198142 1212097 65 | 1212097 1231642 66 | 1231642 1251785 67 | 1251785 1270730 68 | 1270730 1290317 69 | 1290317 1310359 70 | 1310359 1330291 71 | 1330291 1343564 72 | 1343564 1363167 73 | 1363167 1382783 74 | 1382783 1402875 75 | 1402875 1423136 76 | 1423136 1442096 77 | 1442096 1461881 78 | 1461881 1480985 79 | 1480985 1499733 80 | 1499733 1519809 81 | 1519809 1535516 82 | 1535516 1553043 83 | 1553043 1572791 84 | 1572791 1592302 85 | 1592302 1612178 86 | 1612178 1631185 87 | 1631185 1650897 88 | 1650897 1669988 89 | 1669988 1687398 90 | 1687398 1707149 91 | 1707149 1727017 92 | 1727017 1747254 93 | 1747254 1765163 94 | 1765163 1785267 95 | 1785267 1802176 96 | 1802176 1822864 97 | 1822864 1842967 98 | 1842967 1860550 99 | 1860550 1880774 100 | 1880774 1901065 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/33.tsv: -------------------------------------------------------------------------------- 1 | 0 20542 2 | 20542 39892 3 | 39892 57760 4 | 57760 78068 5 | 78068 97200 6 | 97200 114561 7 | 114561 134280 8 | 134280 154295 9 | 154295 175119 10 | 175119 195859 11 | 195859 215644 12 | 215644 235269 13 | 235269 254691 14 | 254691 274378 15 | 274378 293446 16 | 293446 313657 17 | 313657 333403 18 | 333403 353528 19 | 353528 372035 20 | 372035 392556 21 | 392556 410176 22 | 410176 426184 23 | 426184 445361 24 | 445361 465499 25 | 465499 485055 26 | 
485055 502720 27 | 502720 517898 28 | 517898 535687 29 | 535687 555898 30 | 555898 575870 31 | 575870 595647 32 | 595647 615425 33 | 615425 635815 34 | 635815 652044 35 | 652044 671870 36 | 671870 691462 37 | 691462 711651 38 | 711651 731382 39 | 731382 751288 40 | 751288 771610 41 | 771610 792213 42 | 792213 812421 43 | 812421 832271 44 | 832271 850331 45 | 850331 867793 46 | 867793 888109 47 | 888109 905103 48 | 905103 920768 49 | 920768 938673 50 | 938673 958548 51 | 958548 978231 52 | 978231 998230 53 | 998230 1018120 54 | 1018120 1037333 55 | 1037333 1057555 56 | 1057555 1077115 57 | 1077115 1096620 58 | 1096620 1114908 59 | 1114908 1130218 60 | 1130218 1150174 61 | 1150174 1165410 62 | 1165410 1184880 63 | 1184880 1202689 64 | 1202689 1223185 65 | 1223185 1243506 66 | 1243506 1263202 67 | 1263202 1283073 68 | 1283073 1298691 69 | 1298691 1319315 70 | 1319315 1339544 71 | 1339544 1359520 72 | 1359520 1377496 73 | 1377496 1397706 74 | 1397706 1418428 75 | 1418428 1437625 76 | 1437625 1456769 77 | 1456769 1476692 78 | 1476692 1490989 79 | 1490989 1511713 80 | 1511713 1530419 81 | 1530419 1550860 82 | 1550860 1569817 83 | 1569817 1588150 84 | 1588150 1603757 85 | 1603757 1623419 86 | 1623419 1642925 87 | 1642925 1656067 88 | 1656067 1675870 89 | 1675870 1694584 90 | 1694584 1714083 91 | 1714083 1733128 92 | 1733128 1751622 93 | 1751622 1772475 94 | 1772475 1791286 95 | 1791286 1809606 96 | 1809606 1829134 97 | 1829134 1844315 98 | 1844315 1864237 99 | 1864237 1882400 100 | 1882400 1901918 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/34.tsv: -------------------------------------------------------------------------------- 1 | 0 19515 2 | 19515 39636 3 | 39636 59952 4 | 59952 78929 5 | 78929 98810 6 | 98810 119401 7 | 119401 139579 8 | 139579 159317 9 | 159317 174559 10 | 174559 189931 11 | 189931 209582 12 | 209582 227708 13 | 227708 248356 14 | 248356 268079 15 | 268079 288453 16 | 288453 
309039 17 | 309039 328348 18 | 328348 346475 19 | 346475 366281 20 | 366281 386234 21 | 386234 405862 22 | 405862 425587 23 | 425587 444206 24 | 444206 459393 25 | 459393 479723 26 | 479723 498576 27 | 498576 519227 28 | 519227 536787 29 | 536787 556976 30 | 556976 576160 31 | 576160 595949 32 | 595949 616045 33 | 616045 636074 34 | 636074 653374 35 | 653374 673015 36 | 673015 692089 37 | 692089 712757 38 | 712757 733004 39 | 733004 753564 40 | 753564 773276 41 | 773276 793419 42 | 793419 808322 43 | 808322 828321 44 | 828321 847496 45 | 847496 866225 46 | 866225 885954 47 | 885954 905038 48 | 905038 925256 49 | 925256 945162 50 | 945162 965289 51 | 965289 984596 52 | 984596 1005277 53 | 1005277 1020742 54 | 1020742 1041164 55 | 1041164 1061002 56 | 1061002 1080665 57 | 1080665 1100747 58 | 1100747 1120953 59 | 1120953 1139235 60 | 1139235 1159259 61 | 1159259 1179630 62 | 1179630 1200460 63 | 1200460 1220021 64 | 1220021 1238926 65 | 1238926 1258115 66 | 1258115 1277783 67 | 1277783 1297910 68 | 1297910 1318308 69 | 1318308 1337086 70 | 1337086 1356369 71 | 1356369 1375749 72 | 1375749 1391027 73 | 1391027 1410773 74 | 1410773 1430105 75 | 1430105 1450314 76 | 1450314 1470144 77 | 1470144 1489790 78 | 1489790 1507925 79 | 1507925 1523395 80 | 1523395 1544376 81 | 1544376 1564402 82 | 1564402 1583093 83 | 1583093 1603902 84 | 1603902 1623459 85 | 1623459 1642841 86 | 1642841 1662617 87 | 1662617 1678613 88 | 1678613 1698480 89 | 1698480 1719061 90 | 1719061 1737177 91 | 1737177 1756668 92 | 1756668 1776979 93 | 1776979 1794661 94 | 1794661 1814017 95 | 1814017 1833085 96 | 1833085 1849171 97 | 1849171 1869854 98 | 1869854 1887854 99 | 1887854 1908131 100 | 1908131 1927822 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/35.tsv: -------------------------------------------------------------------------------- 1 | 0 20574 2 | 20574 39971 3 | 39971 60100 4 | 60100 74478 5 | 74478 94997 6 | 94997 
115062 7 | 115062 133621 8 | 133621 154350 9 | 154350 174630 10 | 174630 192480 11 | 192480 213013 12 | 213013 233319 13 | 233319 251414 14 | 251414 269817 15 | 269817 287864 16 | 287864 308133 17 | 308133 327850 18 | 327850 347525 19 | 347525 367513 20 | 367513 387489 21 | 387489 407600 22 | 407600 426791 23 | 426791 443206 24 | 443206 458557 25 | 458557 478588 26 | 478588 498766 27 | 498766 518527 28 | 518527 536694 29 | 536694 556387 30 | 556387 576659 31 | 576659 594775 32 | 594775 614241 33 | 614241 629756 34 | 629756 647935 35 | 647935 668190 36 | 668190 687778 37 | 687778 708027 38 | 708027 727550 39 | 727550 747542 40 | 747542 767190 41 | 767190 786959 42 | 786959 805930 43 | 805930 826643 44 | 826643 846700 45 | 846700 863052 46 | 863052 883087 47 | 883087 900847 48 | 900847 921174 49 | 921174 941072 50 | 941072 960982 51 | 960982 980747 52 | 980747 1000939 53 | 1000939 1021290 54 | 1021290 1041437 55 | 1041437 1061396 56 | 1061396 1081530 57 | 1081530 1101325 58 | 1101325 1121097 59 | 1121097 1139606 60 | 1139606 1160051 61 | 1160051 1177840 62 | 1177840 1191716 63 | 1191716 1211731 64 | 1211731 1229726 65 | 1229726 1249683 66 | 1249683 1265173 67 | 1265173 1281947 68 | 1281947 1302159 69 | 1302159 1322169 70 | 1322169 1341727 71 | 1341727 1361459 72 | 1361459 1381636 73 | 1381636 1398576 74 | 1398576 1414901 75 | 1414901 1435236 76 | 1435236 1455533 77 | 1455533 1475630 78 | 1475630 1495787 79 | 1495787 1514818 80 | 1514818 1535161 81 | 1535161 1555127 82 | 1555127 1574771 83 | 1574771 1592280 84 | 1592280 1612597 85 | 1612597 1632243 86 | 1632243 1652640 87 | 1652640 1672773 88 | 1672773 1692901 89 | 1692901 1712626 90 | 1712626 1732956 91 | 1732956 1752448 92 | 1752448 1770829 93 | 1770829 1789150 94 | 1789150 1804695 95 | 1804695 1822911 96 | 1822911 1837879 97 | 1837879 1855450 98 | 1855450 1873370 99 | 1873370 1893613 100 | 1893613 1912237 101 | -------------------------------------------------------------------------------- 
/backend/search/clueweb/ranges/36.tsv: -------------------------------------------------------------------------------- 1 | 0 19287 2 | 19287 39358 3 | 39358 57718 4 | 57718 78453 5 | 78453 98663 6 | 98663 118230 7 | 118230 137711 8 | 137711 155475 9 | 155475 175154 10 | 175154 192002 11 | 192002 211869 12 | 211869 231665 13 | 231665 251370 14 | 251370 270515 15 | 270515 290990 16 | 290990 311277 17 | 311277 328148 18 | 328148 348285 19 | 348285 368503 20 | 368503 388935 21 | 388935 406410 22 | 406410 424407 23 | 424407 444768 24 | 444768 462086 25 | 462086 482753 26 | 482753 503042 27 | 503042 523133 28 | 523133 542434 29 | 542434 560468 30 | 560468 577604 31 | 577604 597517 32 | 597517 616080 33 | 616080 630174 34 | 630174 648605 35 | 648605 667109 36 | 667109 682226 37 | 682226 700319 38 | 700319 717934 39 | 717934 736014 40 | 736014 756201 41 | 756201 775501 42 | 775501 795470 43 | 795470 814966 44 | 814966 834799 45 | 834799 854495 46 | 854495 870412 47 | 870412 889833 48 | 889833 909189 49 | 909189 929181 50 | 929181 947161 51 | 947161 965468 52 | 965468 985512 53 | 985512 1005434 54 | 1005434 1025833 55 | 1025833 1045533 56 | 1045533 1065716 57 | 1065716 1081223 58 | 1081223 1099620 59 | 1099620 1118600 60 | 1118600 1136114 61 | 1136114 1154567 62 | 1154567 1173378 63 | 1173378 1192718 64 | 1192718 1212554 65 | 1212554 1228253 66 | 1228253 1247874 67 | 1247874 1268214 68 | 1268214 1288783 69 | 1288783 1308227 70 | 1308227 1328224 71 | 1328224 1348920 72 | 1348920 1368805 73 | 1368805 1388588 74 | 1388588 1405156 75 | 1405156 1424367 76 | 1424367 1444342 77 | 1444342 1464248 78 | 1464248 1484605 79 | 1484605 1505009 80 | 1505009 1525258 81 | 1525258 1544883 82 | 1544883 1565405 83 | 1565405 1584873 84 | 1584873 1600040 85 | 1600040 1619003 86 | 1619003 1639014 87 | 1639014 1658997 88 | 1658997 1678690 89 | 1678690 1696511 90 | 1696511 1715452 91 | 1715452 1731237 92 | 1731237 1750719 93 | 1750719 1767699 94 | 1767699 1788306 95 | 1788306 1803611 96 | 1803611 
1821443 97 | 1821443 1841051 98 | 1841051 1860142 99 | 1860142 1880042 100 | 1880042 1900071 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/37.tsv: -------------------------------------------------------------------------------- 1 | 0 19309 2 | 19309 38897 3 | 38897 59610 4 | 59610 78069 5 | 78069 97185 6 | 97185 117382 7 | 117382 136713 8 | 136713 157034 9 | 157034 175208 10 | 175208 193026 11 | 193026 213108 12 | 213108 233144 13 | 233144 253017 14 | 253017 273174 15 | 273174 292723 16 | 292723 312821 17 | 312821 332991 18 | 332991 352787 19 | 352787 373013 20 | 373013 392949 21 | 392949 412296 22 | 412296 429287 23 | 429287 449251 24 | 449251 469627 25 | 469627 489588 26 | 489588 510685 27 | 510685 526010 28 | 526010 545832 29 | 545832 565035 30 | 565035 583115 31 | 583115 600840 32 | 600840 614925 33 | 614925 634923 34 | 634923 652756 35 | 652756 672460 36 | 672460 692182 37 | 692182 712613 38 | 712613 730892 39 | 730892 750565 40 | 750565 769709 41 | 769709 790141 42 | 790141 808985 43 | 808985 828423 44 | 828423 847125 45 | 847125 866794 46 | 866794 886847 47 | 886847 902244 48 | 902244 922309 49 | 922309 942226 50 | 942226 962612 51 | 962612 980692 52 | 980692 1001401 53 | 1001401 1020630 54 | 1020630 1040850 55 | 1040850 1061038 56 | 1061038 1081508 57 | 1081508 1099614 58 | 1099614 1119619 59 | 1119619 1139865 60 | 1139865 1155786 61 | 1155786 1175622 62 | 1175622 1195427 63 | 1195427 1214867 64 | 1214867 1232947 65 | 1232947 1252955 66 | 1252955 1272148 67 | 1272148 1290587 68 | 1290587 1311375 69 | 1311375 1329396 70 | 1329396 1346907 71 | 1346907 1367145 72 | 1367145 1386355 73 | 1386355 1406638 74 | 1406638 1426340 75 | 1426340 1445024 76 | 1445024 1462249 77 | 1462249 1482628 78 | 1482628 1502562 79 | 1502562 1522337 80 | 1522337 1540583 81 | 1540583 1555738 82 | 1555738 1575672 83 | 1575672 1596098 84 | 1596098 1614290 85 | 1614290 1633501 86 | 1633501 1652753 87 | 1652753 1672792 
88 | 1672792 1692662 89 | 1692662 1713172 90 | 1713172 1733102 91 | 1733102 1751329 92 | 1751329 1769115 93 | 1769115 1788458 94 | 1788458 1805067 95 | 1805067 1822654 96 | 1822654 1842629 97 | 1842629 1862465 98 | 1862465 1880543 99 | 1880543 1900437 100 | 1900437 1918739 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/38.tsv: -------------------------------------------------------------------------------- 1 | 0 17997 2 | 17997 37845 3 | 37845 57542 4 | 57542 77162 5 | 77162 97690 6 | 97690 116462 7 | 116462 134511 8 | 134511 153291 9 | 153291 173414 10 | 173414 190619 11 | 190619 211079 12 | 211079 231638 13 | 231638 251187 14 | 251187 269127 15 | 269127 287852 16 | 287852 308003 17 | 308003 324881 18 | 324881 340460 19 | 340460 360292 20 | 360292 378234 21 | 378234 396614 22 | 396614 414249 23 | 414249 432245 24 | 432245 451549 25 | 451549 471698 26 | 471698 491353 27 | 491353 511376 28 | 511376 530722 29 | 530722 550536 30 | 550536 569391 31 | 569391 588959 32 | 588959 609221 33 | 609221 629748 34 | 629748 650019 35 | 650019 669309 36 | 669309 689269 37 | 689269 708874 38 | 708874 727336 39 | 727336 747434 40 | 747434 767467 41 | 767467 787281 42 | 787281 802657 43 | 802657 821567 44 | 821567 839624 45 | 839624 859400 46 | 859400 878778 47 | 878778 898129 48 | 898129 917219 49 | 917219 936028 50 | 936028 954700 51 | 954700 973641 52 | 973641 993022 53 | 993022 1013479 54 | 1013479 1033407 55 | 1033407 1053568 56 | 1053568 1071933 57 | 1071933 1090756 58 | 1090756 1109872 59 | 1109872 1129396 60 | 1129396 1144080 61 | 1144080 1164102 62 | 1164102 1184086 63 | 1184086 1204810 64 | 1204810 1223919 65 | 1223919 1244197 66 | 1244197 1263955 67 | 1263955 1269054 68 | 1269054 1287173 69 | 1287173 1307475 70 | 1307475 1327554 71 | 1327554 1345759 72 | 1345759 1363975 73 | 1363975 1384085 74 | 1384085 1404150 75 | 1404150 1423112 76 | 1423112 1441143 77 | 1441143 1458377 78 | 1458377 1472958 79 | 
1472958 1492159 80 | 1492159 1511530 81 | 1511530 1531244 82 | 1531244 1551468 83 | 1551468 1569947 84 | 1569947 1588829 85 | 1588829 1608519 86 | 1608519 1628253 87 | 1628253 1648019 88 | 1648019 1665717 89 | 1665717 1684767 90 | 1684767 1704474 91 | 1704474 1725114 92 | 1725114 1745259 93 | 1745259 1760773 94 | 1760773 1777819 95 | 1777819 1798790 96 | 1798790 1818892 97 | 1818892 1836854 98 | 1836854 1857179 99 | 1857179 1877159 100 | 1877159 1897566 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/39.tsv: -------------------------------------------------------------------------------- 1 | 0 15193 2 | 15193 35467 3 | 35467 53288 4 | 53288 73345 5 | 73345 90576 6 | 90576 110652 7 | 110652 129412 8 | 129412 148355 9 | 148355 167524 10 | 167524 185180 11 | 185180 202187 12 | 202187 222388 13 | 222388 241528 14 | 241528 260316 15 | 260316 281133 16 | 281133 300506 17 | 300506 319731 18 | 319731 339993 19 | 339993 357155 20 | 357155 377390 21 | 377390 395367 22 | 395367 415535 23 | 415535 433475 24 | 433475 452915 25 | 452915 472797 26 | 472797 491125 27 | 491125 511295 28 | 511295 530420 29 | 530420 548181 30 | 548181 568003 31 | 568003 588130 32 | 588130 607665 33 | 607665 626312 34 | 626312 647005 35 | 647005 667338 36 | 667338 686988 37 | 686988 707298 38 | 707298 724013 39 | 724013 743960 40 | 743960 761807 41 | 761807 782765 42 | 782765 800733 43 | 800733 820533 44 | 820533 835385 45 | 835385 855481 46 | 855481 875464 47 | 875464 894821 48 | 894821 914945 49 | 914945 935390 50 | 935390 955042 51 | 955042 975204 52 | 975204 994988 53 | 994988 1013276 54 | 1013276 1033848 55 | 1033848 1053735 56 | 1053735 1073310 57 | 1073310 1093135 58 | 1093135 1112816 59 | 1112816 1133376 60 | 1133376 1152692 61 | 1152692 1170654 62 | 1170654 1190674 63 | 1190674 1209824 64 | 1209824 1230150 65 | 1230150 1249826 66 | 1249826 1269724 67 | 1269724 1290429 68 | 1290429 1309320 69 | 1309320 1329487 70 | 1329487 
1345639 71 | 1345639 1364022 72 | 1364022 1384106 73 | 1384106 1402893 74 | 1402893 1423574 75 | 1423574 1441924 76 | 1441924 1461933 77 | 1461933 1480899 78 | 1480899 1499707 79 | 1499707 1519031 80 | 1519031 1539192 81 | 1539192 1556728 82 | 1556728 1575362 83 | 1575362 1590541 84 | 1590541 1609712 85 | 1609712 1627802 86 | 1627802 1647099 87 | 1647099 1667670 88 | 1667670 1687898 89 | 1687898 1708047 90 | 1708047 1726445 91 | 1726445 1744301 92 | 1744301 1763619 93 | 1763619 1784355 94 | 1784355 1803939 95 | 1803939 1822703 96 | 1822703 1840555 97 | 1840555 1856105 98 | 1856105 1876325 99 | 1876325 1895629 100 | 1895629 1915187 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/4.tsv: -------------------------------------------------------------------------------- 1 | 0 20085 2 | 20085 39601 3 | 39601 55197 4 | 55197 73327 5 | 73327 92087 6 | 92087 110244 7 | 110244 128422 8 | 128422 148822 9 | 148822 162464 10 | 162464 182849 11 | 182849 202968 12 | 202968 223377 13 | 223377 244038 14 | 244038 262586 15 | 262586 281894 16 | 281894 301827 17 | 301827 316336 18 | 316336 336024 19 | 336024 353833 20 | 353833 373804 21 | 373804 394017 22 | 394017 412740 23 | 412740 432378 24 | 432378 452437 25 | 452437 471570 26 | 471570 490904 27 | 490904 510010 28 | 510010 529260 29 | 529260 547848 30 | 547848 566248 31 | 566248 586472 32 | 586472 604675 33 | 604675 624838 34 | 624838 644553 35 | 644553 665027 36 | 665027 683332 37 | 683332 702403 38 | 702403 722925 39 | 722925 742785 40 | 742785 762471 41 | 762471 782539 42 | 782539 802266 43 | 802266 822313 44 | 822313 838236 45 | 838236 856201 46 | 856201 876065 47 | 876065 895853 48 | 895853 916040 49 | 916040 934638 50 | 934638 954448 51 | 954448 972887 52 | 972887 991307 53 | 991307 1010539 54 | 1010539 1029029 55 | 1029029 1047369 56 | 1047369 1067622 57 | 1067622 1087079 58 | 1087079 1106492 59 | 1106492 1127085 60 | 1127085 1147523 61 | 1147523 1167652 62 
| 1167652 1184590 63 | 1184590 1204054 64 | 1204054 1222134 65 | 1222134 1241831 66 | 1241831 1260119 67 | 1260119 1277690 68 | 1277690 1293356 69 | 1293356 1313747 70 | 1313747 1334036 71 | 1334036 1352899 72 | 1352899 1373051 73 | 1373051 1392184 74 | 1392184 1410143 75 | 1410143 1427511 76 | 1427511 1446876 77 | 1446876 1467287 78 | 1467287 1487952 79 | 1487952 1508405 80 | 1508405 1527365 81 | 1527365 1546329 82 | 1546329 1565653 83 | 1565653 1585612 84 | 1585612 1604559 85 | 1604559 1624158 86 | 1624158 1642211 87 | 1642211 1661849 88 | 1661849 1681823 89 | 1681823 1696405 90 | 1696405 1715842 91 | 1715842 1735490 92 | 1735490 1754642 93 | 1754642 1775110 94 | 1775110 1795169 95 | 1795169 1813977 96 | 1813977 1833401 97 | 1833401 1852806 98 | 1852806 1872517 99 | 1872517 1892623 100 | 1892623 1911795 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/40.tsv: -------------------------------------------------------------------------------- 1 | 0 19218 2 | 19218 36538 3 | 36538 55019 4 | 55019 74815 5 | 74815 92809 6 | 92809 111091 7 | 111091 129600 8 | 129600 149953 9 | 149953 167118 10 | 167118 187066 11 | 187066 207077 12 | 207077 222901 13 | 222901 242898 14 | 242898 261949 15 | 261949 282330 16 | 282330 302107 17 | 302107 322070 18 | 322070 340132 19 | 340132 359975 20 | 359975 380745 21 | 380745 400494 22 | 400494 418518 23 | 418518 433966 24 | 433966 452272 25 | 452272 472213 26 | 472213 488739 27 | 488739 505710 28 | 505710 525694 29 | 525694 544480 30 | 544480 564753 31 | 564753 584463 32 | 584463 602992 33 | 602992 623068 34 | 623068 643148 35 | 643148 663279 36 | 663279 682037 37 | 682037 702049 38 | 702049 721337 39 | 721337 742123 40 | 742123 760935 41 | 760935 781140 42 | 781140 801278 43 | 801278 819872 44 | 819872 840430 45 | 840430 858712 46 | 858712 877777 47 | 877777 897418 48 | 897418 917647 49 | 917647 932097 50 | 932097 951832 51 | 951832 971202 52 | 971202 974066 53 | 974066 
994055 54 | 994055 1012359 55 | 1012359 1031078 56 | 1031078 1050668 57 | 1050668 1070418 58 | 1070418 1090516 59 | 1090516 1106891 60 | 1106891 1125609 61 | 1125609 1145990 62 | 1145990 1165641 63 | 1165641 1185497 64 | 1185497 1205349 65 | 1205349 1224624 66 | 1224624 1245138 67 | 1245138 1264473 68 | 1264473 1284077 69 | 1284077 1301293 70 | 1301293 1321225 71 | 1321225 1340560 72 | 1340560 1361230 73 | 1361230 1380129 74 | 1380129 1399469 75 | 1399469 1419524 76 | 1419524 1437131 77 | 1437131 1456704 78 | 1456704 1476117 79 | 1476117 1496476 80 | 1496476 1515021 81 | 1515021 1533934 82 | 1533934 1553949 83 | 1553949 1569659 84 | 1569659 1589448 85 | 1589448 1609825 86 | 1609825 1628966 87 | 1628966 1648054 88 | 1648054 1668015 89 | 1668015 1687534 90 | 1687534 1705991 91 | 1705991 1726024 92 | 1726024 1746545 93 | 1746545 1766270 94 | 1766270 1784450 95 | 1784450 1805344 96 | 1805344 1825182 97 | 1825182 1845110 98 | 1845110 1865218 99 | 1865218 1878377 100 | 1878377 1898890 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/41.tsv: -------------------------------------------------------------------------------- 1 | 0 18311 2 | 18311 36064 3 | 36064 55824 4 | 55824 75367 5 | 75367 94688 6 | 94688 114859 7 | 114859 134532 8 | 134532 152493 9 | 152493 170527 10 | 170527 189178 11 | 189178 209677 12 | 209677 228199 13 | 228199 248172 14 | 248172 262686 15 | 262686 282870 16 | 282870 302929 17 | 302929 323106 18 | 323106 341233 19 | 341233 357803 20 | 357803 377679 21 | 377679 396321 22 | 396321 416706 23 | 416706 427412 24 | 427412 447137 25 | 447137 467359 26 | 467359 486884 27 | 486884 506817 28 | 506817 526803 29 | 526803 545651 30 | 545651 565943 31 | 565943 585418 32 | 585418 604392 33 | 604392 624196 34 | 624196 644442 35 | 644442 664213 36 | 664213 680600 37 | 680600 698559 38 | 698559 716531 39 | 716531 736879 40 | 736879 756934 41 | 756934 776932 42 | 776932 792782 43 | 792782 810474 44 | 
810474 831009 45 | 831009 847097 46 | 847097 867299 47 | 867299 886930 48 | 886930 905155 49 | 905155 925288 50 | 925288 945244 51 | 945244 965618 52 | 965618 983742 53 | 983742 1003622 54 | 1003622 1022513 55 | 1022513 1040977 56 | 1040977 1060804 57 | 1060804 1079961 58 | 1079961 1098851 59 | 1098851 1115646 60 | 1115646 1134278 61 | 1134278 1153730 62 | 1153730 1167913 63 | 1167913 1187091 64 | 1187091 1205577 65 | 1205577 1225233 66 | 1225233 1241144 67 | 1241144 1262046 68 | 1262046 1280295 69 | 1280295 1299853 70 | 1299853 1319826 71 | 1319826 1339417 72 | 1339417 1359798 73 | 1359798 1378101 74 | 1378101 1396743 75 | 1396743 1416978 76 | 1416978 1432490 77 | 1432490 1453315 78 | 1453315 1469266 79 | 1469266 1489066 80 | 1489066 1509691 81 | 1509691 1527164 82 | 1527164 1546787 83 | 1546787 1566866 84 | 1566866 1584777 85 | 1584777 1604895 86 | 1604895 1624761 87 | 1624761 1644469 88 | 1644469 1654461 89 | 1654461 1672561 90 | 1672561 1692883 91 | 1692883 1712707 92 | 1712707 1728475 93 | 1728475 1746948 94 | 1746948 1764076 95 | 1764076 1784612 96 | 1784612 1801764 97 | 1801764 1821800 98 | 1821800 1840445 99 | 1840445 1860458 100 | 1860458 1879790 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/42.tsv: -------------------------------------------------------------------------------- 1 | 0 20397 2 | 20397 35974 3 | 35974 54628 4 | 54628 73558 5 | 73558 93580 6 | 93580 109366 7 | 109366 117608 8 | 117608 135714 9 | 135714 153050 10 | 153050 169206 11 | 169206 189183 12 | 189183 209394 13 | 209394 227582 14 | 227582 243048 15 | 243048 262662 16 | 262662 282390 17 | 282390 300319 18 | 300319 320769 19 | 320769 334539 20 | 334539 353224 21 | 353224 369466 22 | 369466 376372 23 | 376372 396171 24 | 396171 416582 25 | 416582 436635 26 | 436635 456122 27 | 456122 476621 28 | 476621 484704 29 | 484704 503524 30 | 503524 523521 31 | 523521 543421 32 | 543421 563069 33 | 563069 583232 34 | 583232 
600807 35 | 600807 620629 36 | 620629 637805 37 | 637805 649544 38 | 649544 665967 39 | 665967 682791 40 | 682791 698855 41 | 698855 718996 42 | 718996 734669 43 | 734669 755086 44 | 755086 765379 45 | 765379 783648 46 | 783648 803468 47 | 803468 823037 48 | 823037 843154 49 | 843154 861387 50 | 861387 877903 51 | 877903 897166 52 | 897166 917433 53 | 917433 937547 54 | 937547 957705 55 | 957705 977875 56 | 977875 996529 57 | 996529 1016536 58 | 1016536 1035463 59 | 1035463 1055032 60 | 1055032 1073559 61 | 1073559 1088943 62 | 1088943 1108153 63 | 1108153 1126265 64 | 1126265 1146832 65 | 1146832 1151602 66 | 1151602 1169184 67 | 1169184 1189211 68 | 1189211 1209744 69 | 1209744 1227690 70 | 1227690 1248261 71 | 1248261 1266555 72 | 1266555 1286057 73 | 1286057 1306303 74 | 1306303 1324209 75 | 1324209 1331423 76 | 1331423 1349978 77 | 1349978 1368576 78 | 1368576 1388353 79 | 1388353 1400428 80 | 1400428 1415619 81 | 1415619 1432940 82 | 1432940 1453069 83 | 1453069 1470874 84 | 1470874 1491053 85 | 1491053 1506186 86 | 1506186 1526560 87 | 1526560 1546536 88 | 1546536 1565942 89 | 1565942 1585691 90 | 1585691 1605160 91 | 1605160 1615082 92 | 1615082 1634226 93 | 1634226 1651232 94 | 1651232 1667018 95 | 1667018 1682762 96 | 1682762 1703153 97 | 1703153 1722270 98 | 1722270 1739311 99 | 1739311 1759066 100 | 1759066 1778551 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/43.tsv: -------------------------------------------------------------------------------- 1 | 0 18587 2 | 18587 38219 3 | 38219 58625 4 | 58625 78380 5 | 78380 98146 6 | 98146 118292 7 | 118292 136157 8 | 136157 155451 9 | 155451 174855 10 | 174855 178703 11 | 178703 199043 12 | 199043 219200 13 | 219200 236584 14 | 236584 257053 15 | 257053 277069 16 | 277069 292881 17 | 292881 308712 18 | 308712 324366 19 | 324366 327626 20 | 327626 340629 21 | 340629 361296 22 | 361296 365968 23 | 365968 382552 24 | 382552 402822 25 | 402822 
422856 26 | 422856 443082 27 | 443082 446672 28 | 446672 466914 29 | 466914 487254 30 | 487254 507330 31 | 507330 527027 32 | 527027 547147 33 | 547147 565258 34 | 565258 583610 35 | 583610 604200 36 | 604200 624301 37 | 624301 636317 38 | 636317 654229 39 | 654229 674826 40 | 674826 690621 41 | 690621 710580 42 | 710580 730527 43 | 730527 750020 44 | 750020 769686 45 | 769686 788771 46 | 788771 808091 47 | 808091 827420 48 | 827420 845355 49 | 845355 855208 50 | 855208 874504 51 | 874504 893063 52 | 893063 913269 53 | 913269 932872 54 | 932872 950580 55 | 950580 954196 56 | 954196 966055 57 | 966055 985386 58 | 985386 1005661 59 | 1005661 1025463 60 | 1025463 1045935 61 | 1045935 1061530 62 | 1061530 1079657 63 | 1079657 1095843 64 | 1095843 1109069 65 | 1109069 1128451 66 | 1128451 1148790 67 | 1148790 1159565 68 | 1159565 1161692 69 | 1161692 1167738 70 | 1167738 1187430 71 | 1187430 1207675 72 | 1207675 1222572 73 | 1222572 1242457 74 | 1242457 1252219 75 | 1252219 1265647 76 | 1265647 1283910 77 | 1283910 1303291 78 | 1303291 1323427 79 | 1323427 1343160 80 | 1343160 1344158 81 | 1344158 1361381 82 | 1361381 1381734 83 | 1381734 1397106 84 | 1397106 1411903 85 | 1411903 1416731 86 | 1416731 1437005 87 | 1437005 1456453 88 | 1456453 1460213 89 | 1460213 1480280 90 | 1480280 1498353 91 | 1498353 1514345 92 | 1514345 1534233 93 | 1534233 1543788 94 | 1543788 1551791 95 | 1551791 1553562 96 | 1553562 1562085 97 | 1562085 1581004 98 | 1581004 1600682 99 | 1600682 1618922 100 | 1618922 1634303 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/44.tsv: -------------------------------------------------------------------------------- 1 | 0 20121 2 | 20121 38219 3 | 38219 51362 4 | 51362 71808 5 | 71808 91355 6 | 91355 111879 7 | 111879 129537 8 | 129537 145046 9 | 145046 146627 10 | 146627 153919 11 | 153919 171838 12 | 171838 174339 13 | 174339 179613 14 | 179613 196362 15 | 196362 200842 16 | 200842 
214565 17 | 214565 221641 18 | 221641 241530 19 | 241530 257953 20 | 257953 273306 21 | 273306 292220 22 | 292220 312587 23 | 312587 332144 24 | 332144 351696 25 | 351696 365304 26 | 365304 385413 27 | 385413 406366 28 | 406366 426837 29 | 426837 438809 30 | 438809 457906 31 | 457906 473929 32 | 473929 494720 33 | 494720 511747 34 | 511747 530523 35 | 530523 535199 36 | 535199 553764 37 | 553764 573057 38 | 573057 592821 39 | 592821 604879 40 | 604879 623781 41 | 623781 635391 42 | 635391 656028 43 | 656028 667979 44 | 667979 686234 45 | 686234 702155 46 | 702155 721725 47 | 721725 725274 48 | 725274 740124 49 | 740124 755465 50 | 755465 773459 51 | 773459 785754 52 | 785754 803911 53 | 803911 821664 54 | 821664 836486 55 | 836486 856412 56 | 856412 874462 57 | 874462 894955 58 | 894955 913359 59 | 913359 919959 60 | 919959 940826 61 | 940826 958400 62 | 958400 978941 63 | 978941 996891 64 | 996891 1017601 65 | 1017601 1037785 66 | 1037785 1046298 67 | 1046298 1056425 68 | 1056425 1076577 69 | 1076577 1085948 70 | 1085948 1104027 71 | 1104027 1106848 72 | 1106848 1126939 73 | 1126939 1140746 74 | 1140746 1159845 75 | 1159845 1169980 76 | 1169980 1189684 77 | 1189684 1192533 78 | 1192533 1211664 79 | 1211664 1221938 80 | 1221938 1238341 81 | 1238341 1258067 82 | 1258067 1269292 83 | 1269292 1270465 84 | 1270465 1279478 85 | 1279478 1297423 86 | 1297423 1312045 87 | 1312045 1331980 88 | 1331980 1352114 89 | 1352114 1353221 90 | 1353221 1372730 91 | 1372730 1392827 92 | 1392827 1412674 93 | 1412674 1417836 94 | 1417836 1429528 95 | 1429528 1438221 96 | 1438221 1450066 97 | 1450066 1468703 98 | 1468703 1479768 99 | 1479768 1499536 100 | 1499536 1516848 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/45.tsv: -------------------------------------------------------------------------------- 1 | 0 2316 2 | 2316 11805 3 | 11805 19436 4 | 19436 34841 5 | 34841 42297 6 | 42297 44202 7 | 44202 64560 8 | 64560 
84306 9 | 84306 91361 10 | 91361 106735 11 | 106735 115757 12 | 115757 135763 13 | 135763 156018 14 | 156018 171993 15 | 171993 185510 16 | 185510 189951 17 | 189951 198444 18 | 198444 210807 19 | 210807 225715 20 | 225715 243596 21 | 243596 254326 22 | 254326 267171 23 | 267171 281815 24 | 281815 301806 25 | 301806 302619 26 | 302619 322449 27 | 322449 339067 28 | 339067 359076 29 | 359076 363379 30 | 363379 374516 31 | 374516 394824 32 | 394824 413008 33 | 413008 433349 34 | 433349 447922 35 | 447922 467182 36 | 467182 474533 37 | 474533 479010 38 | 479010 481357 39 | 481357 499846 40 | 499846 511763 41 | 511763 520512 42 | 520512 536361 43 | 536361 556511 44 | 556511 564175 45 | 564175 583946 46 | 583946 595954 47 | 595954 597616 48 | 597616 615949 49 | 615949 619347 50 | 619347 627712 51 | 627712 645904 52 | 645904 663533 53 | 663533 683441 54 | 683441 691574 55 | 691574 695584 56 | 695584 715489 57 | 715489 715546 58 | 715546 717745 59 | 717745 728212 60 | 728212 732229 61 | 732229 752098 62 | 752098 771081 63 | 771081 790621 64 | 790621 801859 65 | 801859 805259 66 | 805259 818175 67 | 818175 837967 68 | 837967 842386 69 | 842386 856783 70 | 856783 869632 71 | 869632 887596 72 | 887596 899977 73 | 899977 914589 74 | 914589 932685 75 | 932685 952086 76 | 952086 972106 77 | 972106 990949 78 | 990949 999306 79 | 999306 1000800 80 | 1000800 1006082 81 | 1006082 1010246 82 | 1010246 1027813 83 | 1027813 1035545 84 | 1035545 1052853 85 | 1052853 1055521 86 | 1055521 1059472 87 | 1059472 1062261 88 | 1062261 1066096 89 | 1066096 1070311 90 | 1070311 1080692 91 | 1080692 1083708 92 | 1083708 1087509 93 | 1087509 1092674 94 | 1092674 1111982 95 | 1111982 1130670 96 | 1130670 1141519 97 | 1141519 1147482 98 | 1147482 1158810 99 | 1158810 1158939 100 | 1158939 1163735 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/46.tsv: 
-------------------------------------------------------------------------------- 1 | 0 8507 2 | 8507 12796 3 | 12796 15795 4 | 15795 27122 5 | 27122 36164 6 | 36164 37626 7 | 37626 48704 8 | 48704 53378 9 | 53378 53421 10 | 53421 59357 11 | 59357 65099 12 | 65099 83537 13 | 83537 91966 14 | 91966 106725 15 | 106725 114418 16 | 114418 123841 17 | 123841 138790 18 | 138790 143210 19 | 143210 163068 20 | 163068 174468 21 | 174468 191991 22 | 191991 210350 23 | 210350 228544 24 | 228544 249188 25 | 249188 269547 26 | 269547 289746 27 | 289746 305505 28 | 305505 326119 29 | 326119 346016 30 | 346016 364174 31 | 364174 382000 32 | 382000 401663 33 | 401663 422394 34 | 422394 442482 35 | 442482 459724 36 | 459724 479407 37 | 479407 500330 38 | 500330 519845 39 | 519845 537913 40 | 537913 558599 41 | 558599 574713 42 | 574713 594675 43 | 594675 612526 44 | 612526 631646 45 | 631646 651716 46 | 651716 671960 47 | 671960 690205 48 | 690205 710167 49 | 710167 729985 50 | 729985 750816 51 | 750816 769444 52 | 769444 789239 53 | 789239 799305 54 | 799305 809373 55 | 809373 823464 56 | 823464 838018 57 | 838018 852836 58 | 852836 860748 59 | 860748 872914 60 | 872914 885491 61 | 885491 887798 62 | 887798 893590 63 | 893590 905950 64 | 905950 925645 65 | 925645 938009 66 | 938009 950619 67 | 950619 957576 68 | 957576 971076 69 | 971076 983230 70 | 983230 994584 71 | 994584 999889 72 | 999889 1004272 73 | 1004272 1016566 74 | 1016566 1029340 75 | 1029340 1042276 76 | 1042276 1053604 77 | 1053604 1059750 78 | 1059750 1061126 79 | 1061126 1065472 80 | 1065472 1069762 81 | 1069762 1080129 82 | 1080129 1090210 83 | 1090210 1093334 84 | 1093334 1096153 85 | 1096153 1099846 86 | 1099846 1120381 87 | 1120381 1138469 88 | 1138469 1138469 89 | 1138469 1138469 90 | 1138469 1138469 91 | 1138469 1138469 92 | 1138469 1138469 93 | 1138469 1138469 94 | 1138469 1138469 95 | 1138469 1138469 96 | 1138469 1138469 97 | 1138469 1138469 98 | 1138469 1138469 99 | 1138469 1138469 100 | 1138469 1138469 
101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/5.tsv: -------------------------------------------------------------------------------- 1 | 0 16054 2 | 16054 36144 3 | 36144 56026 4 | 56026 72916 5 | 72916 92158 6 | 92158 110673 7 | 110673 130233 8 | 130233 150617 9 | 150617 171366 10 | 171366 191360 11 | 191360 211262 12 | 211262 232153 13 | 232153 251685 14 | 251685 271336 15 | 271336 290703 16 | 290703 310435 17 | 310435 329378 18 | 329378 348686 19 | 348686 368527 20 | 368527 388542 21 | 388542 407941 22 | 407941 426237 23 | 426237 446559 24 | 446559 464207 25 | 464207 483798 26 | 483798 501660 27 | 501660 522003 28 | 522003 542183 29 | 542183 561505 30 | 561505 581479 31 | 581479 601733 32 | 601733 621992 33 | 621992 641924 34 | 641924 659093 35 | 659093 674829 36 | 674829 695364 37 | 695364 714809 38 | 714809 735247 39 | 735247 755407 40 | 755407 775857 41 | 775857 788517 42 | 788517 808771 43 | 808771 828384 44 | 828384 848457 45 | 848457 868557 46 | 868557 884337 47 | 884337 905312 48 | 905312 925014 49 | 925014 942156 50 | 942156 962746 51 | 962746 983143 52 | 983143 1003578 53 | 1003578 1023401 54 | 1023401 1042591 55 | 1042591 1062961 56 | 1062961 1081026 57 | 1081026 1101325 58 | 1101325 1120927 59 | 1120927 1141081 60 | 1141081 1160756 61 | 1160756 1180711 62 | 1180711 1201191 63 | 1201191 1220819 64 | 1220819 1236160 65 | 1236160 1256201 66 | 1256201 1276622 67 | 1276622 1295967 68 | 1295967 1315644 69 | 1315644 1335516 70 | 1335516 1355324 71 | 1355324 1375330 72 | 1375330 1395490 73 | 1395490 1414932 74 | 1414932 1435148 75 | 1435148 1455086 76 | 1455086 1474900 77 | 1474900 1494206 78 | 1494206 1511219 79 | 1511219 1525688 80 | 1525688 1540356 81 | 1540356 1558513 82 | 1558513 1575712 83 | 1575712 1595673 84 | 1595673 1615470 85 | 1615470 1634771 86 | 1634771 1654842 87 | 1654842 1674615 88 | 1674615 1689881 89 | 1689881 1708041 90 | 1708041 1727884 91 | 1727884 1746824 92 | 
1746824 1764804 93 | 1764804 1784406 94 | 1784406 1804567 95 | 1804567 1822869 96 | 1822869 1843169 97 | 1843169 1862511 98 | 1862511 1882606 99 | 1882606 1902880 100 | 1902880 1921578 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/6.tsv: -------------------------------------------------------------------------------- 1 | 0 15884 2 | 15884 29930 3 | 29930 48496 4 | 48496 63924 5 | 63924 82097 6 | 82097 101589 7 | 101589 121616 8 | 121616 137482 9 | 137482 157549 10 | 157549 177513 11 | 177513 197710 12 | 197710 217275 13 | 217275 237129 14 | 237129 256199 15 | 256199 275925 16 | 275925 295178 17 | 295178 314693 18 | 314693 334551 19 | 334551 354215 20 | 354215 374195 21 | 374195 392540 22 | 392540 410670 23 | 410670 430028 24 | 430028 448372 25 | 448372 467971 26 | 467971 486076 27 | 486076 506249 28 | 506249 526092 29 | 526092 542165 30 | 542165 561709 31 | 561709 580931 32 | 580931 601994 33 | 601994 621685 34 | 621685 640637 35 | 640637 659536 36 | 659536 676171 37 | 676171 696013 38 | 696013 713052 39 | 713052 733014 40 | 733014 752645 41 | 752645 771037 42 | 771037 786419 43 | 786419 804294 44 | 804294 824170 45 | 824170 842548 46 | 842548 862808 47 | 862808 878441 48 | 878441 898463 49 | 898463 917211 50 | 917211 936942 51 | 936942 956330 52 | 956330 976896 53 | 976896 996998 54 | 996998 1016920 55 | 1016920 1030741 56 | 1030741 1049672 57 | 1049672 1068940 58 | 1068940 1086035 59 | 1086035 1105694 60 | 1105694 1122839 61 | 1122839 1138370 62 | 1138370 1158438 63 | 1158438 1178333 64 | 1178333 1197349 65 | 1197349 1216013 66 | 1216013 1236109 67 | 1236109 1254439 68 | 1254439 1274261 69 | 1274261 1293097 70 | 1293097 1313010 71 | 1313010 1332568 72 | 1332568 1352994 73 | 1352994 1373570 74 | 1373570 1391020 75 | 1391020 1410939 76 | 1410939 1431171 77 | 1431171 1450423 78 | 1450423 1469881 79 | 1469881 1488738 80 | 1488738 1509215 81 | 1509215 1528654 82 | 1528654 1548167 83 | 1548167 
1563984 84 | 1563984 1583363 85 | 1583363 1602901 86 | 1602901 1622410 87 | 1622410 1641426 88 | 1641426 1659511 89 | 1659511 1678358 90 | 1678358 1696851 91 | 1696851 1716652 92 | 1716652 1737010 93 | 1737010 1756791 94 | 1756791 1775101 95 | 1775101 1794956 96 | 1794956 1813793 97 | 1813793 1834479 98 | 1834479 1852401 99 | 1852401 1872489 100 | 1872489 1892082 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/7.tsv: -------------------------------------------------------------------------------- 1 | 0 18044 2 | 18044 38509 3 | 38509 58893 4 | 58893 78682 5 | 78682 98664 6 | 98664 118949 7 | 118949 136741 8 | 136741 152720 9 | 152720 170626 10 | 170626 190570 11 | 190570 210248 12 | 210248 229724 13 | 229724 249915 14 | 249915 267678 15 | 267678 286679 16 | 286679 306809 17 | 306809 323752 18 | 323752 343016 19 | 343016 361735 20 | 361735 381105 21 | 381105 401303 22 | 401303 419646 23 | 419646 439806 24 | 439806 459800 25 | 459800 479541 26 | 479541 499673 27 | 499673 517532 28 | 517532 533192 29 | 533192 548519 30 | 548519 568214 31 | 568214 585128 32 | 585128 605504 33 | 605504 623452 34 | 623452 643625 35 | 643625 664029 36 | 664029 684251 37 | 684251 704608 38 | 704608 724752 39 | 724752 745335 40 | 745335 764753 41 | 764753 783363 42 | 783363 804268 43 | 804268 823463 44 | 823463 841843 45 | 841843 861773 46 | 861773 881519 47 | 881519 901606 48 | 901606 921853 49 | 921853 942362 50 | 942362 962359 51 | 962359 982739 52 | 982739 1002786 53 | 1002786 1022867 54 | 1022867 1042619 55 | 1042619 1062351 56 | 1062351 1082859 57 | 1082859 1102022 58 | 1102022 1113811 59 | 1113811 1134060 60 | 1134060 1152351 61 | 1152351 1171101 62 | 1171101 1190984 63 | 1190984 1208745 64 | 1208745 1228751 65 | 1228751 1248489 66 | 1248489 1268684 67 | 1268684 1289319 68 | 1289319 1309550 69 | 1309550 1329816 70 | 1329816 1348052 71 | 1348052 1365467 72 | 1365467 1385558 73 | 1385558 1406103 74 | 1406103 1425916 
75 | 1425916 1446842 76 | 1446842 1467334 77 | 1467334 1487597 78 | 1487597 1505256 79 | 1505256 1524413 80 | 1524413 1544390 81 | 1544390 1564461 82 | 1564461 1584264 83 | 1584264 1604115 84 | 1604115 1623171 85 | 1623171 1641418 86 | 1641418 1661556 87 | 1661556 1681335 88 | 1681335 1701041 89 | 1701041 1717031 90 | 1717031 1734129 91 | 1734129 1751738 92 | 1751738 1771364 93 | 1771364 1791998 94 | 1791998 1811962 95 | 1811962 1831663 96 | 1831663 1851793 97 | 1851793 1870347 98 | 1870347 1890600 99 | 1890600 1909365 100 | 1909365 1928593 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/8.tsv: -------------------------------------------------------------------------------- 1 | 0 20340 2 | 20340 40195 3 | 40195 51625 4 | 51625 71200 5 | 71200 91735 6 | 91735 111587 7 | 111587 129850 8 | 129850 145655 9 | 145655 163553 10 | 163553 182858 11 | 182858 199012 12 | 199012 219122 13 | 219122 235231 14 | 235231 252981 15 | 252981 272564 16 | 272564 292563 17 | 292563 312954 18 | 312954 332611 19 | 332611 352257 20 | 352257 370459 21 | 370459 388457 22 | 388457 404110 23 | 404110 422638 24 | 422638 442421 25 | 442421 459519 26 | 459519 479093 27 | 479093 499587 28 | 499587 519036 29 | 519036 537196 30 | 537196 556607 31 | 556607 575205 32 | 575205 594850 33 | 594850 615234 34 | 615234 634806 35 | 634806 654524 36 | 654524 674362 37 | 674362 694411 38 | 694411 713561 39 | 713561 731630 40 | 731630 751020 41 | 751020 771505 42 | 771505 787605 43 | 787605 807480 44 | 807480 827880 45 | 827880 842119 46 | 842119 862829 47 | 862829 882470 48 | 882470 900846 49 | 900846 920564 50 | 920564 939567 51 | 939567 958990 52 | 958990 979084 53 | 979084 998906 54 | 998906 1018601 55 | 1018601 1037059 56 | 1037059 1054316 57 | 1054316 1069649 58 | 1069649 1085494 59 | 1085494 1101490 60 | 1101490 1118932 61 | 1118932 1137786 62 | 1137786 1157485 63 | 1157485 1177981 64 | 1177981 1196863 65 | 1196863 1216805 66 | 1216805 
1234619 67 | 1234619 1255427 68 | 1255427 1275805 69 | 1275805 1295495 70 | 1295495 1314783 71 | 1314783 1334340 72 | 1334340 1353144 73 | 1353144 1372208 74 | 1372208 1391842 75 | 1391842 1411753 76 | 1411753 1429667 77 | 1429667 1447553 78 | 1447553 1467785 79 | 1467785 1487088 80 | 1487088 1507352 81 | 1507352 1525987 82 | 1525987 1546126 83 | 1546126 1562063 84 | 1562063 1578162 85 | 1578162 1598311 86 | 1598311 1618268 87 | 1618268 1638513 88 | 1638513 1658231 89 | 1658231 1678453 90 | 1678453 1697647 91 | 1697647 1717366 92 | 1717366 1737254 93 | 1737254 1757386 94 | 1757386 1777386 95 | 1777386 1796361 96 | 1796361 1816094 97 | 1816094 1836568 98 | 1836568 1856214 99 | 1856214 1874327 100 | 1874327 1892979 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/9.tsv: -------------------------------------------------------------------------------- 1 | 0 20294 2 | 20294 40615 3 | 40615 60769 4 | 60769 81170 5 | 81170 100588 6 | 100588 120701 7 | 120701 140938 8 | 140938 159366 9 | 159366 179662 10 | 179662 200400 11 | 200400 220898 12 | 220898 241131 13 | 241131 260783 14 | 260783 280963 15 | 280963 301628 16 | 301628 320780 17 | 320780 338718 18 | 338718 358612 19 | 358612 378202 20 | 378202 398591 21 | 398591 418367 22 | 418367 438623 23 | 438623 458730 24 | 458730 476765 25 | 476765 494724 26 | 494724 511876 27 | 511876 532601 28 | 532601 552071 29 | 552071 572205 30 | 572205 592344 31 | 592344 612432 32 | 612432 629385 33 | 629385 648869 34 | 648869 665994 35 | 665994 685643 36 | 685643 705952 37 | 705952 724504 38 | 724504 744329 39 | 744329 764861 40 | 764861 780593 41 | 780593 800967 42 | 800967 821341 43 | 821341 841109 44 | 841109 861060 45 | 861060 881434 46 | 881434 900252 47 | 900252 915989 48 | 915989 935994 49 | 935994 956138 50 | 956138 976322 51 | 976322 995996 52 | 995996 1015687 53 | 1015687 1033059 54 | 1033059 1052056 55 | 1052056 1067305 56 | 1067305 1086949 57 | 1086949 
1106611 58 | 1106611 1126558 59 | 1126558 1147179 60 | 1147179 1167377 61 | 1167377 1184353 62 | 1184353 1202374 63 | 1202374 1222987 64 | 1222987 1243073 65 | 1243073 1262933 66 | 1262933 1280587 67 | 1280587 1298407 68 | 1298407 1319065 69 | 1319065 1338937 70 | 1338937 1358085 71 | 1358085 1378348 72 | 1378348 1398228 73 | 1398228 1417195 74 | 1417195 1436889 75 | 1436889 1457201 76 | 1457201 1472704 77 | 1472704 1492204 78 | 1492204 1511790 79 | 1511790 1530265 80 | 1530265 1550365 81 | 1550365 1569025 82 | 1569025 1586272 83 | 1586272 1602738 84 | 1602738 1622285 85 | 1622285 1641852 86 | 1641852 1660021 87 | 1660021 1679797 88 | 1679797 1698857 89 | 1698857 1718549 90 | 1718549 1733458 91 | 1733458 1752958 92 | 1752958 1773449 93 | 1773449 1793827 94 | 1793827 1813887 95 | 1813887 1832519 96 | 1832519 1852576 97 | 1852576 1872774 98 | 1872774 1893036 99 | 1893036 1912870 100 | 1912870 1933254 101 | -------------------------------------------------------------------------------- /backend/search/clueweb/ranges/clueweb.tsv: -------------------------------------------------------------------------------- 1 | 0 1923448 2 | 1923448 3836041 3 | 3836041 5736280 4 | 5736280 7652530 5 | 7652530 9564325 6 | 9564325 11485903 7 | 11485903 13377985 8 | 13377985 15306578 9 | 15306578 17199557 10 | 17199557 19132811 11 | 19132811 21003297 12 | 21003297 22906879 13 | 22906879 24785211 14 | 24785211 26661648 15 | 26661648 28552464 16 | 28552464 30467313 17 | 30467313 32376877 18 | 32376877 34262856 19 | 34262856 36174481 20 | 36174481 38084673 21 | 38084673 39987529 22 | 39987529 41881826 23 | 41881826 43794304 24 | 43794304 45709433 25 | 45709433 47588547 26 | 47588547 49466020 27 | 49466020 51397223 28 | 51397223 53275791 29 | 53275791 55195288 30 | 55195288 57105968 31 | 57105968 59006323 32 | 59006323 60923464 33 | 60923464 62824529 34 | 62824529 64726447 35 | 64726447 66654269 36 | 66654269 68566506 37 | 68566506 70466577 38 | 70466577 72385316 39 | 72385316 74282882 40 | 
class CluewebSearch(Search):
    """Search backend that queries sharded ClueWeb22 index servers.

    A query embedding is sent to every configured index server, the returned
    hits are merged and sorted by distance, and each of the best ``k`` hits is
    resolved to a ClueWeb22 document id through the range tables loaded in
    ``__init__``. The document text is then read through ``ClueWeb22Api`` and
    turned into a ``{"name", "url", "snippet"}`` record.
    """

    # Value sent as the "Ls" field of every query payload.
    SEARCH_LS = 256
    # Index servers are read from CLUEWEB_ADDR_<i> / CLUEWEB_PORT_<i>, i = 0..NUM_SHARDS-1.
    NUM_SHARDS = 4
    # Added once per shard number to a shard-local index to obtain the index
    # looked up in the 'clueweb' range table.
    # NOTE(review): presumably the number of vectors held by each shard -- confirm.
    SHARD_OFFSET = 21517546

    def __init__(self):
        """Load every ``*.tsv`` range table from the ClueWeb ranges directory.

        The directory is ``$PROJECT_DIR/backend/search/clueweb/ranges/``. Each
        table is stored in ``self.range_dictionaries`` under the file name up to
        its first dot (``"12.tsv"`` -> ``"12"``, ``"clueweb.tsv"`` -> ``"clueweb"``).
        """
        directory = f'{os.getenv("PROJECT_DIR")}/backend/search/clueweb/ranges/'
        self.range_dictionaries = {}

        for filename in os.listdir(directory):
            if not filename.endswith('.tsv'):
                continue
            table_key = filename.split('.')[0]
            table_path = os.path.join(directory, filename)
            self.range_dictionaries[table_key] = create_range_dictionary(table_path)

        # The lock makes "read query_id, then increment it" atomic so that
        # concurrent searches never send the same query id.
        self.lock = Lock()
        self.query_id = 0

    def get_search_results(self, embedding, k, query, snippet_object):
        """Return the ``k`` closest ClueWeb22 documents for ``embedding``.

        Args:
            embedding: Query vector, sent unchanged as the ``"query"`` field.
            k: Number of hits requested from every server and number of
                results returned after merging.
            query: Query text, passed to ``snippet_object.get_snippet``.
            snippet_object: Object whose ``get_snippet(query, text)`` builds the
                snippet for a document body.

        Returns:
            A list of dicts with keys ``"name"`` (first line of the clean
            text with tabs, carriage returns and quotes removed), ``"url"`` and
            ``"snippet"``, ordered by ascending distance.

        Any exception raised while loading a single document propagates to the
        caller when the worker results are collected.
        """
        with self.lock:
            jsonquery = {"Ls": self.SEARCH_LS,
                         "query_id": self.query_id,
                         "query": embedding,
                         "k": k}
            self.query_id += 1

        urls = []
        for shard in range(self.NUM_SHARDS):
            addr = os.getenv(f"CLUEWEB_ADDR_{shard}")
            port = os.getenv(f"CLUEWEB_PORT_{shard}")
            urls.append((shard, f'http://{addr}:{port}'))

        responses = fetch_all(urls, jsonquery)

        # Each response is used as (shard number, payload); the payload carries
        # parallel 'indices' and 'distances' lists. Pairing them per response
        # keeps a distance attached to its own index even if one server returns
        # lists of unequal length.
        hits = []
        for response in responses:
            offset = response[0] * self.SHARD_OFFSET
            payload = response[1]
            hits.extend(zip(payload['distances'],
                            (ind + offset for ind in payload['indices'])))

        # Smallest distance first; keep only the k best global indices.
        hits.sort()
        indices = [index for _, index in hits[:k]]

        path_clueweb = os.getenv("CLUEWEB_PATH")

        def process_index(i):
            # Two-level lookup: the 'clueweb' table yields (subfolder, index
            # inside that subfolder); the subfolder's own table then yields
            # (json.gz file id, document id inside that file).
            subfolder, index = query_range_dictionary(self.range_dictionaries['clueweb'], i)
            ranged = self.range_dictionaries[str(subfolder)]
            jsongz_id, doc_id = query_range_dictionary(ranged, index)

            subfolder_id = str(subfolder).zfill(2)
            jjsongz_id = str(jsongz_id).zfill(2)
            ddoc_id = str(doc_id).zfill(5)

            cweb_doc_id = f"clueweb22-en00{subfolder_id}-{jjsongz_id}-{ddoc_id}"
            clueweb_api = ClueWeb22Api(cweb_doc_id, path_clueweb)

            # NOTE(review): eval() executes whatever expression the corpus
            # record contains. If get_clean_text() returns a JSON document,
            # json.loads() is the safe parser -- confirm the record format and
            # switch; left unchanged here to avoid altering parsing behavior.
            clean_txt = eval(clueweb_api.get_clean_text())

            # First line is used as the title, the remainder as the body.
            text_lines = clean_txt["Clean-Text"].split('\n')
            title = text_lines[0].replace("\t", "").replace("\r", "").replace("\'", "").replace("\"", "").strip()
            snippet = snippet_object.get_snippet(query, '\n'.join(text_lines[1:]))

            return {"name": title, "url": clean_txt["URL"].replace("\n", ""), "snippet": snippet}

        # Load documents in parallel; executor.map yields results in the same
        # order as `indices`, so the ranking by distance is preserved.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(process_index, indices))

        return results
import sys
sys.path.append("/home/tevinw/ragviz/backend")

import requests
import os
import time
from helpers.concurrent_fetch import fetch_all
from helpers.range_dictionary import create_range_dictionary, query_range_dictionary
from search.search import Search
import csv
csv.field_size_limit(sys.maxsize)
import concurrent.futures
from threading import Lock


class PileSearch(Search):
    """Search backend that queries the Pile shard servers and reads hits from TSV corpus parts.

    Range tables are loaded once from ``$PROJECT_DIR/backend/search/pile/ranges/``
    and keyed by file stem (``"pile"``).
    """

    # Number of shard servers queried per request; shard ``n`` is reached at
    # ``PILE_ADDR_<n>:PILE_PORT_<n>``.
    NUM_SHARDS = 4
    # Offset added to a shard-local index, by shard number, before merging.
    SHARD_PREFIXES = (0, 13981871, 13981871 + 13986302, 13981871 + 13986302 + 12237291)
    # Divisor used to split an index into a corpus part number and a row number.
    DOCS_PER_PART = 25000

    def __init__(self):
        """Load every ``*.tsv`` range table and initialise the query-id counter."""
        directory = f'{os.getenv("PROJECT_DIR")}/backend/search/pile/ranges/'
        self.range_dictionaries = {}

        for filename in os.listdir(directory):
            if filename.endswith('.tsv'):
                file_path = os.path.join(directory, filename)
                index = filename.split('.')[0]
                self.range_dictionaries[index] = create_range_dictionary(file_path)
        # The lock makes "read query_id, then increment" atomic across threads.
        self.lock = Lock()
        self.query_id = 0

    def get_search_results(self, embedding, k, query, snippet_object):
        """Return the ``k`` closest documents across all shards.

        Args:
            embedding: query vector; sent to the shard servers and also passed
                as the first argument of ``snippet_object.get_snippet``.
            k: number of results requested from each shard and returned overall.
            query: accepted for interface compatibility; not read by this backend.
            snippet_object: object exposing ``get_snippet(query, article)``.

        Returns:
            A list of ``{"name", "url", "snippet"}`` dicts ordered by ascending
            distance. ``name`` and ``snippet`` are ``None`` when the expected row
            is not found in the corpus part.
        """
        with self.lock:
            jsonquery = {"Ls": 256,
                         "query_id": self.query_id,
                         "query": embedding,
                         "k": k}
            self.query_id += 1

        urls = []
        for shard in range(self.NUM_SHARDS):
            addr = os.getenv(f"PILE_ADDR_{shard}")
            port = os.getenv(f"PILE_PORT_{shard}")
            urls.append((shard, f'http://{addr}:{port}'))

        start_time = time.perf_counter()
        responses = fetch_all(urls, jsonquery)

        # Each response is indexed as (shard number, payload); shift the
        # shard-local indices so they are comparable after merging.
        merged_indices = []
        merged_distances = []
        for response in responses:
            offset = self.SHARD_PREFIXES[response[0]]
            merged_indices.extend(ind + offset for ind in response[1]['indices'])
            merged_distances.extend(response[1]['distances'])

        # Sort indices based on distances and keep the best k.
        ranked = sorted(zip(merged_distances, merged_indices))
        indices = [index for _, index in ranked][:k]
        elapsed_time = time.perf_counter() - start_time
        print(f"QUERY AND RERANK TIME: {elapsed_time} seconds")

        def process_index(i):
            """Resolve one merged index to its title and snippet."""
            start_time = time.perf_counter()
            subfolder, index = query_range_dictionary(self.range_dictionaries['pile'], i)
            subfolder_id = str(subfolder).zfill(2)

            # NOTE(review): every non-zero index is shifted down by one before the
            # part/row split; presumably compensating for 1-based ids -- confirm.
            if index != 0:
                index -= 1

            pile_part = str(index // self.DOCS_PER_PART + 1)
            line_number = index % self.DOCS_PER_PART + 1

            corpus_path = f'{os.getenv("PILE_PATH")}/{subfolder_id}/full_corpus_{subfolder_id}_part_{pile_part}.tsv'

            title = None
            snippet = None

            # Scan the part file until the wanted row. enumerate() is 0-based and
            # line_number is at least 1, so row 0 is never selected (presumably a
            # header row -- confirm).
            with open(corpus_path, 'r', encoding='utf-8') as tsv_file:
                reader = csv.reader(tsv_file, delimiter='\t')
                for current_line, row in enumerate(reader):
                    if current_line == line_number:
                        title = row[1]  # second column is used as the title
                        elapsed_time = time.perf_counter() - start_time
                        print(f"PILE FETCH DOCUMENT TIME: {elapsed_time} seconds")
                        # third column is used as the article text
                        snippet = snippet_object.get_snippet(embedding, row[2])
                        break
            return {"name": title, "url": "http://google.com", "snippet": snippet}

        # executor.map yields results in the order of `indices`, so the
        # distance ranking is preserved.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(process_index, indices))

        return results
import sys
sys.path.append("/home/tevinw/ragviz/backend")

from snippet.snippet import Snippet
import torch
import time


class SlidingWindowSnippet(Snippet):
    """Pick the article window whose embedding is most similar to the query vector."""

    def __init__(self, tokenizer, model, stride, window_size):
        """Store the tokenizer, the embedding model and the window geometry.

        Args:
            tokenizer: object providing ``tokenize``, ``__call__(..., return_tensors="pt")``
                and ``convert_tokens_to_string``.
            model: callable accepting ``input_ids`` and ``decoder_input_ids`` and
                returning an object with ``last_hidden_state``.
            stride: step, in token positions, between consecutive window starts.
            window_size: number of token positions per window.
        """
        self.tokenizer = tokenizer
        self.model = model
        self.stride = stride
        self.window_size = window_size

    def get_snippet(self, query, article):
        """Return the text of the best-scoring window of ``article``.

        Args:
            query: query embedding as a numeric sequence; it is converted with
                ``torch.tensor`` and compared by cosine similarity.
            article: text to search for the best window.

        Returns:
            The window's tokens joined by ``convert_tokens_to_string``; an empty
            token list is converted when no window was scored.
        """
        start_time = time.perf_counter()
        tokens = self.tokenizer.tokenize(article)
        input_ids = self.tokenizer(article, return_tensors="pt").input_ids
        # The decoder is fed a copy of the same ids as the encoder.
        decoder_input_ids = input_ids.detach().clone()

        # Loop-invariant: build and normalise the query vector once instead of
        # once per window.
        query_unit = torch.nn.functional.normalize(torch.tensor(query), dim=0)

        best_tokens = []
        best_sim = -torch.inf

        # NOTE(review): windows are positioned over input_ids but the returned text
        # is sliced from tokenize(); if the tokenizer adds special tokens to
        # input_ids the two sequences differ in length -- confirm alignment.
        for i in range(0, len(input_ids[0]), self.stride):
            window = slice(i, i + self.window_size)

            with torch.no_grad():
                outputs = self.model(input_ids=input_ids[:, window],
                                     decoder_input_ids=decoder_input_ids[:, window])

            # Hidden state at the first position of the first batch item
            # represents the window.
            snippet_embedding = outputs.last_hidden_state[0, 0]
            snippet_unit = torch.nn.functional.normalize(snippet_embedding, dim=0)

            sim = float(torch.dot(query_unit, snippet_unit))
            if i == 0:
                print(f"NAIVE FIRST SIMILARITY: {sim}")
            if sim > best_sim:
                best_sim = sim
                best_tokens = tokens[window]
        print(f"SLIDING WINDOW SIMILARITY: {best_sim}")
        res = self.tokenizer.convert_tokens_to_string(best_tokens)
        elapsed_time = time.perf_counter() - start_time
        print(f"SLIDING WINDOW SNIPPET TIME: {elapsed_time} seconds")
        return res
/**
 * Next.js configuration factory.
 *
 * In production the app is statically exported into `../ui` and served under
 * `<NEXT_PUBLIC_BASE_PATH>/ui`. In any other environment `/query` is proxied
 * to the backend on localhost:8080.
 *
 * @param {string} phase Build phase supplied by Next.js (unused).
 * @param {{ defaultConfig: import("next").NextConfig }} context Context supplied by Next.js (unused).
 * @returns {import("next").NextConfig}
 */
const nextConfig = (phase, { defaultConfig }) => {
  if (process.env.NODE_ENV === "production") {
    // An unset variable would otherwise interpolate as the literal "undefined".
    const basePath = process.env.NEXT_PUBLIC_BASE_PATH ?? "";
    return {
      output: "export",
      assetPrefix: `${basePath}/ui/`,
      basePath: `${basePath}/ui`,
      distDir: "../ui",
    };
  }

  return {
    async rewrites() {
      return [
        {
          source: "/query",
          destination: "http://localhost:8080/query", // Proxy to Backend
        },
      ];
    },
  };
};

export default nextConfig;
"private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@next/third-parties": "^14.0.4", 13 | "@radix-ui/react-popover": "^1.0.7", 14 | "@tailwindcss/forms": "^0.5.7", 15 | "@upstash/ratelimit": "^1.0.0", 16 | "@vercel/kv": "^1.0.1", 17 | "clsx": "^2.1.0", 18 | "headlessui": "^0.0.0", 19 | "lucide-react": "^0.309.0", 20 | "mdast-util-from-markdown": "^2.0.0", 21 | "nanoid": "^5.0.4", 22 | "next": "14.0.4", 23 | "react": "^18", 24 | "react-dom": "^18", 25 | "react-markdown": "^9.0.1", 26 | "tailwind-merge": "^2.2.0", 27 | "unist-builder": "^4.0.0" 28 | }, 29 | "devDependencies": { 30 | "@tailwindcss/typography": "^0.5.10", 31 | "@types/node": "^20", 32 | "@types/react": "^18", 33 | "@types/react-dom": "^18", 34 | "autoprefixer": "^10.0.1", 35 | "eslint": "^8", 36 | "eslint-config-next": "14.0.4", 37 | "eslint-config-prettier": "^9.0.0", 38 | "eslint-plugin-prettier": "^5.0.1", 39 | "eslint-plugin-unused-imports": "^3.0.0", 40 | "postcss": "^8", 41 | "prettier": "^3.1.0", 42 | "tailwindcss": "^3.3.0", 43 | "typescript": "^5" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /frontend/postcss.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | }; 7 | -------------------------------------------------------------------------------- /frontend/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /frontend/public/android-chrome-512x512.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/android-chrome-512x512.png -------------------------------------------------------------------------------- /frontend/public/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/apple-touch-icon.png -------------------------------------------------------------------------------- /frontend/public/bg.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /frontend/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon-16x16.png -------------------------------------------------------------------------------- /frontend/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon-32x32.png -------------------------------------------------------------------------------- /frontend/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon.ico -------------------------------------------------------------------------------- /frontend/public/ragviz-square.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/ragviz-square.png -------------------------------------------------------------------------------- /frontend/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | { 2 | "name": "", 3 | "short_name": "", 4 | "icons": [ 5 | { 6 | "src": "/android-chrome-192x192.png", 7 | "sizes": "192x192", 8 | "type": "image/png" 9 | }, 10 | { 11 | "src": "/android-chrome-512x512.png", 12 | "sizes": "512x512", 13 | "type": "image/png" 14 | } 15 | ], 16 | "theme_color": "#ffffff", 17 | "background_color": "#ffffff", 18 | "display": "standalone" 19 | } 20 | -------------------------------------------------------------------------------- /frontend/src/app/components/answer.tsx: -------------------------------------------------------------------------------- 1 | import { Skeleton } from "@/app/components/skeleton"; 2 | import { Wrapper } from "@/app/components/wrapper"; 3 | import { Source } from "@/app/interfaces/source"; 4 | import { BookOpenText } from "lucide-react"; 5 | import { Dispatch, FC, SetStateAction } from "react"; 6 | 7 | export const Token: FC<{ 8 | markdown: string; 9 | index: number; 10 | token: number[] | null; 11 | setToken: Dispatch>; 12 | clicked: number[] | null; 13 | setClicked: Dispatch>; 14 | }> = ({ markdown, index, token, setToken, clicked, setClicked }) => { 15 | const handleDragStart = (event: React.DragEvent) => {}; 16 | 17 | const handleDragOver = (event: React.DragEvent) => { 18 | event.preventDefault(); 19 | if (!token?.includes(index)) { 20 | const newTokens = token ? [...token, index] : [index]; 21 | setClicked(newTokens); 22 | setToken(newTokens); 23 | } 24 | }; 25 | 26 | const handleMouseEnter = () => { 27 | setToken([index]); 28 | }; 29 | 30 | const className = token?.includes(index) 31 | ? 
"bg-red-300 hover:cursor-pointer" 32 | : "hover:bg-red-200 hover:cursor-pointer"; 33 | 34 | const handleMouseLeave = () => { 35 | setToken(clicked); 36 | }; 37 | return ( 38 | 46 | {markdown} 47 | 48 | ); 49 | }; 50 | export const Answer: FC<{ 51 | markdown: string[]; 52 | compare: boolean; 53 | markdownCompare: string[]; 54 | sources: Source[]; 55 | token: number[] | null; 56 | tokenLength: number; 57 | setToken: Dispatch>; 58 | clicked: number[] | null; 59 | setClicked: Dispatch>; 60 | }> = ({ 61 | markdown, 62 | compare, 63 | markdownCompare, 64 | sources, 65 | token, 66 | tokenLength, 67 | setToken, 68 | clicked, 69 | setClicked, 70 | }) => { 71 | return ( 72 | 75 | Answer 76 | 77 | } 78 | content={ 79 | markdown && markdown.length > 0 && compare ? ( 80 |
81 |
82 |

83 | Generation using all retrieved documents 84 |

85 | {markdown.map((str, index) => ( 86 | 95 | ))} 96 |
97 | {markdownCompare && markdownCompare.length > 0 ? ( 98 |
99 |

100 | Generation on selected documents and tokens 101 |

102 | {markdownCompare.map((str, index) => ( 103 | 112 | ))} 113 |
114 | ) : ( 115 |
116 | 117 | 118 | 119 | 120 | 121 |
122 | )} 123 |
124 | ) : markdown && markdown.length > 0 ? ( 125 |
126 |

Generation on all retrieved documents

127 | {markdown.map((str, index) => ( 128 | 137 | ))} 138 |
139 | ) : ( 140 |
141 | 142 | 143 | 144 | 145 | 146 |
147 | ) 148 | } 149 | >
150 | ); 151 | }; 152 | -------------------------------------------------------------------------------- /frontend/src/app/components/footer.tsx: -------------------------------------------------------------------------------- 1 | import { FC } from "react"; 2 | 3 | export const Footer: FC = () => { 4 | return ( 5 |
6 |
7 | Answer generated by large language models, plz double check for 8 | correctness. UI adapted from Lepton. 9 |
10 |
11 | ); 12 | }; 13 | -------------------------------------------------------------------------------- /frontend/src/app/components/logo.tsx: -------------------------------------------------------------------------------- 1 | import React, { FC } from "react"; 2 | 3 | export const Logo: FC = () => { 4 | return ( 5 |
6 |
7 | 8 |
9 |
10 | RAGViz 11 |
12 |
13 | alpha 14 |
15 |
16 | ); 17 | }; 18 | -------------------------------------------------------------------------------- /frontend/src/app/components/popover.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import * as React from "react"; 4 | import * as PopoverPrimitive from "@radix-ui/react-popover"; 5 | 6 | import { cn } from "@/app/utils/cn"; 7 | 8 | const Popover = PopoverPrimitive.Root; 9 | 10 | const PopoverTrigger = PopoverPrimitive.Trigger; 11 | 12 | const PopoverContent = React.forwardRef< 13 | React.ElementRef, 14 | React.ComponentPropsWithoutRef 15 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 16 | 17 | 27 | 28 | )); 29 | PopoverContent.displayName = PopoverPrimitive.Content.displayName; 30 | 31 | export { Popover, PopoverTrigger, PopoverContent }; 32 | -------------------------------------------------------------------------------- /frontend/src/app/components/preset-query.tsx: -------------------------------------------------------------------------------- 1 | import { getSearchUrl } from "@/app/utils/get-search-url"; 2 | import { nanoid } from "nanoid"; 3 | import Link from "next/link"; 4 | import React, { FC, useMemo } from "react"; 5 | 6 | export const PresetQuery: FC<{ query: string }> = ({ query }) => { 7 | const rid = useMemo(() => nanoid(), [query]); 8 | const k = "5"; 9 | const apiKey = ""; 10 | const snippet = "first"; 11 | 12 | return ( 13 | 19 | {query} 20 | 21 | ); 22 | }; 23 | -------------------------------------------------------------------------------- /frontend/src/app/components/relates.tsx: -------------------------------------------------------------------------------- 1 | import { PresetQuery } from "@/app/components/preset-query"; 2 | import { Skeleton } from "@/app/components/skeleton"; 3 | import { Wrapper } from "@/app/components/wrapper"; 4 | import { Relate } from "@/app/interfaces/relate"; 5 | import { MessageSquareQuote } from "lucide-react"; 6 | 
import React, { FC } from "react"; 7 | 8 | export const Relates: FC<{ relates: Relate[] | null }> = ({ relates }) => { 9 | return ( 10 | 13 | Related 14 | 15 | } 16 | content={ 17 |
18 | {relates !== null ? ( 19 | relates.length > 0 ? ( 20 | relates.map(({ question }) => ( 21 | 22 | )) 23 | ) : ( 24 |
No related questions.
25 | ) 26 | ) : ( 27 | <> 28 | 29 | 30 | 31 | 32 | )} 33 |
34 | } 35 | >
36 | ); 37 | }; 38 | -------------------------------------------------------------------------------- /frontend/src/app/components/result.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Answer } from "@/app/components/answer"; 3 | // import { Relates } from "@/app/components/relates"; 4 | import { Sources } from "@/app/components/sources"; 5 | import { Relate } from "@/app/interfaces/relate"; 6 | import { Source } from "@/app/interfaces/source"; 7 | import { parseRAG } from "@/app/utils/parse-rag"; 8 | import { Annoyed } from "lucide-react"; 9 | import { FC, useEffect, useState } from "react"; 10 | import { parseRewrite } from "../utils/parse-rewrite"; 11 | 12 | export const Result: FC<{ 13 | query: string; 14 | rid: string; 15 | k: string; 16 | apiKey: string; 17 | snippet: string; 18 | modal: boolean; 19 | setModal: any; 20 | }> = ({ query, rid, k, apiKey, snippet, modal, setModal }) => { 21 | const [sources, setSources] = useState([]); 22 | const [token, setToken] = useState(null); 23 | const [clicked, setClicked] = useState(null); 24 | const [tokenLength, setTokenLength] = useState(0); 25 | const [markdown, setMarkdown] = useState([]); 26 | const [markdownCompare, setMarkdownCompare] = useState([]); 27 | const [compare, setCompare] = useState(false); 28 | const [relates, setRelates] = useState(null); 29 | const [error, setError] = useState(null); 30 | useEffect(() => { 31 | const controller = new AbortController(); 32 | void parseRAG( 33 | controller, 34 | query, 35 | rid, 36 | k, 37 | apiKey, 38 | snippet, 39 | setSources, 40 | setMarkdown, 41 | setTokenLength, 42 | setError, 43 | ); 44 | return () => { 45 | controller.abort(); 46 | }; 47 | }, [query]); 48 | 49 | const rewrite = (keep: boolean[][][]) => { 50 | const controller = new AbortController(); 51 | setCompare(true); 52 | setMarkdownCompare([]); 53 | setToken([]); 54 | void parseRewrite( 55 | controller, 56 | query, 57 | sources, 58 | 
keep, 59 | rid, 60 | k, 61 | tokenLength, 62 | apiKey, 63 | snippet, 64 | setSources, 65 | setMarkdown, 66 | setMarkdownCompare, 67 | setError, 68 | ); 69 | }; 70 | 71 | return ( 72 |
73 | 84 | 85 | {error && ( 86 |
87 |
88 | 89 | {error === 429 90 | ? "Sorry, you have made too many requests recently, try again later." 91 | : "Sorry, we might be overloaded, try again later."} 92 |
93 |
94 | )} 95 | {modal && ( 96 |
97 |
98 |

99 | 1. Hover over tokens in the Answer section to see the attention 100 | visualization for that generated token. If you want to lock that 101 | visualization or visualize multiple tokens, simply drag. 102 |

103 | 104 |

105 | 2. Remove documents by clicking the buttons, and remove tokens by 106 | dragging. 107 |

108 | 109 |

110 | 3. Press "Save and Rewrite" to see the new generation. 111 |

112 | 113 |
setModal(false)} 116 | > 117 | Close Instructions 118 |
119 |
120 |
121 | )} 122 |
123 | ); 124 | }; 125 | -------------------------------------------------------------------------------- /frontend/src/app/components/search.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { getSearchUrl } from "@/app/utils/get-search-url"; 3 | import { ArrowRight } from "lucide-react"; 4 | import { nanoid } from "nanoid"; 5 | import { useRouter } from "next/navigation"; 6 | import React, { FC, useState } from "react"; 7 | 8 | export const Search: FC<{ 9 | defK: string; 10 | apiKey: string; 11 | defSnippet: string; 12 | }> = ({ defK, apiKey, defSnippet }) => { 13 | const [value, setValue] = useState(""); 14 | const [k, setK] = useState(defK); 15 | const [snippet, setSnippet] = useState(defSnippet); 16 | const router = useRouter(); 17 | return ( 18 |
{ 20 | e.preventDefault(); 21 | if (value) { 22 | setValue(""); 23 | router.push( 24 | getSearchUrl( 25 | encodeURIComponent(value), 26 | nanoid(), 27 | encodeURIComponent(k), 28 | encodeURIComponent(apiKey), 29 | encodeURIComponent(snippet), 30 | ), 31 | ); 32 | } 33 | }} 34 | > 35 |
36 |
37 |

Snippet type:

38 | 46 |
47 | 78 |
79 | ); 80 | }; 81 | -------------------------------------------------------------------------------- /frontend/src/app/components/skeleton.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/app/utils/cn"; 2 | import { HTMLAttributes } from "react"; 3 | 4 | function Skeleton({ className, ...props }: HTMLAttributes) { 5 | return ( 6 |
10 | ); 11 | } 12 | 13 | export { Skeleton }; 14 | -------------------------------------------------------------------------------- /frontend/src/app/components/sources.tsx: -------------------------------------------------------------------------------- 1 | import { Skeleton } from "@/app/components/skeleton"; 2 | import { Wrapper } from "@/app/components/wrapper"; 3 | import { Source } from "@/app/interfaces/source"; 4 | import { BookText } from "lucide-react"; 5 | import { Dispatch, FC, SetStateAction, useEffect, useState } from "react"; 6 | 7 | const SourceItem: FC<{ 8 | source: Source; 9 | index: number; 10 | token: number[] | null; 11 | keep: boolean[][][]; 12 | setKeep: Dispatch>; 13 | }> = ({ source, index, token, keep, setKeep }) => { 14 | const { id, url, nameTokens, snippetTokens, attn } = source; 15 | const domain = new URL(url).hostname; 16 | const highlight = (score: number | null) => 17 | score == null || score < 0.1 18 | ? "" 19 | : score >= 0.1 && score <= 0.2 20 | ? "bg-red-100" 21 | : score > 0.2 && score <= 0.4 22 | ? "bg-red-200" 23 | : score > 0.4 && score <= 0.6 24 | ? "bg-red-300" 25 | : score > 0.6 && score <= 0.8 26 | ? "bg-red-400" 27 | : "bg-red-500"; // Default shade 28 | const handleRemoveDocument = (set: boolean) => () => { 29 | const newKeep = [...keep]; 30 | newKeep[index] = [ 31 | newKeep[index][0].map(() => set), 32 | newKeep[index][1].map(() => set), 33 | ]; 34 | setKeep(newKeep); 35 | }; 36 | 37 | const handleDragStart = 38 | (tokenIndex: number, name: boolean) => 39 | (event: React.DragEvent) => { 40 | // event.preventDefault(); // Prevent text selection 41 | // event.dataTransfer.setData("text/plain", String(index)); 42 | // if (!event.target.classList.contains("allow-drag")) { 43 | // event.target.classList.add("select-none"); 44 | // event.preventDefault(); 45 | // } 46 | // event.dataTransfer.dropEffect = "move"; 47 | const newKeep = [...keep]; 48 | console.log(tokenIndex); 49 | const nameSnippet = name ? 
0 : 1; 50 | newKeep[index][nameSnippet][tokenIndex] = false; 51 | setKeep(newKeep); 52 | }; 53 | const handleDrop = 54 | (tokenIndex: number, name: boolean) => 55 | (event: React.DragEvent) => { 56 | event.preventDefault(); 57 | }; 58 | 59 | const handleDragKeep = 60 | (tokenIndex: number, name: boolean) => 61 | (event: React.DragEvent) => { 62 | event.preventDefault(); 63 | event.dataTransfer.setData("text/plain", String(index)); 64 | const newKeep = [...keep]; 65 | console.log(tokenIndex); 66 | const nameSnippet = name ? 0 : 1; 67 | newKeep[index][nameSnippet][tokenIndex] = false; 68 | setKeep(newKeep); 69 | }; 70 | 71 | return ( 72 |
73 |
74 |

Attention score:

75 |

76 | {token == null || 77 | !token.map((t) => attn[t]) || 78 | token.length <= 0 || 79 | attn[token[0]] == null 80 | ? "None" 81 | : ( 82 | Math.round( 83 | token 84 | .map((t) => attn[t].score) 85 | .reduce((acc, curr) => acc + curr) * 1000, 86 | ) / 87 | (1000 * token.length) 88 | ).toFixed(3)} 89 |

90 |
91 |
95 | Add All Tokens 96 |
97 |
101 | Remove All Tokens 102 |
103 |
104 |
105 |
108 | {/* */} 109 |
110 | {nameTokens.map((str, tokenIndex) => ( 111 | attn[t]) && 120 | token.length > 0 && 121 | attn[token[0]] != null 122 | ? token 123 | .map((t) => attn[t].name[tokenIndex] * 1000) 124 | .reduce((acc, curr) => acc + curr) / token.length 125 | : null, 126 | )} ${keep[index] && keep[index][0][tokenIndex] ? "opacity-100" : "opacity-50"} cursor-pointer`} 127 | > 128 | {str} 129 | 130 | ))} 131 |
132 | 133 |
134 | {snippetTokens.map((str, tokenIndex) => ( 135 | attn[t]) && 144 | token.length > 0 && 145 | attn[token[0]] != null 146 | ? token 147 | .map((t) => attn[t].snippet[tokenIndex] * 1000) 148 | .reduce((acc, curr) => acc + curr) / token.length 149 | : null, 150 | )} ${keep[index] && keep[index][1][tokenIndex] ? "opacity-100" : "opacity-50"} cursor-pointer`} 151 | > 152 | {str} 153 | 154 | ))} 155 |
156 | 157 | {/*
158 |
159 |
160 | {index + 1} - {domain} 161 |
162 |
163 |
164 | {domain} 169 |
170 |
*/} 171 |
172 |
/**
 * Recursively clones nested arrays.
 *
 * Only arrays are copied; any non-array value (primitives, objects, null)
 * is returned as-is, so objects inside the arrays stay shared by reference.
 */
function deepCopyArray(arr: any): any {
  if (Array.isArray(arr)) {
    const clone = new Array(arr.length);
    for (let position = 0; position < arr.length; position += 1) {
      // Leave holes of sparse arrays untouched.
      if (position in arr) {
        clone[position] = deepCopyArray(arr[position]);
      }
    }
    return clone;
  }

  // Not an array: hand the value back unchanged.
  return arr;
}
228 | Sources 229 | {!(JSON.stringify(oldKeep) === JSON.stringify(keep)) && ( 230 | <> 231 |
235 | Save and Rewrite 236 |
237 |
241 | Cancel 242 |
243 | 244 | )} 245 |
246 | 247 | } 248 | content={ 249 |
250 | {sources.length > 0 ? ( 251 | sources.map((item, index) => ( 252 | 260 | )) 261 | ) : ( 262 | <> 263 | 264 | 265 | 266 | 267 | 268 | )} 269 |
270 | } 271 | >
272 | ); 273 | }; 274 | -------------------------------------------------------------------------------- /frontend/src/app/components/title.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { RefreshCcw } from "lucide-react"; 3 | import { nanoid } from "nanoid"; 4 | // import { getSearchUrl } from "@/app/utils/get-search-url"; 5 | // import { RefreshCcw } from "lucide-react"; 6 | // import { nanoid } from "nanoid"; 7 | import { useRouter } from "next/navigation"; 8 | import { getSearchUrl } from "../utils/get-search-url"; 9 | import { BookText } from "lucide-react"; 10 | 11 | export const Title = ({ 12 | query, 13 | k, 14 | apiKey, 15 | snippet, 16 | setModal, 17 | }: { 18 | query: string; 19 | k: string; 20 | apiKey: string; 21 | snippet: string; 22 | setModal: any; 23 | }) => { 24 | const router = useRouter(); 25 | return ( 26 |
27 |
31 | {query} 32 |
33 | 40 |
41 | 58 |
59 |
60 | ); 61 | }; 62 | -------------------------------------------------------------------------------- /frontend/src/app/components/wrapper.tsx: -------------------------------------------------------------------------------- 1 | import { FC, ReactNode } from "react"; 2 | 3 | export const Wrapper: FC<{ 4 | title: ReactNode; 5 | content: ReactNode; 6 | }> = ({ title, content }) => { 7 | return ( 8 |
9 |
{title}
10 | {content} 11 |
12 | ); 13 | }; 14 | -------------------------------------------------------------------------------- /frontend/src/app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | input:-webkit-autofill, 6 | input:-webkit-autofill:hover, 7 | input:-webkit-autofill:focus, 8 | textarea:-webkit-autofill, 9 | textarea:-webkit-autofill:hover, 10 | textarea:-webkit-autofill:focus, 11 | select:-webkit-autofill, 12 | select:-webkit-autofill:hover, 13 | select:-webkit-autofill:focus { 14 | -webkit-background-clip: text; 15 | } 16 | -------------------------------------------------------------------------------- /frontend/src/app/interfaces/relate.ts: -------------------------------------------------------------------------------- 1 | export interface Relate { 2 | question: string; 3 | } 4 | -------------------------------------------------------------------------------- /frontend/src/app/interfaces/source.ts: -------------------------------------------------------------------------------- 1 | export interface Source { 2 | id: string; 3 | name: string; 4 | url: string; 5 | isFamilyFriendly: boolean; 6 | displayUrl: string; 7 | snippet: string; 8 | deepLinks: { snippet: string; name: string; url: string }[]; 9 | dateLastCrawled: string; 10 | cachedPageUrl: string; 11 | language: string; 12 | primaryImageOfPage?: { 13 | thumbnailUrl: string; 14 | width: number; 15 | height: number; 16 | imageId: string; 17 | }; 18 | attn: { 19 | name: number[]; 20 | snippet: number[]; 21 | score: number; 22 | }[]; 23 | isNavigational: boolean; 24 | nameTokens: string[]; 25 | snippetTokens: string[]; 26 | } 27 | -------------------------------------------------------------------------------- /frontend/src/app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import { Inter } from 
"next/font/google"; 3 | import "./globals.css"; 4 | import { ReactNode } from "react"; 5 | 6 | const inter = Inter({ subsets: ["latin"] }); 7 | 8 | export const metadata: Metadata = { 9 | title: "RAGViz", 10 | description: 11 | "Answer generated by large language models (LLMs). Double check for correctness.", 12 | }; 13 | 14 | export default function RootLayout({ children }: { children: ReactNode }) { 15 | return ( 16 | 17 | 18 | {/* Adding the favicon */} 19 | RAGViz {/* Using metadata title */} 20 | {" "} 24 | {/* Using metadata description */} 25 | 26 | {children} 27 | 28 | ); 29 | } 30 | -------------------------------------------------------------------------------- /frontend/src/app/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Footer } from "@/app/components/footer"; 3 | import { Logo } from "@/app/components/logo"; 4 | // import { PresetQuery } from "@/app/components/preset-query"; 5 | import { Search } from "@/app/components/search"; 6 | import React, { useState } from "react"; 7 | 8 | export default function Home() { 9 | const [apiKey, setApiKey] = useState(""); 10 | return ( 11 |
12 |
13 | 14 | 28 | 29 | 30 |
31 |
32 |
33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /frontend/src/app/search/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | import { Result } from "@/app/components/result"; 3 | import { Search } from "@/app/components/search"; 4 | import { Title } from "@/app/components/title"; 5 | import { useSearchParams } from "next/navigation"; 6 | import { useState } from "react"; 7 | 8 | export default function SearchPage() { 9 | const searchParams = useSearchParams(); 10 | const query = decodeURIComponent(searchParams.get("q") || ""); 11 | const rid = decodeURIComponent(searchParams.get("rid") || ""); 12 | const k = decodeURIComponent(searchParams.get("k") || "5"); 13 | const apiKey = decodeURIComponent(searchParams.get("api_key") || ""); 14 | const snippet = decodeURIComponent(searchParams.get("snippet") || "first"); 15 | const [modal, setModal] = useState(false); 16 | return ( 17 |
18 |
19 |
20 |
21 | 28 | 38 |
39 |
40 |
41 |
42 | 43 |
44 |
45 |
46 |
/**
 * Drains `reader` into `controller`, one chunk at a time.
 *
 * For every chunk read, `onChunk` (if given) is invoked first and the chunk is
 * then enqueued on the controller. When the reader reports completion,
 * `onDone` (if given) is invoked and the controller is closed.
 *
 * Implemented as a loop rather than self-recursion so that a long stream does
 * not build up one nested pending promise per chunk.
 *
 * @param reader - Reader obtained from the response body.
 * @param controller - Controller of the stream being fed.
 * @param onChunk - Optional observer called with each raw chunk.
 * @param onDone - Optional observer called once, just before the stream is closed.
 */
async function pump(
  reader: ReadableStreamDefaultReader,
  controller: ReadableStreamDefaultController,
  onChunk?: (chunk: Uint8Array) => void,
  onDone?: () => void,
): Promise<void> {
  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      onDone?.();
      controller.close();
      return;
    }
    onChunk?.(value);
    controller.enqueue(value);
  }
}

/**
 * Wraps the body of `response` in a new `ReadableStream` that re-emits every
 * chunk while letting the caller observe progress.
 *
 * A response without a body (`response.body === null`) produces a stream that
 * calls `onDone` and closes immediately instead of throwing.
 *
 * @param response - The fetch response whose body should be forwarded.
 * @param onChunk - Optional callback invoked with each chunk before it is enqueued.
 * @param onDone - Optional callback invoked once when the body is exhausted.
 * @returns A stream carrying the same chunks as the response body.
 */
export const fetchStream = (
  response: Response,
  onChunk?: (chunk: Uint8Array) => void,
  onDone?: () => void,
): ReadableStream => {
  // Acquire the reader eagerly so the body is locked as soon as we are called.
  const reader = response.body === null ? null : response.body.getReader();
  return new ReadableStream({
    start: (controller) => {
      if (reader === null) {
        // Nothing to forward: behave like an already-finished stream.
        onDone?.();
        controller.close();
        return;
      }
      // A rejection of the returned promise puts the new stream into the errored state.
      return pump(reader, controller, onChunk, onDone);
    },
  });
};
"/search.html" : "/search"; 10 | return `${prefix}?q=${encodeURIComponent(query)}&rid=${search_uuid}&k=${k}&api_key=${encodeURIComponent(apiKey)}&snippet=${snippet}`; 11 | }; 12 | -------------------------------------------------------------------------------- /frontend/src/app/utils/parse-rag.ts: -------------------------------------------------------------------------------- 1 | import { Source } from "@/app/interfaces/source"; 2 | 3 | export const parseRAG = async ( 4 | controller: AbortController, 5 | query: string, 6 | search_uuid: string, 7 | k: string, 8 | apiKey: string, 9 | snippet: string, 10 | onSources: (value: Source[]) => void, 11 | onMarkdown: (value: string[]) => void, 12 | onTokenLength: (value: number) => void, 13 | onError?: (status: number) => void, 14 | ) => { 15 | const url = `${process.env.NEXT_PUBLIC_BASE_PATH}/query.cgi?query=${encodeURIComponent(query)}&search_uuid=${encodeURIComponent(search_uuid)}&k=${encodeURIComponent(k)}&api_key=${encodeURIComponent(apiKey)}&snippet=${encodeURIComponent(snippet)}`; 16 | 17 | const response = await fetch(url, { 18 | method: "GET", 19 | headers: { 20 | "Content-Type": "application/json", 21 | Accept: "*/*", 22 | }, 23 | signal: controller.signal, 24 | }); 25 | 26 | if (response.status !== 200) { 27 | onError?.(response.status); 28 | return; 29 | } 30 | response 31 | .json() 32 | .then((data) => { 33 | console.log(data); 34 | onTokenLength(JSON.parse(data).docs[0].attn.length); 35 | onSources(JSON.parse(data).docs); 36 | onMarkdown(JSON.parse(data).answer); 37 | }) 38 | .catch((error) => { 39 | console.error("Error fetching RAG backend:", error); 40 | onSources([]); 41 | }); 42 | }; 43 | -------------------------------------------------------------------------------- /frontend/src/app/utils/parse-rewrite.ts: -------------------------------------------------------------------------------- 1 | import { Source } from "@/app/interfaces/source"; 2 | 3 | const LLM_SPLIT = "__LLM_RESPONSE__"; 4 | const 
/**
 * Expands one filtered attention entry back to the original token layout.
 *
 * @param attnElement - Attention over the filtered name/snippet tokens plus a score.
 * @param filter - Keep mask for one source: `filter[0]` for name tokens, `filter[1]` for snippet tokens.
 * @returns The same entry with `name` and `snippet` re-aligned to the unfiltered tokens; `score` is passed through.
 */
function unfilterAndPadAttention(
  attnElement: { name: number[]; snippet: number[]; score: number },
  filter: boolean[][],
) {
  return {
    name: unfilterAndPadSingleArray(attnElement.name, filter[0]),
    snippet: unfilterAndPadSingleArray(attnElement.snippet, filter[1]),
    score: attnElement.score,
  };
}

/**
 * Re-aligns values computed over kept tokens to the full token list.
 *
 * Walks `filter` in order: each kept position consumes the next value from
 * `array`; positions that were filtered out, and kept positions for which
 * `array` has run out of values, are filled with 0.
 *
 * @param array - Values for the kept tokens, in order.
 * @param filter - Keep mask over the original tokens.
 * @returns An array with the same length as `filter`.
 */
function unfilterAndPadSingleArray(
  array: readonly number[],
  filter: readonly boolean[],
): number[] {
  let cursor = 0;
  return filter.map((kept) =>
    kept && cursor < array.length ? array[cursor++] : 0,
  );
}

/**
 * Tells whether a source still has any token selected.
 *
 * The first and last name-token flags are ignored for this decision
 * (presumably delimiter tokens around the name — TODO confirm with the
 * tokenizer output); every snippet flag counts.
 */
function hasKeptToken(mask: boolean[][]): boolean {
  return mask[0].slice(1, -1).some(Boolean) || mask[1].some(Boolean);
}

/**
 * Sends the user-edited sources to the backend (`rewrite.cgi`) and merges the
 * returned attention back into the full source list.
 *
 * Sources whose keep mask leaves no token selected are omitted from the
 * request. Each remaining source is sent with its name/snippet rebuilt from
 * the kept tokens and tagged with its `originalIndex`.
 *
 * On a 200 response, `onSources` receives a NEW array of NEW source objects
 * (the `sources` argument and its elements are never modified) in which every
 * source's `attn` is its first `tokenLength` entries followed, for sources
 * that were sent, by the backend's attention expanded back to the unfiltered
 * token layout. Returned documents are matched to sent sources by order.
 * `onMarkdownCompare` then receives the rewritten answer. The returned promise
 * settles after these callbacks have run.
 *
 * On any other status `onError` receives the status code. If the body cannot
 * be parsed or merged, the error is logged and `onSources([])` is emitted.
 * Failures of `fetch` itself reject the returned promise.
 *
 * @param controller - Abort controller whose signal cancels the request.
 * @param query - The user's query text.
 * @param sources - Current sources, including their existing attention.
 * @param keep - Per-source keep masks: `keep[i][0]` for name tokens, `keep[i][1]` for snippet tokens.
 * @param search_uuid - Identifier of this search, forwarded to the backend.
 * @param k - Number of documents requested, as a string.
 * @param tokenLength - Number of leading attention entries to retain per source.
 * @param apiKey - API key forwarded to the backend.
 * @param snippet - Snippet strategy name forwarded to the backend.
 * @param onSources - Receives the merged sources (or `[]` on parse failure).
 * @param onMarkdown - Accepted for signature compatibility; not called by this function.
 * @param onMarkdownCompare - Receives the rewritten answer.
 * @param onError - Optional; receives the HTTP status when it is not 200.
 */
export const parseRewrite = async (
  controller: AbortController,
  query: string,
  sources: Source[],
  keep: boolean[][][],
  search_uuid: string,
  k: string,
  tokenLength: number,
  apiKey: string,
  snippet: string,
  onSources: (value: Source[]) => void,
  onMarkdown: (value: string[]) => void,
  onMarkdownCompare: (value: string[]) => void,
  onError?: (status: number) => void,
): Promise<void> => {
  // Decide once which sources survive, so the request and the merge agree.
  const isKept = sources.map((_, index) => hasKeptToken(keep[index]));

  const newSources = [];
  for (let originalIndex = 0; originalIndex < sources.length; originalIndex++) {
    if (!isKept[originalIndex]) continue;
    const source = sources[originalIndex];
    const [nameMask, snippetMask] = keep[originalIndex];
    const nameTokens = source.nameTokens.filter((_, i) => nameMask[i]);
    const snippetTokens = source.snippetTokens.filter((_, i) => snippetMask[i]);
    newSources.push({
      ...source,
      originalIndex,
      name: nameTokens.join(""),
      snippet: snippetTokens.join(""),
      nameTokens,
      snippetTokens,
    });
  }

  const url = `${process.env.NEXT_PUBLIC_BASE_PATH}/rewrite.cgi`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "*/*",
    },
    signal: controller.signal,
    body: JSON.stringify({
      query,
      search_uuid,
      k,
      api_key: apiKey,
      results: newSources,
      snippet,
    }),
  });

  if (response.status !== 200) {
    onError?.(response.status);
    return;
  }

  try {
    const raw: unknown = await response.json();
    // The body is a JSON document whose value is itself a JSON-encoded string;
    // an already-decoded object is accepted as-is.
    const parsedData = (typeof raw === "string" ? JSON.parse(raw) : raw) as {
      docs: Source[];
      answer: string[];
    };
    const docs = parsedData.docs;
    if (docs.length !== newSources.length) {
      console.error(
        `Rewrite response returned ${docs.length} docs for ${newSources.length} kept sources`,
      );
    }

    let nextDoc = 0;
    const updatedSources = sources.map((source, index) => {
      // Always discard attention from any earlier rewrite, including for
      // sources that were dropped or that the backend did not return.
      const originalAttn = source.attn.slice(0, tokenLength);
      const doc = isKept[index] ? docs[nextDoc++] : undefined;
      if (doc === undefined) {
        return { ...source, attn: originalAttn };
      }
      const rewriteAttn = doc.attn.map((element) =>
        unfilterAndPadAttention(element, keep[index]),
      );
      return { ...source, attn: originalAttn.concat(rewriteAttn) };
    });

    onSources(updatedSources);
    onMarkdownCompare(parsedData.answer);
  } catch (error) {
    console.error("Error fetching sources:", error);
    onSources([]);
  }
};