├── .gitignore
├── LICENSE
├── README.md
├── backend
├── .env.example
├── helpers
│ ├── ClueWeb22Api.py
│ ├── concurrent_fetch.py
│ ├── embedding.py
│ └── range_dictionary.py
├── rag
│ ├── client.py
│ └── server.py
├── ragviz.py
├── requirements.txt
├── search
│ ├── clueweb
│ │ ├── ranges
│ │ │ ├── 0.tsv
│ │ │ ├── 1.tsv
│ │ │ ├── 10.tsv
│ │ │ ├── 11.tsv
│ │ │ ├── 12.tsv
│ │ │ ├── 13.tsv
│ │ │ ├── 14.tsv
│ │ │ ├── 15.tsv
│ │ │ ├── 16.tsv
│ │ │ ├── 17.tsv
│ │ │ ├── 18.tsv
│ │ │ ├── 19.tsv
│ │ │ ├── 2.tsv
│ │ │ ├── 20.tsv
│ │ │ ├── 21.tsv
│ │ │ ├── 22.tsv
│ │ │ ├── 23.tsv
│ │ │ ├── 24.tsv
│ │ │ ├── 25.tsv
│ │ │ ├── 26.tsv
│ │ │ ├── 27.tsv
│ │ │ ├── 28.tsv
│ │ │ ├── 29.tsv
│ │ │ ├── 3.tsv
│ │ │ ├── 30.tsv
│ │ │ ├── 31.tsv
│ │ │ ├── 32.tsv
│ │ │ ├── 33.tsv
│ │ │ ├── 34.tsv
│ │ │ ├── 35.tsv
│ │ │ ├── 36.tsv
│ │ │ ├── 37.tsv
│ │ │ ├── 38.tsv
│ │ │ ├── 39.tsv
│ │ │ ├── 4.tsv
│ │ │ ├── 40.tsv
│ │ │ ├── 41.tsv
│ │ │ ├── 42.tsv
│ │ │ ├── 43.tsv
│ │ │ ├── 44.tsv
│ │ │ ├── 45.tsv
│ │ │ ├── 46.tsv
│ │ │ ├── 5.tsv
│ │ │ ├── 6.tsv
│ │ │ ├── 7.tsv
│ │ │ ├── 8.tsv
│ │ │ ├── 9.tsv
│ │ │ └── clueweb.tsv
│ │ └── search.py
│ ├── pile
│ │ ├── ranges
│ │ │ └── pile.tsv
│ │ └── search.py
│ └── search.py
└── snippet
│ ├── naive_first.py
│ ├── sliding_window.py
│ └── snippet.py
└── frontend
├── .env.local.example
├── .eslintrc.json
├── next-env.d.ts
├── next.config.mjs
├── package-lock.json
├── package.json
├── postcss.config.js
├── public
├── android-chrome-192x192.png
├── android-chrome-512x512.png
├── apple-touch-icon.png
├── bg.svg
├── favicon-16x16.png
├── favicon-32x32.png
├── favicon.ico
├── ragviz-square.png
└── site.webmanifest
├── src
└── app
│ ├── components
│ ├── answer.tsx
│ ├── footer.tsx
│ ├── logo.tsx
│ ├── popover.tsx
│ ├── preset-query.tsx
│ ├── relates.tsx
│ ├── result.tsx
│ ├── search.tsx
│ ├── skeleton.tsx
│ ├── sources.tsx
│ ├── title.tsx
│ └── wrapper.tsx
│ ├── globals.css
│ ├── interfaces
│ ├── relate.ts
│ └── source.ts
│ ├── layout.tsx
│ ├── page.tsx
│ ├── search
│ └── page.tsx
│ └── utils
│ ├── cn.ts
│ ├── fetch-stream.ts
│ ├── get-search-url.ts
│ ├── parse-rag.ts
│ └── parse-rewrite.ts
├── tailwind.config.ts
└── tsconfig.json
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | .env
3 | .env.local
4 | node_modules/
5 | .next/
6 | ui/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Tevin Wang
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # RAGViz
2 |
3 | RAGViz (Retrieval Augmented Generation Visualization) is a tool that visualizes both document-level and token-level attention on the retrieved context fed to the LLM to ground answer generation.
4 |
5 | - RAGViz provides an add/remove document functionality to compare the generated tokens when certain documents are not included in the context.
6 | - Combining both functionalities allows for a diagnosis on the effectiveness and influence of certain retrieved documents or sections of text on the LLM's answer generation.
7 |
8 | ### Demo Video
9 | A basic demonstration of RAGViz is available [here](https://www.youtube.com/embed/cTAbuTu6ur4?si=-uZ2AyNLx-5p8MZC).
10 |
11 | ### Configuration
12 |
13 | The following are the system configurations of our RAGViz demonstration:
14 | - The [Pile-CC](https://github.com/EleutherAI/pile-cc) English documents are used for retrieval
15 | - Documents are partitioned into 4 [DiskANN](https://github.com/microsoft/DiskANN/) indexes on separate nodes, each with ~20 million documents
16 | - Documents are embedded into feature vectors using [AnchorDR](https://github.com/yiqingxyq/AnchorDR). **To use [AnchorDR](https://github.com/yiqingxyq/AnchorDR) in RAGViz you must follow the installation instructions on the repo [here](https://github.com/yiqingxyq/AnchorDR) to ensure your Python environment is set up correctly. Do this after running `pip install -r backend/requirements.txt`.**
17 | - [LLaMa2](https://huggingface.co/docs/transformers/v4.34.0/en/model_doc/llama2) generation/attention output done with [vLLM](https://github.com/vllm-project/vllm) and [HuggingFace](https://huggingface.co/) transformers library
18 | - Frontend UI is adapted from [Lepton search engine](https://github.com/leptonai/search_with_lepton)
19 |
20 | ### Customization
21 |
22 | #### Snippets:
23 | You can modify the snippets used for context in RAG by adding a new file and class in `backend/snippet`, adding it to `backend/ragviz.py` and `frontend/src/app/components/search.tsx`. We currently offer the following snippets:
24 | - Naive First:
25 | - Represent a document with its first 128 tokens
26 | - Sliding Window
27 | - Compute inner product similarity between windows of 128 tokens and the query; use the most similar window to the query to represent a document
28 |
29 | #### Datasets:
30 | New datasets for retrieval can be added using a new file and class in `backend/search`, and modifying `backend/ragviz.py` accordingly.
31 |
32 | We currently have implementations for the following datasets:
33 | - ClueWeb22-B English documents
34 | - Pile-CC dataset
35 |
36 | #### LLMs:
37 | Any model supported by [HuggingFace](https://huggingface.co/) transformers library can be used as the LLM backbone.
38 |
39 | To use vLLM for fast inference, the LLM backbone needs to be supported by vLLM. A list of vLLM-supported models is available [here](https://docs.vllm.ai/en/latest/models/supported_models.html).
40 |
41 | You can set the path of the model used for RAG with the `RAG_MODEL` variable in `backend/.env.example`. We used `meta-llama/Llama-2-7b-chat-hf` for the demo.
42 |
43 |
--------------------------------------------------------------------------------
/backend/.env.example:
--------------------------------------------------------------------------------
1 | PROJECT_DIR=
2 | API_KEY=
3 | BACKEND_ADDR=
4 | BACKEND_PORT=
5 | RAG_ADDR=
6 | RAG_PORT=
7 | RAG_MODEL=
8 |
9 | CLUEWEB_ADDR_0=
10 | CLUEWEB_PORT_0=
11 | CLUEWEB_ADDR_1=
12 | CLUEWEB_PORT_1=
13 | CLUEWEB_ADDR_2=
14 | CLUEWEB_PORT_2=
15 | CLUEWEB_ADDR_3=
16 | CLUEWEB_PORT_3=
17 | CLUEWEB_PATH=
18 |
19 | PILE_ADDR_0=
20 | PILE_PORT_0=
21 | PILE_ADDR_1=
22 | PILE_PORT_1=
23 | PILE_ADDR_2=
24 | PILE_PORT_2=
25 | PILE_ADDR_3=
26 | PILE_PORT_3=
27 | PILE_PATH=
28 |
--------------------------------------------------------------------------------
/backend/helpers/ClueWeb22Api.py:
--------------------------------------------------------------------------------
1 |
2 | import os
3 | import gzip
4 |
5 | class ClueWeb22Api:
6 |
7 | def __init__(self, cw22id, cw22root_path):
8 | self.cw22id = cw22id
9 | self.cw22root_path = cw22root_path
10 |
11 | def get_base_filename_by_id(self, cw22id, cw22root_path, file_type='html'):
12 | html_path = self.cw22root_path + os.sep + file_type
13 | id_parts = cw22id.split('-')
14 | doc = int(id_parts[len(id_parts) - 1])
15 |
16 | language = id_parts[1][:2]
17 | segment = id_parts[1][:4]
18 | directory = id_parts[1]
19 | base_path = html_path + os.sep + language + os.sep + segment + os.sep + directory + os.sep
20 | base_filename = base_path + id_parts[1] + '-' + id_parts[2]
21 | return base_filename
22 |
23 | def get_primary_node_ids(self, annotate_html):
24 | annotations = annotate_html.annotations
25 | primary_node_ids = []
26 | for annotation in annotations:
27 | if annotation.type == AnnotateHtml.AnnotationType.Primary:
28 | primary_node_ids.append(int(annotation.nodeId))
29 | primary_node_ids.sort()
30 | return primary_node_ids
31 |
32 | def get_html_from_warc(self):
33 | cw22id = self.cw22id
34 | cw22root_path = self.cw22root_path
35 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path)
36 |
37 | warc_path = base_filename + '.warc.gz'
38 | offset_path = base_filename + '.warc.offset'
39 |
40 | id_parts = cw22id.split('-')
41 | doc = int(id_parts[len(id_parts) - 1])
42 |
43 | #Get html from warc using offset
44 | offset_length = len('{:010d}\n'.format(0, 0))
45 | with open (warc_path,'rb') as f_warc:
46 | with open (offset_path, 'r') as f_offset:
47 | f_offset.seek(int(doc) * int(offset_length))
48 | start_bytes = int (f_offset.read (offset_length).strip())
49 | end_bytes = int (f_offset.read (offset_length).strip())
50 | f_warc.seek(start_bytes)
51 | record = f_warc.read(end_bytes - start_bytes)
52 | record = gzip.decompress(record).decode('utf-8')
53 |
54 | #Remove the WARC header to get the htmlStr
55 | warc_header = ''
56 | for line in record.splitlines():
57 | warc_header += line
58 | warc_header += '\r\n'
59 | if len(line.strip()) == 0:
60 | break
61 | record = record[len(warc_header):]
62 |
63 | return record
64 |
65 | def get_node_features(self):
66 | cw22id = self.cw22id
67 | cw22root_path = self.cw22root_path
68 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom')
69 | vdom_path = base_filename + '.zip'
70 |
71 | with zipfile.ZipFile(vdom_path, 'r') as z:
72 | doc_num = 0
73 | filename = cw22id + '.bin'
74 | with z.open(filename) as f:
75 | data = f.read()
76 | annotate_html = AnnotateHtml()
77 | annotate_html.ParseFromString(data)
78 |
79 | html_string = self.get_html_from_warc()
80 | api = AnnotateHtmlApi(annotate_html, init_nodes=False, html_string=html_string)
81 | vdom_features = api.get_all_node_features_no_offset()
82 | return vdom_features
83 |
84 | def get_node_features_with_text(self, is_primary=True):
85 | cw22id = self.cw22id
86 | cw22root_path = self.cw22root_path
87 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom')
88 | vdom_path = base_filename + '.zip'
89 |
90 | json_path = base_filename + '.json.gz'
91 | offset_path = base_filename + '.offset'
92 |
93 | id_parts = cw22id.split('-')
94 | doc = int(id_parts[len(id_parts) - 1])
95 |
96 | nodes_and_features = []
97 | with zipfile.ZipFile(vdom_path, 'r') as z:
98 | doc_num = 0
99 | filename = cw22id + '.bin'
100 | with z.open(filename) as f:
101 | data = f.read()
102 | annotate_html = AnnotateHtml()
103 | annotate_html.ParseFromString(data)
104 |
105 | html_string = self.get_html_from_warc()
106 | api = AnnotateHtmlApi(annotate_html, init_nodes=True, html_string=html_string)
107 |
108 | all_soup_nodes = api.soup.find_all()
109 | primary_node_ids = all_soup_nodes
110 | if is_primary:
111 | primary_node_ids = self.get_primary_node_ids(annotate_html)
112 |
113 | htmlnode_vdomfeatures = {}
114 | for htmlnode in all_soup_nodes:
115 | #print(htmlnode)
116 | node_text = htmlnode.text.strip()
117 | if 'data-dcnode-id' in htmlnode.attrs and len(node_text) > 0:
118 | nodeid = int(htmlnode.attrs['data-dcnode-id'])
119 | if nodeid in primary_node_ids:
120 | vdom_feature = api.all_nodes[nodeid].vdom_feature
121 | node_dict = {'id': nodeid, 'text':htmlnode.text, 'vdom_feature':vdom_feature}
122 | nodes_and_features.append(node_dict)
123 | #htmlnode_vdomfeatures[nodeid] = vdom_feature
124 | return nodes_and_features
125 |
126 |
127 | def get_primary_content_with_annotations(self):
128 | cw22id = self.cw22id
129 | cw22root_path = self.cw22root_path
130 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type='vdom')
131 | vdom_path = base_filename + '.zip'
132 |
133 | id_parts = cw22id.split('-')
134 | doc = int(id_parts[len(id_parts) - 1])
135 |
136 | with zipfile.ZipFile(vdom_path, 'r') as z:
137 | doc_num = 0
138 | filename = cw22id + '.bin'
139 | with z.open(filename) as f:
140 | data = f.read()
141 | annotate_html = AnnotateHtml()
142 | annotate_html.ParseFromString(data)
143 |
144 | html_string = self.get_html_from_warc()
145 | api = AnnotateHtmlApi(annotate_html, init_nodes=True, html_string=html_string)
146 | primary_content_with_offset = api.get_primary_content_with_annotation_offset(get_binary_text=True)
147 | return primary_content_with_offset
148 |
149 | def get_json_record(self, record_type):
150 | cw22id = self.cw22id
151 | cw22root_path = self.cw22root_path
152 | base_filename = self.get_base_filename_by_id(cw22id, cw22root_path, file_type=record_type)
153 |
154 | json_path = base_filename + '.json.gz'
155 | offset_path = base_filename + '.offset'
156 |
157 | id_parts = cw22id.split('-')
158 | doc = int(id_parts[len(id_parts) - 1])
159 |
160 | offset_length = len('{:010d}\n'.format(0, 0))
161 | with open (json_path,'rb') as f_json:
162 | with open (offset_path, 'r') as f_offset:
163 | f_offset.seek(int(doc) * int(offset_length))
164 | start_bytes = int (f_offset.read (offset_length).strip())
165 | end_bytes = int (f_offset.read (offset_length).strip())
166 | f_json.seek(start_bytes)
167 | record = f_json.read(end_bytes - start_bytes)
168 | record = gzip.decompress(record).decode('utf-8')
169 | return record
170 |
171 |
172 | def get_clean_text(self):
173 | record = self.get_json_record('txt')
174 | return record
175 |
176 | def get_inlinks(self):
177 | record = self.get_json_record('inlink')
178 | return record
179 |
180 | def get_outlinks(self):
181 | record = self.get_json_record('outlink')
182 | return record
183 |
184 |
185 |
--------------------------------------------------------------------------------
/backend/helpers/concurrent_fetch.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import concurrent.futures
3 | import time
4 |
5 | def fetch(i, url, jsonquery):
6 | start_time = time.perf_counter()
7 | response = requests.post(url, json=jsonquery)
8 | end_time = time.perf_counter()
9 | elapsed_time = end_time - start_time
10 | print(f"ANNS TIME: {elapsed_time} seconds")
11 | return i, response.json()
12 |
13 | def fetch_all(urls, jsonquery):
14 | with concurrent.futures.ThreadPoolExecutor() as executor:
15 | # Submit requests for all URLs concurrently
16 | future_to_url = {executor.submit(fetch, i, url, jsonquery): url for i, url in urls}
17 | responses = []
18 | for future in concurrent.futures.as_completed(future_to_url):
19 | url = future_to_url[future]
20 | try:
21 | data = future.result()
22 | responses.append(data)
23 | except Exception as exc:
24 | print(f"Error fetching data from {url}: {exc}")
25 | return responses
--------------------------------------------------------------------------------
/backend/helpers/embedding.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import time
3 |
def embedding_function(tokenizer, model, query):
    """Embed ``query`` and return the first-position hidden state as a list.

    Args:
        tokenizer: Callable used as ``tokenizer(query, return_tensors="pt")``;
            the result must expose an ``input_ids`` tensor.
        model: Callable used as ``model(input_ids=..., decoder_input_ids=...)``;
            the result must expose ``last_hidden_state``.
        query: Text to embed.

    Returns:
        ``last_hidden_state[0, 0]`` converted to a plain Python list.

    Side effects:
        Prints the token count and the elapsed embedding time.
    """
    start_time = time.perf_counter()

    # Tokenize once: the decoder is fed exactly the same ids as the encoder,
    # so a second tokenizer call would only repeat the same work.
    input_ids = tokenizer(query, return_tensors="pt").input_ids
    print(f"TOKEN_COUNT: {len(input_ids[0])}")
    decoder_input_ids = input_ids

    # Forward pass without gradient tracking; only the activations are needed.
    with torch.no_grad():
        outputs = model(input_ids=input_ids, decoder_input_ids=decoder_input_ids)

    # Last-layer hidden states; index [0, 0] picks the first sequence and
    # its first position.
    embeddings = outputs.last_hidden_state

    # .cpu() keeps the conversion working if the output tensor is not on the CPU.
    embeddings_np = embeddings.detach().cpu().numpy()
    res = embeddings_np[0, 0].tolist()

    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"EMBEDDING TIME: {elapsed_time} seconds")
    return res
--------------------------------------------------------------------------------
/backend/helpers/range_dictionary.py:
--------------------------------------------------------------------------------
def create_range_dictionary(file_path):
    """Load ``(start, end)`` integer ranges from a whitespace-separated file.

    Each non-blank line must contain exactly two integers. Blank lines (for
    example a trailing empty line) are skipped instead of raising on unpacking.

    Args:
        file_path: Path of the range file to read.

    Returns:
        A list of ``(start, end)`` tuples in file order.

    Raises:
        ValueError: If a non-blank line does not hold exactly two integers.
    """
    range_list = []
    with open(file_path, 'r') as file:
        for line in file:
            fields = line.split()
            if not fields:
                # Nothing on this line; tolerate it rather than crash.
                continue
            start, end = map(int, fields)
            range_list.append((start, end))
    return range_list


def query_range_dictionary(range_list, query):
    """Find the first range containing ``query``.

    Ranges are half-open: ``start <= query < end``.

    Args:
        range_list: List of ``(start, end)`` tuples, as produced by
            ``create_range_dictionary``.
        query: Integer to look up.

    Returns:
        ``(index, offset)`` where ``index`` is the position of the matching
        range and ``offset`` is ``query - start``. If no range matches, the
        bare integer ``0`` is returned.

    NOTE(review): the miss value ``0`` is not a tuple, so callers that unpack
    the result will fail on a miss. Kept unchanged because callers are not
    visible here.
    """
    for i, (start, end) in enumerate(range_list):
        if start <= query < end:
            return i, query - start
    return 0
--------------------------------------------------------------------------------
/backend/rag/client.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 |
def rag_client(query: str, results):
    """Send the query and retrieved documents to the RAG server and merge the reply.

    POSTs ``{"query": query, "docs": results}`` to the ``/generate`` endpoint
    at the address given by the ``RAG_ADDR`` and ``RAG_PORT`` environment
    variables, then annotates each entry of ``results`` in place.

    Args:
        query: The user query.
        results: List of document dicts. Each one receives the keys ``attn``,
            ``nameTokens`` and ``snippetTokens`` taken from the response.

    Returns:
        ``{'docs': results, 'answer': <response 'tokens'>}``.
    """
    jsonquery = {
        "query": query,
        "docs": results
    }

    response = requests.post(f'http://{os.getenv("RAG_ADDR")}:{os.getenv("RAG_PORT")}/generate', json=jsonquery)

    # Decode the body once; calling response.json() re-parses it every time.
    payload = response.json()

    for i, result in enumerate(results):
        result['attn'] = payload['attn'][i]
        result['nameTokens'] = payload['docs'][i]['name']
        result['snippetTokens'] = payload['docs'][i]['snippet']
    return {'docs': results, 'answer': payload['tokens']}
--------------------------------------------------------------------------------
/backend/rag/server.py:
--------------------------------------------------------------------------------
1 | from transformers import BitsAndBytesConfig, AutoModelForCausalLM, AutoTokenizer # LlamaTokenizer, LlamaForCausalLM,
2 | import torch
3 | import numpy as np
4 | import os
5 |
6 | from fastapi import FastAPI, Query
7 | from pydantic import BaseModel
8 |
9 | from vllm import LLM, SamplingParams
10 | import torch
11 |
12 | import uvicorn
13 | import json
14 | import time
15 |
16 | def load_model(model_name, tp_size=1):
17 | if "minicpm" in model_name.lower():
18 | llm = LLM(
19 | model_name,
20 | trust_remote_code=True,
21 | dtype='half',
22 | tensor_parallel_size=tp_size,
23 | device=torch.device("cuda:0"),
24 | gpu_memory_utilization=0.5
25 | )
26 | else:
27 | llm = LLM(model_name, tensor_parallel_size=tp_size, device=torch.device("cuda:0"), gpu_memory_utilization=0.5)
28 | return llm
29 |
30 | os.environ["CUDA_VISIBLE_DEVICES"] = "0"
31 |
32 | model_path = os.getenv("RAG_MODEL")
33 | tokenizer = AutoTokenizer.from_pretrained(model_path, trust_remote_code=True)
34 | hf_model = AutoModelForCausalLM.from_pretrained(
35 | model_path,
36 | trust_remote_code=True,
37 | device_map="auto",
38 | quantization_config=BitsAndBytesConfig(load_in_4bit=True),
39 | max_memory={0: "12GB"}
40 | )
41 |
42 |
43 | torch.cuda.manual_seed(42)
44 | torch.manual_seed(42)
45 |
46 | model_name = os.getenv("RAG_MODEL")
47 | model = load_model(model_name)
48 |
49 | app = FastAPI()
50 |
51 | def process_tokens(tokenized_docs):
52 | tokens = []
53 |
54 | special_tokens = []
55 |
56 | for i, token in enumerate(tokenized_docs):
57 | if token not in special_tokens:
58 | token = token.replace('▁', ' ')
59 | token = token.replace('<0x0A>', '\n')
60 | token = token.replace('Ġ', ' ')
61 | token = token.replace('Ċ', ' ')
62 | tokens.append(token)
63 | return tokens
64 |
def vllm(model, query, docs, max_new_tokens=100, user_prompt=None, top_p=0.9, temperature=0.8):
    """Generate an answer with vLLM and build the token ids for the attention pass.

    The prompt ids are assembled piece by piece ("Context: ", then each
    document's title and snippet, then the question) so that the start
    position of every title and snippet is known.

    Args:
        model: vLLM engine exposing ``generate(prompt, sampling_params=...)``.
        query: The user question.
        docs: List of dicts with ``name`` and ``snippet`` keys.
        max_new_tokens: Maximum number of tokens to sample.
        user_prompt: Unused; kept for interface compatibility.
        top_p: Nucleus-sampling parameter.
        temperature: Sampling temperature.

    Returns:
        A tuple ``(input_ids, doc_starts, docs_len, input_len, text, doc_tokens)``:
        the concatenated prompt ids, a ``(title_start, snippet_start)`` pair per
        document, the length of the ids up to the end of the documents, the
        total prompt length, the generated text, and the display tokens of
        each document's title and snippet.
    """
    context = ""
    doc_starts = []
    doc_tokens = []
    context_ids = tokenizer("Context: ", return_tensors='pt')['input_ids']
    input_ids = context_ids.clone()
    for i, doc in enumerate(docs):
        new_title = f"{doc['name']}: "
        # The first token of each separately tokenized piece is dropped
        # (presumably the BOS token — confirm for the tokenizer in use).
        title_ids = tokenizer(new_title, return_tensors='pt')['input_ids'][:, 1:]
        title_tokens = process_tokens(tokenizer.convert_ids_to_tokens(tokenizer(new_title)['input_ids']))[1:]
        title_start = input_ids.size(1)
        input_ids = torch.cat([input_ids, title_ids], dim=-1).clone()

        # Snippets are ';'-separated, with no separator after the last one.
        new_context = f"{doc['snippet']};" if i < len(docs) - 1 else f"{doc['snippet']}"
        context += new_context
        snippet_ids = tokenizer(new_context, return_tensors='pt')['input_ids'][:, 1:]
        snippet_tokens = process_tokens(tokenizer.convert_ids_to_tokens(tokenizer(new_context)['input_ids']))[1:]
        # Record where the title starts and where the snippet starts.
        doc_starts.append((title_start, input_ids.size(1)))
        input_ids = torch.cat([input_ids, snippet_ids], dim=-1).clone()
        doc_tokens.append({'name': title_tokens, 'snippet': snippet_tokens})

    # Must be an f-string: without the prefix the literal text "{query}" is
    # tokenized and the ids no longer contain the user's question.
    query_ids = tokenizer(f"\n Question: {query} Answer in less than 100 tokens:", return_tensors='pt')['input_ids']
    docs_len = input_ids.size(1)
    input_ids = torch.cat([input_ids, query_ids], dim=-1).clone()
    input_len = input_ids.size(1)

    sampling_param = SamplingParams(top_p=top_p, temperature=temperature, max_tokens=max_new_tokens)

    # NOTE(review): `context` holds only the snippets, while `input_ids` also
    # contains the document titles — confirm this difference is intended.
    prompt = f"Context: {context}\n Question: {query} Answer in less than 100 tokens:"
    start_time = time.perf_counter()
    outputs = model.generate(prompt, sampling_params=sampling_param)
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"VLLM CHAT COMPLETION TIME: {elapsed_time} seconds")
    print(outputs)

    return input_ids, doc_starts, docs_len, input_len, outputs[0].outputs[0].text, doc_tokens
102 |
103 | def format_attention(attention, layers=None, heads=None):
104 | if layers:
105 | attention = [attention[layer_index] for layer_index in layers]
106 | squeezed = []
107 | for layer_attention in attention:
108 | # 1 x num_heads x seq_len x seq_len
109 | if len(layer_attention.shape) != 4:
110 | raise ValueError("The attention tensor does not have the correct number of dimensions. Make sure you set "
111 | "output_attentions=True when initializing your model.")
112 | layer_attention = layer_attention.squeeze(0)
113 | if heads:
114 | layer_attention = layer_attention[heads]
115 | squeezed.append(layer_attention)
116 | # num_layers x num_heads x seq_len x seq_len
117 | return torch.stack(squeezed)
118 |
119 | def num_layers(attention):
120 | return len(attention)
121 |
122 | def num_heads(attention):
123 | return attention[0][0].size(0)
124 |
125 |
126 | def hf(model, input_ids, doc_starts, docs_len, input_len, generated_text):
127 | generated_ids = tokenizer(generated_text, return_tensors='pt')['input_ids']
128 | generated_tokens = tokenizer.convert_ids_to_tokens(tokenizer(generated_text)['input_ids'])
129 | output_ids = torch.cat([input_ids, generated_ids], dim=-1).clone()
130 | with torch.no_grad():
131 | start_time = time.perf_counter()
132 | outputs = model(output_ids, output_attentions=True)
133 | end_time = time.perf_counter()
134 | elapsed_time = end_time - start_time
135 | print(f"ATTENTION FORWARD PASS TIME: {elapsed_time} seconds")
136 | attentions = outputs.attentions
137 | n_heads = num_heads(attentions)
138 | include_layers = list(range(num_layers(attentions)))
139 | include_heads = list(range(n_heads))
140 | attention = format_attention(attentions, include_layers, include_heads)
141 |
142 | att_q = []
143 | att_d = [[] for doc in doc_starts]
144 | att = torch.mean(attention, dim=[0,1]).numpy()
145 |
146 | for t_num in range(input_len, output_ids.shape[1]):
147 | for i, doc in enumerate(doc_starts):
148 | title_start, snippet_start = doc
149 | doc_end = doc_starts[i + 1][0] if i < len(doc_starts) - 1 else docs_len
150 | att_d[i].append({"name": att[t_num, title_start:snippet_start].tolist(), "snippet": att[t_num, snippet_start:doc_end].tolist(), "score": float(np.sum(att[t_num, title_start:doc_end]))}) # doc
151 |
152 | return att_d, generated_tokens
153 |
154 | def process_text(doc_tokens, tokenized_text, attn_d):
155 | processed_tokens = []
156 |
157 | special_tokens = ['']
158 |
159 | attn = [[] for i in range (len(attn_d))]
160 |
161 | for i, token in enumerate(tokenized_text):
162 | if token not in special_tokens:
163 | token = token.replace('▁', ' ')
164 | token = token.replace('<0x0A>', '\n')
165 | token = token.replace('Ġ', ' ')
166 | token = token.replace('Ċ', ' ')
167 | processed_tokens.append(token)
168 | for j in range(len(attn)):
169 | attn[j].append(attn_d[j][i])
170 | return {'docs': doc_tokens, 'tokens': processed_tokens, 'attn': attn}
171 |
172 | class RequestData(BaseModel):
173 | query: str
174 | docs: list
175 |
@app.post("/generate")
def generate(request_data: RequestData = None, max_new_tokens: int = Query(100), top_p: float = Query(0.9), temperature: float = Query(0.8)):
    """Answer a query over the supplied documents and return attention data.

    Args:
        request_data: Body carrying ``query`` and ``docs``.
        max_new_tokens: Query parameter; maximum number of tokens to sample.
        top_p: Query parameter; nucleus-sampling parameter.
        temperature: Query parameter; sampling temperature.

    Returns:
        The dict built by ``process_text`` (keys ``docs``, ``tokens``,
        ``attn``), or ``None`` when no request body is supplied.
    """
    if request_data:
        query = request_data.query
        docs = request_data.docs
        # Forward the sampling parameters; previously they were accepted by
        # the endpoint but silently ignored in favour of vllm()'s defaults.
        input_ids, doc_starts, docs_len, input_len, response_text, doc_tokens = vllm(
            model,
            query,
            docs,
            max_new_tokens=max_new_tokens,
            top_p=top_p,
            temperature=temperature,
        )
        attn_d, tokenized_text = hf(hf_model, input_ids, doc_starts, docs_len, input_len, response_text)
        return process_text(doc_tokens, tokenized_text, attn_d)
184 |
185 | uvicorn.run(app, host="0.0.0.0", port=8080)
186 |
--------------------------------------------------------------------------------
/backend/ragviz.py:
--------------------------------------------------------------------------------
1 | import json
2 | import re
3 | import os
4 | from fastapi import HTTPException, FastAPI, Request
5 | from fastapi.responses import JSONResponse
6 | import uvicorn
7 | import time
8 |
9 | from search.pile.search import PileSearch
10 | from snippet.naive_first import NaiveFirstSnippet
11 | from snippet.sliding_window import SlidingWindowSnippet
12 | from helpers.embedding import embedding_function
13 | from rag.client import rag_client
14 |
15 | from transformers import AutoTokenizer, AutoModel
16 | import torch
17 |
18 | if __name__ == "__main__":
19 | tokenizer = AutoTokenizer.from_pretrained("yiqingx/AnchorDR")
20 | model = AutoModel.from_pretrained("yiqingx/AnchorDR")
21 | pile_search = PileSearch()
22 | naive_first = NaiveFirstSnippet(tokenizer)
23 | sliding_window = SlidingWindowSnippet(tokenizer, model, 64, 128)
24 | app = FastAPI()
25 |
@app.post("/query")
def query_function(item: dict) -> JSONResponse:
    """
    Query RAGViz and returns the response.

    The query can have the following fields:
        - query: the user query (required, non-empty).
        - k: number of documents to retrieve.
        - snippet: snippet type; "first" selects the naive-first snippet,
          any other value selects the sliding-window snippet.

    Raises:
        HTTPException: 400 when ``query`` is missing or empty.
    """
    start_time = time.perf_counter()
    # The old fallback name (`_default_query`) is defined nowhere in this
    # file, so an empty query crashed with NameError; reject it explicitly.
    query = item.get('query')
    if not query:
        raise HTTPException(status_code=400, detail="Missing or empty 'query' field")
    # Basic attack protection: remove "[INST]" or "[/INST]" from the query
    query = re.sub(r"\[/?INST\]", "", query)
    k = int(item['k'])
    snippet_type = item['snippet']

    embeddings = embedding_function(tokenizer, model, query)

    if snippet_type == "first":
        results = pile_search.get_search_results(embeddings, k, query, naive_first)
    else:
        results = pile_search.get_search_results(embeddings, k, query, sliding_window)

    rag_response = rag_client(query, results)

    res = JSONResponse(content=json.dumps(rag_response), media_type="application/json")
    end_time = time.perf_counter()
    elapsed_time = end_time - start_time
    print(f"TOTAL QUERY TIME: {elapsed_time} seconds")
    return res
55 |
56 | # Define your API keys
57 | API_KEYS = {
58 | "key": os.getenv("API_KEY"),
59 | }
60 |
61 | @app.middleware("http")
62 | async def check_api_key(request: Request, call_next):
63 | api_key = request.headers.get("X-API-Key")
64 |
65 | if api_key not in API_KEYS.values():
66 | return JSONResponse(status_code=401, content={"error": "Invalid API key"})
67 |
68 | response = await call_next(request)
69 | return response
70 |
71 | @app.post("/rewrite")
72 | async def rewrite(item: dict):
73 | return JSONResponse(content=json.dumps(rag_client(item['query'], item['results'])), media_type="application/json")
74 |
75 | if __name__ == "__main__":
76 | uvicorn.run(app, host=os.getenv("BACKEND_ADDR"), port=int(os.getenv("BACKEND_PORT")))
--------------------------------------------------------------------------------
/backend/requirements.txt:
--------------------------------------------------------------------------------
1 | torch
2 | vllm
3 | transformers
4 | fastapi
5 | uvicorn
6 | requests
7 | pydantic
8 | numpy
9 | bitsandbytes
10 | accelerate>=0.26.0
11 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/0.tsv:
--------------------------------------------------------------------------------
1 | 0 19012
2 | 19012 37463
3 | 37463 57240
4 | 57240 76855
5 | 76855 96968
6 | 96968 115565
7 | 115565 135228
8 | 135228 155670
9 | 155670 175677
10 | 175677 196038
11 | 196038 216354
12 | 216354 234317
13 | 234317 253331
14 | 253331 273734
15 | 273734 293656
16 | 293656 313792
17 | 313792 333411
18 | 333411 350870
19 | 350870 369667
20 | 369667 386046
21 | 386046 406438
22 | 406438 425786
23 | 425786 445832
24 | 445832 465141
25 | 465141 484849
26 | 484849 505218
27 | 505218 520648
28 | 520648 540793
29 | 540793 558073
30 | 558073 578670
31 | 578670 595429
32 | 595429 615361
33 | 615361 635673
34 | 635673 651218
35 | 651218 671516
36 | 671516 691457
37 | 691457 709979
38 | 709979 729412
39 | 729412 748591
40 | 748591 768670
41 | 768670 786578
42 | 786578 806118
43 | 806118 825684
44 | 825684 845645
45 | 845645 865668
46 | 865668 883640
47 | 883640 903375
48 | 903375 923724
49 | 923724 943398
50 | 943398 963455
51 | 963455 983465
52 | 983465 1003057
53 | 1003057 1022567
54 | 1022567 1042576
55 | 1042576 1062877
56 | 1062877 1082720
57 | 1082720 1102066
58 | 1102066 1122329
59 | 1122329 1142586
60 | 1142586 1162950
61 | 1162950 1182935
62 | 1182935 1203077
63 | 1203077 1221442
64 | 1221442 1239195
65 | 1239195 1259987
66 | 1259987 1279874
67 | 1279874 1298295
68 | 1298295 1316572
69 | 1316572 1336185
70 | 1336185 1356965
71 | 1356965 1375004
72 | 1375004 1395002
73 | 1395002 1415056
74 | 1415056 1434753
75 | 1434753 1454837
76 | 1454837 1472977
77 | 1472977 1487671
78 | 1487671 1506012
79 | 1506012 1521407
80 | 1521407 1541846
81 | 1541846 1562115
82 | 1562115 1581217
83 | 1581217 1601281
84 | 1601281 1621764
85 | 1621764 1641577
86 | 1641577 1661461
87 | 1661461 1680050
88 | 1680050 1693221
89 | 1693221 1713195
90 | 1713195 1732768
91 | 1732768 1753153
92 | 1753153 1773011
93 | 1773011 1793165
94 | 1793165 1810107
95 | 1810107 1829883
96 | 1829883 1847631
97 | 1847631 1868446
98 | 1868446 1888189
99 | 1888189 1905471
100 | 1905471 1923448
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/1.tsv:
--------------------------------------------------------------------------------
1 | 0 20172
2 | 20172 39185
3 | 39185 59373
4 | 59373 78740
5 | 78740 94505
6 | 94505 113927
7 | 113927 133601
8 | 133601 151967
9 | 151967 172446
10 | 172446 192310
11 | 192310 212688
12 | 212688 232873
13 | 232873 253138
14 | 253138 268508
15 | 268508 287466
16 | 287466 308018
17 | 308018 327566
18 | 327566 348471
19 | 348471 366581
20 | 366581 384899
21 | 384899 404953
22 | 404953 424021
23 | 424021 442112
24 | 442112 456806
25 | 456806 476318
26 | 476318 496071
27 | 496071 515828
28 | 515828 534934
29 | 534934 555480
30 | 555480 573941
31 | 573941 593592
32 | 593592 611658
33 | 611658 631334
34 | 631334 645720
35 | 645720 665435
36 | 665435 685940
37 | 685940 705703
38 | 705703 726047
39 | 726047 746528
40 | 746528 766084
41 | 766084 786116
42 | 786116 805644
43 | 805644 826164
44 | 826164 846727
45 | 846727 864257
46 | 864257 879716
47 | 879716 899229
48 | 899229 914478
49 | 914478 934095
50 | 934095 954131
51 | 954131 973703
52 | 973703 991094
53 | 991094 1006931
54 | 1006931 1027122
55 | 1027122 1047190
56 | 1047190 1068071
57 | 1068071 1084092
58 | 1084092 1103447
59 | 1103447 1124041
60 | 1124041 1143549
61 | 1143549 1163819
62 | 1163819 1183546
63 | 1183546 1204099
64 | 1204099 1225031
65 | 1225031 1242509
66 | 1242509 1260496
67 | 1260496 1280529
68 | 1280529 1300793
69 | 1300793 1319686
70 | 1319686 1340503
71 | 1340503 1359088
72 | 1359088 1377255
73 | 1377255 1394468
74 | 1394468 1413615
75 | 1413615 1433939
76 | 1433939 1452225
77 | 1452225 1470812
78 | 1470812 1491158
79 | 1491158 1511395
80 | 1511395 1529854
81 | 1529854 1546221
82 | 1546221 1564971
83 | 1564971 1584271
84 | 1584271 1604502
85 | 1604502 1624656
86 | 1624656 1644133
87 | 1644133 1664251
88 | 1664251 1682014
89 | 1682014 1700641
90 | 1700641 1720798
91 | 1720798 1740891
92 | 1740891 1761059
93 | 1761059 1780395
94 | 1780395 1797726
95 | 1797726 1817565
96 | 1817565 1836878
97 | 1836878 1856935
98 | 1856935 1871731
99 | 1871731 1892547
100 | 1892547 1912593
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/10.tsv:
--------------------------------------------------------------------------------
1 | 0 18915
2 | 18915 38930
3 | 38930 57633
4 | 57633 77288
5 | 77288 96782
6 | 96782 116769
7 | 116769 135842
8 | 135842 151833
9 | 151833 171590
10 | 171590 190485
11 | 190485 208440
12 | 208440 227954
13 | 227954 246765
14 | 246765 266653
15 | 266653 286769
16 | 286769 306523
17 | 306523 326936
18 | 326936 347145
19 | 347145 367141
20 | 367141 386908
21 | 386908 406694
22 | 406694 426257
23 | 426257 444956
24 | 444956 464891
25 | 464891 483972
26 | 483972 501667
27 | 501667 521264
28 | 521264 540709
29 | 540709 559796
30 | 559796 580669
31 | 580669 599657
32 | 599657 617618
33 | 617618 637883
34 | 637883 655988
35 | 655988 673415
36 | 673415 693362
37 | 693362 711456
38 | 711456 731421
39 | 731421 749644
40 | 749644 769333
41 | 769333 787715
42 | 787715 807483
43 | 807483 826919
44 | 826919 847696
45 | 847696 863487
46 | 863487 879206
47 | 879206 898521
48 | 898521 917530
49 | 917530 937905
50 | 937905 953121
51 | 953121 972647
52 | 972647 992294
53 | 992294 1012273
54 | 1012273 1032676
55 | 1032676 1050922
56 | 1050922 1071301
57 | 1071301 1088013
58 | 1088013 1108171
59 | 1108171 1124178
60 | 1124178 1144754
61 | 1144754 1165062
62 | 1165062 1182299
63 | 1182299 1201911
64 | 1201911 1219090
65 | 1219090 1238460
66 | 1238460 1256831
67 | 1256831 1272523
68 | 1272523 1287930
69 | 1287930 1308111
70 | 1308111 1327344
71 | 1327344 1338864
72 | 1338864 1357329
73 | 1357329 1377560
74 | 1377560 1396878
75 | 1396878 1416731
76 | 1416731 1436648
77 | 1436648 1457015
78 | 1457015 1468397
79 | 1468397 1488759
80 | 1488759 1508706
81 | 1508706 1528952
82 | 1528952 1544492
83 | 1544492 1562737
84 | 1562737 1581727
85 | 1581727 1600616
86 | 1600616 1613774
87 | 1613774 1633590
88 | 1633590 1653463
89 | 1653463 1671964
90 | 1671964 1691779
91 | 1691779 1711281
92 | 1711281 1726549
93 | 1726549 1744332
94 | 1744332 1762460
95 | 1762460 1779155
96 | 1779155 1799353
97 | 1799353 1813468
98 | 1813468 1831619
99 | 1831619 1851515
100 | 1851515 1870486
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/11.tsv:
--------------------------------------------------------------------------------
1 | 0 16024
2 | 16024 33179
3 | 33179 53533
4 | 53533 73636
5 | 73636 93710
6 | 93710 113203
7 | 113203 133104
8 | 133104 153398
9 | 153398 173272
10 | 173272 192052
11 | 192052 211909
12 | 211909 229078
13 | 229078 248924
14 | 248924 265990
15 | 265990 286418
16 | 286418 304378
17 | 304378 319288
18 | 319288 339149
19 | 339149 358899
20 | 358899 374443
21 | 374443 394500
22 | 394500 413311
23 | 413311 432215
24 | 432215 452046
25 | 452046 469857
26 | 469857 490044
27 | 490044 510088
28 | 510088 529191
29 | 529191 548491
30 | 548491 568915
31 | 568915 586225
32 | 586225 604832
33 | 604832 625715
34 | 625715 645947
35 | 645947 665984
36 | 665984 681737
37 | 681737 702162
38 | 702162 722731
39 | 722731 742884
40 | 742884 763013
41 | 763013 778923
42 | 778923 797520
43 | 797520 817926
44 | 817926 835903
45 | 835903 856335
46 | 856335 876558
47 | 876558 896968
48 | 896968 917357
49 | 917357 937418
50 | 937418 955824
51 | 955824 974274
52 | 974274 993673
53 | 993673 1013411
54 | 1013411 1033906
55 | 1033906 1054570
56 | 1054570 1074308
57 | 1074308 1095028
58 | 1095028 1113189
59 | 1113189 1131237
60 | 1131237 1149739
61 | 1149739 1169111
62 | 1169111 1187516
63 | 1187516 1206936
64 | 1206936 1224771
65 | 1224771 1244744
66 | 1244744 1263023
67 | 1263023 1278791
68 | 1278791 1294171
69 | 1294171 1314556
70 | 1314556 1334297
71 | 1334297 1354155
72 | 1354155 1370417
73 | 1370417 1389901
74 | 1389901 1408537
75 | 1408537 1428616
76 | 1428616 1446956
77 | 1446956 1465463
78 | 1465463 1484247
79 | 1484247 1504474
80 | 1504474 1524135
81 | 1524135 1542776
82 | 1542776 1562466
83 | 1562466 1582697
84 | 1582697 1602753
85 | 1602753 1622314
86 | 1622314 1642309
87 | 1642309 1661954
88 | 1661954 1682689
89 | 1682689 1702685
90 | 1702685 1722016
91 | 1722016 1741866
92 | 1741866 1759752
93 | 1759752 1779609
94 | 1779609 1794852
95 | 1794852 1814505
96 | 1814505 1832397
97 | 1832397 1851822
98 | 1851822 1871251
99 | 1871251 1886369
100 | 1886369 1903582
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/12.tsv:
--------------------------------------------------------------------------------
1 | 0 20536
2 | 20536 40840
3 | 40840 60548
4 | 60548 80205
5 | 80205 99338
6 | 99338 119724
7 | 119724 139911
8 | 139911 158004
9 | 158004 175892
10 | 175892 194317
11 | 194317 214858
12 | 214858 232850
13 | 232850 253163
14 | 253163 273145
15 | 273145 289209
16 | 289209 309330
17 | 309330 329602
18 | 329602 342159
19 | 342159 362988
20 | 362988 380465
21 | 380465 400108
22 | 400108 418357
23 | 418357 438574
24 | 438574 458433
25 | 458433 475058
26 | 475058 494644
27 | 494644 514214
28 | 514214 533601
29 | 533601 553323
30 | 553323 572426
31 | 572426 591694
32 | 591694 611745
33 | 611745 631094
34 | 631094 651284
35 | 651284 667154
36 | 667154 681416
37 | 681416 699932
38 | 699932 720826
39 | 720826 740316
40 | 740316 759890
41 | 759890 779884
42 | 779884 798616
43 | 798616 816742
44 | 816742 835093
45 | 835093 855669
46 | 855669 875573
47 | 875573 894956
48 | 894956 913287
49 | 913287 932164
50 | 932164 952199
51 | 952199 972537
52 | 972537 984791
53 | 984791 1000817
54 | 1000817 1020780
55 | 1020780 1040945
56 | 1040945 1059132
57 | 1059132 1077061
58 | 1077061 1096507
59 | 1096507 1116055
60 | 1116055 1135508
61 | 1135508 1153735
62 | 1153735 1173119
63 | 1173119 1191066
64 | 1191066 1207119
65 | 1207119 1227643
66 | 1227643 1247907
67 | 1247907 1267701
68 | 1267701 1285623
69 | 1285623 1301297
70 | 1301297 1321398
71 | 1321398 1339046
72 | 1339046 1358282
73 | 1358282 1374790
74 | 1374790 1393606
75 | 1393606 1408534
76 | 1408534 1426959
77 | 1426959 1444985
78 | 1444985 1464912
79 | 1464912 1484618
80 | 1484618 1504670
81 | 1504670 1525017
82 | 1525017 1541079
83 | 1541079 1561806
84 | 1561806 1577742
85 | 1577742 1595931
86 | 1595931 1616605
87 | 1616605 1636749
88 | 1636749 1653156
89 | 1653156 1673440
90 | 1673440 1691681
91 | 1691681 1710386
92 | 1710386 1726270
93 | 1726270 1745106
94 | 1745106 1765157
95 | 1765157 1785245
96 | 1785245 1803186
97 | 1803186 1823564
98 | 1823564 1843362
99 | 1843362 1862962
100 | 1862962 1878332
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/13.tsv:
--------------------------------------------------------------------------------
1 | 0 18477
2 | 18477 36925
3 | 36925 57160
4 | 57160 76301
5 | 76301 91513
6 | 91513 111444
7 | 111444 127133
8 | 127133 147337
9 | 147337 166938
10 | 166938 186406
11 | 186406 202185
12 | 202185 222697
13 | 222697 239694
14 | 239694 255448
15 | 255448 275720
16 | 275720 292777
17 | 292777 312527
18 | 312527 332685
19 | 332685 351199
20 | 351199 371303
21 | 371303 390458
22 | 390458 408704
23 | 408704 428413
24 | 428413 448335
25 | 448335 463713
26 | 463713 483280
27 | 483280 499584
28 | 499584 519168
29 | 519168 539336
30 | 539336 559155
31 | 559155 579879
32 | 579879 599852
33 | 599852 619567
34 | 619567 639560
35 | 639560 659280
36 | 659280 678282
37 | 678282 693375
38 | 693375 711599
39 | 711599 732048
40 | 732048 752235
41 | 752235 772406
42 | 772406 788942
43 | 788942 807707
44 | 807707 826810
45 | 826810 846922
46 | 846922 862171
47 | 862171 882418
48 | 882418 899679
49 | 899679 916855
50 | 916855 935974
51 | 935974 950447
52 | 950447 966417
53 | 966417 986055
54 | 986055 1001423
55 | 1001423 1021234
56 | 1021234 1041206
57 | 1041206 1058160
58 | 1058160 1076241
59 | 1076241 1095889
60 | 1095889 1116194
61 | 1116194 1136235
62 | 1136235 1155809
63 | 1155809 1175535
64 | 1175535 1195643
65 | 1195643 1214935
66 | 1214935 1232794
67 | 1232794 1251003
68 | 1251003 1270848
69 | 1270848 1290887
70 | 1290887 1310356
71 | 1310356 1327676
72 | 1327676 1346640
73 | 1346640 1364944
74 | 1364944 1383140
75 | 1383140 1403661
76 | 1403661 1423435
77 | 1423435 1443779
78 | 1443779 1463036
79 | 1463036 1480634
80 | 1480634 1501048
81 | 1501048 1520693
82 | 1520693 1541257
83 | 1541257 1560607
84 | 1560607 1575126
85 | 1575126 1593798
86 | 1593798 1613497
87 | 1613497 1633329
88 | 1633329 1648986
89 | 1648986 1667398
90 | 1667398 1687086
91 | 1687086 1707648
92 | 1707648 1726843
93 | 1726843 1744877
94 | 1744877 1764451
95 | 1764451 1783182
96 | 1783182 1800606
97 | 1800606 1820685
98 | 1820685 1838317
99 | 1838317 1855872
100 | 1855872 1876437
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/14.tsv:
--------------------------------------------------------------------------------
1 | 0 17171
2 | 17171 37453
3 | 37453 57237
4 | 57237 74453
5 | 74453 95418
6 | 95418 115431
7 | 115431 135557
8 | 135557 155630
9 | 155630 176347
10 | 176347 195891
11 | 195891 215340
12 | 215340 234975
13 | 234975 255315
14 | 255315 274598
15 | 274598 290609
16 | 290609 310756
17 | 310756 327014
18 | 327014 346715
19 | 346715 361873
20 | 361873 377175
21 | 377175 396160
22 | 396160 416433
23 | 416433 434434
24 | 434434 454360
25 | 454360 472755
26 | 472755 492025
27 | 492025 511505
28 | 511505 531427
29 | 531427 545345
30 | 545345 565005
31 | 565005 583063
32 | 583063 603126
33 | 603126 618644
34 | 618644 638889
35 | 638889 656169
36 | 656169 676698
37 | 676698 695743
38 | 695743 713996
39 | 713996 733091
40 | 733091 753197
41 | 753197 771465
42 | 771465 787286
43 | 787286 807280
44 | 807280 825284
45 | 825284 845592
46 | 845592 865833
47 | 865833 886037
48 | 886037 905855
49 | 905855 924810
50 | 924810 945696
51 | 945696 965634
52 | 965634 984873
53 | 984873 1004901
54 | 1004901 1023158
55 | 1023158 1042969
56 | 1042969 1062322
57 | 1062322 1082237
58 | 1082237 1102226
59 | 1102226 1120139
60 | 1120139 1138026
61 | 1138026 1157580
62 | 1157580 1178063
63 | 1178063 1195127
64 | 1195127 1215700
65 | 1215700 1234544
66 | 1234544 1255069
67 | 1255069 1274629
68 | 1274629 1292777
69 | 1292777 1312442
70 | 1312442 1331926
71 | 1331926 1352248
72 | 1352248 1370841
73 | 1370841 1390846
74 | 1390846 1409874
75 | 1409874 1428054
76 | 1428054 1447772
77 | 1447772 1463658
78 | 1463658 1471590
79 | 1471590 1492299
80 | 1492299 1508127
81 | 1508127 1527987
82 | 1527987 1548002
83 | 1548002 1567649
84 | 1567649 1587664
85 | 1587664 1605527
86 | 1605527 1625475
87 | 1625475 1643633
88 | 1643633 1662622
89 | 1662622 1682525
90 | 1682525 1698343
91 | 1698343 1717191
92 | 1717191 1736780
93 | 1736780 1756597
94 | 1756597 1777002
95 | 1777002 1796035
96 | 1796035 1812953
97 | 1812953 1832483
98 | 1832483 1852373
99 | 1852373 1871358
100 | 1871358 1890816
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/15.tsv:
--------------------------------------------------------------------------------
1 | 0 20150
2 | 20150 38649
3 | 38649 55632
4 | 55632 75858
5 | 75858 95567
6 | 95567 113880
7 | 113880 133996
8 | 133996 154510
9 | 154510 174297
10 | 174297 194965
11 | 194965 212824
12 | 212824 231846
13 | 231846 251904
14 | 251904 272522
15 | 272522 290368
16 | 290368 310387
17 | 310387 329129
18 | 329129 347497
19 | 347497 366110
20 | 366110 382625
21 | 382625 401793
22 | 401793 421091
23 | 421091 439927
24 | 439927 458903
25 | 458903 477427
26 | 477427 497021
27 | 497021 516720
28 | 516720 534424
29 | 534424 554237
30 | 554237 574597
31 | 574597 594350
32 | 594350 614644
33 | 614644 633063
34 | 633063 653424
35 | 653424 671156
36 | 671156 690188
37 | 690188 705555
38 | 705555 725089
39 | 725089 744644
40 | 744644 763891
41 | 763891 784433
42 | 784433 805357
43 | 805357 824817
44 | 824817 842700
45 | 842700 862806
46 | 862806 879644
47 | 879644 900061
48 | 900061 919657
49 | 919657 940283
50 | 940283 958389
51 | 958389 978361
52 | 978361 995813
53 | 995813 1015724
54 | 1015724 1034742
55 | 1034742 1054589
56 | 1054589 1074631
57 | 1074631 1093770
58 | 1093770 1114066
59 | 1114066 1134715
60 | 1134715 1154309
61 | 1154309 1174379
62 | 1174379 1193292
63 | 1193292 1213484
64 | 1213484 1233904
65 | 1233904 1253231
66 | 1253231 1272934
67 | 1272934 1292028
68 | 1292028 1310493
69 | 1310493 1328700
70 | 1328700 1347260
71 | 1347260 1367363
72 | 1367363 1382775
73 | 1382775 1402762
74 | 1402762 1423020
75 | 1423020 1443844
76 | 1443844 1460710
77 | 1460710 1477771
78 | 1477771 1496146
79 | 1496146 1516330
80 | 1516330 1535981
81 | 1535981 1554400
82 | 1554400 1573099
83 | 1573099 1590259
84 | 1590259 1609716
85 | 1609716 1628591
86 | 1628591 1645149
87 | 1645149 1663540
88 | 1663540 1684039
89 | 1684039 1703972
90 | 1703972 1718626
91 | 1718626 1735850
92 | 1735850 1755127
93 | 1755127 1775039
94 | 1775039 1795423
95 | 1795423 1814617
96 | 1814617 1833233
97 | 1833233 1853661
98 | 1853661 1874164
99 | 1874164 1894537
100 | 1894537 1914849
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/16.tsv:
--------------------------------------------------------------------------------
1 | 0 20379
2 | 20379 39929
3 | 39929 55687
4 | 55687 73050
5 | 73050 91501
6 | 91501 110783
7 | 110783 130632
8 | 130632 151258
9 | 151258 171289
10 | 171289 190769
11 | 190769 205471
12 | 205471 224320
13 | 224320 243944
14 | 243944 263971
15 | 263971 283629
16 | 283629 304063
17 | 304063 323899
18 | 323899 341320
19 | 341320 358591
20 | 358591 378669
21 | 378669 398813
22 | 398813 416454
23 | 416454 434558
24 | 434558 454490
25 | 454490 474721
26 | 474721 494806
27 | 494806 514889
28 | 514889 534248
29 | 534248 549672
30 | 549672 569612
31 | 569612 589650
32 | 589650 609679
33 | 609679 629096
34 | 629096 647449
35 | 647449 668009
36 | 668009 687717
37 | 687717 707324
38 | 707324 725135
39 | 725135 744350
40 | 744350 764254
41 | 764254 784421
42 | 784421 804666
43 | 804666 825149
44 | 825149 844936
45 | 844936 865304
46 | 865304 884533
47 | 884533 903986
48 | 903986 923628
49 | 923628 939231
50 | 939231 959566
51 | 959566 974955
52 | 974955 995476
53 | 995476 1015918
54 | 1015918 1035549
55 | 1035549 1053884
56 | 1053884 1072730
57 | 1072730 1092794
58 | 1092794 1112198
59 | 1112198 1132273
60 | 1132273 1150722
61 | 1150722 1170515
62 | 1170515 1188866
63 | 1188866 1208087
64 | 1208087 1227432
65 | 1227432 1247845
66 | 1247845 1268335
67 | 1268335 1288410
68 | 1288410 1307366
69 | 1307366 1327810
70 | 1327810 1348064
71 | 1348064 1365013
72 | 1365013 1385158
73 | 1385158 1404742
74 | 1404742 1422744
75 | 1422744 1442906
76 | 1442906 1463026
77 | 1463026 1482445
78 | 1482445 1502931
79 | 1502931 1523642
80 | 1523642 1542506
81 | 1542506 1562128
82 | 1562128 1579686
83 | 1579686 1599788
84 | 1599788 1619282
85 | 1619282 1633864
86 | 1633864 1651034
87 | 1651034 1666928
88 | 1666928 1684863
89 | 1684863 1704155
90 | 1704155 1722938
91 | 1722938 1740731
92 | 1740731 1761093
93 | 1761093 1776673
94 | 1776673 1797260
95 | 1797260 1815826
96 | 1815826 1836010
97 | 1836010 1851699
98 | 1851699 1872250
99 | 1872250 1889599
100 | 1889599 1909564
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/17.tsv:
--------------------------------------------------------------------------------
1 | 0 20744
2 | 20744 36653
3 | 36653 51678
4 | 51678 71588
5 | 71588 91312
6 | 91312 111482
7 | 111482 130023
8 | 130023 150088
9 | 150088 169283
10 | 169283 189283
11 | 189283 207065
12 | 207065 227255
13 | 227255 247335
14 | 247335 266819
15 | 266819 286229
16 | 286229 305021
17 | 305021 324576
18 | 324576 344701
19 | 344701 363040
20 | 363040 381727
21 | 381727 401634
22 | 401634 418714
23 | 418714 439136
24 | 439136 459250
25 | 459250 479907
26 | 479907 499706
27 | 499706 519620
28 | 519620 535628
29 | 535628 555431
30 | 555431 573902
31 | 573902 591267
32 | 591267 609832
33 | 609832 629144
34 | 629144 648803
35 | 648803 668685
36 | 668685 688967
37 | 688967 707898
38 | 707898 725302
39 | 725302 745357
40 | 745357 766352
41 | 766352 786244
42 | 786244 805879
43 | 805879 825563
44 | 825563 843981
45 | 843981 861910
46 | 861910 880420
47 | 880420 899833
48 | 899833 918180
49 | 918180 936099
50 | 936099 956403
51 | 956403 976472
52 | 976472 996117
53 | 996117 1015019
54 | 1015019 1031683
55 | 1031683 1051300
56 | 1051300 1064739
57 | 1064739 1080688
58 | 1080688 1098529
59 | 1098529 1116482
60 | 1116482 1136364
61 | 1136364 1156191
62 | 1156191 1176234
63 | 1176234 1193832
64 | 1193832 1212773
65 | 1212773 1231200
66 | 1231200 1245061
67 | 1245061 1264751
68 | 1264751 1276065
69 | 1276065 1294753
70 | 1294753 1314353
71 | 1314353 1333837
72 | 1333837 1354021
73 | 1354021 1368991
74 | 1368991 1388543
75 | 1388543 1405885
76 | 1405885 1425862
77 | 1425862 1445961
78 | 1445961 1465236
79 | 1465236 1485088
80 | 1485088 1504670
81 | 1504670 1522593
82 | 1522593 1540419
83 | 1540419 1557158
84 | 1557158 1576344
85 | 1576344 1594557
86 | 1594557 1614673
87 | 1614673 1634523
88 | 1634523 1654641
89 | 1654641 1672511
90 | 1672511 1693444
91 | 1693444 1711272
92 | 1711272 1728607
93 | 1728607 1749425
94 | 1749425 1769513
95 | 1769513 1789709
96 | 1789709 1809075
97 | 1809075 1826031
98 | 1826031 1845888
99 | 1845888 1866021
100 | 1866021 1885979
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/18.tsv:
--------------------------------------------------------------------------------
1 | 0 20500
2 | 20500 40570
3 | 40570 58530
4 | 58530 76164
5 | 76164 95335
6 | 95335 109833
7 | 109833 128938
8 | 128938 145279
9 | 145279 165048
10 | 165048 185360
11 | 185360 203277
12 | 203277 221492
13 | 221492 239254
14 | 239254 259201
15 | 259201 279539
16 | 279539 300137
17 | 300137 319704
18 | 319704 338552
19 | 338552 359178
20 | 359178 378976
21 | 378976 399101
22 | 399101 418425
23 | 418425 436260
24 | 436260 456716
25 | 456716 471251
26 | 471251 489734
27 | 489734 509470
28 | 509470 525448
29 | 525448 545520
30 | 545520 566075
31 | 566075 586476
32 | 586476 606430
33 | 606430 625839
34 | 625839 645270
35 | 645270 663370
36 | 663370 683832
37 | 683832 704134
38 | 704134 721350
39 | 721350 742290
40 | 742290 762416
41 | 762416 783101
42 | 783101 801839
43 | 801839 822068
44 | 822068 841727
45 | 841727 861909
46 | 861909 881578
47 | 881578 901169
48 | 901169 921334
49 | 921334 939752
50 | 939752 959946
51 | 959946 975408
52 | 975408 993762
53 | 993762 1013061
54 | 1013061 1033662
55 | 1033662 1049007
56 | 1049007 1069248
57 | 1069248 1088312
58 | 1088312 1107997
59 | 1107997 1126494
60 | 1126494 1145405
61 | 1145405 1162927
62 | 1162927 1183548
63 | 1183548 1202668
64 | 1202668 1218405
65 | 1218405 1237146
66 | 1237146 1256006
67 | 1256006 1274064
68 | 1274064 1294552
69 | 1294552 1315086
70 | 1315086 1334910
71 | 1334910 1355105
72 | 1355105 1374733
73 | 1374733 1394985
74 | 1394985 1414824
75 | 1414824 1434025
76 | 1434025 1453133
77 | 1453133 1473006
78 | 1473006 1493393
79 | 1493393 1513756
80 | 1513756 1530974
81 | 1530974 1550821
82 | 1550821 1570325
83 | 1570325 1589455
84 | 1589455 1608261
85 | 1608261 1627731
86 | 1627731 1647905
87 | 1647905 1666408
88 | 1666408 1686777
89 | 1686777 1706972
90 | 1706972 1727572
91 | 1727572 1747162
92 | 1747162 1767397
93 | 1767397 1787518
94 | 1787518 1805917
95 | 1805917 1825454
96 | 1825454 1844399
97 | 1844399 1862518
98 | 1862518 1877878
99 | 1877878 1893381
100 | 1893381 1911625
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/19.tsv:
--------------------------------------------------------------------------------
1 | 0 18590
2 | 18590 38258
3 | 38258 58601
4 | 58601 78086
5 | 78086 98015
6 | 98015 116214
7 | 116214 133956
8 | 133956 154652
9 | 154652 174572
10 | 174572 192321
11 | 192321 209734
12 | 209734 229406
13 | 229406 244700
14 | 244700 264434
15 | 264434 283913
16 | 283913 304538
17 | 304538 320597
18 | 320597 340944
19 | 340944 359541
20 | 359541 377293
21 | 377293 396831
22 | 396831 417451
23 | 417451 435693
24 | 435693 455326
25 | 455326 474872
26 | 474872 495017
27 | 495017 514855
28 | 514855 533005
29 | 533005 552956
30 | 552956 571114
31 | 571114 589296
32 | 589296 607995
33 | 607995 628060
34 | 628060 648663
35 | 648663 668958
36 | 668958 687381
37 | 687381 706929
38 | 706929 726253
39 | 726253 745853
40 | 745853 763527
41 | 763527 782798
42 | 782798 799947
43 | 799947 819968
44 | 819968 837852
45 | 837852 858432
46 | 858432 878205
47 | 878205 898664
48 | 898664 915950
49 | 915950 935405
50 | 935405 955480
51 | 955480 975167
52 | 975167 993204
53 | 993204 1013511
54 | 1013511 1033196
55 | 1033196 1052300
56 | 1052300 1072719
57 | 1072719 1092729
58 | 1092729 1112402
59 | 1112402 1130646
60 | 1130646 1150175
61 | 1150175 1170228
62 | 1170228 1188199
63 | 1188199 1208141
64 | 1208141 1227010
65 | 1227010 1245338
66 | 1245338 1264639
67 | 1264639 1285061
68 | 1285061 1302169
69 | 1302169 1322115
70 | 1322115 1339794
71 | 1339794 1358129
72 | 1358129 1373533
73 | 1373533 1393240
74 | 1393240 1408777
75 | 1408777 1428887
76 | 1428887 1447362
77 | 1447362 1465741
78 | 1465741 1485197
79 | 1485197 1503885
80 | 1503885 1521994
81 | 1521994 1542464
82 | 1542464 1560491
83 | 1560491 1579713
84 | 1579713 1599147
85 | 1599147 1619193
86 | 1619193 1639442
87 | 1639442 1660506
88 | 1660506 1681273
89 | 1681273 1701107
90 | 1701107 1721003
91 | 1721003 1740989
92 | 1740989 1760636
93 | 1760636 1778404
94 | 1778404 1798290
95 | 1798290 1817921
96 | 1817921 1837832
97 | 1837832 1857760
98 | 1857760 1875117
99 | 1875117 1894736
100 | 1894736 1910192
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/2.tsv:
--------------------------------------------------------------------------------
1 | 0 18746
2 | 18746 34335
3 | 34335 52371
4 | 52371 72932
5 | 72932 84309
6 | 84309 104183
7 | 104183 123510
8 | 123510 139120
9 | 139120 157425
10 | 157425 178167
11 | 178167 196835
12 | 196835 215815
13 | 215815 236291
14 | 236291 255730
15 | 255730 276036
16 | 276036 295083
17 | 295083 309534
18 | 309534 327607
19 | 327607 346604
20 | 346604 366441
21 | 366441 386877
22 | 386877 404088
23 | 404088 422219
24 | 422219 442422
25 | 442422 462046
26 | 462046 481910
27 | 481910 502064
28 | 502064 519085
29 | 519085 538176
30 | 538176 556480
31 | 556480 572660
32 | 572660 588068
33 | 588068 607541
34 | 607541 627624
35 | 627624 648366
36 | 648366 668834
37 | 668834 689203
38 | 689203 709316
39 | 709316 728611
40 | 728611 748063
41 | 748063 768443
42 | 768443 788598
43 | 788598 805783
44 | 805783 826103
45 | 826103 846266
46 | 846266 865867
47 | 865867 886083
48 | 886083 905877
49 | 905877 925801
50 | 925801 943638
51 | 943638 963658
52 | 963658 982511
53 | 982511 998955
54 | 998955 1019487
55 | 1019487 1039418
56 | 1039418 1059503
57 | 1059503 1079256
58 | 1079256 1098724
59 | 1098724 1118532
60 | 1118532 1138759
61 | 1138759 1157959
62 | 1157959 1178102
63 | 1178102 1196261
64 | 1196261 1216790
65 | 1216790 1236739
66 | 1236739 1256742
67 | 1256742 1277650
68 | 1277650 1295603
69 | 1295603 1316010
70 | 1316010 1335973
71 | 1335973 1355514
72 | 1355514 1375727
73 | 1375727 1395385
74 | 1395385 1415710
75 | 1415710 1433008
76 | 1433008 1450983
77 | 1450983 1471355
78 | 1471355 1488903
79 | 1488903 1509265
80 | 1509265 1529222
81 | 1529222 1549171
82 | 1549171 1565174
83 | 1565174 1579213
84 | 1579213 1597678
85 | 1597678 1617577
86 | 1617577 1637757
87 | 1637757 1655853
88 | 1655853 1675614
89 | 1675614 1694904
90 | 1694904 1715000
91 | 1715000 1731062
92 | 1731062 1747057
93 | 1747057 1767015
94 | 1767015 1784172
95 | 1784172 1803428
96 | 1803428 1822339
97 | 1822339 1841392
98 | 1841392 1860719
99 | 1860719 1880850
100 | 1880850 1900239
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/20.tsv:
--------------------------------------------------------------------------------
1 | 0 20388
2 | 20388 40927
3 | 40927 58350
4 | 58350 76675
5 | 76675 97031
6 | 97031 112425
7 | 112425 132611
8 | 132611 152331
9 | 152331 172323
10 | 172323 189609
11 | 189609 207650
12 | 207650 227614
13 | 227614 244582
14 | 244582 264405
15 | 264405 283633
16 | 283633 303890
17 | 303890 322280
18 | 322280 337986
19 | 337986 355418
20 | 355418 374147
21 | 374147 393049
22 | 393049 412479
23 | 412479 432614
24 | 432614 448279
25 | 448279 467667
26 | 467667 485637
27 | 485637 506095
28 | 506095 526481
29 | 526481 542151
30 | 542151 561488
31 | 561488 578269
32 | 578269 598925
33 | 598925 619332
34 | 619332 637370
35 | 637370 656997
36 | 656997 674952
37 | 674952 694552
38 | 694552 713724
39 | 713724 733390
40 | 733390 753565
41 | 753565 773481
42 | 773481 794008
43 | 794008 814365
44 | 814365 830536
45 | 830536 850227
46 | 850227 870144
47 | 870144 887454
48 | 887454 908229
49 | 908229 928145
50 | 928145 947765
51 | 947765 967292
52 | 967292 986536
53 | 986536 1006520
54 | 1006520 1026253
55 | 1026253 1045890
56 | 1045890 1065621
57 | 1065621 1081205
58 | 1081205 1099102
59 | 1099102 1114808
60 | 1114808 1134165
61 | 1134165 1153763
62 | 1153763 1173643
63 | 1173643 1193448
64 | 1193448 1213681
65 | 1213681 1232111
66 | 1232111 1252285
67 | 1252285 1271921
68 | 1271921 1289750
69 | 1289750 1307226
70 | 1307226 1327094
71 | 1327094 1347045
72 | 1347045 1364235
73 | 1364235 1384582
74 | 1384582 1404801
75 | 1404801 1425815
76 | 1425815 1445269
77 | 1445269 1464920
78 | 1464920 1484899
79 | 1484899 1505502
80 | 1505502 1525739
81 | 1525739 1539379
82 | 1539379 1557674
83 | 1557674 1578291
84 | 1578291 1593749
85 | 1593749 1611220
86 | 1611220 1631269
87 | 1631269 1651897
88 | 1651897 1671978
89 | 1671978 1692448
90 | 1692448 1711337
91 | 1711337 1729961
92 | 1729961 1750010
93 | 1750010 1769962
94 | 1769962 1789846
95 | 1789846 1809981
96 | 1809981 1829416
97 | 1829416 1848119
98 | 1848119 1866350
99 | 1866350 1882764
100 | 1882764 1902856
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/21.tsv:
--------------------------------------------------------------------------------
1 | 0 15025
2 | 15025 34678
3 | 34678 54923
4 | 54923 73088
5 | 73088 93879
6 | 93879 112039
7 | 112039 125847
8 | 125847 145816
9 | 145816 160974
10 | 160974 181172
11 | 181172 201124
12 | 201124 217979
13 | 217979 237253
14 | 237253 251372
15 | 251372 269740
16 | 269740 289774
17 | 289774 309510
18 | 309510 328703
19 | 328703 347350
20 | 347350 366184
21 | 366184 386377
22 | 386377 402195
23 | 402195 420328
24 | 420328 439892
25 | 439892 458031
26 | 458031 477975
27 | 477975 498474
28 | 498474 518561
29 | 518561 536054
30 | 536054 556110
31 | 556110 576493
32 | 576493 596482
33 | 596482 616581
34 | 616581 636286
35 | 636286 654466
36 | 654466 675041
37 | 675041 695067
38 | 695067 714856
39 | 714856 733250
40 | 733250 753126
41 | 753126 773638
42 | 773638 791656
43 | 791656 810093
44 | 810093 827152
45 | 827152 845188
46 | 845188 865334
47 | 865334 885041
48 | 885041 905026
49 | 905026 924798
50 | 924798 944614
51 | 944614 961187
52 | 961187 981423
53 | 981423 999390
54 | 999390 1019322
55 | 1019322 1038532
56 | 1038532 1057085
57 | 1057085 1076615
58 | 1076615 1092857
59 | 1092857 1112618
60 | 1112618 1132767
61 | 1132767 1153181
62 | 1153181 1172656
63 | 1172656 1188773
64 | 1188773 1204568
65 | 1204568 1224107
66 | 1224107 1244680
67 | 1244680 1264335
68 | 1264335 1281145
69 | 1281145 1300885
70 | 1300885 1321020
71 | 1321020 1338966
72 | 1338966 1352449
73 | 1352449 1372886
74 | 1372886 1393191
75 | 1393191 1413241
76 | 1413241 1433247
77 | 1433247 1451233
78 | 1451233 1470490
79 | 1470490 1490737
80 | 1490737 1510981
81 | 1510981 1529786
82 | 1529786 1546043
83 | 1546043 1565926
84 | 1565926 1585789
85 | 1585789 1603807
86 | 1603807 1619324
87 | 1619324 1640179
88 | 1640179 1658900
89 | 1658900 1678691
90 | 1678691 1699247
91 | 1699247 1716107
92 | 1716107 1737045
93 | 1737045 1757318
94 | 1757318 1776087
95 | 1776087 1795886
96 | 1795886 1815924
97 | 1815924 1836155
98 | 1836155 1856406
99 | 1856406 1875846
100 | 1875846 1894297
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/22.tsv:
--------------------------------------------------------------------------------
1 | 0 20462
2 | 20462 39466
3 | 39466 59799
4 | 59799 78618
5 | 78618 98590
6 | 98590 118748
7 | 118748 135664
8 | 135664 155341
9 | 155341 175027
10 | 175027 194095
11 | 194095 214114
12 | 214114 234623
13 | 234623 254909
14 | 254909 274587
15 | 274587 292342
16 | 292342 310385
17 | 310385 329738
18 | 329738 348685
19 | 348685 369260
20 | 369260 389110
21 | 389110 409677
22 | 409677 430170
23 | 430170 445767
24 | 445767 465159
25 | 465159 483074
26 | 483074 503762
27 | 503762 522724
28 | 522724 526951
29 | 526951 546118
30 | 546118 564611
31 | 564611 584237
32 | 584237 603005
33 | 603005 621899
34 | 621899 642476
35 | 642476 662846
36 | 662846 680684
37 | 680684 700580
38 | 700580 720579
39 | 720579 740580
40 | 740580 760591
41 | 760591 781382
42 | 781382 802166
43 | 802166 821968
44 | 821968 841732
45 | 841732 861541
46 | 861541 881685
47 | 881685 899784
48 | 899784 919865
49 | 919865 937921
50 | 937921 957078
51 | 957078 977828
52 | 977828 997523
53 | 997523 1017949
54 | 1017949 1037406
55 | 1037406 1055837
56 | 1055837 1074827
57 | 1074827 1095104
58 | 1095104 1112716
59 | 1112716 1132832
60 | 1132832 1148164
61 | 1148164 1168338
62 | 1168338 1189009
63 | 1189009 1204695
64 | 1204695 1224709
65 | 1224709 1244136
66 | 1244136 1264078
67 | 1264078 1284764
68 | 1284764 1304557
69 | 1304557 1322364
70 | 1322364 1341337
71 | 1341337 1361775
72 | 1361775 1381090
73 | 1381090 1399273
74 | 1399273 1419639
75 | 1419639 1439766
76 | 1439766 1459382
77 | 1459382 1473862
78 | 1473862 1494219
79 | 1494219 1513845
80 | 1513845 1530947
81 | 1530947 1550972
82 | 1550972 1569996
83 | 1569996 1589305
84 | 1589305 1607459
85 | 1607459 1621106
86 | 1621106 1640356
87 | 1640356 1660012
88 | 1660012 1680278
89 | 1680278 1699701
90 | 1699701 1718449
91 | 1718449 1738351
92 | 1738351 1756472
93 | 1756472 1776229
94 | 1776229 1796865
95 | 1796865 1815558
96 | 1815558 1835435
97 | 1835435 1855747
98 | 1855747 1872123
99 | 1872123 1891747
100 | 1891747 1912478
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/23.tsv:
--------------------------------------------------------------------------------
1 | 0 14616
2 | 14616 30487
3 | 30487 49855
4 | 49855 69572
5 | 69572 90236
6 | 90236 109573
7 | 109573 129452
8 | 129452 150165
9 | 150165 170114
10 | 170114 190547
11 | 190547 210458
12 | 210458 230327
13 | 230327 245330
14 | 245330 264821
15 | 264821 285450
16 | 285450 305049
17 | 305049 323667
18 | 323667 343085
19 | 343085 361230
20 | 361230 378775
21 | 378775 398775
22 | 398775 416331
23 | 416331 436161
24 | 436161 456628
25 | 456628 477111
26 | 477111 497438
27 | 497438 517762
28 | 517762 536905
29 | 536905 557252
30 | 557252 576792
31 | 576792 594642
32 | 594642 615089
33 | 615089 635248
34 | 635248 655310
35 | 655310 670782
36 | 670782 690905
37 | 690905 709305
38 | 709305 727793
39 | 727793 746487
40 | 746487 764563
41 | 764563 783994
42 | 783994 803288
43 | 803288 822745
44 | 822745 840521
45 | 840521 861047
46 | 861047 880203
47 | 880203 899962
48 | 899962 919572
49 | 919572 937658
50 | 937658 953835
51 | 953835 973603
52 | 973603 994183
53 | 994183 1013864
54 | 1013864 1034707
55 | 1034707 1055119
56 | 1055119 1075325
57 | 1075325 1094507
58 | 1094507 1113952
59 | 1113952 1134329
60 | 1134329 1154304
61 | 1154304 1171783
62 | 1171783 1191714
63 | 1191714 1210414
64 | 1210414 1231205
65 | 1231205 1249025
66 | 1249025 1269821
67 | 1269821 1289836
68 | 1289836 1304937
69 | 1304937 1323629
70 | 1323629 1342860
71 | 1342860 1362331
72 | 1362331 1382291
73 | 1382291 1401866
74 | 1401866 1420102
75 | 1420102 1439892
76 | 1439892 1459810
77 | 1459810 1480358
78 | 1480358 1500371
79 | 1500371 1519349
80 | 1519349 1539401
81 | 1539401 1555040
82 | 1555040 1574209
83 | 1574209 1594225
84 | 1594225 1611272
85 | 1611272 1628442
86 | 1628442 1648389
87 | 1648389 1669045
88 | 1669045 1687888
89 | 1687888 1705404
90 | 1705404 1723511
91 | 1723511 1743564
92 | 1743564 1763797
93 | 1763797 1782785
94 | 1782785 1801282
95 | 1801282 1820982
96 | 1820982 1840591
97 | 1840591 1857639
98 | 1857639 1877164
99 | 1877164 1897152
100 | 1897152 1915129
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/24.tsv:
--------------------------------------------------------------------------------
1 | 0 17930
2 | 17930 35716
3 | 35716 56326
4 | 56326 75352
5 | 75352 95486
6 | 95486 110649
7 | 110649 130196
8 | 130196 149150
9 | 149150 164584
10 | 164584 183470
11 | 183470 201619
12 | 201619 222197
13 | 222197 241773
14 | 241773 261569
15 | 261569 279223
16 | 279223 299519
17 | 299519 319551
18 | 319551 338679
19 | 338679 358311
20 | 358311 378132
21 | 378132 397588
22 | 397588 415094
23 | 415094 433479
24 | 433479 451588
25 | 451588 471050
26 | 471050 491434
27 | 491434 511931
28 | 511931 531505
29 | 531505 551943
30 | 551943 566963
31 | 566963 584415
32 | 584415 599784
33 | 599784 618606
34 | 618606 638391
35 | 638391 657943
36 | 657943 676676
37 | 676676 695876
38 | 695876 715863
39 | 715863 731614
40 | 731614 749852
41 | 749852 769228
42 | 769228 786952
43 | 786952 806443
44 | 806443 822220
45 | 822220 842492
46 | 842492 862206
47 | 862206 882344
48 | 882344 902338
49 | 902338 917957
50 | 917957 937018
51 | 937018 956155
52 | 956155 975439
53 | 975439 994412
54 | 994412 1012845
55 | 1012845 1030333
56 | 1030333 1048581
57 | 1048581 1068236
58 | 1068236 1087652
59 | 1087652 1107504
60 | 1107504 1123900
61 | 1123900 1141764
62 | 1141764 1161956
63 | 1161956 1176730
64 | 1176730 1196520
65 | 1196520 1214995
66 | 1214995 1234122
67 | 1234122 1253715
68 | 1253715 1274109
69 | 1274109 1288735
70 | 1288735 1305838
71 | 1305838 1326049
72 | 1326049 1346532
73 | 1346532 1366608
74 | 1366608 1386939
75 | 1386939 1405926
76 | 1405926 1425669
77 | 1425669 1443577
78 | 1443577 1460273
79 | 1460273 1478940
80 | 1478940 1498503
81 | 1498503 1518071
82 | 1518071 1537418
83 | 1537418 1557680
84 | 1557680 1576004
85 | 1576004 1596177
86 | 1596177 1615280
87 | 1615280 1633830
88 | 1633830 1653365
89 | 1653365 1674078
90 | 1674078 1693843
91 | 1693843 1713687
92 | 1713687 1732437
93 | 1732437 1751853
94 | 1751853 1771785
95 | 1771785 1791937
96 | 1791937 1811648
97 | 1811648 1827072
98 | 1827072 1841075
99 | 1841075 1859096
100 | 1859096 1879114
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/25.tsv:
--------------------------------------------------------------------------------
1 | 0 18020
2 | 18020 36464
3 | 36464 56338
4 | 56338 75400
5 | 75400 94988
6 | 94988 114749
7 | 114749 133522
8 | 133522 153640
9 | 153640 171741
10 | 171741 192023
11 | 192023 211443
12 | 211443 231060
13 | 231060 251140
14 | 251140 269635
15 | 269635 289806
16 | 289806 309633
17 | 309633 329435
18 | 329435 348299
19 | 348299 368499
20 | 368499 387120
21 | 387120 407762
22 | 407762 425084
23 | 425084 442719
24 | 442719 461700
25 | 461700 480713
26 | 480713 496287
27 | 496287 516555
28 | 516555 535804
29 | 535804 547967
30 | 547967 567972
31 | 567972 587437
32 | 587437 602115
33 | 602115 621994
34 | 621994 641864
35 | 641864 661018
36 | 661018 680724
37 | 680724 700369
38 | 700369 720033
39 | 720033 738908
40 | 738908 757450
41 | 757450 772874
42 | 772874 792824
43 | 792824 812514
44 | 812514 831370
45 | 831370 846835
46 | 846835 862491
47 | 862491 882219
48 | 882219 902926
49 | 902926 919565
50 | 919565 939223
51 | 939223 954693
52 | 954693 972494
53 | 972494 989417
54 | 989417 1009133
55 | 1009133 1029330
56 | 1029330 1049775
57 | 1049775 1067346
58 | 1067346 1086907
59 | 1086907 1106449
60 | 1106449 1126778
61 | 1126778 1146893
62 | 1146893 1166497
63 | 1166497 1180800
64 | 1180800 1200264
65 | 1200264 1214230
66 | 1214230 1234341
67 | 1234341 1254327
68 | 1254327 1269488
69 | 1269488 1289925
70 | 1289925 1309930
71 | 1309930 1330127
72 | 1330127 1348456
73 | 1348456 1366724
74 | 1366724 1386621
75 | 1386621 1404795
76 | 1404795 1424750
77 | 1424750 1441711
78 | 1441711 1461201
79 | 1461201 1481532
80 | 1481532 1502255
81 | 1502255 1520616
82 | 1520616 1540827
83 | 1540827 1560802
84 | 1560802 1580740
85 | 1580740 1599987
86 | 1599987 1615136
87 | 1615136 1630299
88 | 1630299 1650616
89 | 1650616 1670619
90 | 1670619 1689853
91 | 1689853 1707608
92 | 1707608 1727504
93 | 1727504 1741775
94 | 1741775 1760535
95 | 1760535 1779470
96 | 1779470 1799828
97 | 1799828 1819938
98 | 1819938 1838304
99 | 1838304 1856600
100 | 1856600 1877473
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/26.tsv:
--------------------------------------------------------------------------------
1 | 0 19837
2 | 19837 39673
3 | 39673 59545
4 | 59545 79441
5 | 79441 97366
6 | 97366 114427
7 | 114427 134177
8 | 134177 153048
9 | 153048 172965
10 | 172965 192624
11 | 192624 212880
12 | 212880 232975
13 | 232975 251610
14 | 251610 271894
15 | 271894 291888
16 | 291888 309931
17 | 309931 330072
18 | 330072 348408
19 | 348408 369125
20 | 369125 388694
21 | 388694 406185
22 | 406185 425530
23 | 425530 445339
24 | 445339 463912
25 | 463912 483961
26 | 483961 503485
27 | 503485 523605
28 | 523605 543495
29 | 543495 563756
30 | 563756 582714
31 | 582714 602565
32 | 602565 620734
33 | 620734 640062
34 | 640062 657889
35 | 657889 673643
36 | 673643 691866
37 | 691866 711405
38 | 711405 731115
39 | 731115 751063
40 | 751063 770533
41 | 770533 791075
42 | 791075 810445
43 | 810445 828617
44 | 828617 848864
45 | 848864 868718
46 | 868718 888632
47 | 888632 908469
48 | 908469 929276
49 | 929276 949831
50 | 949831 968785
51 | 968785 988226
52 | 988226 1005119
53 | 1005119 1025489
54 | 1025489 1045670
55 | 1045670 1062752
56 | 1062752 1081705
57 | 1081705 1101257
58 | 1101257 1121112
59 | 1121112 1141266
60 | 1141266 1160979
61 | 1160979 1181094
62 | 1181094 1200679
63 | 1200679 1221577
64 | 1221577 1241138
65 | 1241138 1260844
66 | 1260844 1280727
67 | 1280727 1300335
68 | 1300335 1320027
69 | 1320027 1338050
70 | 1338050 1358811
71 | 1358811 1378892
72 | 1378892 1398774
73 | 1398774 1419069
74 | 1419069 1439395
75 | 1439395 1457295
76 | 1457295 1477686
77 | 1477686 1498322
78 | 1498322 1518188
79 | 1518188 1537935
80 | 1537935 1555734
81 | 1555734 1576280
82 | 1576280 1591402
83 | 1591402 1610958
84 | 1610958 1630617
85 | 1630617 1645503
86 | 1645503 1665451
87 | 1665451 1685056
88 | 1685056 1704859
89 | 1704859 1724790
90 | 1724790 1740197
91 | 1740197 1759649
92 | 1759649 1777134
93 | 1777134 1797467
94 | 1797467 1817133
95 | 1817133 1836756
96 | 1836756 1857542
97 | 1857542 1877216
98 | 1877216 1896417
99 | 1896417 1916474
100 | 1916474 1931203
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/27.tsv:
--------------------------------------------------------------------------------
1 | 0 17409
2 | 17409 33431
3 | 33431 53139
4 | 53139 73853
5 | 73853 88386
6 | 88386 101754
7 | 101754 121807
8 | 121807 137366
9 | 137366 157532
10 | 157532 176827
11 | 176827 192972
12 | 192972 213310
13 | 213310 233324
14 | 233324 249099
15 | 249099 267398
16 | 267398 287218
17 | 287218 307186
18 | 307186 327608
19 | 327608 348212
20 | 348212 367666
21 | 367666 383822
22 | 383822 401737
23 | 401737 417257
24 | 417257 434570
25 | 434570 449994
26 | 449994 466056
27 | 466056 486185
28 | 486185 505729
29 | 505729 526173
30 | 526173 541701
31 | 541701 561289
32 | 561289 581186
33 | 581186 602065
34 | 602065 620402
35 | 620402 640150
36 | 640150 660182
37 | 660182 680058
38 | 680058 700624
39 | 700624 719046
40 | 719046 739252
41 | 739252 758667
42 | 758667 778310
43 | 778310 797773
44 | 797773 815638
45 | 815638 832564
46 | 832564 850646
47 | 850646 868178
48 | 868178 885575
49 | 885575 905656
50 | 905656 925470
51 | 925470 944713
52 | 944713 964765
53 | 964765 983787
54 | 983787 1003301
55 | 1003301 1021319
56 | 1021319 1040353
57 | 1040353 1060271
58 | 1060271 1078603
59 | 1078603 1098622
60 | 1098622 1117371
61 | 1117371 1135729
62 | 1135729 1154935
63 | 1154935 1170574
64 | 1170574 1189030
65 | 1189030 1209345
66 | 1209345 1229509
67 | 1229509 1244380
68 | 1244380 1260662
69 | 1260662 1279025
70 | 1279025 1298944
71 | 1298944 1318705
72 | 1318705 1339217
73 | 1339217 1359520
74 | 1359520 1379325
75 | 1379325 1399608
76 | 1399608 1420199
77 | 1420199 1438991
78 | 1438991 1456979
79 | 1456979 1475046
80 | 1475046 1495148
81 | 1495148 1515323
82 | 1515323 1536245
83 | 1536245 1550643
84 | 1550643 1571295
85 | 1571295 1590479
86 | 1590479 1608176
87 | 1608176 1629025
88 | 1629025 1648834
89 | 1648834 1669111
90 | 1669111 1688921
91 | 1688921 1709055
92 | 1709055 1727225
93 | 1727225 1746817
94 | 1746817 1765713
95 | 1765713 1783207
96 | 1783207 1802269
97 | 1802269 1822069
98 | 1822069 1842268
99 | 1842268 1863341
100 | 1863341 1878568
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/28.tsv:
--------------------------------------------------------------------------------
1 | 0 19609
2 | 19609 35238
3 | 35238 54720
4 | 54720 71094
5 | 71094 90601
6 | 90601 109072
7 | 109072 127287
8 | 127287 146784
9 | 146784 166725
10 | 166725 185195
11 | 185195 204336
12 | 204336 222097
13 | 222097 241734
14 | 241734 257191
15 | 257191 275220
16 | 275220 295848
17 | 295848 316499
18 | 316499 336713
19 | 336713 357164
20 | 357164 372979
21 | 372979 392967
22 | 392967 413157
23 | 413157 430343
24 | 430343 448624
25 | 448624 468500
26 | 468500 488897
27 | 488897 509091
28 | 509091 525846
29 | 525846 545656
30 | 545656 565074
31 | 565074 585093
32 | 585093 604786
33 | 604786 624005
34 | 624005 644258
35 | 644258 664630
36 | 664630 685297
37 | 685297 705694
38 | 705694 724855
39 | 724855 737192
40 | 737192 757673
41 | 757673 775742
42 | 775742 795511
43 | 795511 815454
44 | 815454 830066
45 | 830066 847840
46 | 847840 867883
47 | 867883 888155
48 | 888155 908036
49 | 908036 924849
50 | 924849 943777
51 | 943777 963648
52 | 963648 983937
53 | 983937 1003198
54 | 1003198 1022414
55 | 1022414 1042131
56 | 1042131 1061424
57 | 1061424 1082190
58 | 1082190 1102403
59 | 1102403 1122474
60 | 1122474 1141684
61 | 1141684 1161342
62 | 1161342 1181110
63 | 1181110 1196130
64 | 1196130 1216056
65 | 1216056 1236226
66 | 1236226 1255870
67 | 1255870 1271523
68 | 1271523 1291356
69 | 1291356 1311029
70 | 1311029 1331337
71 | 1331337 1350992
72 | 1350992 1370835
73 | 1370835 1391487
74 | 1391487 1411861
75 | 1411861 1431051
76 | 1431051 1451043
77 | 1451043 1471802
78 | 1471802 1489979
79 | 1489979 1509546
80 | 1509546 1529930
81 | 1529930 1549716
82 | 1549716 1570099
83 | 1570099 1588095
84 | 1588095 1608042
85 | 1608042 1628440
86 | 1628440 1648599
87 | 1648599 1664020
88 | 1664020 1684268
89 | 1684268 1703685
90 | 1703685 1724174
91 | 1724174 1743652
92 | 1743652 1761341
93 | 1761341 1780758
94 | 1780758 1800864
95 | 1800864 1821112
96 | 1821112 1839567
97 | 1839567 1859413
98 | 1859413 1879182
99 | 1879182 1899541
100 | 1899541 1919497
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/29.tsv:
--------------------------------------------------------------------------------
1 | 0 20534
2 | 20534 40352
3 | 40352 60455
4 | 60455 78685
5 | 78685 99175
6 | 99175 119301
7 | 119301 139111
8 | 139111 159633
9 | 159633 178287
10 | 178287 193165
11 | 193165 213187
12 | 213187 230425
13 | 230425 247688
14 | 247688 267654
15 | 267654 287394
16 | 287394 304965
17 | 304965 324830
18 | 324830 343839
19 | 343839 363945
20 | 363945 383786
21 | 383786 402480
22 | 402480 423161
23 | 423161 442758
24 | 442758 458444
25 | 458444 478694
26 | 478694 498564
27 | 498564 519481
28 | 519481 539138
29 | 539138 559556
30 | 559556 577271
31 | 577271 595503
32 | 595503 615259
33 | 615259 635187
34 | 635187 654725
35 | 654725 674227
36 | 674227 692575
37 | 692575 710575
38 | 710575 730656
39 | 730656 748136
40 | 748136 768274
41 | 768274 788893
42 | 788893 807230
43 | 807230 823021
44 | 823021 842744
45 | 842744 860262
46 | 860262 879057
47 | 879057 894263
48 | 894263 911324
49 | 911324 931174
50 | 931174 949440
51 | 949440 969577
52 | 969577 989864
53 | 989864 1008063
54 | 1008063 1025645
55 | 1025645 1046107
56 | 1046107 1061862
57 | 1061862 1080060
58 | 1080060 1100025
59 | 1100025 1120260
60 | 1120260 1138433
61 | 1138433 1156443
62 | 1156443 1176375
63 | 1176375 1196012
64 | 1196012 1216413
65 | 1216413 1234845
66 | 1234845 1255220
67 | 1255220 1275963
68 | 1275963 1295453
69 | 1295453 1316135
70 | 1316135 1336383
71 | 1336383 1354615
72 | 1354615 1374848
73 | 1374848 1394708
74 | 1394708 1414326
75 | 1414326 1434688
76 | 1434688 1451771
77 | 1451771 1471456
78 | 1471456 1488265
79 | 1488265 1508161
80 | 1508161 1528392
81 | 1528392 1546270
82 | 1546270 1566290
83 | 1566290 1585239
84 | 1585239 1605279
85 | 1605279 1625403
86 | 1625403 1645659
87 | 1645659 1663864
88 | 1663864 1683389
89 | 1683389 1703043
90 | 1703043 1723215
91 | 1723215 1737153
92 | 1737153 1757310
93 | 1757310 1774937
94 | 1774937 1795635
95 | 1795635 1816461
96 | 1816461 1834770
97 | 1834770 1852896
98 | 1852896 1872896
99 | 1872896 1892529
100 | 1892529 1910680
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/3.tsv:
--------------------------------------------------------------------------------
1 | 0 18687
2 | 18687 36837
3 | 36837 55662
4 | 55662 75306
5 | 75306 95642
6 | 95642 115597
7 | 115597 130823
8 | 130823 149105
9 | 149105 169826
10 | 169826 185787
11 | 185787 206429
12 | 206429 225188
13 | 225188 244726
14 | 244726 264937
15 | 264937 283874
16 | 283874 299921
17 | 299921 318266
18 | 318266 338729
19 | 338729 358824
20 | 358824 378693
21 | 378693 396565
22 | 396565 415373
23 | 415373 435739
24 | 435739 454031
25 | 454031 474017
26 | 474017 493842
27 | 493842 508877
28 | 508877 528496
29 | 528496 548708
30 | 548708 566891
31 | 566891 585444
32 | 585444 604829
33 | 604829 624783
34 | 624783 645343
35 | 645343 665541
36 | 665541 685934
37 | 685934 706469
38 | 706469 725882
39 | 725882 746118
40 | 746118 759917
41 | 759917 780052
42 | 780052 800348
43 | 800348 820787
44 | 820787 841444
45 | 841444 861734
46 | 861734 882137
47 | 882137 902481
48 | 902481 921650
49 | 921650 939742
50 | 939742 958798
51 | 958798 979669
52 | 979669 999421
53 | 999421 1015168
54 | 1015168 1034938
55 | 1034938 1055466
56 | 1055466 1074992
57 | 1074992 1095092
58 | 1095092 1114978
59 | 1114978 1134724
60 | 1134724 1155382
61 | 1155382 1175930
62 | 1175930 1191281
63 | 1191281 1211115
64 | 1211115 1231140
65 | 1231140 1250692
66 | 1250692 1267700
67 | 1267700 1288145
68 | 1288145 1308075
69 | 1308075 1326536
70 | 1326536 1346684
71 | 1346684 1365047
72 | 1365047 1383448
73 | 1383448 1398867
74 | 1398867 1418921
75 | 1418921 1437553
76 | 1437553 1457732
77 | 1457732 1477924
78 | 1477924 1497789
79 | 1497789 1512926
80 | 1512926 1532441
81 | 1532441 1551179
82 | 1551179 1570859
83 | 1570859 1590962
84 | 1590962 1610294
85 | 1610294 1630167
86 | 1630167 1648490
87 | 1648490 1667885
88 | 1667885 1685138
89 | 1685138 1703861
90 | 1703861 1724328
91 | 1724328 1743774
92 | 1743774 1763625
93 | 1763625 1781088
94 | 1781088 1799688
95 | 1799688 1819198
96 | 1819198 1838927
97 | 1838927 1858339
98 | 1858339 1877793
99 | 1877793 1897422
100 | 1897422 1916250
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/30.tsv:
--------------------------------------------------------------------------------
1 | 0 19552
2 | 19552 39020
3 | 39020 57428
4 | 57428 73185
5 | 73185 93504
6 | 93504 114304
7 | 114304 134209
8 | 134209 154518
9 | 154518 170245
10 | 170245 190544
11 | 190544 208965
12 | 208965 229187
13 | 229187 249062
14 | 249062 267176
15 | 267176 287402
16 | 287402 307407
17 | 307407 326750
18 | 326750 346779
19 | 346779 363646
20 | 363646 382999
21 | 382999 397240
22 | 397240 415532
23 | 415532 435967
24 | 435967 455692
25 | 455692 475851
26 | 475851 495720
27 | 495720 513430
28 | 513430 533034
29 | 533034 549763
30 | 549763 569054
31 | 569054 588399
32 | 588399 608498
33 | 608498 628128
34 | 628128 645066
35 | 645066 665553
36 | 665553 683801
37 | 683801 701913
38 | 701913 719611
39 | 719611 738085
40 | 738085 758063
41 | 758063 776010
42 | 776010 796341
43 | 796341 815233
44 | 815233 835525
45 | 835525 855163
46 | 855163 876086
47 | 876086 896007
48 | 896007 915345
49 | 915345 930662
50 | 930662 946512
51 | 946512 965903
52 | 965903 985662
53 | 985662 1004876
54 | 1004876 1025354
55 | 1025354 1045965
56 | 1045965 1063931
57 | 1063931 1084219
58 | 1084219 1104308
59 | 1104308 1123783
60 | 1123783 1144247
61 | 1144247 1164047
62 | 1164047 1183234
63 | 1183234 1203360
64 | 1203360 1223459
65 | 1223459 1243437
66 | 1243437 1263937
67 | 1263937 1283856
68 | 1283856 1303657
69 | 1303657 1319047
70 | 1319047 1337300
71 | 1337300 1357550
72 | 1357550 1377048
73 | 1377048 1397104
74 | 1397104 1416878
75 | 1416878 1437239
76 | 1437239 1451685
77 | 1451685 1467511
78 | 1467511 1486730
79 | 1486730 1503691
80 | 1503691 1523109
81 | 1523109 1543012
82 | 1543012 1562462
83 | 1562462 1582675
84 | 1582675 1600649
85 | 1600649 1619677
86 | 1619677 1635543
87 | 1635543 1654840
88 | 1654840 1675727
89 | 1675727 1695538
90 | 1695538 1710259
91 | 1710259 1725256
92 | 1725256 1745100
93 | 1745100 1765337
94 | 1765337 1783568
95 | 1783568 1803224
96 | 1803224 1822774
97 | 1822774 1843613
98 | 1843613 1863378
99 | 1863378 1880178
100 | 1880178 1900355
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/31.tsv:
--------------------------------------------------------------------------------
1 | 0 19493
2 | 19493 39997
3 | 39997 58851
4 | 58851 78900
5 | 78900 98984
6 | 98984 119130
7 | 119130 139516
8 | 139516 158322
9 | 158322 176072
10 | 176072 191973
11 | 191973 211714
12 | 211714 231271
13 | 231271 249939
14 | 249939 270067
15 | 270067 290263
16 | 290263 310263
17 | 310263 329925
18 | 329925 349755
19 | 349755 369173
20 | 369173 387337
21 | 387337 406821
22 | 406821 425964
23 | 425964 446039
24 | 446039 466705
25 | 466705 487418
26 | 487418 506026
27 | 506026 525459
28 | 525459 544375
29 | 544375 561885
30 | 561885 579741
31 | 579741 599996
32 | 599996 616637
33 | 616637 636617
34 | 636617 656864
35 | 656864 677248
36 | 677248 697577
37 | 697577 716069
38 | 716069 735747
39 | 735747 755266
40 | 755266 773746
41 | 773746 793594
42 | 793594 813086
43 | 813086 828646
44 | 828646 849135
45 | 849135 866935
46 | 866935 883232
47 | 883232 903167
48 | 903167 924078
49 | 924078 941901
50 | 941901 961935
51 | 961935 982299
52 | 982299 1002548
53 | 1002548 1021855
54 | 1021855 1040073
55 | 1040073 1056317
56 | 1056317 1075587
57 | 1075587 1093809
58 | 1093809 1113817
59 | 1113817 1132122
60 | 1132122 1150395
61 | 1150395 1168169
62 | 1168169 1188752
63 | 1188752 1206954
64 | 1206954 1227000
65 | 1227000 1247109
66 | 1247109 1266746
67 | 1266746 1282293
68 | 1282293 1297876
69 | 1297876 1317586
70 | 1317586 1337281
71 | 1337281 1355643
72 | 1355643 1376188
73 | 1376188 1392968
74 | 1392968 1412800
75 | 1412800 1433160
76 | 1433160 1453405
77 | 1453405 1473036
78 | 1473036 1493161
79 | 1493161 1508702
80 | 1508702 1528808
81 | 1528808 1549201
82 | 1549201 1568880
83 | 1568880 1588708
84 | 1588708 1608423
85 | 1608423 1628463
86 | 1628463 1648928
87 | 1648928 1668224
88 | 1668224 1684930
89 | 1684930 1705396
90 | 1705396 1726065
91 | 1726065 1744568
92 | 1744568 1764941
93 | 1764941 1785214
94 | 1785214 1799346
95 | 1799346 1819649
96 | 1819649 1837438
97 | 1837438 1856634
98 | 1856634 1876662
99 | 1876662 1896489
100 | 1896489 1917141
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/32.tsv:
--------------------------------------------------------------------------------
1 | 0 19263
2 | 19263 39441
3 | 39441 58400
4 | 58400 78411
5 | 78411 98326
6 | 98326 117957
7 | 117957 137765
8 | 137765 156734
9 | 156734 175550
10 | 175550 191445
11 | 191445 211290
12 | 211290 231558
13 | 231558 250154
14 | 250154 270259
15 | 270259 290350
16 | 290350 308274
17 | 308274 323847
18 | 323847 342641
19 | 342641 363181
20 | 363181 383366
21 | 383366 403127
22 | 403127 423611
23 | 423611 437392
24 | 437392 456514
25 | 456514 476341
26 | 476341 496915
27 | 496915 517540
28 | 517540 534405
29 | 534405 550517
30 | 550517 570100
31 | 570100 589085
32 | 589085 606748
33 | 606748 626900
34 | 626900 646859
35 | 646859 667114
36 | 667114 687163
37 | 687163 703148
38 | 703148 722699
39 | 722699 742878
40 | 742878 761000
41 | 761000 778198
42 | 778198 798429
43 | 798429 818698
44 | 818698 839105
45 | 839105 858160
46 | 858160 877772
47 | 877772 897499
48 | 897499 917967
49 | 917967 937644
50 | 937644 957408
51 | 957408 977425
52 | 977425 996010
53 | 996010 1016141
54 | 1016141 1036052
55 | 1036052 1053996
56 | 1053996 1074118
57 | 1074118 1089725
58 | 1089725 1104891
59 | 1104891 1120769
60 | 1120769 1139231
61 | 1139231 1159525
62 | 1159525 1179111
63 | 1179111 1198142
64 | 1198142 1212097
65 | 1212097 1231642
66 | 1231642 1251785
67 | 1251785 1270730
68 | 1270730 1290317
69 | 1290317 1310359
70 | 1310359 1330291
71 | 1330291 1343564
72 | 1343564 1363167
73 | 1363167 1382783
74 | 1382783 1402875
75 | 1402875 1423136
76 | 1423136 1442096
77 | 1442096 1461881
78 | 1461881 1480985
79 | 1480985 1499733
80 | 1499733 1519809
81 | 1519809 1535516
82 | 1535516 1553043
83 | 1553043 1572791
84 | 1572791 1592302
85 | 1592302 1612178
86 | 1612178 1631185
87 | 1631185 1650897
88 | 1650897 1669988
89 | 1669988 1687398
90 | 1687398 1707149
91 | 1707149 1727017
92 | 1727017 1747254
93 | 1747254 1765163
94 | 1765163 1785267
95 | 1785267 1802176
96 | 1802176 1822864
97 | 1822864 1842967
98 | 1842967 1860550
99 | 1860550 1880774
100 | 1880774 1901065
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/33.tsv:
--------------------------------------------------------------------------------
1 | 0 20542
2 | 20542 39892
3 | 39892 57760
4 | 57760 78068
5 | 78068 97200
6 | 97200 114561
7 | 114561 134280
8 | 134280 154295
9 | 154295 175119
10 | 175119 195859
11 | 195859 215644
12 | 215644 235269
13 | 235269 254691
14 | 254691 274378
15 | 274378 293446
16 | 293446 313657
17 | 313657 333403
18 | 333403 353528
19 | 353528 372035
20 | 372035 392556
21 | 392556 410176
22 | 410176 426184
23 | 426184 445361
24 | 445361 465499
25 | 465499 485055
26 | 485055 502720
27 | 502720 517898
28 | 517898 535687
29 | 535687 555898
30 | 555898 575870
31 | 575870 595647
32 | 595647 615425
33 | 615425 635815
34 | 635815 652044
35 | 652044 671870
36 | 671870 691462
37 | 691462 711651
38 | 711651 731382
39 | 731382 751288
40 | 751288 771610
41 | 771610 792213
42 | 792213 812421
43 | 812421 832271
44 | 832271 850331
45 | 850331 867793
46 | 867793 888109
47 | 888109 905103
48 | 905103 920768
49 | 920768 938673
50 | 938673 958548
51 | 958548 978231
52 | 978231 998230
53 | 998230 1018120
54 | 1018120 1037333
55 | 1037333 1057555
56 | 1057555 1077115
57 | 1077115 1096620
58 | 1096620 1114908
59 | 1114908 1130218
60 | 1130218 1150174
61 | 1150174 1165410
62 | 1165410 1184880
63 | 1184880 1202689
64 | 1202689 1223185
65 | 1223185 1243506
66 | 1243506 1263202
67 | 1263202 1283073
68 | 1283073 1298691
69 | 1298691 1319315
70 | 1319315 1339544
71 | 1339544 1359520
72 | 1359520 1377496
73 | 1377496 1397706
74 | 1397706 1418428
75 | 1418428 1437625
76 | 1437625 1456769
77 | 1456769 1476692
78 | 1476692 1490989
79 | 1490989 1511713
80 | 1511713 1530419
81 | 1530419 1550860
82 | 1550860 1569817
83 | 1569817 1588150
84 | 1588150 1603757
85 | 1603757 1623419
86 | 1623419 1642925
87 | 1642925 1656067
88 | 1656067 1675870
89 | 1675870 1694584
90 | 1694584 1714083
91 | 1714083 1733128
92 | 1733128 1751622
93 | 1751622 1772475
94 | 1772475 1791286
95 | 1791286 1809606
96 | 1809606 1829134
97 | 1829134 1844315
98 | 1844315 1864237
99 | 1864237 1882400
100 | 1882400 1901918
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/34.tsv:
--------------------------------------------------------------------------------
1 | 0 19515
2 | 19515 39636
3 | 39636 59952
4 | 59952 78929
5 | 78929 98810
6 | 98810 119401
7 | 119401 139579
8 | 139579 159317
9 | 159317 174559
10 | 174559 189931
11 | 189931 209582
12 | 209582 227708
13 | 227708 248356
14 | 248356 268079
15 | 268079 288453
16 | 288453 309039
17 | 309039 328348
18 | 328348 346475
19 | 346475 366281
20 | 366281 386234
21 | 386234 405862
22 | 405862 425587
23 | 425587 444206
24 | 444206 459393
25 | 459393 479723
26 | 479723 498576
27 | 498576 519227
28 | 519227 536787
29 | 536787 556976
30 | 556976 576160
31 | 576160 595949
32 | 595949 616045
33 | 616045 636074
34 | 636074 653374
35 | 653374 673015
36 | 673015 692089
37 | 692089 712757
38 | 712757 733004
39 | 733004 753564
40 | 753564 773276
41 | 773276 793419
42 | 793419 808322
43 | 808322 828321
44 | 828321 847496
45 | 847496 866225
46 | 866225 885954
47 | 885954 905038
48 | 905038 925256
49 | 925256 945162
50 | 945162 965289
51 | 965289 984596
52 | 984596 1005277
53 | 1005277 1020742
54 | 1020742 1041164
55 | 1041164 1061002
56 | 1061002 1080665
57 | 1080665 1100747
58 | 1100747 1120953
59 | 1120953 1139235
60 | 1139235 1159259
61 | 1159259 1179630
62 | 1179630 1200460
63 | 1200460 1220021
64 | 1220021 1238926
65 | 1238926 1258115
66 | 1258115 1277783
67 | 1277783 1297910
68 | 1297910 1318308
69 | 1318308 1337086
70 | 1337086 1356369
71 | 1356369 1375749
72 | 1375749 1391027
73 | 1391027 1410773
74 | 1410773 1430105
75 | 1430105 1450314
76 | 1450314 1470144
77 | 1470144 1489790
78 | 1489790 1507925
79 | 1507925 1523395
80 | 1523395 1544376
81 | 1544376 1564402
82 | 1564402 1583093
83 | 1583093 1603902
84 | 1603902 1623459
85 | 1623459 1642841
86 | 1642841 1662617
87 | 1662617 1678613
88 | 1678613 1698480
89 | 1698480 1719061
90 | 1719061 1737177
91 | 1737177 1756668
92 | 1756668 1776979
93 | 1776979 1794661
94 | 1794661 1814017
95 | 1814017 1833085
96 | 1833085 1849171
97 | 1849171 1869854
98 | 1869854 1887854
99 | 1887854 1908131
100 | 1908131 1927822
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/35.tsv:
--------------------------------------------------------------------------------
1 | 0 20574
2 | 20574 39971
3 | 39971 60100
4 | 60100 74478
5 | 74478 94997
6 | 94997 115062
7 | 115062 133621
8 | 133621 154350
9 | 154350 174630
10 | 174630 192480
11 | 192480 213013
12 | 213013 233319
13 | 233319 251414
14 | 251414 269817
15 | 269817 287864
16 | 287864 308133
17 | 308133 327850
18 | 327850 347525
19 | 347525 367513
20 | 367513 387489
21 | 387489 407600
22 | 407600 426791
23 | 426791 443206
24 | 443206 458557
25 | 458557 478588
26 | 478588 498766
27 | 498766 518527
28 | 518527 536694
29 | 536694 556387
30 | 556387 576659
31 | 576659 594775
32 | 594775 614241
33 | 614241 629756
34 | 629756 647935
35 | 647935 668190
36 | 668190 687778
37 | 687778 708027
38 | 708027 727550
39 | 727550 747542
40 | 747542 767190
41 | 767190 786959
42 | 786959 805930
43 | 805930 826643
44 | 826643 846700
45 | 846700 863052
46 | 863052 883087
47 | 883087 900847
48 | 900847 921174
49 | 921174 941072
50 | 941072 960982
51 | 960982 980747
52 | 980747 1000939
53 | 1000939 1021290
54 | 1021290 1041437
55 | 1041437 1061396
56 | 1061396 1081530
57 | 1081530 1101325
58 | 1101325 1121097
59 | 1121097 1139606
60 | 1139606 1160051
61 | 1160051 1177840
62 | 1177840 1191716
63 | 1191716 1211731
64 | 1211731 1229726
65 | 1229726 1249683
66 | 1249683 1265173
67 | 1265173 1281947
68 | 1281947 1302159
69 | 1302159 1322169
70 | 1322169 1341727
71 | 1341727 1361459
72 | 1361459 1381636
73 | 1381636 1398576
74 | 1398576 1414901
75 | 1414901 1435236
76 | 1435236 1455533
77 | 1455533 1475630
78 | 1475630 1495787
79 | 1495787 1514818
80 | 1514818 1535161
81 | 1535161 1555127
82 | 1555127 1574771
83 | 1574771 1592280
84 | 1592280 1612597
85 | 1612597 1632243
86 | 1632243 1652640
87 | 1652640 1672773
88 | 1672773 1692901
89 | 1692901 1712626
90 | 1712626 1732956
91 | 1732956 1752448
92 | 1752448 1770829
93 | 1770829 1789150
94 | 1789150 1804695
95 | 1804695 1822911
96 | 1822911 1837879
97 | 1837879 1855450
98 | 1855450 1873370
99 | 1873370 1893613
100 | 1893613 1912237
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/36.tsv:
--------------------------------------------------------------------------------
1 | 0 19287
2 | 19287 39358
3 | 39358 57718
4 | 57718 78453
5 | 78453 98663
6 | 98663 118230
7 | 118230 137711
8 | 137711 155475
9 | 155475 175154
10 | 175154 192002
11 | 192002 211869
12 | 211869 231665
13 | 231665 251370
14 | 251370 270515
15 | 270515 290990
16 | 290990 311277
17 | 311277 328148
18 | 328148 348285
19 | 348285 368503
20 | 368503 388935
21 | 388935 406410
22 | 406410 424407
23 | 424407 444768
24 | 444768 462086
25 | 462086 482753
26 | 482753 503042
27 | 503042 523133
28 | 523133 542434
29 | 542434 560468
30 | 560468 577604
31 | 577604 597517
32 | 597517 616080
33 | 616080 630174
34 | 630174 648605
35 | 648605 667109
36 | 667109 682226
37 | 682226 700319
38 | 700319 717934
39 | 717934 736014
40 | 736014 756201
41 | 756201 775501
42 | 775501 795470
43 | 795470 814966
44 | 814966 834799
45 | 834799 854495
46 | 854495 870412
47 | 870412 889833
48 | 889833 909189
49 | 909189 929181
50 | 929181 947161
51 | 947161 965468
52 | 965468 985512
53 | 985512 1005434
54 | 1005434 1025833
55 | 1025833 1045533
56 | 1045533 1065716
57 | 1065716 1081223
58 | 1081223 1099620
59 | 1099620 1118600
60 | 1118600 1136114
61 | 1136114 1154567
62 | 1154567 1173378
63 | 1173378 1192718
64 | 1192718 1212554
65 | 1212554 1228253
66 | 1228253 1247874
67 | 1247874 1268214
68 | 1268214 1288783
69 | 1288783 1308227
70 | 1308227 1328224
71 | 1328224 1348920
72 | 1348920 1368805
73 | 1368805 1388588
74 | 1388588 1405156
75 | 1405156 1424367
76 | 1424367 1444342
77 | 1444342 1464248
78 | 1464248 1484605
79 | 1484605 1505009
80 | 1505009 1525258
81 | 1525258 1544883
82 | 1544883 1565405
83 | 1565405 1584873
84 | 1584873 1600040
85 | 1600040 1619003
86 | 1619003 1639014
87 | 1639014 1658997
88 | 1658997 1678690
89 | 1678690 1696511
90 | 1696511 1715452
91 | 1715452 1731237
92 | 1731237 1750719
93 | 1750719 1767699
94 | 1767699 1788306
95 | 1788306 1803611
96 | 1803611 1821443
97 | 1821443 1841051
98 | 1841051 1860142
99 | 1860142 1880042
100 | 1880042 1900071
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/37.tsv:
--------------------------------------------------------------------------------
1 | 0 19309
2 | 19309 38897
3 | 38897 59610
4 | 59610 78069
5 | 78069 97185
6 | 97185 117382
7 | 117382 136713
8 | 136713 157034
9 | 157034 175208
10 | 175208 193026
11 | 193026 213108
12 | 213108 233144
13 | 233144 253017
14 | 253017 273174
15 | 273174 292723
16 | 292723 312821
17 | 312821 332991
18 | 332991 352787
19 | 352787 373013
20 | 373013 392949
21 | 392949 412296
22 | 412296 429287
23 | 429287 449251
24 | 449251 469627
25 | 469627 489588
26 | 489588 510685
27 | 510685 526010
28 | 526010 545832
29 | 545832 565035
30 | 565035 583115
31 | 583115 600840
32 | 600840 614925
33 | 614925 634923
34 | 634923 652756
35 | 652756 672460
36 | 672460 692182
37 | 692182 712613
38 | 712613 730892
39 | 730892 750565
40 | 750565 769709
41 | 769709 790141
42 | 790141 808985
43 | 808985 828423
44 | 828423 847125
45 | 847125 866794
46 | 866794 886847
47 | 886847 902244
48 | 902244 922309
49 | 922309 942226
50 | 942226 962612
51 | 962612 980692
52 | 980692 1001401
53 | 1001401 1020630
54 | 1020630 1040850
55 | 1040850 1061038
56 | 1061038 1081508
57 | 1081508 1099614
58 | 1099614 1119619
59 | 1119619 1139865
60 | 1139865 1155786
61 | 1155786 1175622
62 | 1175622 1195427
63 | 1195427 1214867
64 | 1214867 1232947
65 | 1232947 1252955
66 | 1252955 1272148
67 | 1272148 1290587
68 | 1290587 1311375
69 | 1311375 1329396
70 | 1329396 1346907
71 | 1346907 1367145
72 | 1367145 1386355
73 | 1386355 1406638
74 | 1406638 1426340
75 | 1426340 1445024
76 | 1445024 1462249
77 | 1462249 1482628
78 | 1482628 1502562
79 | 1502562 1522337
80 | 1522337 1540583
81 | 1540583 1555738
82 | 1555738 1575672
83 | 1575672 1596098
84 | 1596098 1614290
85 | 1614290 1633501
86 | 1633501 1652753
87 | 1652753 1672792
88 | 1672792 1692662
89 | 1692662 1713172
90 | 1713172 1733102
91 | 1733102 1751329
92 | 1751329 1769115
93 | 1769115 1788458
94 | 1788458 1805067
95 | 1805067 1822654
96 | 1822654 1842629
97 | 1842629 1862465
98 | 1862465 1880543
99 | 1880543 1900437
100 | 1900437 1918739
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/38.tsv:
--------------------------------------------------------------------------------
1 | 0 17997
2 | 17997 37845
3 | 37845 57542
4 | 57542 77162
5 | 77162 97690
6 | 97690 116462
7 | 116462 134511
8 | 134511 153291
9 | 153291 173414
10 | 173414 190619
11 | 190619 211079
12 | 211079 231638
13 | 231638 251187
14 | 251187 269127
15 | 269127 287852
16 | 287852 308003
17 | 308003 324881
18 | 324881 340460
19 | 340460 360292
20 | 360292 378234
21 | 378234 396614
22 | 396614 414249
23 | 414249 432245
24 | 432245 451549
25 | 451549 471698
26 | 471698 491353
27 | 491353 511376
28 | 511376 530722
29 | 530722 550536
30 | 550536 569391
31 | 569391 588959
32 | 588959 609221
33 | 609221 629748
34 | 629748 650019
35 | 650019 669309
36 | 669309 689269
37 | 689269 708874
38 | 708874 727336
39 | 727336 747434
40 | 747434 767467
41 | 767467 787281
42 | 787281 802657
43 | 802657 821567
44 | 821567 839624
45 | 839624 859400
46 | 859400 878778
47 | 878778 898129
48 | 898129 917219
49 | 917219 936028
50 | 936028 954700
51 | 954700 973641
52 | 973641 993022
53 | 993022 1013479
54 | 1013479 1033407
55 | 1033407 1053568
56 | 1053568 1071933
57 | 1071933 1090756
58 | 1090756 1109872
59 | 1109872 1129396
60 | 1129396 1144080
61 | 1144080 1164102
62 | 1164102 1184086
63 | 1184086 1204810
64 | 1204810 1223919
65 | 1223919 1244197
66 | 1244197 1263955
67 | 1263955 1269054
68 | 1269054 1287173
69 | 1287173 1307475
70 | 1307475 1327554
71 | 1327554 1345759
72 | 1345759 1363975
73 | 1363975 1384085
74 | 1384085 1404150
75 | 1404150 1423112
76 | 1423112 1441143
77 | 1441143 1458377
78 | 1458377 1472958
79 | 1472958 1492159
80 | 1492159 1511530
81 | 1511530 1531244
82 | 1531244 1551468
83 | 1551468 1569947
84 | 1569947 1588829
85 | 1588829 1608519
86 | 1608519 1628253
87 | 1628253 1648019
88 | 1648019 1665717
89 | 1665717 1684767
90 | 1684767 1704474
91 | 1704474 1725114
92 | 1725114 1745259
93 | 1745259 1760773
94 | 1760773 1777819
95 | 1777819 1798790
96 | 1798790 1818892
97 | 1818892 1836854
98 | 1836854 1857179
99 | 1857179 1877159
100 | 1877159 1897566
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/39.tsv:
--------------------------------------------------------------------------------
1 | 0 15193
2 | 15193 35467
3 | 35467 53288
4 | 53288 73345
5 | 73345 90576
6 | 90576 110652
7 | 110652 129412
8 | 129412 148355
9 | 148355 167524
10 | 167524 185180
11 | 185180 202187
12 | 202187 222388
13 | 222388 241528
14 | 241528 260316
15 | 260316 281133
16 | 281133 300506
17 | 300506 319731
18 | 319731 339993
19 | 339993 357155
20 | 357155 377390
21 | 377390 395367
22 | 395367 415535
23 | 415535 433475
24 | 433475 452915
25 | 452915 472797
26 | 472797 491125
27 | 491125 511295
28 | 511295 530420
29 | 530420 548181
30 | 548181 568003
31 | 568003 588130
32 | 588130 607665
33 | 607665 626312
34 | 626312 647005
35 | 647005 667338
36 | 667338 686988
37 | 686988 707298
38 | 707298 724013
39 | 724013 743960
40 | 743960 761807
41 | 761807 782765
42 | 782765 800733
43 | 800733 820533
44 | 820533 835385
45 | 835385 855481
46 | 855481 875464
47 | 875464 894821
48 | 894821 914945
49 | 914945 935390
50 | 935390 955042
51 | 955042 975204
52 | 975204 994988
53 | 994988 1013276
54 | 1013276 1033848
55 | 1033848 1053735
56 | 1053735 1073310
57 | 1073310 1093135
58 | 1093135 1112816
59 | 1112816 1133376
60 | 1133376 1152692
61 | 1152692 1170654
62 | 1170654 1190674
63 | 1190674 1209824
64 | 1209824 1230150
65 | 1230150 1249826
66 | 1249826 1269724
67 | 1269724 1290429
68 | 1290429 1309320
69 | 1309320 1329487
70 | 1329487 1345639
71 | 1345639 1364022
72 | 1364022 1384106
73 | 1384106 1402893
74 | 1402893 1423574
75 | 1423574 1441924
76 | 1441924 1461933
77 | 1461933 1480899
78 | 1480899 1499707
79 | 1499707 1519031
80 | 1519031 1539192
81 | 1539192 1556728
82 | 1556728 1575362
83 | 1575362 1590541
84 | 1590541 1609712
85 | 1609712 1627802
86 | 1627802 1647099
87 | 1647099 1667670
88 | 1667670 1687898
89 | 1687898 1708047
90 | 1708047 1726445
91 | 1726445 1744301
92 | 1744301 1763619
93 | 1763619 1784355
94 | 1784355 1803939
95 | 1803939 1822703
96 | 1822703 1840555
97 | 1840555 1856105
98 | 1856105 1876325
99 | 1876325 1895629
100 | 1895629 1915187
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/4.tsv:
--------------------------------------------------------------------------------
1 | 0 20085
2 | 20085 39601
3 | 39601 55197
4 | 55197 73327
5 | 73327 92087
6 | 92087 110244
7 | 110244 128422
8 | 128422 148822
9 | 148822 162464
10 | 162464 182849
11 | 182849 202968
12 | 202968 223377
13 | 223377 244038
14 | 244038 262586
15 | 262586 281894
16 | 281894 301827
17 | 301827 316336
18 | 316336 336024
19 | 336024 353833
20 | 353833 373804
21 | 373804 394017
22 | 394017 412740
23 | 412740 432378
24 | 432378 452437
25 | 452437 471570
26 | 471570 490904
27 | 490904 510010
28 | 510010 529260
29 | 529260 547848
30 | 547848 566248
31 | 566248 586472
32 | 586472 604675
33 | 604675 624838
34 | 624838 644553
35 | 644553 665027
36 | 665027 683332
37 | 683332 702403
38 | 702403 722925
39 | 722925 742785
40 | 742785 762471
41 | 762471 782539
42 | 782539 802266
43 | 802266 822313
44 | 822313 838236
45 | 838236 856201
46 | 856201 876065
47 | 876065 895853
48 | 895853 916040
49 | 916040 934638
50 | 934638 954448
51 | 954448 972887
52 | 972887 991307
53 | 991307 1010539
54 | 1010539 1029029
55 | 1029029 1047369
56 | 1047369 1067622
57 | 1067622 1087079
58 | 1087079 1106492
59 | 1106492 1127085
60 | 1127085 1147523
61 | 1147523 1167652
62 | 1167652 1184590
63 | 1184590 1204054
64 | 1204054 1222134
65 | 1222134 1241831
66 | 1241831 1260119
67 | 1260119 1277690
68 | 1277690 1293356
69 | 1293356 1313747
70 | 1313747 1334036
71 | 1334036 1352899
72 | 1352899 1373051
73 | 1373051 1392184
74 | 1392184 1410143
75 | 1410143 1427511
76 | 1427511 1446876
77 | 1446876 1467287
78 | 1467287 1487952
79 | 1487952 1508405
80 | 1508405 1527365
81 | 1527365 1546329
82 | 1546329 1565653
83 | 1565653 1585612
84 | 1585612 1604559
85 | 1604559 1624158
86 | 1624158 1642211
87 | 1642211 1661849
88 | 1661849 1681823
89 | 1681823 1696405
90 | 1696405 1715842
91 | 1715842 1735490
92 | 1735490 1754642
93 | 1754642 1775110
94 | 1775110 1795169
95 | 1795169 1813977
96 | 1813977 1833401
97 | 1833401 1852806
98 | 1852806 1872517
99 | 1872517 1892623
100 | 1892623 1911795
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/40.tsv:
--------------------------------------------------------------------------------
1 | 0 19218
2 | 19218 36538
3 | 36538 55019
4 | 55019 74815
5 | 74815 92809
6 | 92809 111091
7 | 111091 129600
8 | 129600 149953
9 | 149953 167118
10 | 167118 187066
11 | 187066 207077
12 | 207077 222901
13 | 222901 242898
14 | 242898 261949
15 | 261949 282330
16 | 282330 302107
17 | 302107 322070
18 | 322070 340132
19 | 340132 359975
20 | 359975 380745
21 | 380745 400494
22 | 400494 418518
23 | 418518 433966
24 | 433966 452272
25 | 452272 472213
26 | 472213 488739
27 | 488739 505710
28 | 505710 525694
29 | 525694 544480
30 | 544480 564753
31 | 564753 584463
32 | 584463 602992
33 | 602992 623068
34 | 623068 643148
35 | 643148 663279
36 | 663279 682037
37 | 682037 702049
38 | 702049 721337
39 | 721337 742123
40 | 742123 760935
41 | 760935 781140
42 | 781140 801278
43 | 801278 819872
44 | 819872 840430
45 | 840430 858712
46 | 858712 877777
47 | 877777 897418
48 | 897418 917647
49 | 917647 932097
50 | 932097 951832
51 | 951832 971202
52 | 971202 974066
53 | 974066 994055
54 | 994055 1012359
55 | 1012359 1031078
56 | 1031078 1050668
57 | 1050668 1070418
58 | 1070418 1090516
59 | 1090516 1106891
60 | 1106891 1125609
61 | 1125609 1145990
62 | 1145990 1165641
63 | 1165641 1185497
64 | 1185497 1205349
65 | 1205349 1224624
66 | 1224624 1245138
67 | 1245138 1264473
68 | 1264473 1284077
69 | 1284077 1301293
70 | 1301293 1321225
71 | 1321225 1340560
72 | 1340560 1361230
73 | 1361230 1380129
74 | 1380129 1399469
75 | 1399469 1419524
76 | 1419524 1437131
77 | 1437131 1456704
78 | 1456704 1476117
79 | 1476117 1496476
80 | 1496476 1515021
81 | 1515021 1533934
82 | 1533934 1553949
83 | 1553949 1569659
84 | 1569659 1589448
85 | 1589448 1609825
86 | 1609825 1628966
87 | 1628966 1648054
88 | 1648054 1668015
89 | 1668015 1687534
90 | 1687534 1705991
91 | 1705991 1726024
92 | 1726024 1746545
93 | 1746545 1766270
94 | 1766270 1784450
95 | 1784450 1805344
96 | 1805344 1825182
97 | 1825182 1845110
98 | 1845110 1865218
99 | 1865218 1878377
100 | 1878377 1898890
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/41.tsv:
--------------------------------------------------------------------------------
1 | 0 18311
2 | 18311 36064
3 | 36064 55824
4 | 55824 75367
5 | 75367 94688
6 | 94688 114859
7 | 114859 134532
8 | 134532 152493
9 | 152493 170527
10 | 170527 189178
11 | 189178 209677
12 | 209677 228199
13 | 228199 248172
14 | 248172 262686
15 | 262686 282870
16 | 282870 302929
17 | 302929 323106
18 | 323106 341233
19 | 341233 357803
20 | 357803 377679
21 | 377679 396321
22 | 396321 416706
23 | 416706 427412
24 | 427412 447137
25 | 447137 467359
26 | 467359 486884
27 | 486884 506817
28 | 506817 526803
29 | 526803 545651
30 | 545651 565943
31 | 565943 585418
32 | 585418 604392
33 | 604392 624196
34 | 624196 644442
35 | 644442 664213
36 | 664213 680600
37 | 680600 698559
38 | 698559 716531
39 | 716531 736879
40 | 736879 756934
41 | 756934 776932
42 | 776932 792782
43 | 792782 810474
44 | 810474 831009
45 | 831009 847097
46 | 847097 867299
47 | 867299 886930
48 | 886930 905155
49 | 905155 925288
50 | 925288 945244
51 | 945244 965618
52 | 965618 983742
53 | 983742 1003622
54 | 1003622 1022513
55 | 1022513 1040977
56 | 1040977 1060804
57 | 1060804 1079961
58 | 1079961 1098851
59 | 1098851 1115646
60 | 1115646 1134278
61 | 1134278 1153730
62 | 1153730 1167913
63 | 1167913 1187091
64 | 1187091 1205577
65 | 1205577 1225233
66 | 1225233 1241144
67 | 1241144 1262046
68 | 1262046 1280295
69 | 1280295 1299853
70 | 1299853 1319826
71 | 1319826 1339417
72 | 1339417 1359798
73 | 1359798 1378101
74 | 1378101 1396743
75 | 1396743 1416978
76 | 1416978 1432490
77 | 1432490 1453315
78 | 1453315 1469266
79 | 1469266 1489066
80 | 1489066 1509691
81 | 1509691 1527164
82 | 1527164 1546787
83 | 1546787 1566866
84 | 1566866 1584777
85 | 1584777 1604895
86 | 1604895 1624761
87 | 1624761 1644469
88 | 1644469 1654461
89 | 1654461 1672561
90 | 1672561 1692883
91 | 1692883 1712707
92 | 1712707 1728475
93 | 1728475 1746948
94 | 1746948 1764076
95 | 1764076 1784612
96 | 1784612 1801764
97 | 1801764 1821800
98 | 1821800 1840445
99 | 1840445 1860458
100 | 1860458 1879790
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/42.tsv:
--------------------------------------------------------------------------------
1 | 0 20397
2 | 20397 35974
3 | 35974 54628
4 | 54628 73558
5 | 73558 93580
6 | 93580 109366
7 | 109366 117608
8 | 117608 135714
9 | 135714 153050
10 | 153050 169206
11 | 169206 189183
12 | 189183 209394
13 | 209394 227582
14 | 227582 243048
15 | 243048 262662
16 | 262662 282390
17 | 282390 300319
18 | 300319 320769
19 | 320769 334539
20 | 334539 353224
21 | 353224 369466
22 | 369466 376372
23 | 376372 396171
24 | 396171 416582
25 | 416582 436635
26 | 436635 456122
27 | 456122 476621
28 | 476621 484704
29 | 484704 503524
30 | 503524 523521
31 | 523521 543421
32 | 543421 563069
33 | 563069 583232
34 | 583232 600807
35 | 600807 620629
36 | 620629 637805
37 | 637805 649544
38 | 649544 665967
39 | 665967 682791
40 | 682791 698855
41 | 698855 718996
42 | 718996 734669
43 | 734669 755086
44 | 755086 765379
45 | 765379 783648
46 | 783648 803468
47 | 803468 823037
48 | 823037 843154
49 | 843154 861387
50 | 861387 877903
51 | 877903 897166
52 | 897166 917433
53 | 917433 937547
54 | 937547 957705
55 | 957705 977875
56 | 977875 996529
57 | 996529 1016536
58 | 1016536 1035463
59 | 1035463 1055032
60 | 1055032 1073559
61 | 1073559 1088943
62 | 1088943 1108153
63 | 1108153 1126265
64 | 1126265 1146832
65 | 1146832 1151602
66 | 1151602 1169184
67 | 1169184 1189211
68 | 1189211 1209744
69 | 1209744 1227690
70 | 1227690 1248261
71 | 1248261 1266555
72 | 1266555 1286057
73 | 1286057 1306303
74 | 1306303 1324209
75 | 1324209 1331423
76 | 1331423 1349978
77 | 1349978 1368576
78 | 1368576 1388353
79 | 1388353 1400428
80 | 1400428 1415619
81 | 1415619 1432940
82 | 1432940 1453069
83 | 1453069 1470874
84 | 1470874 1491053
85 | 1491053 1506186
86 | 1506186 1526560
87 | 1526560 1546536
88 | 1546536 1565942
89 | 1565942 1585691
90 | 1585691 1605160
91 | 1605160 1615082
92 | 1615082 1634226
93 | 1634226 1651232
94 | 1651232 1667018
95 | 1667018 1682762
96 | 1682762 1703153
97 | 1703153 1722270
98 | 1722270 1739311
99 | 1739311 1759066
100 | 1759066 1778551
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/43.tsv:
--------------------------------------------------------------------------------
1 | 0 18587
2 | 18587 38219
3 | 38219 58625
4 | 58625 78380
5 | 78380 98146
6 | 98146 118292
7 | 118292 136157
8 | 136157 155451
9 | 155451 174855
10 | 174855 178703
11 | 178703 199043
12 | 199043 219200
13 | 219200 236584
14 | 236584 257053
15 | 257053 277069
16 | 277069 292881
17 | 292881 308712
18 | 308712 324366
19 | 324366 327626
20 | 327626 340629
21 | 340629 361296
22 | 361296 365968
23 | 365968 382552
24 | 382552 402822
25 | 402822 422856
26 | 422856 443082
27 | 443082 446672
28 | 446672 466914
29 | 466914 487254
30 | 487254 507330
31 | 507330 527027
32 | 527027 547147
33 | 547147 565258
34 | 565258 583610
35 | 583610 604200
36 | 604200 624301
37 | 624301 636317
38 | 636317 654229
39 | 654229 674826
40 | 674826 690621
41 | 690621 710580
42 | 710580 730527
43 | 730527 750020
44 | 750020 769686
45 | 769686 788771
46 | 788771 808091
47 | 808091 827420
48 | 827420 845355
49 | 845355 855208
50 | 855208 874504
51 | 874504 893063
52 | 893063 913269
53 | 913269 932872
54 | 932872 950580
55 | 950580 954196
56 | 954196 966055
57 | 966055 985386
58 | 985386 1005661
59 | 1005661 1025463
60 | 1025463 1045935
61 | 1045935 1061530
62 | 1061530 1079657
63 | 1079657 1095843
64 | 1095843 1109069
65 | 1109069 1128451
66 | 1128451 1148790
67 | 1148790 1159565
68 | 1159565 1161692
69 | 1161692 1167738
70 | 1167738 1187430
71 | 1187430 1207675
72 | 1207675 1222572
73 | 1222572 1242457
74 | 1242457 1252219
75 | 1252219 1265647
76 | 1265647 1283910
77 | 1283910 1303291
78 | 1303291 1323427
79 | 1323427 1343160
80 | 1343160 1344158
81 | 1344158 1361381
82 | 1361381 1381734
83 | 1381734 1397106
84 | 1397106 1411903
85 | 1411903 1416731
86 | 1416731 1437005
87 | 1437005 1456453
88 | 1456453 1460213
89 | 1460213 1480280
90 | 1480280 1498353
91 | 1498353 1514345
92 | 1514345 1534233
93 | 1534233 1543788
94 | 1543788 1551791
95 | 1551791 1553562
96 | 1553562 1562085
97 | 1562085 1581004
98 | 1581004 1600682
99 | 1600682 1618922
100 | 1618922 1634303
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/44.tsv:
--------------------------------------------------------------------------------
1 | 0 20121
2 | 20121 38219
3 | 38219 51362
4 | 51362 71808
5 | 71808 91355
6 | 91355 111879
7 | 111879 129537
8 | 129537 145046
9 | 145046 146627
10 | 146627 153919
11 | 153919 171838
12 | 171838 174339
13 | 174339 179613
14 | 179613 196362
15 | 196362 200842
16 | 200842 214565
17 | 214565 221641
18 | 221641 241530
19 | 241530 257953
20 | 257953 273306
21 | 273306 292220
22 | 292220 312587
23 | 312587 332144
24 | 332144 351696
25 | 351696 365304
26 | 365304 385413
27 | 385413 406366
28 | 406366 426837
29 | 426837 438809
30 | 438809 457906
31 | 457906 473929
32 | 473929 494720
33 | 494720 511747
34 | 511747 530523
35 | 530523 535199
36 | 535199 553764
37 | 553764 573057
38 | 573057 592821
39 | 592821 604879
40 | 604879 623781
41 | 623781 635391
42 | 635391 656028
43 | 656028 667979
44 | 667979 686234
45 | 686234 702155
46 | 702155 721725
47 | 721725 725274
48 | 725274 740124
49 | 740124 755465
50 | 755465 773459
51 | 773459 785754
52 | 785754 803911
53 | 803911 821664
54 | 821664 836486
55 | 836486 856412
56 | 856412 874462
57 | 874462 894955
58 | 894955 913359
59 | 913359 919959
60 | 919959 940826
61 | 940826 958400
62 | 958400 978941
63 | 978941 996891
64 | 996891 1017601
65 | 1017601 1037785
66 | 1037785 1046298
67 | 1046298 1056425
68 | 1056425 1076577
69 | 1076577 1085948
70 | 1085948 1104027
71 | 1104027 1106848
72 | 1106848 1126939
73 | 1126939 1140746
74 | 1140746 1159845
75 | 1159845 1169980
76 | 1169980 1189684
77 | 1189684 1192533
78 | 1192533 1211664
79 | 1211664 1221938
80 | 1221938 1238341
81 | 1238341 1258067
82 | 1258067 1269292
83 | 1269292 1270465
84 | 1270465 1279478
85 | 1279478 1297423
86 | 1297423 1312045
87 | 1312045 1331980
88 | 1331980 1352114
89 | 1352114 1353221
90 | 1353221 1372730
91 | 1372730 1392827
92 | 1392827 1412674
93 | 1412674 1417836
94 | 1417836 1429528
95 | 1429528 1438221
96 | 1438221 1450066
97 | 1450066 1468703
98 | 1468703 1479768
99 | 1479768 1499536
100 | 1499536 1516848
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/45.tsv:
--------------------------------------------------------------------------------
1 | 0 2316
2 | 2316 11805
3 | 11805 19436
4 | 19436 34841
5 | 34841 42297
6 | 42297 44202
7 | 44202 64560
8 | 64560 84306
9 | 84306 91361
10 | 91361 106735
11 | 106735 115757
12 | 115757 135763
13 | 135763 156018
14 | 156018 171993
15 | 171993 185510
16 | 185510 189951
17 | 189951 198444
18 | 198444 210807
19 | 210807 225715
20 | 225715 243596
21 | 243596 254326
22 | 254326 267171
23 | 267171 281815
24 | 281815 301806
25 | 301806 302619
26 | 302619 322449
27 | 322449 339067
28 | 339067 359076
29 | 359076 363379
30 | 363379 374516
31 | 374516 394824
32 | 394824 413008
33 | 413008 433349
34 | 433349 447922
35 | 447922 467182
36 | 467182 474533
37 | 474533 479010
38 | 479010 481357
39 | 481357 499846
40 | 499846 511763
41 | 511763 520512
42 | 520512 536361
43 | 536361 556511
44 | 556511 564175
45 | 564175 583946
46 | 583946 595954
47 | 595954 597616
48 | 597616 615949
49 | 615949 619347
50 | 619347 627712
51 | 627712 645904
52 | 645904 663533
53 | 663533 683441
54 | 683441 691574
55 | 691574 695584
56 | 695584 715489
57 | 715489 715546
58 | 715546 717745
59 | 717745 728212
60 | 728212 732229
61 | 732229 752098
62 | 752098 771081
63 | 771081 790621
64 | 790621 801859
65 | 801859 805259
66 | 805259 818175
67 | 818175 837967
68 | 837967 842386
69 | 842386 856783
70 | 856783 869632
71 | 869632 887596
72 | 887596 899977
73 | 899977 914589
74 | 914589 932685
75 | 932685 952086
76 | 952086 972106
77 | 972106 990949
78 | 990949 999306
79 | 999306 1000800
80 | 1000800 1006082
81 | 1006082 1010246
82 | 1010246 1027813
83 | 1027813 1035545
84 | 1035545 1052853
85 | 1052853 1055521
86 | 1055521 1059472
87 | 1059472 1062261
88 | 1062261 1066096
89 | 1066096 1070311
90 | 1070311 1080692
91 | 1080692 1083708
92 | 1083708 1087509
93 | 1087509 1092674
94 | 1092674 1111982
95 | 1111982 1130670
96 | 1130670 1141519
97 | 1141519 1147482
98 | 1147482 1158810
99 | 1158810 1158939
100 | 1158939 1163735
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/46.tsv:
--------------------------------------------------------------------------------
1 | 0 8507
2 | 8507 12796
3 | 12796 15795
4 | 15795 27122
5 | 27122 36164
6 | 36164 37626
7 | 37626 48704
8 | 48704 53378
9 | 53378 53421
10 | 53421 59357
11 | 59357 65099
12 | 65099 83537
13 | 83537 91966
14 | 91966 106725
15 | 106725 114418
16 | 114418 123841
17 | 123841 138790
18 | 138790 143210
19 | 143210 163068
20 | 163068 174468
21 | 174468 191991
22 | 191991 210350
23 | 210350 228544
24 | 228544 249188
25 | 249188 269547
26 | 269547 289746
27 | 289746 305505
28 | 305505 326119
29 | 326119 346016
30 | 346016 364174
31 | 364174 382000
32 | 382000 401663
33 | 401663 422394
34 | 422394 442482
35 | 442482 459724
36 | 459724 479407
37 | 479407 500330
38 | 500330 519845
39 | 519845 537913
40 | 537913 558599
41 | 558599 574713
42 | 574713 594675
43 | 594675 612526
44 | 612526 631646
45 | 631646 651716
46 | 651716 671960
47 | 671960 690205
48 | 690205 710167
49 | 710167 729985
50 | 729985 750816
51 | 750816 769444
52 | 769444 789239
53 | 789239 799305
54 | 799305 809373
55 | 809373 823464
56 | 823464 838018
57 | 838018 852836
58 | 852836 860748
59 | 860748 872914
60 | 872914 885491
61 | 885491 887798
62 | 887798 893590
63 | 893590 905950
64 | 905950 925645
65 | 925645 938009
66 | 938009 950619
67 | 950619 957576
68 | 957576 971076
69 | 971076 983230
70 | 983230 994584
71 | 994584 999889
72 | 999889 1004272
73 | 1004272 1016566
74 | 1016566 1029340
75 | 1029340 1042276
76 | 1042276 1053604
77 | 1053604 1059750
78 | 1059750 1061126
79 | 1061126 1065472
80 | 1065472 1069762
81 | 1069762 1080129
82 | 1080129 1090210
83 | 1090210 1093334
84 | 1093334 1096153
85 | 1096153 1099846
86 | 1099846 1120381
87 | 1120381 1138469
88 | 1138469 1138469
89 | 1138469 1138469
90 | 1138469 1138469
91 | 1138469 1138469
92 | 1138469 1138469
93 | 1138469 1138469
94 | 1138469 1138469
95 | 1138469 1138469
96 | 1138469 1138469
97 | 1138469 1138469
98 | 1138469 1138469
99 | 1138469 1138469
100 | 1138469 1138469
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/5.tsv:
--------------------------------------------------------------------------------
1 | 0 16054
2 | 16054 36144
3 | 36144 56026
4 | 56026 72916
5 | 72916 92158
6 | 92158 110673
7 | 110673 130233
8 | 130233 150617
9 | 150617 171366
10 | 171366 191360
11 | 191360 211262
12 | 211262 232153
13 | 232153 251685
14 | 251685 271336
15 | 271336 290703
16 | 290703 310435
17 | 310435 329378
18 | 329378 348686
19 | 348686 368527
20 | 368527 388542
21 | 388542 407941
22 | 407941 426237
23 | 426237 446559
24 | 446559 464207
25 | 464207 483798
26 | 483798 501660
27 | 501660 522003
28 | 522003 542183
29 | 542183 561505
30 | 561505 581479
31 | 581479 601733
32 | 601733 621992
33 | 621992 641924
34 | 641924 659093
35 | 659093 674829
36 | 674829 695364
37 | 695364 714809
38 | 714809 735247
39 | 735247 755407
40 | 755407 775857
41 | 775857 788517
42 | 788517 808771
43 | 808771 828384
44 | 828384 848457
45 | 848457 868557
46 | 868557 884337
47 | 884337 905312
48 | 905312 925014
49 | 925014 942156
50 | 942156 962746
51 | 962746 983143
52 | 983143 1003578
53 | 1003578 1023401
54 | 1023401 1042591
55 | 1042591 1062961
56 | 1062961 1081026
57 | 1081026 1101325
58 | 1101325 1120927
59 | 1120927 1141081
60 | 1141081 1160756
61 | 1160756 1180711
62 | 1180711 1201191
63 | 1201191 1220819
64 | 1220819 1236160
65 | 1236160 1256201
66 | 1256201 1276622
67 | 1276622 1295967
68 | 1295967 1315644
69 | 1315644 1335516
70 | 1335516 1355324
71 | 1355324 1375330
72 | 1375330 1395490
73 | 1395490 1414932
74 | 1414932 1435148
75 | 1435148 1455086
76 | 1455086 1474900
77 | 1474900 1494206
78 | 1494206 1511219
79 | 1511219 1525688
80 | 1525688 1540356
81 | 1540356 1558513
82 | 1558513 1575712
83 | 1575712 1595673
84 | 1595673 1615470
85 | 1615470 1634771
86 | 1634771 1654842
87 | 1654842 1674615
88 | 1674615 1689881
89 | 1689881 1708041
90 | 1708041 1727884
91 | 1727884 1746824
92 | 1746824 1764804
93 | 1764804 1784406
94 | 1784406 1804567
95 | 1804567 1822869
96 | 1822869 1843169
97 | 1843169 1862511
98 | 1862511 1882606
99 | 1882606 1902880
100 | 1902880 1921578
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/6.tsv:
--------------------------------------------------------------------------------
1 | 0 15884
2 | 15884 29930
3 | 29930 48496
4 | 48496 63924
5 | 63924 82097
6 | 82097 101589
7 | 101589 121616
8 | 121616 137482
9 | 137482 157549
10 | 157549 177513
11 | 177513 197710
12 | 197710 217275
13 | 217275 237129
14 | 237129 256199
15 | 256199 275925
16 | 275925 295178
17 | 295178 314693
18 | 314693 334551
19 | 334551 354215
20 | 354215 374195
21 | 374195 392540
22 | 392540 410670
23 | 410670 430028
24 | 430028 448372
25 | 448372 467971
26 | 467971 486076
27 | 486076 506249
28 | 506249 526092
29 | 526092 542165
30 | 542165 561709
31 | 561709 580931
32 | 580931 601994
33 | 601994 621685
34 | 621685 640637
35 | 640637 659536
36 | 659536 676171
37 | 676171 696013
38 | 696013 713052
39 | 713052 733014
40 | 733014 752645
41 | 752645 771037
42 | 771037 786419
43 | 786419 804294
44 | 804294 824170
45 | 824170 842548
46 | 842548 862808
47 | 862808 878441
48 | 878441 898463
49 | 898463 917211
50 | 917211 936942
51 | 936942 956330
52 | 956330 976896
53 | 976896 996998
54 | 996998 1016920
55 | 1016920 1030741
56 | 1030741 1049672
57 | 1049672 1068940
58 | 1068940 1086035
59 | 1086035 1105694
60 | 1105694 1122839
61 | 1122839 1138370
62 | 1138370 1158438
63 | 1158438 1178333
64 | 1178333 1197349
65 | 1197349 1216013
66 | 1216013 1236109
67 | 1236109 1254439
68 | 1254439 1274261
69 | 1274261 1293097
70 | 1293097 1313010
71 | 1313010 1332568
72 | 1332568 1352994
73 | 1352994 1373570
74 | 1373570 1391020
75 | 1391020 1410939
76 | 1410939 1431171
77 | 1431171 1450423
78 | 1450423 1469881
79 | 1469881 1488738
80 | 1488738 1509215
81 | 1509215 1528654
82 | 1528654 1548167
83 | 1548167 1563984
84 | 1563984 1583363
85 | 1583363 1602901
86 | 1602901 1622410
87 | 1622410 1641426
88 | 1641426 1659511
89 | 1659511 1678358
90 | 1678358 1696851
91 | 1696851 1716652
92 | 1716652 1737010
93 | 1737010 1756791
94 | 1756791 1775101
95 | 1775101 1794956
96 | 1794956 1813793
97 | 1813793 1834479
98 | 1834479 1852401
99 | 1852401 1872489
100 | 1872489 1892082
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/7.tsv:
--------------------------------------------------------------------------------
1 | 0 18044
2 | 18044 38509
3 | 38509 58893
4 | 58893 78682
5 | 78682 98664
6 | 98664 118949
7 | 118949 136741
8 | 136741 152720
9 | 152720 170626
10 | 170626 190570
11 | 190570 210248
12 | 210248 229724
13 | 229724 249915
14 | 249915 267678
15 | 267678 286679
16 | 286679 306809
17 | 306809 323752
18 | 323752 343016
19 | 343016 361735
20 | 361735 381105
21 | 381105 401303
22 | 401303 419646
23 | 419646 439806
24 | 439806 459800
25 | 459800 479541
26 | 479541 499673
27 | 499673 517532
28 | 517532 533192
29 | 533192 548519
30 | 548519 568214
31 | 568214 585128
32 | 585128 605504
33 | 605504 623452
34 | 623452 643625
35 | 643625 664029
36 | 664029 684251
37 | 684251 704608
38 | 704608 724752
39 | 724752 745335
40 | 745335 764753
41 | 764753 783363
42 | 783363 804268
43 | 804268 823463
44 | 823463 841843
45 | 841843 861773
46 | 861773 881519
47 | 881519 901606
48 | 901606 921853
49 | 921853 942362
50 | 942362 962359
51 | 962359 982739
52 | 982739 1002786
53 | 1002786 1022867
54 | 1022867 1042619
55 | 1042619 1062351
56 | 1062351 1082859
57 | 1082859 1102022
58 | 1102022 1113811
59 | 1113811 1134060
60 | 1134060 1152351
61 | 1152351 1171101
62 | 1171101 1190984
63 | 1190984 1208745
64 | 1208745 1228751
65 | 1228751 1248489
66 | 1248489 1268684
67 | 1268684 1289319
68 | 1289319 1309550
69 | 1309550 1329816
70 | 1329816 1348052
71 | 1348052 1365467
72 | 1365467 1385558
73 | 1385558 1406103
74 | 1406103 1425916
75 | 1425916 1446842
76 | 1446842 1467334
77 | 1467334 1487597
78 | 1487597 1505256
79 | 1505256 1524413
80 | 1524413 1544390
81 | 1544390 1564461
82 | 1564461 1584264
83 | 1584264 1604115
84 | 1604115 1623171
85 | 1623171 1641418
86 | 1641418 1661556
87 | 1661556 1681335
88 | 1681335 1701041
89 | 1701041 1717031
90 | 1717031 1734129
91 | 1734129 1751738
92 | 1751738 1771364
93 | 1771364 1791998
94 | 1791998 1811962
95 | 1811962 1831663
96 | 1831663 1851793
97 | 1851793 1870347
98 | 1870347 1890600
99 | 1890600 1909365
100 | 1909365 1928593
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/8.tsv:
--------------------------------------------------------------------------------
1 | 0 20340
2 | 20340 40195
3 | 40195 51625
4 | 51625 71200
5 | 71200 91735
6 | 91735 111587
7 | 111587 129850
8 | 129850 145655
9 | 145655 163553
10 | 163553 182858
11 | 182858 199012
12 | 199012 219122
13 | 219122 235231
14 | 235231 252981
15 | 252981 272564
16 | 272564 292563
17 | 292563 312954
18 | 312954 332611
19 | 332611 352257
20 | 352257 370459
21 | 370459 388457
22 | 388457 404110
23 | 404110 422638
24 | 422638 442421
25 | 442421 459519
26 | 459519 479093
27 | 479093 499587
28 | 499587 519036
29 | 519036 537196
30 | 537196 556607
31 | 556607 575205
32 | 575205 594850
33 | 594850 615234
34 | 615234 634806
35 | 634806 654524
36 | 654524 674362
37 | 674362 694411
38 | 694411 713561
39 | 713561 731630
40 | 731630 751020
41 | 751020 771505
42 | 771505 787605
43 | 787605 807480
44 | 807480 827880
45 | 827880 842119
46 | 842119 862829
47 | 862829 882470
48 | 882470 900846
49 | 900846 920564
50 | 920564 939567
51 | 939567 958990
52 | 958990 979084
53 | 979084 998906
54 | 998906 1018601
55 | 1018601 1037059
56 | 1037059 1054316
57 | 1054316 1069649
58 | 1069649 1085494
59 | 1085494 1101490
60 | 1101490 1118932
61 | 1118932 1137786
62 | 1137786 1157485
63 | 1157485 1177981
64 | 1177981 1196863
65 | 1196863 1216805
66 | 1216805 1234619
67 | 1234619 1255427
68 | 1255427 1275805
69 | 1275805 1295495
70 | 1295495 1314783
71 | 1314783 1334340
72 | 1334340 1353144
73 | 1353144 1372208
74 | 1372208 1391842
75 | 1391842 1411753
76 | 1411753 1429667
77 | 1429667 1447553
78 | 1447553 1467785
79 | 1467785 1487088
80 | 1487088 1507352
81 | 1507352 1525987
82 | 1525987 1546126
83 | 1546126 1562063
84 | 1562063 1578162
85 | 1578162 1598311
86 | 1598311 1618268
87 | 1618268 1638513
88 | 1638513 1658231
89 | 1658231 1678453
90 | 1678453 1697647
91 | 1697647 1717366
92 | 1717366 1737254
93 | 1737254 1757386
94 | 1757386 1777386
95 | 1777386 1796361
96 | 1796361 1816094
97 | 1816094 1836568
98 | 1836568 1856214
99 | 1856214 1874327
100 | 1874327 1892979
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/9.tsv:
--------------------------------------------------------------------------------
1 | 0 20294
2 | 20294 40615
3 | 40615 60769
4 | 60769 81170
5 | 81170 100588
6 | 100588 120701
7 | 120701 140938
8 | 140938 159366
9 | 159366 179662
10 | 179662 200400
11 | 200400 220898
12 | 220898 241131
13 | 241131 260783
14 | 260783 280963
15 | 280963 301628
16 | 301628 320780
17 | 320780 338718
18 | 338718 358612
19 | 358612 378202
20 | 378202 398591
21 | 398591 418367
22 | 418367 438623
23 | 438623 458730
24 | 458730 476765
25 | 476765 494724
26 | 494724 511876
27 | 511876 532601
28 | 532601 552071
29 | 552071 572205
30 | 572205 592344
31 | 592344 612432
32 | 612432 629385
33 | 629385 648869
34 | 648869 665994
35 | 665994 685643
36 | 685643 705952
37 | 705952 724504
38 | 724504 744329
39 | 744329 764861
40 | 764861 780593
41 | 780593 800967
42 | 800967 821341
43 | 821341 841109
44 | 841109 861060
45 | 861060 881434
46 | 881434 900252
47 | 900252 915989
48 | 915989 935994
49 | 935994 956138
50 | 956138 976322
51 | 976322 995996
52 | 995996 1015687
53 | 1015687 1033059
54 | 1033059 1052056
55 | 1052056 1067305
56 | 1067305 1086949
57 | 1086949 1106611
58 | 1106611 1126558
59 | 1126558 1147179
60 | 1147179 1167377
61 | 1167377 1184353
62 | 1184353 1202374
63 | 1202374 1222987
64 | 1222987 1243073
65 | 1243073 1262933
66 | 1262933 1280587
67 | 1280587 1298407
68 | 1298407 1319065
69 | 1319065 1338937
70 | 1338937 1358085
71 | 1358085 1378348
72 | 1378348 1398228
73 | 1398228 1417195
74 | 1417195 1436889
75 | 1436889 1457201
76 | 1457201 1472704
77 | 1472704 1492204
78 | 1492204 1511790
79 | 1511790 1530265
80 | 1530265 1550365
81 | 1550365 1569025
82 | 1569025 1586272
83 | 1586272 1602738
84 | 1602738 1622285
85 | 1622285 1641852
86 | 1641852 1660021
87 | 1660021 1679797
88 | 1679797 1698857
89 | 1698857 1718549
90 | 1718549 1733458
91 | 1733458 1752958
92 | 1752958 1773449
93 | 1773449 1793827
94 | 1793827 1813887
95 | 1813887 1832519
96 | 1832519 1852576
97 | 1852576 1872774
98 | 1872774 1893036
99 | 1893036 1912870
100 | 1912870 1933254
101 |
--------------------------------------------------------------------------------
/backend/search/clueweb/ranges/clueweb.tsv:
--------------------------------------------------------------------------------
1 | 0 1923448
2 | 1923448 3836041
3 | 3836041 5736280
4 | 5736280 7652530
5 | 7652530 9564325
6 | 9564325 11485903
7 | 11485903 13377985
8 | 13377985 15306578
9 | 15306578 17199557
10 | 17199557 19132811
11 | 19132811 21003297
12 | 21003297 22906879
13 | 22906879 24785211
14 | 24785211 26661648
15 | 26661648 28552464
16 | 28552464 30467313
17 | 30467313 32376877
18 | 32376877 34262856
19 | 34262856 36174481
20 | 36174481 38084673
21 | 38084673 39987529
22 | 39987529 41881826
23 | 41881826 43794304
24 | 43794304 45709433
25 | 45709433 47588547
26 | 47588547 49466020
27 | 49466020 51397223
28 | 51397223 53275791
29 | 53275791 55195288
30 | 55195288 57105968
31 | 57105968 59006323
32 | 59006323 60923464
33 | 60923464 62824529
34 | 62824529 64726447
35 | 64726447 66654269
36 | 66654269 68566506
37 | 68566506 70466577
38 | 70466577 72385316
39 | 72385316 74282882
40 | 74282882 76198069
41 | 76198069 78096959
42 | 78096959 79976749
43 | 79976749 81755300
44 | 81755300 83389603
45 | 83389603 84906451
46 | 84906451 86070186
47 | 86070186 87208655
48 |
--------------------------------------------------------------------------------
/backend/search/clueweb/search.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("/home/tevinw/ragviz/backend")
3 |
4 | import requests
5 | import os
6 | from helpers.ClueWeb22Api import ClueWeb22Api
7 | from helpers.concurrent_fetch import fetch_all
8 | from helpers.range_dictionary import create_range_dictionary, query_range_dictionary
9 | from search.search import Search
10 | import concurrent.futures
11 | from threading import Lock
12 |
class CluewebSearch(Search):
    """Search backend that resolves nearest-neighbour hits to ClueWeb22 documents.

    On construction every ``*.tsv`` file found under
    ``$PROJECT_DIR/backend/search/clueweb/ranges/`` is loaded with
    ``create_range_dictionary`` and stored under the file's stem
    (``"clueweb"``, ``"0"``, ``"1"``, ...).  Queries are fanned out to the
    HTTP endpoints named by the ``CLUEWEB_ADDR_<n>`` / ``CLUEWEB_PORT_<n>``
    environment variables.
    """

    # A shard-local hit index is shifted by ``shard_number * SHARD_STRIDE``
    # before the per-shard results are merged.
    SHARD_STRIDE = 21517546
    # Number of CLUEWEB_ADDR_<n>/CLUEWEB_PORT_<n> endpoint pairs queried.
    SHARD_COUNT = 4

    def __init__(self):
        """Load the range tables and initialise the query-id counter."""
        directory = f'{os.getenv("PROJECT_DIR")}/backend/search/clueweb/ranges/'
        self.range_dictionaries = {}

        for filename in os.listdir(directory):
            if filename.endswith('.tsv'):
                file_path = os.path.join(directory, filename)
                # Key each table by the file name without its extension.
                index = filename.split('.')[0]
                self.range_dictionaries[index] = create_range_dictionary(file_path)

        # Guards ``query_id`` so concurrent requests get distinct ids.
        self.lock = Lock()
        self.query_id = 0

    def get_search_results(self, embedding, k, query, snippet_object):
        """Return up to ``k`` hits for ``embedding`` as a list of dicts.

        Args:
            embedding: Query vector sent to each endpoint as ``"query"``.
            k: Number of hits requested from each endpoint and kept after
                merging.
            query: Passed through to ``snippet_object.get_snippet``.
            snippet_object: Object exposing ``get_snippet(query, text)``.

        Returns:
            A list of ``{"name", "url", "snippet"}`` dicts, ordered by
            ascending distance.
        """
        with self.lock:
            jsonquery = {"Ls": 256,
                         "query_id": self.query_id,
                         "query": embedding,
                         "k": k}
            self.query_id += 1

        urls = []
        for shard in range(self.SHARD_COUNT):
            addr = os.getenv(f"CLUEWEB_ADDR_{shard}")
            port = os.getenv(f"CLUEWEB_PORT_{shard}")
            urls.append((shard, f"http://{addr}:{port}"))

        responses = fetch_all(urls, jsonquery)

        merged_indices = []
        merged_distances = []
        for response in responses:
            # response[0] is the shard number from ``urls``; response[1] is
            # the endpoint's payload with 'indices' and 'distances'.
            shard_offset = response[0] * self.SHARD_STRIDE
            merged_indices.extend(ind + shard_offset for ind in response[1]['indices'])
            merged_distances.extend(response[1]['distances'])

        # Sort indices based on distances and keep the k closest overall.
        ranked = sorted(zip(merged_distances, merged_indices))
        indices = [index for _, index in ranked][:k]

        def process_index(i):
            """Resolve one global index to a title/url/snippet dict."""
            # First lookup: global index -> (subfolder, index inside it).
            subfolder, index = query_range_dictionary(self.range_dictionaries['clueweb'], i)
            # Second lookup uses the table loaded from "<subfolder>.tsv".
            ranged = self.range_dictionaries[str(subfolder)]
            jsongz_id, doc_id = query_range_dictionary(ranged, index)

            subfolder_id = str(subfolder).zfill(2)
            jjsongz_id = str(jsongz_id).zfill(2)
            ddoc_id = str(doc_id).zfill(5)

            cweb_doc_id = f"clueweb22-en00{subfolder_id}-{jjsongz_id}-{ddoc_id}"
            clueweb_api = ClueWeb22Api(cweb_doc_id, os.getenv("CLUEWEB_PATH"))

            # SECURITY NOTE(review): eval() executes whatever text the corpus
            # reader returns.  If get_clean_text() yields JSON, json.loads()
            # would be the safe equivalent -- confirm the format before
            # switching.
            clean_txt = eval(clueweb_api.get_clean_text())

            # The first line of the clean text is used as the title; the
            # remaining lines are what the snippet is extracted from.
            text_lines = clean_txt["Clean-Text"].split('\n')
            title = (text_lines[0]
                     .replace("\t", "")
                     .replace("\r", "")
                     .replace("\'", "")
                     .replace("\"", "")
                     .strip())
            snippet = snippet_object.get_snippet(query, '\n'.join(text_lines[1:]))

            return {"name": title, "url": clean_txt["URL"].replace("\n", ""), "snippet": snippet}

        # executor.map yields results in the order of ``indices``, so the
        # distance ranking is preserved even though documents load in parallel.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(process_index, indices))

        return results
88 |
--------------------------------------------------------------------------------
/backend/search/pile/ranges/pile.tsv:
--------------------------------------------------------------------------------
1 | 0 1748721
2 | 1748721 3495968
3 | 3495968 5243964
4 | 5243964 6992434
5 | 6992434 8738421
6 | 8738421 10486291
7 | 10486291 12233326
8 | 12233326 13981871
9 | 13981871 15728132
10 | 15728132 17479986
11 | 17479986 19227744
12 | 19227744 20973991
13 | 20973991 22722836
14 | 22722836 24469871
15 | 24469871 26218610
16 | 26218610 27968173
17 | 27968173 29715302
18 | 29715302 31463714
19 | 31463714 33213207
20 | 33213207 34962040
21 | 34962040 36710622
22 | 36710622 38459678
23 | 38459678 40205464
24 | 40205464 41951040
25 | 41951040 43699734
26 | 43699734 45447127
27 | 45447127 47194609
28 | 47194609 48942937
29 | 48942937 50690114
30 | 50690114 52441354
31 |
--------------------------------------------------------------------------------
/backend/search/pile/search.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("/home/tevinw/ragviz/backend")
3 |
4 | import requests
5 | import os
6 | import time
7 | from helpers.concurrent_fetch import fetch_all
8 | from helpers.range_dictionary import create_range_dictionary, query_range_dictionary
9 | from search.search import Search
10 | import csv
11 | csv.field_size_limit(sys.maxsize)
12 | import concurrent.futures
13 | from threading import Lock
14 |
class PileSearch(Search):
    """Search backend that resolves nearest-neighbour hits to Pile TSV rows.

    On construction every ``*.tsv`` file under
    ``$PROJECT_DIR/backend/search/pile/ranges/`` is loaded with
    ``create_range_dictionary`` and stored under the file's stem.  Queries
    are fanned out to the HTTP endpoints named by the ``PILE_ADDR_<n>`` /
    ``PILE_PORT_<n>`` environment variables.
    """

    # Offset added to a shard-local hit index, indexed by shard number.
    SHARD_OFFSETS = [0, 13981871, 13981871 + 13986302, 13981871 + 13986302 + 12237291]
    # Divisor used to split a subfolder-local index into a part-file number
    # and a row number inside that part file.
    ROWS_PER_PART = 25000

    def __init__(self):
        """Load the range tables and initialise the query-id counter."""
        directory = f'{os.getenv("PROJECT_DIR")}/backend/search/pile/ranges/'
        self.range_dictionaries = {}

        for filename in os.listdir(directory):
            if filename.endswith('.tsv'):
                file_path = os.path.join(directory, filename)
                # Key each table by the file name without its extension.
                index = filename.split('.')[0]
                self.range_dictionaries[index] = create_range_dictionary(file_path)

        # Guards ``query_id`` so concurrent requests get distinct ids.
        self.lock = Lock()
        self.query_id = 0

    def get_search_results(self, embedding, k, query, snippet_object):
        """Return up to ``k`` hits for ``embedding`` as a list of dicts.

        Args:
            embedding: Query vector sent to each endpoint as ``"query"``; it
                is also what is handed to ``snippet_object.get_snippet``.
            k: Number of hits requested from each endpoint and kept after
                merging.
            query: Accepted for interface compatibility; not used here.
            snippet_object: Object exposing ``get_snippet(query, text)``.

        Returns:
            A list of ``{"name", "url", "snippet"}`` dicts ordered by
            ascending distance.  ``name`` and ``snippet`` are ``None`` when
            the expected row is not present in the corpus file.
        """
        with self.lock:
            jsonquery = {"Ls": 256,
                         "query_id": self.query_id,
                         "query": embedding,
                         "k": k}
            self.query_id += 1

        urls = [
            (0, f'http://{os.getenv("PILE_ADDR_0")}:{os.getenv("PILE_PORT_0")}'),
            (1, f'http://{os.getenv("PILE_ADDR_1")}:{os.getenv("PILE_PORT_1")}'),
            (2, f'http://{os.getenv("PILE_ADDR_2")}:{os.getenv("PILE_PORT_2")}'),
            (3, f'http://{os.getenv("PILE_ADDR_3")}:{os.getenv("PILE_PORT_3")}'),
        ]

        start_time = time.perf_counter()
        responses = fetch_all(urls, jsonquery)

        merged_indices = []
        merged_distances = []
        for response in responses:
            # response[0] is the shard number from ``urls``; response[1] is
            # the endpoint's payload with 'indices' and 'distances'.
            shard_offset = self.SHARD_OFFSETS[response[0]]
            merged_indices.extend(ind + shard_offset for ind in response[1]['indices'])
            merged_distances.extend(response[1]['distances'])

        # Sort indices based on distances and keep the k closest overall.
        ranked = sorted(zip(merged_distances, merged_indices))
        indices = [index for _, index in ranked][:k]
        elapsed_time = time.perf_counter() - start_time
        print(f"QUERY AND RERANK TIME: {elapsed_time} seconds")

        def process_index(i):
            """Resolve one global index to a title/url/snippet dict."""
            start_time = time.perf_counter()
            subfolder, index = query_range_dictionary(self.range_dictionaries['pile'], i)
            subfolder_id = str(subfolder).zfill(2)

            # NOTE(review): every index except 0 is shifted down by one, so
            # indices 0 and 1 resolve to the same row -- confirm this is the
            # intended correction for the corpus numbering.
            if index != 0:
                index -= 1

            pile_part = str(index // self.ROWS_PER_PART + 1)
            # Compared against a 0-based enumerate() below, so reader row 0 of
            # each part file is never selected (presumably a header -- TODO
            # confirm).
            line_number = index % self.ROWS_PER_PART + 1

            corpus_path = f'{os.getenv("PILE_PATH")}/{subfolder_id}/full_corpus_{subfolder_id}_part_{pile_part}.tsv'

            title = None
            snippet = None

            # Scan the part file until the wanted row is reached.
            with open(corpus_path, 'r', encoding='utf-8') as tsv_file:
                reader = csv.reader(tsv_file, delimiter='\t')
                for current_line, row in enumerate(reader):
                    if current_line == line_number:
                        # Column 1 is used as the title, column 2 as the text.
                        title = row[1]
                        elapsed_time = time.perf_counter() - start_time
                        print(f"PILE FETCH DOCUMENT TIME: {elapsed_time} seconds")
                        # NOTE(review): the ClueWeb backend passes ``query``
                        # here, this one passes ``embedding`` -- confirm
                        # which one the snippet implementations expect.
                        snippet = snippet_object.get_snippet(embedding, row[2])
                        break

            return {"name": title, "url": "http://google.com", "snippet": snippet}

        # executor.map yields results in the order of ``indices``, so the
        # distance ranking is preserved even though rows load in parallel.
        with concurrent.futures.ThreadPoolExecutor() as executor:
            results = list(executor.map(process_index, indices))

        return results
--------------------------------------------------------------------------------
/backend/search/search.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append("/home/tevinw/ragviz/backend")
3 | from abc import ABC, abstractmethod
4 |
class Search(ABC):
    """Abstract interface shared by the corpus-specific search backends."""

    @abstractmethod
    def get_search_results(self, embedding, k, query, snippet_object):
        """Look up results for a query; concrete backends must override this.

        The base implementation does nothing and returns ``None``.
        """
--------------------------------------------------------------------------------
/backend/snippet/naive_first.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import time
3 | sys.path.append("/home/tevinw/ragviz/backend")
4 |
5 | from snippet.snippet import Snippet
6 |
class NaiveFirstSnippet(Snippet):
    """Snippet strategy that returns the leading tokens of the article."""

    def __init__(self, tokenizer, max_tokens=128):
        """Store the tokenizer and the snippet length.

        Args:
            tokenizer: Object providing ``tokenize(text)`` and
                ``convert_tokens_to_string(tokens)``.
            max_tokens: How many leading tokens to keep; defaults to 128.
        """
        self.tokenizer = tokenizer
        self.max_tokens = max_tokens

    def get_snippet(self, query, article):
        """Return the first ``max_tokens`` tokens of ``article`` as a string.

        ``query`` is ignored; it is accepted only to match the ``Snippet``
        interface.  The elapsed time is printed for profiling.
        """
        start_time = time.perf_counter()
        leading_tokens = self.tokenizer.tokenize(article)[:self.max_tokens]
        snippet_text = self.tokenizer.convert_tokens_to_string(leading_tokens)
        elapsed_time = time.perf_counter() - start_time
        print(f"NAIVE FIRST SNIPPET TIME: {elapsed_time}")
        return snippet_text
--------------------------------------------------------------------------------
/backend/snippet/sliding_window.py:
--------------------------------------------------------------------------------
1 |
2 | import sys
3 | sys.path.append("/home/tevinw/ragviz/backend")
4 |
5 | from snippet.snippet import Snippet
6 | import torch
7 | import time
8 |
class SlidingWindowSnippet(Snippet):
    """Snippet strategy that picks the article window most similar to the query.

    The article is scanned in windows of ``window_size`` token ids, advancing
    ``stride`` ids at a time.  Each window is run through ``model`` and the
    first position of ``last_hidden_state`` is compared to the query vector
    by cosine similarity.
    """

    def __init__(self, tokenizer, model, stride, window_size):
        """Store the tokenizer, the embedding model and the window geometry."""
        self.tokenizer = tokenizer
        self.model = model
        self.stride = stride
        self.window_size = window_size

    def get_snippet(self, query, article):
        """Return the text of the best-scoring window of ``article``.

        Args:
            query: Query embedding; converted with ``torch.tensor`` and
                compared against the window embeddings.
            article: Document text to scan.

        Returns:
            The winning window's tokens joined back into a string (an empty
            token list is converted if no window was scored).
        """
        start_time = time.perf_counter()
        tokens = self.tokenizer.tokenize(article)
        input_ids = self.tokenizer(article, return_tensors="pt").input_ids
        # The model is fed the same ids on the decoder side.
        decoder_input_ids = input_ids.detach().clone()

        # The query does not change between windows, so convert and normalise
        # it once instead of on every iteration.
        query_unit = torch.nn.functional.normalize(torch.tensor(query), dim=0)

        best_tokens = []
        best_sim = -torch.inf

        for i in range(0, len(input_ids[0]), self.stride):
            window = slice(i, i + self.window_size)
            cur_input_ids = input_ids[:, window]
            cur_decoder_input_ids = decoder_input_ids[:, window]

            with torch.no_grad():
                outputs = self.model(input_ids=cur_input_ids, decoder_input_ids=cur_decoder_input_ids)

            # Embedding at the first position of the first (only) sequence.
            snippet_embedding = outputs.last_hidden_state[0, 0]

            sim = float(torch.dot(query_unit, torch.nn.functional.normalize(snippet_embedding, dim=0)))
            if i == 0:
                # The first window is what the naive-first strategy would pick.
                print(f"NAIVE FIRST SIMILARITY: {sim}")
            if sim > best_sim:
                best_sim = sim
                # NOTE(review): ``tokens`` comes from tokenize() while the
                # windows are cut from the encoded ids; this assumes both
                # sequences are position-aligned -- confirm for the tokenizer
                # in use.
                best_tokens = tokens[window]

        print(f"SLIDING WINDOW SIMILARITY: {best_sim}")
        res = self.tokenizer.convert_tokens_to_string(best_tokens)
        elapsed_time = time.perf_counter() - start_time
        print(f"SLIDING WINDOW SNIPPET TIME: {elapsed_time} seconds")
        return res
--------------------------------------------------------------------------------
/backend/snippet/snippet.py:
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 |
class Snippet(ABC):
    """Abstract interface for strategies that cut a snippet out of an article."""

    @abstractmethod
    def get_snippet(self, query, article):
        """Produce a snippet of ``article`` for ``query``; subclasses override.

        The base implementation does nothing and returns ``None``.
        """
--------------------------------------------------------------------------------
/frontend/.env.local.example:
--------------------------------------------------------------------------------
1 | NEXT_PUBLIC_BASE_PATH=
--------------------------------------------------------------------------------
/frontend/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "plugins": ["unused-imports"],
3 | "extends": ["next/core-web-vitals", "plugin:prettier/recommended"],
4 | "rules": {
5 | "unused-imports/no-unused-imports": "error"
6 | }
7 | }
8 |
--------------------------------------------------------------------------------
/frontend/next-env.d.ts:
--------------------------------------------------------------------------------
1 | ///
2 | ///
3 |
4 | // NOTE: This file should not be edited
5 | // see https://nextjs.org/docs/basic-features/typescript for more information.
6 |
--------------------------------------------------------------------------------
/frontend/next.config.mjs:
--------------------------------------------------------------------------------
/**
 * Next.js configuration factory.
 *
 * In production the app is built as a static export served from
 * `<NEXT_PUBLIC_BASE_PATH>/ui`; otherwise `/query` is proxied to the
 * backend on localhost:8080.
 *
 * @param {string} phase Build phase supplied by Next.js (unused).
 * @param {{ defaultConfig: import("next").NextConfig }} context Unused.
 * @returns {import("next").NextConfig}
 */
const nextConfig = (phase, { defaultConfig }) => {
  if (process.env.NODE_ENV === "production") {
    // An unset variable would otherwise be interpolated as the literal
    // string "undefined", yielding "undefined/ui".
    const basePath = process.env.NEXT_PUBLIC_BASE_PATH ?? "";

    /** @type {import("next").NextConfig} */
    const productionConfig = {
      output: "export",
      assetPrefix: `${basePath}/ui/`,
      basePath: `${basePath}/ui`,
      distDir: "../ui",
    };
    return productionConfig;
  }

  /** @type {import("next").NextConfig} */
  const developmentConfig = {
    async rewrites() {
      return [
        {
          source: "/query",
          destination: "http://localhost:8080/query", // Proxy to Backend
        },
      ];
    },
  };
  return developmentConfig;
};

export default nextConfig;
26 |
--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "search",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@next/third-parties": "^14.0.4",
13 | "@radix-ui/react-popover": "^1.0.7",
14 | "@tailwindcss/forms": "^0.5.7",
15 | "@upstash/ratelimit": "^1.0.0",
16 | "@vercel/kv": "^1.0.1",
17 | "clsx": "^2.1.0",
18 | "headlessui": "^0.0.0",
19 | "lucide-react": "^0.309.0",
20 | "mdast-util-from-markdown": "^2.0.0",
21 | "nanoid": "^5.0.4",
22 | "next": "14.0.4",
23 | "react": "^18",
24 | "react-dom": "^18",
25 | "react-markdown": "^9.0.1",
26 | "tailwind-merge": "^2.2.0",
27 | "unist-builder": "^4.0.0"
28 | },
29 | "devDependencies": {
30 | "@tailwindcss/typography": "^0.5.10",
31 | "@types/node": "^20",
32 | "@types/react": "^18",
33 | "@types/react-dom": "^18",
34 | "autoprefixer": "^10.0.1",
35 | "eslint": "^8",
36 | "eslint-config-next": "14.0.4",
37 | "eslint-config-prettier": "^9.0.0",
38 | "eslint-plugin-prettier": "^5.0.1",
39 | "eslint-plugin-unused-imports": "^3.0.0",
40 | "postcss": "^8",
41 | "prettier": "^3.1.0",
42 | "tailwindcss": "^3.3.0",
43 | "typescript": "^5"
44 | }
45 | }
46 |
--------------------------------------------------------------------------------
/frontend/postcss.config.js:
--------------------------------------------------------------------------------
1 | module.exports = {
2 | plugins: {
3 | tailwindcss: {},
4 | autoprefixer: {},
5 | },
6 | };
7 |
--------------------------------------------------------------------------------
/frontend/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/android-chrome-192x192.png
--------------------------------------------------------------------------------
/frontend/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/android-chrome-512x512.png
--------------------------------------------------------------------------------
/frontend/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/apple-touch-icon.png
--------------------------------------------------------------------------------
/frontend/public/bg.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/frontend/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon-16x16.png
--------------------------------------------------------------------------------
/frontend/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon-32x32.png
--------------------------------------------------------------------------------
/frontend/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/favicon.ico
--------------------------------------------------------------------------------
/frontend/public/ragviz-square.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/cxcscmu/RAGViz/41d375c0dd2c712f167201ea538d044972a85ff2/frontend/public/ragviz-square.png
--------------------------------------------------------------------------------
/frontend/public/site.webmanifest:
--------------------------------------------------------------------------------
1 | {
2 | "name": "",
3 | "short_name": "",
4 | "icons": [
5 | {
6 | "src": "/android-chrome-192x192.png",
7 | "sizes": "192x192",
8 | "type": "image/png"
9 | },
10 | {
11 | "src": "/android-chrome-512x512.png",
12 | "sizes": "512x512",
13 | "type": "image/png"
14 | }
15 | ],
16 | "theme_color": "#ffffff",
17 | "background_color": "#ffffff",
18 | "display": "standalone"
19 | }
20 |
--------------------------------------------------------------------------------
/frontend/src/app/components/answer.tsx:
--------------------------------------------------------------------------------
1 | import { Skeleton } from "@/app/components/skeleton";
2 | import { Wrapper } from "@/app/components/wrapper";
3 | import { Source } from "@/app/interfaces/source";
4 | import { BookOpenText } from "lucide-react";
5 | import { Dispatch, FC, SetStateAction } from "react";
6 |
7 | export const Token: FC<{
8 | markdown: string;
9 | index: number;
10 | token: number[] | null;
11 | setToken: Dispatch>;
12 | clicked: number[] | null;
13 | setClicked: Dispatch>;
14 | }> = ({ markdown, index, token, setToken, clicked, setClicked }) => {
15 | const handleDragStart = (event: React.DragEvent) => {};
16 |
17 | const handleDragOver = (event: React.DragEvent) => {
18 | event.preventDefault();
19 | if (!token?.includes(index)) {
20 | const newTokens = token ? [...token, index] : [index];
21 | setClicked(newTokens);
22 | setToken(newTokens);
23 | }
24 | };
25 |
26 | const handleMouseEnter = () => {
27 | setToken([index]);
28 | };
29 |
30 | const className = token?.includes(index)
31 | ? "bg-red-300 hover:cursor-pointer"
32 | : "hover:bg-red-200 hover:cursor-pointer";
33 |
34 | const handleMouseLeave = () => {
35 | setToken(clicked);
36 | };
37 | return (
38 |
46 | {markdown}
47 |
48 | );
49 | };
50 | export const Answer: FC<{
51 | markdown: string[];
52 | compare: boolean;
53 | markdownCompare: string[];
54 | sources: Source[];
55 | token: number[] | null;
56 | tokenLength: number;
57 | setToken: Dispatch>;
58 | clicked: number[] | null;
59 | setClicked: Dispatch>;
60 | }> = ({
61 | markdown,
62 | compare,
63 | markdownCompare,
64 | sources,
65 | token,
66 | tokenLength,
67 | setToken,
68 | clicked,
69 | setClicked,
70 | }) => {
71 | return (
72 |
75 | Answer
76 | >
77 | }
78 | content={
79 | markdown && markdown.length > 0 && compare ? (
80 |
81 |
82 |
83 | Generation using all retrieved documents
84 |
85 | {markdown.map((str, index) => (
86 |
95 | ))}
96 |
97 | {markdownCompare && markdownCompare.length > 0 ? (
98 |
99 |
100 | Generation on selected documents and tokens
101 |
102 | {markdownCompare.map((str, index) => (
103 |
112 | ))}
113 |
114 | ) : (
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 | )}
123 |
124 | ) : markdown && markdown.length > 0 ? (
125 |
126 |
Generation on all retrieved documents
127 | {markdown.map((str, index) => (
128 |
137 | ))}
138 |
139 | ) : (
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 | )
148 | }
149 | >
150 | );
151 | };
152 |
--------------------------------------------------------------------------------
/frontend/src/app/components/footer.tsx:
--------------------------------------------------------------------------------
1 | import { FC } from "react";
2 |
3 | export const Footer: FC = () => {
4 | return (
5 |
6 |
7 | Answer generated by large language models, plz double check for
8 | correctness. UI adapted from Lepton.
9 |
10 |
11 | );
12 | };
13 |
--------------------------------------------------------------------------------
/frontend/src/app/components/logo.tsx:
--------------------------------------------------------------------------------
1 | import React, { FC } from "react";
2 |
3 | export const Logo: FC = () => {
4 | return (
5 |
6 |
7 |
8 |
9 |
10 | RAGViz
11 |
12 |
13 | alpha
14 |
15 |
16 | );
17 | };
18 |
--------------------------------------------------------------------------------
/frontend/src/app/components/popover.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as React from "react";
4 | import * as PopoverPrimitive from "@radix-ui/react-popover";
5 |
6 | import { cn } from "@/app/utils/cn";
7 |
8 | const Popover = PopoverPrimitive.Root;
9 |
10 | const PopoverTrigger = PopoverPrimitive.Trigger;
11 |
12 | const PopoverContent = React.forwardRef<
13 | React.ElementRef,
14 | React.ComponentPropsWithoutRef
15 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => (
16 |
17 |
27 |
28 | ));
29 | PopoverContent.displayName = PopoverPrimitive.Content.displayName;
30 |
31 | export { Popover, PopoverTrigger, PopoverContent };
32 |
--------------------------------------------------------------------------------
/frontend/src/app/components/preset-query.tsx:
--------------------------------------------------------------------------------
1 | import { getSearchUrl } from "@/app/utils/get-search-url";
2 | import { nanoid } from "nanoid";
3 | import Link from "next/link";
4 | import React, { FC, useMemo } from "react";
5 |
6 | export const PresetQuery: FC<{ query: string }> = ({ query }) => {
7 | const rid = useMemo(() => nanoid(), [query]);
8 | const k = "5";
9 | const apiKey = "";
10 | const snippet = "first";
11 |
12 | return (
13 |
19 | {query}
20 |
21 | );
22 | };
23 |
--------------------------------------------------------------------------------
/frontend/src/app/components/relates.tsx:
--------------------------------------------------------------------------------
1 | import { PresetQuery } from "@/app/components/preset-query";
2 | import { Skeleton } from "@/app/components/skeleton";
3 | import { Wrapper } from "@/app/components/wrapper";
4 | import { Relate } from "@/app/interfaces/relate";
5 | import { MessageSquareQuote } from "lucide-react";
6 | import React, { FC } from "react";
7 |
8 | export const Relates: FC<{ relates: Relate[] | null }> = ({ relates }) => {
9 | return (
10 |
13 | Related
14 | >
15 | }
16 | content={
17 |
18 | {relates !== null ? (
19 | relates.length > 0 ? (
20 | relates.map(({ question }) => (
21 |
22 | ))
23 | ) : (
24 |
No related questions.
25 | )
26 | ) : (
27 | <>
28 |
29 |
30 |
31 | >
32 | )}
33 |
34 | }
35 | >
36 | );
37 | };
38 |
--------------------------------------------------------------------------------
/frontend/src/app/components/result.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { Answer } from "@/app/components/answer";
3 | // import { Relates } from "@/app/components/relates";
4 | import { Sources } from "@/app/components/sources";
5 | import { Relate } from "@/app/interfaces/relate";
6 | import { Source } from "@/app/interfaces/source";
7 | import { parseRAG } from "@/app/utils/parse-rag";
8 | import { Annoyed } from "lucide-react";
9 | import { FC, useEffect, useState } from "react";
10 | import { parseRewrite } from "../utils/parse-rewrite";
11 |
12 | export const Result: FC<{
13 | query: string;
14 | rid: string;
15 | k: string;
16 | apiKey: string;
17 | snippet: string;
18 | modal: boolean;
19 | setModal: any;
20 | }> = ({ query, rid, k, apiKey, snippet, modal, setModal }) => {
21 | const [sources, setSources] = useState([]);
22 | const [token, setToken] = useState(null);
23 | const [clicked, setClicked] = useState(null);
24 | const [tokenLength, setTokenLength] = useState(0);
25 | const [markdown, setMarkdown] = useState([]);
26 | const [markdownCompare, setMarkdownCompare] = useState([]);
27 | const [compare, setCompare] = useState(false);
28 | const [relates, setRelates] = useState(null);
29 | const [error, setError] = useState(null);
30 | useEffect(() => {
31 | const controller = new AbortController();
32 | void parseRAG(
33 | controller,
34 | query,
35 | rid,
36 | k,
37 | apiKey,
38 | snippet,
39 | setSources,
40 | setMarkdown,
41 | setTokenLength,
42 | setError,
43 | );
44 | return () => {
45 | controller.abort();
46 | };
47 | }, [query]);
48 |
49 | const rewrite = (keep: boolean[][][]) => {
50 | const controller = new AbortController();
51 | setCompare(true);
52 | setMarkdownCompare([]);
53 | setToken([]);
54 | void parseRewrite(
55 | controller,
56 | query,
57 | sources,
58 | keep,
59 | rid,
60 | k,
61 | tokenLength,
62 | apiKey,
63 | snippet,
64 | setSources,
65 | setMarkdown,
66 | setMarkdownCompare,
67 | setError,
68 | );
69 | };
70 |
71 | return (
72 |
73 |
84 |
85 | {error && (
86 |
87 |
88 |
89 | {error === 429
90 | ? "Sorry, you have made too many requests recently, try again later."
91 | : "Sorry, we might be overloaded, try again later."}
92 |
93 |
94 | )}
95 | {modal && (
96 |
97 |
98 |
99 | 1. Hover over tokens in the Answer section to see the attention
100 | visualization for that generated token. If you want to lock that
101 | visualization or visualize multiple tokens, simply drag.
102 |
103 |
104 |
105 | 2. Remove documents by clicking the buttons, and remove tokens by
106 | dragging.
107 |
108 |
109 |
110 | 3. Press "Save and Rewrite" to see the new generation.
111 |
112 |
113 |
setModal(false)}
116 | >
117 | Close Instructions
118 |
119 |
120 |
121 | )}
122 |
123 | );
124 | };
125 |
--------------------------------------------------------------------------------
/frontend/src/app/components/search.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { getSearchUrl } from "@/app/utils/get-search-url";
3 | import { ArrowRight } from "lucide-react";
4 | import { nanoid } from "nanoid";
5 | import { useRouter } from "next/navigation";
6 | import React, { FC, useState } from "react";
7 |
8 | export const Search: FC<{
9 | defK: string;
10 | apiKey: string;
11 | defSnippet: string;
12 | }> = ({ defK, apiKey, defSnippet }) => {
13 | const [value, setValue] = useState("");
14 | const [k, setK] = useState(defK);
15 | const [snippet, setSnippet] = useState(defSnippet);
16 | const router = useRouter();
17 | return (
18 |
79 | );
80 | };
81 |
--------------------------------------------------------------------------------
/frontend/src/app/components/skeleton.tsx:
--------------------------------------------------------------------------------
1 | import { cn } from "@/app/utils/cn";
2 | import { HTMLAttributes } from "react";
3 |
4 | function Skeleton({ className, ...props }: HTMLAttributes) {
5 | return (
6 |
10 | );
11 | }
12 |
13 | export { Skeleton };
14 |
--------------------------------------------------------------------------------
/frontend/src/app/components/sources.tsx:
--------------------------------------------------------------------------------
1 | import { Skeleton } from "@/app/components/skeleton";
2 | import { Wrapper } from "@/app/components/wrapper";
3 | import { Source } from "@/app/interfaces/source";
4 | import { BookText } from "lucide-react";
5 | import { Dispatch, FC, SetStateAction, useEffect, useState } from "react";
6 |
7 | const SourceItem: FC<{
8 | source: Source;
9 | index: number;
10 | token: number[] | null;
11 | keep: boolean[][][];
12 | setKeep: Dispatch>;
13 | }> = ({ source, index, token, keep, setKeep }) => {
14 | const { id, url, nameTokens, snippetTokens, attn } = source;
15 | const domain = new URL(url).hostname;
16 | const highlight = (score: number | null) =>
17 | score == null || score < 0.1
18 | ? ""
19 | : score >= 0.1 && score <= 0.2
20 | ? "bg-red-100"
21 | : score > 0.2 && score <= 0.4
22 | ? "bg-red-200"
23 | : score > 0.4 && score <= 0.6
24 | ? "bg-red-300"
25 | : score > 0.6 && score <= 0.8
26 | ? "bg-red-400"
27 | : "bg-red-500"; // Default shade
28 | const handleRemoveDocument = (set: boolean) => () => {
29 | const newKeep = [...keep];
30 | newKeep[index] = [
31 | newKeep[index][0].map(() => set),
32 | newKeep[index][1].map(() => set),
33 | ];
34 | setKeep(newKeep);
35 | };
36 |
37 | const handleDragStart =
38 | (tokenIndex: number, name: boolean) =>
39 | (event: React.DragEvent) => {
40 | // event.preventDefault(); // Prevent text selection
41 | // event.dataTransfer.setData("text/plain", String(index));
42 | // if (!event.target.classList.contains("allow-drag")) {
43 | // event.target.classList.add("select-none");
44 | // event.preventDefault();
45 | // }
46 | // event.dataTransfer.dropEffect = "move";
47 | const newKeep = [...keep];
48 | console.log(tokenIndex);
49 | const nameSnippet = name ? 0 : 1;
50 | newKeep[index][nameSnippet][tokenIndex] = false;
51 | setKeep(newKeep);
52 | };
53 | const handleDrop =
54 | (tokenIndex: number, name: boolean) =>
55 | (event: React.DragEvent) => {
56 | event.preventDefault();
57 | };
58 |
59 | const handleDragKeep =
60 | (tokenIndex: number, name: boolean) =>
61 | (event: React.DragEvent) => {
62 | event.preventDefault();
63 | event.dataTransfer.setData("text/plain", String(index));
64 | const newKeep = [...keep];
65 | console.log(tokenIndex);
66 | const nameSnippet = name ? 0 : 1;
67 | newKeep[index][nameSnippet][tokenIndex] = false;
68 | setKeep(newKeep);
69 | };
70 |
71 | return (
72 |
73 |
74 |
Attention score:
75 |
76 | {token == null ||
77 | !token.map((t) => attn[t]) ||
78 | token.length <= 0 ||
79 | attn[token[0]] == null
80 | ? "None"
81 | : (
82 | Math.round(
83 | token
84 | .map((t) => attn[t].score)
85 | .reduce((acc, curr) => acc + curr) * 1000,
86 | ) /
87 | (1000 * token.length)
88 | ).toFixed(3)}
89 |
90 |
91 |
95 | Add All Tokens
96 |
97 |
101 | Remove All Tokens
102 |
103 |
104 |
105 |
108 | {/*
*/}
109 |
110 | {nameTokens.map((str, tokenIndex) => (
111 | attn[t]) &&
120 | token.length > 0 &&
121 | attn[token[0]] != null
122 | ? token
123 | .map((t) => attn[t].name[tokenIndex] * 1000)
124 | .reduce((acc, curr) => acc + curr) / token.length
125 | : null,
126 | )} ${keep[index] && keep[index][0][tokenIndex] ? "opacity-100" : "opacity-50"} cursor-pointer`}
127 | >
128 | {str}
129 |
130 | ))}
131 |
132 |
133 |
134 | {snippetTokens.map((str, tokenIndex) => (
135 | attn[t]) &&
144 | token.length > 0 &&
145 | attn[token[0]] != null
146 | ? token
147 | .map((t) => attn[t].snippet[tokenIndex] * 1000)
148 | .reduce((acc, curr) => acc + curr) / token.length
149 | : null,
150 | )} ${keep[index] && keep[index][1][tokenIndex] ? "opacity-100" : "opacity-50"} cursor-pointer`}
151 | >
152 | {str}
153 |
154 | ))}
155 |
156 |
157 | {/*
158 |
159 |
160 | {index + 1} - {domain}
161 |
162 |
163 |
164 |
169 |
170 |
*/}
171 |
172 |
173 | );
174 | };
175 |
/**
 * Clones an arbitrarily nested array structure.
 *
 * Only the arrays themselves are duplicated; any non-array value (primitives,
 * objects, null, undefined) is returned by reference/value exactly as given.
 *
 * @param arr - Value to clone; may be an array at any nesting depth or a leaf.
 * @returns A fresh array tree with the same leaves, or `arr` itself for leaves.
 */
function deepCopyArray(arr: any): any {
  return Array.isArray(arr) ? arr.map((item) => deepCopyArray(item)) : arr;
}
183 |
184 | export const Sources: FC<{
185 | sources: Source[];
186 | token: number[] | null;
187 | rewrite: (keep: boolean[][][]) => void;
188 | }> = ({ sources, token, rewrite }) => {
189 | const [keep, setKeep]: [
190 | boolean[][][],
191 | Dispatch>,
192 | ] = useState([] as boolean[][][]);
193 | const [oldKeep, setOldKeep]: [
194 | boolean[][][],
195 | Dispatch>,
196 | ] = useState([] as boolean[][][]);
197 |
198 | const handleSave = () => {
199 | setOldKeep(deepCopyArray(keep));
200 | rewrite(keep);
201 | };
202 |
203 | const handleCancel = () => {
204 | // const newKeep = [...oldKeep];
205 | setKeep(deepCopyArray(oldKeep));
206 | };
207 |
208 | useEffect(() => {
209 | // This code will run only once, immediately after the component mounts
210 | setKeep(
211 | sources.map((source) => [
212 | source.nameTokens.map(() => true),
213 | source.snippetTokens.map(() => true),
214 | ]),
215 | );
216 | setOldKeep(
217 | sources.map((source) => [
218 | source.nameTokens.map(() => true),
219 | source.snippetTokens.map(() => true),
220 | ]),
221 | );
222 | }, [sources.length]);
223 | return (
224 |
227 |
228 |
Sources
229 | {!(JSON.stringify(oldKeep) === JSON.stringify(keep)) && (
230 | <>
231 |
235 | Save and Rewrite
236 |
237 |
241 | Cancel
242 |
243 | >
244 | )}
245 |
246 | >
247 | }
248 | content={
249 |
250 | {sources.length > 0 ? (
251 | sources.map((item, index) => (
252 |
260 | ))
261 | ) : (
262 | <>
263 |
264 |
265 |
266 |
267 | >
268 | )}
269 |
270 | }
271 | >
272 | );
273 | };
274 |
--------------------------------------------------------------------------------
/frontend/src/app/components/title.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { RefreshCcw } from "lucide-react";
3 | import { nanoid } from "nanoid";
4 | // import { getSearchUrl } from "@/app/utils/get-search-url";
5 | // import { RefreshCcw } from "lucide-react";
6 | // import { nanoid } from "nanoid";
7 | import { useRouter } from "next/navigation";
8 | import { getSearchUrl } from "../utils/get-search-url";
9 | import { BookText } from "lucide-react";
10 |
11 | export const Title = ({
12 | query,
13 | k,
14 | apiKey,
15 | snippet,
16 | setModal,
17 | }: {
18 | query: string;
19 | k: string;
20 | apiKey: string;
21 | snippet: string;
22 | setModal: any;
23 | }) => {
24 | const router = useRouter();
25 | return (
26 |
27 |
31 | {query}
32 |
33 |
setModal(true)}
35 | type="button"
36 | className="rounded flex gap-2 items-center bg-transparent px-2 py-1 text-xs font-semibold text-blue-500 hover:bg-zinc-100"
37 | >
38 | Instructions
39 |
40 |
41 | {
43 | router.push(
44 | getSearchUrl(
45 | encodeURIComponent(query),
46 | nanoid(),
47 | encodeURIComponent(k),
48 | encodeURIComponent(apiKey),
49 | encodeURIComponent(snippet),
50 | ),
51 | );
52 | }}
53 | type="button"
54 | className="rounded flex gap-2 items-center bg-transparent px-2 py-1 text-xs font-semibold text-blue-500 hover:bg-zinc-100"
55 | >
56 | Rewrite
57 |
58 |
59 |
60 | );
61 | };
62 |
--------------------------------------------------------------------------------
/frontend/src/app/components/wrapper.tsx:
--------------------------------------------------------------------------------
1 | import { FC, ReactNode } from "react";
2 |
3 | export const Wrapper: FC<{
4 | title: ReactNode;
5 | content: ReactNode;
6 | }> = ({ title, content }) => {
7 | return (
8 |
9 |
{title}
10 | {content}
11 |
12 | );
13 | };
14 |
--------------------------------------------------------------------------------
/frontend/src/app/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | input:-webkit-autofill,
6 | input:-webkit-autofill:hover,
7 | input:-webkit-autofill:focus,
8 | textarea:-webkit-autofill,
9 | textarea:-webkit-autofill:hover,
10 | textarea:-webkit-autofill:focus,
11 | select:-webkit-autofill,
12 | select:-webkit-autofill:hover,
13 | select:-webkit-autofill:focus {
14 | -webkit-background-clip: text;
15 | }
16 |
--------------------------------------------------------------------------------
/frontend/src/app/interfaces/relate.ts:
--------------------------------------------------------------------------------
/**
 * Shape of one "relate" entry: an object carrying a single `question` string.
 * NOTE(review): presumably a suggested follow-up question rendered by the
 * relates component — confirm against its caller.
 */
1 | export interface Relate {
2 | question: string;
3 | }
4 |
--------------------------------------------------------------------------------
/frontend/src/app/interfaces/source.ts:
--------------------------------------------------------------------------------
/** A secondary link listed under a search result. */
interface SourceDeepLink {
  snippet: string;
  name: string;
  url: string;
}

/** Thumbnail description attached to a search result, when one exists. */
interface SourceImage {
  thumbnailUrl: string;
  width: number;
  height: number;
  imageId: string;
}

/**
 * Attention paid to one document for a single answer token: a per-token weight
 * for the document's name tokens and snippet tokens, plus an overall score.
 */
interface SourceAttention {
  name: number[];
  snippet: number[];
  score: number;
}

/**
 * One retrieved document as exchanged between the frontend and the backend.
 *
 * The search-result metadata fields mirror what the backend returns
 * (NOTE(review): field semantics beyond their types are not visible here).
 * `nameTokens` / `snippetTokens` are the tokenized title and snippet that the
 * UI renders token by token, and `attn` holds one entry per answer token index.
 */
export interface Source {
  id: string;
  name: string;
  url: string;
  isFamilyFriendly: boolean;
  displayUrl: string;
  snippet: string;
  deepLinks: SourceDeepLink[];
  dateLastCrawled: string;
  cachedPageUrl: string;
  language: string;
  primaryImageOfPage?: SourceImage;
  attn: SourceAttention[];
  isNavigational: boolean;
  nameTokens: string[];
  snippetTokens: string[];
}
27 |
--------------------------------------------------------------------------------
/frontend/src/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import type { Metadata } from "next";
2 | import { Inter } from "next/font/google";
3 | import "./globals.css";
4 | import { ReactNode } from "react";
5 |
6 | const inter = Inter({ subsets: ["latin"] });
7 |
8 | export const metadata: Metadata = {
9 | title: "RAGViz",
10 | description:
11 | "Answer generated by large language models (LLMs). Double check for correctness.",
12 | };
13 |
14 | export default function RootLayout({ children }: { children: ReactNode }) {
15 | return (
16 |
17 |
18 | {/* Adding the favicon */}
19 | RAGViz {/* Using metadata title */}
20 | {" "}
24 | {/* Using metadata description */}
25 |
26 | {children}
27 |
28 | );
29 | }
30 |
--------------------------------------------------------------------------------
/frontend/src/app/page.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { Footer } from "@/app/components/footer";
3 | import { Logo } from "@/app/components/logo";
4 | // import { PresetQuery } from "@/app/components/preset-query";
5 | import { Search } from "@/app/components/search";
6 | import React, { useState } from "react";
7 |
8 | export default function Home() {
9 | const [apiKey, setApiKey] = useState("");
10 | return (
11 |
12 |
13 |
14 |
18 | API Key:
19 | setApiKey(e.target.value)}
23 | autoFocus
24 | placeholder="Add your search engine API key..."
25 | className="pl-5 pr-6 py-2 w-full flex-1 outline-none bg-white border rounded-lg"
26 | />
27 |
28 |
29 |
30 |
31 |
32 |
33 | );
34 | }
35 |
--------------------------------------------------------------------------------
/frontend/src/app/search/page.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 | import { Result } from "@/app/components/result";
3 | import { Search } from "@/app/components/search";
4 | import { Title } from "@/app/components/title";
5 | import { useSearchParams } from "next/navigation";
6 | import { useState } from "react";
7 |
8 | export default function SearchPage() {
9 | const searchParams = useSearchParams();
10 | const query = decodeURIComponent(searchParams.get("q") || "");
11 | const rid = decodeURIComponent(searchParams.get("rid") || "");
12 | const k = decodeURIComponent(searchParams.get("k") || "5");
13 | const apiKey = decodeURIComponent(searchParams.get("api_key") || "");
14 | const snippet = decodeURIComponent(searchParams.get("snippet") || "first");
15 | const [modal, setModal] = useState(false);
16 | return (
17 |
18 |
19 |
20 |
21 |
28 |
38 |
39 |
40 |
45 |
46 |
47 | );
48 | }
49 |
--------------------------------------------------------------------------------
/frontend/src/app/utils/cn.ts:
--------------------------------------------------------------------------------
1 | import { type ClassValue, clsx } from "clsx";
2 | import { twMerge } from "tailwind-merge";
3 |
/**
 * Combines class-name inputs into one string.
 *
 * The inputs are first flattened by `clsx`, and the resulting string is then
 * passed through `twMerge`.
 *
 * @param inputs - Any values accepted by `clsx`.
 * @returns The merged class string.
 */
export function cn(...inputs: ClassValue[]) {
  const joined = clsx(inputs);
  return twMerge(joined);
}
7 |
--------------------------------------------------------------------------------
/frontend/src/app/utils/fetch-stream.ts:
--------------------------------------------------------------------------------
/**
 * Drains `reader` to completion, mirroring every chunk into `controller`.
 *
 * For each chunk, `onChunk` (if given) is invoked before the chunk is
 * enqueued. When the reader reports completion, `onDone` (if given) is invoked
 * and the controller is closed.
 *
 * @param reader - Source of byte chunks.
 * @param controller - Controller of the stream that re-emits the chunks.
 * @param onChunk - Optional observer called with each chunk.
 * @param onDone - Optional observer called once, right before closing.
 * @returns A promise that settles when the source is exhausted; it rejects if
 *   `reader.read()` or one of the callbacks throws.
 */
async function pump(
  reader: ReadableStreamDefaultReader<Uint8Array>,
  controller: ReadableStreamDefaultController,
  onChunk?: (chunk: Uint8Array) => void,
  onDone?: () => void,
): Promise<void> {
  // A plain loop replaces the self-recursive form, which stacked one pending
  // async call per chunk until the whole stream had been read.
  for (;;) {
    const { done, value } = await reader.read();
    if (done) {
      onDone?.();
      controller.close();
      return;
    }
    onChunk?.(value);
    controller.enqueue(value);
  }
}
/**
 * Wraps a fetch `Response` body in a new `ReadableStream` that re-emits the
 * body's chunks while letting the caller observe them.
 *
 * The body reader is acquired immediately when this function is called; the
 * actual copying is delegated to `pump` from the new stream's `start` hook.
 *
 * @param response - Response whose body is read (the body is assumed present).
 * @param onChunk - Optional observer forwarded to `pump`.
 * @param onDone - Optional completion observer forwarded to `pump`.
 * @returns The re-emitting stream.
 */
export const fetchStream = (
  response: Response,
  onChunk?: (chunk: Uint8Array) => void,
  onDone?: () => void,
): ReadableStream => {
  const bodyReader = response.body!.getReader();
  const relay = new ReadableStream({
    start(controller) {
      return pump(bodyReader, controller, onChunk, onDone);
    },
  });
  return relay;
};
27 |
--------------------------------------------------------------------------------
/frontend/src/app/utils/get-search-url.ts:
--------------------------------------------------------------------------------
/**
 * Builds the relative URL of the search page for one query.
 *
 * Every parameter value is percent-encoded here. Previously only `query` and
 * `apiKey` were encoded, so an `&`, `#` or `=` inside `search_uuid`, `k` or
 * `snippet` would have corrupted the query string. Values made only of
 * URL-safe characters produce exactly the same URL as before.
 *
 * @param query - Search query text (`q`).
 * @param search_uuid - Request id (`rid`).
 * @param k - Number of documents to retrieve, as a string (`k`).
 * @param apiKey - Search API key (`api_key`).
 * @param snippet - Snippet strategy name (`snippet`).
 * @returns `/search.html?...` in production builds, `/search?...` otherwise.
 */
export const getSearchUrl = (
  query: string,
  search_uuid: string,
  k: string,
  apiKey: string,
  snippet: string,
) => {
  const prefix =
    process.env.NODE_ENV === "production" ? "/search.html" : "/search";
  const params: Array<[string, string]> = [
    ["q", query],
    ["rid", search_uuid],
    ["k", k],
    ["api_key", apiKey],
    ["snippet", snippet],
  ];
  const queryString = params
    .map(([key, value]) => `${key}=${encodeURIComponent(value)}`)
    .join("&");
  return `${prefix}?${queryString}`;
};
12 |
--------------------------------------------------------------------------------
/frontend/src/app/utils/parse-rag.ts:
--------------------------------------------------------------------------------
1 | import { Source } from "@/app/interfaces/source";
2 |
/**
 * Runs one query against the backend's `query.cgi` endpoint and reports the
 * result through callbacks.
 *
 * On HTTP 200 the callbacks fire in this order: `onTokenLength` (number of
 * attention entries on the first returned document), `onSources` (all returned
 * documents), `onMarkdown` (the answer). The returned promise settles only
 * after those callbacks have run.
 *
 * @param controller - Abort controller whose signal is attached to the request.
 * @param query - Query text.
 * @param search_uuid - Request id forwarded to the backend.
 * @param k - Number of documents to retrieve, as a string.
 * @param apiKey - Search API key forwarded to the backend.
 * @param snippet - Snippet strategy name forwarded to the backend.
 * @param onSources - Receives the documents, or `[]` if the payload is unusable.
 * @param onMarkdown - Receives the answer.
 * @param onTokenLength - Receives the answer's attention length.
 * @param onError - Receives the HTTP status when it is not 200.
 * @returns A promise that rejects only if `fetch` itself rejects
 *   (network failure or abort).
 */
export const parseRAG = async (
  controller: AbortController,
  query: string,
  search_uuid: string,
  k: string,
  apiKey: string,
  snippet: string,
  onSources: (value: Source[]) => void,
  onMarkdown: (value: string[]) => void,
  onTokenLength: (value: number) => void,
  onError?: (status: number) => void,
) => {
  const url = `${process.env.NEXT_PUBLIC_BASE_PATH}/query.cgi?query=${encodeURIComponent(query)}&search_uuid=${encodeURIComponent(search_uuid)}&k=${encodeURIComponent(k)}&api_key=${encodeURIComponent(apiKey)}&snippet=${encodeURIComponent(snippet)}`;

  const response = await fetch(url, {
    method: "GET",
    headers: {
      "Content-Type": "application/json",
      Accept: "*/*",
    },
    signal: controller.signal,
  });

  if (response.status !== 200) {
    onError?.(response.status);
    return;
  }

  try {
    // The decoded response body is itself JSON text, so it is parsed a second
    // time — once, rather than separately for every field that is read.
    const payload = JSON.parse(await response.json());
    onTokenLength(payload.docs[0].attn.length);
    onSources(payload.docs);
    onMarkdown(payload.answer);
  } catch (error) {
    // Covers malformed payloads, an empty `docs` list and throwing callbacks.
    console.error("Error fetching RAG backend:", error);
    onSources([]);
  }
};
43 |
--------------------------------------------------------------------------------
/frontend/src/app/utils/parse-rewrite.ts:
--------------------------------------------------------------------------------
1 | import { Source } from "@/app/interfaces/source";
2 |
// Marker strings. Neither constant is referenced in the visible part of this
// module. NOTE(review): presumably delimiters left over from a streamed
// response format — confirm before removing.
3 | const LLM_SPLIT = "__LLM_RESPONSE__";
4 | const RELATED_SPLIT = "__RELATED_QUESTIONS__";
5 |
/**
 * Expands one attention entry computed over a token-filtered document back to
 * the document's full token positions.
 *
 * @param attnElement - Attention entry with `name` and `snippet` weight lists
 *   and a `score`.
 * @param filter - Two masks: `filter[0]` for the name tokens, `filter[1]` for
 *   the snippet tokens.
 * @returns A new entry whose `name` and `snippet` lists are as long as their
 *   masks; `score` is passed through unchanged.
 */
function unfilterAndPadAttention(
  attnElement: { name: any; snippet: any; score: any },
  filter: any[],
) {
  const nameMask = filter[0];
  const snippetMask = filter[1];
  return {
    name: unfilterAndPadSingleArray(attnElement.name, nameMask),
    snippet: unfilterAndPadSingleArray(attnElement.snippet, snippetMask),
    score: attnElement.score,
  };
}
21 |
/**
 * Spreads the values of `array` over the truthy positions of `filter`.
 *
 * The result has one slot per `filter` entry. Truthy positions consume the
 * next unused value of `array`, in order; falsy positions, and truthy
 * positions reached after `array` has run out, are filled with `0`.
 *
 * @param array - Values for the kept positions.
 * @param filter - Mask with one entry per original position.
 * @returns The padded list, `filter.length` entries long.
 */
function unfilterAndPadSingleArray(
  array: string | any[],
  filter: string | any[],
) {
  let consumed = 0;
  return Array.from({ length: filter.length }, (_, position) => {
    const hasValue = Boolean(filter[position]) && consumed < array.length;
    return hasValue ? array[consumed++] : 0;
  });
}
45 |
/**
 * Asks the backend's `rewrite.cgi` endpoint to regenerate the answer from the
 * tokens the user kept, then merges the returned attention into the sources.
 *
 * A document is sent only if at least one of its snippet tokens, or one of its
 * name tokens other than the first and last, is kept (NOTE(review): the first
 * and last name tokens are presumably delimiters — confirm). Sent documents
 * carry only their kept tokens plus their `originalIndex`.
 *
 * In the sources handed to `onSources`, every document's `attn` keeps its
 * first `tokenLength` entries. Documents that were sent additionally get the
 * returned attention appended, expanded back to full token positions with
 * zeros at removed tokens. The caller's `sources` objects are not modified.
 *
 * @param controller - Abort controller whose signal is attached to the request.
 * @param query - Query text.
 * @param sources - Current documents.
 * @param keep - Per document: `[nameMask, snippetMask]` of kept tokens.
 * @param search_uuid - Request id forwarded to the backend.
 * @param k - Number of documents, as a string, forwarded to the backend.
 * @param tokenLength - Number of leading `attn` entries to preserve.
 * @param apiKey - Search API key forwarded to the backend.
 * @param snippet - Snippet strategy name forwarded to the backend.
 * @param onSources - Receives the merged documents, or `[]` on a bad payload.
 * @param onMarkdown - Accepted for signature compatibility; never invoked.
 * @param onMarkdownCompare - Receives the regenerated answer.
 * @param onError - Receives the HTTP status when it is not 200.
 * @returns A promise that settles after the callbacks have run; it rejects
 *   only if `fetch` itself rejects or `keep` does not cover every source.
 */
export const parseRewrite = async (
  controller: AbortController,
  query: string,
  sources: Source[],
  keep: boolean[][][],
  search_uuid: string,
  k: string,
  tokenLength: number,
  apiKey: string,
  snippet: string,
  onSources: (value: Source[]) => void,
  onMarkdown: (value: string[]) => void,
  onMarkdownCompare: (value: string[]) => void,
  onError?: (status: number) => void,
) => {
  const isKept = (index: number) =>
    [keep[index][0].slice(1, -1), keep[index][1]].flat().some(Boolean);

  // Positions (in `sources`) of the documents that are sent to the backend.
  const keptIndexes = sources.map((_, index) => index).filter(isKept);

  const newSources = keptIndexes.map((originalIndex) => {
    const source = sources[originalIndex];
    const [nameMask, snippetMask] = keep[originalIndex];
    const nameTokens = source.nameTokens.filter((_, i) => nameMask[i]);
    const snippetTokens = source.snippetTokens.filter((_, i) => snippetMask[i]);
    return {
      ...source,
      originalIndex,
      name: nameTokens.join(""),
      snippet: snippetTokens.join(""),
      nameTokens,
      snippetTokens,
    };
  });

  const url = `${process.env.NEXT_PUBLIC_BASE_PATH}/rewrite.cgi`;

  const response = await fetch(url, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Accept: "*/*",
    },
    signal: controller.signal,
    body: JSON.stringify({
      query,
      search_uuid,
      k,
      api_key: apiKey,
      results: newSources,
      snippet,
    }),
  });

  if (response.status !== 200) {
    onError?.(response.status);
    return;
  }

  try {
    // The decoded response body is itself JSON text, hence the second parse.
    const parsedData = JSON.parse(await response.json());

    // Work on copies so the caller's objects stay untouched, and drop any
    // attention left over from an earlier rewrite for EVERY document —
    // including removed documents that come after the last returned one.
    const updatedSources = sources.map((source) => ({
      ...source,
      attn: source.attn.slice(0, tokenLength),
    }));

    // Returned docs come back in the order the kept documents were sent.
    parsedData.docs.forEach((doc: Source, position: number) => {
      const target = keptIndexes[position];
      if (target === undefined) {
        console.error("Index out of range for updatedSources array");
        return;
      }
      const rewriteAttn = doc.attn.map((entry) =>
        unfilterAndPadAttention(entry, keep[target]),
      );
      updatedSources[target].attn =
        updatedSources[target].attn.concat(rewriteAttn);
    });

    onSources(updatedSources);
    onMarkdownCompare(parsedData.answer);
  } catch (error) {
    console.error("Error fetching sources:", error);
    onSources([]);
  }
};
162 |
--------------------------------------------------------------------------------
/frontend/tailwind.config.ts:
--------------------------------------------------------------------------------
1 | import type { Config } from "tailwindcss";
2 |
// Source globs listed under `content`.
const contentGlobs = [
  "./src/pages/**/*.{js,ts,jsx,tsx,mdx}",
  "./src/components/**/*.{js,ts,jsx,tsx,mdx}",
  "./src/app/**/*.{js,ts,jsx,tsx,mdx}",
];

// Additions layered on top of the default theme via `theme.extend`.
const themeExtension = {
  backgroundImage: {
    "gradient-radial": "radial-gradient(var(--tw-gradient-stops))",
    "gradient-conic":
      "conic-gradient(from 180deg at 50% 50%, var(--tw-gradient-stops))",
  },
  colors: {
    // Overrides only the 500 shade of blue.
    blue: {
      500: "#2F80ED",
    },
  },
};

/** Tailwind configuration for the frontend. */
const config: Config = {
  content: contentGlobs,
  theme: {
    extend: themeExtension,
  },
  plugins: [require("@tailwindcss/typography")],
};
export default config;
26 |
--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "es2015",
4 | "lib": ["dom", "dom.iterable", "esnext"],
5 | "allowJs": true,
6 | "skipLibCheck": true,
7 | "strict": true,
8 | "noEmit": true,
9 | "esModuleInterop": true,
10 | "module": "esnext",
11 | "moduleResolution": "bundler",
12 | "resolveJsonModule": true,
13 | "isolatedModules": true,
14 | "jsx": "preserve",
15 | "incremental": true,
16 | "plugins": [
17 | {
18 | "name": "next"
19 | }
20 | ],
21 | "paths": {
22 | "@/*": ["./src/*"]
23 | }
24 | },
25 | "include": [
26 | "next-env.d.ts",
27 | "**/*.ts",
28 | "**/*.tsx",
29 | ".next/types/**/*.ts",
30 | "../ui/types/**/*.ts"
31 | ],
32 | "exclude": ["node_modules"]
33 | }
34 |
--------------------------------------------------------------------------------