├── .eslintignore
├── .eslintrc.json
├── .gitignore
├── LICENSE
├── README.md
├── bun.lockb
├── components.json
├── next.config.mjs
├── package.json
├── postcss.config.mjs
├── public
├── android-chrome-192x192.png
├── android-chrome-512x512.png
├── apple-touch-icon.png
├── favicon-16x16.png
├── favicon-32x32.png
├── favicon.ico
├── next.svg
├── og-image.png
├── usage-1.png
├── usage-2.png
├── usage-3.png
└── vercel.svg
├── src
├── app
│ ├── api
│ │ └── [[...route]]
│ │ │ └── route.ts
│ ├── favicon.ico
│ ├── globals.css
│ ├── layout.tsx
│ └── page.tsx
├── components
│ ├── attribute-fields.tsx
│ ├── browse-template.tsx
│ ├── execution-time-result.tsx
│ ├── input-field.tsx
│ ├── json-result-view.tsx
│ ├── layout
│ │ ├── footer.tsx
│ │ ├── model-select.tsx
│ │ ├── navbar.tsx
│ │ ├── theme-provider.tsx
│ │ └── theme-toggler-button.tsx
│ ├── property-field.tsx
│ ├── property-items-field.tsx
│ ├── property-object-field.tsx
│ ├── result-section.tsx
│ ├── select-field.tsx
│ ├── skeleton.tsx
│ ├── template-form.tsx
│ ├── text-area-field.tsx
│ └── ui
│ │ ├── accordion.tsx
│ │ ├── button.tsx
│ │ ├── card.tsx
│ │ ├── checkbox.tsx
│ │ ├── dialog.tsx
│ │ ├── form.tsx
│ │ ├── input.tsx
│ │ ├── label.tsx
│ │ ├── select.tsx
│ │ └── textarea.tsx
├── controllers
│ └── extract-controller.ts
├── errors
│ ├── request-timeout-error.ts
│ └── validation-error.ts
├── hooks
│ └── use-debounce.tsx
├── lib
│ ├── constants.ts
│ ├── context-utils.ts
│ ├── embed-utils.ts
│ ├── env.ts
│ ├── error-utils.ts
│ ├── langchain-setup.ts
│ ├── llm-utils.ts
│ ├── string-utils.ts
│ ├── time-utils.ts
│ ├── types.ts
│ ├── utils.ts
│ └── web-scraper.ts
├── middlewares
│ └── rate-limiter-middleware.ts
├── routes
│ ├── extract-route.ts
│ └── vectorstore-routes.ts
├── schemas
│ └── template-schema.ts
└── store
│ ├── model-store.ts
│ └── template-store.ts
├── tailwind.config.ts
└── tsconfig.json
/.eslintignore:
--------------------------------------------------------------------------------
1 | src/components/ui/
2 |
3 | next.config.mjs
4 | src/lib/web-scraper.ts
--------------------------------------------------------------------------------
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["next/core-web-vitals", "airbnb", "airbnb-typescript"],
3 | "parserOptions": {
4 | "project": "./tsconfig.json"
5 | },
6 | "plugins": ["align-assignments"],
7 | "rules": {
8 | // to align json keys
9 | "key-spacing": [
10 | "warn",
11 | {
12 | "align": {
13 | "beforeColon": true,
14 | "afterColon": true,
15 | "on": "colon"
16 | }
17 | }
18 | ],
19 | "@typescript-eslint/quotes": "off",
20 | "react/react-in-jsx-scope": "off",
21 | "react/jsx-props-no-spreading": "off",
22 | "import/prefer-default-export": "off",
23 | "react/no-array-index-key": "off",
24 | "no-multi-spaces": "off",
25 | // for variable alignment
26 | "align-assignments/align-assignments": "warn",
27 | "react/require-default-props": "off",
28 | "max-len": "off",
29 | "@typescript-eslint/no-use-before-define": "off",
30 | "indent": [
31 | "error",
32 | 2,
33 | {
34 | "SwitchCase": 1
35 | }
36 | ],
37 | "react/jsx-curly-spacing": [
38 | "warn",
39 | {
40 | "when": "always",
41 | "children": true,
42 | "spacing": {
43 | "objectLiterals": "never"
44 | }
45 | }
46 | ]
47 | }
48 | }
49 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files.
2 |
3 | # dependencies
4 | /node_modules
5 | /.pnp
6 | .pnp.js
7 | .yarn/install-state.gz
8 |
9 | # testing
10 | /coverage
11 |
12 | # next.js
13 | /.next/
14 | /out/
15 |
16 | # production
17 | /build
18 |
19 | # misc
20 | .DS_Store
21 | *.pem
22 |
23 | # debug
24 | npm-debug.log*
25 | yarn-debug.log*
26 | yarn-error.log*
27 |
28 | # local env files
29 | .env*.local
30 | .env*
31 |
32 | # vercel
33 | .vercel
34 |
35 | # typescript
36 | *.tsbuildinfo
37 | next-env.d.ts
38 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2024 Imam Septian Adi Wijaya
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # JSON-Shift
2 |
3 | JSON-Shift is a tool designed to simplify web scraping and information extraction. It transforms website content into structured JSON format based on user-defined attributes.
4 |
5 | ## Table of Contents
6 |
7 | - [Key Features](#key-features)
8 | - [Setup Project](#setup-project)
9 | - [How This Works](#how-this-works)
10 | - [Usage Examples](#usage-examples)
11 | - [Extract array of mangas object from manga reading website](#extract-array-of-mangas-object-from-manga-reading-website)
12 | - [Extract Person Information from Wiki](#extract-person-information-from-wiki)
13 | - [Extract Indonesian News](#extract-indonesian-news)
14 | - [Technologies Used](#technologies-used)
15 | - [License](#license)
16 |
17 | ## Key Features
18 |
19 | To extract the information requested by the user, this project utilizes:
20 |
21 | - Web scraping using Puppeteer
22 | - Embedding with Cohere
23 | - Large Language Model integration (currently using Gemini and Groq free plans)
24 | - Intelligent content filtering
25 | - Vector store for efficient similarity search
26 | - Langchain to integrate data/context to Large Language Model
27 | - Gemini and Groq LLM
28 | - Customizable JSON output
29 |
30 | ## Setup Project
31 |
32 | - Clone this project
33 | - install project dependencies
34 |
35 | ```bash
36 | bun install
37 |
38 | # or
39 |
40 | npm install
41 | ```
42 |
43 | - Create a `.env` file and make sure you set all required environment variables. You can check `@/lib/env.ts` to see which variables must be set.
44 |
45 | Check [src/lib/env.ts](src/lib/env.ts) for .env variable requirements
46 |
47 | ```typescript
48 | import { z } from "zod";
49 |
50 | const envSchema = z.object({
51 | // llm providers
52 | // https://console.groq.com/keys
53 | GROQ_API_KEY: z.string().min(1),
54 | // https://aistudio.google.com/app/apikey
55 | GOOGLE_AI_STUDIO_API_KEY: z.string().min(1),
56 | // embedding, get from https://dashboard.cohere.com/api-keys
57 | COHERE_API_KEY: z.string().min(1),
58 | // upstash vectorstore, get from https://console.upstash.com/
59 | UPSTASH_VECTOR_REST_URL: z.string().url(),
60 | UPSTASH_VECTOR_REST_TOKEN: z.string().min(1),
61 | // upstash redis, get from https://console.upstash.com/
62 | UPSTASH_REDIS_REST_URL: z.string().url(),
63 | UPSTASH_REDIS_REST_TOKEN: z.string().min(1),
64 | // just put random string or generate with command `openssl rand -base64 32`
65 | CLEAR_UPSTASH_VECTOR_STORE_TOKEN: z.string().min(1),
66 | BASE_URL: z.string().url(),
67 | NODE_ENV: z
68 | .union([
69 | z.literal("development"),
70 | z.literal("testing"),
71 | z.literal("production"),
72 | ])
73 | .default("development"),
74 | });
75 | export const env = envSchema.parse(process.env);
76 | ```
77 |
78 | - Run the project in development mode
79 |
80 | ```bash
81 | bun dev
82 |
83 | # or
84 |
85 | npm run dev
86 | ```
87 |
88 | ## How This Works
89 |
90 | 1. Users provide a URL and define desired JSON attributes.
91 | 2. The backend scrapes the website, filtering out unnecessary elements like:
92 |
93 | - `nav`
94 | - `footer`
95 | - `header`
96 | - `aside`
97 | - `script`
98 | - `style`
99 | - `noscript`
100 | - `iframe`
101 |
102 | 3. Extracted content is split and stored in a vector database.
103 | 4. Relevant chunks are retrieved using similarity search.
104 | 5. An LLM processes the data to generate the requested JSON output.
105 |
106 | > **Note:** We store the user's form (template) data in localStorage, so it can be reused without refilling the form.
107 |
108 | ## Usage Examples
109 |
110 | ### Extract array of mangas object from manga reading website
111 |
112 | 
113 |
114 | **Request Body**
115 |
116 | ```json
117 | {
118 | "id": "1c8ab1fa-303d-4000-8e2c-70d22ea5b528",
119 | "url": "https://tcbscans.me",
120 | "name": "manga scraper",
121 | "attributes": [
122 | {
123 | "name": "mangas",
124 | "description": "array of manga object",
125 | "type": "array",
126 | "items": {
127 | "type": "object",
128 | "properties": [
129 | {
130 | "name": "name",
131 | "description": "manga name",
132 | "type": "string"
133 | },
134 | {
135 | "name": "chapter",
136 | "description": "manga chapter number",
137 | "type": "string"
138 | },
139 | {
140 | "name": "thumbnail",
141 | "description": "manga thumbnail image url",
142 | "type": "string"
143 | },
144 | {
145 | "name": "url",
146 | "description": "url to read the manga chapter",
147 | "type": "string"
148 | }
149 | ]
150 | }
151 | },
152 | {
153 | "name": "last_updated_at",
154 | "description": "time of latest manga update",
155 | "type": "string"
156 | }
157 | ],
158 | "latestResult": {
159 | "output": {
160 | "mangas": [
161 | {
162 | "name": "My Hero Academia",
163 | "chapter": "430",
164 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/mhaDescriptionv2.png",
165 | "url": "https://tcbscans.me/chapters/7777/my-hero-academia-chapter-430"
166 | },
167 | {
168 | "name": "Black Clover",
169 | "chapter": "370.371",
170 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/site_cover_bc1.png",
171 | "url": "https://tcbscans.me/chapters/7723/black-clover-chapter-370and371"
172 | },
173 | {
174 | "name": "Haikyuu!! (New Special)",
175 | "chapter": "3",
176 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/haikyu45-1200px.jpeg",
177 | "url": "https://tcbscans.me/chapters/7654/haikyu-special-chapter-3"
178 | },
179 | {
180 | "name": "Black Clover Gaiden: Quartet Knights",
181 | "chapter": "40",
182 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/hbc.png",
183 | "url": "https://tcbscans.me/chapters/7651/black-clover-gaiden-quartet-knights-chapter-40"
184 | },
185 | {
186 | "name": "Jujutsu Kaisen",
187 | "chapter": "267",
188 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/jjkkk.png",
189 | "url": "https://tcbscans.me/chapters/7790/jujutsu-kaisen-chapter-267"
190 | },
191 | {
192 | "name": "One Piece",
193 | "chapter": "1124",
194 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/op_1009_00-Cover-redraw-fin-wm-lvl-1.png",
195 | "url": "https://tcbscans.me/chapters/7789/one-piece-chapter-1124"
196 | },
197 | {
198 | "name": "Chainsaw Man",
199 | "chapter": "174",
200 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/cmt2.jpg",
201 | "url": "https://tcbscans.me/chapters/7787/chainsaw-man-chapter-174"
202 | },
203 | {
204 | "name": "My Hero Academia One-Shot: You're Next!!",
205 | "chapter": "1",
206 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/PV_pic.png",
207 | "url": "https://tcbscans.me/chapters/7782/my-hero-academia-one-shot-you-re-next-chapter-1"
208 | }
209 | ],
210 | "last_updated_at": "2 days ago"
211 | }
212 | },
213 | "updatedAt": "2024-08-25T14:50:36.284Z",
214 | "ignoreCache": false,
215 | "model": "mixtral-8x7b-32768"
216 | }
217 | ```
218 |
219 | **Output**
220 |
221 | ```json
222 | {
223 | "output": {
224 | "mangas": [
225 | {
226 | "name": "My Hero Academia",
227 | "chapter": "430",
228 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/mhaDescriptionv2.png",
229 | "url": "https://tcbscans.me/chapters/7777/my-hero-academia-chapter-430"
230 | },
231 | {
232 | "name": "Black Clover",
233 | "chapter": "370.371",
234 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/site_cover_bc1.png",
235 | "url": "https://tcbscans.me/chapters/7723/black-clover-chapter-370and371"
236 | },
237 | {
238 | "name": "Haikyuu!! (New Special)",
239 | "chapter": "3",
240 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/haikyu45-1200px.jpeg",
241 | "url": "https://tcbscans.me/chapters/7654/haikyu-special-chapter-3"
242 | },
243 | {
244 | "name": "Black Clover Gaiden: Quartet Knights",
245 | "chapter": "40",
246 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/hbc.png",
247 | "url": "https://tcbscans.me/chapters/7651/black-clover-gaiden-quartet-knights-chapter-40"
248 | },
249 | {
250 | "name": "Jujutsu Kaisen",
251 | "chapter": "267",
252 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/jjkkk.png",
253 | "url": "https://tcbscans.me/chapters/7790/jujutsu-kaisen-chapter-267"
254 | },
255 | {
256 | "name": "One Piece",
257 | "chapter": "1124",
258 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/op_1009_00-Cover-redraw-fin-wm-lvl-1.png",
259 | "url": "https://tcbscans.me/chapters/7789/one-piece-chapter-1124"
260 | },
261 | {
262 | "name": "Chainsaw Man",
263 | "chapter": "174",
264 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/cmt2.jpg",
265 | "url": "https://tcbscans.me/chapters/7787/chainsaw-man-chapter-174"
266 | },
267 | {
268 | "name": "My Hero Academia One-Shot: You're Next!!",
269 | "chapter": "1",
270 | "thumbnail": "https://cdn.onepiecechapters.com/file/CDN-M-A-N/PV_pic.png",
271 | "url": "https://tcbscans.me/chapters/7782/my-hero-academia-one-shot-you-re-next-chapter-1"
272 | }
273 | ],
274 | "last_updated_at": "2 days ago"
275 | }
276 | }
277 | ```
278 |
279 | ### Extract Person Information from Wiki
280 |
281 | 
282 |
283 | **Request Body**
284 |
285 | ```json
286 | {
287 | "id": "e0654277-89b6-4a7b-a071-a788fdbb6636",
288 | "url": "https://liquipedia.net/dota2/Gorgc",
289 | "name": "dota player detail",
290 | "attributes": [
291 | {
292 | "name": "name",
293 | "description": "player real name",
294 | "type": "string"
295 | },
296 | {
297 | "name": "ign",
298 | "description": "player In game name",
299 | "type": "string"
300 | },
301 | {
302 | "name": "earnings",
303 | "description": "players earnings from dota competitive scene",
304 | "type": "number"
305 | },
306 | {
307 | "name": "nationality",
308 | "description": "player nationality",
309 | "type": "string"
310 | },
311 | {
312 | "name": "picture",
313 | "description": "image url of player picture",
314 | "type": "string"
315 | }
316 | ],
317 | "latestResult": {
318 | "output": {
319 | "name": "Janne Stefanovski",
320 | "ign": "Gorgc",
321 | "earnings": 14839,
322 | "nationality": "Sweden",
323 | "picture": "https://liquipedia.net/commons/images/thumb/0/0f/Gorgc_WESG_2016.jpg/600px-Gorgc_WESG_2016.jpg"
324 | }
325 | },
326 | "createdAt": "2024-08-25T14:59:46.509Z",
327 | "updatedAt": "2024-08-25T14:59:46.509Z",
328 | "ignoreCache": false,
329 | "model": "mixtral-8x7b-32768"
330 | }
331 | ```
332 |
333 | **Output**
334 |
335 | ```json
336 | {
337 | "output": {
338 | "name": "Janne Stefanovski",
339 | "ign": "Gorgc",
340 | "earnings": 14839,
341 | "nationality": "Sweden",
342 | "picture": "https://liquipedia.net/commons/images/thumb/0/0f/Gorgc_WESG_2016.jpg/600px-Gorgc_WESG_2016.jpg"
343 | }
344 | }
345 | ```
346 |
347 | ### Extract Indonesian News
348 |
349 | 
350 |
351 | **Request Body**
352 |
353 | ```json
354 | {
355 | "id": "f6086055-324d-4b81-be68-b2c220b83b1f",
356 | "url": "https://www.kaskus.co.id/thread/66c916cb5a6daedab1041d6c/netizen-curiga-skandal-azizah-salsha-hanya-pengalihan-isu-polemik-putusan-mk?ref=homelanding&med=hot_thread&style=thumb",
357 | "name": "news scraper",
358 | "attributes": [
359 | {
360 | "name": "judul",
361 | "description": "judul dari berita pada website",
362 | "type": "string"
363 | },
364 | {
365 | "name": "rangkuman",
366 | "description": "2 sampai 3 kalimat rangkuman mengenai berita pada website",
367 | "type": "string"
368 | },
369 | {
370 | "name": "gambar",
371 | "description": "link untuk gambar thumbnaill berita",
372 | "type": "string"
373 | },
374 | {
375 | "name": "penulis",
376 | "description": "objek yang berisi detail profil penulis berita",
377 | "type": "object",
378 | "properties": [
379 | {
380 | "name": "nama",
381 | "description": "nama akun penulis",
382 | "type": "string"
383 | },
384 | {
385 | "name": "total_post",
386 | "description": "jumlah postingan penulis",
387 | "type": "number"
388 | }
389 | ]
390 | }
391 | ],
392 | "latestResult": {
393 | "output": {
394 | "judul": "Netizen Curiga! Skandal Azizah Salsha Hanya Pengalihan Isu Polemik Putusan MK!",
395 | "rangkuman": "Kasus dugaan perselingkuhan istri Pratama Arhan, Azizah Salsha, dengan pacar selebgram Rachel Vennya, Salim Nauderer, memancing spekulasi di ranah digital. Netizen memulai kampanye #KawalPutusanMK untuk menjaga fokus terhadap isu yang dinilai lebih krusial.",
396 | "gambar": "https://s.kaskus.id/images/2024/08/23/10600510_202408231058540652.jpg",
397 | "penulis": {
398 | "nama": "TS harrywjyy",
399 | "total_post": 3
400 | }
401 | }
402 | },
403 | "createdAt": "2024-08-25T15:08:10.227Z",
404 | "updatedAt": "2024-08-25T15:08:10.227Z",
405 | "ignoreCache": false,
406 | "model": "mixtral-8x7b-32768"
407 | }
408 | ```
409 |
410 | **Output**
411 |
412 | ```json
413 | {
414 | "output": {
415 | "judul": "Netizen Curiga! Skandal Azizah Salsha Hanya Pengalihan Isu Polemik Putusan MK!",
416 | "rangkuman": "Kasus dugaan perselingkuhan istri Pratama Arhan, Azizah Salsha, dengan pacar selebgram Rachel Vennya, Salim Nauderer, memancing spekulasi di ranah digital. Netizen memulai kampanye #KawalPutusanMK untuk menjaga fokus terhadap isu yang dinilai lebih krusial.",
417 | "gambar": "https://s.kaskus.id/images/2024/08/23/10600510_202408231058540652.jpg",
418 | "penulis": {
419 | "nama": "TS harrywjyy",
420 | "total_post": 3
421 | }
422 | }
423 | }
424 | ```
425 |
426 | ## Technologies Used
427 |
428 | - Web Scraping: Puppeteer
429 | - Embedding: Cohere
430 | - Langchain
431 | - LLM: Gemini, Groq
432 | - Vector Store: Upstash
433 | - Development: TypeScript, Bun
434 |
435 | ## License
436 |
437 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
438 |
--------------------------------------------------------------------------------
/bun.lockb:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/bun.lockb
--------------------------------------------------------------------------------
/components.json:
--------------------------------------------------------------------------------
1 | {
2 | "$schema": "https://ui.shadcn.com/schema.json",
3 | "style": "default",
4 | "rsc": true,
5 | "tsx": true,
6 | "tailwind": {
7 | "config": "tailwind.config.ts",
8 | "css": "src/app/globals.css",
9 | "baseColor": "slate",
10 | "cssVariables": true,
11 | "prefix": ""
12 | },
13 | "aliases": {
14 | "components": "@/components",
15 | "utils": "@/lib/utils"
16 | }
17 | }
--------------------------------------------------------------------------------
/next.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('next').NextConfig} */
2 | const nextConfig = {
3 | reactStrictMode: true,
4 | // puppeteer-core and @sparticuz/chromium are declared as external packages for server components — presumably so they are not bundled; confirm against the Next.js docs
5 | experimental: {
6 | serverComponentsExternalPackages: ["puppeteer-core", "@sparticuz/chromium"],
7 | },
8 | };
9 |
10 | export default nextConfig;
11 |
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "json-shift",
3 | "version": "0.1.0",
4 | "private": true,
5 | "scripts": {
6 | "dev": "next dev",
7 | "build": "next build",
8 | "start": "next start",
9 | "lint": "next lint"
10 | },
11 | "dependencies": {
12 | "@hookform/resolvers": "^3.9.0",
13 | "@langchain/cohere": "^0.2.2",
14 | "@langchain/community": "^0.2.28",
15 | "@langchain/core": "^0.2.28",
16 | "@langchain/google-genai": "^0.0.26",
17 | "@langchain/groq": "^0.0.16",
18 | "@langchain/openai": "^0.2.6",
19 | "@radix-ui/react-accordion": "^1.2.0",
20 | "@radix-ui/react-checkbox": "^1.1.1",
21 | "@radix-ui/react-dialog": "^1.1.1",
22 | "@radix-ui/react-label": "^2.1.0",
23 | "@radix-ui/react-select": "^2.1.1",
24 | "@radix-ui/react-slot": "^1.1.0",
25 | "@sparticuz/chromium": "^123.0.0",
26 | "@uiw/react-json-view": "^2.0.0-alpha.26",
27 | "@upstash/redis": "^1.34.0",
28 | "@upstash/vector": "^1.1.5",
29 | "axios": "^1.7.3",
30 | "class-variance-authority": "^0.7.0",
31 | "clsx": "^2.1.1",
32 | "hono": "^4.5.4",
33 | "hono-rate-limiter": "^0.4.0",
34 | "langchain": "^0.2.17",
35 | "lucide-react": "^0.426.0",
36 | "next": "14.2.6",
37 | "next-themes": "^0.3.0",
38 | "puppeteer": "^22.11.0",
39 | "puppeteer-core": "^22.11.0",
40 | "react": "^18",
41 | "react-dom": "^18",
42 | "react-hook-form": "^7.52.2",
43 | "tailwind-merge": "^2.4.0",
44 | "tailwindcss-animate": "^1.0.7",
45 | "uuid": "^10.0.0",
46 | "zod": "^3.23.8",
47 | "zustand": "^4.5.4"
48 | },
49 | "devDependencies": {
50 | "@types/node": "^20",
51 | "@types/puppeteer": "^7.0.4",
52 | "@types/react": "^18",
53 | "@types/react-dom": "^18",
54 | "@typescript-eslint/eslint-plugin": "^7.0.0",
55 | "@typescript-eslint/parser": "^7.10.0",
56 | "eslint": "^8",
57 | "eslint-config-airbnb": "^19.0.4",
58 | "eslint-config-airbnb-typescript": "^18.0.0",
59 | "eslint-config-next": "14.2.6",
60 | "eslint-plugin-align-assignments": "^1.1.2",
61 | "postcss": "^8",
62 | "tailwindcss": "^3.4.1",
63 | "typescript": "^5"
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/postcss.config.mjs:
--------------------------------------------------------------------------------
1 | /** @type {import('postcss-load-config').Config} */
2 | const config = {
3 | plugins: {
4 | tailwindcss: {}, // the only PostCSS plugin enabled; the empty object passes no options to it
5 | },
6 | };
7 |
8 | export default config;
9 |
--------------------------------------------------------------------------------
/public/android-chrome-192x192.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/android-chrome-192x192.png
--------------------------------------------------------------------------------
/public/android-chrome-512x512.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/android-chrome-512x512.png
--------------------------------------------------------------------------------
/public/apple-touch-icon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/apple-touch-icon.png
--------------------------------------------------------------------------------
/public/favicon-16x16.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/favicon-16x16.png
--------------------------------------------------------------------------------
/public/favicon-32x32.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/favicon-32x32.png
--------------------------------------------------------------------------------
/public/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/favicon.ico
--------------------------------------------------------------------------------
/public/next.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/public/og-image.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/og-image.png
--------------------------------------------------------------------------------
/public/usage-1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/usage-1.png
--------------------------------------------------------------------------------
/public/usage-2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/usage-2.png
--------------------------------------------------------------------------------
/public/usage-3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/public/usage-3.png
--------------------------------------------------------------------------------
/public/vercel.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/src/app/api/[[...route]]/route.ts:
--------------------------------------------------------------------------------
1 | import { Context, Hono } from "hono";
2 |
3 | import RequestTimeoutError from "@/errors/request-timeout-error";
4 | import ValidationError from "@/errors/validation-error";
5 | import { env } from "@/lib/env";
6 | import extractRoute from "@/routes/extract-route";
7 | import vectorStoreRoute from "@/routes/vectorstore-routes";
8 | import { cors } from "hono/cors";
9 | import { StatusCode } from "hono/utils/http-status";
10 | import { handle } from "hono/vercel";
11 |
12 | /**
13 | * Maximum duration allowed for this route's serverless function (presumably in seconds — confirm against the Next.js docs)
14 | */
15 | export const maxDuration = 60;
16 |
17 | /**
18 | * Initialize Hono app with base path; every route and middleware pattern below is resolved relative to "/api"
19 | */
20 | const app = new Hono().basePath("/api");
21 | // CORS for every API route: the pattern is "*" because basePath already adds "/api" ("/api/*" would only match "/api/api/*")
22 | app.use(
23 | "*",
24 | cors({
25 | origin : env.NODE_ENV === "production" ? env.BASE_URL : "*",
26 | allowMethods : ["POST", "GET", "OPTIONS"],
27 | maxAge : 600,
28 | credentials : true,
29 | }),
30 | );
31 | // Serializes an error into the API's JSON error body; every property read is null-safe so the serializer itself cannot throw
32 | const errorResponse = (c: Context, status: StatusCode, error: any) => c.json(
33 | {
34 | code : error?.code || "INTERNAL_SERVER_ERROR",
35 | title : (error?.name || "Error").replace(/([A-Z])/g, " $1").trim(),
36 | message : error?.message,
37 | details : error?.details,
38 | },
39 | status,
40 | );
41 |
42 | /**
43 | * Global error handler: maps known error types to their HTTP status and answers anything else with a generic 500
44 | * @param {Error} err - The error object
45 | * @param {Context} c - The Hono context
46 | * @returns {Response} JSON response with error details
47 | */
48 | app.onError((err, c) => {
49 | if (err instanceof ValidationError) return errorResponse(c, 422, err);
50 | if (err instanceof RequestTimeoutError) return errorResponse(c, 504, err);
51 |
52 | // Unknown failure: keep its details out of the response, but log it so the cause is not lost
53 | // eslint-disable-next-line no-console
54 | console.error(err);
55 | return errorResponse(c, 500, new Error("Internal Server Error"));
56 | });
57 |
58 | app.route("/extract", extractRoute);
59 | app.route("/vectorstore", vectorStoreRoute);
60 |
61 | // Next.js only forwards the HTTP methods exported here; OPTIONS is exported so CORS preflight requests reach the Hono app
62 | export const GET = handle(app);
63 | export const POST = handle(app);
64 | export const OPTIONS = handle(app);
--------------------------------------------------------------------------------
/src/app/favicon.ico:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/imamseptian/json-shift/849262fa5287c39f114b90dca0eebfdeb80ef0f8/src/app/favicon.ico
--------------------------------------------------------------------------------
/src/app/globals.css:
--------------------------------------------------------------------------------
1 | @tailwind base;
2 | @tailwind components;
3 | @tailwind utilities;
4 |
5 | @layer base {
6 | :root {
7 | --background: 0 0% 100%;
8 | --foreground: 222.2 84% 4.9%;
9 | --card: 0 0% 100%;
10 | --card-foreground: 222.2 84% 4.9%;
11 | --popover: 0 0% 100%;
12 | --popover-foreground: 222.2 84% 4.9%;
13 | --primary: 222.2 47.4% 11.2%;
14 | --primary-foreground: 210 40% 98%;
15 | --secondary: 210 40% 96.1%;
16 | --secondary-foreground: 222.2 47.4% 11.2%;
17 | --muted: 210 40% 96.1%;
18 | --muted-foreground: 215.4 16.3% 46.9%;
19 | --accent: 210 40% 96.1%;
20 | --accent-foreground: 222.2 47.4% 11.2%;
21 | --destructive: 0 84.2% 60.2%;
22 | --destructive-foreground: 210 40% 98%;
23 | --border: 214.3 31.8% 91.4%;
24 | --input: 214.3 31.8% 91.4%;
25 | --ring: 222.2 84% 4.9%;
26 | --radius: 0.3rem;
27 | --chart-1: 12 76% 61%;
28 | --chart-2: 173 58% 39%;
29 | --chart-3: 197 37% 24%;
30 | --chart-4: 43 74% 66%;
31 | --chart-5: 27 87% 67%;
32 | --success: 221.2 83.2% 53.3%;
33 | --success-foreground: 210 40% 98%;
34 | }
35 |
36 | .dark {
37 | --background: 222.2 84% 4.9%;
38 | --foreground: 210 40% 98%;
39 | --card: 222.2 84% 4.9%;
40 | --card-foreground: 210 40% 98%;
41 | --popover: 222.2 84% 4.9%;
42 | --popover-foreground: 210 40% 98%;
43 | --primary: 210 40% 98%;
44 | --primary-foreground: 222.2 47.4% 11.2%;
45 | --secondary: 217.2 32.6% 17.5%;
46 | --secondary-foreground: 210 40% 98%;
47 | --muted: 217.2 32.6% 17.5%;
48 | --muted-foreground: 215 20.2% 65.1%;
49 | --accent: 217.2 32.6% 17.5%;
50 | --accent-foreground: 210 40% 98%;
51 | --destructive: 0 62.8% 30.6%;
52 | --destructive-foreground: 210 40% 98%;
53 | --border: 217.2 32.6% 17.5%;
54 | --input: 217.2 32.6% 17.5%;
55 | --ring: 212.7 26.8% 83.9%; /* lightness needs its % unit, like every other color triple in this file */
56 | --chart-1: 220 70% 50%;
57 | --chart-2: 160 60% 45%;
58 | --chart-3: 30 80% 55%;
59 | --chart-4: 280 65% 60%;
60 | --chart-5: 340 75% 55%;
61 | --success: 217.2 91.2% 59.8%;
62 | --success-foreground: 222.2 47.4% 11.2%;
63 | }
64 | }
65 |
66 | @layer base {
67 | * {
68 | @apply border-border;
69 | }
70 | body {
71 | @apply bg-background text-foreground;
72 | }
73 | }
74 | /* Sidebar slide animation: the closed state shifts the panel left by its full 400px width; keep both values in sync */
75 | .sidebar {
76 | width: 400px;
77 | transition: margin-left 0.3s ease-in-out;
78 | }
79 |
80 | .sidebar-open {
81 | margin-left: 0;
82 | }
83 |
84 | .sidebar-close {
85 | margin-left: -400px;
86 | }
87 |
--------------------------------------------------------------------------------
/src/app/layout.tsx:
--------------------------------------------------------------------------------
1 | import Footer from "@/components/layout/footer";
2 | import Navbar from "@/components/layout/navbar";
3 | import { ThemeProvider } from "@/components/layout/theme-provider";
4 | import { env } from "@/lib/env";
5 | import { cn } from "@/lib/utils";
6 | import type { Metadata } from "next";
7 | import { Inter } from "next/font/google";
8 | import "./globals.css";
9 |
10 | const inter = Inter({ subsets: ["latin"] });
11 |
12 | export const metadata: Metadata = { // Default document metadata (SEO, Open Graph, Twitter card, robots, icons) exported from the root layout.
13 |   title: {
14 |     default : "JsonShift | AI-Powered Web Data Extraction to JSON",
15 |     template : "%s | JsonShift",
16 |   },
17 |   description:
18 |     "JsonShift utilizes scraping, embedding, and LLM technologies to extract and convert web content into structured JSON format based on user inputs. Simplify data extraction with our powerful tools.",
19 |   keywords: [
20 |     "JSON",
21 |     "data extraction",
22 |     "AI-powered data extraction",
23 |     "web scraping",
24 |     "groq",
25 |     "gemini",
26 |     "cohere",
27 |     "embedding",
28 |     "scraper",
29 |     "langchain",
30 |     "jsonshift",
32 |     "web content to JSON",
33 |     "LLM data processing",
34 |     "website data extraction",
35 |     "JSON generation",
36 |     "web data to JSON",
37 |     "structured data",
38 |     "data transformation",
39 |   ],
40 |   authors : [{ name: "Imam Septian Adi Wijaya" }],
41 |   creator : "Imam Septian Adi Wijaya",
42 |   themeColor : [ // NOTE(review): newer Next.js versions expect themeColor in a `viewport` export; confirm against the installed version
43 |     { media: "(prefers-color-scheme: dark)", color: "black" },
44 |     { media: "(prefers-color-scheme: light)", color: "white" },
45 |   ],
46 |   openGraph: {
47 |     type : "website",
48 |     locale : "en_US",
49 |     url : env.BASE_URL,
50 |     siteName : "JsonShift",
51 |     title : "JsonShift | AI-Powered Web Data Extraction to JSON",
52 |     description:
53 |       "Experience the power of AI and advanced scraping with JsonShift to transform web content into customizable JSON outputs. Efficiently extract data tailored to your needs.",
54 |     images: [
55 |       {
56 |         url : `${env.BASE_URL}/og-image.png`,
57 |         width : 1200,
58 |         height : 630,
59 |         alt : "JsonShift - AI-Powered Data Extraction",
60 |       },
61 |     ],
62 |   },
63 |   twitter: {
64 |     card : "summary_large_image",
65 |     site : "@yourTwitterHandle", // TODO: placeholder handle; replace with the real account before release
66 |     title : "JsonShift | AI-Powered Web Data Extraction to JSON",
67 |     description:
68 |       "Transform web content into structured JSON outputs with JsonShift's AI-powered tools. Extract data quickly and accurately based on user-defined inputs.",
69 |     images: [
70 |       {
71 |         url : `${env.BASE_URL}/og-image.png`,
72 |         width : 1200,
73 |         height : 630,
74 |         alt : "JsonShift - AI-Powered Data Extraction",
75 |       },
76 |     ],
77 |   },
78 |   robots: {
79 |     index : true,
80 |     follow : true,
81 |     googleBot : {
82 |       index : true,
83 |       follow : true,
84 |       "max-video-preview" : -1,
85 |       "max-image-preview" : "large",
86 |       "max-snippet" : -1,
87 |     },
88 |   },
89 |   icons: {
90 |     icon : "/favicon.ico",
91 |     shortcut : "/favicon-16x16.png",
92 |     apple : "/apple-touch-icon.png",
93 |   },
94 |   alternates: { // NOTE(review): no /en-US or /es-ES routes are visible under src/app; confirm these targets exist
95 |     languages: {
96 |       "en-US" : "/en-US",
97 |       "es-ES" : "/es-ES",
98 |     },
99 |   },
100 | };
101 |
102 | export default function RootLayout({
103 | children,
104 | }: Readonly<{
105 | children: React.ReactNode;
106 | }>) {
107 | return (
108 |
109 |
112 |
118 |
119 |
120 | { children }
121 |
122 |
123 |
124 |
125 |
126 | );
127 | }
128 |
--------------------------------------------------------------------------------
/src/app/page.tsx:
--------------------------------------------------------------------------------
1 | 'use client';
2 |
3 | import {
4 | useEffect, useRef, useState,
5 | } from "react";
6 | import { UseFormReturn } from "react-hook-form";
7 | import { v4 as uuid } from "uuid";
8 |
9 | import BrowseTemplate from "@/components/browse-template";
10 | import { ExecutionTime } from "@/components/execution-time-result";
11 | import ResultSection from "@/components/result-section";
12 | import TemplateForm from "@/components/template-form";
13 | import { Button } from "@/components/ui/button";
14 | import { applyValidationErrorsToForm } from "@/lib/error-utils";
15 | import { ErrorObject } from "@/lib/types";
16 | import { Template } from "@/schemas/template-schema";
17 | import { useModelStore } from "@/store/model-store";
18 | import { useTemplateStore } from "@/store/template-store";
19 |
20 | /**
21 | * Homepage component for template management and execution
22 | * @returns {JSX.Element} The rendered Homepage component
23 | */
24 | export default function Homepage(): JSX.Element {
25 | const {
26 | selectedTemplate,
27 | setSelectedTemplate,
28 | addTemplate,
29 | updateTemplate,
30 | } = useTemplateStore();
31 | const { model: selectedModel } = useModelStore();
32 |
33 | const resultRef = useRef(null);
34 | const [objectResult, setObjectResult] = useState(null);
35 | const [isSubmitting, setIsSubmitting] = useState(false);
36 | const [errorObject, setErrorObject] = useState(null);
37 | const [executionTime, setExecutionTime] = useState({
38 | scrapeExecutionTime : null,
39 | embeddingTime : null,
40 | llmProcessingTime : null,
41 | });
42 |
43 | useEffect(() => {
44 | setObjectResult(selectedTemplate?.latestResult ?? null);
45 | }, [selectedTemplate]);
46 |
47 | /**
48 |  * Posts the template to `/api/extract` and routes the outcome into component state
49 |  * @param {Template} formValues - The form values to be submitted
50 |  * @param {UseFormReturn} form - The form instance, forwarded to the error handler
51 |  */
52 | const handleSubmit = async (formValues: Template, form: UseFormReturn) => {
53 |   // Reuse the shared scroll helper instead of repeating the ref check inline.
54 |   scrollToResult();
56 |
57 |   setIsSubmitting(true);
58 |   setErrorObject(null);
59 |   setObjectResult(null);
60 |
61 |   const templateId = formValues.id || uuid(); // templates without an id get a fresh one
62 |
63 |   try {
64 |     const response = await fetch("/api/extract", {
65 |       method : "POST",
66 |       headers : { "Content-Type": "application/json" },
67 |       body : JSON.stringify({
68 |         ...formValues,
69 |         model : selectedModel,
70 |         id : templateId,
71 |       }),
72 |     });
73 |
74 |     const data = await response.json();
75 |
76 |     if (!response.ok) {
77 |       handleErrorResponse(response, data, form);
78 |     } else {
79 |       handleSuccessResponse(data, formValues, templateId);
80 |     }
81 |
82 |     updateExecutionTime(data); // timings are recorded for both success and error payloads
83 |   } catch (error) {
84 |     handleFetchError(error as Error);
85 |   } finally {
86 |     setIsSubmitting(false);
87 |     scrollToResult();
88 |   }
89 | };
90 |
91 | /**
92 |  * Turns a non-OK API response into error state (and field errors for validation failures)
93 |  * @param {Response} response - The API response
94 |  * @param {any} data - The parsed response data; may be null or lack the expected fields
95 |  * @param {UseFormReturn} form - The form instance
96 |  */
97 | const handleErrorResponse = (response: Response, data: any, form: UseFormReturn) => {
98 |   // Guard every read of `data`: a body that parses to null must not throw here.
99 |   const isValidationError = response.status === 422 && data?.code === "VALIDATION_ERROR";
100 |   if (isValidationError) {
101 |     applyValidationErrorsToForm(data.details, form.setError);
102 |   }
103 |   setErrorObject({
104 |     title : data?.title ?? (isValidationError ? "Validation Error" : "Unknown error"),
105 |     message : data?.message ?? (isValidationError ? "Validation failed. Please check your inputs." : "Unknown error"),
106 |   });
107 | };
111 |
112 | /**
113 |  * Stores a successful extraction result and saves it on the template
114 |  * @param {any} data - The parsed response data; `answer` holds the extracted result
115 |  * @param {Template} formValues - The submitted form values
116 |  * @param {string} templateId - The template ID sent with the request
117 |  */
118 | const handleSuccessResponse = (data: any, formValues: Template, templateId: string) => {
119 |   const answer = data?.answer ?? null; // normalise once so state and the stored template always agree
120 |   setObjectResult(answer);
121 |   const updatedTemplate = {
122 |     ...formValues,
123 |     latestResult : answer,
124 |     id : templateId,
125 |   };
126 |
127 |   if (formValues.id) {
128 |     updateTemplate(updatedTemplate);
129 |   } else {
130 |     addTemplate(updatedTemplate);
131 |   }
132 |   setSelectedTemplate(updatedTemplate);
133 | };
134 |
135 | /**
136 |  * Copies the three timing fields from the response into state, using null for any that are missing
137 |  * @param {any} data - The parsed response data containing execution times
138 |  */
139 | const updateExecutionTime = (data: any) => {
140 |   const { scrapeExecutionTime, embeddingTime, llmProcessingTime } = data ?? {};
141 |   setExecutionTime({
142 |     scrapeExecutionTime : scrapeExecutionTime ?? null,
143 |     embeddingTime : embeddingTime ?? null,
144 |     llmProcessingTime : llmProcessingTime ?? null,
145 |   }); };
146 |
147 | /**
148 |  * Handles anything thrown while calling the API (failed fetch, unparsable body, handler error)
149 |  * @param {Error} error - The caught value; the caller casts it, so it may not be a real Error
150 |  */
151 | const handleFetchError = (error: Error) => {
152 |   setErrorObject({
153 |     title : error?.name || "Internal server error", // `?.` tolerates non-Error throwables
154 |     message : error?.message || "Please try again later", // `||` also covers an empty message
155 |   });
156 | };
157 |
158 | /**
159 |  * Smooth-scrolls the result section into view; does nothing while the ref is unset
160 |  */
161 | const scrollToResult = () => {
162 |   resultRef.current?.scrollIntoView({ behavior: "smooth" });
163 | };
166 |
167 | const showErrorMessage = !!errorObject && !isSubmitting;
168 | const showResults = !isSubmitting && !showErrorMessage;
169 |
170 | return (
171 |
172 |
173 |
174 |
175 | { selectedTemplate?.id && (
176 | setSelectedTemplate(null) }
180 | >
181 | Create New
182 |
183 | ) }
184 |
185 |
186 |
187 |
188 |
197 |
198 |
199 |
200 | );
201 | }
202 |
--------------------------------------------------------------------------------
/src/components/attribute-fields.tsx:
--------------------------------------------------------------------------------
1 | import { Button } from "@/components/ui/button";
2 | import { Template, TYPES } from "@/schemas/template-schema";
3 | import { PlusCircle } from "lucide-react";
4 | import { useState } from "react";
5 | import { useFieldArray, useFormContext } from "react-hook-form";
6 | import InputField from "./input-field";
7 | import PropertiesField from "./property-field";
8 | import SelectField from "./select-field";
9 | import TextareaField from "./text-area-field";
10 | import {
11 | Accordion,
12 | AccordionContent,
13 | AccordionItem,
14 | AccordionTrigger,
15 | } from "./ui/accordion";
16 |
17 | /**
18 | * AttributeFields component for managing attribute fields in a template form
19 | * @returns {JSX.Element} The rendered AttributeFields component
20 | */
21 | export default function AttributeFields() {
22 | const { control, watch, formState: { errors } } = useFormContext();
23 | const [openItems, setOpenItems] = useState([]);
24 |
25 | const {
26 | remove,
27 | append,
28 | } = useFieldArray({
29 | control,
30 | name: `attributes`,
31 | });
32 |
33 | const attributes = watch("attributes");
34 |
35 | // Appends a blank string-typed attribute and records its accordion key as open.
36 | const handleAddAttribute = () => {
37 |   const itemKey = `attribute-${attributes.length}`; // index the new row will occupy
38 |   append({ name: "", type: "string", description: "" });
39 |   setOpenItems((opened) => opened.concat(itemKey));
40 | };
44 |
45 | return (
46 |
47 |
53 | { attributes.map((attribute, index) => (
54 |
60 | )) }
61 |
62 |
68 |
69 | Add Field
70 |
71 | {
72 | errors.attributes && (
73 |
{ errors.attributes.message }
74 | )
75 | }
76 |
77 | );
78 | }
79 |
80 | interface AttributeAccordionItemProps { // Props for one row of the attributes accordion.
81 | attribute: any; // watched form value of this attribute; its name and type are shown in the row
82 | index: number; // position in the attributes array; also used for the "Attribute N" fallback label
83 | remove: (index: number) => void; // invoked with this row's index
84 | }
85 |
86 | /**
87 | * AttributeAccordionItem component for rendering individual attribute items
88 | * @param {AttributeAccordionItemProps} props - The component props
89 | * @returns {JSX.Element} The rendered AttributeAccordionItem component
90 | */
91 | function AttributeAccordionItem({ attribute, index, remove }: AttributeAccordionItemProps): JSX.Element {
92 | return (
93 |
98 | remove(index) }
101 | >
102 |
103 | { attribute.name || `Attribute ${index + 1}` }
104 |
105 | { attribute.type }
106 |
107 |
108 |
109 |
115 |
121 |
122 |
123 |
128 |
129 |
130 |
131 | );
132 | }
133 |
--------------------------------------------------------------------------------
/src/components/browse-template.tsx:
--------------------------------------------------------------------------------
1 | /* eslint-disable jsx-a11y/no-static-element-interactions */
2 | /* eslint-disable jsx-a11y/click-events-have-key-events */
3 |
4 | "use client";
5 |
6 | import { Button } from "@/components/ui/button";
7 | import {
8 | Dialog,
9 | DialogContent,
10 | DialogDescription,
11 | DialogHeader,
12 | DialogTitle,
13 | DialogTrigger,
14 | } from "@/components/ui/dialog";
15 | import useDebounce from "@/hooks/use-debounce";
16 | import { Template } from "@/schemas/template-schema";
17 | import { useTemplateStore } from "@/store/template-store";
18 | import { Circle, CircleCheck, X } from "lucide-react";
19 | import { useCallback, useMemo, useState } from "react";
20 |
21 | interface TemplateListProps { // Props for the list shown inside the browse dialog.
22 | templates: Template[]; // templates to render, one entry each
23 | onItemClick: (item: Template) => void; // called with the template whose row was clicked
24 | onDeleteClick: (item: Template) => void; // called with the template whose delete control was used
25 | }
26 |
27 | /**
28 | * TemplateList component for rendering a list of templates
29 | * @param {TemplateListProps} props - The component props
30 | * @returns {JSX.Element} The rendered TemplateList component
31 | */
32 | function TemplateList({
33 | templates,
34 | onItemClick,
35 | onDeleteClick,
36 | }: TemplateListProps) {
37 | const { selectedTemplate } = useTemplateStore();
38 |
39 | return (
40 |
41 | { templates.map((template) => {
42 | const isSelected = selectedTemplate?.id === template.id;
43 | return (
44 |
45 | onItemClick(template) }
50 | >
51 |
52 | { isSelected ? : }
53 | { template.name }
54 |
55 |
{ template.url }
56 |
57 | onDeleteClick(template) }
62 | aria-label={ `Delete template ${template.name}` }
63 | >
64 |
65 |
66 |
67 | );
68 | }) }
69 |
70 | );
71 | }
72 |
73 | /**
74 | * BrowseTemplate component for browsing and selecting templates
75 | * @returns {JSX.Element} The rendered BrowseTemplate component
76 | */
77 | export default function BrowseTemplate() {
78 | const [isModalOpen, setIsModalOpen] = useState(false);
79 | const [filterText, setFilterText] = useState("");
80 | const {
81 | templates, setSelectedTemplate, deleteTemplate,
82 | } = useTemplateStore();
83 |
84 | const debouncedFilterText = useDebounce(filterText, 300);
85 |
86 | const isLoading = filterText !== debouncedFilterText;
87 |
88 | const filterTemplate = useCallback( // case-insensitive substring match on a template's name or url
89 |   (currentTemplates: Template[], filter: string) => {
90 |     const needle = filter.toLowerCase();
91 |     const matches = (text: string) => text.toLowerCase().includes(needle);
92 |     return currentTemplates.filter((template) => matches(template.name) || matches(template.url));
93 |   },
94 |   [],
95 | );
98 |
99 | const filteredTemplates = useMemo(
100 | () => filterTemplate(templates, debouncedFilterText),
101 | [filterTemplate, templates, debouncedFilterText],
102 | );
103 |
104 | const handleItemClick = (template: Template) => { // Selecting a row stores it as the selected template...
105 | setSelectedTemplate(template);
106 | setIsModalOpen(false); // ...and resets the isModalOpen flag
107 | };
108 |
109 | const handleDeleteClick = (template: Template) => { // Deletes the template from the store by id
110 |   if (template.id) deleteTemplate(template.id); // id is optional on Template; skip instead of asserting it exists
111 | };
112 |
113 | return (
114 |
115 |
116 | Browse Templates
117 |
118 |
119 |
120 | Browse Templates
121 | Select or search for a template
122 |
123 |
124 |
125 | setFilterText(e.target.value) }
131 | />
132 |
133 |
134 | { isLoading ? (
135 | Loading...
136 | ) : (
137 |
142 | ) }
143 |
144 | { filteredTemplates.length === 0 && (
145 | No templates found
146 | ) }
147 |
148 |
149 | );
150 | }
151 |
--------------------------------------------------------------------------------
/src/components/execution-time-result.tsx:
--------------------------------------------------------------------------------
1 | export interface ExecutionTime { // Per-stage timings; each one is rendered only when truthy.
2 | scrapeExecutionTime: string | null | undefined; // shown as "Scrape Execution Time"
3 | embeddingTime: string | null | undefined; // shown as "Embedding Execution Time"
4 | llmProcessingTime: string | null | undefined; // shown as "LLM Processing Time"
5 | }
6 | export function ExecutionTimeResult({
7 | executionTime,
8 | }: {
9 | executionTime: ExecutionTime;
10 | }) {
11 | return (
12 |
13 | { executionTime?.scrapeExecutionTime && (
14 |
15 | Scrape Execution Time:
16 | { ' ' }
17 | { executionTime.scrapeExecutionTime }
18 |
19 | ) }
20 | { executionTime?.embeddingTime && (
21 |
22 | Embedding Execution Time:
23 | { ' ' }
24 | { executionTime.embeddingTime }
25 |
26 | ) }
27 | { executionTime?.llmProcessingTime && (
28 |
29 | LLM Processing Time:
30 | { ' ' }
31 | { executionTime.llmProcessingTime }
32 |
33 | ) }
34 |
35 | );
36 | }
37 |
--------------------------------------------------------------------------------
/src/components/input-field.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import {
4 | FormControl,
5 | FormDescription,
6 | FormField,
7 | FormItem,
8 | FormLabel,
9 | FormMessage,
10 | } from "@/components/ui/form";
11 | import { Input } from "@/components/ui/input";
12 | import { useFormContext } from "react-hook-form";
13 |
14 | interface InputFieldProps extends React.InputHTMLAttributes { // Props for the form-bound text input wrapper.
15 | name: string; // field name; also the visible label when `label` is omitted
16 | label?: string; // optional label text
17 | description?: string; // helper text, rendered only when provided
18 | className?: string; // defaults to "" in the component
19 | }
20 |
21 | export default function InputField({
22 | name,
23 | label,
24 | description,
25 | className = "",
26 | ...props
27 | }: InputFieldProps) {
28 | const { control } = useFormContext();
29 | return (
30 | (
34 |
35 | { label || name }
36 |
37 |
38 |
39 | { description && { description } }
40 |
41 |
42 | ) }
43 | />
44 | );
45 | }
46 |
--------------------------------------------------------------------------------
/src/components/json-result-view.tsx:
--------------------------------------------------------------------------------
1 | import { useTemplateStore } from "@/store/template-store";
2 | import JsonView from "@uiw/react-json-view";
3 | import { monokaiTheme } from "@uiw/react-json-view/monokai";
4 | import { useState } from "react";
5 |
6 | function JsonResultView({ objectResult }: { objectResult: any }) {
7 | const [copied, setCopied] = useState(false);
8 |
9 | const { selectedTemplate } = useTemplateStore();
10 |
11 | if (!objectResult) {
12 | return (
13 |
14 |
15 |
Create your template and extract your data
16 |
17 |
18 | );
19 | }
20 |
21 | const handleCopy = async () => { // Copies the pretty-printed result to the clipboard and flips `copied` on for 2s
22 |   try {
23 |     await navigator.clipboard.writeText(
24 |       JSON.stringify(objectResult, null, 2),
25 |     );
26 |     setCopied(true);
27 |     setTimeout(() => setCopied(false), 2000);
28 |   } catch (err) {
29 |     // eslint-disable-next-line no-console
30 |     console.error("Failed to copy JSON to clipboard:", err);
31 |     // eslint-disable-next-line no-alert
32 |     alert("Failed to copy JSON to clipboard.");
33 |   }
34 | };
35 |
36 | const handleDownload = () => { // Saves the pretty-printed result as a .json file via a temporary link
37 |   const baseName = selectedTemplate?.name?.trim() || "result"; // avoids "undefined.json" when nothing is selected
38 |   const blob = new Blob([JSON.stringify(objectResult, null, 2)], { type: "application/json" });
39 |   const url = URL.createObjectURL(blob);
40 |   const link = document.createElement("a");
41 |   link.href = url;
42 |   link.download = `${baseName}.json`;
43 |   document.body.appendChild(link);
44 |   link.click();
45 |   document.body.removeChild(link);
46 |   URL.revokeObjectURL(url); // release the object URL once the click has been dispatched
47 | };
48 |
49 | return (
50 |
51 |
52 |
57 | { copied ? "Copied!" : "Copy JSON" }
58 |
59 |
64 | Download JSON
65 |
66 |
67 |
68 |
69 | );
70 | }
71 |
72 | export default JsonResultView;
73 |
--------------------------------------------------------------------------------
/src/components/layout/footer.tsx:
--------------------------------------------------------------------------------
1 | import { Button } from "@/components/ui/button";
2 | import {
3 | Facebook,
4 | Globe, Linkedin,
5 | LucideGithub,
6 | } from "lucide-react";
7 | import Link from "next/link";
8 |
9 | export default function Footer() {
10 | return (
11 |
12 |
13 |
14 |
15 |
16 | ©
17 | { ' ' }
18 | { new Date().getFullYear() }
19 | { ' ' }
20 | Razhael™. All Rights Reserved.
21 |
22 |
23 |
24 |
25 |
31 |
32 | Facebook page
33 |
34 |
35 |
36 |
37 |
43 |
50 |
51 |
52 | Discord community
53 |
54 |
55 |
56 |
57 |
63 |
64 | GitHub account
65 |
66 |
67 |
68 |
69 |
75 |
76 | Linkedin
77 |
78 |
79 |
80 |
86 |
87 | Linkedin
88 |
89 |
90 |
91 |
92 |
93 |
94 | );
95 | }
96 |
--------------------------------------------------------------------------------
/src/components/layout/model-select.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import {
4 | Select,
5 | SelectContent,
6 | SelectItem,
7 | SelectTrigger,
8 | SelectValue,
9 | } from "@/components/ui/select";
10 | import {
11 | DEFAULT_LLM_MODEL,
12 | LLM_MODEL_OPTIONS,
13 | LLMModel,
14 | } from "@/lib/constants";
15 | import { useModelStore } from "@/store/model-store";
16 | import React from "react";
17 |
18 | export default function ModelSelect() {
19 | const { model: selectedModel, setModel } = useModelStore();
20 |
21 | const onModelChange = (model: LLMModel) => { // Writes the chosen model into the model store
22 | setModel(model);
23 | };
24 |
25 | return (
26 |
31 |
32 |
33 |
34 |
35 | { Object.entries(LLM_MODEL_OPTIONS).map(([provider, models]) => (
36 |
37 |
38 | { provider }
39 |
40 | { models.map((model) => (
41 |
42 | { model }
43 |
44 | )) }
45 |
46 | )) }
47 |
48 |
49 | );
50 | }
51 |
--------------------------------------------------------------------------------
/src/components/layout/navbar.tsx:
--------------------------------------------------------------------------------
1 | import Link from "next/link";
2 | import ModelSelect from "./model-select";
3 | import { ThemeToggler } from "./theme-toggler-button";
4 |
5 | export default async function Navbar() {
6 | return (
7 |
8 |
9 |
10 |
11 | JSON
12 | Shift.
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 | );
23 | }
24 |
--------------------------------------------------------------------------------
/src/components/layout/theme-provider.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { ThemeProvider as NextThemesProvider } from "next-themes";
4 | import { type ThemeProviderProps } from "next-themes/dist/types";
5 |
6 | export function ThemeProvider({ children, ...props }: ThemeProviderProps) {
7 | return { children } ;
8 | }
9 |
--------------------------------------------------------------------------------
/src/components/layout/theme-toggler-button.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { Moon, Sun } from "lucide-react";
4 | import { useTheme } from "next-themes";
5 |
6 | import { Button } from "@/components/ui/button";
7 | import { cn } from "@/lib/utils";
8 |
9 | export function ThemeToggler({ className }: { className?: string }) {
10 | const { theme, setTheme, systemTheme } = useTheme();
11 |
12 | const currentTheme = theme === "system" ? systemTheme : theme; // when the setting is "system", use the system theme value instead
13 |
14 | const toggleTheme = () => { // Switches to "light" when currently dark, otherwise to "dark"
15 | setTheme(currentTheme === "dark" ? "light" : "dark");
16 | };
17 |
18 | return (
19 |
24 |
25 |
26 |
27 | );
28 | }
29 |
--------------------------------------------------------------------------------
/src/components/property-field.tsx:
--------------------------------------------------------------------------------
1 | import { Template } from "@/schemas/template-schema";
2 | import { useFormContext } from "react-hook-form";
3 | import PropertyItemsField from "./property-items-field";
4 | import PropertyObjectField from "./property-object-field";
5 |
6 | export default function PropertiesField({ index }: { index: number }) {
7 | const { watch } = useFormContext();
8 |
9 | const currentAttribute = watch(`attributes.${index}`);
10 |
11 | if (currentAttribute.type === "object") {
12 | return ;
13 | }
14 |
15 | if (currentAttribute.type === "array") {
16 | return ;
17 | }
18 |
19 | return null;
20 | }
21 |
--------------------------------------------------------------------------------
/src/components/property-items-field.tsx:
--------------------------------------------------------------------------------
1 | import { Template, TYPES } from "@/schemas/template-schema";
2 | import { useFormContext } from "react-hook-form";
3 | import PropertyObjectField from "./property-object-field";
4 | import SelectField from "./select-field";
5 |
6 | const ITEM_TYPES = TYPES.filter(({ value }) => value !== "array"); // every type except "array"
7 |
8 | export default function PropertyItemsField({ index }: { index: number }) {
9 | const { watch } = useFormContext();
10 |
11 | const currentType = watch(`attributes.${index}.items.type`);
12 | return (
13 | <>
14 |
19 | { currentType === "object" && (
20 |
21 | ) }
22 | >
23 | );
24 | }
25 |
--------------------------------------------------------------------------------
/src/components/property-object-field.tsx:
--------------------------------------------------------------------------------
1 | import { Button } from '@/components/ui/button';
2 | import { ObjectAttribute, Template, TYPES } from '@/schemas/template-schema';
3 | import { PlusCircle } from 'lucide-react';
4 | import React, { useCallback, useState } from 'react';
5 | import { useFieldArray, useFormContext } from 'react-hook-form';
6 | import InputField from './input-field';
7 | import SelectField from './select-field';
8 | import TextareaField from './text-area-field';
9 | import {
10 | Accordion, AccordionContent, AccordionItem, AccordionTrigger,
11 | } from './ui/accordion';
12 |
13 | // Type options left after removing the "array" and "object" entries.
14 | const PRIMITIVE_TYPES = TYPES.filter(({ value }) => value !== 'array' && value !== 'object');
15 |
16 | interface PropertyObjectFieldProps { // Props for the nested object-properties editor.
17 | index: number; // index of the owning attribute, used in the `attributes.${index}` form paths
18 | isArray?: boolean; // when true the properties live under `attributes.${index}.items`; defaults to false
19 | }
20 |
21 | /**
22 | * PropertyObjectField component for managing object properties in a form.
23 | * @param {PropertyObjectFieldProps} props - The component props
24 | * @returns {React.ReactElement} The rendered component
25 | */
26 | export default function PropertyObjectField({ index, isArray = false }: PropertyObjectFieldProps): React.ReactElement {
27 | const { watch, control } = useFormContext();
28 | const [openProperties, setOpenProperties] = useState([]);
29 |
30 | const currentAttribute = watch(`attributes.${index}`);
31 | const currentObjectField = watch(
32 | isArray ? `attributes.${index}.items` : `attributes.${index}`,
33 | ) as ObjectAttribute;
34 |
35 | const { fields, remove, append } = useFieldArray({
36 | control,
37 | name: isArray ? `attributes.${index}.items.properties` : `attributes.${index}.properties`,
38 | });
39 |
40 | const currentProperties = currentObjectField?.properties ?? [];
41 |
42 | const handleAddPropertiesAttribute = useCallback(() => { // appends a blank string-typed property and marks its accordion key open
43 |   const accordionKey = `property-field-${fields.length}`; // index the new property will occupy
44 |   append({ name: '', type: 'string', description: '' });
45 |   setOpenProperties((opened) => opened.concat(accordionKey));
46 | }, [append, fields.length]);
51 |
52 | return (
53 |
54 |
Object Properties
55 |
61 | { currentProperties.map((field, secondIndex) => (
62 | remove(secondIndex) }
73 | />
74 | )) }
75 |
81 |
82 | Add Object Property
83 |
84 |
85 |
86 | );
87 | }
88 |
89 | interface PropertyAccordionItemProps { // Props for one row of the object-properties accordion.
90 | field: Record; // property value; its name and type are shown in the row header
91 | fieldNamePrefix: string; // presumably the form path prefix for this property's inputs; confirm against the JSX
92 | index: number; // position in the list; used for the "Object Property N" fallback label
93 | currentAttribute: any; // owning attribute; its name prefixes the header as `attribute.property`
94 | onRemove: () => void; // removal callback supplied by the parent
95 | }
96 |
97 | /**
98 | * PropertyAccordionItem component for rendering individual property items.
99 | * @param {PropertyAccordionItemProps} props - The component props
100 | * @returns {React.ReactElement} The rendered component
101 | */
102 | function PropertyAccordionItem({
103 | field,
104 | fieldNamePrefix,
105 | currentAttribute,
106 | index,
107 | onRemove,
108 | }: PropertyAccordionItemProps): React.ReactElement {
109 | return (
110 |
114 |
118 |
119 | { field.name
120 | ? `${currentAttribute.name}.${field.name}`
121 | : `Object Property ${index + 1}` }
122 |
123 | { field.type }
124 |
125 |
126 |
127 |
133 |
139 |
140 |
145 |
146 |
147 | );
148 | }
149 |
--------------------------------------------------------------------------------
/src/components/result-section.tsx:
--------------------------------------------------------------------------------
1 | 'use client';
2 |
3 | import {
4 | forwardRef,
5 | } from "react";
6 |
7 | import { ExecutionTime, ExecutionTimeResult } from "@/components/execution-time-result";
8 | import JsonResultView from "@/components/json-result-view";
9 | import { Skeleton } from "@/components/skeleton";
10 |
11 | interface ErrorObject { // Error payload rendered by ResultSection. NOTE(review): page.tsx imports an ErrorObject from "@/lib/types"; check whether this is a duplicate.
12 | title: string; // rendered in the error view
13 | message: string; // rendered in the error view alongside the title
14 | }
15 |
16 | interface ResultSectionProps { // Props for ResultSection; the three flags are checked in the order listed.
17 | isSubmitting: boolean; // checked first
18 | showErrorMessage: boolean; // checked second; selects the error view
19 | error: ErrorObject | null; // title and message for the error view
20 | showResults: boolean; // checked third; selects the results view
21 | objectResult: any; // extraction result; presumably passed to the JSON view (confirm against the JSX)
22 | executionTime: ExecutionTime; // timings; presumably passed to the execution-time view (confirm against the JSX)
23 | }
24 |
25 | /**
26 | * Result display section component
27 | * This component is wrapped with forwardRef to allow passing of refs
28 | */
29 | const ResultSection = forwardRef(
30 | (
31 | {
32 | isSubmitting,
33 | showErrorMessage,
34 | error,
35 | showResults,
36 | objectResult,
37 | executionTime,
38 | },
39 | ref,
40 | ) => {
41 | if (isSubmitting) {
42 | return ;
43 | }
44 |
45 | if (showErrorMessage) {
46 | return (
47 |
48 |
49 |
{ error?.title }
50 |
{ error?.message }
51 |
52 |
53 | );
54 | }
55 |
56 | if (showResults) {
57 | return (
58 |
59 |
60 |
61 |
62 | );
63 | }
64 |
65 | return
;
66 | },
67 | );
68 |
69 | // Set display name for the forwardRef component
70 | ResultSection.displayName = 'ResultSection';
71 |
72 | export default ResultSection;
73 |
--------------------------------------------------------------------------------
/src/components/select-field.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import {
4 | FormControl,
5 | FormDescription,
6 | FormField,
7 | FormItem,
8 | FormLabel,
9 | FormMessage,
10 | } from "@/components/ui/form";
11 | import {
12 | Select,
13 | SelectContent,
14 | SelectItem,
15 | SelectTrigger,
16 | SelectValue,
17 | } from "@/components/ui/select";
18 | import { useFormContext } from "react-hook-form";
19 |
20 | interface SelectFieldOptions { // One entry of the select's option list.
21 | label: string; // text rendered for the option
22 | value: string; // presumably the value written to the form field; confirm against the JSX
23 | }
24 |
25 | interface SelectFieldProps { // Props for the form-bound select wrapper.
26 | name: string; // field name; also the visible label when `label` is omitted
27 | label?: string; // optional label text
28 | description?: string; // helper text, rendered only when provided
29 | className?: string; // defaults to "" in the component
30 | options?: SelectFieldOptions[]; // choices to list; defaults to an empty array
31 | }
32 |
33 | export default function SelectField({
34 | name,
35 | label,
36 | description,
37 | className = "",
38 | options = [],
39 | }: SelectFieldProps) {
40 | const { control } = useFormContext();
41 |
42 | return (
43 | (
47 |
48 | { label || name }
49 |
50 |
51 |
52 |
53 |
54 |
55 | { options.map((option) => (
56 |
57 | { option.label }
58 |
59 | )) }
60 |
61 |
62 |
63 | { description && { description } }
64 |
65 |
66 | ) }
67 | />
68 | );
69 | }
70 |
--------------------------------------------------------------------------------
/src/components/skeleton.tsx:
--------------------------------------------------------------------------------
1 | import { cn } from "@/lib/utils";
2 |
3 | function Skeleton({
4 | className,
5 | ...props
6 | }: React.HTMLAttributes) {
7 | return (
8 |
12 | );
13 | }
14 |
15 | export { Skeleton };
16 |
--------------------------------------------------------------------------------
/src/components/template-form.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import { zodResolver } from "@hookform/resolvers/zod";
4 | import { useEffect } from "react";
5 | import { useForm, UseFormReturn } from "react-hook-form";
6 |
7 | import { Button } from "@/components/ui/button";
8 | import {
9 | Card, CardContent, CardHeader, CardTitle,
10 | } from "@/components/ui/card";
11 | import {
12 | Form,
13 | FormControl,
14 | FormDescription,
15 | FormField,
16 | FormItem,
17 | FormLabel,
18 | } from "@/components/ui/form";
19 | import { Template, TemplateSchema } from "@/schemas/template-schema";
20 | import { CircleAlert } from "lucide-react";
21 | import AttributeFields from "./attribute-fields";
22 | import InputField from "./input-field";
23 | import { Checkbox } from "./ui/checkbox";
24 |
25 | const DEFAULT_TEMPLATE_VALUES: Template = { // Blank template: the form's defaultValues and the reset() fallback when no template is passed.
26 | name : "",
27 | url : "",
28 | attributes : [],
29 | ignoreCache : false,
30 | };
31 |
32 | interface TemplateFormProps { // Props for TemplateForm.
33 | template: Template | null | undefined; // template to load into the form; the form resets whenever this changes
34 | onFormSubmit: (data: Template, form: UseFormReturn) => void; // called with the submitted values and the form instance
35 | }
36 |
37 | /**
38 | * TemplateForm component for creating and editing templates
39 | * @param {TemplateFormProps} props - The component props
40 | * @returns {JSX.Element} The rendered TemplateForm component
41 | */
42 | export default function TemplateForm({ template, onFormSubmit }: TemplateFormProps): JSX.Element {
43 | const form = useForm({
44 | resolver : zodResolver(TemplateSchema),
45 | defaultValues : DEFAULT_TEMPLATE_VALUES,
46 | });
47 |
48 | const {
49 | reset, control, handleSubmit, formState: { isSubmitting },
50 | } = form;
51 |
52 | useEffect(() => {
53 | reset(template || DEFAULT_TEMPLATE_VALUES);
54 | }, [template, reset]);
55 |
56 | const onSubmit = async (values: Template) => {
57 | onFormSubmit(values, form);
58 | };
59 |
60 | return (
61 |
71 |
72 | );
73 | }
74 |
75 | /**
76 | * TemplateInfoCard component for displaying template information fields
77 | * @returns {JSX.Element} The rendered TemplateInfoCard component
78 | */
79 | function TemplateInfoCard(): JSX.Element {
80 | return (
81 |
82 |
83 |
84 | Template Information
85 |
86 |
87 |
88 |
89 |
90 |
91 |
92 | );
93 | }
94 |
95 | /**
96 | * WarningAlert component for displaying important information to users
97 | * @returns {JSX.Element} The rendered WarningAlert component
98 | */
99 | function WarningAlert(): JSX.Element {
100 | return (
101 |
102 |
103 |
Info
104 |
105 |
Warning alert!
106 | { ' ' }
107 | Due to LLM Token limitations per request that can cause a slow process or even an error response, Please note the following :
108 |
109 |
110 | Processing Time:
111 | { ' ' }
112 | It might take longer to process requests, especially if the targeted website has extensive content.
113 |
114 |
115 | Content Length:
116 | { ' ' }
117 | Very long content from a website can cause difficulties in processing due to token limitations in the language model.
118 |
119 |
120 | Anti-Scraping Security:
121 | { ' ' }
122 | Some websites have security measures that may prevent us from extracting information.
123 |
124 |
125 |
126 |
127 | );
128 | }
129 |
130 | /**
131 | * AttributesCard component for displaying attribute fields
132 | * @returns {JSX.Element} The rendered AttributesCard component
133 | */
134 | function AttributesCard(): JSX.Element {
135 | return (
136 |
137 |
138 |
139 | Attributes
140 |
141 |
142 |
143 |
144 |
145 |
146 | );
147 | }
148 |
149 | /**
150 | * IgnoreCacheCheckbox component for toggling cache usage
151 | * @param {Object} props - The component props
152 | * @param {any} props.control - The form control object
153 | * @returns {JSX.Element} The rendered IgnoreCacheCheckbox component
154 | */
155 | function IgnoreCacheCheckbox({ control }: { control: any }): JSX.Element {
156 | return (
157 | (
161 |
162 |
163 |
164 |
165 |
166 | Ignore Cache
167 |
168 | Check this option to bypass the scraping cache and fetch fresh data
169 | from the URL. Note that ignoring the cache may result in longer
170 | processing times, especially for the first scrape or complex web
171 | pages.
172 |
173 |
174 |
175 | ) }
176 | />
177 | );
178 | }
179 |
--------------------------------------------------------------------------------
/src/components/text-area-field.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import {
4 | FormControl,
5 | FormField,
6 | FormItem,
7 | FormLabel,
8 | FormMessage,
9 | } from "@/components/ui/form";
10 | import { Textarea } from "@/components/ui/textarea";
11 | import { useFormContext } from "react-hook-form";
12 |
13 | // Props for TextareaField: native textarea attributes plus the form field name and an optional label.
14 | interface TextareaProps extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {
15 |   name: string;
16 |   label?: string;
17 | }
18 |
19 | export default function TextareaField({
20 | name,
21 | label,
22 | ...props
23 | }: TextareaProps) {
24 | const { control } = useFormContext();
25 |
26 | return (
27 | (
31 |
32 | { label || name }
33 |
34 |
35 |
36 |
37 |
38 | ) }
39 | />
40 | );
41 | }
42 |
--------------------------------------------------------------------------------
/src/components/ui/accordion.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as AccordionPrimitive from "@radix-ui/react-accordion";
4 | import { ChevronDown, Trash } from "lucide-react";
5 | import * as React from "react";
6 |
7 | import { cn } from "@/lib/utils";
8 | import { Button } from "./button";
9 |
10 | const Accordion = AccordionPrimitive.Root;
11 |
12 | const AccordionItem = React.forwardRef<
13 | React.ElementRef,
14 | React.ComponentPropsWithoutRef
15 | >(({ className, ...props }, ref) => (
16 |
21 | ));
22 | AccordionItem.displayName = "AccordionItem";
23 |
24 | // Props of the Radix accordion trigger plus an optional onCloseClick callback.
25 | interface AccordionTriggerProps extends React.ComponentPropsWithoutRef<typeof AccordionPrimitive.Trigger> {
26 |   onCloseClick?: () => void;
27 | }
28 |
29 | const AccordionTrigger = React.forwardRef<
30 | React.ElementRef,
31 | AccordionTriggerProps
32 | >(({ className, children, onCloseClick, ...props }, ref) => (
33 |
34 | svg]:rotate-180",
38 | className
39 | )}
40 | {...props}
41 | >
42 | {children}
43 |
44 |
45 | {onCloseClick && (
46 |
53 |
54 |
55 | )}
56 |
57 | ));
58 | AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName;
59 |
60 | const AccordionContent = React.forwardRef<
61 | React.ElementRef,
62 | React.ComponentPropsWithoutRef
63 | >(({ className, children, ...props }, ref) => (
64 |
69 | {children}
70 |
71 | ));
72 |
73 | AccordionContent.displayName = AccordionPrimitive.Content.displayName;
74 |
75 | export { Accordion, AccordionContent, AccordionItem, AccordionTrigger };
76 |
--------------------------------------------------------------------------------
/src/components/ui/button.tsx:
--------------------------------------------------------------------------------
1 | import { Slot } from "@radix-ui/react-slot";
2 | import { cva, type VariantProps } from "class-variance-authority";
3 | import * as React from "react";
4 |
5 | import { cn } from "@/lib/utils";
6 |
7 | const buttonVariants = cva(
8 | "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50",
9 | {
10 | variants: {
11 | variant: {
12 | default: "bg-primary text-primary-foreground hover:bg-primary/90",
13 | destructive:
14 | "bg-destructive text-destructive-foreground hover:bg-destructive/90",
15 | outline:
16 | "border border-input bg-background hover:bg-accent hover:text-accent-foreground",
17 | secondary:
18 | "bg-secondary text-secondary-foreground hover:bg-secondary/80",
19 | success: "bg-success text-success-foreground hover:bg-success/80",
20 | ghost: "hover:bg-accent hover:text-accent-foreground",
21 | link: "text-primary underline-offset-4 hover:underline",
22 | },
23 | size: {
24 | default: "h-10 px-4 py-2",
25 | sm: "h-9 rounded-md px-3",
26 | lg: "h-11 rounded-md px-8",
27 | icon: "h-10 w-10",
28 | },
29 | },
30 | defaultVariants: {
31 | variant: "default",
32 | size: "default",
33 | },
34 | }
35 | );
36 |
37 | export interface ButtonProps
38 |   extends React.ButtonHTMLAttributes<HTMLButtonElement>,
39 |     VariantProps<typeof buttonVariants> {
40 |   asChild?: boolean; // when true the component renders through Slot instead of "button"
41 | }
42 |
43 | const Button = React.forwardRef(
44 | ({ className, variant, size, asChild = false, ...props }, ref) => {
45 | const Comp = asChild ? Slot : "button";
46 | return (
47 |
52 | );
53 | }
54 | );
55 | Button.displayName = "Button";
56 |
57 | export { Button, buttonVariants };
58 |
--------------------------------------------------------------------------------
/src/components/ui/card.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react"
2 |
3 | import { cn } from "@/lib/utils"
4 |
5 | const Card = React.forwardRef<
6 | HTMLDivElement,
7 | React.HTMLAttributes
8 | >(({ className, ...props }, ref) => (
9 |
17 | ))
18 | Card.displayName = "Card"
19 |
20 | const CardHeader = React.forwardRef<
21 | HTMLDivElement,
22 | React.HTMLAttributes
23 | >(({ className, ...props }, ref) => (
24 |
29 | ))
30 | CardHeader.displayName = "CardHeader"
31 |
32 | const CardTitle = React.forwardRef<
33 | HTMLParagraphElement,
34 | React.HTMLAttributes
35 | >(({ className, ...props }, ref) => (
36 |
44 | ))
45 | CardTitle.displayName = "CardTitle"
46 |
47 | const CardDescription = React.forwardRef<
48 | HTMLParagraphElement,
49 | React.HTMLAttributes
50 | >(({ className, ...props }, ref) => (
51 |
56 | ))
57 | CardDescription.displayName = "CardDescription"
58 |
59 | const CardContent = React.forwardRef<
60 | HTMLDivElement,
61 | React.HTMLAttributes
62 | >(({ className, ...props }, ref) => (
63 |
64 | ))
65 | CardContent.displayName = "CardContent"
66 |
67 | const CardFooter = React.forwardRef<
68 | HTMLDivElement,
69 | React.HTMLAttributes
70 | >(({ className, ...props }, ref) => (
71 |
76 | ))
77 | CardFooter.displayName = "CardFooter"
78 |
79 | export { Card, CardHeader, CardFooter, CardTitle, CardDescription, CardContent }
80 |
--------------------------------------------------------------------------------
/src/components/ui/checkbox.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import * as CheckboxPrimitive from "@radix-ui/react-checkbox"
5 | import { Check } from "lucide-react"
6 |
7 | import { cn } from "@/lib/utils"
8 |
9 | const Checkbox = React.forwardRef<
10 | React.ElementRef,
11 | React.ComponentPropsWithoutRef
12 | >(({ className, ...props }, ref) => (
13 |
21 |
24 |
25 |
26 |
27 | ))
28 | Checkbox.displayName = CheckboxPrimitive.Root.displayName
29 |
30 | export { Checkbox }
31 |
--------------------------------------------------------------------------------
/src/components/ui/dialog.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import * as DialogPrimitive from "@radix-ui/react-dialog"
5 | import { X } from "lucide-react"
6 |
7 | import { cn } from "@/lib/utils"
8 |
9 | const Dialog = DialogPrimitive.Root
10 |
11 | const DialogTrigger = DialogPrimitive.Trigger
12 |
13 | const DialogPortal = DialogPrimitive.Portal
14 |
15 | const DialogClose = DialogPrimitive.Close
16 |
17 | const DialogOverlay = React.forwardRef<
18 | React.ElementRef,
19 | React.ComponentPropsWithoutRef
20 | >(({ className, ...props }, ref) => (
21 |
29 | ))
30 | DialogOverlay.displayName = DialogPrimitive.Overlay.displayName
31 |
32 | const DialogContent = React.forwardRef<
33 | React.ElementRef,
34 | React.ComponentPropsWithoutRef
35 | >(({ className, children, ...props }, ref) => (
36 |
37 |
38 |
46 | {children}
47 |
48 |
49 | Close
50 |
51 |
52 |
53 | ))
54 | DialogContent.displayName = DialogPrimitive.Content.displayName
55 |
56 | const DialogHeader = ({
57 | className,
58 | ...props
59 | }: React.HTMLAttributes) => (
60 |
67 | )
68 | DialogHeader.displayName = "DialogHeader"
69 |
70 | const DialogFooter = ({
71 | className,
72 | ...props
73 | }: React.HTMLAttributes) => (
74 |
81 | )
82 | DialogFooter.displayName = "DialogFooter"
83 |
84 | const DialogTitle = React.forwardRef<
85 | React.ElementRef,
86 | React.ComponentPropsWithoutRef
87 | >(({ className, ...props }, ref) => (
88 |
96 | ))
97 | DialogTitle.displayName = DialogPrimitive.Title.displayName
98 |
99 | const DialogDescription = React.forwardRef<
100 | React.ElementRef,
101 | React.ComponentPropsWithoutRef
102 | >(({ className, ...props }, ref) => (
103 |
108 | ))
109 | DialogDescription.displayName = DialogPrimitive.Description.displayName
110 |
111 | export {
112 | Dialog,
113 | DialogPortal,
114 | DialogOverlay,
115 | DialogClose,
116 | DialogTrigger,
117 | DialogContent,
118 | DialogHeader,
119 | DialogFooter,
120 | DialogTitle,
121 | DialogDescription,
122 | }
123 |
--------------------------------------------------------------------------------
/src/components/ui/form.tsx:
--------------------------------------------------------------------------------
1 | "use client";
2 |
3 | import * as LabelPrimitive from "@radix-ui/react-label";
4 | import { Slot } from "@radix-ui/react-slot";
5 | import * as React from "react";
6 | import {
7 | Controller,
8 | ControllerProps,
9 | FieldPath,
10 | FieldValues,
11 | FormProvider,
12 | useFormContext,
13 | } from "react-hook-form";
14 |
15 | import { Label } from "@/components/ui/label";
16 | import { cn } from "@/lib/utils";
17 |
18 | const Form = FormProvider;
19 |
20 | type FormFieldContextValue<
21 |   TFieldValues extends FieldValues = FieldValues,
22 |   TName extends FieldPath<TFieldValues> = FieldPath<TFieldValues>
23 | > = {
24 |   name: TName; // field path read back by useFormField through FormFieldContext
25 | };
26 |
27 | const FormFieldContext = React.createContext(
28 | {} as FormFieldContextValue
29 | );
30 |
31 | const FormField = <
32 | TFieldValues extends FieldValues = FieldValues,
33 | TName extends FieldPath = FieldPath
34 | >({
35 | ...props
36 | }: ControllerProps) => {
37 | return (
38 |
39 |
40 |
41 | );
42 | };
43 |
44 | // Resolves the element ids and react-hook-form field state for the enclosing field and item contexts.
45 | const useFormField = () => {
46 |   const fieldContext = React.useContext(FormFieldContext);
47 |   const itemContext = React.useContext(FormItemContext);
48 |   const { getFieldState, formState } = useFormContext();
49 |
50 |   // Guard before fieldContext.name is dereferenced; the hooks above still run unconditionally.
51 |   if (!fieldContext) {
52 |     throw new Error("useFormField should be used within <FormField>");
53 |   }
54 |
55 |   const fieldState = getFieldState(fieldContext.name, formState);
56 |   const { id } = itemContext;
57 |   return {
58 |     id,
59 |     name: fieldContext.name,
60 |     formItemId: `${id}-form-item`,
61 |     formDescriptionId: `${id}-form-item-description`,
62 |     formMessageId: `${id}-form-item-message`,
63 |     ...fieldState,
64 |   };
65 | };
66 |
67 | type FormItemContextValue = {
68 | id: string;
69 | };
70 |
71 | const FormItemContext = React.createContext(
72 | {} as FormItemContextValue
73 | );
74 |
75 | const FormItem = React.forwardRef<
76 | HTMLDivElement,
77 | React.HTMLAttributes
78 | >(({ className, ...props }, ref) => {
79 | const id = React.useId();
80 |
81 | return (
82 |
83 |
84 |
85 | );
86 | });
87 | FormItem.displayName = "FormItem";
88 |
89 | const FormLabel = React.forwardRef<
90 | React.ElementRef,
91 | React.ComponentPropsWithoutRef
92 | >(({ className, ...props }, ref) => {
93 | const { error, formItemId } = useFormField();
94 |
95 | return (
96 |
105 | );
106 | });
107 | FormLabel.displayName = "FormLabel";
108 |
109 | const FormControl = React.forwardRef<
110 | React.ElementRef,
111 | React.ComponentPropsWithoutRef
112 | >(({ ...props }, ref) => {
113 | const { error, formItemId, formDescriptionId, formMessageId } =
114 | useFormField();
115 |
116 | return (
117 |
128 | );
129 | });
130 | FormControl.displayName = "FormControl";
131 |
132 | const FormDescription = React.forwardRef<
133 | HTMLParagraphElement,
134 | React.HTMLAttributes
135 | >(({ className, ...props }, ref) => {
136 | const { formDescriptionId } = useFormField();
137 |
138 | return (
139 |
145 | );
146 | });
147 | FormDescription.displayName = "FormDescription";
148 |
149 | const FormMessage = React.forwardRef<
150 | HTMLParagraphElement,
151 | React.HTMLAttributes
152 | >(({ className, children, ...props }, ref) => {
153 | const { error, formMessageId } = useFormField();
154 | const body = error ? String(error?.message) : children;
155 |
156 | if (!body) {
157 | return null;
158 | }
159 |
160 | return (
161 |
170 | {body}
171 |
172 | );
173 | });
174 | FormMessage.displayName = "FormMessage";
175 |
176 | export {
177 | Form,
178 | FormControl,
179 | FormDescription,
180 | FormField,
181 | FormItem,
182 | FormLabel,
183 | FormMessage,
184 | useFormField,
185 | };
186 |
--------------------------------------------------------------------------------
/src/components/ui/input.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react"
2 |
3 | import { cn } from "@/lib/utils"
4 |
5 | export interface InputProps
6 |   extends React.InputHTMLAttributes<HTMLInputElement> {} // no extra props beyond the native input attributes
7 |
8 | const Input = React.forwardRef(
9 | ({ className, type, ...props }, ref) => {
10 | return (
11 |
20 | )
21 | }
22 | )
23 | Input.displayName = "Input"
24 |
25 | export { Input }
26 |
--------------------------------------------------------------------------------
/src/components/ui/label.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import * as LabelPrimitive from "@radix-ui/react-label"
5 | import { cva, type VariantProps } from "class-variance-authority"
6 |
7 | import { cn } from "@/lib/utils"
8 |
9 | const labelVariants = cva(
10 | "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70"
11 | )
12 |
13 | const Label = React.forwardRef<
14 | React.ElementRef,
15 | React.ComponentPropsWithoutRef &
16 | VariantProps
17 | >(({ className, ...props }, ref) => (
18 |
23 | ))
24 | Label.displayName = LabelPrimitive.Root.displayName
25 |
26 | export { Label }
27 |
--------------------------------------------------------------------------------
/src/components/ui/select.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import * as SelectPrimitive from "@radix-ui/react-select"
5 | import { Check, ChevronDown, ChevronUp } from "lucide-react"
6 |
7 | import { cn } from "@/lib/utils"
8 |
9 | const Select = SelectPrimitive.Root
10 |
11 | const SelectGroup = SelectPrimitive.Group
12 |
13 | const SelectValue = SelectPrimitive.Value
14 |
15 | const SelectTrigger = React.forwardRef<
16 | React.ElementRef,
17 | React.ComponentPropsWithoutRef
18 | >(({ className, children, ...props }, ref) => (
19 | span]:line-clamp-1",
23 | className
24 | )}
25 | {...props}
26 | >
27 | {children}
28 |
29 |
30 |
31 |
32 | ))
33 | SelectTrigger.displayName = SelectPrimitive.Trigger.displayName
34 |
35 | const SelectScrollUpButton = React.forwardRef<
36 | React.ElementRef,
37 | React.ComponentPropsWithoutRef
38 | >(({ className, ...props }, ref) => (
39 |
47 |
48 |
49 | ))
50 | SelectScrollUpButton.displayName = SelectPrimitive.ScrollUpButton.displayName
51 |
52 | const SelectScrollDownButton = React.forwardRef<
53 | React.ElementRef,
54 | React.ComponentPropsWithoutRef
55 | >(({ className, ...props }, ref) => (
56 |
64 |
65 |
66 | ))
67 | SelectScrollDownButton.displayName =
68 | SelectPrimitive.ScrollDownButton.displayName
69 |
70 | const SelectContent = React.forwardRef<
71 | React.ElementRef,
72 | React.ComponentPropsWithoutRef
73 | >(({ className, children, position = "popper", ...props }, ref) => (
74 |
75 |
86 |
87 |
94 | {children}
95 |
96 |
97 |
98 |
99 | ))
100 | SelectContent.displayName = SelectPrimitive.Content.displayName
101 |
102 | const SelectLabel = React.forwardRef<
103 | React.ElementRef,
104 | React.ComponentPropsWithoutRef
105 | >(({ className, ...props }, ref) => (
106 |
111 | ))
112 | SelectLabel.displayName = SelectPrimitive.Label.displayName
113 |
114 | const SelectItem = React.forwardRef<
115 | React.ElementRef,
116 | React.ComponentPropsWithoutRef
117 | >(({ className, children, ...props }, ref) => (
118 |
126 |
127 |
128 |
129 |
130 |
131 |
132 | {children}
133 |
134 | ))
135 | SelectItem.displayName = SelectPrimitive.Item.displayName
136 |
137 | const SelectSeparator = React.forwardRef<
138 | React.ElementRef,
139 | React.ComponentPropsWithoutRef
140 | >(({ className, ...props }, ref) => (
141 |
146 | ))
147 | SelectSeparator.displayName = SelectPrimitive.Separator.displayName
148 |
149 | export {
150 | Select,
151 | SelectGroup,
152 | SelectValue,
153 | SelectTrigger,
154 | SelectContent,
155 | SelectLabel,
156 | SelectItem,
157 | SelectSeparator,
158 | SelectScrollUpButton,
159 | SelectScrollDownButton,
160 | }
161 |
--------------------------------------------------------------------------------
/src/components/ui/textarea.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react"
2 |
3 | import { cn } from "@/lib/utils"
4 |
5 | export interface TextareaProps
6 |   extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {} // no extra props beyond the native textarea attributes
7 |
8 | const Textarea = React.forwardRef(
9 | ({ className, ...props }, ref) => {
10 | return (
11 |
19 | )
20 | }
21 | )
22 | Textarea.displayName = "Textarea"
23 |
24 | export { Textarea }
25 |
--------------------------------------------------------------------------------
/src/controllers/extract-controller.ts:
--------------------------------------------------------------------------------
1 | import { Context } from "hono";
2 |
3 | import ValidationError from "@/errors/validation-error";
4 | import { DEFAULT_LLM_MODEL } from "@/lib/constants";
5 | import { getContext } from "@/lib/context-utils";
6 | import { deleteStoredDocuments } from "@/lib/embed-utils";
7 | import { setupLangChain } from "@/lib/langchain-setup";
8 | import { measureExecutionTime } from "@/lib/time-utils";
9 | import { Template, TemplateSchema } from "@/schemas/template-schema";
10 | import { z, ZodError } from "zod";
11 |
12 | const TemplateSchemaWithRequiredId = TemplateSchema.extend({
13 | id : z.string().min(1, "ID is required"),
14 | ignoreCache : z.boolean().optional(),
15 | model : z.string().optional(),
16 | });
17 |
18 | export const extractController = {
19 |   // Validates the template body, builds the context, runs the LLM and reports timings.
20 |   // Throws ValidationError on a bad body; later failures become a 500 JSON response.
21 |   scrape: async (c: Context) => {
22 |     const { model = DEFAULT_LLM_MODEL, ignoreCache, ...restTemplate } = await c.req.json();
23 |
24 |     // combine later with model and ignore cache
25 |     try {
26 |       TemplateSchemaWithRequiredId.parse({ ignoreCache, model, ...restTemplate });
27 |     } catch (error) {
28 |       if (error instanceof ZodError) {
29 |         throw new ValidationError("Invalid template input", error.errors);
30 |       }
31 |       throw error;
32 |     }
33 |
34 |     const template = restTemplate as Template;
35 |     const scrapeStartTime = Date.now();
36 |
37 |     try {
38 |       const {
39 |         scrapeExecutionTime, embeddingTime, context, storedDocumentIds,
40 |       } = await getContext({ ignoreCache: ignoreCache as boolean, ...template });
41 |
42 |       const llmProcessingStartTime = Date.now();
43 |       let answer;
44 |       try {
45 |         const chain = setupLangChain(template.attributes, model);
46 |         answer = await chain.invoke({ context });
47 |       } catch (llmError) {
48 |         // Best-effort cleanup so a failed LLM call does not strand the embedded documents;
49 |         // a cleanup failure is only logged so the original error is still the one reported.
50 |         await deleteStoredDocuments(storedDocumentIds).catch((cleanupError) => {
51 |           // eslint-disable-next-line no-console
52 |           console.error("Error deleting stored documents:", cleanupError);
53 |         });
54 |         throw llmError;
55 |       }
56 |       const llmProcessingTime = measureExecutionTime(llmProcessingStartTime);
57 |
58 |       await deleteStoredDocuments(storedDocumentIds);
59 |
60 |       return c.json({
61 |         answer, context, scrapeExecutionTime, embeddingTime, llmProcessingTime, storedDocumentIds,
62 |       });
63 |     } catch (error) {
64 |       // eslint-disable-next-line no-console
65 |       console.error("Error in extractHandler:", error);
66 |       const errorMessage = error instanceof Error ? error.message : "An unexpected error occurred";
67 |       return c.json(
68 |         { error: errorMessage, processingTime: measureExecutionTime(scrapeStartTime) },
69 |         { status: 500 },
70 |       );
71 |     }
72 |   },
73 | };
74 |
--------------------------------------------------------------------------------
/src/errors/request-timeout-error.ts:
--------------------------------------------------------------------------------
1 | /**
2 |  * Error type signalling that a request timed out.
3 |  */
4 | export default class RequestTimeoutError extends Error {
5 |   // Machine-readable identifier exposed alongside the message.
6 |   code: string = "REQUEST_TIMEOUT";
7 |
8 |   /**
9 |    * @param {string} message - Text describing the timeout
10 |    */
11 |   constructor(message: string) {
12 |     super(message);
13 |     this.name = "RequestTimeoutError";
14 |   }
15 | }
16 |
--------------------------------------------------------------------------------
/src/errors/validation-error.ts:
--------------------------------------------------------------------------------
1 | /** Error for rejected input; `details` holds whatever the thrower passes as the second argument. */
2 | export default class ValidationError extends Error {
3 |   code: string = "VALIDATION_ERROR";
4 |
5 |   details: any;
6 |
7 |   constructor(message: string, details: any) {
8 |     super(message);
9 |     this.name = "ValidationError";
10 |     this.details = details;
11 |   }
12 | }
13 |
--------------------------------------------------------------------------------
/src/hooks/use-debounce.tsx:
--------------------------------------------------------------------------------
1 | import { useEffect, useState } from "react";
2 |
3 | /**
4 |  * Debounces a changing value: the returned value only catches up after `value`
5 |  * has stayed the same for `delay` milliseconds. Works for any value type.
6 |  */
7 | export default function useDebounce<T = string>(value: T, delay: number): T {
8 |   // Functional forms keep a function-valued T from being invoked as an initializer/updater.
9 |   const [debouncedValue, setDebouncedValue] = useState<T>(() => value);
10 |
11 |   useEffect(() => {
12 |     // Each change restarts the timer; the cleanup cancels the pending update.
13 |     const timerId = setTimeout(() => setDebouncedValue(() => value), delay);
14 |     return () => clearTimeout(timerId);
15 |   }, [value, delay]);
16 |   return debouncedValue;
17 | }
18 |
--------------------------------------------------------------------------------
/src/lib/constants.ts:
--------------------------------------------------------------------------------
1 | export const LLM_MODEL_OPTIONS = {
2 | gemini : ["gemini-pro", "gemini-1.5-flash"],
3 | groq : ["mixtral-8x7b-32768"],
4 | };
5 |
6 | export type LLMProvider = "gemini" | "groq";
7 |
8 | export type LLMModel = "gemini-pro" | "gemini-1.5-flash" | "mixtral-8x7b-32768";
9 |
10 | export const DEFAULT_LLM_MODEL = "mixtral-8x7b-32768";
11 |
12 | export const MAX_CONTEXT_LENGTH = 15000; // upper bound on the context string length, in characters
13 | export const REDIS_EXPIRATION_TIME = 3600; // TTL applied to cached contexts in Redis (presumably seconds — confirm)
14 |
--------------------------------------------------------------------------------
/src/lib/context-utils.ts:
--------------------------------------------------------------------------------
1 | import { Redis } from "@upstash/redis";
2 |
3 | import { MAX_CONTEXT_LENGTH, REDIS_EXPIRATION_TIME } from "@/lib/constants";
4 | import {
5 | embedAndStoreGroupedWebContent,
6 | embedAndStoreWebContent,
7 | retrieveSimilarContext,
8 | } from "@/lib/embed-utils";
9 | import { delay, measureExecutionTime } from "@/lib/time-utils";
10 | import {
11 | Content,
12 | extractContentsFromWeb,
13 | extractGroupedContentFromWeb,
14 | GroupedContent,
15 | } from "@/lib/web-scraper";
16 | import { AttributeType } from "@/schemas/template-schema";
17 | import { env } from "./env";
18 |
19 | const redis = new Redis({
20 | url : env.UPSTASH_REDIS_REST_URL!,
21 | token : env.UPSTASH_REDIS_REST_TOKEN!,
22 | });
23 |
24 | const generateRedisKey = (uuid: string, url: string): string => [uuid, url].join("-"); // cache key: "<uuid>-<url>"
25 |
26 | /**
27 |  * Returns the cached context for (id, url) if present; otherwise scrapes, embeds, retrieves
28 |  * the attribute-relevant chunks, then cleans, truncates and caches the result.
29 |  * @throws {Error} When the scraper yields no contents
30 |  */
31 | export async function getContext({
32 |   id = "",
33 |   url,
34 |   attributes,
35 |   isGroupScraping = true,
36 |   ignoreCache = false,
37 | }: {
38 |   id?: string;
39 |   url: string;
40 |   attributes: AttributeType[];
41 |   isGroupScraping?: boolean;
42 |   ignoreCache?: boolean;
43 | }) {
44 |   const redisKey = generateRedisKey(id, url);
45 |
46 |   // Check Redis cache (ignoreCache skips only the lookup; the fresh result is still cached below)
47 |   if (!ignoreCache) {
48 |     const cachedContext = await redis.get(redisKey);
49 |     if (cachedContext) {
50 |       return {
51 |         scrapeExecutionTime : 0,
52 |         embeddingTime       : 0,
53 |         context             : cachedContext as string,
54 |         storedDocumentIds   : [],
55 |       };
56 |     }
57 |   }
58 |
59 |   // Scrape web content
60 |   const scrapeStartTime = Date.now();
61 |   const contents = isGroupScraping
62 |     ? await extractGroupedContentFromWeb(url)
63 |     : await extractContentsFromWeb(url);
64 |   const scrapeExecutionTime = measureExecutionTime(scrapeStartTime);
65 |   if (!contents) {
66 |     throw new Error("Failed to extract contents from the webpage");
67 |   }
68 |
69 |   // Embed and store content
70 |   const embeddingStartTime = Date.now();
71 |   const storedDocumentIds = isGroupScraping
72 |     ? await embedAndStoreGroupedWebContent(contents as GroupedContent[], url)
73 |     : await embedAndStoreWebContent(contents as Content[], url);
74 |
75 |   await delay(1000); // NOTE(review): presumably lets the vector index catch up before querying — confirm
76 |   const context = await retrieveSimilarContext(url, attributes);
77 |   const embeddingTime = measureExecutionTime(embeddingStartTime);
78 |
79 |   // Clean and truncate context
80 |   const cleanedContext = context.replace(/\s+/g, " ").trim();
81 |   const truncatedContext = truncateString(cleanedContext);
82 |
83 |   // Cache value and TTL in a single command so the key can never be left without an expiry
84 |   await redis.set(redisKey, truncatedContext, { ex: REDIS_EXPIRATION_TIME });
85 |
86 |   return {
87 |     scrapeExecutionTime,
88 |     embeddingTime,
89 |     context: truncatedContext,
90 |     storedDocumentIds,
91 |   };
92 | }
93 |
94 | /**
95 |  * Caps a string at MAX_CONTEXT_LENGTH characters to stay within the LLM token limit
96 |  * @param {string} str - Text to cap
97 |  * @returns {string} The text unchanged when short enough, otherwise its leading part
98 |  */
99 | function truncateString(str: string): string {
100 |   if (str.length <= MAX_CONTEXT_LENGTH) return str;
101 |
102 |   return str.slice(0, MAX_CONTEXT_LENGTH);
103 | }
104 |
--------------------------------------------------------------------------------
/src/lib/embed-utils.ts:
--------------------------------------------------------------------------------
1 | import { AttributeType } from "@/schemas/template-schema";
2 | import { CohereEmbeddings } from "@langchain/cohere";
3 | import { UpstashVectorStore } from "@langchain/community/vectorstores/upstash";
4 | import type { Document } from "@langchain/core/documents";
5 | import { Index } from "@upstash/vector";
6 | import { RecursiveCharacterTextSplitter } from "langchain/text_splitter";
7 | import { formatDocumentsAsString } from "langchain/util/document";
8 | import { v4 as uuidv4 } from "uuid";
9 | import { env } from "./env";
10 | import {
11 | Content,
12 | formatContentForLangChain,
13 | GroupedContent,
14 | } from "./web-scraper";
15 |
16 | /**
17 | * Configuration for the embeddings model.
18 | */
19 | const EMBEDDINGS_CONFIG = {
20 | apiKey : env.COHERE_API_KEY,
21 | batchSize : 48,
22 | model : "embed-multilingual-v3.0",
23 | };
24 |
25 | /**
26 | * Configuration for the text splitter.
27 | */
28 | const TEXT_SPLITTER_CONFIG = {
29 | chunkSize : 1000,
30 | chunkOverlap : 100,
31 | };
32 |
33 | /**
34 | * Creates and configures the embeddings model.
35 | */
36 | const embeddings = new CohereEmbeddings(EMBEDDINGS_CONFIG);
37 |
38 | /**
39 | * Creates and configures the Upstash index.
40 | */
41 | const indexWithCredentials = new Index({
42 | url : env.UPSTASH_VECTOR_REST_URL,
43 | token : env.UPSTASH_VECTOR_REST_TOKEN,
44 | });
45 |
46 | /**
47 | * Creates and configures the vector store.
48 | */
49 | const vectorStore = new UpstashVectorStore(embeddings, {
50 | index: indexWithCredentials,
51 | });
52 |
53 | /**
54 |  * Processes and splits documents into chunks.
55 |  * @param contents - The grouped content to process.
56 |  * @param url - The source URL of the content.
57 |  * @returns An array of processed and split documents.
58 |  */
59 | async function processDocuments(
60 |   contents: GroupedContent[],
61 |   url: string,
62 | ): Promise<Document[]> {
63 |   const textSplitter = new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CONFIG);
64 |
65 |   // One document per group, tagged with the source URL and group id, each split on its own.
66 |   const splitDocuments = await Promise.all(
67 |     contents.map(async (content) => {
68 |       const document: Document = {
69 |         pageContent : formatContentForLangChain(content.blocks),
70 |         metadata    : { source: url, group_id: content.group_id },
71 |       };
72 |       return textSplitter.splitDocuments([document]);
73 |     }),
74 |   );
75 |
76 |   return splitDocuments.flat();
77 | }
78 |
79 | /**
80 |  * Embeds and stores grouped web content; every chunk is added under a freshly generated UUID.
81 |  * @param contents - The grouped content to embed and store.
82 |  * @param url - The source URL of the content.
83 |  * @returns An array of stored document IDs.
84 |  */
85 | export async function embedAndStoreGroupedWebContent(
86 |   contents: GroupedContent[],
87 |   url: string,
88 | ): Promise<string[]> {
89 |   const groupedDocuments = await processDocuments(contents, url);
90 |   return vectorStore.addDocuments(groupedDocuments, {
91 |     ids: groupedDocuments.map(() => uuidv4()),
92 |   });
93 | }
94 |
95 | /**
96 |  * Embeds and stores web content as one document split into chunks, each under a fresh UUID.
97 |  * @param contents - The content to embed and store.
98 |  * @param url - The source URL of the content.
99 |  * @returns An array of stored document IDs.
100 |  */
101 | export async function embedAndStoreWebContent(
102 |   contents: Content[],
103 |   url: string,
104 | ): Promise<string[]> {
105 |   const context = formatContentForLangChain(contents);
106 |   const textSplitter = new RecursiveCharacterTextSplitter(TEXT_SPLITTER_CONFIG);
107 |
108 |   const document: Document = {
109 |     pageContent : context,
110 |     metadata    : { source: url },
111 |   };
112 |   const splitDocuments = await textSplitter.splitDocuments([document]);
113 |
114 |   return vectorStore.addDocuments(splitDocuments, {
115 |     ids: splitDocuments.map(() => uuidv4()),
116 |   });
117 | }
118 |
119 | /**
120 |  * Retrieves similar context based on given attributes.
121 |  * @param url - The source URL to filter results.
122 |  * @param attributes - The attributes to base the similarity search on.
123 |  * @returns A string of similar context.
124 |  */
125 | export async function retrieveSimilarContext(
126 |   url: string,
127 |   attributes: AttributeType[],
128 | ): Promise<string> {
129 |   // NOTE(review): url is interpolated unescaped; a quote character in it would break the filter.
130 |   const filter = `source = '${url}'`;
131 |   const query = attributes
132 |     .map((attribute) => `${attribute.name}: ${attribute.description}`)
133 |     .join(", ");
134 |   const similaritySearchResults = await vectorStore.similaritySearch(
135 |     query,
136 |     30,
137 |     filter,
138 |   );
139 |
140 |   return formatDocumentsAsString(similaritySearchResults);
141 | }
142 |
/**
 * Deletes stored documents by their IDs.
 *
 * An empty list is a no-op: the vector store is not contacted at all.
 *
 * @param ids - An array of document IDs to delete.
 */
export async function deleteStoredDocuments(ids: string[]): Promise<void> {
  if (ids.length === 0) {
    return;
  }
  await vectorStore.delete({ ids });
}
152 |
/**
 * Deletes all stored documents by asking the vector store to delete with
 * `deleteAll: true`.
 */
export async function deleteAllStoredDocuments(): Promise<void> {
  await vectorStore.delete({ deleteAll: true });
}
159 |
--------------------------------------------------------------------------------
/src/lib/env.ts:
--------------------------------------------------------------------------------
1 | import { z } from "zod";
2 |
// Reusable field validators: every secret must be a non-empty string and every
// endpoint must be a well-formed URL.
const requiredString = () => z.string().min(1);
const requiredUrl = () => z.string().url();

// Accepted runtime modes; falls back to "development" when NODE_ENV is unset.
const nodeEnvSchema = z
  .union([
    z.literal("development"),
    z.literal("testing"),
    z.literal("production"),
  ])
  .default("development");

/**
 * Shape of the environment variables this application requires.
 */
const envSchema = z.object({
  // llm providers
  // https://console.groq.com/keys
  GROQ_API_KEY                     : requiredString(),
  // https://aistudio.google.com/app/apikey
  GOOGLE_AI_STUDIO_API_KEY         : requiredString(),
  // embedding, get from https://dashboard.cohere.com/api-keys
  COHERE_API_KEY                   : requiredString(),
  // upstash vectorstore, get from https://console.upstash.com/
  UPSTASH_VECTOR_REST_URL          : requiredUrl(),
  UPSTASH_VECTOR_REST_TOKEN        : requiredString(),
  // upstash redis, get from https://console.upstash.com/
  UPSTASH_REDIS_REST_URL           : requiredUrl(),
  UPSTASH_REDIS_REST_TOKEN         : requiredString(),
  // just put random string or generate with command `openssl rand -base64 32`
  CLEAR_UPSTASH_VECTOR_STORE_TOKEN : requiredString(),
  BASE_URL                         : requiredUrl(),
  NODE_ENV                         : nodeEnvSchema,
});

// Parsed once at module load; throws if any variable is missing or malformed.
export const env = envSchema.parse(process.env);
29 |
--------------------------------------------------------------------------------
/src/lib/error-utils.ts:
--------------------------------------------------------------------------------
1 | import { Template } from "@/schemas/template-schema";
2 | import { Path, UseFormSetError } from "react-hook-form";
3 |
/**
 * Copies a list of validation errors onto a react-hook-form instance.
 *
 * Each error's `path` segments are joined with "." to form the field path, and
 * the error's `message` is registered on that field with type "manual".
 *
 * @param errors - Validation errors; each entry is read for `path` (an array of segments) and `message`.
 * @param setError - The `setError` function of the template form.
 */
export const applyValidationErrorsToForm = (
  errors: any[],
  setError: UseFormSetError<Template>,
) => {
  for (const error of errors) {
    const path = error.path.join(".") as Path<Template>;
    setError(path, {
      type    : "manual",
      message : error.message,
    });
  }
};
16 |
--------------------------------------------------------------------------------
/src/lib/langchain-setup.ts:
--------------------------------------------------------------------------------
1 | import {
2 | ArrayAttribute,
3 | AttributeType,
4 | ObjectAttribute,
5 | } from "@/schemas/template-schema";
6 | import { JsonOutputParser } from "@langchain/core/output_parsers";
7 | import { ChatPromptTemplate } from "@langchain/core/prompts";
8 | import { RunnableSequence } from "@langchain/core/runnables";
9 | import { z } from "zod";
10 | import { DEFAULT_LLM_MODEL } from "./constants";
11 | import { getModel } from "./llm-utils";
12 |
13 | // const MAX_TOKENS = 4000;
14 |
/**
 * Safely parses a JSON string using a JsonOutputParser.
 *
 * Any failure raised by the parser is replaced with a user-facing error; the
 * original failure is kept as the new error's `cause` so it is not lost when
 * debugging.
 *
 * @param {string} jsonString - The JSON string to parse.
 * @returns {Promise<any>} The parsed JSON object.
 * @throws {Error} If the parser rejects the input.
 */
async function safeJsonParse(jsonString: string): Promise<any> {
  try {
    const parser = new JsonOutputParser();
    return await parser.parse(jsonString);
  } catch (error) {
    throw new Error("Invalid JSON LLM output, please try again later.", {
      cause: error,
    });
  }
}
29 |
/**
 * Builds the prompt -> model -> JSON-parse pipeline used to extract attributes.
 *
 * @param {AttributeType[]} attributes - The attributes the prompt should ask for.
 * @param {string} [modelName=DEFAULT_LLM_MODEL] - The name of the language model to use.
 * @returns {RunnableSequence} A runnable whose last step parses the model reply as JSON.
 */
export function setupLangChain(
  attributes: AttributeType[],
  modelName: string = DEFAULT_LLM_MODEL,
): RunnableSequence {
  const template = generatePromptTemplate(attributes);
  const llm = getModel(modelName);

  return RunnableSequence.from([
    ChatPromptTemplate.fromTemplate(template),
    llm,
    // NOTE(review): assumes the model message exposes its text as `content` — confirm per provider.
    async (message) => safeJsonParse(message.content),
  ]);
}
50 |
/**
 * Renders the attribute names as a bulleted list, one `- name` entry per line.
 * @param {AttributeType[]} attributes - The attributes to list.
 * @returns {string} The newline-separated bullet list (empty string for no attributes).
 */
function generateAttributeList(attributes: AttributeType[]): string {
  const bullets: string[] = [];
  for (const { name } of attributes) {
    bullets.push(`- ${name}`);
  }
  return bullets.join("\n");
}
59 |
/**
 * Builds the comma-separated schema fragment shown to the model.
 *
 * Array and object attributes are delegated to their dedicated handlers; any
 * other attribute is rendered as `name: "type"`.
 *
 * @param {AttributeType[]} attributes - The attributes to generate the schema from.
 * @returns {string} A formatted schema string.
 */
function generateSchemaString(attributes: AttributeType[]): string {
  const entries: string[] = [];
  for (const attribute of attributes) {
    if (attribute.type === "array") {
      entries.push(handleArrayAttribute(attribute));
    } else if (attribute.type === "object") {
      entries.push(handleObjectAttribute(attribute));
    } else {
      entries.push(`${attribute.name}: "${attribute.type}"`);
    }
  }
  return entries.join(", ");
}
79 |
/**
 * Renders the schema fragment for an array attribute.
 *
 * Arrays of objects with declared properties become `name: [{{prop: "type", ...}}]`
 * (braces are doubled because the result is embedded in a prompt template);
 * every other array becomes `name: ["itemType"]`.
 *
 * @param {ArrayAttribute} attribute - The array attribute to handle.
 * @returns {string} A formatted schema string for the array attribute.
 */
function handleArrayAttribute(attribute: ArrayAttribute): string {
  const { name, items } = attribute;

  if (items.type !== "object" || !items.properties) {
    return `${name}: ["${items.type}"]`;
  }

  const fields: string[] = [];
  for (const property of items.properties) {
    fields.push(`${property.name}: "${property.type}"`);
  }
  return `${name}: [{{${fields.join(", ")}}}]`;
}
95 |
/**
 * Renders the schema fragment for an object attribute.
 *
 * Each property is written as `attributeName.propertyName: "type"` and the
 * whole list is wrapped in doubled braces (the result is embedded in a prompt
 * template).
 *
 * @param {ObjectAttribute} attribute - The object attribute to handle.
 * @returns {string} A formatted schema string for the object attribute.
 */
function handleObjectAttribute(attribute: ObjectAttribute): string {
  const fields: string[] = [];
  for (const property of attribute.properties) {
    fields.push(`${attribute.name}.${property.name}: "${property.type}"`);
  }
  return `{{${fields.join(", ")}}}`;
}
108 |
/**
 * Collects the human-readable descriptions of all attributes, one per line.
 *
 * Array and object attributes always contribute the text produced by their
 * dedicated handlers; any other attribute contributes `- name: description`
 * only when it has a non-empty description.
 *
 * @param {AttributeType[]} attributes - The attributes to generate descriptions for.
 * @returns {string} A formatted string of attribute descriptions.
 */
function generateAttributeDescriptions(attributes: AttributeType[]): string {
  const lines: string[] = [];
  for (const attribute of attributes) {
    if (attribute.type === "array") {
      lines.push(handleArrayDescription(attribute));
    } else if (attribute.type === "object") {
      lines.push(handleObjectDescription(attribute));
    } else if (attribute.description) {
      lines.push(`- ${attribute.name}: ${attribute.description}`);
    }
  }
  return lines.join("\n");
}
131 |
/**
 * Builds the description text for an array attribute.
 *
 * The text optionally starts with the attribute's own description, always
 * states the item type, and — for arrays of objects with declared properties —
 * lists every property that has a description.
 *
 * @param {ArrayAttribute} attribute - The array attribute to handle.
 * @returns {string} A formatted description string for the array attribute.
 */
function handleArrayDescription(attribute: ArrayAttribute): string {
  const { name, description, items } = attribute;
  const parts: string[] = [];

  if (description) {
    parts.push(`${name} is ${description}, `);
  }
  parts.push(`${name} is an array of ${items.type}s`);

  if (items.type === "object" && items.properties) {
    const propertyLines: string[] = [];
    for (const property of items.properties) {
      if (property.description) {
        propertyLines.push(`- ${name}.${property.name}: ${property.description}`);
      }
    }
    parts.push(" with the following description:\n");
    parts.push(propertyLines.join("\n"));
  }

  return parts.join("");
}
152 |
/**
 * Builds the description text for an object attribute.
 *
 * The text optionally starts with the attribute's own description, followed by
 * an introductory line and one `- name.property: description` line for every
 * property that has a description.
 *
 * @param {ObjectAttribute} attribute - The object attribute to handle.
 * @returns {string} A formatted description string for the object attribute.
 */
function handleObjectDescription(attribute: ObjectAttribute): string {
  const { name, description, properties } = attribute;

  const heading = description ? `${name} is ${description}\n` : "";
  const intro = `${name} is an object with the following description:\n`;

  const propertyLines: string[] = [];
  for (const property of properties) {
    if (property.description) {
      propertyLines.push(`- ${name}.${property.name}: ${property.description}`);
    }
  }

  return `${heading}${intro}${propertyLines.join("\n")}`;
}
168 |
/**
 * Doubles every curly brace so the prompt-template parser treats it as literal
 * text instead of the start or end of an input variable.
 * @param {string} text - Free text that will be embedded in a prompt template.
 * @returns {string} The text with `{` and `}` doubled.
 */
function escapeTemplateBraces(text: string): string {
  return text.replace(/[{}]/g, "$&$&");
}

/**
 * Generates a prompt template for attribute extraction.
 *
 * The returned string is a template: `{context}` is the only intended input
 * variable. Attribute names and descriptions come from user input, so their
 * braces are escaped before interpolation; otherwise a description such as
 * "price in {currency}" would be parsed as an additional required variable.
 * The schema string is inserted untouched because its handlers already emit
 * doubled braces on purpose.
 *
 * @param {AttributeType[]} attributes - The attributes to generate the prompt for.
 * @returns {string} A formatted prompt template.
 */
export function generatePromptTemplate(attributes: AttributeType[]): string {
  const listAttributeString = escapeTemplateBraces(
    generateAttributeList(attributes),
  );
  const schemaString = generateSchemaString(attributes);
  const description = escapeTemplateBraces(
    generateAttributeDescriptions(attributes),
  );

  return `
    You are an AI assistant designed to extract specific information from web content. Your task is to analyze the given context and extract the requested attributes.

    Attributes to extract:
    ${listAttributeString}

    Context:
    {context}

    Instructions:
    1. Carefully read the context and identify the requested information.
    2. For each attribute, extract the most accurate and relevant information from the context.
    3. If an attribute is not found or unclear, use "N/A" as the value.
    4. Ensure all extracted information is factual and directly supported by the context.
    5. Respond ONLY with a valid JSON string wrapped in an "output" attribute. Do not include any explanations, comments, or additional text.

    Response Format:
    - Provide ONLY a valid JSON string wrapped in an "output" attribute.
    - Use double quotes for all strings in the JSON.
    - For number types, do not use quotes.
    - Do not include any text before or after the JSON string.
    - Do not include any confidence scores, notes, or explanations.
    - Ensure the JSON is properly formatted without any trailing commas.

    JSON Schema:
    ${schemaString}

    Attribute Descriptions:
    ${description}

    Generate the JSON string with the requested information. Your entire response must be a single, valid JSON object:
  `;
}
212 |
/**
 * Sets up a LangChain sequence that uses the model's structured-output mode.
 *
 * A Zod schema is derived from the attributes and attached to the model with
 * `withStructuredOutput`; the sequence is the prompt followed by that model.
 *
 * Note: still unstable — it sometimes raises an error because the LLM cannot
 * handle the schema.
 *
 * @param {AttributeType[]} attributes - The attributes to process.
 * @param {string} [modelName=DEFAULT_LLM_MODEL] - The name of the language model to use.
 * @returns {RunnableSequence} A LangChain sequence for processing.
 */
export function setupJsonLangChain(
  attributes: AttributeType[],
  modelName: string = DEFAULT_LLM_MODEL,
): RunnableSequence {
  const schema = createSchemaFromAttributes(attributes);
  const model = getModel(modelName);
  const structuredLlm = model.withStructuredOutput(schema);

  // Missing values are requested as null throughout: every schema field is
  // nullable, and a placeholder string would not validate for number or
  // boolean fields.
  const prompt = ChatPromptTemplate.fromTemplate(`
    You are an AI assistant designed to extract specific information from web content. Your task is to analyze the given context and extract the requested attributes.

    Context from web content:
    {context}

    Instructions:
    1. Carefully read the context and identify the requested information.
    2. For each attribute, extract the most accurate and relevant information from the context.
    3. If an attribute is not found or unclear, use null as the value.
    4. Ensure all extracted information is factual and directly supported by the context.
    5. Respond ONLY with a valid JSON string. Do not include any explanations or additional text.

    Response Format:
    - Provide ONLY a valid JSON string.
    - Use double quotes for all strings in the JSON.
    - For number types, do not use quotes.
    - Do not include any text before or after the JSON string.

    Remember to provide answers for all requested attributes. If an attribute is not found and cannot be reasonably inferred, return null for that field.
  `);

  return RunnableSequence.from([prompt, structuredLlm]);
}
259 |
/**
 * Creates a Zod schema for an array attribute.
 *
 * Arrays of objects with declared properties become an array of object
 * schemas, with the attribute description attached to the array. For any other
 * item type the description is attached to the item schema instead.
 *
 * @param {ArrayAttribute} attribute - The array attribute to create a schema for.
 * @returns {z.ZodType} A Zod schema for the array attribute.
 */
function createArrayZod(attribute: ArrayAttribute): z.ZodType {
  const { items, description } = attribute;

  if (items.type !== "object" || !items.properties) {
    const itemSchema = primitiveZodSchema(
      items.type as "string" | "number" | "boolean",
      description,
    );
    return z.array(itemSchema);
  }

  const objectProperties = items.properties.map((property) => ({
    name        : property.name,
    type        : property.type,
    description : property.description,
  }));
  const itemSchema = createObjectZod(objectProperties);
  return z.array(itemSchema).describe(description);
}
282 |
/**
 * A single primitive-typed property of an object schema, as consumed by
 * `createObjectZod`.
 */
interface ObjectProperty {
  // Key under which the property appears in the generated object schema.
  name: string;
  // Primitive type used to pick the Zod schema for the property.
  type: "string" | "number" | "boolean";
  // Text attached to the property's schema via `.describe(...)`.
  description: string;
}
288 |
/**
 * Creates a Zod object schema from a list of primitive properties.
 *
 * Each property becomes a key of the object whose value is the schema returned
 * by `primitiveZodSchema` for the property's type and description.
 *
 * @param {ObjectProperty[]} properties - The properties of the object attribute.
 * @returns {z.ZodObject<any>} A Zod schema for the object attribute.
 */
function createObjectZod(properties: ObjectProperty[]): z.ZodObject<any> {
  const objectZod = Object.fromEntries(
    properties.map((attr) => [
      attr.name,
      primitiveZodSchema(attr.type, attr.description),
    ]),
  );
  return z.object(objectZod);
}
303 |
/**
 * Creates a Zod schema for a primitive type.
 *
 * The resulting schema carries the description and accepts `undefined` and
 * `null` in addition to the primitive itself.
 *
 * @param {string} type - The type of the primitive.
 * @param {string} description - The description of the primitive.
 * @returns {z.ZodType} A Zod schema for the primitive type.
 * @throws {Error} If `type` is not "string", "number" or "boolean".
 */
const primitiveZodSchema = (
  type: "string" | "number" | "boolean",
  description: string,
): z.ZodType => {
  let base: z.ZodTypeAny;
  if (type === "string") {
    base = z.string();
  } else if (type === "number") {
    base = z.number();
  } else if (type === "boolean") {
    base = z.boolean();
  } else {
    throw new Error(`Unsupported type: ${type}`);
  }
  return base.describe(description).optional().nullable();
};
325 |
/**
 * Creates a Zod schema for an attribute by dispatching on its `type`:
 * objects and arrays go to their dedicated builders, everything else is
 * treated as a primitive.
 *
 * @param {AttributeType} attribute - The attribute to create a schema for.
 * @returns {z.ZodType} A Zod schema for the attribute.
 */
const typeToZod = (attribute: AttributeType): z.ZodType => {
  if (attribute.type === "object") {
    return createObjectZod(attribute.properties);
  }
  if (attribute.type === "array") {
    return createArrayZod(attribute);
  }
  return primitiveZodSchema(attribute.type, attribute.description);
};
341 |
/**
 * Creates a Zod object schema with one key per attribute name; each value is
 * the schema returned by `typeToZod` for that attribute.
 *
 * @param {AttributeType[]} attributes - The attributes to create a schema from.
 * @returns {z.ZodObject<any>} A Zod schema for the attributes.
 */
const createSchemaFromAttributes = (
  attributes: AttributeType[],
): z.ZodObject<any> => {
  const shape = Object.fromEntries(
    attributes.map((attr) => [attr.name, typeToZod(attr)]),
  );
  return z.object(shape);
};
355 |
--------------------------------------------------------------------------------
/src/lib/llm-utils.ts:
--------------------------------------------------------------------------------
1 | import { ChatGoogleGenerativeAI } from "@langchain/google-genai";
2 | import { ChatGroq } from "@langchain/groq";
3 | import { LLM_MODEL_OPTIONS } from "./constants";
4 | import { env } from "./env";
5 |
/**
 * Type for supported LLM models: the union of the element types of every
 * model list found in `LLM_MODEL_OPTIONS`.
 */
type SupportedModel =
  (typeof LLM_MODEL_OPTIONS)[keyof typeof LLM_MODEL_OPTIONS][number];
11 |
/**
 * Factory per provider: given a model name, builds the matching chat model
 * client configured with that provider's API key from the environment.
 */
const MODEL_CREATORS = {
  groq(model: string) {
    return new ChatGroq({
      apiKey    : env.GROQ_API_KEY,
      modelName : model,
    });
  },
  gemini(model: string) {
    return new ChatGoogleGenerativeAI({
      apiKey    : env.GOOGLE_AI_STUDIO_API_KEY,
      modelName : model,
    });
  },
};
25 |
/**
 * Instantiates the chat model for the given model name.
 *
 * The provider is the first key of `LLM_MODEL_OPTIONS` whose model list
 * contains the name; that provider's factory in `MODEL_CREATORS` builds the
 * client.
 *
 * @param model - The name of the model to instantiate
 * @returns An instance of the specified LLM model
 * @throws Error if no provider lists the model or the provider has no factory
 */
export function getModel(model: SupportedModel) {
  let provider: keyof typeof MODEL_CREATORS | undefined;
  for (const [providerName, models] of Object.entries(LLM_MODEL_OPTIONS)) {
    if (models.includes(model)) {
      provider = providerName as keyof typeof MODEL_CREATORS;
      break;
    }
  }

  if (!provider || !(provider in MODEL_CREATORS)) {
    throw new Error(`Unknown model provider for model: ${model}`);
  }

  const createModel = MODEL_CREATORS[provider];
  return createModel(model);
}
42 |
--------------------------------------------------------------------------------
/src/lib/string-utils.ts:
--------------------------------------------------------------------------------
1 | import { MAX_CONTEXT_LENGTH } from "./constants";
2 |
/**
 * Caps a string at `MAX_CONTEXT_LENGTH` characters to prevent exceeding the
 * LLM token limit; shorter strings are returned unchanged.
 * @param {string} str - The input string to truncate
 * @returns {string} Truncated string
 */
export function truncateString(str: string): string {
  if (str.length <= MAX_CONTEXT_LENGTH) {
    return str;
  }
  return str.substring(0, MAX_CONTEXT_LENGTH);
}
13 |
/**
 * Collapses every run of whitespace into a single space and drops leading and
 * trailing whitespace.
 * @param {string} str - The input string
 * @returns {string} The normalized string
 */
export function removeExtraWhitespace(str: string) {
  return str.trim().replace(/\s+/g, " ");
}
17 |
18 | // export function countStringTokens(str: string) {
19 | // const tokenizer = new GPT4Tokenizer({ type: "gpt3" }); // or 'codex'
20 | // const estimatedTokenCount = tokenizer.estimateTokenCount(str); // 7
21 |
22 | // return estimatedTokenCount;
23 | // }
24 |
--------------------------------------------------------------------------------
/src/lib/time-utils.ts:
--------------------------------------------------------------------------------
/**
 * Formats the time elapsed since `startTime` as "<milliseconds> ms".
 * @param startTime - A timestamp previously taken with `Date.now()`.
 * @returns The elapsed time, e.g. "42 ms".
 */
export const measureExecutionTime = (startTime: number) => {
  const elapsedMs = Date.now() - startTime;
  return `${elapsedMs} ms`;
};
5 |
/**
 * Returns a promise that resolves after `ms` milliseconds.
 * @param ms - How long to wait, in milliseconds.
 */
export const delay = (ms: number) => {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
};
7 |
--------------------------------------------------------------------------------
/src/lib/types.ts:
--------------------------------------------------------------------------------
/**
 * A titled error description.
 * NOTE(review): presumably what the UI displays when a request fails — confirm against callers.
 */
export interface ErrorObject {
  // Short heading for the error.
  title: string;
  // Longer explanation of the error.
  message: string;
}
5 |
--------------------------------------------------------------------------------
/src/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { type ClassValue, clsx } from "clsx";
2 | import { twMerge } from "tailwind-merge";
3 |
/**
 * Builds a class-name string: the inputs are combined with `clsx` and the
 * result is passed through `twMerge`.
 */
export function cn(...inputs: ClassValue[]) {
  const combined = clsx(inputs);
  return twMerge(combined);
}
7 |
--------------------------------------------------------------------------------
/src/lib/web-scraper.ts:
--------------------------------------------------------------------------------
1 | import chromium from "@sparticuz/chromium";
2 | import * as puppeteer from "puppeteer";
3 | import puppeteerCore from "puppeteer-core";
4 | import { env } from "./env";
5 |
// Block produced for an element's visible text when no more specific block type applies.
interface TextContent {
  type: "text";
  content: string;
}

// Anchor element: its visible text and its resolved `href`.
interface LinkContent {
  type: "link";
  text: string;
  url: string;
}

// Image element: its `src` and its alt text (a placeholder string is used when alt is empty).
interface ImageContent {
  type: "image";
  src: string;
  alt: string;
}

// Heading element h1-h6; `level` is the digit taken from the tag name.
interface HeadingContent {
  type: "heading";
  level: number;
  content: string;
}

// Ordered or unordered list; one entry per non-empty `li` descendant.
interface ListContent {
  type: "list";
  items: string[];
}

// Table: texts of all `th` cells, plus one array of `td` texts per `tr`.
interface TableContent {
  type: "table";
  headers: string[];
  rows: string[][];
}

// Quote element (matched via `HTMLQuoteElement`).
interface QuoteContent {
  type: "quote";
  content: string;
}

// Content of the page's `<meta name="description">` tag.
interface MetaContent {
  type: "meta";
  description: string;
}

// Any block the scraper can emit; discriminated by `type`.
export type Content =
  | TextContent
  | LinkContent
  | ImageContent
  | HeadingContent
  | ListContent
  | TableContent
  | QuoteContent
  | MetaContent;

// A run of blocks collected under one identifier ("meta" or "group<N>").
export interface GroupedContent {
  group_id: string;
  blocks: Content[];
}
64 |
/**
 * Loads a page in a headless browser and extracts its visible content as
 * groups of blocks.
 *
 * In development the bundled Puppeteer browser is launched; in production
 * `puppeteer-core` is launched with the `@sparticuz/chromium` binary. After
 * navigation (`networkidle2`), the DOM is walked depth-first from
 * `document.body`:
 * - elements that are, or sit inside, an ignored tag (nav, footer, header, ...)
 *   or that carry an ignored class are skipped together with their subtree;
 * - a `section`, `article` or `h1`-`h3` element starts a new group;
 * - every element with a non-null `offsetParent` yields at most one block
 *   (link, image, heading, list, table, quote or text), truncated in length;
 * - a block is dropped when the current group already contains it.
 * The meta description, when present, is emitted first as its own "meta" group.
 *
 * @param url - The page to load.
 * @returns The groups in document order; groups without blocks are omitted.
 * @throws Error("Failed to launch browser") when NODE_ENV is neither
 *   "development" nor "production".
 */
export async function extractGroupedContentFromWeb(
  url: string,
): Promise<GroupedContent[]> {
  let browser = null;

  if (env.NODE_ENV === "development") {
    browser = await puppeteer.launch({
      args     : ["--no-sandbox", "--disable-setuid-sandbox"],
      headless : true,
    });
  } else if (env.NODE_ENV === "production") {
    browser = await puppeteerCore.launch({
      args            : chromium.args,
      defaultViewport : chromium.defaultViewport,
      executablePath  : await chromium.executablePath(),
      headless        : chromium.headless,
    });
  }

  if (!browser) {
    throw new Error("Failed to launch browser");
  }

  try {
    // The page is opened inside the try so the browser is still closed when
    // creating the page fails.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // Everything inside this callback is executed in the page, not in Node,
    // so it can only use what it defines itself.
    // @ts-ignore
    const content = await page.evaluate<() => GroupedContent[]>(() => {
      const groupedResults: GroupedContent[] = [];
      let currentGroup: GroupedContent | null = null;
      let groupCounter = 0;

      const elementsToIgnore = [
        "nav",
        "footer",
        "header",
        "aside",
        "script",
        "style",
        "noscript",
        "iframe",
      ];
      const classesToIgnore = ["advertisement", "sidebar", "comment"];

      // True when the element is an ignored tag, lives inside one, or carries
      // one of the ignored class names.
      function shouldIgnoreElement(element: Element): boolean {
        if (elementsToIgnore.includes(element.tagName.toLowerCase())) return true;
        if (element.closest(elementsToIgnore.join(","))) return true;
        for (const className of classesToIgnore) {
          if (element.classList.contains(className)) return true;
        }
        return false;
      }

      function createNewGroup(): GroupedContent {
        groupCounter++;
        return {
          group_id : `group${groupCounter}`,
          blocks   : [],
        };
      }

      function truncateText(text: string, maxLength: number): string {
        return text.length > maxLength
          ? `${text.slice(0, maxLength)}...`
          : text;
      }

      function processElement(element: Element) {
        if (shouldIgnoreElement(element)) return;

        // Start a new group for major structural elements or headings
        if (
          element.tagName.toLowerCase() === "section"
          || element.tagName.toLowerCase() === "article"
          || /^h[1-3]$/i.test(element.tagName)
        ) {
          if (currentGroup && currentGroup.blocks.length > 0) {
            groupedResults.push(currentGroup);
          }
          currentGroup = createNewGroup();
        }

        if (!currentGroup) {
          currentGroup = createNewGroup();
        }

        // Only elements with a non-null offsetParent produce a block; their
        // children are visited either way.
        if (element instanceof HTMLElement && element.offsetParent !== null) {
          let content: Content | null = null;

          if (element instanceof HTMLAnchorElement) {
            const visibleText = element.innerText.trim();
            if (visibleText) {
              content = {
                type : "link",
                text : truncateText(visibleText, 100),
                url  : element.href,
              };
            }
          } else if (element instanceof HTMLImageElement) {
            content = {
              type : "image",
              src  : element.src,
              alt  : truncateText(element.alt || "No alt text provided", 100),
            };
          } else if (/^h[1-6]$/i.test(element.tagName)) {
            content = {
              type    : "heading",
              level   : parseInt(element.tagName.toLowerCase().charAt(1), 10),
              content : truncateText(element.innerText.trim(), 200),
            };
          } else if (
            element instanceof HTMLUListElement
            || element instanceof HTMLOListElement
          ) {
            const items = Array.from(element.getElementsByTagName("li"))
              .map((li) => truncateText(li.innerText.trim(), 200))
              .filter((text) => text.length > 0);
            if (items.length > 0) {
              content = {
                type: "list",
                items,
              };
            }
          } else if (element instanceof HTMLTableElement) {
            const headers = Array.from(element.getElementsByTagName("th")).map(
              (th) => truncateText(th.innerText.trim(), 100),
            );
            const rows = Array.from(element.getElementsByTagName("tr")).map(
              (tr) => Array.from(tr.getElementsByTagName("td")).map((td) => truncateText(td.innerText.trim(), 100)),
            );
            if (headers.length > 0 || rows.length > 0) {
              content = {
                type: "table",
                headers,
                rows,
              };
            }
          } else if (element instanceof HTMLQuoteElement) {
            content = {
              type    : "quote",
              content : truncateText(element.innerText.trim(), 500),
            };
          } else {
            const visibleText = element.innerText.trim();
            if (visibleText) {
              content = {
                type    : "text",
                content : truncateText(visibleText, 1000),
              };
            }
          }

          if (content) {
            // Duplicate check: a text block counts as already present when one
            // of the last ten blocks is a text block containing it; every
            // other comparison is by exact JSON equality.
            const currentGroupBlocksLength = currentGroup.blocks.length;
            const contentExists = currentGroup.blocks.some(
              (currentContent, index) => {
                const isLastTenIndexes = index >= currentGroupBlocksLength - 10;
                if (
                  currentContent.type === "text"
                  && content.type === "text"
                  && isLastTenIndexes
                ) {
                  return currentContent.content.includes(content.content);
                }
                return (
                  JSON.stringify(currentContent) === JSON.stringify(content)
                );
              },
            );
            if (!contentExists) {
              currentGroup.blocks.push(content);
            }
          }
        }

        for (const child of Array.from(element.children)) {
          processElement(child);
        }
      }

      // Extract meta description
      const metaDescription = document.querySelector(
        'meta[name="description"]',
      );
      if (
        metaDescription instanceof HTMLMetaElement
        && metaDescription.content
      ) {
        groupedResults.push({
          group_id : "meta",
          blocks   : [
            {
              type        : "meta",
              description : truncateText(metaDescription.content, 200),
            },
          ],
        });
      }

      processElement(document.body);

      // Add the last group if it exists
      // @ts-ignore
      if (currentGroup && currentGroup?.blocks.length > 0) {
        groupedResults.push(currentGroup);
      }

      return groupedResults;
    });

    return content;
  } finally {
    await browser.close();
  }
}
282 |
/**
 * Loads a page in a headless browser and extracts its visible content as a
 * flat list of blocks.
 *
 * In development the bundled Puppeteer browser is launched; in production
 * `puppeteer-core` is launched with the `@sparticuz/chromium` binary. After
 * navigation (`networkidle2`), the DOM is walked depth-first from
 * `document.body`; elements that are, or sit inside, an ignored tag or that
 * carry an ignored class are skipped together with their subtree. Every
 * element with a non-null `offsetParent` yields at most one block, and a block
 * is dropped when the result list already contains it. The meta description,
 * when present, is emitted first.
 *
 * @param url - The page to load.
 * @returns The extracted blocks in document order.
 * @throws Error("Failed to launch browser") when NODE_ENV is neither
 *   "development" nor "production".
 */
export async function extractContentsFromWeb(url: string): Promise<Content[]> {
  let browser = null;

  if (env.NODE_ENV === "development") {
    browser = await puppeteer.launch({
      args     : ["--no-sandbox", "--disable-setuid-sandbox"],
      headless : true,
    });
  } else if (env.NODE_ENV === "production") {
    browser = await puppeteerCore.launch({
      args            : chromium.args,
      defaultViewport : chromium.defaultViewport,
      executablePath  : await chromium.executablePath(),
      headless        : chromium.headless,
    });
  }

  if (!browser) {
    throw new Error("Failed to launch browser");
  }

  try {
    // The page is opened inside the try so the browser is still closed when
    // creating the page fails.
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: "networkidle2" });

    // Everything inside this callback is executed in the page, not in Node,
    // so it can only use what it defines itself.
    // @ts-ignore
    const content = await page.evaluate<() => Content[]>(() => {
      const results: Content[] = [];

      const elementsToIgnore = [
        "nav",
        "footer",
        "header",
        "aside",
        "script",
        "style",
        "noscript",
        "iframe",
      ];
      const classesToIgnore = ["advertisement", "sidebar", "comment"];

      // True when the element is an ignored tag, lives inside one, or carries
      // one of the ignored class names.
      function shouldIgnoreElement(element: Element): boolean {
        if (elementsToIgnore.includes(element.tagName.toLowerCase())) return true;
        if (element.closest(elementsToIgnore.join(","))) return true;
        for (const className of classesToIgnore) {
          if (element.classList.contains(className)) return true;
        }
        return false;
      }

      function truncateText(text: string, maxLength: number): string {
        return text.length > maxLength
          ? `${text.slice(0, maxLength)}...`
          : text;
      }

      // A text block counts as equal when the existing block (one of the last
      // ten) is a text block containing it; every other comparison is by exact
      // JSON equality.
      function isContentEqual(
        a: Content,
        b: Content,
        isLastTenIndexes: boolean,
      ): boolean {
        if (a.type === "text" && b.type === "text" && isLastTenIndexes) {
          return a.content.includes(b.content);
        }
        return JSON.stringify(a) === JSON.stringify(b);
      }

      function addUniqueContent(content: Content) {
        const resultsLength = results.length;
        if (
          !results.some((item, index) => {
            const isLastTenIndexes = index >= resultsLength - 10;
            return isContentEqual(item, content, isLastTenIndexes);
          })
        ) {
          results.push(content);
        }
      }

      function processElement(element: Element) {
        if (shouldIgnoreElement(element)) return;

        // Only elements with a non-null offsetParent produce a block; their
        // children are visited either way.
        if (element instanceof HTMLElement && element.offsetParent !== null) {
          let content: Content | null = null;

          if (element instanceof HTMLAnchorElement) {
            const visibleText = element.innerText.trim();
            if (visibleText) {
              content = {
                type : "link",
                text : truncateText(visibleText, 100),
                url  : element.href,
              };
            }
          } else if (element instanceof HTMLImageElement) {
            content = {
              type : "image",
              src  : element.src,
              alt  : truncateText(element.alt || "No alt text provided", 100),
            };
          } else if (/^h[1-6]$/i.test(element.tagName)) {
            content = {
              type    : "heading",
              level   : parseInt(element.tagName.toLowerCase().charAt(1), 10),
              content : truncateText(element.innerText.trim(), 200),
            };
          } else if (
            element instanceof HTMLUListElement
            || element instanceof HTMLOListElement
          ) {
            const items = Array.from(element.getElementsByTagName("li"))
              .map((li) => truncateText(li.innerText.trim(), 200))
              .filter((text) => text.length > 0);
            if (items.length > 0) {
              content = {
                type: "list",
                items,
              };
            }
          } else if (element instanceof HTMLTableElement) {
            const headers = Array.from(element.getElementsByTagName("th")).map(
              (th) => truncateText(th.innerText.trim(), 100),
            );
            const rows = Array.from(element.getElementsByTagName("tr")).map(
              (tr) => Array.from(tr.getElementsByTagName("td")).map((td) => truncateText(td.innerText.trim(), 100)),
            );
            if (headers.length > 0 || rows.length > 0) {
              content = {
                type: "table",
                headers,
                rows,
              };
            }
          } else if (element instanceof HTMLQuoteElement) {
            content = {
              type    : "quote",
              content : truncateText(element.innerText.trim(), 500),
            };
          } else {
            const visibleText = element.innerText.trim();
            if (visibleText) {
              content = {
                type    : "text",
                content : truncateText(visibleText, 1000),
              };
            }
          }

          if (content) {
            addUniqueContent(content);
          }
        }

        for (const child of Array.from(element.children)) {
          processElement(child);
        }
      }

      // Extract meta description
      const metaDescription = document.querySelector(
        'meta[name="description"]',
      );
      if (
        metaDescription instanceof HTMLMetaElement
        && metaDescription.content
      ) {
        addUniqueContent({
          type        : "meta",
          description : truncateText(metaDescription.content, 200),
        });
      }

      processElement(document.body);

      return results;
    });

    return content;
  } finally {
    await browser.close();
  }
}
467 |
/**
 * Renders scraped content entries as one Markdown-flavoured string.
 *
 * Each entry is rendered according to its `type`; entries with an
 * unrecognised type render as an empty string. The rendered pieces are
 * joined with ". ".
 *
 * @param content - Entries to render, in document order.
 * @returns The joined textual representation (empty string for no entries).
 */
export function formatContentForLangChain(content: Content[]): string {
  const rendered: string[] = [];

  for (const entry of content) {
    if (entry.type === "text") {
      rendered.push(entry.content);
    } else if (entry.type === "link") {
      rendered.push(`[${entry.text}](${entry.url})`);
    } else if (entry.type === "image") {
      rendered.push(`[Image: ${entry.alt}](${entry.src})`);
    } else if (entry.type === "heading") {
      const hashes = "#".repeat(entry.level);
      rendered.push(`${hashes} ${entry.content}`);
    } else if (entry.type === "list") {
      const bullets = entry.items.map((listItem) => `- ${listItem}`);
      rendered.push(bullets.join("\n"));
    } else if (entry.type === "table") {
      const headerLine = entry.headers.join(", ");
      const bodyLines = entry.rows.map((cells) => cells.join(", ")).join("\n");
      rendered.push(`Table: ${headerLine}\n${bodyLines}`);
    } else if (entry.type === "quote") {
      rendered.push(`> ${entry.content}`);
    } else if (entry.type === "meta") {
      rendered.push(`Meta Description: ${entry.description}`);
    } else {
      // Unknown entry kinds still occupy a slot so the separators stay in place.
      rendered.push("");
    }
  }

  return rendered.join(". ");
}
496 |
--------------------------------------------------------------------------------
/src/middlewares/rate-limiter-middleware.ts:
--------------------------------------------------------------------------------
1 | import { env } from "@/lib/env";
2 | import { rateLimiter } from "hono-rate-limiter";
3 |
// Length of one rate-limit window: one minute, in milliseconds.
const RATE_LIMIT_WINDOW_MS = 60 * 1000;

// Requests accepted per key within one window.
const RATE_LIMIT_MAX_REQUESTS = 10;

/**
 * Rate-limiting middleware.
 *
 * Every request is mapped to the same constant key ("user"), so the budget
 * of 10 requests per one-minute window is shared by all callers rather than
 * tracked per client. Limiting per client would require a key derived from
 * the connection (for example its remote address) instead of the constant.
 *
 * The limiter is skipped entirely when NODE_ENV is "development".
 * Rejected requests receive a JSON body with HTTP status 429.
 */
export const limiterMiddleware = rateLimiter({
  windowMs: RATE_LIMIT_WINDOW_MS,
  limit: RATE_LIMIT_MAX_REQUESTS,
  standardHeaders: "draft-6", // Use draft-6 RateLimit headers
  keyGenerator: () => "user",
  skip: () => env.NODE_ENV === "development",
  handler: (c) => {
    const body = {
      code: "TOO_MANY_REQUESTS",
      title: "Too Many Requests",
      message: "Too many requests, please try again later.",
    };
    return c.json(body, 429);
  },
});
24 |
--------------------------------------------------------------------------------
/src/routes/extract-route.ts:
--------------------------------------------------------------------------------
1 | import { extractController } from "@/controllers/extract-controller";
2 | import RequestTimeoutError from "@/errors/request-timeout-error";
3 | import { limiterMiddleware } from "@/middlewares/rate-limiter-middleware";
4 | import { Context, Hono } from "hono";
5 |
const extractRoute = new Hono();

// Upper bound on how long a single extraction may run, in milliseconds.
const TIMEOUT_DURATION = 50 * 1000;

extractRoute.use("*", limiterMiddleware);

/**
 * Handle POST requests to /extract endpoint.
 *
 * Races the extraction against a timer of TIMEOUT_DURATION milliseconds.
 * If the timer wins, a 504 JSON error is returned; any other error is
 * rethrown to the caller.
 *
 * @param {Context} c - The Hono context
 * @returns {Promise<Response>} JSON response with extraction results or error
 */
extractRoute.post("/", async (c: Context) => {
  let timeoutId: ReturnType<typeof setTimeout> | undefined;

  const timeoutPromise = new Promise<never>((_, reject) => {
    timeoutId = setTimeout(() => {
      reject(
        new RequestTimeoutError(
          `Request timeout after ${TIMEOUT_DURATION / 1000} seconds`,
        ),
      );
    }, TIMEOUT_DURATION);
  });

  try {
    return await Promise.race([
      extractController.scrape(c),
      timeoutPromise,
    ]);
  } catch (error) {
    if (error instanceof RequestTimeoutError) {
      return c.json(
        {
          code: "REQUEST_TIMEOUT",
          title: "Request Timeout",
          message:
            "The request took too long to process. Please try again later.",
        },
        504,
      );
    }
    throw error;
  } finally {
    // Without this, every finished request leaves its timer pending until
    // it fires.
    clearTimeout(timeoutId);
  }
});
45 |
46 | export default extractRoute;
47 |
--------------------------------------------------------------------------------
/src/routes/vectorstore-routes.ts:
--------------------------------------------------------------------------------
1 | import { deleteAllStoredDocuments } from "@/lib/embed-utils";
2 | import { env } from "@/lib/env";
3 | import { Context, Hono } from "hono";
4 |
const vectorStoreRoute = new Hono();

/**
 * Handle POST requests to clear-vectorstore endpoint.
 *
 * The caller must send the configured token in the `accessToken` header.
 * The check fails closed: a missing header or a missing/empty configured
 * token is treated as unauthorized, so two absent values can never compare
 * as equal.
 *
 * @param {Context} c - The Hono context
 * @returns {Promise<Response>} Text response indicating success or error
 * @throws {Error} If the access token is missing or invalid
 */
vectorStoreRoute.post("clear", async (c: Context) => {
  const accessToken = c.req.header("accessToken");
  const expectedToken = env.CLEAR_UPSTASH_VECTOR_STORE_TOKEN;

  if (!expectedToken || !accessToken || accessToken !== expectedToken) {
    throw new Error("Unauthorized: Invalid access token");
  }

  await deleteAllStoredDocuments();
  return c.text("Vector store cleared successfully", { status: 200 });
});
22 |
23 | export default vectorStoreRoute;
24 |
--------------------------------------------------------------------------------
/src/schemas/template-schema.ts:
--------------------------------------------------------------------------------
1 | import { z } from "zod";
2 |
// Words that are rejected as attribute names because they are reserved in
// JavaScript. Grouped by category; order matches the original listing.
const reservedKeywords = [
  // Keywords
  "break", "case", "catch", "class", "const", "continue", "debugger",
  "default", "delete", "do", "else", "export", "extends", "finally",
  "for", "function", "if", "import", "in", "instanceof", "new", "return",
  "super", "switch", "this", "throw", "try", "typeof", "var", "void",
  "while", "with", "yield",
  // Strict mode reserved words
  "implements", "interface", "let", "package", "private", "protected",
  "public", "static",
  // Constants
  "null", "true", "false",
];
51 |
// Shape of a plain identifier: a letter, underscore or dollar sign followed
// by letters, digits, underscores or dollar signs.
const IDENTIFIER_PATTERN = /^[a-zA-Z_$][a-zA-Z0-9_$]*$/;

// True when the candidate name is not one of the reserved keywords.
const isNotReservedKeyword = (key: string) => !reservedKeywords.includes(key);

// Validation for an attribute name: non-empty, identifier-shaped, not reserved.
const attributeNameSchema = z
  .string({
    required_error: "Name is required",
    invalid_type_error: "Name must be a string",
  })
  .min(1, "Name cannot be empty")
  .regex(
    IDENTIFIER_PATTERN,
    "Name must start with a letter, underscore, or dollar sign, and can only contain letters, numbers, underscores, or dollar signs",
  )
  .refine(
    isNotReservedKeyword,
    "Object key cannot be a reserved JavaScript keyword",
  );

// Base schema for all attributes: a validated name plus a non-empty description.
const BaseAttributeSchema = z.object({
  name: attributeNameSchema,
  description: z.string().min(1, "Cannot be empty"),
});
71 |
// Schema for simple attributes (string, number, boolean)
const SimpleAttributeSchema = BaseAttributeSchema.extend({
  type: z.enum(["string", "number", "boolean"]),
});

// A flat list of simple attributes; used for the properties of an object
// attribute and for the properties of object-typed array items.
const simplePropertyListSchema = z.array(SimpleAttributeSchema);

// Schema for object attributes: a required list of simple properties.
const ObjectAttributeSchema = BaseAttributeSchema.extend({
  type: z.literal("object"),
  properties: simplePropertyListSchema,
});

// Description of what an array attribute holds; `properties` is optional.
const arrayItemSchema = z.object({
  type: z.enum(["string", "number", "boolean", "object"]),
  properties: simplePropertyListSchema.optional(),
});

// Schema for array attributes.
const ArrayAttributeSchema = BaseAttributeSchema.extend({
  type: z.literal("array"),
  items: arrayItemSchema,
});

// Any attribute, discriminated by its `type` field.
export const AttributeSchema = z.discriminatedUnion("type", [
  SimpleAttributeSchema,
  ObjectAttributeSchema,
  ArrayAttributeSchema,
]);
96 |
/**
 * Reports whether a hostname refers to a loopback, private, link-local or
 * otherwise local address that must not be reachable through user input.
 *
 * The hostname is normalised before matching: surrounding square brackets
 * (as produced by `URL.hostname` for IPv6 literals, e.g. "[::1]") are
 * removed, the text is lower-cased, and one trailing root dot is dropped.
 *
 * @param hostname - Hostname or IP literal, typically `new URL(x).hostname`.
 * @returns `true` when the hostname matches a private/reserved pattern.
 */
const isInternalIP = (hostname: string): boolean => {
  const host = hostname
    .trim()
    .toLowerCase()
    .replace(/^\[|\]$/g, "") // "[::1]" -> "::1"
    .replace(/\.$/, ""); // "localhost." -> "localhost"

  // Regular expressions for common private IP ranges and reserved IPs
  const privateIPPatterns = [
    /^10\./, // 10.0.0.0 – 10.255.255.255
    /^172\.(1[6-9]|2[0-9]|3[0-1])\./, // 172.16.0.0 – 172.31.255.255
    /^192\.168\./, // 192.168.0.0 – 192.168.255.255
    /^127\./, // Loopback address range (localhost)
    /^169\.254\./, // Link-local addresses
    /^::1?$/, // IPv6 loopback (::1) and unspecified (::) addresses
    /^f[cd][0-9a-f]{2}:/, // IPv6 unique local addresses, fc00::/7
    /^fe[89ab][0-9a-f]:/, // IPv6 link-local addresses, fe80::/10
  ];

  // Check for local and reserved hostnames
  const reservedHostnames = [
    "localhost", // Local development server
    "127.0.0.1", // IPv4 loopback address
    "0.0.0.0", // IPv4 unspecified address
    "::1", // IPv6 loopback address
  ];

  return (
    privateIPPatterns.some((pattern) => pattern.test(host))
    || reservedHostnames.includes(host)
    || host.endsWith(".localhost") // subdomains of the reserved localhost name
  );
};
120 |
/**
 * Schema for the URL to scrape.
 *
 * Accepts only strings that parse as a URL, use the http: or https:
 * protocol, and have a non-empty hostname that `isInternalIP` does not
 * flag. Anything that fails to parse is rejected.
 */
const urlSchema = z.string().refine(
  (url) => {
    try {
      const { protocol, hostname } = new URL(url);
      const isValidProtocol = protocol === "http:" || protocol === "https:";
      // Compare explicitly so the predicate always yields a boolean.
      const isValidHostname = hostname !== "" && !isInternalIP(hostname);
      return isValidProtocol && isValidHostname;
    } catch {
      // `new URL` throws on input that is not a valid URL.
      return false;
    }
  },
  {
    message: "Invalid URL. It must use HTTP or HTTPS protocol and have a valid, non-internal hostname.",
  },
);
134 |
/**
 * Schema of a scraping template: the target URL, a display name and at
 * least one attribute to extract, plus optional bookkeeping fields.
 */
export const TemplateSchema = z.object({
  id: z.string().optional(),
  url: urlSchema,
  name: z.string().min(1, "Cannot be empty"),
  attributes: z.array(AttributeSchema).min(1, "Please input at least one attribute"),
  latestResult: z.any().optional(),
  // Timestamps may be Date objects or strings.
  createdAt: z.union([z.date(), z.string()]).optional(),
  updatedAt: z.union([z.string(), z.date()]).optional(),
  ignoreCache: z.boolean().optional(),
});

// Static types derived from the schemas above.
export type Template = z.infer<typeof TemplateSchema>;
export type ObjectAttribute = z.infer<typeof ObjectAttributeSchema>;
export type ArrayAttribute = z.infer<typeof ArrayAttributeSchema>;
// NOTE(review): the type argument was lost from this line; it is assumed to
// be the attribute union — confirm against the usages of AttributeType.
export type AttributeType = z.infer<typeof AttributeSchema>;
150 |
/**
 * Blank template: empty id, name and url, with a single empty attribute
 * of type "string".
 */
export const DEFAULT_TEMPLATE_VALUE: Template = {
  id: "",
  name: "",
  url: "",
  attributes: [{ name: "", type: "string", description: "" }],
};
163 |
/**
 * Label/value pairs for the attribute types. Each value is the lower-case
 * form of its label.
 */
export const TYPES = ["String", "Number", "Boolean", "Array", "Object"].map(
  (label) => ({ label, value: label.toLowerCase() }),
);
171 |
--------------------------------------------------------------------------------
/src/store/model-store.ts:
--------------------------------------------------------------------------------
1 | import { DEFAULT_LLM_MODEL, LLMModel } from "@/lib/constants";
2 | import { create, StateCreator } from "zustand";
3 | import { persist, PersistOptions } from "zustand/middleware";
4 |
// Storage key under which the persist middleware saves the selected model.
const MODEL_LOCAL_STORAGE_TEMPLATES_KEY = "llm-model-store";

// State held by the model store: the current model and its setter.
type ModelStoreType = {
  model: LLMModel;
  setModel: (model: LLMModel) => void;
};

// Narrowed signature for `persist`, specialised to this store's state.
type PersistType = (
  config: StateCreator<ModelStoreType>,
  options: PersistOptions<ModelStoreType>
) => StateCreator<ModelStoreType>;

/**
 * Persisted store holding the selected LLM model.
 * Starts at DEFAULT_LLM_MODEL.
 */
const useModelStore = create<ModelStoreType>(
  (persist as PersistType)(
    (set) => ({
      model: DEFAULT_LLM_MODEL,
      setModel: (model) => set({ model }),
    }),
    {
      name: MODEL_LOCAL_STORAGE_TEMPLATES_KEY,
    },
  ),
);

export { useModelStore };
30 |
--------------------------------------------------------------------------------
/src/store/template-store.ts:
--------------------------------------------------------------------------------
1 | import { Template } from "@/schemas/template-schema";
2 | import { create } from "zustand";
3 | import { persist } from "zustand/middleware";
4 |
// Storage key under which the persist middleware saves the template store.
const LOCAL_STORAGE_TEMPLATES_KEY = "templates-store";

// Data held by the store: all saved templates and the one currently selected.
type TemplateState = {
  templates: Template[];
  selectedTemplate: Template | null;
};

// Operations exposed by the store.
type TemplateActions = {
  setSelectedTemplate: (template: Template | null) => void;
  addTemplate: (template: Template) => void;
  updateTemplate: (template: Template) => void;
  deleteTemplate: (templateId: string) => void;
};

// Full store shape: data plus operations.
type TemplateStore = TemplateState & TemplateActions;
22 |
// Helpers

/**
 * Returns a copy of the template without its `ignoreCache` field.
 * The input object is not modified.
 *
 * @param template - Template to copy.
 * @returns The template's remaining fields.
 */
const stripIgnoreCache = (template: Template): Omit<Template, "ignoreCache"> => {
  // Only the rest is used; destructuring is the way to drop the key.
  // eslint-disable-next-line @typescript-eslint/no-unused-vars
  const { ignoreCache, ...rest } = template;
  return rest;
};
28 |
/**
 * Returns a copy of the template with `updatedAt` set to the current time.
 * When `isNew` is true, `createdAt` is set to the current time as well.
 * The input object is not modified.
 *
 * @param template - Template to stamp.
 * @param isNew - Whether the template is being created (default false).
 * @returns The stamped copy.
 */
const updateTimestamps = (template: Template, isNew = false): Template => {
  const stamped: Template = { ...template, updatedAt: new Date() };

  if (isNew) {
    stamped.createdAt = new Date();
  }

  return stamped;
};
34 |
// Store

/**
 * Persisted store of scraping templates.
 *
 * Templates are saved without their `ignoreCache` field. Adding a template
 * stamps both `createdAt` and `updatedAt`; updating one refreshes
 * `updatedAt`. Deleting the selected template also clears the selection.
 */
export const useTemplateStore = create<TemplateStore>()(
  persist(
    (set) => ({
      templates: [],
      selectedTemplate: null,

      setSelectedTemplate: (template) => set({
        selectedTemplate: template ? stripIgnoreCache(template) : null,
      }),

      addTemplate: (template) => set((state) => ({
        templates: [
          ...state.templates,
          updateTimestamps(stripIgnoreCache(template), true),
        ],
      })),

      // Replaces the stored template that has the same id; others are kept.
      updateTemplate: (template) => set((state) => ({
        templates: state.templates.map((t) => (
          t.id === template.id
            ? updateTimestamps(stripIgnoreCache(template))
            : t
        )),
      })),

      deleteTemplate: (templateId) => set((state) => {
        const newTemplates = state.templates.filter((t) => t.id !== templateId);
        const newSelectedTemplate = state.selectedTemplate?.id === templateId
          ? null
          : state.selectedTemplate;

        return {
          templates: newTemplates,
          selectedTemplate: newSelectedTemplate,
        };
      }),
    }),
    {
      name: LOCAL_STORAGE_TEMPLATES_KEY,
    },
  ),
);
69 |
--------------------------------------------------------------------------------
/tailwind.config.ts:
--------------------------------------------------------------------------------
1 | import type { Config } from "tailwindcss";
2 |
// Builds the `hsl(var(--token))` reference for a CSS custom property.
const hslVar = (token: string) => `hsl(var(--${token}))`;

// Builds a colour entry with a DEFAULT shade and its matching foreground.
const colorPair = (token: string) => ({
  DEFAULT: hslVar(token),
  foreground: hslVar(`${token}-foreground`),
});

// Height custom property used by both accordion keyframes.
const ACCORDION_CONTENT_HEIGHT = "var(--radix-accordion-content-height)";

/**
 * Tailwind configuration: class-based dark mode, a centred container,
 * colours and radii taken from CSS custom properties, and the accordion
 * open/close animations.
 */
const config = {
  darkMode: ["class"],
  content: [
    "./pages/**/*.{ts,tsx}",
    "./components/**/*.{ts,tsx}",
    "./app/**/*.{ts,tsx}",
    "./src/**/*.{ts,tsx}",
  ],
  prefix: "",
  theme: {
    container: {
      center: true,
      padding: "2rem",
      screens: { "2xl": "1400px" },
    },
    extend: {
      colors: {
        border: hslVar("border"),
        input: hslVar("input"),
        ring: hslVar("ring"),
        background: hslVar("background"),
        foreground: hslVar("foreground"),
        primary: colorPair("primary"),
        secondary: colorPair("secondary"),
        destructive: colorPair("destructive"),
        muted: colorPair("muted"),
        accent: colorPair("accent"),
        popover: colorPair("popover"),
        card: colorPair("card"),
        success: colorPair("success"),
      },
      borderRadius: {
        lg: "var(--radius)",
        md: "calc(var(--radius) - 2px)",
        sm: "calc(var(--radius) - 4px)",
      },
      keyframes: {
        "accordion-down": {
          from: { height: "0" },
          to: { height: ACCORDION_CONTENT_HEIGHT },
        },
        "accordion-up": {
          from: { height: ACCORDION_CONTENT_HEIGHT },
          to: { height: "0" },
        },
      },
      animation: {
        "accordion-down": "accordion-down 0.2s ease-out",
        "accordion-up": "accordion-up 0.2s ease-out",
      },
    },
  },
  plugins: [require("tailwindcss-animate")],
} satisfies Config;
83 |
84 | export default config;
85 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "lib": ["dom", "dom.iterable", "esnext"],
4 | "allowJs": true,
5 | "skipLibCheck": true,
6 | "strict": true,
7 | "noEmit": true,
8 | "esModuleInterop": true,
9 | "module": "esnext",
10 | "moduleResolution": "bundler",
11 | "resolveJsonModule": true,
12 | "isolatedModules": true,
13 | "jsx": "preserve",
14 | "incremental": true,
15 | "plugins": [
16 | {
17 | "name": "next"
18 | }
19 | ],
20 | "paths": {
21 | "@/*": ["./src/*"]
22 | }
23 | },
24 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"],
25 | "exclude": ["node_modules"]
26 | }
27 |
--------------------------------------------------------------------------------