├── .gitattributes ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── .husky └── pre-commit ├── .prettierignore ├── .prettierrc.mjs ├── README.md ├── assets └── icon.png ├── babel.config.js ├── components.json ├── components ├── CommandEnter.tsx ├── data-grid.tsx ├── query-input.tsx └── ui │ ├── badge.tsx │ ├── button.tsx │ ├── hover-card.tsx │ ├── input.tsx │ ├── label.tsx │ ├── sonner.tsx │ ├── switch.tsx │ └── textarea.tsx ├── hooks ├── useDuckDB.ts └── useParquetInfo.ts ├── lib ├── datasets.test.ts ├── datasets.ts └── utils.ts ├── media └── screenshot.png ├── package.json ├── pnpm-lock.yaml ├── postcss.config.js ├── services └── DuckDBClient.ts ├── src ├── content.tsx ├── explorer.tsx ├── popup.tsx └── styles.css ├── tailwind.config.js ├── tsconfig.json └── types └── parquet.ts /.gitattributes: -------------------------------------------------------------------------------- 1 | pnpm-lock.yaml binary 2 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: Node.js CI and Manual Release 2 | 3 | on: 4 | push: 5 | branches: [main] 6 | pull_request: 7 | branches: [main] 8 | workflow_dispatch: 9 | inputs: 10 | version: 11 | description: "Version to release" 12 | required: true 13 | type: string 14 | 15 | jobs: 16 | build: 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | - uses: pnpm/action-setup@v4 21 | with: 22 | version: 8 23 | - uses: actions/setup-node@v4 24 | with: 25 | node-version: 20 26 | cache: "pnpm" 27 | - run: pnpm install 28 | - run: pnpm build 29 | - run: pnpm test 30 | - run: zip -r build.zip build 31 | - uses: actions/upload-artifact@v3 32 | with: 33 | name: build 34 | path: build.zip 35 | 36 | release: 37 | needs: build 38 | if: github.event_name == 'workflow_dispatch' 39 | runs-on: ubuntu-latest 40 | steps: 41 | - uses: actions/checkout@v4 42 | - uses: 
actions/download-artifact@v3 43 | with: 44 | name: build 45 | - name: Create Release 46 | env: 47 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 48 | run: | 49 | gh release create v${{ github.event.inputs.version }} \ 50 | --title "Release v${{ github.event.inputs.version }}" \ 51 | --notes "Release notes for version ${{ github.event.inputs.version }}" \ 52 | build.zip 53 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 3 | 4 | # dependencies 5 | /node_modules 6 | /.pnp 7 | .pnp.js 8 | 9 | # testing 10 | /coverage 11 | 12 | #cache 13 | .turbo 14 | .next 15 | .vercel 16 | 17 | # misc 18 | .DS_Store 19 | *.pem 20 | 21 | # debug 22 | npm-debug.log* 23 | yarn-debug.log* 24 | yarn-error.log* 25 | .pnpm-debug.log* 26 | 27 | 28 | # local env files 29 | .env* 30 | 31 | out/ 32 | build/ 33 | dist/ 34 | 35 | # plasmo - https://www.plasmo.com 36 | .plasmo 37 | 38 | # bpp - http://bpp.browser.market/ 39 | keys.json 40 | 41 | # typescript 42 | .tsbuildinfo 43 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | . 
"$(dirname -- "$0")/_/husky.sh" 3 | 4 | npx lint-staged 5 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | build 2 | coverage 3 | pnpm-lock.yaml -------------------------------------------------------------------------------- /.prettierrc.mjs: -------------------------------------------------------------------------------- 1 | /** 2 | * @type {import('prettier').Options} 3 | */ 4 | export default { 5 | printWidth: 80, 6 | tabWidth: 4, 7 | useTabs: false, 8 | semi: false, 9 | singleQuote: false, 10 | trailingComma: "none", 11 | bracketSpacing: true, 12 | bracketSameLine: true, 13 | plugins: ["@ianvs/prettier-plugin-sort-imports"], 14 | importOrder: [ 15 | "", // Node.js built-in modules 16 | "", // Imports not matched by other special words or groups. 17 | "", // Empty line 18 | "^@plasmo/(.*)$", 19 | "", 20 | "^@plasmohq/(.*)$", 21 | "", 22 | "^~(.*)$", 23 | "", 24 | "^[./]" 25 | ] 26 | } 27 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # HF Data Explorer (Archived) 2 | 3 | **This feature has been archived since it is available natively in Hugging Face now. You can learn more about it [here](https://huggingface.co/blog/sql-console).** 4 | 5 | A Chrome Extension for exploring and querying Hugging Face datasets with SQL. 6 | 7 | ![HF Data Explorer Screenshot](./media/screenshot.png) 8 | 9 | ## Getting Started 10 | 11 | #### Installing 12 | 13 | 1. Download the latest release from the [Releases page](https://github.com/cfahlgren1/hf-data-explorer/releases). 14 | 2. Unzip the downloaded file. 15 | 3. Open Google Chrome and navigate to `chrome://extensions/`. 16 | 4. Enable "Developer mode" by toggling the switch in the top right corner. 17 | 5. Click on the "Load unpacked" button. 18 | 6. 
Select the unzipped folder containing the extension files. 19 | 7. The HF Data Explorer extension should now be installed and visible in your Chrome toolbar. 20 | 21 | #### Limitations 22 | 23 | DuckDB WASM can only use ~4GB of memory (_more like 3.4GB with the DuckDB memory limit_). Since DuckDB WASM can't spill extra data to disk like the other clients, it may not be able to perform very large or advanced queries. However, DuckDB WASM is still **very** fast, performant, and can work for most workloads. 24 | 25 | ## Contributing 26 | 27 | This is a [Plasmo extension](https://docs.plasmo.com/) project bootstrapped with [`plasmo init`](https://www.npmjs.com/package/plasmo). 28 | 29 | First, run the development server: 30 | 31 | ```bash 32 | pnpm dev 33 | # or 34 | npm run dev 35 | ``` 36 | 37 | Open your browser and load the appropriate development build. For example, if you are developing for the Chrome browser, using manifest v3, use: `build/chrome-mv3-dev`. 38 | 39 | You can start editing the popup by modifying `popup.tsx`. It should auto-update as you make changes. To add an options page, simply add an `options.tsx` file to the root of the project, with a React component as its default export. Likewise, to add a content page, add a `content.ts` file to the root of the project that imports some module and does some logic, then reload the extension in your browser. 40 | 41 | For further guidance, [visit our Documentation](https://docs.plasmo.com/) 42 | 43 | ## Making production build 44 | 45 | Run the following: 46 | 47 | ```bash 48 | pnpm build 49 | # or 50 | npm run build 51 | ``` 52 | 53 | This should create a production bundle for your extension, ready to be zipped and published to the stores. 
54 | -------------------------------------------------------------------------------- /assets/icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/cfahlgren1/hf-data-explorer/75a863fc9b4d5453641e597a93f284964bd4be1f/assets/icon.png -------------------------------------------------------------------------------- /babel.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | presets: [ 3 | ["@babel/preset-env", { targets: { node: "current" } }], 4 | "@babel/preset-typescript" 5 | ] 6 | } 7 | -------------------------------------------------------------------------------- /components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "src/styles.css", 9 | "baseColor": "slate", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils" 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /components/CommandEnter.tsx: -------------------------------------------------------------------------------- 1 | import React from "react" 2 | import { AiOutlineEnter } from "react-icons/ai" 3 | 4 | const CommandEnter = ({ color = "currentColor" }) => { 5 | return ( 6 |
7 | 16 | 17 | 18 | 19 |
20 | ) 21 | } 22 | 23 | export default CommandEnter 24 | -------------------------------------------------------------------------------- /components/data-grid.tsx: -------------------------------------------------------------------------------- 1 | import type { ColDef, GridReadyEvent, IDatasource } from "ag-grid-community" 2 | import { AgGridReact } from "ag-grid-react" 3 | import React, { useCallback, useMemo, useRef, useState } from "react" 4 | 5 | type RowData = Record 6 | 7 | interface DataGridProps { 8 | initialData: { 9 | rows: T[] 10 | columns: ColDef[] 11 | } 12 | fetchNextBatch: () => Promise<{ 13 | rows: T[] 14 | }> 15 | height?: number 16 | } 17 | 18 | export const DataGrid = React.memo( 19 | ({ initialData, fetchNextBatch }: DataGridProps) => { 20 | const gridRef = useRef(null) 21 | const [rowCount, setRowCount] = useState( 22 | initialData.rows.length 23 | ) 24 | 25 | const datasource: IDatasource = useMemo(() => { 26 | return { 27 | getRows: async (params) => { 28 | const { startRow, successCallback, failCallback } = params 29 | 30 | try { 31 | if (startRow === 0) { 32 | successCallback(initialData.rows) 33 | setRowCount(initialData.rows.length) 34 | } else { 35 | const { rows } = await fetchNextBatch() 36 | successCallback(rows, startRow + rows.length) 37 | setRowCount((prevCount) => prevCount + rows.length) 38 | } 39 | } catch (error) { 40 | failCallback() 41 | console.error("Error fetching data:", error) 42 | } 43 | } 44 | } 45 | }, [initialData, fetchNextBatch]) 46 | 47 | const onGridReady = useCallback( 48 | (params: GridReadyEvent) => { 49 | params.api.setGridOption("datasource", datasource) 50 | }, 51 | [datasource] 52 | ) 53 | 54 | /* 55 | For small results, we want to use autoHeight to show smaller grid, 56 | but not for large results as it causes performance issues. 
57 | */ 58 | const isSmallResult = initialData.rows.length < 10 59 | 60 | const defaultColDef = useMemo(() => { 61 | return { 62 | flex: 1, 63 | minWidth: 100, 64 | sortable: false, 65 | filter: false 66 | } 67 | }, []) 68 | 69 | return ( 70 |
71 |
73 | 86 |
87 |

88 | Showing {rowCount} rows 89 |

90 |
91 | ) 92 | } 93 | ) 94 | -------------------------------------------------------------------------------- /components/query-input.tsx: -------------------------------------------------------------------------------- 1 | import { Button } from "@/components/ui/button" 2 | import type { ParquetInfo } from "@/types/parquet" 3 | import { ReloadIcon } from "@radix-ui/react-icons" 4 | import React, { useState } from "react" 5 | import { Controlled as CodeMirror } from "react-codemirror2" 6 | 7 | import "codemirror/mode/sql/sql" 8 | 9 | import CommandEnter from "./CommandEnter" 10 | import { Badge } from "./ui/badge" 11 | 12 | interface QueryInputProps { 13 | onRunQuery: (query: string) => void 14 | isRunning: boolean 15 | isLoading: boolean 16 | onCancelQuery: () => void 17 | isCancelling: boolean 18 | views: ParquetInfo[] 19 | } 20 | 21 | const QueryInput: React.FC = React.memo( 22 | ({ 23 | onRunQuery, 24 | isRunning, 25 | views, 26 | isLoading, 27 | onCancelQuery, 28 | isCancelling 29 | }) => { 30 | const [query, setQuery] = useState("") 31 | 32 | const handleChange = React.useCallback( 33 | (editor: any, data: any, value: string) => { 34 | setQuery(value) 35 | }, 36 | [] 37 | ) 38 | // helpful auto-fill for preview query 39 | const handleTableClick = (tableName: string) => { 40 | const newQuery = `SELECT * FROM ${tableName} LIMIT 500` 41 | setQuery(newQuery) 42 | } 43 | 44 | // allow horizontal scrolling with mouse 45 | const handleWheel = (event: React.WheelEvent) => { 46 | const container = event.currentTarget 47 | const scrollAmount = event.deltaY 48 | 49 | requestAnimationFrame(() => { 50 | container.scrollLeft += scrollAmount 51 | }) 52 | 53 | event.preventDefault() 54 | } 55 | 56 | return ( 57 | <> 58 | { 66 | onRunQuery(cm.getValue()) 67 | }, 68 | "Ctrl-Enter": (cm: any) => { 69 | onRunQuery(cm.getValue()) 70 | } 71 | } 72 | }} 73 | onBeforeChange={handleChange} 74 | className="w-full max-h-32 text-sm resize-none overflow-auto mb-3" 75 | /> 76 | {views.length 
> 0 && ( 77 |
78 | 81 |
93 |
94 | {views.map((view) => ( 95 | 100 | handleTableClick(view.view_name) 101 | }> 102 | {view.name} 103 | 104 | ))} 105 |
106 |
107 |
108 | )} 109 | {isRunning ? ( 110 | 118 | ) : isLoading ? ( 119 | 123 | ) : ( 124 | 131 | )} 132 | 133 | ) 134 | } 135 | ) 136 | 137 | QueryInput.displayName = "QueryInput" 138 | export default QueryInput 139 | -------------------------------------------------------------------------------- /components/ui/badge.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import { cva, type VariantProps } from "class-variance-authority" 3 | import * as React from "react" 4 | 5 | const badgeVariants = cva( 6 | "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2", 7 | { 8 | variants: { 9 | variant: { 10 | default: 11 | "border-transparent bg-primary text-primary-foreground hover:bg-primary/80", 12 | secondary: 13 | "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80", 14 | destructive: 15 | "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80", 16 | outline: "text-foreground" 17 | } 18 | }, 19 | defaultVariants: { 20 | variant: "default" 21 | } 22 | } 23 | ) 24 | 25 | export interface BadgeProps 26 | extends React.HTMLAttributes, 27 | VariantProps {} 28 | 29 | function Badge({ className, variant, ...props }: BadgeProps) { 30 | return ( 31 |
32 | ) 33 | } 34 | 35 | export { Badge, badgeVariants } 36 | -------------------------------------------------------------------------------- /components/ui/button.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import { Slot } from "@radix-ui/react-slot" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | import * as React from "react" 5 | 6 | const buttonVariants = cva( 7 | "inline-flex items-center justify-center whitespace-nowrap rounded-md text-sm font-medium ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50", 8 | { 9 | variants: { 10 | variant: { 11 | default: 12 | "bg-primary text-primary-foreground hover:bg-primary/90", 13 | destructive: 14 | "bg-destructive text-destructive-foreground hover:bg-destructive/90", 15 | outline: 16 | "border border-input bg-background hover:bg-accent hover:text-accent-foreground", 17 | secondary: 18 | "bg-secondary text-secondary-foreground hover:bg-secondary/80", 19 | ghost: "hover:bg-accent hover:text-accent-foreground", 20 | link: "text-primary underline-offset-4 hover:underline" 21 | }, 22 | size: { 23 | default: "h-10 px-4 py-2", 24 | sm: "h-9 rounded-md px-3", 25 | lg: "h-11 rounded-md px-8", 26 | icon: "h-10 w-10" 27 | } 28 | }, 29 | defaultVariants: { 30 | variant: "default", 31 | size: "default" 32 | } 33 | } 34 | ) 35 | 36 | export interface ButtonProps 37 | extends React.ButtonHTMLAttributes, 38 | VariantProps { 39 | asChild?: boolean 40 | } 41 | 42 | const Button = React.forwardRef( 43 | ({ className, variant, size, asChild = false, ...props }, ref) => { 44 | const Comp = asChild ? 
Slot : "button" 45 | return ( 46 | 51 | ) 52 | } 53 | ) 54 | Button.displayName = "Button" 55 | 56 | export { Button, buttonVariants } 57 | -------------------------------------------------------------------------------- /components/ui/hover-card.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import * as HoverCardPrimitive from "@radix-ui/react-hover-card" 3 | import * as React from "react" 4 | 5 | const HoverCard = HoverCardPrimitive.Root 6 | 7 | const HoverCardTrigger = HoverCardPrimitive.Trigger 8 | 9 | const HoverCardContent = React.forwardRef< 10 | React.ElementRef, 11 | React.ComponentPropsWithoutRef 12 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 13 | 23 | )) 24 | HoverCardContent.displayName = HoverCardPrimitive.Content.displayName 25 | 26 | export { HoverCard, HoverCardTrigger, HoverCardContent } 27 | -------------------------------------------------------------------------------- /components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import * as React from "react" 3 | 4 | export interface InputProps 5 | extends React.InputHTMLAttributes {} 6 | 7 | const Input = React.forwardRef( 8 | ({ className, type, ...props }, ref) => { 9 | return ( 10 | 19 | ) 20 | } 21 | ) 22 | Input.displayName = "Input" 23 | 24 | export { Input } 25 | -------------------------------------------------------------------------------- /components/ui/label.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import * as LabelPrimitive from "@radix-ui/react-label" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | import * as React from "react" 5 | 6 | const labelVariants = cva( 7 | "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70" 8 | ) 9 | 10 | 
const Label = React.forwardRef< 11 | React.ElementRef, 12 | React.ComponentPropsWithoutRef & 13 | VariantProps 14 | >(({ className, ...props }, ref) => ( 15 | 20 | )) 21 | Label.displayName = LabelPrimitive.Root.displayName 22 | 23 | export { Label } 24 | -------------------------------------------------------------------------------- /components/ui/sonner.tsx: -------------------------------------------------------------------------------- 1 | import { useTheme } from "next-themes" 2 | import { Toaster as Sonner } from "sonner" 3 | 4 | type ToasterProps = React.ComponentProps 5 | 6 | const Toaster = ({ ...props }: ToasterProps) => { 7 | const { theme = "system" } = useTheme() 8 | 9 | return ( 10 | 25 | ) 26 | } 27 | 28 | export { Toaster } 29 | -------------------------------------------------------------------------------- /components/ui/switch.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import * as SwitchPrimitives from "@radix-ui/react-switch" 3 | import * as React from "react" 4 | 5 | const Switch = React.forwardRef< 6 | React.ElementRef, 7 | React.ComponentPropsWithoutRef 8 | >(({ className, ...props }, ref) => ( 9 | 16 | 21 | 22 | )) 23 | Switch.displayName = SwitchPrimitives.Root.displayName 24 | 25 | export { Switch } 26 | -------------------------------------------------------------------------------- /components/ui/textarea.tsx: -------------------------------------------------------------------------------- 1 | import { cn } from "@/lib/utils" 2 | import * as React from "react" 3 | 4 | export interface TextareaProps 5 | extends React.TextareaHTMLAttributes {} 6 | 7 | const Textarea = React.forwardRef( 8 | ({ className, ...props }, ref) => { 9 | return ( 10 |