├── .claudeignore ├── media ├── homepage.png ├── dataset_editor.png └── merged_dataset_created.png ├── src └── lerobot_data_studio │ ├── backend │ ├── __init__.py │ ├── utils.py │ ├── models.py │ ├── state_store.py │ ├── background_tasks.py │ └── main.py │ ├── __init__.py │ └── frontend │ ├── tsconfig.node.json │ ├── index.html │ ├── vitest.config.ts │ ├── vite.config.ts │ ├── src │ ├── App.tsx │ ├── utils │ │ └── createDataset.ts │ ├── main.tsx │ ├── components │ │ ├── LoadingIndicator.tsx │ │ ├── EpisodeIndexDisplay.tsx │ │ ├── DatasetCompletionModal.tsx │ │ ├── EpisodeNavigation.tsx │ │ ├── DataChart.tsx │ │ ├── EpisodeSidebar.tsx │ │ ├── HomePage.tsx │ │ ├── VideoPlayer.tsx │ │ └── DatasetViewer.tsx │ ├── types │ │ └── index.ts │ ├── hooks │ │ ├── useVideoPreloader.ts │ │ └── useSelectedEpisodes.ts │ ├── index.css │ └── services │ │ └── api.ts │ ├── tsconfig.json │ ├── tests │ ├── createDatasetUtil.test.ts │ └── setup.ts │ └── package.json ├── scripts └── lint.sh ├── .github └── workflows │ └── test.yml ├── pytest.ini ├── LICENSE ├── run_dev.sh ├── mypy.ini ├── .gitignore ├── README.md └── pyproject.toml /.claudeignore: -------------------------------------------------------------------------------- 1 | .git -------------------------------------------------------------------------------- /media/homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/homepage.png -------------------------------------------------------------------------------- /media/dataset_editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/dataset_editor.png -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/__init__.py: -------------------------------------------------------------------------------- 1 | """LeRobot Data Studio Backend Package""" 2 | __version__ = "0.1.0" 3 | -------------------------------------------------------------------------------- /media/merged_dataset_created.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/merged_dataset_created.png -------------------------------------------------------------------------------- /src/lerobot_data_studio/__init__.py: -------------------------------------------------------------------------------- 1 | """LeRobot Data Studio - LeRobot Data Studio - Unofficial LeRobot Dataset Editor""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler", 7 | "allowSyntheticDefaultImports": true 8 | }, 9 | "include": ["vite.config.ts"] 10 | } -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | LeRobot Data Studio - Unofficial LeRobot Dataset Editor 9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config' 2 | import react from '@vitejs/plugin-react' 3 | import path from 'path' 4 | 5 | export default defineConfig({ 6 | plugins: [react()], 7 | test: { 8 | environment: 'jsdom', 9 | setupFiles: ['./tests/setup.ts'], 10 | include: ['tests/**/*.{test,spec}.{js,mjs,cjs,ts,mts,cts,jsx,tsx}'], 11 | globals: true, 12 | }, 13 | resolve: { 14 | alias: { 15 | '@': path.resolve(__dirname, './src'), 16 | }, 17 | }, 18 | }) -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | import path from 'path' 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [react()], 8 | resolve: { 9 | alias: { 10 | '@': path.resolve(__dirname, './src'), 11 | }, 12 | }, 13 | server: { 14 | port: 3000, 15 | host: true, // Allow connections from outside localhost 16 | proxy: { 17 | '/api': { 18 | target: 'http://localhost:8000', 19 | changeOrigin: true, 20 | }, 21 | }, 22 | }, 23 | }) -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Lint script for running Ruff checks 3 | 4 | set -e 5 | 6 | echo "🔍 Running Ruff linter..." 7 | 8 | # Default to checking src directory 9 | TARGET="${1:-./src}" 10 | 11 | # Check for private member access violations 12 | echo "Checking private member access (SLF001)..." 13 | uv run ruff check "$TARGET" --select SLF || true 14 | 15 | # Run full check 16 | echo -e "\nRunning full Ruff check..." 
17 | uv run ruff check "$TARGET" 18 | 19 | # Show statistics 20 | echo -e "\nIssue summary:" 21 | uv run ruff check "$TARGET" --statistics 2>/dev/null | head -10 22 | 23 | echo -e "\n💡 To auto-fix issues, run: uv run ruff check $TARGET --fix" 24 | echo "💡 To format code, run: uv run ruff format $TARGET" -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { Routes, Route } from 'react-router-dom'; 2 | import { Layout } from 'antd'; 3 | import HomePage from './components/HomePage'; 4 | import DatasetViewer from './components/DatasetViewer'; 5 | 6 | const { Content } = Layout; 7 | 8 | function App() { 9 | return ( 10 | 11 | 12 | 13 | } /> 14 | } /> 15 | } /> 16 | 17 | 18 | 19 | ); 20 | } 21 | 22 | export default App; -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "noEmit": true, 15 | "jsx": "react-jsx", 16 | 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "noFallthroughCasesInSwitch": true, 22 | 23 | /* Path aliases */ 24 | "baseUrl": ".", 25 | "paths": { 26 | "@/*": ["src/*"] 27 | } 28 | }, 29 | "include": ["src"], 30 | "references": [{ "path": "./tsconfig.node.json" }] 31 | } -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | types: [opened, synchronize, reopened] 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Install nvm and Node.js 24 16 | run: | 17 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash 18 | export NVM_DIR="$HOME/.nvm" 19 | [ -s "$NVM_DIR/nvm.sh" ] && \. 
"$NVM_DIR/nvm.sh" 20 | nvm install 24 21 | nvm use 24 22 | echo "$NVM_DIR/versions/node/$(nvm version)/bin" >> $GITHUB_PATH 23 | 24 | - name: Install frontend dependencies 25 | run: | 26 | cd src/lerobot_data_studio/frontend 27 | npm install 28 | 29 | - name: Run frontend tests 30 | run: | 31 | cd src/lerobot_data_studio/frontend 32 | npm run test -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = 7 | -v 8 | --tb=short 9 | --strict-markers 10 | --disable-warnings 11 | --color=yes 12 | --durations=10 13 | markers = 14 | requires_internet: marks tests as requiring internet connection 15 | slow: marks tests as slow (deselect with '-m "not slow"') 16 | integration: marks tests as integration tests 17 | filterwarnings = 18 | ignore::DeprecationWarning 19 | ignore::PendingDeprecationWarning 20 | ignore::FutureWarning 21 | ignore::UserWarning:torch.* 22 | ignore::UserWarning:torchvision.* 23 | ignore::UserWarning:transformers.* 24 | ignore::UserWarning:huggingface_hub.* 25 | log_cli = true 26 | log_cli_level = INFO 27 | log_cli_format = %(asctime)s [%(levelname)8s] %(name)s: %(message)s 28 | log_cli_date_format = %Y-%m-%d %H:%M:%S -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/utils/createDataset.ts: -------------------------------------------------------------------------------- 1 | export interface CreateDatasetParams { 2 | datasetId: string; 3 | newRepoId: string; 4 | selectedEpisodes: number[]; 5 | } 6 | 7 | /** 8 | * Build the CreateDatasetRequest payload for the backend. 
9 | * 10 | */ 11 | export function createDatasetRequest({ 12 | datasetId, 13 | newRepoId, 14 | selectedEpisodes, 15 | }: CreateDatasetParams) { 16 | 17 | // Validate inputs 18 | if ( 19 | !datasetId || 20 | !newRepoId || 21 | !selectedEpisodes || 22 | selectedEpisodes.length === 0 23 | ) { 24 | throw new Error( 25 | `Invalid parameters: datasetId=${datasetId}, newRepoId=${newRepoId}, selectedEpisodes=${ 26 | selectedEpisodes?.length || 0 27 | }` 28 | ); 29 | } 30 | 31 | 32 | const payload = { 33 | original_repo_id: datasetId, 34 | new_repo_id: newRepoId, 35 | selected_episodes: selectedEpisodes, 36 | }; 37 | 38 | return payload; 39 | } 40 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tests/createDatasetUtil.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { createDatasetRequest } from '../src/utils/createDataset'; 3 | 4 | describe('createDatasetRequest', () => { 5 | it('should build payload with episode and available tasks', async () => { 6 | const datasetId = 'namespace/dataset'; 7 | const newRepoId = 'namespace/new-dataset'; 8 | const selectedEpisodes = [0, 1, 2]; 9 | 10 | const payload = createDatasetRequest({ 11 | datasetId, 12 | newRepoId, 13 | selectedEpisodes, 14 | }); 15 | 16 | expect(payload.original_repo_id).toBe(datasetId); 17 | expect(payload.new_repo_id).toBe(newRepoId); 18 | expect(payload.selected_episodes).toEqual(selectedEpisodes); 19 | }); 20 | 21 | it('should handle single episode selection', async () => { 22 | const payload = createDatasetRequest({ 23 | datasetId: 'namespace/dataset', 24 | newRepoId: 'namespace/new-dataset', 25 | selectedEpisodes: [5], 26 | }); 27 | 28 | expect(payload.selected_episodes).toEqual([5]); 29 | }); 30 | }); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jack Vial 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import { BrowserRouter } from 'react-router-dom'; 4 | import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; 5 | import { ConfigProvider, theme } from 'antd'; 6 | import App from './App'; 7 | import './index.css'; 8 | 9 | const queryClient = new QueryClient({ 10 | defaultOptions: { 11 | queries: { 12 | staleTime: 5 * 60 * 1000, // 5 minutes 13 | gcTime: 10 * 60 * 1000, // 10 minutes 14 | retry: 1, 15 | }, 16 | }, 17 | }); 18 | 19 | ReactDOM.createRoot(document.getElementById('root')!).render( 20 | 21 | 22 | 23 | 32 | 33 | 34 | 35 | 36 | 37 | ); -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tests/setup.ts: -------------------------------------------------------------------------------- 1 | // Vitest setup file for React Testing Library and additional configuration 2 | import { vi } from 'vitest'; 3 | 4 | // Mock global objects that might be used in the codebase 5 | global.ResizeObserver = vi.fn().mockImplementation(() => ({ 6 | observe: vi.fn(), 7 | unobserve: vi.fn(), 8 | disconnect: vi.fn(), 9 | })); 10 | 11 | // Mock matchMedia 12 | Object.defineProperty(window, 'matchMedia', { 13 | writable: true, 14 | value: vi.fn().mockImplementation(query => ({ 15 | matches: false, 16 | media: query, 17 | onchange: null, 18 | addListener: vi.fn(), // deprecated 19 | removeListener: vi.fn(), // deprecated 20 | addEventListener: vi.fn(), 21 | removeEventListener: vi.fn(), 22 | dispatchEvent: vi.fn(), 23 | })), 24 | }); 25 | 26 | // Mock localStorage 27 | const localStorageMock = { 28 | getItem: vi.fn(), 29 | setItem: vi.fn(), 30 | removeItem: vi.fn(), 31 | clear: vi.fn(), 32 | }; 33 | Object.defineProperty(window, 'localStorage', { 34 | value: localStorageMock 35 | }); 36 | 37 | // Mock sessionStorage 38 | Object.defineProperty(window, 'sessionStorage', { 39 | value: localStorageMock 40 | }); -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/LoadingIndicator.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Spin, Progress, Typography } from 'antd'; 3 | import { LoadingOutlined } from '@ant-design/icons'; 4 | 5 | const { Title } = Typography; 6 | 7 | interface LoadingIndicatorProps { 8 | message?: string; 9 | progress?: number; 10 | } 11 | 12 | const LoadingIndicator: React.FC = ({ 13 | message, 14 | progress, 15 | }) => { 16 | const antIcon = ; 17 | 18 | return ( 19 |
20 |
21 | 22 | 23 | {message || 'Loading...'} 24 | 25 | {progress !== undefined && progress > 0 && progress < 1 && ( 26 | 31 | )} 32 |
33 |
34 | ); 35 | }; 36 | 37 | export default LoadingIndicator; 38 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeIndexDisplay.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Typography } from 'antd'; 3 | 4 | const { Text } = Typography; 5 | 6 | interface EpisodeIndexDisplayProps { 7 | currentEpisodeId: number; 8 | actualEpisodeIndex?: number | null; 9 | } 10 | 11 | /** 12 | * Visually sanity check indices 13 | */ 14 | const EpisodeIndexDisplay: React.FC = ({ 15 | currentEpisodeId, 16 | actualEpisodeIndex, 17 | }) => { 18 | const hasIndexMismatch = 19 | actualEpisodeIndex !== null && actualEpisodeIndex !== currentEpisodeId; 20 | 21 | return ( 22 | 30 | {actualEpisodeIndex !== null ? ( 31 | <> 32 | selected_episode_index={currentEpisodeId}, row_episode_index= 33 | {actualEpisodeIndex} 34 | {hasIndexMismatch && ' ⚠️'} 35 | 36 | ) : ( 37 | <>selected_episode_index={currentEpisodeId}, content=loading... 38 | )} 39 | 40 | ); 41 | }; 42 | 43 | export default EpisodeIndexDisplay; 44 | -------------------------------------------------------------------------------- /run_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit on error 4 | set -e 5 | 6 | # Skip Git LFS downloads (we don't need test artifacts) 7 | export GIT_LFS_SKIP_SMUDGE=1 8 | 9 | # Colors for output 10 | GREEN='\033[0;32m' 11 | BLUE='\033[0;34m' 12 | RED='\033[0;31m' 13 | NC='\033[0m' # No Color 14 | 15 | echo -e "${BLUE}Starting LeRobot Data Studio...${NC}" 16 | 17 | # Function to cleanup on exit 18 | cleanup() { 19 | echo -e "\n${BLUE}Shutting down servers...${NC}" 20 | # Kill all child processes 21 | pkill -P $$ || true 22 | exit 23 | } 24 | 25 | # Set up trap to cleanup on Ctrl+C 26 | trap cleanup INT TERM 27 | 28 | # Start backend server 29 | echo -e "${GREEN}Starting backend server...${NC}" 30 | uv run uvicorn lerobot_data_studio.backend.main:app --reload --host 0.0.0.0 --port 8000 & 31 | BACKEND_PID=$! 32 | 33 | # Wait a bit for backend to start 34 | sleep 2 35 | 36 | # Start frontend server 37 | echo -e "${GREEN}Starting frontend server...${NC}" 38 | cd src/lerobot_data_studio/frontend 39 | npm run build && npm run dev & 40 | FRONTEND_PID=$! 41 | cd ../../.. 
42 | 43 | echo -e "${GREEN}LeRobot Data Studio is running!${NC}" 44 | echo -e "${BLUE}Backend API: http://localhost:8000${NC}" 45 | echo -e "${BLUE}Frontend UI: http://localhost:3000${NC}" 46 | echo -e "${RED}Press Ctrl+C to stop all servers${NC}" 47 | 48 | # Wait for both processes 49 | wait $BACKEND_PID $FRONTEND_PID -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.10 3 | warn_return_any = True 4 | warn_unused_configs = True 5 | disallow_untyped_defs = True 6 | disallow_any_unimported = False 7 | no_implicit_optional = True 8 | check_untyped_defs = True 9 | warn_redundant_casts = True 10 | warn_unused_ignores = True 11 | warn_no_return = True 12 | warn_unreachable = True 13 | strict_equality = True 14 | 15 | # Allow gradual typing - start with less strict rules 16 | allow_untyped_calls = True 17 | allow_untyped_decorators = True 18 | allow_incomplete_defs = True 19 | 20 | # Ignore missing imports for packages without stubs 21 | [mypy-lerobot.*] 22 | ignore_missing_imports = True 23 | 24 | [mypy-huggingface_hub.*] 25 | ignore_missing_imports = True 26 | 27 | [mypy-datasets.*] 28 | ignore_missing_imports = True 29 | 30 | [mypy-uvicorn.*] 31 | ignore_missing_imports = True 32 | 33 | [mypy-fastapi.*] 34 | ignore_missing_imports = True 35 | 36 | [mypy-pydantic.*] 37 | ignore_missing_imports = True 38 | 39 | [mypy-numpy.*] 40 | ignore_missing_imports = True 41 | 42 | [mypy-pandas.*] 43 | ignore_missing_imports = True 44 | 45 | [mypy-psutil.*] 46 | ignore_missing_imports = True 47 | 48 | [mypy-requests.*] 49 | ignore_missing_imports = True 50 | 51 | [mypy-cv2.*] 52 | ignore_missing_imports = True 53 | 54 | [mypy-imageio.*] 55 | ignore_missing_imports = True 56 | 57 | [mypy-h5py.*] 58 | ignore_missing_imports = True 59 | 60 | [mypy-zarr.*] 61 | ignore_missing_imports = True -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/types/index.ts: -------------------------------------------------------------------------------- 1 | export interface DatasetInfo { 2 | repo_id: string; 3 | num_samples: number; 4 | num_episodes: number; 5 | fps: number; 6 | version?: string; 7 | } 8 | 9 | export interface VideoInfo { 10 | url: string; 11 | filename: string; 12 | language_instruction?: string[]; 13 | } 14 | 15 | export interface EpisodeData { 16 | episode_id: number; 17 | dataset_info: DatasetInfo; 18 | videos_info: VideoInfo[]; 19 | episode_data: Record[]; 20 | feature_names: string[]; 21 | tasks: string[]; 22 | actual_episode_index?: number | null; 23 | } 24 | 25 | export interface DatasetListResponse { 26 | featured_datasets: string[]; 27 | lerobot_datasets: string[]; 28 | } 29 | 30 | export interface CreateDatasetRequest { 31 | original_repo_id: string; 32 | new_repo_id: string; 33 | selected_episodes: number[]; 34 | episode_index_task_map?: Record; 35 | ui_custom_task_list?: string[]; 36 | } 37 | 38 | export interface CreateDatasetResponse { 39 | success: boolean; 40 | new_repo_id: string; 41 | message: string; 42 | task_id?: string; 43 | } 44 | 45 | export interface CreateTaskStatus { 46 | task_id: string; 47 | status: 'pending' | 'running' | 'completed' | 'failed'; 48 | progress?: number; 49 | message?: string; 50 | new_repo_id?: string; 51 | } 52 | 53 | export interface DatasetLoadingStatus { 54 | status: 'loading' | 'ready' | 'error' | 'not_loaded'; 55 | progress?: number; 56 | 
message?: string; 57 | } 58 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lerobot-data-studio", 3 | "version": "0.1.0", 4 | "description": "LeRobot Data Studio - Unofficial LeRobot Dataset Editor", 5 | "private": true, 6 | "type": "module", 7 | "scripts": { 8 | "dev": "vite", 9 | "build": "tsc && vite build", 10 | "preview": "vite preview", 11 | "type-check": "tsc --noEmit", 12 | "test": "vitest run", 13 | "test:run": "vitest run", 14 | "test:watch": "vitest", 15 | "test:coverage": "vitest run --coverage" 16 | }, 17 | "keywords": [ 18 | "lerobot", 19 | "dataset", 20 | "editor", 21 | "unofficial" 22 | ], 23 | "author": "Jack Vial ", 24 | "license": "MIT", 25 | "dependencies": { 26 | "@ant-design/icons": "^5.2.6", 27 | "@tanstack/react-query": "^5.12.0", 28 | "antd": "^5.11.5", 29 | "axios": "^1.6.2", 30 | "date-fns": "^2.30.0", 31 | "dygraphs": "^2.2.1", 32 | "react": "^19.1.0", 33 | "react-dom": "^19.1.0", 34 | "react-player": "^2.13.0", 35 | "react-router-dom": "^6.20.0" 36 | }, 37 | "devDependencies": { 38 | "@types/dygraphs": "^2.1.10", 39 | "@types/react": "^19.0.2", 40 | "@types/react-dom": "^19.0.2", 41 | "@typescript-eslint/eslint-plugin": "^6.13.2", 42 | "@typescript-eslint/parser": "^6.13.2", 43 | "@vitejs/plugin-react": "^4.2.0", 44 | "@vitest/coverage-v8": "^1.0.0", 45 | "eslint": "^8.55.0", 46 | "eslint-plugin-react": "^7.33.2", 47 | "eslint-plugin-react-hooks": "^4.6.0", 48 | "jsdom": "^23.0.0", 49 | "typescript": "^5.3.2", 50 | "vite": "^5.0.6", 51 | "vitest": "^1.0.0" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for the backend.""" 2 | 3 | import logging 4 | 5 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 6 | 7 | from .models import EpisodeDataItem 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def get_episode_data(dataset: LeRobotDataset, episode_index: int): 13 | """Extract episode data for display in the UI. 
14 | 15 | Args: 16 | dataset: The LeRobotDataset to extract data from 17 | episode_index: The episode index to extract 18 | 19 | Returns: 20 | Tuple of (episode_data_items, feature_names) 21 | """ 22 | # Get episode boundaries from meta.episodes 23 | episode_info = dataset.meta.episodes[episode_index] 24 | from_idx = episode_info["dataset_from_index"] 25 | to_idx = episode_info["dataset_to_index"] 26 | data = dataset.hf_dataset.select(range(from_idx, to_idx)).select_columns( 27 | ["episode_index", "action", "observation.state", "timestamp"] 28 | ) 29 | 30 | episode_data_items = [] 31 | for sample in data: 32 | # Round action and observation values to 2 decimal places 33 | action_values = ( 34 | sample["action"].tolist() if hasattr(sample["action"], "tolist") else list(sample["action"]) 35 | ) 36 | action_rounded = [round(val, 2) for val in action_values] 37 | 38 | observation_values = ( 39 | sample["observation.state"].tolist() 40 | if hasattr(sample["observation.state"], "tolist") 41 | else list(sample["observation.state"]) 42 | ) 43 | observation_rounded = [round(val, 2) for val in observation_values] 44 | 45 | episode_data_items.append( 46 | EpisodeDataItem( 47 | episode_index=sample["episode_index"], 48 | action=action_rounded, 49 | observation=observation_rounded, 50 | timestamp=round(float(sample["timestamp"]), 2), 51 | ) 52 | ) 53 | 54 | return episode_data_items, dataset.features["observation.state"]["names"] 55 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/models.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class DatasetInfo(BaseModel): 7 | repo_id: str 8 | num_samples: int 9 | num_episodes: int 10 | fps: int 11 | version: Optional[str] = None 12 | 13 | 14 | class VideoInfo(BaseModel): 15 | url: str 16 | filename: str 17 | language_instruction: Optional[List[str]] = None 18 | 19 | 20 | class EpisodeDataItem(BaseModel): 21 | episode_index: int 22 | action: List[float] 23 | observation: List[float] 24 | timestamp: float 25 | 26 | 27 | class EpisodeData(BaseModel): 28 | episode_id: int 29 | dataset_info: DatasetInfo 30 | videos_info: List[VideoInfo] 31 | episode_data: List[EpisodeDataItem] 32 | feature_names: List[str] 33 | actual_episode_index: Optional[int] = None 34 | tasks: List[str] 35 | 36 | 37 | class DatasetListResponse(BaseModel): 38 | featured_datasets: List[str] 39 | lerobot_datasets: List[str] 40 | 41 | 42 | class CreateDatasetRequest(BaseModel): 43 | original_repo_id: str 44 | new_repo_id: str 45 | selected_episodes: List[int] = Field(..., min_length=1) 46 | 47 | # Episode ID -> Task name 48 | episode_index_task_map: Optional[Dict[int, str]] = None 49 | 50 | 51 | class CreateDatasetResponse(BaseModel): 52 | success: bool 53 | new_repo_id: str 54 | message: str 55 | task_id: Optional[str] = None 56 | 57 | 58 | class DatasetLoadingStatus(BaseModel): 59 | status: Optional[str] = None 60 | progress: Optional[float] = None 61 | message: Optional[str] = None 62 | memory_usage_mb: Optional[float] = None 63 | 64 | 65 | class DatasetSearchResponse(BaseModel): 66 | repo_ids: List[str] 67 | 68 | 69 | class DatasetValidationResponse(BaseModel): 70 | exists: bool 71 | message: Optional[str] = None 72 | 73 | 74 | class CreateTaskStatus(BaseModel): 75 | task_id: Optional[str] = None 76 | status: Optional[str] = None 77 | progress: Optional[float] = None 78 | message: 
Optional[str] = None 79 | new_repo_id: Optional[str] = None 80 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/hooks/useVideoPreloader.ts: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from 'react'; 2 | 3 | export const useVideoPreloader = ( 4 | currentEpisodeId: number, 5 | totalEpisodes: number, 6 | getVideoUrl: (episodeId: number) => string | undefined, 7 | preloadCount: number = 2 8 | ) => { 9 | const preloadedVideos = useRef>(new Map()); 10 | 11 | useEffect(() => { 12 | // Clear old preloaded videos 13 | const currentPreloaded = new Set(); 14 | 15 | // Preload previous episodes 16 | for (let i = 1; i <= preloadCount; i++) { 17 | const prevId = currentEpisodeId - i; 18 | if (prevId >= 0) { 19 | preloadVideo(prevId); 20 | currentPreloaded.add(prevId); 21 | } 22 | } 23 | 24 | // Preload next episodes 25 | for (let i = 1; i <= preloadCount; i++) { 26 | const nextId = currentEpisodeId + i; 27 | if (nextId < totalEpisodes) { 28 | preloadVideo(nextId); 29 | currentPreloaded.add(nextId); 30 | } 31 | } 32 | 33 | // Remove videos that are no longer needed 34 | preloadedVideos.current.forEach((video, episodeId) => { 35 | if (!currentPreloaded.has(episodeId) && episodeId !== currentEpisodeId) { 36 | video.src = ''; 37 | video.load(); 38 | preloadedVideos.current.delete(episodeId); 39 | } 40 | }); 41 | }, [currentEpisodeId, totalEpisodes, getVideoUrl, preloadCount]); 42 | 43 | const preloadVideo = (episodeId: number) => { 44 | if (preloadedVideos.current.has(episodeId)) { 45 | return; 46 | } 47 | 48 | const videoUrl = getVideoUrl(episodeId); 49 | if (!videoUrl) return; 50 | 51 | const video = document.createElement('video'); 52 | video.src = videoUrl; 53 | video.preload = 'auto'; 54 | video.muted = true; 55 | 56 | // Start loading the video 57 | video.load(); 58 | 59 | preloadedVideos.current.set(episodeId, video); 60 | }; 61 | 62 | // Cleanup on unmount 63 | useEffect(() => { 64 | return () => { 65 | preloadedVideos.current.forEach((video) => { 66 | video.src = ''; 67 | video.load(); 68 | }); 69 | preloadedVideos.current.clear(); 70 | }; 71 | }, []); 72 | 73 | return { 74 | isPreloaded: (episodeId: number) => preloadedVideos.current.has(episodeId), 75 | }; 76 | }; 77 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/hooks/useSelectedEpisodes.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from 'react'; 2 | 3 | interface SelectedEpisodesState { 4 | [datasetId: string]: number[]; 5 | } 6 | 7 | export const useSelectedEpisodes = (datasetId?: string) => { 8 | const [selectedEpisodes, setSelectedEpisodes] = 9 | useState({}); 10 | 11 | // Load from localStorage on mount 12 | useEffect(() => { 13 | const stored = localStorage.getItem('selectedEpisodes'); 14 | if (stored) { 15 | try { 16 | setSelectedEpisodes(JSON.parse(stored)); 17 | } catch (e) { 18 | console.error('Failed to parse stored episodes:', e); 19 | } 20 | } 21 | }, []); 22 | 23 | // Save to localStorage whenever selectedEpisodes changes 24 | useEffect(() => { 25 | localStorage.setItem('selectedEpisodes', JSON.stringify(selectedEpisodes)); 26 | }, [selectedEpisodes]); 27 | 28 | const toggleEpisode = (episodeId: number) => { 29 | if (!datasetId) return; 30 | 31 | setSelectedEpisodes((prev) => { 32 | const current = prev[datasetId] || []; 33 | const isSelected = 
current.includes(episodeId); 34 | 35 | if (isSelected) { 36 | return { 37 | ...prev, 38 | [datasetId]: current.filter((id) => id !== episodeId), 39 | }; 40 | } else { 41 | return { 42 | ...prev, 43 | [datasetId]: [...current, episodeId].sort((a, b) => a - b), 44 | }; 45 | } 46 | }); 47 | }; 48 | 49 | const clearSelection = () => { 50 | if (!datasetId) return; 51 | 52 | setSelectedEpisodes((prev) => ({ 53 | ...prev, 54 | [datasetId]: [], 55 | })); 56 | }; 57 | 58 | const selectAll = (episodeIds: number[]) => { 59 | if (!datasetId) return; 60 | 61 | setSelectedEpisodes((prev) => ({ 62 | ...prev, 63 | [datasetId]: [...episodeIds].sort((a, b) => a - b), 64 | })); 65 | }; 66 | 67 | const isSelected = (episodeId: number): boolean => { 68 | if (!datasetId) return false; 69 | return (selectedEpisodes[datasetId] || []).includes(episodeId); 70 | }; 71 | 72 | const getSelectedForDataset = (): number[] => { 73 | if (!datasetId) return []; 74 | return selectedEpisodes[datasetId] || []; 75 | }; 76 | 77 | return { 78 | selectedEpisodes: getSelectedForDataset(), 79 | toggleEpisode, 80 | clearSelection, 81 | selectAll, 82 | isSelected, 83 | selectedCount: getSelectedForDataset().length, 84 | }; 85 | }; 86 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/DatasetCompletionModal.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Modal, Button, Space, Tag, Progress, Typography } from 'antd'; 3 | import { useNavigate } from 'react-router-dom'; 4 | 5 | const { Text, Paragraph } = Typography; 6 | 7 | interface DatasetCompletionModalProps { 8 | visible: boolean; 9 | onClose: () => void; 10 | status?: { 11 | status: 'pending' | 'running' | 'completed' | 'failed'; 12 | progress?: number; 13 | message?: string; 14 | repo_id?: string; 15 | }; 16 | title?: string; 17 | actionLabel?: string; 18 | } 19 | 20 | const DatasetCompletionModal: React.FC = ({ 21 | visible, 22 | onClose, 23 | status, 24 | title = 'Dataset Operation Status', 25 | actionLabel = 'View Dataset', 26 | }) => { 27 | const navigate = useNavigate(); 28 | 29 | const handleViewDataset = () => { 30 | if (status?.repo_id) { 31 | const [namespace, name] = status.repo_id.split('/'); 32 | navigate(`/${namespace}/${name}/episode/0`); 33 | } 34 | onClose(); 35 | }; 36 | 37 | return ( 38 | 50 | Close 51 | , 52 | status?.status === 'completed' && ( 53 | 56 | ), 57 | ]} 58 | > 59 | {status && ( 60 | 61 | 62 | Status:{' '} 63 | 72 | {status.status} 73 | 74 | 75 | 76 | {status.progress !== undefined && ( 77 | 78 | )} 79 | 80 | {status.message && {status.message}} 81 | 82 | {status.repo_id && status.status === 'completed' && ( 83 | 84 | Dataset: {status.repo_id} 85 | 86 | )} 87 | 88 | )} 89 | 90 | ); 91 | }; 92 | 93 | export default DatasetCompletionModal; 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files 
are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .env_training 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Output directories 133 | outputs/ 134 | 135 | # W&B 136 | wandb/ 137 | wandb_downloads/ 138 | 139 | # Dataset files 140 | *.mp4 141 | !tests/template_datasets/v2_1/screwdriver_panel_ls_080225_4_e5/videos/chunk-000/observation.images.side/episode_000000.mp4 142 | *.png 143 | *.jpg 144 | *.jpeg 145 | 146 | # But allow images in media folders 147 | !media/*.png 148 | !media/*.jpg 149 | !media/*.jpeg 150 | 151 | # IDE files 152 | .vscode/ 153 | .idea/ 154 | *.swp 155 | *.swo 156 | 157 | # OS files 158 | .DS_Store 159 | Thumbs.db 160 | 161 | node_modules/ 162 | *.egg-info 163 | 164 | node_modules/ 165 | .claude/ 166 | CLAUDE.md 167 | .ruff_cache/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LeRobot Data Studio - Unofficial LeRobot Dataset Editor 2 | 3 | A web-based GUI for editing LeRobot datasets build on the LeRobot [dataset tools api](https://huggingface.co/docs/lerobot/using_dataset_tools) 4 | 5 | *Note: This is an unofficial tool and is not affiliated with Huggingface, LeRobot or the LeRobot team.* 6 | 7 | ## Main Features 8 | - Easily remove episodes and create new clean datasets 9 | - Speed control and keyboard shortcuts to streamline dataset cleaning 10 | 11 | ![Dataset Editor](media/dataset_editor.png) 12 | 13 | ## Quick Start 14 | 15 | ### Step 1: Prerequisites 16 | - [UV Python package and project manager](https://astral.sh/uv/): `curl -LsSf https://astral.sh/uv/install.sh | sh` 17 | - Python 3.10+ (You can 
use uv to install and manage Python versions e.g. `uv python install 3.12`) 18 | - Node.js 24+ (Install using nvm - see instructions below) 19 | - A Huggingface account (free) 20 | - [Huggingface CLI](https://huggingface.co/docs/huggingface_hub/en/guides/cli) 21 | 22 | #### Installing Node.js with nvm 23 | 24 | ```bash 25 | # Install nvm (Node Version Manager) 26 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash 27 | 28 | # Reload your shell configuration 29 | source ~/.bashrc # or ~/.zshrc if using zsh 30 | 31 | # Install and use Node.js 24 32 | nvm install 24 33 | nvm use 24 34 | 35 | # Verify installation 36 | node --version 37 | ``` 38 | 39 | ### Step 2: Installation 40 | 41 | ```bash 42 | git clone https://github.com/jackvial/lerobot-data-studio 43 | cd lerobot-data-studio 44 | 45 | # Create a virtual environment with UV 46 | uv venv 47 | 48 | # Activate the virtual environment 49 | source .venv/bin/activate 50 | 51 | # Install all packages using UV 52 | uv sync 53 | ``` 54 | 55 | ### Step 3: Install Frontend Dependencies 56 | 57 | After completing the Python installation, install frontend dependencies: 58 | 59 | ```bash 60 | cd src/lerobot_data_studio/frontend 61 | npm install 62 | ``` 63 | 64 | ### Step 4: Running the App 65 | 66 | Use the provided script to start both frontend and backend servers: 67 | 68 | ```bash 69 | ./run_dev.sh 70 | ``` 71 | 72 | ## Dataset Creation 73 | Dataset creation for filtered (AKA edited) datasets is always non-destructive and will always create a new dataset and upload it to the Huggingface Hub. 74 | 75 | ### Filtered Dataset Creation 76 | Editing/filtering a dataset creates a new dataset that excludes only the episodes that were selected in the UI. 77 | 78 | ### Merging Datasets 79 | If you need to merge multiple datasets, we recommend using the [LeRobot dataset tools CLI](https://huggingface.co/docs/lerobot/using_dataset_tools#lerobot.datasets.merge_datasets). 80 | 81 | ## Development 82 | 83 | ### Run Backend Tests 84 | 85 | ```bash 86 | uv run pytest 87 | ``` 88 | 89 | ### Run Frontend Tests 90 | 91 | ```bash 92 | cd src/lerobot_data_studio/frontend 93 | npm run test 94 | ``` 95 | 96 | ### Contributing 97 | 98 | Contributions are welcome! 99 | 100 | ### License 101 | 102 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeNavigation.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Button, Space, InputNumber, Tag, Tooltip } from 'antd'; 3 | import { 4 | LeftOutlined, 5 | RightOutlined, 6 | FastBackwardOutlined, 7 | FastForwardOutlined, 8 | } from '@ant-design/icons'; 9 | 10 | interface EpisodeNavigationProps { 11 | currentEpisodeId: number; 12 | totalEpisodes: number; 13 | onEpisodeChange: (episodeId: number) => void; 14 | isPreloaded: (episodeId: number) => boolean; 15 | } 16 | 17 | const EpisodeNavigation: React.FC = ({ 18 | currentEpisodeId, 19 | totalEpisodes, 20 | onEpisodeChange, 21 | isPreloaded, 22 | }) => { 23 | const canGoPrevious = currentEpisodeId > 0; 24 | const canGoNext = currentEpisodeId < totalEpisodes - 1; 25 | 26 | const handleJumpTo = (value: number | null) => { 27 | if (value !== null && value >= 0 && value < totalEpisodes) { 28 | onEpisodeChange(value); 29 | } 30 | }; 31 | 32 | return ( 33 |
34 | 35 | {/* Navigation Controls */} 36 | 37 | 44 | 45 | 57 | 58 | 59 | 60 | Episode 61 | 68 | of {totalEpisodes - 1} 69 | 70 | 71 | 72 | 84 | 85 | 92 | 93 | 94 |
95 | ); 96 | }; 97 | 98 | export default EpisodeNavigation; 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "lerobot-data-studio" 3 | version = "0.2.0" 4 | description = "LeRobot Data Studio - Unofficial LeRobot Dataset Editor" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = {text = "MIT"} 8 | authors = [ 9 | {name = "Jack Vial", email = "vialjack@gmail.com"} 10 | ] 11 | dependencies = [ 12 | "fastapi>=0.104.1", 13 | "uvicorn[standard]>=0.24.0", 14 | "pydantic>=2.5.0", 15 | "python-multipart>=0.0.6", 16 | "aiofiles>=23.2.1", 17 | "pandas>=2.1.3", 18 | "requests>=2.31.0", 19 | "huggingface-hub>=0.27.1", 20 | "jsonlines>=4.0.0", 21 | "lerobot @ git+https://github.com/huggingface/lerobot.git@main", 22 | "numpy>=1.26.2", 23 | "h5py>=3.10.0", 24 | "zarr>=2.17.0", 25 | "opencv-python>=4.9.0", 26 | "imageio[ffmpeg]>=2.34.0", 27 | "ruff>=0.8.0", 28 | ] 29 | 30 | [project.urls] 31 | Homepage = "https://github.com/jackvial/lerobot-data-studio" 32 | Repository = "https://github.com/jackvial/lerobot-data-studio" 33 | 34 | # External dependencies inherited from workspace root 35 | # lerobot source defined in root pyproject.toml 36 | 37 | [build-system] 38 | requires = ["hatchling"] 39 | build-backend = "hatchling.build" 40 | 41 | [tool.hatch.metadata] 42 | allow-direct-references = true 43 | 44 | [tool.hatch.build.targets.wheel] 45 | packages = ["src/lerobot_data_studio"] 46 | 47 | [dependency-groups] 48 | dev = [ 49 | "pytest>=8.4.1", 50 | "mypy>=1.8.0", 51 | "types-requests>=2.31.0", 52 | "types-psutil>=5.9.5", 53 | "pandas-stubs>=2.1.4", 54 | ] 55 | 56 | [tool.ruff] 57 | target-version = "py310" 58 | line-length = 110 59 | exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"] 60 | 61 | [tool.ruff.lint] 62 | # E, W: pycodestyle errors and warnings 63 | # F: PyFlakes 64 | # I: isort 65 | # B: flake8-bugbear (good practices, potential bugs) 66 | # C4: flake8-comprehensions (more concise comprehensions) 67 | # T20: flake8-print (discourage print statements in production code) 68 | # N: pep8-naming 69 | # SLF: flake8-self (private member access detection) 70 | # TODO: Add more rules when ready: "SIM", "A", "S", "D", "RUF", "UP" 71 | select = [ 72 | "E", "W", "F", "I", "B", "C4", "T20", "N", "SLF" 73 | ] 74 | ignore = [ 75 | "E501", # Line too long 76 | "T201", # Print statement found 77 | "T203", # Pprint statement found 78 | "B008", # Perform function call in argument defaults 79 | ] 80 | 81 | [tool.ruff.lint.per-file-ignores] 82 | "__init__.py" = ["F401", "F403"] 83 | "tests/*" = ["SLF001"] # Allow private member access in tests 84 | "**/test_*.py" = ["SLF001", "T201"] # Allow private member access and print in test files 85 | 86 | [tool.ruff.lint.isort] 87 | combine-as-imports = true 88 | known-first-party = ["lerobot_data_studio"] 89 | 90 | [tool.ruff.lint.pydocstyle] 91 | convention = "google" 92 | 93 | [tool.ruff.format] 94 | quote-style = "double" 95 | indent-style = "space" 96 | skip-magic-trailing-comma = false 97 | line-ending = "auto" 98 | docstring-code-format = true 99 | 100 | # Enforce private member access rules 101 | [tool.ruff.lint.flake8-self] 102 | ignore-names = ["_meta", "_abc_impl"] # Allow specific private attributes if needed 103 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/index.css: 
-------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 7 | line-height: 1.5; 8 | font-weight: 400; 9 | 10 | color-scheme: light dark; 11 | color: rgba(255, 255, 255, 0.87); 12 | background-color: #242424; 13 | 14 | font-synthesis: none; 15 | text-rendering: optimizeLegibility; 16 | -webkit-font-smoothing: antialiased; 17 | -moz-osx-font-smoothing: grayscale; 18 | -webkit-text-size-adjust: 100%; 19 | } 20 | 21 | body { 22 | margin: 0; 23 | display: flex; 24 | place-items: center; 25 | min-width: 320px; 26 | min-height: 100vh; 27 | } 28 | 29 | h1 { 30 | font-size: 3.2em; 31 | line-height: 1.1; 32 | } 33 | 34 | #root { 35 | width: 100%; 36 | height: 100vh; 37 | margin: 0 auto; 38 | text-align: center; 39 | } 40 | 41 | .selected-episodes-badge { 42 | position: fixed; 43 | bottom: 20px; 44 | right: 20px; 45 | z-index: 1000; 46 | } 47 | 48 | /* Dark theme for Ant Design components */ 49 | .ant-layout { 50 | background: #141414 !important; 51 | } 52 | 53 | .ant-layout-header { 54 | background: #1f1f1f !important; 55 | border-bottom: 1px solid #303030 !important; 56 | } 57 | 58 | .ant-layout-sider { 59 | background: #1f1f1f !important; 60 | border-right: 1px solid #303030 !important; 61 | } 62 | 63 | .ant-card { 64 | background: #1f1f1f !important; 65 | border: 1px solid #303030 !important; 66 | } 67 | 68 | .ant-card-head { 69 | border-bottom: 1px solid #303030 !important; 70 | } 71 | 72 | .ant-btn-primary { 73 | background: #1890ff !important; 74 | border-color: #1890ff !important; 75 | } 76 | 77 | .ant-btn-default { 78 | background: #303030 !important; 79 | border-color: #434343 !important; 80 | color: rgba(255, 255, 255, 0.85) !important; 81 | } 82 | 83 | .ant-checkbox-wrapper { 84 | color: rgba(255, 255, 255, 0.85) !important; 85 | } 86 | 87 | .ant-list { 88 | color: rgba(255, 255, 255, 0.85) !important; 89 | } 90 | 91 | .ant-list-item { 92 | border-bottom: 1px solid #303030 !important; 93 | } 94 | 95 | .ant-typography { 96 | color: rgba(255, 255, 255, 0.85) !important; 97 | } 98 | 99 | .ant-slider { 100 | margin: 10px 0 !important; 101 | } 102 | 103 | .ant-slider-track { 104 | background-color: #1890ff !important; 105 | } 106 | 107 | .ant-alert { 108 | background: #303030 !important; 109 | border: 1px solid #434343 !important; 110 | } 111 | 112 | /* Dygraph dark theme */ 113 | .dygraph-legend { 114 | background: rgba(31, 31, 31, 0.95) !important; 115 | color: rgba(255, 255, 255, 0.85) !important; 116 | } 117 | 118 | .dygraph-axis-label { 119 | color: rgba(255, 255, 255, 0.65) !important; 120 | } 121 | 122 | .dygraph-xlabel, .dygraph-ylabel { 123 | color: rgba(255, 255, 255, 0.85) !important; 124 | } 125 | 126 | .video-container { 127 | position: relative; 128 | width: 100%; 129 | background: #000; 130 | } 131 | 132 | .episode-navigation { 133 | display: flex; 134 | justify-content: space-between; 135 | align-items: center; 136 | padding: 16px; 137 | } 138 | 139 | .loading-overlay { 140 | position: fixed; 141 | top: 0; 142 | left: 0; 143 | right: 0; 144 | bottom: 0; 145 | background: rgba(0, 0, 0, 0.45); 146 | display: flex; 147 | align-items: center; 148 | justify-content: center; 149 | z-index: 9999; 150 | } -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/state_store.py: 
-------------------------------------------------------------------------------- 1 | """Dataset caching and state management for the LeRobot Data Studio backend.""" 2 | 3 | import os 4 | from dataclasses import dataclass, field 5 | from pathlib import Path 6 | from typing import Dict, Optional 7 | 8 | from huggingface_hub.constants import HF_HOME 9 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 10 | 11 | from .models import CreateTaskStatus, DatasetLoadingStatus 12 | 13 | default_cache_path = Path(HF_HOME) / "lerobot" 14 | HF_LEROBOT_HOME = Path(os.getenv("HF_LEROBOT_HOME", default_cache_path)).expanduser() 15 | 16 | 17 | @dataclass 18 | class StateStore: 19 | """Simple global state management""" 20 | 21 | dataset_cache: Dict[str, LeRobotDataset] = field(default_factory=dict) 22 | dataset_loading_status: Dict[str, DatasetLoadingStatus] = field(default_factory=dict) 23 | loading_tasks: Dict[str, str] = field(default_factory=dict) 24 | creation_tasks: Dict[str, CreateTaskStatus] = field(default_factory=dict) 25 | 26 | def _update_or_create(self, store: dict, key: str, value: object, defaults: object = None): 27 | """Generic method to update or create entries with spreading pattern for Pydantic models""" 28 | if hasattr(value, "model_dump"): 29 | # It's a Pydantic model - get only the explicitly set fields 30 | existing = store.get(key) 31 | if existing: 32 | base = existing.model_dump() 33 | elif defaults: 34 | base = defaults.model_dump() 35 | else: 36 | base = {} 37 | updates = value.model_dump(exclude_unset=True) 38 | model_class = type(value) if existing is None else type(existing) 39 | store[key] = model_class(**{**base, **updates}) 40 | else: 41 | # Full replacement with non-Pydantic object 42 | store[key] = value 43 | 44 | def is_dataset_cached(self, repo_id: str) -> bool: 45 | return repo_id in self.dataset_cache 46 | 47 | def is_dataset_loading(self, repo_id: str) -> bool: 48 | return repo_id in self.loading_tasks 49 | 50 | def get_dataset(self, repo_id: str) -> Optional[LeRobotDataset]: 51 | return self.dataset_cache.get(repo_id) 52 | 53 | def set_loading_status(self, repo_id: str, status: DatasetLoadingStatus): 54 | self._update_or_create( 55 | self.dataset_loading_status, 56 | repo_id, 57 | status, 58 | DatasetLoadingStatus(status="loading", progress=0.0), 59 | ) 60 | 61 | def get_loading_status(self, repo_id: str) -> Optional[DatasetLoadingStatus]: 62 | return self.dataset_loading_status.get(repo_id) 63 | 64 | def start_loading(self, repo_id: str): 65 | self.loading_tasks[repo_id] = "loading" 66 | 67 | def finish_loading(self, repo_id: str): 68 | if repo_id in self.loading_tasks: 69 | del self.loading_tasks[repo_id] 70 | 71 | def cache_dataset(self, repo_id: str, dataset: LeRobotDataset): 72 | self.dataset_cache[repo_id] = dataset 73 | 74 | def get_creation_task(self, task_id: str) -> Optional[CreateTaskStatus]: 75 | return self.creation_tasks.get(task_id) 76 | 77 | def set_creation_task(self, task_id: str, status: CreateTaskStatus): 78 | self._update_or_create( 79 | self.creation_tasks, 80 | task_id, 81 | status, 82 | CreateTaskStatus(task_id=task_id, status="pending", progress=0.0), 83 | ) 84 | 85 | def clear_loading_tasks(self): 86 | self.loading_tasks.clear() 87 | 88 | 89 | # Create a singleton instance for the application 90 | _state_store = StateStore() 91 | 92 | 93 | def get_state_store() -> StateStore: 94 | """Dependency injection function to get the task manager.""" 95 | return _state_store 96 | 
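For reference, a minimal sketch of how `get_state_store()` can be wired into a route handler with FastAPI's `Depends`. Only `StateStore`, `get_state_store`, and `DatasetLoadingStatus` are defined in this repository; the router prefix, route path, and handler body below are assumptions for illustration (the path simply mirrors the `/datasets/{namespace}/{name}/status` endpoint that the frontend polls in `api.ts`), not the project's actual `main.py`:

```python
# Hypothetical sketch (not the repository's main.py): one way the StateStore
# singleton could back the dataset status endpoint that
# datasetApi.getDatasetStatus() polls from the frontend.
from fastapi import APIRouter, Depends

from .models import DatasetLoadingStatus
from .state_store import StateStore, get_state_store

router = APIRouter(prefix="/api")


@router.get("/datasets/{namespace}/{name}/status", response_model=DatasetLoadingStatus)
def dataset_status(
    namespace: str,
    name: str,
    store: StateStore = Depends(get_state_store),
) -> DatasetLoadingStatus:
    repo_id = f"{namespace}/{name}"
    if store.is_dataset_cached(repo_id):
        # Dataset is already loaded into memory: report it as ready.
        return DatasetLoadingStatus(status="ready", progress=1.0)
    if store.is_dataset_loading(repo_id):
        # A background task is loading it: surface its last recorded progress.
        return store.get_loading_status(repo_id) or DatasetLoadingStatus(
            status="loading", progress=0.0
        )
    return DatasetLoadingStatus(status="not_loaded")
```

Routing the singleton through `Depends(get_state_store)` rather than importing `_state_store` directly keeps handlers easy to test, since the dependency can be overridden with a fresh `StateStore` per test.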
-------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/DataChart.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useRef, useMemo } from 'react'; 2 | import { Card, Empty } from 'antd'; 3 | import Dygraph from 'dygraphs'; 4 | import 'dygraphs/dist/dygraph.css'; 5 | 6 | interface DataChartProps { 7 | episodeData: Record[]; 8 | featureNames: string[]; 9 | currentTime?: number; 10 | } 11 | 12 | const DataChart: React.FC = ({ 13 | episodeData, 14 | featureNames, 15 | currentTime, 16 | }) => { 17 | const chartRef = useRef(null); 18 | const dygraphRef = useRef(null); 19 | 20 | const chartData = useMemo(() => { 21 | if (!episodeData || episodeData.length === 0) return null; 22 | 23 | try { 24 | const data = episodeData.map((row: any) => { 25 | const timestamp = row['timestamp'] || 0; 26 | const observation = row['observation'] || []; 27 | 28 | // In Dygraph the first value is always the X axis 29 | // all other values will be plotted on the Y axis 30 | return [timestamp, ...observation]; 31 | }); 32 | 33 | return data; 34 | } catch (error) { 35 | console.error('Error converting JSON to array format:', error); 36 | return null; 37 | } 38 | }, [episodeData]); 39 | 40 | useEffect(() => { 41 | if (!chartRef.current || !chartData || chartData.length === 0) return; 42 | 43 | // Clean up previous chart 44 | if (dygraphRef.current) { 45 | dygraphRef.current.destroy(); 46 | } 47 | 48 | try { 49 | dygraphRef.current = new Dygraph(chartRef.current, chartData, { 50 | labels: ['Time', ...featureNames], 51 | showRoller: true, 52 | rollPeriod: 1, 53 | animatedZooms: false, 54 | legend: 'always', 55 | labelsSeparateLines: true, 56 | highlightCircleSize: 5, 57 | strokeWidth: 1.5, 58 | gridLineColor: '#ddd', 59 | axisLineColor: '#999', 60 | axisLabelFontSize: 12, 61 | xLabelHeight: 18, 62 | yLabelWidth: 50, 63 | drawPoints: false, 64 | pointSize: 3, 65 | hideOverlayOnMouseOut: false, 66 | showRangeSelector: true, 67 | rangeSelectorHeight: 40, 68 | rangeSelectorPlotStrokeColor: '#666', 69 | rangeSelectorPlotFillColor: '#666', 70 | interactionModel: Dygraph.defaultInteractionModel, 71 | xValueParser: (x: string) => parseFloat(x), 72 | axes: { 73 | x: { 74 | axisLabelFormatter: (x: number | Date) => { 75 | // Handle both number and Date types 76 | if (typeof x === 'number') { 77 | return `${x.toFixed(2)}s`; 78 | } 79 | // This shouldn't happen with our xValueParser, but handle it gracefully 80 | return x.toString(); 81 | }, 82 | valueFormatter: (x: number) => { 83 | return `${x.toFixed(3)} seconds`; 84 | }, 85 | }, 86 | }, 87 | xlabel: 'Time (seconds)', 88 | }); 89 | } catch (error) { 90 | console.error('Error creating Dygraph:', error); 91 | } 92 | 93 | // Cleanup function 94 | return () => { 95 | if (dygraphRef.current) { 96 | dygraphRef.current.destroy(); 97 | dygraphRef.current = null; 98 | } 99 | }; 100 | }, [chartData, featureNames]); 101 | 102 | // Update vertical line when currentTime changes 103 | useEffect(() => { 104 | if (dygraphRef.current && currentTime !== undefined) { 105 | // Draw a vertical line at the current time 106 | dygraphRef.current.updateOptions({ 107 | underlayCallback: (canvas, area, g) => { 108 | const x = g.toDomXCoord(currentTime); 109 | 110 | // Only draw if the time is within the visible range 111 | if (x >= area.x && x <= area.x + area.w) { 112 | canvas.strokeStyle = '#ff6b6b'; 113 | canvas.lineWidth = 2; 114 | canvas.beginPath(); 115 | 
canvas.moveTo(x, area.y); 116 | canvas.lineTo(x, area.y + area.h); 117 | canvas.stroke(); 118 | } 119 | }, 120 | }); 121 | } 122 | }, [currentTime]); 123 | 124 | return ( 125 | 126 | {chartData && chartData.length > 0 ? ( 127 |
128 | ) : ( 129 | 130 | )} 131 | 132 | ); 133 | }; 134 | 135 | export default DataChart; 136 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/services/api.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import { 3 | DatasetListResponse, 4 | EpisodeData, 5 | CreateDatasetRequest, 6 | CreateDatasetResponse, 7 | DatasetLoadingStatus, 8 | CreateTaskStatus, 9 | } from '@/types'; 10 | 11 | const api = axios.create({ 12 | baseURL: '/api', 13 | timeout: 30000, 14 | headers: { 15 | 'Content-Type': 'application/json', 16 | }, 17 | }); 18 | 19 | // Request interceptor for error handling 20 | api.interceptors.request.use( 21 | (config) => { 22 | return config; 23 | }, 24 | (error) => { 25 | return Promise.reject(error); 26 | } 27 | ); 28 | 29 | export const datasetApi = { 30 | // Get list of available datasets 31 | listDatasets: async (): Promise => { 32 | const response = await api.get('/datasets'); 33 | return response.data; 34 | }, 35 | 36 | // Get dataset loading status 37 | getDatasetStatus: async ( 38 | namespace: string, 39 | name: string, 40 | autoLoad: boolean = false 41 | ): Promise => { 42 | const response = await api.get( 43 | `/datasets/${namespace}/${name}/status`, 44 | { 45 | params: { auto_load: autoLoad }, 46 | } 47 | ); 48 | return response.data; 49 | }, 50 | 51 | // Get episode data 52 | getEpisode: async ( 53 | namespace: string, 54 | name: string, 55 | episodeId: number 56 | ): Promise => { 57 | const response = await api.get( 58 | `/datasets/${namespace}/${name}/episodes/${episodeId}` 59 | ); 60 | return response.data; 61 | }, 62 | 63 | // List all episode IDs for a dataset 64 | listEpisodes: async ( 65 | namespace: string, 66 | name: string 67 | ): Promise<{ episodes: number[] }> => { 68 | const response = await api.get<{ episodes: number[] }>( 69 | `/datasets/${namespace}/${name}/episodes` 70 | ); 71 | return response.data; 72 | }, 73 | 74 | // Create new dataset from selected episodes 75 | createDataset: async ( 76 | request: CreateDatasetRequest 77 | ): Promise => { 78 | const response = await api.post( 79 | '/datasets/create', 80 | request 81 | ); 82 | return response.data; 83 | }, 84 | 85 | // Search datasets by prefix 86 | searchDatasets: async (prefix: string): Promise<{ repo_ids: string[] }> => { 87 | const response = await api.get<{ repo_ids: string[] }>('/datasets/search', { 88 | params: { prefix }, 89 | }); 90 | return response.data; 91 | }, 92 | 93 | // List datasets for a user 94 | listUserDatasets: async ( 95 | username: string 96 | ): Promise<{ repo_ids: string[] }> => { 97 | const response = await api.get<{ repo_ids: string[] }>( 98 | `/datasets/user/${username}` 99 | ); 100 | return response.data; 101 | }, 102 | 103 | // Validate if a dataset exists 104 | validateDataset: async ( 105 | namespace: string, 106 | name: string 107 | ): Promise<{ exists: boolean; message?: string }> => { 108 | const response = await api.get<{ exists: boolean; message?: string }>( 109 | `/datasets/validate/${namespace}/${name}` 110 | ); 111 | return response.data; 112 | }, 113 | 114 | // Get dataset creation task status 115 | getCreateStatus: async (taskId: string): Promise => { 116 | const response = await api.get( 117 | `/datasets/create/status/${taskId}` 118 | ); 119 | return response.data; 120 | }, 121 | 122 | // Get current user info 123 | getCurrentUser: async (): Promise<{ 124 | username: string | null; 125 | fullname?: string; 126 | 
avatar_url?: string; 127 | error?: string; 128 | }> => { 129 | const response = await api.get('/user/whoami'); 130 | return response.data; 131 | }, 132 | 133 | // Poll dataset status until ready 134 | waitForDataset: async ( 135 | namespace: string, 136 | name: string, 137 | onProgress?: (status: DatasetLoadingStatus) => void 138 | ): Promise => { 139 | const pollInterval = 1000; // 1 second 140 | const maxRetries = 300; // 5 minutes max 141 | let retries = 0; 142 | 143 | while (retries < maxRetries) { 144 | const status = await datasetApi.getDatasetStatus(namespace, name, false); 145 | 146 | if (onProgress) { 147 | onProgress(status); 148 | } 149 | 150 | if (status.status === 'ready') { 151 | return; 152 | } 153 | 154 | if (status.status === 'error') { 155 | throw new Error(status.message || 'Dataset loading failed'); 156 | } 157 | 158 | await new Promise((resolve) => setTimeout(resolve, pollInterval)); 159 | retries++; 160 | } 161 | 162 | throw new Error('Dataset loading timeout'); 163 | }, 164 | }; 165 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeSidebar.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { 3 | List, 4 | Checkbox, 5 | Button, 6 | Space, 7 | Typography, 8 | Input, 9 | } from 'antd'; 10 | import { CheckCircleOutlined, CloseCircleOutlined } from '@ant-design/icons'; 11 | 12 | const { Title, Text } = Typography; 13 | const { Search } = Input; 14 | 15 | interface EpisodeSidebarProps { 16 | episodes: number[]; 17 | selectedEpisodes: number[]; 18 | currentEpisodeId: number; 19 | onToggleEpisode: (episodeId: number) => void; 20 | onSelectAll: () => void; 21 | onClearSelection: () => void; 22 | onEpisodeClick: (episodeId: number) => void; 23 | } 24 | 25 | const EpisodeSidebar: React.FC = ({ 26 | episodes, 27 | selectedEpisodes, 28 | currentEpisodeId, 29 | onToggleEpisode, 30 | onSelectAll, 31 | onClearSelection, 32 | onEpisodeClick, 33 | }) => { 34 | const [searchTerm, setSearchTerm] = React.useState(''); 35 | 36 | const filteredEpisodes = episodes.filter((ep) => 37 | ep.toString().includes(searchTerm) 38 | ); 39 | 40 | return ( 41 |
50 | 51 | Episodes 52 | 53 | 54 | 58 | setSearchTerm(e.target.value)} 62 | style={{ width: '100%' }} 63 | size='small' 64 | /> 65 | 66 | 67 | 74 | 81 | 82 | 83 | {selectedEpisodes.length > 0 && ( 84 | 85 | {selectedEpisodes.length} episode 86 | {selectedEpisodes.length === 1 ? '' : 's'} selected 87 | 88 | )} 89 | 90 | 91 |
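{/* Episode list: clicking a row calls onEpisodeClick to load that episode; the checkbox calls onToggleEpisode to include or exclude it from the selection used when creating a new dataset. */}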
92 | { 95 | const isCurrentEpisode = episodeId === currentEpisodeId; 96 | 97 | return ( 98 | { 113 | if (!isCurrentEpisode) { 114 | e.currentTarget.style.background = 115 | 'rgba(255, 255, 255, 0.03)'; 116 | } 117 | }} 118 | onMouseLeave={(e) => { 119 | if (!isCurrentEpisode) { 120 | e.currentTarget.style.background = 'transparent'; 121 | } 122 | }} 123 | > 124 |
onEpisodeClick(episodeId)} 132 | > 133 | { 136 | e.stopPropagation(); 137 | onToggleEpisode(episodeId); 138 | }} 139 | onClick={(e) => e.stopPropagation()} 140 | style={{ marginRight: 0 }} 141 | /> 142 | 150 | Episode {episodeId} 151 | 152 |
153 |
154 | ); 155 | }} 156 | /> 157 |
158 |
159 | ); 160 | }; 161 | 162 | export default EpisodeSidebar; 163 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/HomePage.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import { useNavigate } from 'react-router-dom'; 3 | import { Card, Input, Typography, Space, Spin, Button } from 'antd'; 4 | import { 5 | ArrowRightOutlined, 6 | RobotOutlined, 7 | CheckCircleOutlined, 8 | CloseCircleOutlined, 9 | } from '@ant-design/icons'; 10 | import { useQuery } from '@tanstack/react-query'; 11 | import { datasetApi } from '@/services/api'; 12 | 13 | const { Title, Text } = Typography; 14 | 15 | const HomePage: React.FC = () => { 16 | const navigate = useNavigate(); 17 | const [inputValue, setInputValue] = useState(''); 18 | const [validationStatus, setValidationStatus] = useState< 19 | 'idle' | 'validating' | 'success' | 'error' 20 | >('idle'); 21 | const [validationMessage, setValidationMessage] = useState(''); 22 | 23 | const { data: datasets, isLoading } = useQuery({ 24 | queryKey: ['datasets'], 25 | queryFn: datasetApi.listDatasets, 26 | }); 27 | 28 | const handleDatasetSelect = (repoId: string) => { 29 | const [namespace, name] = repoId.split('/'); 30 | navigate(`/${namespace}/${name}`); 31 | }; 32 | 33 | const validateDatasetFormat = (value: string): boolean => { 34 | // Check if it matches username/dataset-name format 35 | const pattern = /^[a-zA-Z0-9_-]+\/[a-zA-Z0-9_-]+$/; 36 | return pattern.test(value); 37 | }; 38 | 39 | const validateDataset = async (value: string) => { 40 | if (!value) { 41 | setValidationStatus('idle'); 42 | setValidationMessage(''); 43 | return; 44 | } 45 | 46 | if (!validateDatasetFormat(value)) { 47 | setValidationStatus('error'); 48 | setValidationMessage('Invalid format. 
Use: username/dataset-name'); 49 | return; 50 | } 51 | 52 | setValidationStatus('validating'); 53 | setValidationMessage('Checking dataset...'); 54 | 55 | try { 56 | // Check if dataset exists in the available datasets 57 | const allDatasets = [ 58 | ...(datasets?.lerobot_datasets || []), 59 | ...(datasets?.featured_datasets || []), 60 | ]; 61 | const exists = allDatasets.some((dataset) => dataset === value); 62 | 63 | if (exists) { 64 | setValidationStatus('success'); 65 | setValidationMessage('Dataset exists ✔'); 66 | } else { 67 | // Try to validate if the dataset exists on the hub 68 | try { 69 | const [namespace, name] = value.split('/'); 70 | const result = await datasetApi.validateDataset(namespace, name); 71 | if (result.exists) { 72 | setValidationStatus('success'); 73 | setValidationMessage('Dataset exists ✔'); 74 | } else { 75 | setValidationStatus('error'); 76 | setValidationMessage(result.message || 'Dataset not found on hub'); 77 | } 78 | } catch { 79 | setValidationStatus('error'); 80 | setValidationMessage('Dataset not found on hub'); 81 | } 82 | } 83 | } catch { 84 | setValidationStatus('error'); 85 | setValidationMessage('Error validating dataset'); 86 | } 87 | }; 88 | 89 | const handleInputChange = (e: React.ChangeEvent) => { 90 | const value = e.target.value; 91 | setInputValue(value); 92 | validateDataset(value); 93 | }; 94 | 95 | const handleSearch = () => { 96 | if (validationStatus === 'success' && inputValue) { 97 | handleDatasetSelect(inputValue); 98 | } 99 | }; 100 | 101 | const handleKeyPress = (e: React.KeyboardEvent) => { 102 | if (e.key === 'Enter' && validationStatus === 'success') { 103 | handleSearch(); 104 | } 105 | }; 106 | 107 | const getValidationIcon = () => { 108 | switch (validationStatus) { 109 | case 'validating': 110 | return ; 111 | case 'success': 112 | return ; 113 | case 'error': 114 | return ; 115 | default: 116 | return null; 117 | } 118 | }; 119 | 120 | return ( 121 |
122 | 123 |
124 | 125 | LeRobot Data Studio 126 | The Unofficial LeRobot Dataset Editor 127 | Edit LeRobot Datasets 128 |
129 | 130 |
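{/* Dataset lookup: the input expects username/dataset-name; validation first checks the lists returned by listDatasets and then falls back to the validateDataset endpoint on the hub. */}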
131 | Edit Dataset 132 | 136 | Create a new dataset from selected episodes 137 | 138 | 139 | 140 | 141 | 150 |
173 | 174 | {isLoading && ( 175 |
176 | 177 |
178 | )} 179 |
180 |
181 | ); 182 | }; 183 | 184 | export default HomePage; 185 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/background_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | FastAPI async background tasks 3 | docs: https://fastapi.tiangolo.com/tutorial/background-tasks/ 4 | """ 5 | 6 | import logging 7 | from typing import Dict, List 8 | 9 | import numpy as np 10 | import psutil 11 | from lerobot.datasets.dataset_tools import delete_episodes 12 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 13 | 14 | from .models import CreateTaskStatus, DatasetLoadingStatus 15 | from .state_store import StateStore 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def get_process_memory_mb(): 21 | """Get current process memory usage in MB.""" 22 | process = psutil.Process() 23 | memory_info = process.memory_info() 24 | return round(memory_info.rss / (1024 * 1024), 2) 25 | 26 | 27 | def load_dataset_task(repo_id: str, state_store: StateStore = None): 28 | """ 29 | Background task to load dataset 30 | 31 | Args: 32 | repo_id: The repository ID of the dataset to load 33 | state_store: StateStore instance for state management 34 | """ 35 | 36 | try: 37 | memory_before = get_process_memory_mb() 38 | logger.info(f"Memory before loading {repo_id}: {memory_before} MB") 39 | 40 | state_store.set_loading_status( 41 | repo_id, 42 | DatasetLoadingStatus(progress=0.3, message=f"Downloading dataset {repo_id}..."), 43 | ) 44 | 45 | dataset = LeRobotDataset(repo_id) 46 | state_store.cache_dataset(repo_id, dataset) 47 | 48 | memory_after = get_process_memory_mb() 49 | memory_used = np.around(memory_after - memory_before, 2).item() 50 | logger.info(f"Memory after loading {repo_id}: {memory_after} MB (used: {memory_used} MB)") 51 | 52 | state_store.set_loading_status( 53 | repo_id, 54 | DatasetLoadingStatus( 55 | status="ready", 56 | progress=1.0, 57 | message="Dataset loaded successfully", 58 | memory_usage_mb=memory_used, 59 | ), 60 | ) 61 | 62 | except (FileNotFoundError, PermissionError) as e: 63 | state_store.set_loading_status( 64 | repo_id, DatasetLoadingStatus(status="error", message=f"File access error: {str(e)}") 65 | ) 66 | except (ValueError, KeyError) as e: 67 | state_store.set_loading_status( 68 | repo_id, DatasetLoadingStatus(status="error", message=f"Invalid dataset format: {str(e)}") 69 | ) 70 | except Exception as e: 71 | state_store.set_loading_status( 72 | repo_id, DatasetLoadingStatus(status="error", message=f"Failed to load dataset: {str(e)}") 73 | ) 74 | finally: 75 | state_store.finish_loading(repo_id) 76 | 77 | 78 | def create_dataset_task( 79 | task_id: str, 80 | original_repo_id: str, 81 | new_repo_id: str, 82 | selected_episodes: List[int], 83 | episode_index_task_map: Dict[int, str], 84 | state_store: StateStore = None, 85 | ): 86 | """Background task to create filtered dataset. 
87 | 88 | Args: 89 | task_id: Unique task identifier 90 | original_repo_id: Source dataset repository ID 91 | new_repo_id: Target dataset repository ID 92 | selected_episodes: List of episode indices to include 93 | episode_index_task_map: Mapping of episode indices to tasks 94 | state_store: StateStore instance for state management 95 | """ 96 | 97 | try: 98 | state_store.set_creation_task( 99 | task_id, 100 | CreateTaskStatus( 101 | task_id=task_id, 102 | status="running", 103 | progress=0.1, 104 | message=f"Starting to create dataset with {len(selected_episodes)} episodes...", 105 | new_repo_id=new_repo_id, 106 | ), 107 | ) 108 | 109 | dataset = state_store.get_dataset(original_repo_id) 110 | if not dataset: 111 | raise ValueError(f"Dataset {original_repo_id} not found in cache") 112 | 113 | state_store.set_creation_task( 114 | task_id, 115 | CreateTaskStatus( 116 | task_id=task_id, 117 | status="running", 118 | progress=0.3, 119 | message="Filtering episodes...", 120 | new_repo_id=new_repo_id, 121 | ), 122 | ) 123 | 124 | # Create filtered dataset by deleting unselected episodes 125 | all_episodes = list(range(dataset.meta.total_episodes)) 126 | episodes_to_delete = [ep for ep in all_episodes if ep not in selected_episodes] 127 | 128 | if episodes_to_delete: 129 | filtered_dataset = delete_episodes( 130 | dataset, episode_indices=episodes_to_delete, repo_id=new_repo_id 131 | ) 132 | else: 133 | # If no episodes to delete, we're keeping all episodes 134 | # In this case, we need to copy the dataset with a new repo_id 135 | # For now, we'll just use the original dataset 136 | filtered_dataset = dataset 137 | filtered_dataset.repo_id = new_repo_id 138 | 139 | state_store.set_creation_task( 140 | task_id, 141 | CreateTaskStatus( 142 | task_id=task_id, 143 | status="running", 144 | progress=0.7, 145 | message="Pushing dataset to hub...", 146 | new_repo_id=new_repo_id, 147 | ), 148 | ) 149 | 150 | # TODO: Handle episode_index_task_map for custom task assignments 151 | # This might require using the add_feature API or updating metadata after creation 152 | if episode_index_task_map: 153 | logger.warning("Custom task assignment is not yet implemented with the new API") 154 | 155 | # Push to hub 156 | filtered_dataset.push_to_hub( 157 | license="apache-2.0", 158 | tags=["LeRobot", "robotics"], 159 | ) 160 | 161 | state_store.set_creation_task( 162 | task_id, 163 | CreateTaskStatus( 164 | task_id=task_id, 165 | status="completed", 166 | progress=1.0, 167 | message=f"Successfully created dataset '{new_repo_id}'", 168 | new_repo_id=new_repo_id, 169 | ), 170 | ) 171 | 172 | except Exception as e: 173 | logger.error(f"Error creating dataset: {str(e)}", exc_info=True) 174 | state_store.set_creation_task( 175 | task_id, 176 | CreateTaskStatus( 177 | task_id=task_id, 178 | status="failed", 179 | message=f"Error creating dataset: {str(e)}", 180 | new_repo_id=new_repo_id, 181 | ), 182 | ) 183 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/VideoPlayer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useRef, useEffect, useState } from 'react'; 2 | import { 3 | Card, 4 | Row, 5 | Col, 6 | Button, 7 | Space, 8 | Slider, 9 | Tooltip, 10 | Select, 11 | } from 'antd'; 12 | import { PlayCircleOutlined, PauseCircleOutlined } from '@ant-design/icons'; 13 | 14 | interface VideoInfo { 15 | url: string; 16 | filename: string; 17 | language_instruction?: string[]; 18 | } 19 | 20 
| interface VideoPlayerProps { 21 | videos: VideoInfo[]; 22 | episodeId: number; 23 | onTimeUpdate?: (time: number) => void; 24 | } 25 | 26 | const VideoPlayer: React.FC = ({ 27 | videos, 28 | episodeId, 29 | onTimeUpdate, 30 | }) => { 31 | const videoRefs = useRef<(HTMLVideoElement | null)[]>([]); 32 | const [isPlaying, setIsPlaying] = useState(false); 33 | const [currentTime, setCurrentTime] = useState(0); 34 | const [duration, setDuration] = useState(0); 35 | const [isSeekingBySlider, setIsSeekingBySlider] = useState(false); 36 | const [playbackSpeed, setPlaybackSpeed] = useState(3.0); // Default to 3x speed 37 | 38 | // Speed options from 0.5x to 3x in 0.5x increments 39 | const speedOptions = [ 40 | { label: '0.5x', value: 0.5 }, 41 | { label: '1x', value: 1.0 }, 42 | { label: '1.5x', value: 1.5 }, 43 | { label: '2x', value: 2.0 }, 44 | { label: '2.5x', value: 2.5 }, 45 | { label: '3x', value: 3.0 }, 46 | ]; 47 | 48 | useEffect(() => { 49 | // Reset refs when videos change 50 | videoRefs.current = videoRefs.current.slice(0, videos.length); 51 | }, [videos]); 52 | 53 | // Set duration when first video loads and apply initial speed 54 | useEffect(() => { 55 | const checkDuration = () => { 56 | const firstVideo = videoRefs.current[0]; 57 | if (firstVideo && firstVideo.duration) { 58 | setDuration(firstVideo.duration); 59 | // Apply initial playback speed 60 | videoRefs.current.forEach((video) => { 61 | if (video) { 62 | video.playbackRate = playbackSpeed; 63 | } 64 | }); 65 | } 66 | }; 67 | 68 | const interval = setInterval(checkDuration, 100); 69 | return () => clearInterval(interval); 70 | }, [videos, playbackSpeed]); 71 | 72 | // Update playback speed when changed 73 | useEffect(() => { 74 | videoRefs.current.forEach((video) => { 75 | if (video) { 76 | video.playbackRate = playbackSpeed; 77 | } 78 | }); 79 | }, [playbackSpeed]); 80 | 81 | // Add keyboard event handler for spacebar 82 | useEffect(() => { 83 | const handleKeyPress = (e: KeyboardEvent) => { 84 | // Check if the target is an input element to avoid conflicts 85 | const target = e.target as HTMLElement; 86 | if (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA') { 87 | return; 88 | } 89 | 90 | // Spacebar key 91 | if (e.code === 'Space' || e.key === ' ') { 92 | e.preventDefault(); // Prevent page scroll 93 | 94 | // Inline play/pause logic to avoid dependency issues 95 | const allVideos = videoRefs.current.filter((v) => v !== null); 96 | const firstVideo = allVideos[0]; 97 | 98 | if (firstVideo && !firstVideo.paused) { 99 | allVideos.forEach((video) => video?.pause()); 100 | } else { 101 | allVideos.forEach((video) => video?.play()); 102 | } 103 | } 104 | }; 105 | 106 | window.addEventListener('keydown', handleKeyPress); 107 | return () => window.removeEventListener('keydown', handleKeyPress); 108 | }, []); // Empty dependency array since we're not using external state 109 | 110 | const handleTimeUpdate = (e: React.SyntheticEvent) => { 111 | if (!isSeekingBySlider) { 112 | const video = e.currentTarget; 113 | setCurrentTime(video.currentTime); 114 | if (video.duration && !isNaN(video.duration)) { 115 | setDuration(video.duration); 116 | } 117 | if (onTimeUpdate) { 118 | onTimeUpdate(video.currentTime); 119 | } 120 | } 121 | }; 122 | 123 | const handleSliderChange = (value: number) => { 124 | setIsSeekingBySlider(true); 125 | setCurrentTime(value); 126 | 127 | // Update all videos 128 | videoRefs.current.forEach((video) => { 129 | if (video) { 130 | video.currentTime = value; 131 | } 132 | }); 133 | 134 | if 
(onTimeUpdate) { 135 | onTimeUpdate(value); 136 | } 137 | 138 | // Reset seeking flag after a short delay 139 | setTimeout(() => setIsSeekingBySlider(false), 100); 140 | }; 141 | 142 | const handlePlayPause = () => { 143 | const allVideos = videoRefs.current.filter((v) => v !== null); 144 | 145 | if (isPlaying) { 146 | allVideos.forEach((video) => video?.pause()); 147 | setIsPlaying(false); 148 | } else { 149 | allVideos.forEach((video) => video?.play()); 150 | setIsPlaying(true); 151 | } 152 | }; 153 | 154 | const handleStop = () => { 155 | const allVideos = videoRefs.current.filter((v) => v !== null); 156 | allVideos.forEach((video) => { 157 | if (video) { 158 | video.pause(); 159 | video.currentTime = 0; 160 | } 161 | }); 162 | setIsPlaying(false); 163 | setCurrentTime(0); 164 | if (onTimeUpdate) { 165 | onTimeUpdate(0); 166 | } 167 | }; 168 | 169 | const syncVideos = (index: number) => { 170 | if (!isSeekingBySlider) { 171 | const sourceVideo = videoRefs.current[index]; 172 | if (sourceVideo) { 173 | videoRefs.current.forEach((video, i) => { 174 | if ( 175 | video && 176 | i !== index && 177 | Math.abs(video.currentTime - sourceVideo.currentTime) > 0.1 178 | ) { 179 | video.currentTime = sourceVideo.currentTime; 180 | } 181 | }); 182 | } 183 | } 184 | }; 185 | 186 | const handleSpeedChange = (speed: number) => { 187 | setPlaybackSpeed(speed); 188 | }; 189 | 190 | return ( 191 | 200 | Episode {episodeId} Videos 201 |
202 | } 203 | extra={ 204 | 205 | 206 | 215 | 216 | 217 | 482 | 483 | 484 | 485 | This will create a new dataset with {selectedCount} selected 486 | episodes 487 | 488 | 489 | 490 | 491 | 494 | 501 | 502 | 503 | 504 | 505 | 506 | {/* Keyboard Shortcuts Modal */} 507 | setIsShortcutsModalVisible(false)} 511 | footer={[ 512 | , 515 | ]} 516 | width={500} 517 | > 518 | 519 |
520 | Navigation 521 | 522 |
523 | Previous Episode 524 | 525 | ← 526 | 527 |
528 |
529 | Next Episode 530 | 531 | → 532 | 533 |
534 |
535 |
536 | 537 |
538 | Video Controls 539 | 540 |
541 | Play/Pause Video 542 | 543 | Space 544 | 545 |
546 |
547 | Change Playback Speed 548 | 549 | Use dropdown (0.5x - 3x) 550 | 551 |
552 |
553 |
554 | 555 |
556 | Selection 557 | 558 |
559 | Toggle Episode Selection 560 | 561 | {navigator.platform.includes('Mac') ? 'Cmd' : 'Ctrl'}+K 562 | 563 |
564 |
565 |
566 | 567 |
568 | General 569 | 570 |
571 | Show Keyboard Shortcuts 572 | 573 | {navigator.platform.includes('Mac') ? 'Cmd' : 'Ctrl'}+P 574 | 575 |
576 |
577 |
578 |
579 |
580 | 581 | {/* Dataset Creation Status Modal */} 582 | { 585 | setShowStatusModal(false); 586 | setCreationStatus(null); 587 | }} 588 | status={ 589 | creationStatus 590 | ? { 591 | status: creationStatus.status, 592 | progress: creationStatus.progress, 593 | message: creationStatus.message, 594 | repo_id: creationStatus.new_repo_id, 595 | } 596 | : undefined 597 | } 598 | title='Dataset Creation Status' 599 | actionLabel='View New Dataset' 600 | /> 601 | 602 | ); 603 | }; 604 | 605 | export default DatasetViewer; 606 | --------------------------------------------------------------------------------
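Usage sketch for the frontend API client (src/lerobot_data_studio/frontend/src/services/api.ts): a minimal, hypothetical example of loading a dataset and following a creation task outside the React components. It assumes the FastAPI backend is running behind the /api proxy configured in vite.config.ts; the repo id is a placeholder, and the creation task id is assumed to come from a prior datasetApi.createDataset call.

import { datasetApi } from '@/services/api';

async function openDatasetAndTrackCreation(repoId: string, creationTaskId?: string) {
  const [namespace, name] = repoId.split('/');

  // Ask the backend to start loading the dataset (auto_load=true), then poll until it is ready.
  await datasetApi.getDatasetStatus(namespace, name, true);
  await datasetApi.waitForDataset(namespace, name, (status) => {
    console.log(`loading ${repoId}: ${status.status} (${Math.round((status.progress ?? 0) * 100)}%)`);
  });

  // Optionally follow a dataset-creation task started elsewhere,
  // e.g. after datasetApi.createDataset(...) returned a task id.
  if (creationTaskId) {
    let status = await datasetApi.getCreateStatus(creationTaskId);
    while (status.status !== 'completed' && status.status !== 'failed') {
      await new Promise((resolve) => setTimeout(resolve, 1000));
      status = await datasetApi.getCreateStatus(creationTaskId);
    }
    console.log(`creation task ${creationTaskId}: ${status.status} - ${status.message ?? ''}`);
  }
}

// Example call with a placeholder repo id:
openDatasetAndTrackCreation('lerobot/pusht').catch(console.error);

HomePage.tsx and DatasetViewer.tsx drive the same endpoints through React Query and component state; the sketch above only shows the raw client calls.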