├── .claudeignore ├── media ├── homepage.png ├── dataset_editor.png └── merged_dataset_created.png ├── src └── lerobot_data_studio │ ├── backend │ ├── __init__.py │ ├── utils.py │ ├── models.py │ ├── state_store.py │ ├── background_tasks.py │ └── main.py │ ├── __init__.py │ └── frontend │ ├── tsconfig.node.json │ ├── index.html │ ├── vitest.config.ts │ ├── vite.config.ts │ ├── src │ ├── App.tsx │ ├── utils │ │ └── createDataset.ts │ ├── main.tsx │ ├── components │ │ ├── LoadingIndicator.tsx │ │ ├── EpisodeIndexDisplay.tsx │ │ ├── DatasetCompletionModal.tsx │ │ ├── EpisodeNavigation.tsx │ │ ├── DataChart.tsx │ │ ├── EpisodeSidebar.tsx │ │ ├── HomePage.tsx │ │ ├── VideoPlayer.tsx │ │ └── DatasetViewer.tsx │ ├── types │ │ └── index.ts │ ├── hooks │ │ ├── useVideoPreloader.ts │ │ └── useSelectedEpisodes.ts │ ├── index.css │ └── services │ │ └── api.ts │ ├── tsconfig.json │ ├── tests │ ├── createDatasetUtil.test.ts │ └── setup.ts │ └── package.json ├── scripts └── lint.sh ├── .github └── workflows │ └── test.yml ├── pytest.ini ├── LICENSE ├── run_dev.sh ├── mypy.ini ├── .gitignore ├── README.md └── pyproject.toml /.claudeignore: -------------------------------------------------------------------------------- 1 | .git -------------------------------------------------------------------------------- /media/homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/homepage.png -------------------------------------------------------------------------------- /media/dataset_editor.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/dataset_editor.png -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/__init__.py: -------------------------------------------------------------------------------- 1 | """LeRobot Data Studio Backend Package""" 2 | __version__ = "0.1.0" 3 | -------------------------------------------------------------------------------- /media/merged_dataset_created.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jackvial/lerobot-data-studio/HEAD/media/merged_dataset_created.png -------------------------------------------------------------------------------- /src/lerobot_data_studio/__init__.py: -------------------------------------------------------------------------------- 1 | """LeRobot Data Studio - LeRobot Data Studio - Unofficial LeRobot Dataset Editor""" 2 | 3 | __version__ = "0.1.0" 4 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "skipLibCheck": true, 5 | "module": "ESNext", 6 | "moduleResolution": "bundler", 7 | "allowSyntheticDefaultImports": true 8 | }, 9 | "include": ["vite.config.ts"] 10 | } -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | LeRobot Data Studio - Unofficial LeRobot Dataset Editor 9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/vitest.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vitest/config' 2 | import react from '@vitejs/plugin-react' 3 | import path from 'path' 4 | 5 | export default defineConfig({ 6 | plugins: [react()], 7 | test: { 8 | environment: 'jsdom', 9 | setupFiles: ['./tests/setup.ts'], 10 | include: ['tests/**/*.{test,spec}.{js,mjs,cjs,ts,mts,cts,jsx,tsx}'], 11 | globals: true, 12 | }, 13 | resolve: { 14 | alias: { 15 | '@': path.resolve(__dirname, './src'), 16 | }, 17 | }, 18 | }) -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/vite.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'vite' 2 | import react from '@vitejs/plugin-react' 3 | import path from 'path' 4 | 5 | // https://vitejs.dev/config/ 6 | export default defineConfig({ 7 | plugins: [react()], 8 | resolve: { 9 | alias: { 10 | '@': path.resolve(__dirname, './src'), 11 | }, 12 | }, 13 | server: { 14 | port: 3000, 15 | host: true, // Allow connections from outside localhost 16 | proxy: { 17 | '/api': { 18 | target: 'http://localhost:8000', 19 | changeOrigin: true, 20 | }, 21 | }, 22 | }, 23 | }) -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Lint script for running Ruff checks 3 | 4 | set -e 5 | 6 | echo "🔍 Running Ruff linter..." 7 | 8 | # Default to checking src directory 9 | TARGET="${1:-./src}" 10 | 11 | # Check for private member access violations 12 | echo "Checking private member access (SLF001)..." 13 | uv run ruff check "$TARGET" --select SLF || true 14 | 15 | # Run full check 16 | echo -e "\nRunning full Ruff check..." 
17 | uv run ruff check "$TARGET" 18 | 19 | # Show statistics 20 | echo -e "\nIssue summary:" 21 | uv run ruff check "$TARGET" --statistics 2>/dev/null | head -10 22 | 23 | echo -e "\n💡 To auto-fix issues, run: uv run ruff check $TARGET --fix" 24 | echo "💡 To format code, run: uv run ruff format $TARGET" -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { Routes, Route } from 'react-router-dom'; 2 | import { Layout } from 'antd'; 3 | import HomePage from './components/HomePage'; 4 | import DatasetViewer from './components/DatasetViewer'; 5 | 6 | const { Content } = Layout; 7 | 8 | function App() { 9 | return ( 10 | 11 | 12 | 13 | } /> 14 | } /> 15 | } /> 16 | 17 | 18 | 19 | ); 20 | } 21 | 22 | export default App; -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "useDefineForClassFields": true, 5 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 6 | "module": "ESNext", 7 | "skipLibCheck": true, 8 | 9 | /* Bundler mode */ 10 | "moduleResolution": "bundler", 11 | "allowImportingTsExtensions": true, 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "noEmit": true, 15 | "jsx": "react-jsx", 16 | 17 | /* Linting */ 18 | "strict": true, 19 | "noUnusedLocals": true, 20 | "noUnusedParameters": true, 21 | "noFallthroughCasesInSwitch": true, 22 | 23 | /* Path aliases */ 24 | "baseUrl": ".", 25 | "paths": { 26 | "@/*": ["src/*"] 27 | } 28 | }, 29 | "include": ["src"], 30 | "references": [{ "path": "./tsconfig.node.json" }] 31 | } -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | pull_request: 5 | branches: [ main ] 6 | types: [opened, synchronize, reopened] 7 | 8 | jobs: 9 | test: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | 15 | - name: Install nvm and Node.js 24 16 | run: | 17 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash 18 | export NVM_DIR="$HOME/.nvm" 19 | [ -s "$NVM_DIR/nvm.sh" ] && \. 
"$NVM_DIR/nvm.sh" 20 | nvm install 24 21 | nvm use 24 22 | echo "$NVM_DIR/versions/node/$(nvm version)/bin" >> $GITHUB_PATH 23 | 24 | - name: Install frontend dependencies 25 | run: | 26 | cd src/lerobot_data_studio/frontend 27 | npm install 28 | 29 | - name: Run frontend tests 30 | run: | 31 | cd src/lerobot_data_studio/frontend 32 | npm run test -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [tool:pytest] 2 | testpaths = tests 3 | python_files = test_*.py 4 | python_classes = Test* 5 | python_functions = test_* 6 | addopts = 7 | -v 8 | --tb=short 9 | --strict-markers 10 | --disable-warnings 11 | --color=yes 12 | --durations=10 13 | markers = 14 | requires_internet: marks tests as requiring internet connection 15 | slow: marks tests as slow (deselect with '-m "not slow"') 16 | integration: marks tests as integration tests 17 | filterwarnings = 18 | ignore::DeprecationWarning 19 | ignore::PendingDeprecationWarning 20 | ignore::FutureWarning 21 | ignore::UserWarning:torch.* 22 | ignore::UserWarning:torchvision.* 23 | ignore::UserWarning:transformers.* 24 | ignore::UserWarning:huggingface_hub.* 25 | log_cli = true 26 | log_cli_level = INFO 27 | log_cli_format = %(asctime)s [%(levelname)8s] %(name)s: %(message)s 28 | log_cli_date_format = %Y-%m-%d %H:%M:%S -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/utils/createDataset.ts: -------------------------------------------------------------------------------- 1 | export interface CreateDatasetParams { 2 | datasetId: string; 3 | newRepoId: string; 4 | selectedEpisodes: number[]; 5 | } 6 | 7 | /** 8 | * Build the CreateDatasetRequest payload for the backend. 
9 | * 10 | */ 11 | export function createDatasetRequest({ 12 | datasetId, 13 | newRepoId, 14 | selectedEpisodes, 15 | }: CreateDatasetParams) { 16 | 17 | // Validate inputs 18 | if ( 19 | !datasetId || 20 | !newRepoId || 21 | !selectedEpisodes || 22 | selectedEpisodes.length === 0 23 | ) { 24 | throw new Error( 25 | `Invalid parameters: datasetId=${datasetId}, newRepoId=${newRepoId}, selectedEpisodes=${ 26 | selectedEpisodes?.length || 0 27 | }` 28 | ); 29 | } 30 | 31 | 32 | const payload = { 33 | original_repo_id: datasetId, 34 | new_repo_id: newRepoId, 35 | selected_episodes: selectedEpisodes, 36 | }; 37 | 38 | return payload; 39 | } 40 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tests/createDatasetUtil.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from 'vitest'; 2 | import { createDatasetRequest } from '../src/utils/createDataset'; 3 | 4 | describe('createDatasetRequest', () => { 5 | it('should build payload with episode and available tasks', async () => { 6 | const datasetId = 'namespace/dataset'; 7 | const newRepoId = 'namespace/new-dataset'; 8 | const selectedEpisodes = [0, 1, 2]; 9 | 10 | const payload = createDatasetRequest({ 11 | datasetId, 12 | newRepoId, 13 | selectedEpisodes, 14 | }); 15 | 16 | expect(payload.original_repo_id).toBe(datasetId); 17 | expect(payload.new_repo_id).toBe(newRepoId); 18 | expect(payload.selected_episodes).toEqual(selectedEpisodes); 19 | }); 20 | 21 | it('should handle single episode selection', async () => { 22 | const payload = createDatasetRequest({ 23 | datasetId: 'namespace/dataset', 24 | newRepoId: 'namespace/new-dataset', 25 | selectedEpisodes: [5], 26 | }); 27 | 28 | expect(payload.selected_episodes).toEqual([5]); 29 | }); 30 | }); -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 Jack Vial 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
-------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import ReactDOM from 'react-dom/client'; 3 | import { BrowserRouter } from 'react-router-dom'; 4 | import { QueryClient, QueryClientProvider } from '@tanstack/react-query'; 5 | import { ConfigProvider, theme } from 'antd'; 6 | import App from './App'; 7 | import './index.css'; 8 | 9 | const queryClient = new QueryClient({ 10 | defaultOptions: { 11 | queries: { 12 | staleTime: 5 * 60 * 1000, // 5 minutes 13 | gcTime: 10 * 60 * 1000, // 10 minutes 14 | retry: 1, 15 | }, 16 | }, 17 | }); 18 | 19 | ReactDOM.createRoot(document.getElementById('root')!).render( 20 | 21 | 22 | 23 | 32 | 33 | 34 | 35 | 36 | 37 | ); -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/tests/setup.ts: -------------------------------------------------------------------------------- 1 | // Vitest setup file for React Testing Library and additional configuration 2 | import { vi } from 'vitest'; 3 | 4 | // Mock global objects that might be used in the codebase 5 | global.ResizeObserver = vi.fn().mockImplementation(() => ({ 6 | observe: vi.fn(), 7 | unobserve: vi.fn(), 8 | disconnect: vi.fn(), 9 | })); 10 | 11 | // Mock matchMedia 12 | Object.defineProperty(window, 'matchMedia', { 13 | writable: true, 14 | value: vi.fn().mockImplementation(query => ({ 15 | matches: false, 16 | media: query, 17 | onchange: null, 18 | addListener: vi.fn(), // deprecated 19 | removeListener: vi.fn(), // deprecated 20 | addEventListener: vi.fn(), 21 | removeEventListener: vi.fn(), 22 | dispatchEvent: vi.fn(), 23 | })), 24 | }); 25 | 26 | // Mock localStorage 27 | const localStorageMock = { 28 | getItem: vi.fn(), 29 | setItem: vi.fn(), 30 | removeItem: vi.fn(), 31 | clear: vi.fn(), 32 | }; 33 | Object.defineProperty(window, 'localStorage', { 34 | value: localStorageMock 35 | }); 36 | 37 | // Mock sessionStorage 38 | Object.defineProperty(window, 'sessionStorage', { 39 | value: localStorageMock 40 | }); -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/LoadingIndicator.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Spin, Progress, Typography } from 'antd'; 3 | import { LoadingOutlined } from '@ant-design/icons'; 4 | 5 | const { Title } = Typography; 6 | 7 | interface LoadingIndicatorProps { 8 | message?: string; 9 | progress?: number; 10 | } 11 | 12 | const LoadingIndicator: React.FC = ({ 13 | message, 14 | progress, 15 | }) => { 16 | const antIcon = ; 17 | 18 | return ( 19 |
20 |
21 | 22 | 23 | {message || 'Loading...'} 24 | 25 | {progress !== undefined && progress > 0 && progress < 1 && ( 26 | 31 | )} 32 |
33 |
34 | ); 35 | }; 36 | 37 | export default LoadingIndicator; 38 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeIndexDisplay.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Typography } from 'antd'; 3 | 4 | const { Text } = Typography; 5 | 6 | interface EpisodeIndexDisplayProps { 7 | currentEpisodeId: number; 8 | actualEpisodeIndex?: number | null; 9 | } 10 | 11 | /** 12 | * Visually sanity check indices 13 | */ 14 | const EpisodeIndexDisplay: React.FC = ({ 15 | currentEpisodeId, 16 | actualEpisodeIndex, 17 | }) => { 18 | const hasIndexMismatch = 19 | actualEpisodeIndex !== null && actualEpisodeIndex !== currentEpisodeId; 20 | 21 | return ( 22 | 30 | {actualEpisodeIndex !== null ? ( 31 | <> 32 | selected_episode_index={currentEpisodeId}, row_episode_index= 33 | {actualEpisodeIndex} 34 | {hasIndexMismatch && ' ⚠️'} 35 | 36 | ) : ( 37 | <>selected_episode_index={currentEpisodeId}, content=loading... 38 | )} 39 | 40 | ); 41 | }; 42 | 43 | export default EpisodeIndexDisplay; 44 | -------------------------------------------------------------------------------- /run_dev.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Exit on error 4 | set -e 5 | 6 | # Skip Git LFS downloads (we don't need test artifacts) 7 | export GIT_LFS_SKIP_SMUDGE=1 8 | 9 | # Colors for output 10 | GREEN='\033[0;32m' 11 | BLUE='\033[0;34m' 12 | RED='\033[0;31m' 13 | NC='\033[0m' # No Color 14 | 15 | echo -e "${BLUE}Starting LeRobot Data Studio...${NC}" 16 | 17 | # Function to cleanup on exit 18 | cleanup() { 19 | echo -e "\n${BLUE}Shutting down servers...${NC}" 20 | # Kill all child processes 21 | pkill -P $$ || true 22 | exit 23 | } 24 | 25 | # Set up trap to cleanup on Ctrl+C 26 | trap cleanup INT TERM 27 | 28 | # Start backend server 29 | echo -e "${GREEN}Starting backend server...${NC}" 30 | uv run uvicorn lerobot_data_studio.backend.main:app --reload --host 0.0.0.0 --port 8000 & 31 | BACKEND_PID=$! 32 | 33 | # Wait a bit for backend to start 34 | sleep 2 35 | 36 | # Start frontend server 37 | echo -e "${GREEN}Starting frontend server...${NC}" 38 | cd src/lerobot_data_studio/frontend 39 | npm run build && npm run dev & 40 | FRONTEND_PID=$! 41 | cd ../../.. 
42 | 43 | echo -e "${GREEN}LeRobot Data Studio is running!${NC}" 44 | echo -e "${BLUE}Backend API: http://localhost:8000${NC}" 45 | echo -e "${BLUE}Frontend UI: http://localhost:3000${NC}" 46 | echo -e "${RED}Press Ctrl+C to stop all servers${NC}" 47 | 48 | # Wait for both processes 49 | wait $BACKEND_PID $FRONTEND_PID -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | python_version = 3.10 3 | warn_return_any = True 4 | warn_unused_configs = True 5 | disallow_untyped_defs = True 6 | disallow_any_unimported = False 7 | no_implicit_optional = True 8 | check_untyped_defs = True 9 | warn_redundant_casts = True 10 | warn_unused_ignores = True 11 | warn_no_return = True 12 | warn_unreachable = True 13 | strict_equality = True 14 | 15 | # Allow gradual typing - start with less strict rules 16 | allow_untyped_calls = True 17 | allow_untyped_decorators = True 18 | allow_incomplete_defs = True 19 | 20 | # Ignore missing imports for packages without stubs 21 | [mypy-lerobot.*] 22 | ignore_missing_imports = True 23 | 24 | [mypy-huggingface_hub.*] 25 | ignore_missing_imports = True 26 | 27 | [mypy-datasets.*] 28 | ignore_missing_imports = True 29 | 30 | [mypy-uvicorn.*] 31 | ignore_missing_imports = True 32 | 33 | [mypy-fastapi.*] 34 | ignore_missing_imports = True 35 | 36 | [mypy-pydantic.*] 37 | ignore_missing_imports = True 38 | 39 | [mypy-numpy.*] 40 | ignore_missing_imports = True 41 | 42 | [mypy-pandas.*] 43 | ignore_missing_imports = True 44 | 45 | [mypy-psutil.*] 46 | ignore_missing_imports = True 47 | 48 | [mypy-requests.*] 49 | ignore_missing_imports = True 50 | 51 | [mypy-cv2.*] 52 | ignore_missing_imports = True 53 | 54 | [mypy-imageio.*] 55 | ignore_missing_imports = True 56 | 57 | [mypy-h5py.*] 58 | ignore_missing_imports = True 59 | 60 | [mypy-zarr.*] 61 | ignore_missing_imports = True -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/types/index.ts: -------------------------------------------------------------------------------- 1 | export interface DatasetInfo { 2 | repo_id: string; 3 | num_samples: number; 4 | num_episodes: number; 5 | fps: number; 6 | version?: string; 7 | } 8 | 9 | export interface VideoInfo { 10 | url: string; 11 | filename: string; 12 | language_instruction?: string[]; 13 | } 14 | 15 | export interface EpisodeData { 16 | episode_id: number; 17 | dataset_info: DatasetInfo; 18 | videos_info: VideoInfo[]; 19 | episode_data: Record[]; 20 | feature_names: string[]; 21 | tasks: string[]; 22 | actual_episode_index?: number | null; 23 | } 24 | 25 | export interface DatasetListResponse { 26 | featured_datasets: string[]; 27 | lerobot_datasets: string[]; 28 | } 29 | 30 | export interface CreateDatasetRequest { 31 | original_repo_id: string; 32 | new_repo_id: string; 33 | selected_episodes: number[]; 34 | episode_index_task_map?: Record; 35 | ui_custom_task_list?: string[]; 36 | } 37 | 38 | export interface CreateDatasetResponse { 39 | success: boolean; 40 | new_repo_id: string; 41 | message: string; 42 | task_id?: string; 43 | } 44 | 45 | export interface CreateTaskStatus { 46 | task_id: string; 47 | status: 'pending' | 'running' | 'completed' | 'failed'; 48 | progress?: number; 49 | message?: string; 50 | new_repo_id?: string; 51 | } 52 | 53 | export interface DatasetLoadingStatus { 54 | status: 'loading' | 'ready' | 'error' | 'not_loaded'; 55 | progress?: number; 56 | 
message?: string; 57 | } 58 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "lerobot-data-studio", 3 | "version": "0.1.0", 4 | "description": "LeRobot Data Studio - Unofficial LeRobot Dataset Editor", 5 | "private": true, 6 | "type": "module", 7 | "scripts": { 8 | "dev": "vite", 9 | "build": "tsc && vite build", 10 | "preview": "vite preview", 11 | "type-check": "tsc --noEmit", 12 | "test": "vitest run", 13 | "test:run": "vitest run", 14 | "test:watch": "vitest", 15 | "test:coverage": "vitest run --coverage" 16 | }, 17 | "keywords": [ 18 | "lerobot", 19 | "dataset", 20 | "editor", 21 | "unofficial" 22 | ], 23 | "author": "Jack Vial ", 24 | "license": "MIT", 25 | "dependencies": { 26 | "@ant-design/icons": "^5.2.6", 27 | "@tanstack/react-query": "^5.12.0", 28 | "antd": "^5.11.5", 29 | "axios": "^1.6.2", 30 | "date-fns": "^2.30.0", 31 | "dygraphs": "^2.2.1", 32 | "react": "^19.1.0", 33 | "react-dom": "^19.1.0", 34 | "react-player": "^2.13.0", 35 | "react-router-dom": "^6.20.0" 36 | }, 37 | "devDependencies": { 38 | "@types/dygraphs": "^2.1.10", 39 | "@types/react": "^19.0.2", 40 | "@types/react-dom": "^19.0.2", 41 | "@typescript-eslint/eslint-plugin": "^6.13.2", 42 | "@typescript-eslint/parser": "^6.13.2", 43 | "@vitejs/plugin-react": "^4.2.0", 44 | "@vitest/coverage-v8": "^1.0.0", 45 | "eslint": "^8.55.0", 46 | "eslint-plugin-react": "^7.33.2", 47 | "eslint-plugin-react-hooks": "^4.6.0", 48 | "jsdom": "^23.0.0", 49 | "typescript": "^5.3.2", 50 | "vite": "^5.0.6", 51 | "vitest": "^1.0.0" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/utils.py: -------------------------------------------------------------------------------- 1 | """Utility functions for the backend.""" 2 | 3 | import logging 4 | 5 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 6 | 7 | from .models import EpisodeDataItem 8 | 9 | logger = logging.getLogger(__name__) 10 | 11 | 12 | def get_episode_data(dataset: LeRobotDataset, episode_index: int): 13 | """Extract episode data for display in the UI. 
14 | 15 | Args: 16 | dataset: The LeRobotDataset to extract data from 17 | episode_index: The episode index to extract 18 | 19 | Returns: 20 | Tuple of (episode_data_items, feature_names) 21 | """ 22 | # Get episode boundaries from meta.episodes 23 | episode_info = dataset.meta.episodes[episode_index] 24 | from_idx = episode_info["dataset_from_index"] 25 | to_idx = episode_info["dataset_to_index"] 26 | data = dataset.hf_dataset.select(range(from_idx, to_idx)).select_columns( 27 | ["episode_index", "action", "observation.state", "timestamp"] 28 | ) 29 | 30 | episode_data_items = [] 31 | for sample in data: 32 | # Round action and observation values to 2 decimal places 33 | action_values = ( 34 | sample["action"].tolist() if hasattr(sample["action"], "tolist") else list(sample["action"]) 35 | ) 36 | action_rounded = [round(val, 2) for val in action_values] 37 | 38 | observation_values = ( 39 | sample["observation.state"].tolist() 40 | if hasattr(sample["observation.state"], "tolist") 41 | else list(sample["observation.state"]) 42 | ) 43 | observation_rounded = [round(val, 2) for val in observation_values] 44 | 45 | episode_data_items.append( 46 | EpisodeDataItem( 47 | episode_index=sample["episode_index"], 48 | action=action_rounded, 49 | observation=observation_rounded, 50 | timestamp=round(float(sample["timestamp"]), 2), 51 | ) 52 | ) 53 | 54 | return episode_data_items, dataset.features["observation.state"]["names"] 55 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/models.py: -------------------------------------------------------------------------------- 1 | from typing import Dict, List, Optional 2 | 3 | from pydantic import BaseModel, Field 4 | 5 | 6 | class DatasetInfo(BaseModel): 7 | repo_id: str 8 | num_samples: int 9 | num_episodes: int 10 | fps: int 11 | version: Optional[str] = None 12 | 13 | 14 | class VideoInfo(BaseModel): 15 | url: str 16 | filename: str 17 | language_instruction: Optional[List[str]] = None 18 | 19 | 20 | class EpisodeDataItem(BaseModel): 21 | episode_index: int 22 | action: List[float] 23 | observation: List[float] 24 | timestamp: float 25 | 26 | 27 | class EpisodeData(BaseModel): 28 | episode_id: int 29 | dataset_info: DatasetInfo 30 | videos_info: List[VideoInfo] 31 | episode_data: List[EpisodeDataItem] 32 | feature_names: List[str] 33 | actual_episode_index: Optional[int] = None 34 | tasks: List[str] 35 | 36 | 37 | class DatasetListResponse(BaseModel): 38 | featured_datasets: List[str] 39 | lerobot_datasets: List[str] 40 | 41 | 42 | class CreateDatasetRequest(BaseModel): 43 | original_repo_id: str 44 | new_repo_id: str 45 | selected_episodes: List[int] = Field(..., min_length=1) 46 | 47 | # Episode ID -> Task name 48 | episode_index_task_map: Optional[Dict[int, str]] = None 49 | 50 | 51 | class CreateDatasetResponse(BaseModel): 52 | success: bool 53 | new_repo_id: str 54 | message: str 55 | task_id: Optional[str] = None 56 | 57 | 58 | class DatasetLoadingStatus(BaseModel): 59 | status: Optional[str] = None 60 | progress: Optional[float] = None 61 | message: Optional[str] = None 62 | memory_usage_mb: Optional[float] = None 63 | 64 | 65 | class DatasetSearchResponse(BaseModel): 66 | repo_ids: List[str] 67 | 68 | 69 | class DatasetValidationResponse(BaseModel): 70 | exists: bool 71 | message: Optional[str] = None 72 | 73 | 74 | class CreateTaskStatus(BaseModel): 75 | task_id: Optional[str] = None 76 | status: Optional[str] = None 77 | progress: Optional[float] = None 78 | message: 
Optional[str] = None 79 | new_repo_id: Optional[str] = None 80 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/hooks/useVideoPreloader.ts: -------------------------------------------------------------------------------- 1 | import { useEffect, useRef } from 'react'; 2 | 3 | export const useVideoPreloader = ( 4 | currentEpisodeId: number, 5 | totalEpisodes: number, 6 | getVideoUrl: (episodeId: number) => string | undefined, 7 | preloadCount: number = 2 8 | ) => { 9 | const preloadedVideos = useRef>(new Map()); 10 | 11 | useEffect(() => { 12 | // Clear old preloaded videos 13 | const currentPreloaded = new Set(); 14 | 15 | // Preload previous episodes 16 | for (let i = 1; i <= preloadCount; i++) { 17 | const prevId = currentEpisodeId - i; 18 | if (prevId >= 0) { 19 | preloadVideo(prevId); 20 | currentPreloaded.add(prevId); 21 | } 22 | } 23 | 24 | // Preload next episodes 25 | for (let i = 1; i <= preloadCount; i++) { 26 | const nextId = currentEpisodeId + i; 27 | if (nextId < totalEpisodes) { 28 | preloadVideo(nextId); 29 | currentPreloaded.add(nextId); 30 | } 31 | } 32 | 33 | // Remove videos that are no longer needed 34 | preloadedVideos.current.forEach((video, episodeId) => { 35 | if (!currentPreloaded.has(episodeId) && episodeId !== currentEpisodeId) { 36 | video.src = ''; 37 | video.load(); 38 | preloadedVideos.current.delete(episodeId); 39 | } 40 | }); 41 | }, [currentEpisodeId, totalEpisodes, getVideoUrl, preloadCount]); 42 | 43 | const preloadVideo = (episodeId: number) => { 44 | if (preloadedVideos.current.has(episodeId)) { 45 | return; 46 | } 47 | 48 | const videoUrl = getVideoUrl(episodeId); 49 | if (!videoUrl) return; 50 | 51 | const video = document.createElement('video'); 52 | video.src = videoUrl; 53 | video.preload = 'auto'; 54 | video.muted = true; 55 | 56 | // Start loading the video 57 | video.load(); 58 | 59 | preloadedVideos.current.set(episodeId, video); 60 | }; 61 | 62 | // Cleanup on unmount 63 | useEffect(() => { 64 | return () => { 65 | preloadedVideos.current.forEach((video) => { 66 | video.src = ''; 67 | video.load(); 68 | }); 69 | preloadedVideos.current.clear(); 70 | }; 71 | }, []); 72 | 73 | return { 74 | isPreloaded: (episodeId: number) => preloadedVideos.current.has(episodeId), 75 | }; 76 | }; 77 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/hooks/useSelectedEpisodes.ts: -------------------------------------------------------------------------------- 1 | import { useState, useEffect } from 'react'; 2 | 3 | interface SelectedEpisodesState { 4 | [datasetId: string]: number[]; 5 | } 6 | 7 | export const useSelectedEpisodes = (datasetId?: string) => { 8 | const [selectedEpisodes, setSelectedEpisodes] = 9 | useState({}); 10 | 11 | // Load from localStorage on mount 12 | useEffect(() => { 13 | const stored = localStorage.getItem('selectedEpisodes'); 14 | if (stored) { 15 | try { 16 | setSelectedEpisodes(JSON.parse(stored)); 17 | } catch (e) { 18 | console.error('Failed to parse stored episodes:', e); 19 | } 20 | } 21 | }, []); 22 | 23 | // Save to localStorage whenever selectedEpisodes changes 24 | useEffect(() => { 25 | localStorage.setItem('selectedEpisodes', JSON.stringify(selectedEpisodes)); 26 | }, [selectedEpisodes]); 27 | 28 | const toggleEpisode = (episodeId: number) => { 29 | if (!datasetId) return; 30 | 31 | setSelectedEpisodes((prev) => { 32 | const current = prev[datasetId] || []; 33 | const isSelected = 
current.includes(episodeId); 34 | 35 | if (isSelected) { 36 | return { 37 | ...prev, 38 | [datasetId]: current.filter((id) => id !== episodeId), 39 | }; 40 | } else { 41 | return { 42 | ...prev, 43 | [datasetId]: [...current, episodeId].sort((a, b) => a - b), 44 | }; 45 | } 46 | }); 47 | }; 48 | 49 | const clearSelection = () => { 50 | if (!datasetId) return; 51 | 52 | setSelectedEpisodes((prev) => ({ 53 | ...prev, 54 | [datasetId]: [], 55 | })); 56 | }; 57 | 58 | const selectAll = (episodeIds: number[]) => { 59 | if (!datasetId) return; 60 | 61 | setSelectedEpisodes((prev) => ({ 62 | ...prev, 63 | [datasetId]: [...episodeIds].sort((a, b) => a - b), 64 | })); 65 | }; 66 | 67 | const isSelected = (episodeId: number): boolean => { 68 | if (!datasetId) return false; 69 | return (selectedEpisodes[datasetId] || []).includes(episodeId); 70 | }; 71 | 72 | const getSelectedForDataset = (): number[] => { 73 | if (!datasetId) return []; 74 | return selectedEpisodes[datasetId] || []; 75 | }; 76 | 77 | return { 78 | selectedEpisodes: getSelectedForDataset(), 79 | toggleEpisode, 80 | clearSelection, 81 | selectAll, 82 | isSelected, 83 | selectedCount: getSelectedForDataset().length, 84 | }; 85 | }; 86 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/DatasetCompletionModal.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Modal, Button, Space, Tag, Progress, Typography } from 'antd'; 3 | import { useNavigate } from 'react-router-dom'; 4 | 5 | const { Text, Paragraph } = Typography; 6 | 7 | interface DatasetCompletionModalProps { 8 | visible: boolean; 9 | onClose: () => void; 10 | status?: { 11 | status: 'pending' | 'running' | 'completed' | 'failed'; 12 | progress?: number; 13 | message?: string; 14 | repo_id?: string; 15 | }; 16 | title?: string; 17 | actionLabel?: string; 18 | } 19 | 20 | const DatasetCompletionModal: React.FC = ({ 21 | visible, 22 | onClose, 23 | status, 24 | title = 'Dataset Operation Status', 25 | actionLabel = 'View Dataset', 26 | }) => { 27 | const navigate = useNavigate(); 28 | 29 | const handleViewDataset = () => { 30 | if (status?.repo_id) { 31 | const [namespace, name] = status.repo_id.split('/'); 32 | navigate(`/${namespace}/${name}/episode/0`); 33 | } 34 | onClose(); 35 | }; 36 | 37 | return ( 38 | 50 | Close 51 | , 52 | status?.status === 'completed' && ( 53 | 56 | ), 57 | ]} 58 | > 59 | {status && ( 60 | 61 | 62 | Status:{' '} 63 | 72 | {status.status} 73 | 74 | 75 | 76 | {status.progress !== undefined && ( 77 | 78 | )} 79 | 80 | {status.message && {status.message}} 81 | 82 | {status.repo_id && status.status === 'completed' && ( 83 | 84 | Dataset: {status.repo_id} 85 | 86 | )} 87 | 88 | )} 89 | 90 | ); 91 | }; 92 | 93 | export default DatasetCompletionModal; 94 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files 
are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .env_training 107 | .venv 108 | env/ 109 | venv/ 110 | ENV/ 111 | env.bak/ 112 | venv.bak/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | 132 | # Output directories 133 | outputs/ 134 | 135 | # W&B 136 | wandb/ 137 | wandb_downloads/ 138 | 139 | # Dataset files 140 | *.mp4 141 | !tests/template_datasets/v2_1/screwdriver_panel_ls_080225_4_e5/videos/chunk-000/observation.images.side/episode_000000.mp4 142 | *.png 143 | *.jpg 144 | *.jpeg 145 | 146 | # But allow images in media folders 147 | !media/*.png 148 | !media/*.jpg 149 | !media/*.jpeg 150 | 151 | # IDE files 152 | .vscode/ 153 | .idea/ 154 | *.swp 155 | *.swo 156 | 157 | # OS files 158 | .DS_Store 159 | Thumbs.db 160 | 161 | node_modules/ 162 | *.egg-info 163 | 164 | node_modules/ 165 | .claude/ 166 | CLAUDE.md 167 | .ruff_cache/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LeRobot Data Studio - Unofficial LeRobot Dataset Editor 2 | 3 | A web-based GUI for editing LeRobot datasets build on the LeRobot [dataset tools api](https://huggingface.co/docs/lerobot/using_dataset_tools) 4 | 5 | *Note: This is an unofficial tool and is not affiliated with Huggingface, LeRobot or the LeRobot team.* 6 | 7 | ## Main Features 8 | - Easily remove episodes and create new clean datasets 9 | - Speed control and keyboard shortcuts to streamline dataset cleaning 10 | 11 | ![Dataset Editor](media/dataset_editor.png) 12 | 13 | ## Quick Start 14 | 15 | ### Step 1: Prerequisites 16 | - [UV Python package and project manager](https://astral.sh/uv/): `curl -LsSf https://astral.sh/uv/install.sh | sh` 17 | - Python 3.10+ (You can 
use uv to install and manage Python versions e.g. `uv python install 3.12`) 18 | - Node.js 24+ (Install using nvm - see instructions below) 19 | - A Huggingface account (free) 20 | - [Huggingface CLI](https://huggingface.co/docs/huggingface_hub/en/guides/cli) 21 | 22 | #### Installing Node.js with nvm 23 | 24 | ```bash 25 | # Install nvm (Node Version Manager) 26 | curl -o- https://raw.githubusercontent.com/nvm-sh/nvm/v0.40.3/install.sh | bash 27 | 28 | # Reload your shell configuration 29 | source ~/.bashrc # or ~/.zshrc if using zsh 30 | 31 | # Install and use Node.js 24 32 | nvm install 24 33 | nvm use 24 34 | 35 | # Verify installation 36 | node --version 37 | ``` 38 | 39 | ### Step 2: Installation 40 | 41 | ```bash 42 | git clone https://github.com/jackvial/lerobot-data-studio 43 | cd lerobot-data-studio 44 | 45 | # Create a virtual environment with UV 46 | uv venv 47 | 48 | # Activate the virtual environment 49 | source .venv/bin/activate 50 | 51 | # Install all packages using UV 52 | uv sync 53 | ``` 54 | 55 | ### Step 3: Install Frontend Dependencies 56 | 57 | After completing the Python installation, install frontend dependencies: 58 | 59 | ```bash 60 | cd src/lerobot_data_studio/frontend 61 | npm install 62 | ``` 63 | 64 | ### Step 4: Running the App 65 | 66 | Use the provided script to start both frontend and backend servers: 67 | 68 | ```bash 69 | ./run_dev.sh 70 | ``` 71 | 72 | ## Dataset Creation 73 | Dataset creation for filtered (AKA edited) datasets is always non-destructive and will always create a new dataset and upload it to the Huggingface Hub. 74 | 75 | ### Filtered Dataset Creation 76 | Editing/filtering a dataset creates a new dataset that excludes only the episodes that were selected in the UI. 77 | 78 | ### Merging Datasets 79 | If you need to merge multiple datasets, we recommend using the [LeRobot dataset tools CLI](https://huggingface.co/docs/lerobot/using_dataset_tools#lerobot.datasets.merge_datasets). 80 | 81 | ## Development 82 | 83 | ### Run Backend Tests 84 | 85 | ```bash 86 | uv run pytest 87 | ``` 88 | 89 | ### Run Frontend Tests 90 | 91 | ```bash 92 | cd src/lerobot_data_studio/frontend 93 | npm run test 94 | ``` 95 | 96 | ### Contributing 97 | 98 | Contributions are welcome! 99 | 100 | ### License 101 | 102 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details. -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeNavigation.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { Button, Space, InputNumber, Tag, Tooltip } from 'antd'; 3 | import { 4 | LeftOutlined, 5 | RightOutlined, 6 | FastBackwardOutlined, 7 | FastForwardOutlined, 8 | } from '@ant-design/icons'; 9 | 10 | interface EpisodeNavigationProps { 11 | currentEpisodeId: number; 12 | totalEpisodes: number; 13 | onEpisodeChange: (episodeId: number) => void; 14 | isPreloaded: (episodeId: number) => boolean; 15 | } 16 | 17 | const EpisodeNavigation: React.FC = ({ 18 | currentEpisodeId, 19 | totalEpisodes, 20 | onEpisodeChange, 21 | isPreloaded, 22 | }) => { 23 | const canGoPrevious = currentEpisodeId > 0; 24 | const canGoNext = currentEpisodeId < totalEpisodes - 1; 25 | 26 | const handleJumpTo = (value: number | null) => { 27 | if (value !== null && value >= 0 && value < totalEpisodes) { 28 | onEpisodeChange(value); 29 | } 30 | }; 31 | 32 | return ( 33 |
34 | 35 | {/* Navigation Controls */} 36 | 37 | 44 | 45 | 57 | 58 | 59 | 60 | Episode 61 | 68 | of {totalEpisodes - 1} 69 | 70 | 71 | 72 | 84 | 85 | 92 | 93 | 94 |
95 | ); 96 | }; 97 | 98 | export default EpisodeNavigation; 99 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "lerobot-data-studio" 3 | version = "0.2.0" 4 | description = "LeRobot Data Studio - Unofficial LeRobot Dataset Editor" 5 | readme = "README.md" 6 | requires-python = ">=3.10" 7 | license = {text = "MIT"} 8 | authors = [ 9 | {name = "Jack Vial", email = "vialjack@gmail.com"} 10 | ] 11 | dependencies = [ 12 | "fastapi>=0.104.1", 13 | "uvicorn[standard]>=0.24.0", 14 | "pydantic>=2.5.0", 15 | "python-multipart>=0.0.6", 16 | "aiofiles>=23.2.1", 17 | "pandas>=2.1.3", 18 | "requests>=2.31.0", 19 | "huggingface-hub>=0.27.1", 20 | "jsonlines>=4.0.0", 21 | "lerobot @ git+https://github.com/huggingface/lerobot.git@main", 22 | "numpy>=1.26.2", 23 | "h5py>=3.10.0", 24 | "zarr>=2.17.0", 25 | "opencv-python>=4.9.0", 26 | "imageio[ffmpeg]>=2.34.0", 27 | "ruff>=0.8.0", 28 | ] 29 | 30 | [project.urls] 31 | Homepage = "https://github.com/jackvial/lerobot-data-studio" 32 | Repository = "https://github.com/jackvial/lerobot-data-studio" 33 | 34 | # External dependencies inherited from workspace root 35 | # lerobot source defined in root pyproject.toml 36 | 37 | [build-system] 38 | requires = ["hatchling"] 39 | build-backend = "hatchling.build" 40 | 41 | [tool.hatch.metadata] 42 | allow-direct-references = true 43 | 44 | [tool.hatch.build.targets.wheel] 45 | packages = ["src/lerobot_data_studio"] 46 | 47 | [dependency-groups] 48 | dev = [ 49 | "pytest>=8.4.1", 50 | "mypy>=1.8.0", 51 | "types-requests>=2.31.0", 52 | "types-psutil>=5.9.5", 53 | "pandas-stubs>=2.1.4", 54 | ] 55 | 56 | [tool.ruff] 57 | target-version = "py310" 58 | line-length = 110 59 | exclude = ["tests/artifacts/**/*.safetensors", "*_pb2.py", "*_pb2_grpc.py"] 60 | 61 | [tool.ruff.lint] 62 | # E, W: pycodestyle errors and warnings 63 | # F: PyFlakes 64 | # I: isort 65 | # B: flake8-bugbear (good practices, potential bugs) 66 | # C4: flake8-comprehensions (more concise comprehensions) 67 | # T20: flake8-print (discourage print statements in production code) 68 | # N: pep8-naming 69 | # SLF: flake8-self (private member access detection) 70 | # TODO: Add more rules when ready: "SIM", "A", "S", "D", "RUF", "UP" 71 | select = [ 72 | "E", "W", "F", "I", "B", "C4", "T20", "N", "SLF" 73 | ] 74 | ignore = [ 75 | "E501", # Line too long 76 | "T201", # Print statement found 77 | "T203", # Pprint statement found 78 | "B008", # Perform function call in argument defaults 79 | ] 80 | 81 | [tool.ruff.lint.per-file-ignores] 82 | "__init__.py" = ["F401", "F403"] 83 | "tests/*" = ["SLF001"] # Allow private member access in tests 84 | "**/test_*.py" = ["SLF001", "T201"] # Allow private member access and print in test files 85 | 86 | [tool.ruff.lint.isort] 87 | combine-as-imports = true 88 | known-first-party = ["lerobot_data_studio"] 89 | 90 | [tool.ruff.lint.pydocstyle] 91 | convention = "google" 92 | 93 | [tool.ruff.format] 94 | quote-style = "double" 95 | indent-style = "space" 96 | skip-magic-trailing-comma = false 97 | line-ending = "auto" 98 | docstring-code-format = true 99 | 100 | # Enforce private member access rules 101 | [tool.ruff.lint.flake8-self] 102 | ignore-names = ["_meta", "_abc_impl"] # Allow specific private attributes if needed 103 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/index.css: 
-------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | :root { 6 | font-family: Inter, system-ui, Avenir, Helvetica, Arial, sans-serif; 7 | line-height: 1.5; 8 | font-weight: 400; 9 | 10 | color-scheme: light dark; 11 | color: rgba(255, 255, 255, 0.87); 12 | background-color: #242424; 13 | 14 | font-synthesis: none; 15 | text-rendering: optimizeLegibility; 16 | -webkit-font-smoothing: antialiased; 17 | -moz-osx-font-smoothing: grayscale; 18 | -webkit-text-size-adjust: 100%; 19 | } 20 | 21 | body { 22 | margin: 0; 23 | display: flex; 24 | place-items: center; 25 | min-width: 320px; 26 | min-height: 100vh; 27 | } 28 | 29 | h1 { 30 | font-size: 3.2em; 31 | line-height: 1.1; 32 | } 33 | 34 | #root { 35 | width: 100%; 36 | height: 100vh; 37 | margin: 0 auto; 38 | text-align: center; 39 | } 40 | 41 | .selected-episodes-badge { 42 | position: fixed; 43 | bottom: 20px; 44 | right: 20px; 45 | z-index: 1000; 46 | } 47 | 48 | /* Dark theme for Ant Design components */ 49 | .ant-layout { 50 | background: #141414 !important; 51 | } 52 | 53 | .ant-layout-header { 54 | background: #1f1f1f !important; 55 | border-bottom: 1px solid #303030 !important; 56 | } 57 | 58 | .ant-layout-sider { 59 | background: #1f1f1f !important; 60 | border-right: 1px solid #303030 !important; 61 | } 62 | 63 | .ant-card { 64 | background: #1f1f1f !important; 65 | border: 1px solid #303030 !important; 66 | } 67 | 68 | .ant-card-head { 69 | border-bottom: 1px solid #303030 !important; 70 | } 71 | 72 | .ant-btn-primary { 73 | background: #1890ff !important; 74 | border-color: #1890ff !important; 75 | } 76 | 77 | .ant-btn-default { 78 | background: #303030 !important; 79 | border-color: #434343 !important; 80 | color: rgba(255, 255, 255, 0.85) !important; 81 | } 82 | 83 | .ant-checkbox-wrapper { 84 | color: rgba(255, 255, 255, 0.85) !important; 85 | } 86 | 87 | .ant-list { 88 | color: rgba(255, 255, 255, 0.85) !important; 89 | } 90 | 91 | .ant-list-item { 92 | border-bottom: 1px solid #303030 !important; 93 | } 94 | 95 | .ant-typography { 96 | color: rgba(255, 255, 255, 0.85) !important; 97 | } 98 | 99 | .ant-slider { 100 | margin: 10px 0 !important; 101 | } 102 | 103 | .ant-slider-track { 104 | background-color: #1890ff !important; 105 | } 106 | 107 | .ant-alert { 108 | background: #303030 !important; 109 | border: 1px solid #434343 !important; 110 | } 111 | 112 | /* Dygraph dark theme */ 113 | .dygraph-legend { 114 | background: rgba(31, 31, 31, 0.95) !important; 115 | color: rgba(255, 255, 255, 0.85) !important; 116 | } 117 | 118 | .dygraph-axis-label { 119 | color: rgba(255, 255, 255, 0.65) !important; 120 | } 121 | 122 | .dygraph-xlabel, .dygraph-ylabel { 123 | color: rgba(255, 255, 255, 0.85) !important; 124 | } 125 | 126 | .video-container { 127 | position: relative; 128 | width: 100%; 129 | background: #000; 130 | } 131 | 132 | .episode-navigation { 133 | display: flex; 134 | justify-content: space-between; 135 | align-items: center; 136 | padding: 16px; 137 | } 138 | 139 | .loading-overlay { 140 | position: fixed; 141 | top: 0; 142 | left: 0; 143 | right: 0; 144 | bottom: 0; 145 | background: rgba(0, 0, 0, 0.45); 146 | display: flex; 147 | align-items: center; 148 | justify-content: center; 149 | z-index: 9999; 150 | } -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/state_store.py: 
-------------------------------------------------------------------------------- 1 | """Dataset caching and state management for the LeRobot Data Studio backend.""" 2 | 3 | import os 4 | from dataclasses import dataclass, field 5 | from pathlib import Path 6 | from typing import Dict, Optional 7 | 8 | from huggingface_hub.constants import HF_HOME 9 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 10 | 11 | from .models import CreateTaskStatus, DatasetLoadingStatus 12 | 13 | default_cache_path = Path(HF_HOME) / "lerobot" 14 | HF_LEROBOT_HOME = Path(os.getenv("HF_LEROBOT_HOME", default_cache_path)).expanduser() 15 | 16 | 17 | @dataclass 18 | class StateStore: 19 | """Simple global state management""" 20 | 21 | dataset_cache: Dict[str, LeRobotDataset] = field(default_factory=dict) 22 | dataset_loading_status: Dict[str, DatasetLoadingStatus] = field(default_factory=dict) 23 | loading_tasks: Dict[str, str] = field(default_factory=dict) 24 | creation_tasks: Dict[str, CreateTaskStatus] = field(default_factory=dict) 25 | 26 | def _update_or_create(self, store: dict, key: str, value: object, defaults: object = None): 27 | """Generic method to update or create entries with spreading pattern for Pydantic models""" 28 | if hasattr(value, "model_dump"): 29 | # It's a Pydantic model - get only the explicitly set fields 30 | existing = store.get(key) 31 | if existing: 32 | base = existing.model_dump() 33 | elif defaults: 34 | base = defaults.model_dump() 35 | else: 36 | base = {} 37 | updates = value.model_dump(exclude_unset=True) 38 | model_class = type(value) if existing is None else type(existing) 39 | store[key] = model_class(**{**base, **updates}) 40 | else: 41 | # Full replacement with non-Pydantic object 42 | store[key] = value 43 | 44 | def is_dataset_cached(self, repo_id: str) -> bool: 45 | return repo_id in self.dataset_cache 46 | 47 | def is_dataset_loading(self, repo_id: str) -> bool: 48 | return repo_id in self.loading_tasks 49 | 50 | def get_dataset(self, repo_id: str) -> Optional[LeRobotDataset]: 51 | return self.dataset_cache.get(repo_id) 52 | 53 | def set_loading_status(self, repo_id: str, status: DatasetLoadingStatus): 54 | self._update_or_create( 55 | self.dataset_loading_status, 56 | repo_id, 57 | status, 58 | DatasetLoadingStatus(status="loading", progress=0.0), 59 | ) 60 | 61 | def get_loading_status(self, repo_id: str) -> Optional[DatasetLoadingStatus]: 62 | return self.dataset_loading_status.get(repo_id) 63 | 64 | def start_loading(self, repo_id: str): 65 | self.loading_tasks[repo_id] = "loading" 66 | 67 | def finish_loading(self, repo_id: str): 68 | if repo_id in self.loading_tasks: 69 | del self.loading_tasks[repo_id] 70 | 71 | def cache_dataset(self, repo_id: str, dataset: LeRobotDataset): 72 | self.dataset_cache[repo_id] = dataset 73 | 74 | def get_creation_task(self, task_id: str) -> Optional[CreateTaskStatus]: 75 | return self.creation_tasks.get(task_id) 76 | 77 | def set_creation_task(self, task_id: str, status: CreateTaskStatus): 78 | self._update_or_create( 79 | self.creation_tasks, 80 | task_id, 81 | status, 82 | CreateTaskStatus(task_id=task_id, status="pending", progress=0.0), 83 | ) 84 | 85 | def clear_loading_tasks(self): 86 | self.loading_tasks.clear() 87 | 88 | 89 | # Create a singleton instance for the application 90 | _state_store = StateStore() 91 | 92 | 93 | def get_state_store() -> StateStore: 94 | """Dependency injection function to get the task manager.""" 95 | return _state_store 96 | 
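For reference, a minimal sketch of how `get_state_store()` can be wired into a route handler with FastAPI's `Depends`. Only `StateStore`, `get_state_store`, and `DatasetLoadingStatus` are defined in this repository; the router prefix, route path, and handler body below are assumptions for illustration (the path simply mirrors the `/datasets/{namespace}/{name}/status` endpoint that the frontend polls in `api.ts`), not the project's actual `main.py`:

```python
# Hypothetical sketch (not the repository's main.py): one way the StateStore
# singleton could back the dataset status endpoint that
# datasetApi.getDatasetStatus() polls from the frontend.
from fastapi import APIRouter, Depends

from .models import DatasetLoadingStatus
from .state_store import StateStore, get_state_store

router = APIRouter(prefix="/api")


@router.get("/datasets/{namespace}/{name}/status", response_model=DatasetLoadingStatus)
def dataset_status(
    namespace: str,
    name: str,
    store: StateStore = Depends(get_state_store),
) -> DatasetLoadingStatus:
    repo_id = f"{namespace}/{name}"
    if store.is_dataset_cached(repo_id):
        # Dataset is already loaded into memory: report it as ready.
        return DatasetLoadingStatus(status="ready", progress=1.0)
    if store.is_dataset_loading(repo_id):
        # A background task is loading it: surface its last recorded progress.
        return store.get_loading_status(repo_id) or DatasetLoadingStatus(
            status="loading", progress=0.0
        )
    return DatasetLoadingStatus(status="not_loaded")
```

Routing the singleton through `Depends(get_state_store)` rather than importing `_state_store` directly keeps handlers easy to test, since the dependency can be overridden with a fresh `StateStore` per test.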
-------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/DataChart.tsx: -------------------------------------------------------------------------------- 1 | import React, { useEffect, useRef, useMemo } from 'react'; 2 | import { Card, Empty } from 'antd'; 3 | import Dygraph from 'dygraphs'; 4 | import 'dygraphs/dist/dygraph.css'; 5 | 6 | interface DataChartProps { 7 | episodeData: Record[]; 8 | featureNames: string[]; 9 | currentTime?: number; 10 | } 11 | 12 | const DataChart: React.FC = ({ 13 | episodeData, 14 | featureNames, 15 | currentTime, 16 | }) => { 17 | const chartRef = useRef(null); 18 | const dygraphRef = useRef(null); 19 | 20 | const chartData = useMemo(() => { 21 | if (!episodeData || episodeData.length === 0) return null; 22 | 23 | try { 24 | const data = episodeData.map((row: any) => { 25 | const timestamp = row['timestamp'] || 0; 26 | const observation = row['observation'] || []; 27 | 28 | // In Dygraph the first value is always the X axis 29 | // all other values will be plotted on the Y axis 30 | return [timestamp, ...observation]; 31 | }); 32 | 33 | return data; 34 | } catch (error) { 35 | console.error('Error converting JSON to array format:', error); 36 | return null; 37 | } 38 | }, [episodeData]); 39 | 40 | useEffect(() => { 41 | if (!chartRef.current || !chartData || chartData.length === 0) return; 42 | 43 | // Clean up previous chart 44 | if (dygraphRef.current) { 45 | dygraphRef.current.destroy(); 46 | } 47 | 48 | try { 49 | dygraphRef.current = new Dygraph(chartRef.current, chartData, { 50 | labels: ['Time', ...featureNames], 51 | showRoller: true, 52 | rollPeriod: 1, 53 | animatedZooms: false, 54 | legend: 'always', 55 | labelsSeparateLines: true, 56 | highlightCircleSize: 5, 57 | strokeWidth: 1.5, 58 | gridLineColor: '#ddd', 59 | axisLineColor: '#999', 60 | axisLabelFontSize: 12, 61 | xLabelHeight: 18, 62 | yLabelWidth: 50, 63 | drawPoints: false, 64 | pointSize: 3, 65 | hideOverlayOnMouseOut: false, 66 | showRangeSelector: true, 67 | rangeSelectorHeight: 40, 68 | rangeSelectorPlotStrokeColor: '#666', 69 | rangeSelectorPlotFillColor: '#666', 70 | interactionModel: Dygraph.defaultInteractionModel, 71 | xValueParser: (x: string) => parseFloat(x), 72 | axes: { 73 | x: { 74 | axisLabelFormatter: (x: number | Date) => { 75 | // Handle both number and Date types 76 | if (typeof x === 'number') { 77 | return `${x.toFixed(2)}s`; 78 | } 79 | // This shouldn't happen with our xValueParser, but handle it gracefully 80 | return x.toString(); 81 | }, 82 | valueFormatter: (x: number) => { 83 | return `${x.toFixed(3)} seconds`; 84 | }, 85 | }, 86 | }, 87 | xlabel: 'Time (seconds)', 88 | }); 89 | } catch (error) { 90 | console.error('Error creating Dygraph:', error); 91 | } 92 | 93 | // Cleanup function 94 | return () => { 95 | if (dygraphRef.current) { 96 | dygraphRef.current.destroy(); 97 | dygraphRef.current = null; 98 | } 99 | }; 100 | }, [chartData, featureNames]); 101 | 102 | // Update vertical line when currentTime changes 103 | useEffect(() => { 104 | if (dygraphRef.current && currentTime !== undefined) { 105 | // Draw a vertical line at the current time 106 | dygraphRef.current.updateOptions({ 107 | underlayCallback: (canvas, area, g) => { 108 | const x = g.toDomXCoord(currentTime); 109 | 110 | // Only draw if the time is within the visible range 111 | if (x >= area.x && x <= area.x + area.w) { 112 | canvas.strokeStyle = '#ff6b6b'; 113 | canvas.lineWidth = 2; 114 | canvas.beginPath(); 115 | 
canvas.moveTo(x, area.y); 116 | canvas.lineTo(x, area.y + area.h); 117 | canvas.stroke(); 118 | } 119 | }, 120 | }); 121 | } 122 | }, [currentTime]); 123 | 124 | return ( 125 | 126 | {chartData && chartData.length > 0 ? ( 127 |
128 | ) : ( 129 | 130 | )} 131 | 132 | ); 133 | }; 134 | 135 | export default DataChart; 136 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/services/api.ts: -------------------------------------------------------------------------------- 1 | import axios from 'axios'; 2 | import { 3 | DatasetListResponse, 4 | EpisodeData, 5 | CreateDatasetRequest, 6 | CreateDatasetResponse, 7 | DatasetLoadingStatus, 8 | CreateTaskStatus, 9 | } from '@/types'; 10 | 11 | const api = axios.create({ 12 | baseURL: '/api', 13 | timeout: 30000, 14 | headers: { 15 | 'Content-Type': 'application/json', 16 | }, 17 | }); 18 | 19 | // Request interceptor for error handling 20 | api.interceptors.request.use( 21 | (config) => { 22 | return config; 23 | }, 24 | (error) => { 25 | return Promise.reject(error); 26 | } 27 | ); 28 | 29 | export const datasetApi = { 30 | // Get list of available datasets 31 | listDatasets: async (): Promise => { 32 | const response = await api.get('/datasets'); 33 | return response.data; 34 | }, 35 | 36 | // Get dataset loading status 37 | getDatasetStatus: async ( 38 | namespace: string, 39 | name: string, 40 | autoLoad: boolean = false 41 | ): Promise => { 42 | const response = await api.get( 43 | `/datasets/${namespace}/${name}/status`, 44 | { 45 | params: { auto_load: autoLoad }, 46 | } 47 | ); 48 | return response.data; 49 | }, 50 | 51 | // Get episode data 52 | getEpisode: async ( 53 | namespace: string, 54 | name: string, 55 | episodeId: number 56 | ): Promise => { 57 | const response = await api.get( 58 | `/datasets/${namespace}/${name}/episodes/${episodeId}` 59 | ); 60 | return response.data; 61 | }, 62 | 63 | // List all episode IDs for a dataset 64 | listEpisodes: async ( 65 | namespace: string, 66 | name: string 67 | ): Promise<{ episodes: number[] }> => { 68 | const response = await api.get<{ episodes: number[] }>( 69 | `/datasets/${namespace}/${name}/episodes` 70 | ); 71 | return response.data; 72 | }, 73 | 74 | // Create new dataset from selected episodes 75 | createDataset: async ( 76 | request: CreateDatasetRequest 77 | ): Promise => { 78 | const response = await api.post( 79 | '/datasets/create', 80 | request 81 | ); 82 | return response.data; 83 | }, 84 | 85 | // Search datasets by prefix 86 | searchDatasets: async (prefix: string): Promise<{ repo_ids: string[] }> => { 87 | const response = await api.get<{ repo_ids: string[] }>('/datasets/search', { 88 | params: { prefix }, 89 | }); 90 | return response.data; 91 | }, 92 | 93 | // List datasets for a user 94 | listUserDatasets: async ( 95 | username: string 96 | ): Promise<{ repo_ids: string[] }> => { 97 | const response = await api.get<{ repo_ids: string[] }>( 98 | `/datasets/user/${username}` 99 | ); 100 | return response.data; 101 | }, 102 | 103 | // Validate if a dataset exists 104 | validateDataset: async ( 105 | namespace: string, 106 | name: string 107 | ): Promise<{ exists: boolean; message?: string }> => { 108 | const response = await api.get<{ exists: boolean; message?: string }>( 109 | `/datasets/validate/${namespace}/${name}` 110 | ); 111 | return response.data; 112 | }, 113 | 114 | // Get dataset creation task status 115 | getCreateStatus: async (taskId: string): Promise => { 116 | const response = await api.get( 117 | `/datasets/create/status/${taskId}` 118 | ); 119 | return response.data; 120 | }, 121 | 122 | // Get current user info 123 | getCurrentUser: async (): Promise<{ 124 | username: string | null; 125 | fullname?: string; 126 | 
avatar_url?: string; 127 | error?: string; 128 | }> => { 129 | const response = await api.get('/user/whoami'); 130 | return response.data; 131 | }, 132 | 133 | // Poll dataset status until ready 134 | waitForDataset: async ( 135 | namespace: string, 136 | name: string, 137 | onProgress?: (status: DatasetLoadingStatus) => void 138 | ): Promise => { 139 | const pollInterval = 1000; // 1 second 140 | const maxRetries = 300; // 5 minutes max 141 | let retries = 0; 142 | 143 | while (retries < maxRetries) { 144 | const status = await datasetApi.getDatasetStatus(namespace, name, false); 145 | 146 | if (onProgress) { 147 | onProgress(status); 148 | } 149 | 150 | if (status.status === 'ready') { 151 | return; 152 | } 153 | 154 | if (status.status === 'error') { 155 | throw new Error(status.message || 'Dataset loading failed'); 156 | } 157 | 158 | await new Promise((resolve) => setTimeout(resolve, pollInterval)); 159 | retries++; 160 | } 161 | 162 | throw new Error('Dataset loading timeout'); 163 | }, 164 | }; 165 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/EpisodeSidebar.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react'; 2 | import { 3 | List, 4 | Checkbox, 5 | Button, 6 | Space, 7 | Typography, 8 | Input, 9 | } from 'antd'; 10 | import { CheckCircleOutlined, CloseCircleOutlined } from '@ant-design/icons'; 11 | 12 | const { Title, Text } = Typography; 13 | const { Search } = Input; 14 | 15 | interface EpisodeSidebarProps { 16 | episodes: number[]; 17 | selectedEpisodes: number[]; 18 | currentEpisodeId: number; 19 | onToggleEpisode: (episodeId: number) => void; 20 | onSelectAll: () => void; 21 | onClearSelection: () => void; 22 | onEpisodeClick: (episodeId: number) => void; 23 | } 24 | 25 | const EpisodeSidebar: React.FC = ({ 26 | episodes, 27 | selectedEpisodes, 28 | currentEpisodeId, 29 | onToggleEpisode, 30 | onSelectAll, 31 | onClearSelection, 32 | onEpisodeClick, 33 | }) => { 34 | const [searchTerm, setSearchTerm] = React.useState(''); 35 | 36 | const filteredEpisodes = episodes.filter((ep) => 37 | ep.toString().includes(searchTerm) 38 | ); 39 | 40 | return ( 41 |
50 | 51 | Episodes 52 | 53 | 54 | 58 | setSearchTerm(e.target.value)} 62 | style={{ width: '100%' }} 63 | size='small' 64 | /> 65 | 66 | 67 | 74 | 81 | 82 | 83 | {selectedEpisodes.length > 0 && ( 84 | 85 | {selectedEpisodes.length} episode 86 | {selectedEpisodes.length === 1 ? '' : 's'} selected 87 | 88 | )} 89 | 90 | 91 |
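{/* Episode list: clicking a row calls onEpisodeClick to load that episode; the checkbox calls onToggleEpisode to include or exclude it from the selection used when creating a new dataset. */}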
92 | { 95 | const isCurrentEpisode = episodeId === currentEpisodeId; 96 | 97 | return ( 98 | { 113 | if (!isCurrentEpisode) { 114 | e.currentTarget.style.background = 115 | 'rgba(255, 255, 255, 0.03)'; 116 | } 117 | }} 118 | onMouseLeave={(e) => { 119 | if (!isCurrentEpisode) { 120 | e.currentTarget.style.background = 'transparent'; 121 | } 122 | }} 123 | > 124 |
onEpisodeClick(episodeId)} 132 | > 133 | { 136 | e.stopPropagation(); 137 | onToggleEpisode(episodeId); 138 | }} 139 | onClick={(e) => e.stopPropagation()} 140 | style={{ marginRight: 0 }} 141 | /> 142 | 150 | Episode {episodeId} 151 | 152 |
153 |
154 | ); 155 | }} 156 | /> 157 |
158 |
159 | ); 160 | }; 161 | 162 | export default EpisodeSidebar; 163 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/HomePage.tsx: -------------------------------------------------------------------------------- 1 | import React, { useState } from 'react'; 2 | import { useNavigate } from 'react-router-dom'; 3 | import { Card, Input, Typography, Space, Spin, Button } from 'antd'; 4 | import { 5 | ArrowRightOutlined, 6 | RobotOutlined, 7 | CheckCircleOutlined, 8 | CloseCircleOutlined, 9 | } from '@ant-design/icons'; 10 | import { useQuery } from '@tanstack/react-query'; 11 | import { datasetApi } from '@/services/api'; 12 | 13 | const { Title, Text } = Typography; 14 | 15 | const HomePage: React.FC = () => { 16 | const navigate = useNavigate(); 17 | const [inputValue, setInputValue] = useState(''); 18 | const [validationStatus, setValidationStatus] = useState< 19 | 'idle' | 'validating' | 'success' | 'error' 20 | >('idle'); 21 | const [validationMessage, setValidationMessage] = useState(''); 22 | 23 | const { data: datasets, isLoading } = useQuery({ 24 | queryKey: ['datasets'], 25 | queryFn: datasetApi.listDatasets, 26 | }); 27 | 28 | const handleDatasetSelect = (repoId: string) => { 29 | const [namespace, name] = repoId.split('/'); 30 | navigate(`/${namespace}/${name}`); 31 | }; 32 | 33 | const validateDatasetFormat = (value: string): boolean => { 34 | // Check if it matches username/dataset-name format 35 | const pattern = /^[a-zA-Z0-9_-]+\/[a-zA-Z0-9_-]+$/; 36 | return pattern.test(value); 37 | }; 38 | 39 | const validateDataset = async (value: string) => { 40 | if (!value) { 41 | setValidationStatus('idle'); 42 | setValidationMessage(''); 43 | return; 44 | } 45 | 46 | if (!validateDatasetFormat(value)) { 47 | setValidationStatus('error'); 48 | setValidationMessage('Invalid format. 
Use: username/dataset-name'); 49 | return; 50 | } 51 | 52 | setValidationStatus('validating'); 53 | setValidationMessage('Checking dataset...'); 54 | 55 | try { 56 | // Check if dataset exists in the available datasets 57 | const allDatasets = [ 58 | ...(datasets?.lerobot_datasets || []), 59 | ...(datasets?.featured_datasets || []), 60 | ]; 61 | const exists = allDatasets.some((dataset) => dataset === value); 62 | 63 | if (exists) { 64 | setValidationStatus('success'); 65 | setValidationMessage('Dataset exists ✔'); 66 | } else { 67 | // Try to validate if the dataset exists on the hub 68 | try { 69 | const [namespace, name] = value.split('/'); 70 | const result = await datasetApi.validateDataset(namespace, name); 71 | if (result.exists) { 72 | setValidationStatus('success'); 73 | setValidationMessage('Dataset exists ✔'); 74 | } else { 75 | setValidationStatus('error'); 76 | setValidationMessage(result.message || 'Dataset not found on hub'); 77 | } 78 | } catch { 79 | setValidationStatus('error'); 80 | setValidationMessage('Dataset not found on hub'); 81 | } 82 | } 83 | } catch { 84 | setValidationStatus('error'); 85 | setValidationMessage('Error validating dataset'); 86 | } 87 | }; 88 | 89 | const handleInputChange = (e: React.ChangeEvent) => { 90 | const value = e.target.value; 91 | setInputValue(value); 92 | validateDataset(value); 93 | }; 94 | 95 | const handleSearch = () => { 96 | if (validationStatus === 'success' && inputValue) { 97 | handleDatasetSelect(inputValue); 98 | } 99 | }; 100 | 101 | const handleKeyPress = (e: React.KeyboardEvent) => { 102 | if (e.key === 'Enter' && validationStatus === 'success') { 103 | handleSearch(); 104 | } 105 | }; 106 | 107 | const getValidationIcon = () => { 108 | switch (validationStatus) { 109 | case 'validating': 110 | return ; 111 | case 'success': 112 | return ; 113 | case 'error': 114 | return ; 115 | default: 116 | return null; 117 | } 118 | }; 119 | 120 | return ( 121 |
122 | 123 |
124 | 125 | LeRobot Data Studio 126 | The Unofficial LeRobot Dataset Editor 127 | Edit LeRobot Datasets 128 |
129 | 130 |
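{/* Dataset lookup: the input expects username/dataset-name; validation first checks the lists returned by listDatasets and then falls back to the validateDataset endpoint on the hub. */}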
131 | Edit Dataset 132 | 136 | Create a new dataset from selected episodes 137 | 138 | 139 | 140 | 141 | 150 |
173 | 174 | {isLoading && ( 175 |
176 | 177 |
178 | )} 179 |
180 |
181 | ); 182 | }; 183 | 184 | export default HomePage; 185 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/backend/background_tasks.py: -------------------------------------------------------------------------------- 1 | """ 2 | FastAPI async background tasks 3 | docs: https://fastapi.tiangolo.com/tutorial/background-tasks/ 4 | """ 5 | 6 | import logging 7 | from typing import Dict, List 8 | 9 | import numpy as np 10 | import psutil 11 | from lerobot.datasets.dataset_tools import delete_episodes 12 | from lerobot.datasets.lerobot_dataset import LeRobotDataset 13 | 14 | from .models import CreateTaskStatus, DatasetLoadingStatus 15 | from .state_store import StateStore 16 | 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | def get_process_memory_mb(): 21 | """Get current process memory usage in MB.""" 22 | process = psutil.Process() 23 | memory_info = process.memory_info() 24 | return round(memory_info.rss / (1024 * 1024), 2) 25 | 26 | 27 | def load_dataset_task(repo_id: str, state_store: StateStore = None): 28 | """ 29 | Background task to load dataset 30 | 31 | Args: 32 | repo_id: The repository ID of the dataset to load 33 | state_store: StateStore instance for state management 34 | """ 35 | 36 | try: 37 | memory_before = get_process_memory_mb() 38 | logger.info(f"Memory before loading {repo_id}: {memory_before} MB") 39 | 40 | state_store.set_loading_status( 41 | repo_id, 42 | DatasetLoadingStatus(progress=0.3, message=f"Downloading dataset {repo_id}..."), 43 | ) 44 | 45 | dataset = LeRobotDataset(repo_id) 46 | state_store.cache_dataset(repo_id, dataset) 47 | 48 | memory_after = get_process_memory_mb() 49 | memory_used = np.around(memory_after - memory_before, 2).item() 50 | logger.info(f"Memory after loading {repo_id}: {memory_after} MB (used: {memory_used} MB)") 51 | 52 | state_store.set_loading_status( 53 | repo_id, 54 | DatasetLoadingStatus( 55 | status="ready", 56 | progress=1.0, 57 | message="Dataset loaded successfully", 58 | memory_usage_mb=memory_used, 59 | ), 60 | ) 61 | 62 | except (FileNotFoundError, PermissionError) as e: 63 | state_store.set_loading_status( 64 | repo_id, DatasetLoadingStatus(status="error", message=f"File access error: {str(e)}") 65 | ) 66 | except (ValueError, KeyError) as e: 67 | state_store.set_loading_status( 68 | repo_id, DatasetLoadingStatus(status="error", message=f"Invalid dataset format: {str(e)}") 69 | ) 70 | except Exception as e: 71 | state_store.set_loading_status( 72 | repo_id, DatasetLoadingStatus(status="error", message=f"Failed to load dataset: {str(e)}") 73 | ) 74 | finally: 75 | state_store.finish_loading(repo_id) 76 | 77 | 78 | def create_dataset_task( 79 | task_id: str, 80 | original_repo_id: str, 81 | new_repo_id: str, 82 | selected_episodes: List[int], 83 | episode_index_task_map: Dict[int, str], 84 | state_store: StateStore = None, 85 | ): 86 | """Background task to create filtered dataset. 
87 | 88 | Args: 89 | task_id: Unique task identifier 90 | original_repo_id: Source dataset repository ID 91 | new_repo_id: Target dataset repository ID 92 | selected_episodes: List of episode indices to include 93 | episode_index_task_map: Mapping of episode indices to tasks 94 | state_store: StateStore instance for state management 95 | """ 96 | 97 | try: 98 | state_store.set_creation_task( 99 | task_id, 100 | CreateTaskStatus( 101 | task_id=task_id, 102 | status="running", 103 | progress=0.1, 104 | message=f"Starting to create dataset with {len(selected_episodes)} episodes...", 105 | new_repo_id=new_repo_id, 106 | ), 107 | ) 108 | 109 | dataset = state_store.get_dataset(original_repo_id) 110 | if not dataset: 111 | raise ValueError(f"Dataset {original_repo_id} not found in cache") 112 | 113 | state_store.set_creation_task( 114 | task_id, 115 | CreateTaskStatus( 116 | task_id=task_id, 117 | status="running", 118 | progress=0.3, 119 | message="Filtering episodes...", 120 | new_repo_id=new_repo_id, 121 | ), 122 | ) 123 | 124 | # Create filtered dataset by deleting unselected episodes 125 | all_episodes = list(range(dataset.meta.total_episodes)) 126 | episodes_to_delete = [ep for ep in all_episodes if ep not in selected_episodes] 127 | 128 | if episodes_to_delete: 129 | filtered_dataset = delete_episodes( 130 | dataset, episode_indices=episodes_to_delete, repo_id=new_repo_id 131 | ) 132 | else: 133 | # If no episodes to delete, we're keeping all episodes 134 | # In this case, we need to copy the dataset with a new repo_id 135 | # For now, we'll just use the original dataset 136 | filtered_dataset = dataset 137 | filtered_dataset.repo_id = new_repo_id 138 | 139 | state_store.set_creation_task( 140 | task_id, 141 | CreateTaskStatus( 142 | task_id=task_id, 143 | status="running", 144 | progress=0.7, 145 | message="Pushing dataset to hub...", 146 | new_repo_id=new_repo_id, 147 | ), 148 | ) 149 | 150 | # TODO: Handle episode_index_task_map for custom task assignments 151 | # This might require using the add_feature API or updating metadata after creation 152 | if episode_index_task_map: 153 | logger.warning("Custom task assignment is not yet implemented with the new API") 154 | 155 | # Push to hub 156 | filtered_dataset.push_to_hub( 157 | license="apache-2.0", 158 | tags=["LeRobot", "robotics"], 159 | ) 160 | 161 | state_store.set_creation_task( 162 | task_id, 163 | CreateTaskStatus( 164 | task_id=task_id, 165 | status="completed", 166 | progress=1.0, 167 | message=f"Successfully created dataset '{new_repo_id}'", 168 | new_repo_id=new_repo_id, 169 | ), 170 | ) 171 | 172 | except Exception as e: 173 | logger.error(f"Error creating dataset: {str(e)}", exc_info=True) 174 | state_store.set_creation_task( 175 | task_id, 176 | CreateTaskStatus( 177 | task_id=task_id, 178 | status="failed", 179 | message=f"Error creating dataset: {str(e)}", 180 | new_repo_id=new_repo_id, 181 | ), 182 | ) 183 | -------------------------------------------------------------------------------- /src/lerobot_data_studio/frontend/src/components/VideoPlayer.tsx: -------------------------------------------------------------------------------- 1 | import React, { useRef, useEffect, useState } from 'react'; 2 | import { 3 | Card, 4 | Row, 5 | Col, 6 | Button, 7 | Space, 8 | Slider, 9 | Tooltip, 10 | Select, 11 | } from 'antd'; 12 | import { PlayCircleOutlined, PauseCircleOutlined } from '@ant-design/icons'; 13 | 14 | interface VideoInfo { 15 | url: string; 16 | filename: string; 17 | language_instruction?: string[]; 18 | } 19 | 20 
| interface VideoPlayerProps { 21 | videos: VideoInfo[]; 22 | episodeId: number; 23 | onTimeUpdate?: (time: number) => void; 24 | } 25 | 26 | const VideoPlayer: React.FC = ({ 27 | videos, 28 | episodeId, 29 | onTimeUpdate, 30 | }) => { 31 | const videoRefs = useRef<(HTMLVideoElement | null)[]>([]); 32 | const [isPlaying, setIsPlaying] = useState(false); 33 | const [currentTime, setCurrentTime] = useState(0); 34 | const [duration, setDuration] = useState(0); 35 | const [isSeekingBySlider, setIsSeekingBySlider] = useState(false); 36 | const [playbackSpeed, setPlaybackSpeed] = useState(3.0); // Default to 3x speed 37 | 38 | // Speed options from 0.5x to 3x in 0.5x increments 39 | const speedOptions = [ 40 | { label: '0.5x', value: 0.5 }, 41 | { label: '1x', value: 1.0 }, 42 | { label: '1.5x', value: 1.5 }, 43 | { label: '2x', value: 2.0 }, 44 | { label: '2.5x', value: 2.5 }, 45 | { label: '3x', value: 3.0 }, 46 | ]; 47 | 48 | useEffect(() => { 49 | // Reset refs when videos change 50 | videoRefs.current = videoRefs.current.slice(0, videos.length); 51 | }, [videos]); 52 | 53 | // Set duration when first video loads and apply initial speed 54 | useEffect(() => { 55 | const checkDuration = () => { 56 | const firstVideo = videoRefs.current[0]; 57 | if (firstVideo && firstVideo.duration) { 58 | setDuration(firstVideo.duration); 59 | // Apply initial playback speed 60 | videoRefs.current.forEach((video) => { 61 | if (video) { 62 | video.playbackRate = playbackSpeed; 63 | } 64 | }); 65 | } 66 | }; 67 | 68 | const interval = setInterval(checkDuration, 100); 69 | return () => clearInterval(interval); 70 | }, [videos, playbackSpeed]); 71 | 72 | // Update playback speed when changed 73 | useEffect(() => { 74 | videoRefs.current.forEach((video) => { 75 | if (video) { 76 | video.playbackRate = playbackSpeed; 77 | } 78 | }); 79 | }, [playbackSpeed]); 80 | 81 | // Add keyboard event handler for spacebar 82 | useEffect(() => { 83 | const handleKeyPress = (e: KeyboardEvent) => { 84 | // Check if the target is an input element to avoid conflicts 85 | const target = e.target as HTMLElement; 86 | if (target.tagName === 'INPUT' || target.tagName === 'TEXTAREA') { 87 | return; 88 | } 89 | 90 | // Spacebar key 91 | if (e.code === 'Space' || e.key === ' ') { 92 | e.preventDefault(); // Prevent page scroll 93 | 94 | // Inline play/pause logic to avoid dependency issues 95 | const allVideos = videoRefs.current.filter((v) => v !== null); 96 | const firstVideo = allVideos[0]; 97 | 98 | if (firstVideo && !firstVideo.paused) { 99 | allVideos.forEach((video) => video?.pause()); 100 | } else { 101 | allVideos.forEach((video) => video?.play()); 102 | } 103 | } 104 | }; 105 | 106 | window.addEventListener('keydown', handleKeyPress); 107 | return () => window.removeEventListener('keydown', handleKeyPress); 108 | }, []); // Empty dependency array since we're not using external state 109 | 110 | const handleTimeUpdate = (e: React.SyntheticEvent) => { 111 | if (!isSeekingBySlider) { 112 | const video = e.currentTarget; 113 | setCurrentTime(video.currentTime); 114 | if (video.duration && !isNaN(video.duration)) { 115 | setDuration(video.duration); 116 | } 117 | if (onTimeUpdate) { 118 | onTimeUpdate(video.currentTime); 119 | } 120 | } 121 | }; 122 | 123 | const handleSliderChange = (value: number) => { 124 | setIsSeekingBySlider(true); 125 | setCurrentTime(value); 126 | 127 | // Update all videos 128 | videoRefs.current.forEach((video) => { 129 | if (video) { 130 | video.currentTime = value; 131 | } 132 | }); 133 | 134 | if 
(onTimeUpdate) { 135 | onTimeUpdate(value); 136 | } 137 | 138 | // Reset seeking flag after a short delay 139 | setTimeout(() => setIsSeekingBySlider(false), 100); 140 | }; 141 | 142 | const handlePlayPause = () => { 143 | const allVideos = videoRefs.current.filter((v) => v !== null); 144 | 145 | if (isPlaying) { 146 | allVideos.forEach((video) => video?.pause()); 147 | setIsPlaying(false); 148 | } else { 149 | allVideos.forEach((video) => video?.play()); 150 | setIsPlaying(true); 151 | } 152 | }; 153 | 154 | const handleStop = () => { 155 | const allVideos = videoRefs.current.filter((v) => v !== null); 156 | allVideos.forEach((video) => { 157 | if (video) { 158 | video.pause(); 159 | video.currentTime = 0; 160 | } 161 | }); 162 | setIsPlaying(false); 163 | setCurrentTime(0); 164 | if (onTimeUpdate) { 165 | onTimeUpdate(0); 166 | } 167 | }; 168 | 169 | const syncVideos = (index: number) => { 170 | if (!isSeekingBySlider) { 171 | const sourceVideo = videoRefs.current[index]; 172 | if (sourceVideo) { 173 | videoRefs.current.forEach((video, i) => { 174 | if ( 175 | video && 176 | i !== index && 177 | Math.abs(video.currentTime - sourceVideo.currentTime) > 0.1 178 | ) { 179 | video.currentTime = sourceVideo.currentTime; 180 | } 181 | }); 182 | } 183 | } 184 | }; 185 | 186 | const handleSpeedChange = (speed: number) => { 187 | setPlaybackSpeed(speed); 188 | }; 189 | 190 | return ( 191 | 200 | Episode {episodeId} Videos 201 |
202 | } 203 | extra={ 204 | 205 | 206 | 215 | 216 | 217 | 482 | 483 | 484 | 485 | This will create a new dataset with {selectedCount} selected 486 | episodes 487 | 488 | 489 | 490 | 491 | 494 | 501 | 502 | 503 | 504 | 505 | 506 | {/* Keyboard Shortcuts Modal */} 507 | setIsShortcutsModalVisible(false)} 511 | footer={[ 512 | , 515 | ]} 516 | width={500} 517 | > 518 | 519 |
520 | Navigation 521 | 522 |
523 | Previous Episode 524 | 525 | ← 526 | 527 |
528 |
529 | Next Episode 530 | 531 | → 532 | 533 |
534 |
535 |
536 | 537 |
538 | Video Controls 539 | 540 |
541 | Play/Pause Video 542 | 543 | Space 544 | 545 |
546 |
547 | Change Playback Speed 548 | 549 | Use dropdown (0.5x - 3x) 550 | 551 |
552 |
553 |
554 | 555 |
556 | Selection 557 | 558 |
559 | Toggle Episode Selection 560 | 561 | {navigator.platform.includes('Mac') ? 'Cmd' : 'Ctrl'}+K 562 | 563 |
564 |
565 |
566 | 567 |
568 | General 569 | 570 |
571 | Show Keyboard Shortcuts 572 | 573 | {navigator.platform.includes('Mac') ? 'Cmd' : 'Ctrl'}+P 574 | 575 |
576 |
577 |
578 |
579 |
580 | 581 | {/* Dataset Creation Status Modal */} 582 | { 585 | setShowStatusModal(false); 586 | setCreationStatus(null); 587 | }} 588 | status={ 589 | creationStatus 590 | ? { 591 | status: creationStatus.status, 592 | progress: creationStatus.progress, 593 | message: creationStatus.message, 594 | repo_id: creationStatus.new_repo_id, 595 | } 596 | : undefined 597 | } 598 | title='Dataset Creation Status' 599 | actionLabel='View New Dataset' 600 | /> 601 | 602 | ); 603 | }; 604 | 605 | export default DatasetViewer; 606 | --------------------------------------------------------------------------------
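Usage sketch for the frontend API client (src/lerobot_data_studio/frontend/src/services/api.ts): a minimal, hypothetical example of loading a dataset and following a creation task outside the React components. It assumes the FastAPI backend is running behind the /api proxy configured in vite.config.ts; the repo id is a placeholder, and the creation task id is assumed to come from a prior datasetApi.createDataset call.

import { datasetApi } from '@/services/api';

async function openDatasetAndTrackCreation(repoId: string, creationTaskId?: string) {
  const [namespace, name] = repoId.split('/');

  // Ask the backend to start loading the dataset (auto_load=true), then poll until it is ready.
  await datasetApi.getDatasetStatus(namespace, name, true);
  await datasetApi.waitForDataset(namespace, name, (status) => {
    console.log(`loading ${repoId}: ${status.status} (${Math.round((status.progress ?? 0) * 100)}%)`);
  });

  // Optionally follow a dataset-creation task started elsewhere,
  // e.g. after datasetApi.createDataset(...) returned a task id.
  if (creationTaskId) {
    let status = await datasetApi.getCreateStatus(creationTaskId);
    while (status.status !== 'completed' && status.status !== 'failed') {
      await new Promise((resolve) => setTimeout(resolve, 1000));
      status = await datasetApi.getCreateStatus(creationTaskId);
    }
    console.log(`creation task ${creationTaskId}: ${status.status} - ${status.message ?? ''}`);
  }
}

// Example call with a placeholder repo id:
openDatasetAndTrackCreation('lerobot/pusht').catch(console.error);

HomePage.tsx and DatasetViewer.tsx drive the same endpoints through React Query and component state; the sketch above only shows the raw client calls.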