├── docker
    ├── data
    │   ├── api
    │   │   └── api.txt
    │   ├── minio
    │   │   └── miniio.txt
    │   ├── postgres
    │   │   └── pg.txt
    │   ├── redis
    │   │   └── redis.txt
    │   ├── celery
    │   │   └── celery.txt
    │   └── README.md
    ├── postgres
    │   └── init.sql
    ├── reset-redis.sh
    ├── redis
    │   └── redis.conf
    └── start.sh
├── backend
    ├── app
    │   ├── plugins
    │   │   ├── __init__.py
    │   │   └── base_plugin.py
    │   ├── api
    │   │   └── v1
    │   │   │   ├── endpoints
    │   │   │       ├── __init__.py
    │   │   │       ├── plugins.py
    │   │   │       └── llm_test.py
    │   │   │   ├── __init__.py
    │   │   │   └── schemas
    │   │   │       ├── conversion_schemas.py
    │   │   │       └── library_schemas.py
    │   ├── tasks
    │   │   └── __init__.py
    │   ├── utils
    │   │   └── response.py
    │   ├── celery_app.py
    │   └── models
    │   │   ├── permission.py
    │   │   ├── plugin.py
    │   │   ├── role_permission.py
    │   │   ├── role.py
    │   │   ├── conversion_file_detail.py
    │   │   ├── user_session.py
    │   │   ├── user_organization.py
    │   │   ├── user.py
    │   │   ├── project_team_member.py
    │   │   ├── user_role.py
    │   │   ├── resource_permission.py
    │   │   ├── audit_log.py
    │   │   ├── project_data_source.py
    │   │   └── organization.py
    ├── alembic
    │   ├── README
    │   ├── script.py.mako
    │   ├── versions
    │   │   └── 9d5aba691653_add_dataflow_task_types_to_enum.py
    │   └── env.py
    ├── celery_worker.py
    ├── start_celery.sh
    ├── migrations
    │   └── reset_system.sh
    ├── test
    │   ├── test_api.py
    │   └── test_minio_fix.py
    ├── Dockerfile
    ├── requirements_optimized.txt
    ├── start_celery_threads.sh
    ├── config.example.env
    └── debug_llm.py
├── frontend
    ├── .bolt
    │   └── ignore
    ├── src
    │   ├── screens
    │   │   ├── RawData
    │   │   │   ├── index.ts
    │   │   │   └── LibraryDetails
    │   │   │   │   ├── index.ts
    │   │   │   │   └── README.md
    │   │   ├── StitchDesign
    │   │   │   ├── index.ts
    │   │   │   ├── sections
    │   │   │   │   ├── ActivitySection
    │   │   │   │   │   └── index.ts
    │   │   │   │   └── OverviewSection
    │   │   │   │   │   └── OverviewSection.tsx
    │   │   │   └── StitchDesign.tsx
    │   │   ├── Auth
    │   │   │   └── index.tsx
    │   │   ├── Datasets
    │   │   │   ├── SmartDatasetCreator.tsx
    │   │   │   ├── index.tsx
    │   │   │   └── SmartDatasetCreator
    │   │   │   │   ├── components
    │   │   │   │       ├── index.ts
    │   │   │   │       ├── ErrorMessage.tsx
    │   │   │   │       └── NavigationButtons.tsx
    │   │   │   │   └── index.tsx
    │   │   ├── DataGovernance
    │   │   │   ├── ProjectDetail.tsx
    │   │   │   ├── index.ts
    │   │   │   ├── components
    │   │   │   │   └── index.ts
    │   │   │   └── ProjectDetail
    │   │   │   │   ├── index.ts
    │   │   │   │   └── README.md
    │   │   ├── Settings
    │   │   │   ├── components
    │   │   │   │   └── index.ts
    │   │   │   └── README.md
    │   │   └── Plugins
    │   │   │   └── Plugins.tsx
    │   ├── i18n
    │   │   ├── locales
    │   │   │   ├── zh
    │   │   │   │   ├── time.json
    │   │   │   │   ├── actions.json
    │   │   │   │   ├── navigation.json
    │   │   │   │   ├── common.json
    │   │   │   │   ├── annotation.json
    │   │   │   │   ├── libraryDetails.json
    │   │   │   │   ├── dataPreview.json
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── overview.json
    │   │   │   │   ├── auth.json
    │   │   │   │   └── datasets.json
    │   │   │   ├── en
    │   │   │   │   ├── time.json
    │   │   │   │   ├── actions.json
    │   │   │   │   ├── navigation.json
    │   │   │   │   ├── common.json
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── annotation.json
    │   │   │   │   ├── dataPreview.json
    │   │   │   │   ├── libraryDetails.json
    │   │   │   │   ├── overview.json
    │   │   │   │   └── auth.json
    │   │   │   └── ja
    │   │   │   │   ├── actions.json
    │   │   │   │   ├── time.json
    │   │   │   │   ├── navigation.json
    │   │   │   │   ├── common.json
    │   │   │   │   ├── index.ts
    │   │   │   │   ├── dataPreview.json
    │   │   │   │   ├── overview.json
    │   │   │   │   ├── libraryDetails.json
    │   │   │   │   └── auth.json
    │   │   └── index.ts
    │   ├── components
    │   │   ├── auth
    │   │   │   ├── index.ts
    │   │   │   ├── AuthProvider.tsx
    │   │   │   └── ProtectedRoute.tsx
    │   │   ├── MediaFileDetails
    │   │   │   ├── dialogs
    │   │   │   │   └── index.ts
    │   │   │   └── index.ts
    │   │   ├── DataPreview
    │   │   │   └── index.ts
    │   │   ├── ui
    │   │   │   ├── progress.tsx
    │   │   │   ├── main-content-layout.tsx
    │   │   │   ├── textarea.tsx
    │   │   │   ├── input.tsx
    │   │   │   ├── slider.tsx
    │   │   │   ├── language-switcher.tsx
    │   │   │   ├── badge.tsx
    │   │   │   ├── switch.tsx
    │   │   │   ├── checkbox.tsx
    │   │   │   ├── tabs.tsx
    │   │   │   ├── button.tsx
    │   │   │   ├── card.tsx
    │   │   │   ├── table.tsx
    │   │   │   └── dropdown-menu.tsx
    │   │   └── Layout.tsx
    │   ├── lib
    │   │   ├── utils.ts
    │   │   └── config.ts
    │   ├── env.d.ts
    │   ├── index.tsx
    │   ├── services
    │   │   ├── index.ts
    │   │   └── overview.service.ts
    │   ├── types
    │   │   ├── api.ts
    │   │   ├── data-governance.ts
    │   │   ├── systemLog.ts
    │   │   ├── library.ts
    │   │   └── llm.ts
    │   ├── hooks
    │   │   ├── useOverview.ts
    │   │   ├── useTaskStats.ts
    │   │   ├── useFileDetails.ts
    │   │   └── useFileConversion.ts
    │   └── examples
    │   │   └── DataPreviewExample.tsx
    ├── public
    │   ├── depth-5--frame-0.png
    │   ├── vector---0-6.svg
    │   ├── vector---0-12.svg
    │   ├── vector---0.svg
    │   ├── vector---0-9.svg
    │   ├── vector---0-2.svg
    │   └── vector---0-3.svg
    ├── tsconfig.json
    ├── Dockerfile.dev
    ├── vite.config.ts
    ├── .gitignore
    ├── Dockerfile
    ├── .dockerignore
    ├── index.html
    ├── tsconfig.node.json
    ├── tsconfig.app.json
    ├── README.md
    ├── package.json
    ├── tailwind.config.js
    └── nginx.conf
├── poster.png
├── .gitmodules
├── wechat_2025-06-10_175702_266.png
├── run.py
├── Procfile
├── .gitignore
├── plugins
    ├── custom_distillers
    │   └── __init__.py
    └── custom_parsers
    │   ├── __init__.py
    │   └── my_custom_parser.py
├── doc
    └── tmp
    │   ├── user.md
    │   ├── muti.md
    │   └── design.md
└── cleanup.sh


/docker/data/api/api.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docker/data/minio/miniio.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docker/data/postgres/pg.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docker/data/redis/redis.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/docker/postgres/init.sql:
--------------------------------------------------------------------------------
1 |  


--------------------------------------------------------------------------------
/docker/data/celery/celery.txt:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/backend/app/plugins/__init__.py:
--------------------------------------------------------------------------------
1 | """插件系统初始化""" 


--------------------------------------------------------------------------------
/frontend/.bolt/ignore:
--------------------------------------------------------------------------------
1 | src/components/ui/*
2 | 


--------------------------------------------------------------------------------
/backend/app/api/v1/endpoints/__init__.py:
--------------------------------------------------------------------------------
1 | # 端点模块初始化 


--------------------------------------------------------------------------------
/backend/alembic/README:
--------------------------------------------------------------------------------
1 | Generic single-database configuration.


--------------------------------------------------------------------------------
/frontend/src/screens/RawData/index.ts:
--------------------------------------------------------------------------------
1 | export { RawData } from './RawData';


--------------------------------------------------------------------------------
/poster.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hikariming/pindata/HEAD/poster.png


--------------------------------------------------------------------------------
/frontend/src/screens/StitchDesign/index.ts:
--------------------------------------------------------------------------------
1 | export { StitchDesign } from "./StitchDesign";
2 | 


--------------------------------------------------------------------------------
/.gitmodules:
--------------------------------------------------------------------------------
1 | [submodule "Dataflow"]
2 | 	path = Dataflow
3 | 	url = https://github.com/OpenDCAI/DataFlow.git
4 | 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/time.json:
--------------------------------------------------------------------------------
1 | {
2 |   "hoursAgo": "{{count}}小时前",
3 |   "daysAgo": "{{count}}天前"
4 | }


--------------------------------------------------------------------------------
/frontend/src/screens/Auth/index.tsx:
--------------------------------------------------------------------------------
1 | export { Login } from './Login';
2 | export { Register } from './Register';


--------------------------------------------------------------------------------
/wechat_2025-06-10_175702_266.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hikariming/pindata/HEAD/wechat_2025-06-10_175702_266.png


--------------------------------------------------------------------------------
/frontend/public/depth-5--frame-0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/hikariming/pindata/HEAD/frontend/public/depth-5--frame-0.png


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/time.json:
--------------------------------------------------------------------------------
1 | {
2 |   "hoursAgo": "{{count}} hours ago",
3 |   "daysAgo": "{{count}} days ago"
4 | }


--------------------------------------------------------------------------------
/frontend/src/screens/StitchDesign/sections/ActivitySection/index.ts:
--------------------------------------------------------------------------------
1 | export { ActivitySection } from "./ActivitySection";
2 | 


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/SmartDatasetCreator.tsx:
--------------------------------------------------------------------------------
1 | // 重新导出重构后的组件
2 | export { SmartDatasetCreator } from './SmartDatasetCreator/index'; 


--------------------------------------------------------------------------------
/frontend/src/screens/DataGovernance/ProjectDetail.tsx:
--------------------------------------------------------------------------------
1 | // 重新导出ProjectDetail组件
2 | export { ProjectDetail } from './ProjectDetail/ProjectDetail';


--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | from app import create_app
2 | 
3 | app = create_app()
4 | 
5 | if __name__ == '__main__':
6 |     app.run(host='0.0.0.0', port=5000, debug=True) 


--------------------------------------------------------------------------------
/frontend/src/components/auth/index.ts:
--------------------------------------------------------------------------------
1 | export { ProtectedRoute, PermissionGuard } from './ProtectedRoute';
2 | export { AuthProvider } from './AuthProvider';


--------------------------------------------------------------------------------
/frontend/src/components/MediaFileDetails/dialogs/index.ts:
--------------------------------------------------------------------------------
1 | export { AIQuestionDialog } from './AIQuestionDialog';
2 | export { ObjectDetectionDialog } from './ObjectDetectionDialog';


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/actions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "createDataset": "データセットを作成",
3 |   "startNewTask": "新しいタスクを開始",
4 |   "edit": "編集",
5 |   "delete": "削除",
6 |   "view": "表示"
7 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/actions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "createDataset": "创建数据集",
3 |   "smartCreateDataset": "智能创建数据集",
4 |   "startNewTask": "开始新任务",
5 |   "edit": "编辑",
6 |   "delete": "删除",
7 |   "view": "查看"
8 | }


--------------------------------------------------------------------------------
/frontend/tsconfig.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "files": [],
 3 |   "references": [
 4 |     {
 5 |       "path": "./tsconfig.app.json"
 6 |     },
 7 |     {
 8 |       "path": "./tsconfig.node.json"
 9 |     }
10 |   ]
11 | }


--------------------------------------------------------------------------------
/frontend/src/components/DataPreview/index.ts:
--------------------------------------------------------------------------------
1 | export { DataPreview } from './DataPreview';
2 | export { DataPreviewContainer } from './DataPreviewContainer';
3 | export type { DataPreviewProps } from './DataPreview'; 


--------------------------------------------------------------------------------
/frontend/src/lib/utils.ts:
--------------------------------------------------------------------------------
1 | import { type ClassValue, clsx } from "clsx";
2 | import { twMerge } from "tailwind-merge";
3 | 
4 | export function cn(...inputs: ClassValue[]) {
5 |   return twMerge(clsx(inputs));
6 | }
7 | 


--------------------------------------------------------------------------------
/frontend/src/screens/DataGovernance/index.ts:
--------------------------------------------------------------------------------
1 | export { DataGovernanceProjects } from './DataGovernanceProjects';
2 | export { ProjectDetail } from './ProjectDetail';
3 | export { CreateProject } from './CreateProject';


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/index.tsx:
--------------------------------------------------------------------------------
1 | export { Datasets } from './Datasets';
2 | export { DatasetDetail } from './DatasetDetail';
3 | export { CreateDataset } from './CreateDataset';
4 | export { DatasetTasks } from './DatasetTasks'; 


--------------------------------------------------------------------------------
/frontend/src/env.d.ts:
--------------------------------------------------------------------------------
 1 | /// <reference types="vite/client" />
 2 | // Typing for the environment variables read through import.meta.env.
 3 | interface ImportMetaEnv {
 4 |   readonly VITE_API_BASE_URL: string
 5 |   // more environment variables...
 6 | }
 7 | // Attaches the typed env object above to import.meta.
 8 | interface ImportMeta {
 9 |   readonly env: ImportMetaEnv
10 | } 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/actions.json:
--------------------------------------------------------------------------------
1 | {
2 |   "createDataset": "Create Dataset",
3 |   "smartCreateDataset": "Smart Create Dataset",
4 |   "startNewTask": "Start New Task",
5 |   "edit": "Edit",
6 |   "delete": "Delete",
7 |   "view": "View"
8 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/time.json:
--------------------------------------------------------------------------------
1 | {
2 |   "justNow": "たった今",
3 |   "minutesAgo": "{{count}}分前",
4 |   "hoursAgo": "{{count}}時間前",
5 |   "daysAgo": "{{count}}日前",
6 |   "weeksAgo": "{{count}}週間前",
7 |   "monthsAgo": "{{count}}ヶ月前",
8 |   "yearsAgo": "{{count}}年前"
9 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/navigation.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "overview": "概要",
 3 |   "datasets": "データセット",
 4 |   "tasks": "タスク",
 5 |   "rawData": "生データ",
 6 |   "plugins": "プラグイン",
 7 |   "settings": "設定",
 8 |   "storage": "ストレージ",
 9 |   "expand": "サイドバーを展開",
10 |   "collapse": "サイドバーを折りたたみ"
11 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/navigation.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "overview": "概览",
 3 |   "datasets": "数据集",
 4 |   "tasks": "任务",
 5 |   "rawData": "原始数据",
 6 |   "governance": "数据治理(开发中)",
 7 |   "plugins": "插件",
 8 |   "settings": "设置",
 9 |   "storage": "存储",
10 |   "expand": "展开侧边栏",
11 |   "collapse": "收起侧边栏"
12 | }


--------------------------------------------------------------------------------
/frontend/src/screens/DataGovernance/components/index.ts:
--------------------------------------------------------------------------------
1 | export { TeamManagement } from './TeamManagement';
2 | export { DataPipelineVisualization } from './DataPipelineVisualization';
3 | export { ProjectCreationWizard } from './ProjectCreationWizard';
4 | export { ProjectDataIntegration } from './ProjectDataIntegration';


--------------------------------------------------------------------------------
/frontend/src/screens/Settings/components/index.ts:
--------------------------------------------------------------------------------
1 | export { LLMConfigComponent } from './LLMConfig';
2 | export { SystemLogs } from './SystemLogs';
3 | export { UserProfile } from './UserProfile';
4 | export { SessionManagement } from './SessionManagement';
5 | export { UserAdministration } from './UserAdministration'; 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/navigation.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "overview": "Overview",
 3 |   "datasets": "Datasets",
 4 |   "tasks": "Tasks",
 5 |   "rawData": "Raw Data",
 6 |   "plugins": "Plugins",
 7 |   "settings": "Settings",
 8 |   "storage": "Storage",
 9 |   "expand": "Expand Sidebar",
10 |   "collapse": "Collapse Sidebar"
11 | }


--------------------------------------------------------------------------------
/Procfile:
--------------------------------------------------------------------------------
1 | backend: CONDA_PATH=$(conda info --base); cd backend && $CONDA_PATH/envs/pindata-env/bin/python run.py
2 | celery: CONDA_PATH=$(conda info --base); cd backend && $CONDA_PATH/envs/pindata-env/bin/celery -A celery_worker.celery worker --loglevel=info --pool=threads --concurrency=4 -n worker@%h
3 | frontend: cd frontend && pnpm run dev 


--------------------------------------------------------------------------------
/frontend/Dockerfile.dev:
--------------------------------------------------------------------------------
 1 | FROM node:18-alpine
 2 | 
 3 | WORKDIR /app
 4 | 
 5 | # Install pnpm
 6 | RUN npm install -g pnpm
 7 | 
 8 | # Copy the dependency manifests
 9 | COPY package.json pnpm-lock.yaml ./
10 | 
11 | # Install dependencies
12 | RUN pnpm install --frozen-lockfile
13 | 
14 | # Expose the dev-server port. NOTE(review): vite.config.ts sets no server port, so Vite listens on its default unless the package.json dev script overrides it — confirm 3000 is correct.
15 | EXPOSE 3000
16 | 
17 | # Start command for development mode. NOTE(review): no source is copied into this image; presumably it is bind-mounted at runtime — confirm.
18 | CMD ["pnpm", "dev", "--host", "0.0.0.0"] 


--------------------------------------------------------------------------------
/frontend/src/screens/RawData/LibraryDetails/index.ts:
--------------------------------------------------------------------------------
1 | export { LibraryDetails } from './LibraryDetails';
2 | export { ConvertToMarkdownDialog } from './components/ConvertToMarkdownDialog';
3 | export { ConversionProgress } from './components/ConversionProgress';
4 | export type { ConversionConfig } from './components/ConvertToMarkdownDialog'; 


--------------------------------------------------------------------------------
/frontend/src/screens/DataGovernance/ProjectDetail/index.ts:
--------------------------------------------------------------------------------
1 | export { ProjectDetail } from './ProjectDetail';
2 | export { RawDataTab } from './RawDataTab';
3 | export { GovernedDataTab } from './GovernedDataTab';
4 | export { KnowledgeTab } from './KnowledgeTab';
5 | export { DatasetsTab } from './DatasetsTab';
6 | export { AnalyticsTab } from './AnalyticsTab'; 


--------------------------------------------------------------------------------
/frontend/vite.config.ts:
--------------------------------------------------------------------------------
 1 | import react from "@vitejs/plugin-react";
 2 | import tailwind from "tailwindcss";
 3 | import { defineConfig } from "vite";
 4 | // Vite configuration: React plugin, relative asset base, Tailwind run as a PostCSS plugin.
 5 | // https://vite.dev/config/
 6 | export default defineConfig({
 7 |   plugins: [react()],
 8 |   base: "./", // relative asset URLs. NOTE(review): the app is mounted in a BrowserRouter; confirm nested routes still resolve assets on a hard reload.
 9 |   css: {
10 |     postcss: {
11 |       plugins: [tailwind()],
12 |     },
13 |   },
14 | });
15 | 


--------------------------------------------------------------------------------
/backend/app/api/v1/__init__.py:
--------------------------------------------------------------------------------
1 | from flask import Blueprint
2 | 
3 | api_v1 = Blueprint('api_v1', __name__)
4 | 
5 | # Import all endpoint modules. Kept below the blueprint definition, presumably so those modules can import `api_v1` — verify before moving.
6 | from .endpoints import datasets, tasks, plugins, raw_data, overview, libraries, llm_configs, system_logs, conversion_jobs, enhanced_datasets, auth, users, organizations, roles, annotations, image_annotations, data_governance, llm_test, dataflow
7 |  


--------------------------------------------------------------------------------
/docker/data/README.md:
--------------------------------------------------------------------------------
 1 | # 数据卷
 2 | 
 3 | ## 数据卷说明
 4 | 
 5 | - postgres_data: 数据库数据
 6 | - minio_data: 对象存储数据
 7 | - redis_data: 缓存数据
 8 | - api_data: 后端数据
 9 | - celery_data: 任务数据
10 | 
11 | ## Data volumes
12 | 
13 | - postgres_data: db data
14 | - minio_data: object storage data
15 | - redis_data: cache data
16 | - api_data: backend data
17 | - celery_data: task data
18 | 


--------------------------------------------------------------------------------
/backend/celery_worker.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """Celery worker launch script."""
 3 | import os
 4 | import sys
 5 | 
 6 | # Put this file's directory at the front of the Python path
 7 | sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
 8 | 
 9 | from app.celery_app import celery
10 | 
11 | # Tasks are discovered automatically through `include`, so no explicit imports are needed
12 | 
13 | if __name__ == '__main__':
14 |     # Start the Celery worker
15 |     celery.start() 


--------------------------------------------------------------------------------
/frontend/.gitignore:
--------------------------------------------------------------------------------
 1 | # Logs
 2 | logs
 3 | *.log
 4 | npm-debug.log*
 5 | yarn-debug.log*
 6 | yarn-error.log*
 7 | pnpm-debug.log*
 8 | lerna-debug.log*
 9 | 
10 | node_modules
11 | dist
12 | dist-ssr
13 | *.local
14 | 
15 | # Editor directories and files
16 | .vscode/*
17 | !.vscode/extensions.json
18 | .idea
19 | .DS_Store
20 | *.suo
21 | *.ntvs*
22 | *.njsproj
23 | *.sln
24 | *.sw?


--------------------------------------------------------------------------------
/frontend/src/index.tsx:
--------------------------------------------------------------------------------
 1 | import { StrictMode } from "react";
 2 | import { createRoot } from "react-dom/client";
 3 | import { BrowserRouter as Router } from "react-router-dom";
 4 | import { App } from "./App";
 5 | import './i18n';
 6 | 
 7 | createRoot(document.getElementById("app") as HTMLElement).render(
 8 |   <StrictMode>
 9 |     <Router>
10 |       <App />
11 |     </Router>
12 |   </StrictMode>,
13 | );


--------------------------------------------------------------------------------
/backend/app/tasks/__init__.py:
--------------------------------------------------------------------------------
1 | from .conversion_tasks import process_conversion_job
2 | from .dataset_import_tasks import import_dataset_task
3 | from .dataset_generation_tasks import generate_dataset_task
4 | # from .multimodal_dataset_tasks import generate_multimodal_dataset_task  # temporarily removed
5 | # Names re-exported by this package; the multimodal task stays commented out to match the import above.
6 | __all__ = ['process_conversion_job', 'import_dataset_task', 'generate_dataset_task']  # , 'generate_multimodal_dataset_task' 


--------------------------------------------------------------------------------
/frontend/src/screens/StitchDesign/StitchDesign.tsx:
--------------------------------------------------------------------------------
 1 | // frontend/src/screens/StitchDesign/StitchDesign.tsx
 2 | 
 3 | import React from 'react';
 4 | import { ActivitySection } from './sections/ActivitySection';
 5 | 
 6 | /**
 7 |  * StitchDesign screen component.
 8 |  *
 9 |  * Renders the ActivitySection and nothing else.
10 |  *
11 |  * @returns {JSX.Element} the StitchDesign page
12 |  */
13 | export const StitchDesign: React.FC = (): JSX.Element => <ActivitySection />;
14 | 


--------------------------------------------------------------------------------
/frontend/src/components/MediaFileDetails/index.ts:
--------------------------------------------------------------------------------
1 | export { MediaFileDetailsContainer } from './MediaFileDetailsContainer';
2 | export { ImagePreviewPanel } from './ImagePreviewPanel';
3 | export { VideoPreviewPanel } from './VideoPreviewPanel';
4 | export { MediaAnnotationPanel } from './MediaAnnotationPanel';
5 | export { FileMetadataPanel } from './FileMetadataPanel';
6 | export { ProcessingHistoryPanel } from './ProcessingHistoryPanel';
7 | export * from './dialogs';


--------------------------------------------------------------------------------
/backend/start_celery.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Launch script for the Celery worker
 4 | 
 5 | echo "Starting Celery Worker..."
 6 | 
 7 | # Set environment variables
 8 | export FLASK_APP=run.py
 9 | export FLASK_ENV=development
10 | 
11 | # macOS compatibility setting
12 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
13 | 
14 | # Start the Celery worker
15 | # -A: the Celery application to load
16 | # -l: log level
17 | # -c: concurrency (number of worker processes)
18 | # -n: worker name
19 | celery -A celery_worker.celery worker --loglevel=info --concurrency=4 -n worker@%h 


--------------------------------------------------------------------------------
/docker/reset-redis.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Reset the Redis service: stop it, drop its data volume, start it again, then check it responds.
 3 | echo "停止Redis容器..."
 4 | docker compose -f docker-compose.yml down redis
 5 | 
 6 | echo "清理Redis数据卷..."
 7 | # Best effort: the volume may not exist yet, so a failure here is ignored.
 8 | docker volume rm docker_redis_data 2>/dev/null || true
 9 | 
10 | echo "重新启动Redis..."
11 | docker compose -f docker-compose.yml up -d redis
12 | 
13 | echo "等待Redis启动..."
14 | sleep 3
15 | 
16 | echo "检查Redis状态..."
17 | docker compose -f docker-compose.yml logs --tail=20 redis
18 | 
19 | echo "测试Redis连接..."
20 | docker exec llama_redis redis-cli ping 


--------------------------------------------------------------------------------
/frontend/public/vector---0-6.svg:
--------------------------------------------------------------------------------
1 | <svg width="21" height="12" viewBox="0 0 21 12" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M19.5 0H1.5C0.671573 0 0 0.671573 0 1.5V10.5C0 11.3284 0.671573 12 1.5 12H19.5C20.3284 12 21 11.3284 21 10.5V1.5C21 0.671573 20.3284 0 19.5 0ZM19.5 10.5H1.5V1.5H19.5V10.5ZM17.25 6C17.25 6.62132 16.7463 7.125 16.125 7.125C15.5037 7.125 15 6.62132 15 6C15 5.37868 15.5037 4.875 16.125 4.875C16.7463 4.875 17.25 5.37868 17.25 6Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/SmartDatasetCreator/components/index.ts:
--------------------------------------------------------------------------------
1 | export { StepIndicator } from './StepIndicator';
2 | export { Step1DataSelection } from './Step1DataSelection';
3 | export { Step2DatasetConfig } from './Step2DatasetConfig';
4 | export { Step3ModelConfig } from './Step3ModelConfig';
5 | export { Step4PreviewConfirm } from './Step4PreviewConfirm';
6 | export { Step5Generation } from './Step5Generation';
7 | export { NavigationButtons } from './NavigationButtons';
8 | export { ErrorMessage } from './ErrorMessage';
9 | export { FormatDetailsModal } from './FormatDetailsModal'; 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/common.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "appName": "拼好数",
 3 |   "cancel": "取消",
 4 |   "save": "保存",
 5 |   "delete": "删除",
 6 |   "justNow": "刚刚",
 7 |   "pageNotFound": "页面未找到",
 8 |   "loading": "加载中...",
 9 |   "error": "错误",
10 |   "retry": "重试",
11 |   "refresh": "刷新",
12 |   "close": "关闭",
13 |   "confirm": "确认",
14 |   "edit": "编辑",
15 |   "view": "查看",
16 |   "actions": "操作",
17 |   "status": "状态",
18 |   "name": "名称",
19 |   "description": "描述",
20 |   "size": "大小",
21 |   "type": "类型",
22 |   "create": "创建",
23 |   "upload": "上传",
24 |   "download": "下载",
25 |   "preview": "预览"
26 | }


--------------------------------------------------------------------------------
/frontend/src/components/ui/progress.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { cn } from "../../lib/utils";
 3 | 
 4 | interface ProgressProps {
 5 |   /** Completion percentage; values outside 0-100 are clamped. */
 6 |   value: number;
 7 |   /** Extra classes merged onto the track element. */
 8 |   className?: string;
 9 | }
10 | 
11 | /**
12 |  * Horizontal progress bar.
13 |  *
14 |  * The fill width is `value` clamped to the 0-100 range. A NaN `value` is
15 |  * rendered as 0 so it never produces an invalid `NaN%` CSS width.
16 |  */
17 | export const Progress: React.FC<ProgressProps> = ({ value, className }) => {
18 |   const percent = Number.isNaN(value) ? 0 : Math.min(Math.max(value, 0), 100);
19 |   return (
20 |     <div
21 |       role="progressbar"
22 |       aria-valuemin={0}
23 |       aria-valuemax={100}
24 |       aria-valuenow={percent}
25 |       className={cn("w-full bg-gray-200 rounded-full h-2", className)}
26 |     >
27 |       <div
28 |         className="bg-[#1977e5] h-2 rounded-full transition-all duration-300 ease-in-out"
29 |         style={{ width: `${percent}%` }}
30 |       />
31 |     </div>
32 |   );
33 | }; 


--------------------------------------------------------------------------------
/frontend/Dockerfile:
--------------------------------------------------------------------------------
 1 | # Stage 1: build
 2 | FROM node:18-alpine AS builder
 3 | 
 4 | WORKDIR /app
 5 | 
 6 | # Install pnpm
 7 | RUN npm install -g pnpm
 8 | 
 9 | # Copy the dependency manifests
10 | COPY package.json pnpm-lock.yaml ./
11 | 
12 | # Install dependencies
13 | RUN pnpm install --frozen-lockfile
14 | 
15 | # Copy the source code
16 | COPY . .
17 | 
18 | # Build the application
19 | RUN pnpm run build
20 | 
21 | # Stage 2: production
22 | FROM nginx:alpine AS production
23 | 
24 | # Copy the build output from the builder stage into the nginx web root
25 | COPY --from=builder /app/dist /usr/share/nginx/html
26 | 
27 | # Copy the nginx configuration
28 | COPY nginx.conf /etc/nginx/nginx.conf
29 | 
30 | # Expose the port
31 | EXPOSE 80
32 | 
33 | # Start nginx
34 | CMD ["nginx", "-g", "daemon off;"] 


--------------------------------------------------------------------------------
/frontend/.dockerignore:
--------------------------------------------------------------------------------
 1 | # 依赖目录
 2 | node_modules/
 3 | npm-debug.log*
 4 | yarn-debug.log*
 5 | yarn-error.log*
 6 | 
 7 | # 构建输出
 8 | dist/
 9 | build/
10 | 
11 | # 环境文件
12 | .env
13 | .env.local
14 | .env.development.local
15 | .env.test.local
16 | .env.production.local
17 | 
18 | # 编辑器和IDE
19 | .vscode/
20 | .idea/
21 | *.swp
22 | *.swo
23 | *~
24 | 
25 | # 操作系统
26 | .DS_Store
27 | .DS_Store?
28 | ._*
29 | .Spotlight-V100
30 | .Trashes
31 | ehthumbs.db
32 | Thumbs.db
33 | 
34 | # 日志文件
35 | *.log
36 | 
37 | # 测试覆盖率
38 | coverage/
39 | 
40 | # 临时文件
41 | *.tmp
42 | *.temp
43 | 
44 | # Git
45 | .git/
46 | .gitignore
47 | 
48 | # 其他
49 | README.md
50 | .bolt/ 


--------------------------------------------------------------------------------
/frontend/index.html:
--------------------------------------------------------------------------------
 1 | <!DOCTYPE html>
 2 | <html lang="en">
 3 |   <head>
 4 |     <!-- Each of charset, viewport and the stylesheet link is declared exactly once. -->
 5 |     <meta charset="utf-8" />
 6 |     <title>pindata</title>
 7 |     <meta name="viewport" content="width=device-width, initial-scale=1" />
 8 |     <style>
 9 |       @import url("https://fonts.googleapis.com/css?family=Inter:500,700,400");
10 |     </style>
11 |     <link href="tailwind.css" rel="stylesheet" />
12 |   </head>
13 |   <body>
14 |     <div id="app"></div>
15 |     <script type="module" src="./src/index.tsx"></script>
16 |   </body>
17 | </html>


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/common.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "appName": "pindata",
 3 |   "cancel": "Cancel",
 4 |   "save": "Save",
 5 |   "delete": "Delete",
 6 |   "justNow": "Just now",
 7 |   "pageNotFound": "Page not found",
 8 |   "loading": "Loading...",
 9 |   "error": "Error",
10 |   "retry": "Retry",
11 |   "refresh": "Refresh",
12 |   "close": "Close",
13 |   "confirm": "Confirm",
14 |   "edit": "Edit",
15 |   "view": "View",
16 |   "actions": "Actions",
17 |   "status": "Status",
18 |   "name": "Name",
19 |   "description": "Description",
20 |   "size": "Size",
21 |   "type": "Type",
22 |   "create": "Create",
23 |   "upload": "Upload",
24 |   "download": "Download",
25 |   "preview": "Preview"
26 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/common.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "cancel": "キャンセル",
 3 |   "save": "保存",
 4 |   "delete": "削除",
 5 |   "edit": "編集",
 6 |   "create": "作成",
 7 |   "update": "更新",
 8 |   "refresh": "更新",
 9 |   "loading": "読み込み中...",
10 |   "saving": "保存中...",
11 |   "creating": "作成中...",
12 |   "updating": "更新中...",
13 |   "deleting": "削除中...",
14 |   "processing": "処理中...",
15 |   "confirm": "確認",
16 |   "success": "成功",
17 |   "error": "エラー",
18 |   "warning": "警告",
19 |   "info": "情報",
20 |   "unknown": "不明",
21 |   "never": "なし",
22 |   "primary": "プライマリ",
23 |   "actions": "アクション",
24 |   "noPermission": "この機能にアクセスする権限がありません",
25 |   "justNow": "たった今",
26 |   "pageNotFound": "ページが見つかりません"
27 | }


--------------------------------------------------------------------------------
/frontend/tsconfig.node.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
 4 |     "target": "ES2022",
 5 |     "lib": [
 6 |       "ES2023"
 7 |     ],
 8 |     "module": "ESNext",
 9 |     "skipLibCheck": true,
10 |     "moduleResolution": "bundler",
11 |     "allowImportingTsExtensions": true,
12 |     "isolatedModules": true,
13 |     "moduleDetection": "force",
14 |     "noEmit": true,
15 |     "strict": true,
16 |     "noUnusedLocals": true,
17 |     "noUnusedParameters": true,
18 |     "noFallthroughCasesInSwitch": true,
19 |     "noUncheckedSideEffectImports": true
20 |   },
21 |   "include": [
22 |     "vite.config.ts"
23 |   ]
24 | }


--------------------------------------------------------------------------------
/frontend/src/components/ui/main-content-layout.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | 
 3 | interface MainContentLayoutProps {
 4 |   children: React.ReactNode;
 5 |   isFullWidth?: boolean;
 6 |   className?: string;
 7 | }
 8 | 
 9 | /**
10 |  * Page-level wrapper: a width-constrained outer container around a white,
11 |  * rounded card that holds `children`.
12 |  *
13 |  * With `isFullWidth` the outer container has no max width; otherwise it is
14 |  * capped at `max-w-7xl` and centered. `className` is appended to the outer
15 |  * container's classes.
16 |  */
17 | export const MainContentLayout = (props: MainContentLayoutProps): JSX.Element => {
18 |   const { children, isFullWidth = false, className = "" } = props;
19 |   const widthClasses = isFullWidth ? 'max-w-none' : 'max-w-7xl mx-auto';
20 |   const outerClasses = `
21 |       w-full transition-all duration-300 ease-in-out
22 |       ${widthClasses}
23 |       ${className}
24 |     `;
25 | 
26 |   return (
27 |     <div className={outerClasses}>
28 |       <div className="bg-white rounded-lg shadow-sm min-h-[calc(100vh-4rem)]">
29 |         {children}
30 |       </div>
31 |     </div>
32 |   );
33 | }; 


--------------------------------------------------------------------------------
/frontend/public/vector---0-12.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="19" viewBox="0 0 18 19" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M18 8.58122V17.2484C18 18.0768 17.3284 18.7484 16.5 18.7484H12.75C11.9216 18.7484 11.25 18.0768 11.25 17.2484V13.4984C11.25 13.0842 10.9142 12.7484 10.5 12.7484H7.5C7.08579 12.7484 6.75 13.0842 6.75 13.4984V17.2484C6.75 18.0768 6.07843 18.7484 5.25 18.7484H1.5C0.671573 18.7484 0 18.0768 0 17.2484V8.58122C-6.38962e-05 8.16149 0.17573 7.76094 0.484688 7.47684L7.98469 0.400593L7.995 0.39028C8.56719 -0.130094 9.44124 -0.130094 10.0134 0.39028C10.0166 0.393949 10.0201 0.397394 10.0238 0.400593L17.5238 7.47684C17.8296 7.76243 18.0022 8.16278 18 8.58122Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/frontend/src/services/index.ts:
--------------------------------------------------------------------------------
 1 | // 导出所有服务
 2 | export { libraryService } from './library.service';
 3 | export { systemLogService } from './systemLog.service';
 4 | export { llmService } from './llm.service';
 5 | export { fileService } from './file.service';
 6 | export { overviewService } from './overview.service';
 7 | export { authService } from './auth.service';
 8 | export type { User, Role, Organization, UserSession, LoginRequest, RegisterRequest, LoginResponse, ChangePasswordRequest } from './auth.service';
 9 | 
10 | // 导出所有Hooks
11 | export * from '../hooks/useLibraries';
12 | 
13 | // 导出所有类型
14 | export * from '../types/api';
15 | export * from '../types/library';
16 | 
17 | // 导出配置
18 | export * from '../lib/config';
19 | export { apiClient } from '../lib/api-client'; 


--------------------------------------------------------------------------------
/backend/alembic/script.py.mako:
--------------------------------------------------------------------------------
 1 | """${message}
 2 | 
 3 | Revision ID: ${up_revision}
 4 | Revises: ${down_revision | comma,n}
 5 | Create Date: ${create_date}
 6 | 
 7 | """
 8 | from typing import Sequence, Union
 9 | 
10 | from alembic import op
11 | import sqlalchemy as sa
12 | ${imports if imports else ""}
13 | 
14 | # revision identifiers, used by Alembic.
15 | revision: str = ${repr(up_revision)}
16 | down_revision: Union[str, None] = ${repr(down_revision)}
17 | branch_labels: Union[str, Sequence[str], None] = ${repr(branch_labels)}
18 | depends_on: Union[str, Sequence[str], None] = ${repr(depends_on)}
19 | 
20 | 
21 | def upgrade() -> None:
22 |     ${upgrades if upgrades else "pass"}
23 | 
24 | 
25 | def downgrade() -> None:
26 |     ${downgrades if downgrades else "pass"}
27 | 


--------------------------------------------------------------------------------
/frontend/src/i18n/index.ts:
--------------------------------------------------------------------------------
 1 | import i18n from 'i18next';
 2 | import { initReactI18next } from 'react-i18next';
 3 | import LanguageDetector from 'i18next-browser-languagedetector';
 4 | import enTranslations from './locales/en';
 5 | import zhTranslations from './locales/zh';
 6 | import jaTranslations from './locales/ja';
 7 | 
 8 | // Translation bundles keyed by language code; each bundle is registered
 9 | // under the "translation" namespace.
10 | const resources = {
11 |   en: { translation: enTranslations },
12 |   zh: { translation: zhTranslations },
13 |   ja: { translation: jaTranslations },
14 | };
15 | 
16 | // Register the browser language detector and the React binding, then
17 | // initialise with English as the fallback language and value escaping off.
18 | i18n
19 |   .use(LanguageDetector)
20 |   .use(initReactI18next)
21 |   .init({
22 |     resources,
23 |     fallbackLng: 'en',
24 |     interpolation: { escapeValue: false },
25 |   });
26 | 
27 | export default i18n;


--------------------------------------------------------------------------------
/frontend/tsconfig.app.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "compilerOptions": {
 3 |     "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
 4 |     "target": "ES2020",
 5 |     "useDefineForClassFields": true,
 6 |     "lib": [
 7 |       "ES2020",
 8 |       "DOM",
 9 |       "DOM.Iterable"
10 |     ],
11 |     "module": "ESNext",
12 |     "skipLibCheck": true,
13 |     "moduleResolution": "bundler",
14 |     "allowImportingTsExtensions": true,
15 |     "isolatedModules": true,
16 |     "moduleDetection": "force",
17 |     "noEmit": true,
18 |     "jsx": "react-jsx",
19 |     "strict": true,
20 |     "noUnusedLocals": true,
21 |     "noUnusedParameters": true,
22 |     "noFallthroughCasesInSwitch": true,
23 |     "noUncheckedSideEffectImports": true
24 |   },
25 |   "include": [
26 |     "src"
27 |   ]
28 | }


--------------------------------------------------------------------------------
/backend/app/plugins/base_plugin.py:
--------------------------------------------------------------------------------
 1 | """插件基类定义"""
 2 | from abc import ABC, abstractmethod
 3 | from typing import Any, Dict
 4 | 
 5 | class BasePlugin(ABC):
 6 |     """所有插件的基类"""
 7 |     
 8 |     def __init__(self, config: Dict[str, Any] = None):
 9 |         """初始化插件
10 |         
11 |         Args:
12 |             config: 插件配置参数
13 |         """
14 |         self.config = config or {}
15 |     
16 |     @abstractmethod
17 |     def process(self, data: Any) -> Any:
18 |         """处理数据的抽象方法
19 |         
20 |         Args:
21 |             data: 输入数据
22 |             
23 |         Returns:
24 |             处理后的数据
25 |         """
26 |         pass
27 |     
28 |     def validate_config(self) -> bool:
29 |         """验证配置是否有效
30 |         
31 |         Returns:
32 |             配置是否有效
33 |         """
34 |         return True 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/annotation.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "imageAnnotation": "图片标注",
 3 |   "videoAnnotation": "视频标注",
 4 |   "qa": "问答标注",
 5 |   "caption": "图片描述",
 6 |   "transcript": "视频字幕",
 7 |   "question": "问题",
 8 |   "answer": "答案",
 9 |   "confidence": "置信度",
10 |   "newQuestion": "新问题",
11 |   "newAnswer": "新答案",
12 |   "addQA": "添加问答",
13 |   "questionPlaceholder": "请输入问题...",
14 |   "answerPlaceholder": "请输入答案...",
15 |   "captionPlaceholder": "请输入图片描述...",
16 |   "transcriptPlaceholder": "请输入视频字幕...",
17 |   "aiAssist": "AI 辅助标注",
18 |   "aiAssistTitle": "AI 辅助标注",
19 |   "aiPrompt": "提示词",
20 |   "aiPromptPlaceholder": "请输入提示词，帮助 AI 生成更准确的标注...",
21 |   "generate": "生成标注",
22 |   "aiAssistSuccess": "AI 辅助标注成功",
23 |   "aiAnnotationsGenerated": "AI 已生成标注，置信度：{{confidence}}",
24 |   "aiAssistFailed": "AI 辅助标注失败",
25 |   "aiAnnotationsFailed": "AI 生成标注失败，请重试"
26 | } 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/index.ts:
--------------------------------------------------------------------------------
 1 | import common from './common.json';
 2 | import navigation from './navigation.json';
 3 | import overview from './overview.json';
 4 | import settings from './settings.json';
 5 | import rawData from './rawData.json';
 6 | import datasets from './datasets.json';
 7 | import tasks from './tasks.json';
 8 | import smartDatasetCreator from './smartDatasetCreator.json';
 9 | import dataPreview from './dataPreview.json';
10 | import libraryDetails from './libraryDetails.json';
11 | import time from './time.json';
12 | import actions from './actions.json';
13 | import auth from './auth.json';
14 | 
15 | export default {
16 |   common,
17 |   navigation,
18 |   overview,
19 |   settings,
20 |   rawData,
21 |   datasets,
22 |   tasks,
23 |   smartDatasetCreator,
24 |   dataPreview,
25 |   libraryDetails,
26 |   time,
27 |   actions,
28 |   auth
29 | };


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/index.ts:
--------------------------------------------------------------------------------
 1 | import common from './common.json';
 2 | import navigation from './navigation.json';
 3 | import overview from './overview.json';
 4 | import settings from './settings.json';
 5 | import rawData from './rawData.json';
 6 | import datasets from './datasets.json';
 7 | import tasks from './tasks.json';
 8 | import smartDatasetCreator from './smartDatasetCreator.json';
 9 | import dataPreview from './dataPreview.json';
10 | import libraryDetails from './libraryDetails.json';
11 | import time from './time.json';
12 | import actions from './actions.json';
13 | import auth from './auth.json';
14 | 
15 | export default {
16 |   common,
17 |   navigation,
18 |   overview,
19 |   settings,
20 |   rawData,
21 |   datasets,
22 |   tasks,
23 |   smartDatasetCreator,
24 |   dataPreview,
25 |   libraryDetails,
26 |   time,
27 |   actions,
28 |   auth
29 | };


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/SmartDatasetCreator/components/ErrorMessage.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { useTranslation } from 'react-i18next';
 3 | import { Card } from '../../../../components/ui/card';
 4 | import { AlertCircle } from 'lucide-react';
 5 | import { useSmartDatasetCreatorStore } from '../store/useSmartDatasetCreatorStore';
 6 | 
 7 | /**
 8 |  * Error banner for the smart dataset creator.
 9 |  *
10 |  * Reads `error` from the creator store and renders nothing while it is empty.
11 |  * The heading comes from the `common.error` translation key instead of a
12 |  * hard-coded Chinese string, so it follows the active language.
13 |  */
14 | export const ErrorMessage: React.FC = () => {
15 |   // Hooks run before the early return so the hook order is stable across renders.
16 |   const { t } = useTranslation();
17 |   const error = useSmartDatasetCreatorStore(state => state.error);
18 | 
19 |   if (!error) return null;
20 | 
21 |   return (
22 |     <Card className="border-red-200 bg-red-50 mb-6">
23 |       <div className="p-4">
24 |         <div className="flex items-center gap-2">
25 |           <AlertCircle className="w-5 h-5 text-red-500" />
26 |           <span className="text-red-700 font-medium">{t('common.error')}</span>
27 |         </div>
28 |         <p className="text-red-600 mt-1">{error}</p>
29 |       </div>
30 |     </Card>
31 |   );
32 | }; 


--------------------------------------------------------------------------------
/frontend/README.md:
--------------------------------------------------------------------------------
 1 | # Anima Project
 2 | 
 3 | Welcome! This project has been automatically generated by [Anima](https://animaapp.com/).
 4 | 
 5 | ## Getting started
 6 | 
 7 | > **Prerequisites:**
 8 | > The following steps require [NodeJS](https://nodejs.org/en/) to be installed on your system, so please
 9 | > install it beforehand if you haven't already.
10 | 
11 | To get started with your project, you'll first need to install the dependencies with:
12 | 
13 | ```
14 | npm install
15 | ```
16 | 
17 | Then, you'll be able to run a development version of the project with:
18 | 
19 | ```
20 | npm run dev
21 | ```
22 | 
23 | After a few seconds, your project should be accessible at the address
24 | [http://localhost:5173/](http://localhost:5173/)
25 | 
26 | 
27 | If you are satisfied with the result, you can finally build the project for release with:
28 | 
29 | ```
30 | npm run build
31 | ```
32 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/textarea.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import { cn } from "../../lib/utils";
 3 | 
 4 | export interface TextareaProps
 5 |   extends React.TextareaHTMLAttributes<HTMLTextAreaElement> {}
 6 | 
 7 | // Styled <textarea> that forwards its ref to the DOM element; caller-supplied
 8 | // classes are merged after the base classes via cn().
 9 | const Textarea = React.forwardRef<HTMLTextAreaElement, TextareaProps>(
10 |   function Textarea({ className, ...rest }, forwardedRef) {
11 |     const mergedClassName = cn(
12 |       "flex min-h-[80px] w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
13 |       className
14 |     );
15 | 
16 |     return (
17 |       <textarea className={mergedClassName} ref={forwardedRef} {...rest} />
18 |     );
19 |   }
20 | );
21 | Textarea.displayName = "Textarea";
22 | 
23 | export { Textarea };


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/libraryDetails.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backToList": "返回列表",
 3 |   "uploadFiles": "上传文件",
 4 |   "totalFiles": "总文件数",
 5 |   "processed": "已处理",
 6 |   "processing": "处理中",
 7 |   "pending": "待处理",
 8 |   "mdFiles": "MD文件",
 9 |   "fileList": "文件列表",
10 |   "totalFilesCount": "共 {{count}} 个文件",
11 |   "selectedCount": "已选择 {{count}} 个",
12 |   "convertToMD": "转换为MD",
13 |   "batchDelete": "批量删除",
14 |   "refresh": "刷新",
15 |   "loadingFiles": "加载文件列表中...",
16 |   "loadFailed": "加载文件列表失败",
17 |   "retry": "重试",
18 |   "noFiles": "暂无文件",
19 |   "uploadTip": "点击上方“上传文件”按钮开始添加文件",
20 |   "fileName": "文件名",
21 |   "type": "类型",
22 |   "size": "大小",
23 |   "status": "状态",
24 |   "uploadTime": "上传时间",
25 |   "actions": "操作",
26 |   "selectAll": "全选",
27 |   "selectFile": "选择文件",
28 |   "convertToMDAction": "转换为MD",
29 |   "viewDetails": "查看详情",
30 |   "deleteFileAction": "删除文件",
31 |   "downloadFileAction": "下载文件"
32 | }


--------------------------------------------------------------------------------
/backend/migrations/reset_system.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #
 3 | # Reset the system: clean the database, set up MinIO, then restart the Celery worker.
 4 | # Relative paths (cleanup_database.sql, setup_minio.py, celery.log) resolve against
 5 | # the current working directory.
 6 | #
 7 | # Environment:
 8 | #   RESET_DATABASE_URL  connection URI handed to psql (defaults to the local dev database)
 9 | 
10 | RESET_DATABASE_URL="${RESET_DATABASE_URL:-postgresql://postgres:password@localhost:5432/llama_dataset}"
11 | 
12 | echo "🚀 开始重置系统..."
13 | 
14 | # 1. Clean the database
15 | echo "📦 1. 清理数据库..."
16 | # ON_ERROR_STOP=1 makes psql exit non-zero when a statement in the file fails;
17 | # without it psql reports success even if the SQL errored.
18 | if psql -v ON_ERROR_STOP=1 "$RESET_DATABASE_URL" -f cleanup_database.sql; then
19 |     echo "✅ 数据库清理完成"
20 | else
21 |     echo "❌ 数据库清理失败"
22 |     exit 1
23 | fi
24 | 
25 | # 2. Set up MinIO
26 | echo "🗄️  2. 设置MinIO存储..."
27 | if python setup_minio.py; then
28 |     echo "✅ MinIO设置完成"
29 | else
30 |     echo "❌ MinIO设置失败"
31 |     exit 1
32 | fi
33 | 
34 | # 3. Restart Celery (if it is running)
35 | echo "🔄 3. 重启Celery worker..."
36 | # pkill exits non-zero when no worker matches; that is expected, so its status is ignored.
37 | pkill -f "celery.*worker" 2>/dev/null
38 | sleep 2
39 | nohup celery -A app.celery_app worker --loglevel=info > celery.log 2>&1 &
40 | echo "✅ Celery worker 已重启"
41 | 
42 | echo ""
43 | echo "🎉 系统重置完成!"
44 | echo "📋 检查项目:"
45 | echo "  - 数据库已清理"
46 | echo "  - MinIO存储桶已准备就绪"
47 | echo "  - Celery worker已重启"
48 | echo ""
49 | echo "现在可以重新上传文件并进行转换了!" 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/dataPreview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "title": "数据预览",
 3 |   "dataset": "数据集",
 4 |   "version": "版本",
 5 |   "switching": "切换中...",
 6 |   "noVersion": "暂无版本",
 7 |   "default": "默认",
 8 |   "current": "当前",
 9 |   "defaultVersion": "默认版本",
10 |   "draft": "草稿",
11 |   "deprecated": "已废弃",
12 |   "selectVersion": "选择版本",
13 |   "loading": "加载中...",
14 |   "totalFiles": "总文件数",
15 |   "previewFiles": "预览文件数",
16 |   "versionSwitchFailed": "版本切换失败",
17 |   "refreshPreview": "刷新预览",
18 |   "exportInfo": "导出信息",
19 |   "allTypes": "全部类型",
20 |   "selected": "已选择",
21 |   "files": "个文件",
22 |   "deleteSelected": "删除选中",
23 |   "uploading": "上传中...",
24 |   "addFiles": "添加文件",
25 |   "noPreviewData": "暂无可预览的数据",
26 |   "noDataToPreview": "暂无数据",
27 |   "totalRows": "总行数",
28 |   "columns": "列数",
29 |   "preview": "预览",
30 |   "download": "下载",
31 |   "rows": "行",
32 |   "format": "格式",
33 |   "totalItems": "总条目",
34 |   "items": "条"
35 | }


--------------------------------------------------------------------------------
/frontend/src/types/api.ts:
--------------------------------------------------------------------------------
 1 | // Generic envelope shared by all API responses
 2 | export interface ApiResponse<T = any> {
 3 |   success: boolean;
 4 |   message: string;
 5 |   data?: T;
 6 |   errors?: Record<string, string[]>;
 7 |   code?: string;
 8 | }
 9 | 
10 | // Paginated response: an ApiResponse whose data is a list, plus paging metadata
11 | export interface PaginatedResponse<T = any> extends ApiResponse<T[]> {
12 |   pagination: {
13 |     page: number;
14 |     per_page: number;
15 |     total: number;
16 |     total_pages: number;
17 |     has_next: boolean;
18 |     has_prev: boolean;
19 |   };
20 | }
21 | 
22 | // Common query parameters (paging and sorting)
23 | export interface QueryParams {
24 |   page?: number;
25 |   per_page?: number;
26 |   sort_by?: string;
27 |   sort_order?: 'asc' | 'desc';
28 | }
29 | 
30 | // API error carrying a numeric status and optional per-key error messages
31 | export class ApiError extends Error {
32 |   constructor(
33 |     message: string,
34 |     public status: number,
35 |     public errors?: Record<string, string[]>
36 |   ) {
37 |     super(message);
38 |     this.name = 'ApiError';
39 |   }
40 | } 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/index.ts:
--------------------------------------------------------------------------------
 1 | import common from './common.json';
 2 | import navigation from './navigation.json';
 3 | import overview from './overview.json';
 4 | import settings from './settings.json';
 5 | import rawData from './rawData.json';
 6 | import datasets from './datasets.json';
 7 | import tasks from './tasks.json';
 8 | import smartDatasetCreator from './smartDatasetCreator.json';
 9 | import dataPreview from './dataPreview.json';
10 | import libraryDetails from './libraryDetails.json';
11 | import time from './time.json';
12 | import actions from './actions.json';
13 | import auth from './auth.json';
14 | import dataGovernance from './dataGovernance.json';
15 | 
16 | export default {
17 |   common,
18 |   navigation,
19 |   overview,
20 |   settings,
21 |   rawData,
22 |   datasets,
23 |   tasks,
24 |   smartDatasetCreator,
25 |   dataPreview,
26 |   libraryDetails,
27 |   time,
28 |   actions,
29 |   auth,
30 |   dataGovernance
31 | };


--------------------------------------------------------------------------------
/frontend/public/vector---0.svg:
--------------------------------------------------------------------------------
1 | <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M14.3904 7.14808C14.5348 7.29236 14.616 7.48815 14.616 7.69231C14.616 7.89647 14.5348 8.09226 14.3904 8.23654L9.00577 13.6212C8.86149 13.7656 8.6657 13.8468 8.46154 13.8468C8.25738 13.8468 8.06159 13.7656 7.91731 13.6212L5.60962 11.3135C5.30905 11.0129 5.30905 10.5256 5.60962 10.225C5.91019 9.92443 6.39751 9.92443 6.69808 10.225L8.46154 11.9894L13.3019 7.14808C13.4462 7.00363 13.642 6.92247 13.8462 6.92247C14.0503 6.92247 14.2461 7.00363 14.3904 7.14808ZM20 10C20 15.5228 15.5228 20 10 20C4.47715 20 0 15.5228 0 10C0 4.47715 4.47715 0 10 0C15.5204 0.00582964 19.9942 4.47957 20 10ZM18.4615 10C18.4615 5.32682 14.6732 1.53846 10 1.53846C5.32682 1.53846 1.53846 5.32682 1.53846 10C1.53846 14.6732 5.32682 18.4615 10 18.4615C14.671 18.4562 18.4562 14.671 18.4615 10Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/input.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import { cn } from "../../lib/utils";
 3 | 
 4 | export interface InputProps
 5 |   extends React.InputHTMLAttributes<HTMLInputElement> {}
 6 | 
 7 | // Styled <input> that forwards its ref to the DOM element. `type` is passed
 8 | // through explicitly; caller-supplied classes are merged after the base
 9 | // classes via cn().
10 | const Input = React.forwardRef<HTMLInputElement, InputProps>(
11 |   function Input({ className, type, ...rest }, forwardedRef) {
12 |     const mergedClassName = cn(
13 |       "flex h-9 w-full rounded-md border border-input bg-background px-3 py-2 text-sm ring-offset-background file:border-0 file:bg-transparent file:text-sm file:font-medium placeholder:text-muted-foreground focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:cursor-not-allowed disabled:opacity-50",
14 |       className
15 |     );
16 | 
17 |     return (
18 |       <input type={type} className={mergedClassName} ref={forwardedRef} {...rest} />
19 |     );
20 |   }
21 | );
22 | Input.displayName = "Input";
23 | 
24 | export { Input };


--------------------------------------------------------------------------------
/frontend/src/types/data-governance.ts:
--------------------------------------------------------------------------------
 1 | export interface DataGovernanceProjectMetrics {
 2 |   totalDataSize: number;
 3 |   processedFiles: number;
 4 |   totalFiles: number;
 5 |   dataQualityScore: number;
 6 |   lastProcessedAt: string | null;
 7 |   processingProgress: number;
 8 | }
 9 | 
10 | export interface ProjectTeamMember {
11 |   id: string;
12 |   username: string;
13 |   fullName: string;
14 |   email: string;
15 |   role: 'owner' | 'admin' | 'editor' | 'viewer';
16 |   joinedAt: string;
17 | }
18 | 
19 | export interface DataGovernanceProject {
20 |   id: string;
21 |   name: string;
22 |   description: string;
23 |   status: 'active' | 'draft' | 'completed' | 'archived';
24 |   createdAt: string;
25 |   updatedAt: string;
26 |   owner: ProjectTeamMember;
27 |   team: ProjectTeamMember[];
28 |   metrics: DataGovernanceProjectMetrics;
29 |   pipeline: any[]; // TODO: Define pipeline type
30 |   dataSource: any[]; // TODO: Define data source type
31 |   config?: Record<string, any>;
32 | }


--------------------------------------------------------------------------------
/frontend/src/screens/Plugins/Plugins.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { useTranslation } from 'react-i18next';
 3 | import { Card } from '../../components/ui/card';
 4 | import { Rocket } from 'lucide-react';
 5 | 
 6 | /**
 7 |  * Placeholder screen for the plugins section: a translated heading above a
 8 |  * card showing a rocket icon and two translated lines of text.
 9 |  */
10 | export const Plugins = (): JSX.Element => {
11 |   const { t } = useTranslation();
12 | 
13 |   const heading = (
14 |     <div className="mb-6">
15 |       <h2 className="text-[22px] font-bold leading-7 text-[#0c141c]">{t('navigation.plugins')}</h2>
16 |     </div>
17 |   );
18 | 
19 |   const placeholderCard = (
20 |     <Card className="border-[#d1dbe8] p-8">
21 |       <div className="flex flex-col items-center justify-center text-center">
22 |         <Rocket className="w-12 h-12 text-[#1977e5] mb-4" />
23 |         <h3 className="text-xl font-semibold text-[#0c141c] mb-2">{t('overview.comingSoon')}</h3>
24 |         <p className="text-[#4f7096]">{t('overview.pluginsDevelopment')}</p>
25 |       </div>
26 |     </Card>
27 |   );
28 | 
29 |   return (
30 |     <div className="w-full max-w-[1200px] p-6">
31 |       {heading}
32 |       {placeholderCard}
33 |     </div>
34 |   );
35 | };


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | # Python
 2 | __pycache__/
 3 | *.py[cod]
 4 | *$py.class
 5 | 
 6 | # Virtual environment
 7 | venv/
 8 | */venv/
 9 | .venv/
10 | ENV/
11 | */ENV/
12 | env/
13 | */env/
14 | 
15 | # IDE and editor specific
16 | .vscode/
17 | .idea/
18 | *.sublime-project
19 | *.sublime-workspace
20 | *.DS_Store
21 | 
22 | # Secrets and environment variables (.env.example stays tracked; gitignore has no inline comments)
23 | .env
24 | .env.*
25 | !.env.example
26 | 
27 | # Sensitive configuration: production.py usually holds secrets; configure production via environment variables
28 | backend/config/production.py
29 | 
30 | # Python build artifacts
31 | build/
32 | dist/
33 | *.egg-info/
34 | *.egg
35 | 
36 | # Celery beat schedule file
37 | celerybeat-schedule.*
38 | 
39 | # Test output
40 | htmlcov/
41 | .coverage
42 | .pytest_cache/
43 | 
44 | # Frontend node_modules
45 | frontend/node_modules/
46 | frontend/build/
47 | frontend/.pnp.*
48 | 
49 | # Docker specific (if any local Docker build contexts are ignored)
50 | # docker-data/
51 | 
52 | # Log files
53 | *.log
54 | logs/
55 | 
56 | # OS generated files
57 | .DS_Store
58 | Thumbs.db 
59 | 
60 | 
61 | # docker
62 | docker/data/


--------------------------------------------------------------------------------
/backend/test/test_api.py:
--------------------------------------------------------------------------------
 1 | """API测试脚本"""
 2 | import requests
 3 | import json
 4 | 
 5 | BASE_URL = "http://localhost:5000/api/v1"
 6 | 
 7 | def test_health():
 8 |     """测试健康检查"""
 9 |     response = requests.get(f"{BASE_URL}/health")
10 |     print("健康检查:", response.json())
11 |     
12 | def test_create_dataset():
13 |     """测试创建数据集"""
14 |     data = {
15 |         "name": "测试数据集",
16 |         "description": "这是一个测试数据集"
17 |     }
18 |     response = requests.post(f"{BASE_URL}/datasets", json=data)
19 |     print("创建数据集:", response.json())
20 |     return response.json()
21 | 
22 | def test_get_datasets():
23 |     """测试获取数据集列表"""
24 |     response = requests.get(f"{BASE_URL}/datasets")
25 |     print("数据集列表:", response.json())
26 | 
27 | def test_get_stats():
28 |     """测试获取统计信息"""
29 |     response = requests.get(f"{BASE_URL}/overview/stats")
30 |     print("统计信息:", response.json())
31 | 
32 | if __name__ == "__main__":
33 |     print("开始测试API...")
34 |     test_health()
35 |     test_get_stats()
36 |     test_create_dataset()
37 |     test_get_datasets() 


--------------------------------------------------------------------------------
/backend/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3.11-slim
 2 | 
 3 | WORKDIR /app
 4 | 
 5 | # Install system dependencies: tesseract, the base libraries OpenCV needs, and other required tools
 6 | RUN apt-get update && apt-get install -y \
 7 |     gcc \
 8 |     g++ \
 9 |     tesseract-ocr \
10 |     tesseract-ocr-chi-sim \
11 |     tesseract-ocr-chi-tra \
12 |     poppler-utils \
13 |     libpq-dev \
14 |     build-essential \
15 |     libglib2.0-0 \
16 |     libsm6 \
17 |     libxext6 \
18 |     libxrender-dev \
19 |     libgomp1 \
20 |     libgl1 \
21 |     && rm -rf /var/lib/apt/lists/*
22 | 
23 | # Copy the dependency manifest (libgl1 above replaces the transitional libgl1-mesa-glx package)
24 | COPY requirements.txt .
25 | 
26 | # Upgrade pip and install the Python dependencies
27 | RUN pip install --upgrade pip && \
28 |     pip install --no-cache-dir -r requirements.txt
29 | 
30 | # Copy the application code
31 | COPY . .
32 | 
33 | # Set environment variables
34 | ENV FLASK_APP=run.py
35 | ENV FLASK_ENV=development
36 | ENV OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
37 | ENV PYTHONPATH=/app
38 | 
39 | # Make the scripts executable (must happen after the files are copied)
40 | RUN chmod +x start_celery_threads.sh start_celery.sh
41 | 
42 | # Expose the port
43 | EXPOSE 8897
44 | 
45 | # Default start command (overridden by docker compose)
46 | CMD ["python", "run.py"] 


--------------------------------------------------------------------------------
/backend/alembic/versions/9d5aba691653_add_dataflow_task_types_to_enum.py:
--------------------------------------------------------------------------------
 1 | """add_dataflow_task_types_to_enum
 2 | 
 3 | Revision ID: 9d5aba691653
 4 | Revises: 851830d12fef
 5 | Create Date: 2025-07-06 23:23:40.171051
 6 | 
 7 | """
 8 | from typing import Sequence, Union
 9 | 
10 | from alembic import op
11 | import sqlalchemy as sa
12 | 
13 | 
14 | # revision identifiers, used by Alembic.
15 | revision: str = '9d5aba691653'
16 | down_revision: Union[str, None] = '851830d12fef'
17 | branch_labels: Union[str, Sequence[str], None] = None
18 | depends_on: Union[str, Sequence[str], None] = None
19 | 
20 | 
21 | def upgrade() -> None:
22 |     # 添加新的 DataFlow 任务类型到 tasktype 枚举
23 |     op.execute("ALTER TYPE tasktype ADD VALUE 'PRETRAIN_FILTER'")
24 |     op.execute("ALTER TYPE tasktype ADD VALUE 'PRETRAIN_SYNTHETIC'")
25 |     op.execute("ALTER TYPE tasktype ADD VALUE 'SFT_FILTER'")
26 |     op.execute("ALTER TYPE tasktype ADD VALUE 'SFT_SYNTHETIC'")
27 | 
28 | 
29 | def downgrade() -> None:
30 |     # 注意：PostgreSQL 不支持直接从枚举中删除值
31 |     # 如果需要回滚，需要重新创建枚举类型
32 |     pass
33 | 


--------------------------------------------------------------------------------
/docker/redis/redis.conf:
--------------------------------------------------------------------------------
 1 | # Redis 配置文件
 2 | 
 3 | # 基本配置
 4 | port 6379
 5 | bind 0.0.0.0
 6 | protected-mode no
 7 | 
 8 | # 持久化配置
 9 | dir /data
10 | dbfilename dump.rdb
11 | 
12 | # 内存配置
13 | maxmemory 512mb
14 | maxmemory-policy allkeys-lru
15 | 
16 | # 日志配置
17 | loglevel notice
18 | 
19 | # 屏蔽 ARM64-COW-BUG 内核缺陷警告
20 | # 在受该内核缺陷影响的 ARM64 主机上，Redis 默认会拒绝启动；此项允许其继续启动（与 RDB 加载或模块无关）
21 | ignore-warnings ARM64-COW-BUG
22 | 
23 | # 后台保存（BGSAVE）失败时仍继续接受写入
24 | # 注意：RDB 持久化失败可能不会被察觉，存在数据丢失风险
25 | stop-writes-on-bgsave-error no
26 | rdbchecksum yes
27 | rdbcompression yes
28 | 
29 | # 关闭对加载的 RDB / RESTORE 数据的深度完整性校验
30 | # Redis 6.2+ 支持的选项（与模块加载无关）
31 | sanitize-dump-payload no
32 | 
33 | # AOF配置（可选，提供更好的持久性）
34 | appendonly no
35 | appendfilename "appendonly.aof"
36 | appendfsync everysec
37 | 
38 | # 性能优化
39 | tcp-backlog 511
40 | timeout 0
41 | tcp-keepalive 300
42 | 
43 | # 慢查询日志
44 | slowlog-log-slower-than 10000
45 | slowlog-max-len 128
46 | 
47 | # 客户端输出缓冲限制
48 | client-output-buffer-limit normal 0 0 0
49 | client-output-buffer-limit replica 256mb 64mb 60
50 | client-output-buffer-limit pubsub 32mb 8mb 60
51 | 
52 | # 其他优化
53 | hz 10
54 | dynamic-hz yes 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/annotation.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "imageAnnotation": "Image Annotation",
 3 |   "videoAnnotation": "Video Annotation",
 4 |   "qa": "Q&A",
 5 |   "caption": "Caption",
 6 |   "transcript": "Transcript",
 7 |   "question": "Question",
 8 |   "answer": "Answer",
 9 |   "confidence": "Confidence",
10 |   "newQuestion": "New Question",
11 |   "newAnswer": "New Answer",
12 |   "addQA": "Add Q&A",
13 |   "questionPlaceholder": "Enter your question...",
14 |   "answerPlaceholder": "Enter your answer...",
15 |   "captionPlaceholder": "Enter image caption...",
16 |   "transcriptPlaceholder": "Enter video transcript...",
17 |   "aiAssist": "AI Assist",
18 |   "aiAssistTitle": "AI Assisted Annotation",
19 |   "aiPrompt": "Prompt",
20 |   "aiPromptPlaceholder": "Enter a prompt to help AI generate more accurate annotations...",
21 |   "generate": "Generate",
22 |   "aiAssistSuccess": "AI Assisted Annotation Success",
23 |   "aiAnnotationsGenerated": "AI has generated annotations with confidence: {{confidence}}",
24 |   "aiAssistFailed": "AI Assisted Annotation Failed",
25 |   "aiAnnotationsFailed": "Failed to generate AI annotations, please try again"
26 | } 


--------------------------------------------------------------------------------
/frontend/public/vector---0-9.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="20" viewBox="0 0 18 20" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M9 0C3.95344 0 0 2.30625 0 5.25V14.25C0 17.1938 3.95344 19.5 9 19.5C14.0466 19.5 18 17.1938 18 14.25V5.25C18 2.30625 14.0466 0 9 0ZM16.5 9.75C16.5 10.6519 15.7612 11.5716 14.4741 12.2738C13.0247 13.0641 11.0803 13.5 9 13.5C6.91969 13.5 4.97531 13.0641 3.52594 12.2738C2.23875 11.5716 1.5 10.6519 1.5 9.75V8.19C3.09937 9.59625 5.83406 10.5 9 10.5C12.1659 10.5 14.9006 9.5925 16.5 8.19V9.75ZM3.52594 2.72625C4.97531 1.93594 6.91969 1.5 9 1.5C11.0803 1.5 13.0247 1.93594 14.4741 2.72625C15.7612 3.42844 16.5 4.34812 16.5 5.25C16.5 6.15187 15.7612 7.07156 14.4741 7.77375C13.0247 8.56406 11.0803 9 9 9C6.91969 9 4.97531 8.56406 3.52594 7.77375C2.23875 7.07156 1.5 6.15187 1.5 5.25C1.5 4.34812 2.23875 3.42844 3.52594 2.72625ZM14.4741 16.7738C13.0247 17.5641 11.0803 18 9 18C6.91969 18 4.97531 17.5641 3.52594 16.7738C2.23875 16.0716 1.5 15.1519 1.5 14.25V12.69C3.09937 14.0962 5.83406 15 9 15C12.1659 15 14.9006 14.0925 16.5 12.69V14.25C16.5 15.1519 15.7612 16.0716 14.4741 16.7738Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/plugins/custom_distillers/__init__.py:
--------------------------------------------------------------------------------
 1 | # plugins/custom_distillers/__init__.py
 2 | # 用于自定义蒸馏器插件
 3 | 
 4 | # 示例：自动注册逻辑 (类似 custom_parsers)
 5 | # from backend.app.plugins.registry import plugin_registry
 6 | 
 7 | # def register_custom_distillers():
 8 | #     import os
 9 | #     import importlib
10 | #     current_dir = os.path.dirname(os.path.abspath(__file__))
11 | #     for filename in os.listdir(current_dir):
12 | #         if filename.endswith('.py') and filename != '__init__.py':
13 | #             module_name = filename[:-3]
14 | #             try:
15 | #                 module = importlib.import_module(f'.{module_name}', package=__name__)
16 | #                 if hasattr(module, 'distiller_class') and hasattr(module, 'distiller_name'):
17 | #                     plugin_registry.register_distiller(module.distiller_name, module.distiller_class)
18 | #                     print(f"Registered custom distiller: {module.distiller_name}")
19 | #             except Exception as e:
20 | #                 print(f"Error loading custom distiller from {filename}: {e}")
21 | 
22 | # register_custom_distillers()
23 | 
24 | print("Custom distillers package initialized (mock).") 


--------------------------------------------------------------------------------
/backend/requirements_optimized.txt:
--------------------------------------------------------------------------------
 1 | # Flask核心
 2 | Flask==3.0.0
 3 | Flask-RESTful==0.3.10
 4 | Flask-CORS==4.0.0
 5 | Flask-JWT-Extended==4.5.3
 6 | 
 7 | # 数据库相关
 8 | SQLAlchemy==2.0.31
 9 | psycopg2-binary==2.9.10
10 | alembic==1.13.0
11 | redis==5.0.1
12 | flask_sqlalchemy
13 | 
14 | # API文档
15 | flasgger==0.9.7.1
16 | 
17 | # 数据处理
18 | pandas>=1.5.0
19 | numpy>=1.24.0,<2.0.0
20 | 
21 | # 对象存储
22 | minio==7.2.0
23 | 
24 | # 任务队列
25 | celery==5.3.4
26 | kombu==5.3.4
27 | 
28 | # 工具库
29 | python-dotenv==1.0.0
30 | marshmallow==3.19.0
31 | marshmallow-sqlalchemy==0.29.0
32 | python-multipart==0.0.6
33 | 
34 | # 文档解析
35 | python-docx==1.1.0
36 | python-pptx==0.6.23
37 | PyPDF2==3.0.1
38 | 
39 | # 开发工具
40 | pytest==7.4.3
41 | pytest-cov==4.1.0
42 | black==23.12.0
43 | flake8==6.1.0
44 | 
45 | # Markdown
46 | markitdown
47 | 
48 | # LLM集成
49 | langchain
50 | langchain-openai
51 | langchain-google-genai
52 | langchain-anthropic
53 | langchain-community
54 | pillow>=9.0.0
55 | pdf2image
56 | pytesseract
57 | google-generativeai
58 | anthropic>=0.8.0
59 | openai>=1.0.0
60 | 
61 | # 数据集导入
62 | huggingface_hub>=0.19.0
63 | modelscope>=1.9.0
64 | 
65 | # 图像处理
66 | opencv-python-headless>=4.8.0
67 | scikit-image>=0.21.0
68 | 


--------------------------------------------------------------------------------
/frontend/src/components/ui/slider.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as SliderPrimitive from "@radix-ui/react-slider"
 3 | import { cn } from "../../lib/utils"
 4 | 
 5 | const Slider = React.forwardRef<
 6 |   React.ElementRef<typeof SliderPrimitive.Root>,
 7 |   React.ComponentPropsWithoutRef<typeof SliderPrimitive.Root>
 8 | >(({ className, ...props }, ref) => (
 9 |   <SliderPrimitive.Root
10 |     ref={ref}
11 |     className={cn(
12 |       "relative flex w-full touch-none select-none items-center",
13 |       className
14 |     )}
15 |     {...props}
16 |   >
17 |     <SliderPrimitive.Track className="relative h-2 w-full grow overflow-hidden rounded-full bg-secondary">
18 |       <SliderPrimitive.Range className="absolute h-full bg-primary" />
19 |     </SliderPrimitive.Track>
20 |     <SliderPrimitive.Thumb className="block h-5 w-5 rounded-full border-2 border-primary bg-background ring-offset-background transition-colors focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50" />
21 |   </SliderPrimitive.Root>
22 | ))
23 | Slider.displayName = SliderPrimitive.Root.displayName
24 | 
25 | export { Slider }


--------------------------------------------------------------------------------
/frontend/src/components/ui/language-switcher.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { useTranslation } from 'react-i18next';
 3 | import { Button } from './button';
 4 | 
 5 | interface LanguageSwitcherProps {
 6 |   isCollapsed?: boolean;
 7 | }
 8 | 
 9 | export const LanguageSwitcher = ({ isCollapsed = false }: LanguageSwitcherProps): JSX.Element => {
10 |   const { i18n } = useTranslation();
11 | 
12 |   // Rotate through the supported languages; an unrecognised current language
13 |   // yields index -1, so the rotation lands on the first entry ('en').
14 |   const toggleLanguage = () => {
15 |     const order = ['en', 'zh', 'ja'];
16 |     const position = order.indexOf(i18n.language);
17 |     i18n.changeLanguage(order[(position + 1) % order.length]);
18 |   };
19 | 
20 |   // The button shows the language a click switches to, not the active one.
21 |   let shortLabel = 'En';
22 |   let fullLabel = 'English';
23 |   if (i18n.language === 'en') {
24 |     shortLabel = '中';
25 |     fullLabel = '中文';
26 |   } else if (i18n.language === 'zh') {
27 |     shortLabel = '日';
28 |     fullLabel = '日本語';
29 |   }
30 | 
31 |   return (
32 |     <Button variant="ghost" onClick={toggleLanguage} className="text-sm font-medium">
33 |       {isCollapsed ? (
34 |         <span className="w-6 h-6 flex items-center justify-center">{shortLabel}</span>
35 |       ) : (
36 |         <span>{fullLabel}</span>
37 |       )}
38 |     </Button>
39 |   );
40 | };


--------------------------------------------------------------------------------
/frontend/src/components/Layout.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useState } from "react";
 2 | import { Outlet } from "react-router-dom";
 3 | import { Sidenav } from "./ui/sidenav";
 4 | import { MainContentLayout } from "./ui/main-content-layout";
 5 | 
 6 | export const Layout = (): JSX.Element => {
 7 |   const [isSidenavCollapsed, setIsSidenavCollapsed] = useState(false);
 8 | 
 9 |   const handleSidenavCollapsedChange = (isCollapsed: boolean) => {
10 |     setIsSidenavCollapsed(isCollapsed);
11 |   };
12 | 
13 |   return (
14 |     <main className="flex w-full h-screen bg-gradient-to-br from-gray-50 to-gray-100 overflow-hidden">
15 |       {/* 侧边导航栏 */}
16 |       <div className="fixed left-0 top-0 h-screen z-20">
17 |         <Sidenav 
18 |           onCollapsedChange={handleSidenavCollapsedChange}
19 |         />
20 |       </div>
21 |       
22 |       {/* 主内容区域 */}
23 |       <div 
24 |         className={`flex-1 min-h-screen overflow-y-auto transition-all duration-300 ease-in-out ${
25 |           isSidenavCollapsed ? 'ml-16' : 'ml-[300px]'
26 |         }`}
27 |       >
28 |         <div className="p-6 w-full">
29 |           <MainContentLayout>
30 |             <Outlet />
31 |           </MainContentLayout>
32 |         </div>
33 |       </div>
34 |     </main>
35 |   );
36 | }; 


--------------------------------------------------------------------------------
/frontend/src/components/ui/badge.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { cva, type VariantProps } from "class-variance-authority"
 3 | import { cn } from "../../lib/utils"
 4 | 
 5 | const badgeVariants = cva(
 6 |   "inline-flex items-center rounded-full border px-2.5 py-0.5 text-xs font-semibold transition-colors focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2",
 7 |   {
 8 |     variants: {
 9 |       variant: {
10 |         default:
11 |           "border-transparent bg-primary text-primary-foreground hover:bg-primary/80",
12 |         secondary:
13 |           "border-transparent bg-secondary text-secondary-foreground hover:bg-secondary/80",
14 |         destructive:
15 |           "border-transparent bg-destructive text-destructive-foreground hover:bg-destructive/80",
16 |         outline: "text-foreground",
17 |       },
18 |     },
19 |     defaultVariants: {
20 |       variant: "default",
21 |     },
22 |   }
23 | )
24 | 
25 | export interface BadgeProps
26 |   extends React.HTMLAttributes<HTMLDivElement>,
27 |     VariantProps<typeof badgeVariants> {}
28 | 
29 | function Badge({ className, variant, ...props }: BadgeProps) {
30 |   return (
31 |     <div className={cn(badgeVariants({ variant }), className)} {...props} />
32 |   )
33 | }
34 | 
35 | export { Badge, badgeVariants } 


--------------------------------------------------------------------------------
/plugins/custom_parsers/__init__.py:
--------------------------------------------------------------------------------
 1 | # plugins/custom_parsers/__init__.py
 2 | # 这个文件使得 custom_parsers 成为一个 Python 包。
 3 | # 可以在这里定义如何发现和注册这个目录下的自定义解析器。
 4 | 
 5 | # 示例：如果有一个全局的插件注册表
 6 | # from backend.app.plugins.registry import plugin_registry # 假设可以这样导入
 7 | 
 8 | # def register_custom_parsers():
 9 | #     # 动态导入并注册此目录下的解析器
10 | #     import os
11 | #     import importlib
12 | #     current_dir = os.path.dirname(os.path.abspath(__file__))
13 | #     for filename in os.listdir(current_dir):
14 | #         if filename.endswith('.py') and filename != '__init__.py':
15 | #             module_name = filename[:-3]
16 | #             try:
17 | #                 module = importlib.import_module(f'.{module_name}', package=__name__)
18 | #                 # 假设每个解析器模块都有一个名为 'parser_class' 的类变量
19 | #                 # 以及一个名为 'parser_name' 的字符串变量
20 | #                 if hasattr(module, 'parser_class') and hasattr(module, 'parser_name'):
21 | #                     plugin_registry.register_parser(module.parser_name, module.parser_class)
22 | #                     print(f"Registered custom parser: {module.parser_name}")
23 | #             except Exception as e:
24 | #                 print(f"Error loading custom parser from {filename}: {e}")
25 | 
26 | # 如果在应用启动时调用此函数：
27 | # register_custom_parsers()
28 | 
29 | print("Custom parsers package initialized (mock).") 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/overview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "welcome": "欢迎使用拼好数",
 3 |   "welcomeDescription": "开始探索您的数据集、管理任务和监控存储使用情况。",
 4 |   "systemOverview": "系统概览",
 5 |   "recentActivity": "最近活动",
 6 |   "quickActions": "快捷操作",
 7 |   "systemNotifications": "系统通知",
 8 |   "comingSoon": "即将推出",
 9 |   "pluginsDevelopment": "插件支持正在开发中",
10 |   "features": {
11 |     "dataImport": {
12 |       "title": "数据导入与处理",
13 |       "description": "支持多种格式的原始数据导入，包括 CSV、JSON、Excel 等。智能数据预处理，自动识别数据类型，生成高质量的训练数据集。"
14 |     },
15 |     "datasetManagement": {
16 |       "title": "多平台数据集管理",
17 |       "description": "无缝集成 Hugging Face Hub 和魔搭社区等主流平台。一键导入热门开源数据集，支持版本管理和更新同步。"
18 |     },
19 |     "taskScheduling": {
20 |       "title": "智能任务调度",
21 |       "description": "自动化数据处理流水线，支持批量操作和定时任务。实时监控任务状态，提供详细的执行日志和性能指标。"
22 |     },
23 |     "storageSharing": {
24 |       "title": "存储与分享",
25 |       "description": "云端存储，支持大规模数据集的安全存储和备份。团队协作功能，支持数据集共享和权限管理。"
26 |     }
27 |   },
28 |   "quickStart": {
29 |     "title": "快速开始指南",
30 |     "step1": {
31 |       "title": "导入数据",
32 |       "description": "本地文件或HF、魔搭平台"
33 |     },
34 |     "step2": {
35 |       "title": "处理数据",
36 |       "description": "清洗、转换和质量检查"
37 |     },
38 |     "step3": {
39 |       "title": "生成数据集",
40 |       "description": "训练数据集并导出"
41 |     }
42 |   },
43 |   "error": {
44 |     "loadingOverview": "加载概览数据时出错: {{error}}"
45 |   }
46 | }


--------------------------------------------------------------------------------
/frontend/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "version": "1.0.0",
 3 |   "source": "./index.html",
 4 |   "type": "module",
 5 |   "name": "pindata",
 6 |   "description": "A React project for managing and analyzing datasets with LLaMA",
 7 |   "scripts": {
 8 |     "dev": "vite",
 9 |     "build": "vite build"
10 |   },
11 |   "dependencies": {
12 |     "@heroicons/react": "^2.2.0",
13 |     "@radix-ui/react-dialog": "^1.0.5",
14 |     "@radix-ui/react-dropdown-menu": "^2.0.6",
15 |     "@radix-ui/react-slider": "^1.3.5",
16 |     "@radix-ui/react-slot": "^1.1.0",
17 |     "@radix-ui/react-tabs": "^1.0.4",
18 |     "@tanstack/react-table": "^8.13.2",
19 |     "class-variance-authority": "^0.7.0",
20 |     "clsx": "2.1.1",
21 |     "i18next": "^23.10.1",
22 |     "i18next-browser-languagedetector": "^7.2.0",
23 |     "lucide-react": "^0.453.0",
24 |     "react": "^18.2.0",
25 |     "react-dom": "^18.2.0",
26 |     "react-hot-toast": "^2.5.2",
27 |     "react-i18next": "^14.1.0",
28 |     "react-router-dom": "^6.8.1",
29 |     "tailwind-merge": "2.5.4",
30 |     "zustand": "^5.0.5"
31 |   },
32 |   "devDependencies": {
33 |     "@types/react": "18.2.0",
34 |     "@types/react-dom": "18.2.0",
35 |     "@vitejs/plugin-react": "4.3.4",
36 |     "esbuild": "0.24.0",
37 |     "globals": "15.12.0",
38 |     "tailwindcss": "3.4.16",
39 |     "vite": "6.0.4"
40 |   },
41 |   "alias": {
42 |     "@/*": "./src/components/ui/$1"
43 |   }
44 | }


--------------------------------------------------------------------------------
/frontend/src/components/auth/AuthProvider.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useEffect, useState } from 'react';
 2 | import { useAuthStore, setupTokenRefresh, clearTokenRefresh } from '../../store/authStore';
 3 | 
 4 | interface AuthProviderProps {
 5 |   children: React.ReactNode;
 6 | }
 7 | 
 8 | /**
 9 |  * Runs the auth store's `initialize()` once on mount, starts the token
10 |  * refresh afterwards, and renders a loading spinner until that attempt has
11 |  * finished (successfully or not). Children are rendered after that.
12 |  */
13 | export const AuthProvider: React.FC<AuthProviderProps> = ({ children }) => {
14 |   const [isInitialized, setIsInitialized] = useState(false);
15 |   const { initialize } = useAuthStore();
16 | 
17 |   useEffect(() => {
18 |     // Set by the cleanup below so a late-resolving initialize() cannot start a
19 |     // refresh timer (or update state) after this effect has been torn down.
20 |     let cancelled = false;
21 | 
22 |     const initAuth = async () => {
23 |       try {
24 |         await initialize();
25 |         if (!cancelled) {
26 |           setupTokenRefresh();
27 |         }
28 |       } catch (error) {
29 |         console.error('Failed to initialize auth:', error);
30 |       } finally {
31 |         if (!cancelled) {
32 |           setIsInitialized(true);
33 |         }
34 |       }
35 |     };
36 | 
37 |     initAuth();
38 | 
39 |     // Stop the refresh timer and invalidate any in-flight initialization.
40 |     return () => {
41 |       cancelled = true;
42 |       clearTokenRefresh();
43 |     };
44 |   }, [initialize]);
45 | 
46 |   // Show a loading state while authentication is being initialized.
47 |   if (!isInitialized) {
48 |     return (
49 |       <div className="min-h-screen flex items-center justify-center bg-gray-50">
50 |         <div className="flex flex-col items-center space-y-4">
51 |           <div className="animate-spin rounded-full h-12 w-12 border-b-2 border-blue-600"></div>
52 |           <p className="text-gray-600">初始化中...</p>
53 |         </div>
54 |       </div>
55 |     );
56 |   }
57 | 
58 |   return <>{children}</>;
59 | };


--------------------------------------------------------------------------------
/frontend/src/lib/config.ts:
--------------------------------------------------------------------------------
 1 | // 配置常量
 2 | export const config = {
 3 |   // API基础地址
 4 |   apiBaseUrl: 'http://localhost:8897/api/v1/',
 5 |   downloadBaseUrl: 'http://localhost:8897/',
 6 |   
 7 |   // 应用信息
 8 |   appName: 'LLaMA Dataset Manager',
 9 |   appVersion: '1.0.0',
10 |   
11 |   // 分页配置
12 |   defaultPageSize: 20,
13 |   maxPageSize: 100,
14 |   
15 |   // 文件上传配置
16 |   maxFileSize: 100 * 1024 * 1024, // 100MB
17 |   allowedFileTypes: [
18 |     'application/pdf',
19 |     'application/vnd.openxmlformats-officedocument.wordprocessingml.document',
20 |     'application/vnd.openxmlformats-officedocument.presentationml.presentation',
21 |     'text/plain',
22 |     'text/markdown'
23 |   ],
24 |   
25 |   // UI配置
26 |   tableRefreshInterval: 30000, // 30秒自动刷新
27 |   notificationDuration: 3000, // 通知显示时长
28 | };
29 | 
30 | // 数据类型映射
31 | export const dataTypeLabels = {
32 |   training: '训练数据',
33 |   evaluation: '评估数据',
34 |   mixed: '混合数据',
35 | };
36 | 
37 | // 处理状态映射
38 | export const processStatusLabels = {
39 |   pending: '等待处理',
40 |   processing: '处理中',
41 |   completed: '已完成',
42 |   failed: '处理失败',
43 | };
44 | 
45 | // 处理状态颜色映射
46 | export const processStatusColors = {
47 |   pending: 'text-yellow-600 bg-yellow-50 border-yellow-200',
48 |   processing: 'text-blue-600 bg-blue-50 border-blue-200',
49 |   completed: 'text-green-600 bg-green-50 border-green-200',
50 |   failed: 'text-red-600 bg-red-50 border-red-200',
51 | }; 


--------------------------------------------------------------------------------
/frontend/src/components/ui/switch.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import { cn } from "../../lib/utils";
 3 | 
 4 | export interface SwitchProps {
 5 |   checked?: boolean;
 6 |   onCheckedChange?: (checked: boolean) => void;
 7 |   disabled?: boolean;
 8 |   className?: string;
 9 | }
10 | 
11 | const Switch = React.forwardRef<
12 |   HTMLButtonElement,
13 |   SwitchProps
14 | >(({ className, checked = false, onCheckedChange, disabled = false, ...props }, ref) => {
15 |   return (
16 |     <button
17 |       type="button"
18 |       role="switch"
19 |       aria-checked={checked}
20 |       ref={ref}
21 |       className={cn(
22 |         "peer inline-flex h-6 w-11 shrink-0 cursor-pointer items-center rounded-full border-2 border-transparent transition-colors",
23 |         "focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-[#1977e5] focus-visible:ring-offset-2",
24 |         "disabled:cursor-not-allowed disabled:opacity-50",
25 |         checked ? "bg-[#1977e5]" : "bg-[#d1dbe8]",
26 |         className
27 |       )}
28 |       disabled={disabled}
29 |       onClick={() => onCheckedChange?.(!checked)}
30 |       {...props}
31 |     >
32 |       <div
33 |         className={cn(
34 |           "pointer-events-none block h-5 w-5 rounded-full bg-white shadow-lg ring-0 transition-transform",
35 |           checked ? "translate-x-5" : "translate-x-0"
36 |         )}
37 |       />
38 |     </button>
39 |   );
40 | });
41 | Switch.displayName = "Switch";
42 | 
43 | export { Switch }; 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/dataPreview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "title": "データプレビュー",
 3 |   "dataset": "データセット",
 4 |   "version": "バージョン",
 5 |   "switching": "切り替え中...",
 6 |   "noVersion": "バージョンなし",
 7 |   "default": "デフォルト",
 8 |   "current": "現在",
 9 |   "defaultVersion": "デフォルトバージョン",
10 |   "draft": "ドラフト",
11 |   "deprecated": "廃止済み",
12 |   "selectVersion": "バージョンを選択",
13 |   "loading": "読み込み中...",
14 |   "totalFiles": "総ファイル数",
15 |   "previewFiles": "プレビューファイル数",
16 |   "versionSwitchFailed": "バージョン切り替えに失敗",
17 |   "refreshPreview": "プレビューを更新",
18 |   "exportInfo": "情報をエクスポート",
19 |   "allTypes": "すべてのタイプ",
20 |   "selected": "選択済み",
21 |   "files": "個のファイル",
22 |   "deleteSelected": "選択項目を削除",
23 |   "uploading": "アップロード中...",
24 |   "addFiles": "ファイルを追加",
25 |   "noPreviewData": "プレビュー可能なデータがありません",
26 |   "noDataToPreview": "データなし",
27 |   "totalRows": "総行数",
28 |   "columns": "列数",
29 |   "preview": "プレビュー",
30 |   "download": "ダウンロード",
31 |   "rows": "行",
32 |   "format": "フォーマット",
33 |   "totalItems": "総項目数",
34 |   "items": "項目",
35 |   "entry": "エントリー",
36 |   "size": "サイズ",
37 |   "mode": "モード",
38 |   "imagePreviewDeveloping": "画像プレビュー機能は開発中です",
39 |   "previewDeveloping": "プレビュー機能は開発中です",
40 |   "unsupportedPreview": "このファイルタイプのプレビューはサポートされていません",
41 |   "downloadFile": "ダウンロード",
42 |   "deleteFile": "削除",
43 |   "checksum": "チェックサム",
44 |   "unsupportedFileType": "このファイルタイプはプレビューできません",
45 |   "versionInfo": "バージョン情報",
46 |   "commitHash": "コミットハッシュ",
47 |   "createTime": "作成時間",
48 |   "commitMessage": "コミットメッセージ"
49 | }


--------------------------------------------------------------------------------
/backend/app/utils/response.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, List, Optional
 2 | from flask import jsonify
 3 | import math
 4 | 
 5 | def success_response(data: Any = None, message: str = "操作成功", meta: Optional[Dict] = None) -> Dict:
 6 |     """成功响应格式"""
 7 |     response = {
 8 |         "success": True,
 9 |         "message": message,
10 |         "data": data
11 |     }
12 |     
13 |     if meta:
14 |         response["meta"] = meta
15 |     
16 |     return response
17 | 
18 | def error_response(message: str = "操作失败", errors: Optional[Dict] = None, code: Optional[str] = None) -> Dict:
19 |     """错误响应格式"""
20 |     response = {
21 |         "success": False,
22 |         "message": message
23 |     }
24 |     
25 |     if errors:
26 |         response["errors"] = errors
27 |     
28 |     if code:
29 |         response["code"] = code
30 |     
31 |     return response
32 | 
33 | def paginated_response(
34 |     data: List[Any], 
35 |     page: int, 
36 |     per_page: int, 
37 |     total: int, 
38 |     message: str = "获取数据成功"
39 | ) -> Dict:
40 |     """分页响应格式"""
41 |     total_pages = math.ceil(total / per_page) if per_page > 0 else 0
42 |     
43 |     return {
44 |         "success": True,
45 |         "message": message,
46 |         "data": data,
47 |         "pagination": {
48 |             "page": page,
49 |             "per_page": per_page,
50 |             "total": total,
51 |             "total_pages": total_pages,
52 |             "has_next": page < total_pages,
53 |             "has_prev": page > 1
54 |         }
55 |     } 


--------------------------------------------------------------------------------
/backend/start_celery_threads.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Standalone launcher: runs a thread-pool Celery worker in the foreground
 4 | # inside a Conda environment and reports how the worker exited.
 5 | 
 6 | # Name of the Conda environment the worker runs in
 7 | CONDA_ENV_NAME="pindata-env"
 8 | 
 9 | # Cleanup handler, invoked by the SIGINT/SIGTERM trap below
10 | cleanup() {
11 |     echo ""
12 |     echo "🛑 接收到停止信号，正在停止Celery..."
13 |     # Send TERM to the current process group (pid 0 addresses the whole group)
14 |     kill -TERM 0 2>/dev/null
15 |     sleep 3
16 |     # If anything is still running, force-stop it
17 |     # NOTE(review): pid 0 targets the caller's process group, which presumably
18 |     # includes this script itself, so the two lines after the SIGKILL may never
19 |     # run — confirm, and consider signalling only the worker's PID instead.
20 |     kill -9 0 2>/dev/null
21 |     echo "✅ Celery已停止"
22 |     exit 0
23 | }
24 | 
25 | # Install the signal handler
26 | trap cleanup SIGINT SIGTERM
27 | 
28 | echo "🚀 Celery Worker 独立启动脚本"
29 | echo "按 Ctrl+C 停止服务"
30 | echo "========================"
31 | 
32 | # Start the Celery worker (thread pool)
33 | echo "Starting Celery Worker with threads in Conda env: $CONDA_ENV_NAME..."
34 | 
35 | # Environment variables exported to the worker process
36 | export FLASK_APP=run.py
37 | export FLASK_ENV=development
38 | 
39 | # macOS compatibility setting
40 | export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
41 | 
42 | echo "📋 Celery Configuration:"
43 | echo "   - Environment: $CONDA_ENV_NAME"
44 | echo "   - Pool Type: threads"
45 | echo "   - Concurrency: 4"
46 | echo "   - Node Name: worker@$(hostname)"
47 | echo ""
48 | 
49 | # Use `conda run` to start the Celery worker in the chosen environment
50 | # --pool=threads: use a thread pool instead of a process pool (more stable on macOS)
51 | # --concurrency: number of worker threads
52 | echo "🚀 Starting Celery worker..."
53 | echo "⏳ Celery运行中... (关闭此终端将停止Celery)"
54 | echo ""
55 | 
56 | # Run Celery in the foreground; the script blocks here until the worker exits
57 | conda run -n "$CONDA_ENV_NAME" celery -A celery_worker.celery worker --loglevel=info --pool=threads --concurrency=4 -n worker@%h
58 | 
59 | # Report the worker's exit status
60 | EXIT_CODE=$?
61 | if [ $EXIT_CODE -eq 0 ]; then
62 |     echo "✅ Celery worker stopped gracefully"
63 | else
64 |     echo "❌ Celery worker exited with error code: $EXIT_CODE"
65 | fi
66 | 
67 | # End of script 


--------------------------------------------------------------------------------
/frontend/src/components/ui/checkbox.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import { Check } from "lucide-react"
 3 | import { cn } from "../../lib/utils"
 4 | 
 5 | interface CheckboxProps {
 6 |   checked?: boolean;
 7 |   onCheckedChange?: (checked: boolean) => void;
 8 |   disabled?: boolean;
 9 |   className?: string;
10 |   id?: string;
11 | }
12 | 
13 | const Checkbox = React.forwardRef<HTMLInputElement, CheckboxProps>(
14 |   ({ className, checked, onCheckedChange, disabled, id, ...props }, ref) => {
15 |     return (
16 |       <label 
17 |         className={cn(
18 |           "relative inline-flex items-center cursor-pointer",
19 |           disabled && "cursor-not-allowed opacity-50",
20 |           className
21 |         )}
22 |       >
23 |         <input
24 |           ref={ref}
25 |           type="checkbox"
26 |           id={id}
27 |           checked={checked}
28 |           onChange={(e) => onCheckedChange?.(e.target.checked)}
29 |           disabled={disabled}
30 |           className="sr-only"
31 |           {...props}
32 |         />
33 |         <div
34 |           className={cn(
35 |             "h-4 w-4 rounded-sm border border-gray-300 flex items-center justify-center transition-colors",
36 |             checked ? "bg-blue-600 border-blue-600" : "bg-white",
37 |             disabled ? "opacity-50" : "hover:border-gray-400"
38 |           )}
39 |         >
40 |           {checked && (
41 |             <Check className="h-3 w-3 text-white" />
42 |           )}
43 |         </div>
44 |       </label>
45 |     )
46 |   }
47 | )
48 | 
49 | Checkbox.displayName = "Checkbox"
50 | 
51 | export { Checkbox } 


--------------------------------------------------------------------------------
/backend/app/api/v1/schemas/conversion_schemas.py:
--------------------------------------------------------------------------------
 1 | from marshmallow import Schema, fields, validate, validates_schema, ValidationError
 2 | 
 3 | class ConversionConfigSchema(Schema):
 4 |     """转换配置验证模式"""
 5 |     method = fields.String(required=True, validate=validate.OneOf(['markitdown', 'vision_llm']))
 6 |     llmConfigId = fields.String(allow_none=True)
 7 |     customPrompt = fields.String(allow_none=True)
 8 |     enableOCR = fields.Boolean(missing=True)
 9 |     preserveFormatting = fields.Boolean(missing=True)
10 |     extractTables = fields.Boolean(missing=True)
11 |     extractImages = fields.Boolean(missing=False)
12 |     pageProcessing = fields.Dict(allow_none=True)
13 |     
14 |     @validates_schema
15 |     def validate_llm_config(self, data, **kwargs):
16 |         """验证LLM配置"""
17 |         if data.get('method') == 'vision_llm' and not data.get('llmConfigId'):
18 |             raise ValidationError('使用AI智能转换时必须选择LLM配置', 'llmConfigId')
19 | 
20 | class ConversionJobCreateSchema(Schema):
21 |     """创建转换任务的验证模式"""
22 |     file_ids = fields.List(fields.String(), required=True, validate=validate.Length(min=1))
23 |     conversion_config = fields.Nested(ConversionConfigSchema, required=True)
24 | 
class ConversionJobQuerySchema(Schema):
    """Validation schema for the query parameters used when listing conversion jobs."""

    # Pagination: page starts at 1, per_page is limited to 1..100.
    page = fields.Integer(missing=1, validate=validate.Range(min=1))
    per_page = fields.Integer(missing=20, validate=validate.Range(min=1, max=100))

    # Optional filters.
    library_id = fields.String(allow_none=True)
    status = fields.String(
        allow_none=True,
        validate=validate.OneOf(['pending', 'processing', 'completed', 'failed', 'cancelled']),
    )


--------------------------------------------------------------------------------
/frontend/src/hooks/useOverview.ts:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | import { overviewService, OverviewStats, Activity, Notification } from '../services/overview.service';
 3 | 
 4 | interface UseOverviewReturn {
 5 |   stats: OverviewStats | null;
 6 |   activities: Activity[];
 7 |   notifications: Notification[];
 8 |   isLoading: boolean;
 9 |   error: string | null;
10 |   refetch: () => Promise<void>;
11 | }
12 | 
/**
 * Loads the overview data (stats, activities, notifications) once on mount
 * and exposes it together with loading/error state and a `refetch` callback.
 */
export const useOverview = (): UseOverviewReturn => {
  const [stats, setStats] = useState<OverviewStats | null>(null);
  const [activities, setActivities] = useState<Activity[]>([]);
  const [notifications, setNotifications] = useState<Notification[]>([]);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  /** Fetches all overview data in one call and mirrors the outcome into state. */
  async function fetchOverviewData(): Promise<void> {
    try {
      setIsLoading(true);
      setError(null);

      const payload = await overviewService.getAllOverviewData();
      const {
        stats: nextStats,
        activities: nextActivities,
        notifications: nextNotifications,
      } = payload;

      setStats(nextStats);
      setActivities(nextActivities);
      setNotifications(nextNotifications);
    } catch (failure) {
      console.error('Failed to fetch overview data:', failure);
      // Non-Error rejections fall back to a fixed message.
      const message = failure instanceof Error ? failure.message : '获取概览数据失败';
      setError(message);
    } finally {
      setIsLoading(false);
    }
  }

  // Run the initial load once, after the first render.
  useEffect(() => {
    fetchOverviewData();
  }, []);

  return { stats, activities, notifications, isLoading, error, refetch: fetchOverviewData };
};


--------------------------------------------------------------------------------
/frontend/src/types/systemLog.ts:
--------------------------------------------------------------------------------
 1 | import { ApiResponse, PaginatedResponse } from './api';
 2 | 
 3 | export type LogLevel = 'debug' | 'info' | 'warn' | 'error';
 4 | 
 5 | export interface SystemLog {
 6 |   id: string;
 7 |   level: LogLevel;
 8 |   message: string;
 9 |   source: string;
10 |   details?: string;
11 |   module?: string;
12 |   function?: string;
13 |   line_number?: number;
14 |   request_id?: string;
15 |   user_id?: string;
16 |   ip_address?: string;
17 |   extra_data?: Record<string, any>;
18 |   error_code?: string;
19 |   stack_trace?: string;
20 |   timestamp: string;
21 | }
22 | 
/** Optional paging and filter parameters for querying system logs. */
export interface SystemLogQueryParams {
  // Paging
  page?: number;
  per_page?: number;

  // Filters
  level?: LogLevel;
  source?: string;
  search?: string;
  start_date?: string;
  end_date?: string;
  request_id?: string;
}
33 | 
/** Aggregated statistics about system logs. */
export interface SystemLogStats {
  /** One counter per log level. */
  level_stats: Record<LogLevel, number>;
  total_logs: number;
  recent_errors: number;
  /** Per-source counters. */
  active_sources: { source: string; count: number }[];
  time_range_hours: number;
}
44 | 
/** Request payload for a log cleanup; carries a single day count. */
export interface LogCleanupRequest {
  days: number;
}
48 | 
/** Result of a log cleanup: the number of deleted entries and the retention period in days. */
export interface LogCleanupResponse {
  deleted_count: number;
  retention_days: number;
}
53 | 
/** Optional filters and a row limit for a log export. */
export interface LogExportRequest {
  // Filters
  level?: LogLevel;
  source?: string;
  search?: string;
  start_date?: string;
  end_date?: string;

  // Cap on the number of exported entries
  limit?: number;
}
62 | 
/** Result of a log export: the exported entries, their count and the export timestamp. */
export interface LogExportResponse {
  logs: SystemLog[];
  count: number;
  exported_at: string;
}


--------------------------------------------------------------------------------
/frontend/src/hooks/useTaskStats.ts:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | import { overviewService } from '../services/overview.service';
 3 | 
 4 | interface TaskStats {
 5 |   total: number;
 6 |   completed: number;
 7 |   running: number;
 8 |   failed: number;
 9 |   pending: number;
10 | }
11 | 
/** Shape of the value returned by the `useTaskStats` hook. */
interface UseTaskStatsReturn {
  /** Task counters; `null` until a fetch has succeeded. */
  taskStats: TaskStats | null;
  /** `true` while a fetch is in flight. */
  isLoading: boolean;
  /** Message of the last failed fetch, or `null`. */
  error: string | null;
  /** Triggers a new fetch of the task statistics. */
  refetch: () => Promise<void>;
}
18 | 
/**
 * Loads the task counters from the overview stats once on mount, derives the
 * `pending` count, and exposes the result with loading/error state and `refetch`.
 */
export const useTaskStats = (): UseTaskStatsReturn => {
  const [taskStats, setTaskStats] = useState<TaskStats | null>(null);
  const [isLoading, setIsLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  /** Fetches the overview stats and stores the task section plus the derived pending count. */
  async function fetchTaskStats(): Promise<void> {
    try {
      setIsLoading(true);
      setError(null);

      const overview = await overviewService.getStats();
      const tasks = overview.tasks;

      // pending = total - completed - running - failed, clamped so it never goes negative
      const remaining = tasks.total - tasks.completed - tasks.running - tasks.failed;
      const pending = Math.max(0, remaining);

      setTaskStats({ ...tasks, pending });
    } catch (failure) {
      console.error('Failed to fetch task stats:', failure);
      // Non-Error rejections fall back to a fixed message.
      const message = failure instanceof Error ? failure.message : '获取任务统计失败';
      setError(message);
    } finally {
      setIsLoading(false);
    }
  }

  // Run the initial load once, after the first render.
  useEffect(() => {
    fetchTaskStats();
  }, []);

  return { taskStats, isLoading, error, refetch: fetchTaskStats };
};


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/overview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "welcome": "PinDataへようこそ",
 3 |   "welcomeDescription": "データセットの探索、タスクの管理、ストレージ使用状況の監視を開始します。",
 4 |   "systemOverview": "システム概要",
 5 |   "recentActivity": "最近のアクティビティ",
 6 |   "quickActions": "クイックアクション",
 7 |   "systemNotifications": "システム通知",
 8 |   "comingSoon": "近日公開",
 9 |   "pluginsDevelopment": "プラグインサポートは現在開発中です",
10 |   "features": {
11 |     "dataImport": {
12 |       "title": "データインポートと処理",
13 |       "description": "CSV、JSON、Excelなど、複数の形式の生データインポートをサポート。インテリジェントなデータ前処理、自動データ型認識、高品質なトレーニングデータセットの生成。"
14 |     },
15 |     "datasetManagement": {
16 |       "title": "マルチプラットフォームデータセット管理",
17 |       "description": "Hugging Face HubやModelScopeなどの主要プラットフォームとシームレスに統合。人気のオープンソースデータセットをワンクリックでインポート、バージョン管理と更新同期をサポート。"
18 |     },
19 |     "taskScheduling": {
20 |       "title": "スマートタスクスケジューリング",
21 |       "description": "バッチ操作とスケジュールタスクをサポートする自動化データ処理パイプライン。詳細な実行ログとパフォーマンス指標を提供するリアルタイムタスクステータス監視。"
22 |     },
23 |     "storageSharing": {
24 |       "title": "ストレージと共有",
25 |       "description": "大規模データセットの安全なストレージとバックアップをサポートするクラウドストレージ。データセット共有と権限管理をサポートするチームコラボレーション機能。"
26 |     }
27 |   },
28 |   "quickStart": {
29 |     "title": "クイックスタートガイド",
30 |     "step1": {
31 |       "title": "データのインポート",
32 |       "description": "ローカルファイルまたはHF、ModelScopeプラットフォーム"
33 |     },
34 |     "step2": {
35 |       "title": "データの処理",
36 |       "description": "クリーニング、変換、品質チェック"
37 |     },
38 |     "step3": {
39 |       "title": "データセットの生成",
40 |       "description": "トレーニングデータセットの作成とエクスポート"
41 |     }
42 |   },
43 |   "error": {
44 |     "loadingOverview": "概要データの読み込み中にエラーが発生しました: {{error}}"
45 |   }
46 | }


--------------------------------------------------------------------------------
/backend/app/celery_app.py:
--------------------------------------------------------------------------------
 1 | from celery import Celery
 2 | from config.config import Config
 3 | 
 4 | def make_celery(app_name=__name__):
 5 |     """创建并配置Celery实例"""
 6 |     celery = Celery(app_name)
 7 |     
 8 |     # 使用新的配置格式
 9 |     celery.conf.update(
10 |         # Broker 和 Backend 配置
11 |         broker_url=Config.CELERY_BROKER_URL,
12 |         result_backend=Config.CELERY_RESULT_BACKEND,
13 |         
14 |         # 序列化配置
15 |         task_serializer=getattr(Config, 'CELERY_TASK_SERIALIZER', 'json'),
16 |         accept_content=getattr(Config, 'CELERY_ACCEPT_CONTENT', ['json']),
17 |         result_serializer=getattr(Config, 'CELERY_RESULT_SERIALIZER', 'json'),
18 |         
19 |         # 时区配置
20 |         timezone=getattr(Config, 'CELERY_TIMEZONE', 'UTC'),
21 |         enable_utc=getattr(Config, 'CELERY_ENABLE_UTC', True),
22 |         
23 |         # 任务配置
24 |         task_track_started=True,
25 |         task_time_limit=30 * 60,  # 30分钟超时
26 |         task_soft_time_limit=25 * 60,  # 25分钟软超时
27 |         
28 |         # Worker 配置
29 |         worker_prefetch_multiplier=1,
30 |         worker_max_tasks_per_child=1000,
31 |         
32 |         # 自动发现任务
33 |         include=[
34 |             'app.tasks.conversion_tasks',
35 |             'app.tasks.dataset_import_tasks', 
36 |             'app.tasks.dataset_generation_tasks',
37 |             'app.tasks.dataflow_tasks',  # 添加DataFlow任务
38 |             'app.tasks.chinese_dataflow_tasks'  # 添加中文DataFlow任务
39 |             # 'app.tasks.multimodal_dataset_tasks'  # 暂时移除，功能开发中
40 |         ]
41 |     )
42 |     
43 |     return celery
44 | 
45 | # 创建全局Celery实例
46 | celery = make_celery('pindata_celery') 


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/SmartDatasetCreator/components/NavigationButtons.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { Button } from '../../../../components/ui/button';
 3 | import { Play } from 'lucide-react';
 4 | import { useTranslation } from 'react-i18next';
 5 | import { useSmartDatasetCreatorStore } from '../store/useSmartDatasetCreatorStore';
 6 | import { getSteps } from '../constants';
 7 | 
 8 | export const NavigationButtons: React.FC = () => {
 9 |   const { t } = useTranslation();
10 |   const {
11 |     currentStep,
12 |     selectedFiles,
13 |     datasetName,
14 |     prevStep,
15 |     nextStep
16 |   } = useSmartDatasetCreatorStore();
17 | 
18 |   if (currentStep >= 4) {
19 |     return null;
20 |   }
21 | 
22 |   const canProceed = () => {
23 |     switch (currentStep) {
24 |       case 1:
25 |         return selectedFiles.length > 0;
26 |       case 2:
27 |         return datasetName.trim() !== '';
28 |       default:
29 |         return true;
30 |     }
31 |   };
32 | 
33 |   return (
34 |     <div className="flex items-center justify-between">
35 |       <Button 
36 |         variant="outline" 
37 |         onClick={prevStep}
38 |         disabled={currentStep === 1}
39 |         className="border-[#d1dbe8]"
40 |       >
41 |         {t('smartDatasetCreator.navigation.prevStep')}
42 |       </Button>
43 |       
44 |       <div className="flex gap-3">
45 |         <Button 
46 |           className="bg-[#1977e5] hover:bg-[#1565c0]"
47 |           onClick={nextStep}
48 |           disabled={!canProceed()}
49 |         >
50 |           {t('smartDatasetCreator.navigation.nextStep')}
51 |         </Button>
52 |       </div>
53 |     </div>
54 |   );
55 | }; 


--------------------------------------------------------------------------------
/backend/app/models/permission.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Text, DateTime
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import uuid
 5 | 
 6 | from app.db import db
 7 | 
 8 | 
 9 | class Permission(db.Model):
10 |     __tablename__ = 'permissions'
11 |     
12 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
13 |     name = Column(String(100), nullable=False)
14 |     code = Column(String(100), unique=True, nullable=False, index=True)
15 |     description = Column(Text)
16 |     category = Column(String(100))      # 权限分类
17 |     is_system_permission = Column(db.Boolean, default=False, nullable=False)
18 |     created_at = Column(DateTime, default=datetime.utcnow)
19 |     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
20 |     
21 |     # Relationships
22 |     role_permissions = relationship("RolePermission", back_populates="permission", cascade="all, delete-orphan")
23 |     
24 |     def to_dict(self):
25 |         return {
26 |             'id': self.id,
27 |             'name': self.name,
28 |             'code': self.code,
29 |             'description': self.description,
30 |             'category': self.category,
31 |             'is_system_permission': self.is_system_permission,
32 |             'created_at': self.created_at.isoformat() if self.created_at else None,
33 |             'updated_at': self.updated_at.isoformat() if self.updated_at else None
34 |         }
35 |     
36 |     def __repr__(self):
37 |         return f'<Permission {self.code}>'
38 |     
39 |     # 索引
40 |     __table_args__ = (
41 |         db.Index('idx_permissions_code', 'code'),
42 |     )


--------------------------------------------------------------------------------
/frontend/src/screens/DataGovernance/ProjectDetail/README.md:
--------------------------------------------------------------------------------
 1 | # ProjectDetail 组件结构
 2 | 
 3 | 此目录包含项目详情页面的所有相关组件，采用模块化设计以提高代码的可维护性和重用性。
 4 | 
 5 | ## 文件结构
 6 | 
 7 | ```
 8 | ProjectDetail/
 9 | ├── index.ts                 # 导出所有组件
10 | ├── ProjectDetail.tsx        # 主组件，包含页面布局和tab切换
11 | ├── RawDataTab.tsx          # 原始数据管理Tab
12 | ├── GovernedDataTab.tsx     # 治理后数据Tab
13 | ├── KnowledgeTab.tsx        # 知识管理Tab
14 | ├── DatasetsTab.tsx         # 数据集管理Tab
15 | ├── AnalyticsTab.tsx        # 数据分析Tab
16 | └── README.md               # 此文件
17 | ```
18 | 
19 | ## 组件说明
20 | 
21 | ### ProjectDetail.tsx
22 | - 主要负责页面布局和结构
23 | - 包含项目头部信息、指标总览、Tab导航
24 | - 处理项目数据的加载和错误状态
25 | - 管理Tab切换逻辑
26 | 
27 | ### RawDataTab.tsx
28 | - 原始数据源管理
29 | - 文件上传功能
30 | - 文件列表展示和筛选
31 | - 文件预览功能（包含模态框）
32 | - 独立的状态管理（loading、file lists等）
33 | 
34 | ### GovernedDataTab.tsx
35 | - 显示经过治理的数据
36 | - 数据质量评分展示
37 | - 数据下载和预览功能
38 | 
39 | ### KnowledgeTab.tsx
40 | - 知识库管理
41 | - 数据字典、业务规则、最佳实践
42 | - 知识项的查看和编辑
43 | 
44 | ### DatasetsTab.tsx
45 | - 数据集管理
46 | - 数据集创建和发布状态
47 | - API访问和下载功能
48 | 
49 | ### AnalyticsTab.tsx
50 | - 数据分析报告
51 | - 质量趋势图表
52 | - 异常检测报告
53 | - 使用统计
54 | 
55 | ## 设计原则
56 | 
57 | 1. **单一职责**: 每个Tab组件只负责自己的功能域
58 | 2. **数据传递**: 通过props传递必要的项目数据
59 | 3. **状态隔离**: 每个组件管理自己的状态
60 | 4. **类型安全**: 使用TypeScript确保类型安全
61 | 5. **可扩展性**: 便于添加新的Tab或功能
62 | 
63 | ## 使用方式
64 | 
65 | ```typescript
66 | import { ProjectDetail } from './ProjectDetail';
67 | 
68 | // 在路由中使用
69 | <Route path="/governance/project/:id" component={ProjectDetail} />
70 | ```
71 | 
72 | ## 未来扩展
73 | 
74 | - 可以轻松添加新的Tab组件
75 | - 每个Tab可以独立开发和测试
76 | - 支持懒加载优化性能
77 | - 便于团队并行开发
78 | 
79 | ## 注意事项
80 | 
81 | - 确保所有Tab组件都遵循相同的props接口
82 | - 保持样式的一致性
83 | - 处理好错误状态和加载状态
84 | - 考虑性能优化（如虚拟滚动等） 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/dataPreview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "title": "Data Preview",
 3 |   "dataset": "Dataset",
 4 |   "version": "Version",
 5 |   "switching": "Switching...",
 6 |   "noVersion": "No Version",
 7 |   "default": "Default",
 8 |   "current": "Current",
 9 |   "defaultVersion": "Default Version",
10 |   "draft": "Draft",
11 |   "deprecated": "Deprecated",
12 |   "selectVersion": "Select Version",
13 |   "loading": "Loading...",
14 |   "totalFiles": "Total Files",
15 |   "previewFiles": "Preview Files",
16 |   "versionSwitchFailed": "Version Switch Failed",
17 |   "refreshPreview": "Refresh Preview",
18 |   "exportInfo": "Export Info",
19 |   "allTypes": "All Types",
20 |   "selected": "Selected",
21 |   "files": "files",
22 |   "deleteSelected": "Delete Selected",
23 |   "uploading": "Uploading...",
24 |   "addFiles": "Add Files",
25 |   "noPreviewData": "No preview data available",
26 |   "noDataToPreview": "No data",
27 |   "totalRows": "Total Rows",
28 |   "columns": "Columns",
29 |   "preview": "Preview",
30 |   "download": "Download",
31 |   "rows": "rows",
32 |   "format": "Format",
33 |   "totalItems": "Total Items",
34 |   "items": "items",
35 |   "entry": "Entry",
36 |   "size": "Size",
37 |   "mode": "Mode",
38 |   "imagePreviewDeveloping": "Image preview feature under development",
39 |   "previewDeveloping": "Preview feature under development",
40 |   "unsupportedPreview": "This file type preview is not supported",
41 |   "downloadFile": "Download",
42 |   "deleteFile": "Delete",
43 |   "checksum": "Checksum",
44 |   "unsupportedFileType": "This file type cannot be previewed",
45 |   "versionInfo": "Version Info",
46 |   "commitHash": "Commit Hash",
47 |   "createTime": "Create Time",
48 |   "commitMessage": "Commit Message"
49 | }


--------------------------------------------------------------------------------
/doc/tmp/user.md:
--------------------------------------------------------------------------------
 1 | 
 2 |   1. 数据库表设计
 3 |   - 10张核心表：用户、组织、角色、权限、用户-组织关系、用户-角色关系、角色-权限关系、资源权限、
 4 |     用户会话、审计日志
 5 |   - 支持层级组织架构、灵活角色分配、细粒度权限控制
 6 | 
 7 |   2. SQLAlchemy模型
 8 |   - 完整的模型定义，包含所有关系映射
 9 |   - 枚举类型定义，便于状态管理
10 |   - 实用方法如to_dict()、权限检查等
11 | 
12 |   3. 数据库迁移脚本
13 |   - Python迁移脚本：create_user_management_tables.py
14 |   - SQL初始化脚本：init_user_permissions.sql
15 |   - 包含基础数据初始化（权限、角色、默认管理员）
16 | 
17 |   4. 服务层
18 |   - AuthService: JWT认证、会话管理、权限验证
19 |   - UserService: 用户CRUD、角色分配、组织管理
20 | 
21 |   5. API端点
22 |   - 认证端点: 登录、注册、登出、令牌刷新、密码修改
23 |   - 用户管理端点: 用户CRUD、角色分配、组织管理
24 |   - 装饰器: @login_required、@permission_required
25 | 
26 |   🔐 权限系统特点
27 | 
28 |   1. 多层级权限控制
29 |     - 系统级权限（通过角色）
30 |     - 资源级权限（owner/admin/write/read）
31 |     - 组织级权限隔离
32 |   2. 企业级安全
33 |     - JWT令牌认证
34 |     - 会话管理和设备跟踪
35 |     - 密码安全存储
36 |     - 操作审计日志
37 |   3. 灵活的组织架构
38 |     - 支持无限层级组织结构
39 |     - 用户可属于多个组织
40 |     - 组织内角色分配
41 | 
42 |   📋 使用说明
43 | 
44 |   1. 初始化数据库
45 |   cd backend
46 |   python migrations/create_user_management_tables.py
47 | 
48 |   2. 默认管理员账号
49 |   - 用户名: admin
50 |   - 密码: admin123
51 |   - 邮箱: admin@pindata.com
52 | 
53 |   3. API使用示例
54 |   # 登录
55 |   POST /api/v1/login
56 |   {
57 |     "username": "admin",
58 |     "password": "admin123"
59 |   }
60 | 
61 |   # 创建用户
62 |   POST /api/v1/users
63 |   Headers: Authorization: Bearer <token>
64 |   {
65 |     "username": "newuser",
66 |     "email": "user@example.com",
67 |     "password": "password123"
68 |   }
69 | 
70 |   🔄 与现有系统集成
71 | 
72 |   系统设计时充分考虑了与现有PinData系统的集成：
73 | 
74 |   1. 数据模型兼容: 使用UUID主键，与现有模型保持一致
75 |   2. API设计: 遵循现有RESTful模式
76 |   3. 权限集成: 可为现有的Dataset、Library等资源添加权限控制
77 |   4. 审计集成: 与现有SystemLog系统协同工作
78 | 
79 |   这个权限系统为你的数据治理项目提供了企业级的用户管理和访问控制基础，支持未来的功能扩展和权限细化


--------------------------------------------------------------------------------
/backend/app/models/plugin.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from sqlalchemy import Column, Integer, String, Text, DateTime, Boolean, JSON, Enum
 3 | import enum
 4 | from app.db import db
 5 | 
 6 | class PluginType(enum.Enum):
 7 |     """插件类型枚举"""
 8 |     PARSER = "parser"
 9 |     CLEANER = "cleaner"
10 |     DISTILLER = "distiller"
11 | 
class Plugin(db.Model):
    """ORM model for a row of the `plugins` table."""

    __tablename__ = 'plugins'

    id = Column(Integer, primary_key=True)
    name = Column(String(255), unique=True, nullable=False)
    display_name = Column(String(255), nullable=False)
    type = Column(Enum(PluginType), nullable=False)
    description = Column(Text)
    version = Column(String(50), nullable=False)
    author = Column(String(255))
    is_builtin = Column(Boolean, default=False)
    is_enabled = Column(Boolean, default=True)
    config_schema = Column(JSON)  # configuration schema
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    def to_dict(self):
        """Return the plugin as a plain dict.

        `type` is emitted as the enum's value and the timestamps as ISO
        strings; each of the three is None when the attribute is unset.
        """
        kind = self.type.value if self.type else None
        created = self.created_at.isoformat() if self.created_at else None
        updated = self.updated_at.isoformat() if self.updated_at else None
        return {
            'id': self.id,
            'name': self.name,
            'display_name': self.display_name,
            'type': kind,
            'description': self.description,
            'version': self.version,
            'author': self.author,
            'is_builtin': self.is_builtin,
            'is_enabled': self.is_enabled,
            'config_schema': self.config_schema,
            'created_at': created,
            'updated_at': updated,
        }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/libraryDetails.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backToList": "リストに戻る",
 3 |   "uploadFiles": "ファイルをアップロード",
 4 |   "totalFiles": "総ファイル数",
 5 |   "processed": "処理済み",
 6 |   "processing": "処理中",
 7 |   "pending": "保留中",
 8 |   "mdFiles": "MDファイル",
 9 |   "fileList": "ファイルリスト",
10 |   "totalFilesCount": "合計 {count} 個のファイル",
11 |   "selectedCount": "{count} 個選択済み",
12 |   "convertToMD": "MDに変換",
13 |   "batchDelete": "一括削除",
14 |   "refresh": "更新",
15 |   "loadingFiles": "ファイルリストを読み込み中...",
16 |   "loadFailed": "ファイルリストの読み込みに失敗しました",
17 |   "retry": "再試行",
18 |   "noFiles": "ファイルがありません",
19 |   "uploadTip": "上の「ファイルをアップロード」ボタンをクリックしてファイルの追加を開始してください",
20 |   "fileName": "ファイル名",
21 |   "type": "タイプ",
22 |   "size": "サイズ",
23 |   "status": "ステータス",
24 |   "uploadTime": "アップロード時間",
25 |   "actions": "アクション",
26 |   "selectAll": "すべて選択",
27 |   "selectFile": "ファイルを選択",
28 |   "convertToMDAction": "MDに変換",
29 |   "viewDetails": "詳細を表示",
30 |   "deleteFileAction": "ファイルを削除",
31 |   "downloadFileAction": "ファイルをダウンロード",
32 |   "uploadSuccess": "{count} 個のファイルを正常にアップロードしました",
33 |   "deleteSuccess": "ファイル \"{fileName}\" を正常に削除しました",
34 |   "deleteFailed": "ファイル \"{fileName}\" の削除に失敗しました",
35 |   "downloadStart": "ファイル \"{fileName}\" のダウンロードを開始しました",
36 |   "downloadFailed": "ファイル \"{fileName}\" のダウンロードに失敗しました",
37 |   "deleteConfirm": "ファイル \"{fileName}\" を削除してもよろしいですか？",
38 |   "batchDeleteConfirm": "⚠️ 警告：{count} 個のファイルを削除しようとしています！\n\nファイルリスト：\n{fileNames}\n\nこの操作は元に戻せません。続行してもよろしいですか？",
39 |   "batchDeleteSuccess": "{count} 個のファイルを正常に削除しました",
40 |   "batchDeletePartial": "削除完了：成功 {successCount} 個、失敗 {failCount} 個",
41 |   "convertSubmitted": "{count} 個のファイルの変換タスクを送信しました",
42 |   "convertFailed": "変換タスクの送信に失敗しました",
43 |   "cancelSuccess": "変換タスクをキャンセルしました",
44 |   "cancelFailed": "タスクのキャンセルに失敗しました"
45 | }


--------------------------------------------------------------------------------
/backend/config.example.env:
--------------------------------------------------------------------------------
 1 | # Flask配置
 2 | FLASK_APP=run.py
 3 | FLASK_ENV=development
 4 | SECRET_KEY=your-secret-key-here
 5 | 
 6 | # 数据库配置 - 使用docker服务配置
 7 | DATABASE_URL=postgresql://postgres:password@localhost:15432/pindata_dataset
 8 | REDIS_URL=redis://localhost:16379/0
 9 | 
10 | # 数据库连接池配置
11 | DB_POOL_SIZE=10
12 | DB_POOL_TIMEOUT=30
13 | DB_POOL_RECYCLE=3600
14 | DB_MAX_OVERFLOW=20
15 | DB_POOL_PRE_PING=true
16 | DB_CONNECT_TIMEOUT=10
17 | 
18 | # 数据库自动初始化配置
19 | AUTO_CREATE_DATABASE=true
20 | DATABASE_INIT_RETRY_COUNT=3
21 | DATABASE_INIT_RETRY_DELAY=5
22 | 
23 | # MinIO配置 - 使用docker服务配置
24 | MINIO_ENDPOINT=localhost:9000
25 | MINIO_ACCESS_KEY=minioadmin
26 | MINIO_SECRET_KEY=minioadmin
27 | MINIO_SECURE=false
28 | MINIO_BUCKET_NAME=pindata-bucket
29 | MINIO_RAW_DATA_BUCKET=raw-data
30 | MINIO_DATASETS_BUCKET=datasets
31 | MINIO_DEFAULT_BUCKET=pindata-bucket
32 | 
33 | # Celery配置 - 使用docker服务配置
34 | CELERY_BROKER_URL=redis://localhost:16379/0
35 | CELERY_RESULT_BACKEND=redis://localhost:16379/1
36 | CELERY_TASK_SERIALIZER=json
37 | CELERY_RESULT_SERIALIZER=json
38 | CELERY_ACCEPT_CONTENT=json
39 | CELERY_TIMEZONE=UTC
40 | CELERY_ENABLE_UTC=true
41 | 
42 | # API配置
43 | API_PREFIX=/api/v1
44 | PAGINATION_PAGE_SIZE=20
45 | 
46 | # JWT配置
47 | JWT_SECRET_KEY=your-jwt-secret-key
48 | JWT_ACCESS_TOKEN_EXPIRES=3600
49 | 
50 | # CORS配置
51 | CORS_ORIGINS=*
52 | 
53 | # 文件上传配置
54 | MAX_CONTENT_LENGTH=104857600
55 | ALLOWED_EXTENSIONS=txt,pdf,docx,pptx,doc,ppt
56 | 
57 | # 健康检查配置
58 | HEALTH_CHECK_ENABLED=true
59 | 
60 | # Docker环境变量（用于容器内部通信）
61 | # 当在docker容器中运行时，将localhost替换为服务名
62 | # DATABASE_URL=postgresql://postgres:password@db:15432/pindata_dataset
63 | # REDIS_URL=redis://redis:16379/0
64 | # MINIO_ENDPOINT=minio:9000
65 | # CELERY_BROKER_URL=redis://redis:16379/0
66 | # CELERY_RESULT_BACKEND=redis://redis:16379/1


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/auth.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "login": {
 3 |     "title": "登录 PinData",
 4 |     "subtitle": "数据治理管理平台",
 5 |     "rememberMe": "记住我",
 6 |     "forgotPassword": "忘记密码？",
 7 |     "submit": "登录",
 8 |     "loggingIn": "登录中...",
 9 |     "noAccount": "还没有账号？",
10 |     "registerLink": "立即注册"
11 |   },
12 |   "register": {
13 |     "title": "注册 PinData",
14 |     "subtitle": "创建您的数据治理账户",
15 |     "agreeTerms": "我同意",
16 |     "termsOfService": "服务条款",
17 |     "and": "和",
18 |     "privacyPolicy": "隐私政策",
19 |     "submit": "注册",
20 |     "registering": "注册中...",
21 |     "hasAccount": "已有账号？",
22 |     "loginLink": "立即登录",
23 |     "success": "注册成功，请登录",
24 |     "firstUserSuccess": "欢迎！您是第一个用户，已自动获得管理员权限"
25 |   },
26 |   "logout": {
27 |     "button": "登出",
28 |     "loggingOut": "登出中..."
29 |   },
30 |   "fields": {
31 |     "username": "用户名",
32 |     "email": "邮箱",
33 |     "password": "密码",
34 |     "confirmPassword": "确认密码",
35 |     "fullName": "姓名",
36 |     "phone": "手机号"
37 |   },
38 |   "placeholders": {
39 |     "username": "请输入用户名或邮箱",
40 |     "email": "请输入邮箱地址",
41 |     "password": "请输入密码",
42 |     "confirmPassword": "请再次输入密码",
43 |     "fullName": "请输入真实姓名",
44 |     "phone": "请输入手机号"
45 |   },
46 |   "validation": {
47 |     "usernameRequired": "用户名不能为空",
48 |     "usernameMinLength": "用户名至少3个字符",
49 |     "usernameFormat": "用户名只能包含字母、数字和下划线",
50 |     "emailRequired": "邮箱不能为空",
51 |     "emailInvalid": "邮箱格式不正确",
52 |     "passwordRequired": "密码不能为空",
53 |     "passwordMinLength": "密码至少6个字符",
54 |     "passwordMismatch": "两次输入的密码不一致",
55 |     "agreeTermsRequired": "请同意服务条款"
56 |   },
57 |   "errors": {
58 |     "loginFailed": "登录失败，请检查用户名和密码",
59 |     "registerFailed": "注册失败，请重试",
60 |     "unauthorized": "您没有权限访问此页面"
61 |   },
62 |   "footer": {
63 |     "copyright": "© 2024 PinData. All rights reserved."
64 |   }
65 | }


--------------------------------------------------------------------------------
/backend/app/api/v1/endpoints/plugins.py:
--------------------------------------------------------------------------------
 1 | from flask import jsonify, request
 2 | from flasgger import swag_from
 3 | from app.api.v1 import api_v1
 4 | from app.models import Plugin
 5 | from app.db import db
 6 | 
 7 | @api_v1.route('/plugins', methods=['GET'])
 8 | @swag_from({
 9 |     'tags': ['插件'],
10 |     'summary': '获取插件列表',
11 |     'parameters': [{
12 |         'name': 'type',
13 |         'in': 'query',
14 |         'type': 'string',
15 |         'enum': ['parser', 'cleaner', 'distiller']
16 |     }, {
17 |         'name': 'enabled',
18 |         'in': 'query',
19 |         'type': 'boolean'
20 |     }],
21 |     'responses': {
22 |         200: {
23 |             'description': '成功获取插件列表'
24 |         }
25 |     }
26 | })
27 | def get_plugins():
28 |     """获取插件列表"""
29 |     plugin_type = request.args.get('type')
30 |     enabled = request.args.get('enabled', type=bool)
31 |     
32 |     query = Plugin.query
33 |     if plugin_type:
34 |         query = query.filter_by(type=plugin_type)
35 |     if enabled is not None:
36 |         query = query.filter_by(is_enabled=enabled)
37 |     
38 |     plugins = query.all()
39 |     
40 |     return jsonify({
41 |         'plugins': [plugin.to_dict() for plugin in plugins],
42 |         'total': len(plugins)
43 |     })
44 | 
# Swagger description of the plugin detail endpoint.
_GET_PLUGIN_SPEC = {
    'tags': ['插件'],
    'summary': '获取插件详情',
    'parameters': [{
        'name': 'plugin_id',
        'in': 'path',
        'type': 'integer',
        'required': True
    }],
    'responses': {
        200: {
            'description': '成功获取插件详情'
        },
        404: {
            'description': '插件不存在'
        }
    }
}


@api_v1.route('/plugins/<int:plugin_id>', methods=['GET'])
@swag_from(_GET_PLUGIN_SPEC)
def get_plugin(plugin_id):
    """Return one plugin as JSON; responds with 404 when no plugin has the given id."""
    found = Plugin.query.get_or_404(plugin_id)
    payload = found.to_dict()
    return jsonify(payload)


--------------------------------------------------------------------------------
/backend/app/models/role_permission.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, DateTime, ForeignKey, UniqueConstraint
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import uuid
 5 | 
 6 | from app.db import db
 7 | 
 8 | 
 9 | class RolePermission(db.Model):
10 |     __tablename__ = 'role_permissions'
11 |     
12 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
13 |     role_id = Column(String(36), ForeignKey('roles.id'), nullable=False, index=True)
14 |     permission_id = Column(String(36), ForeignKey('permissions.id'), nullable=False, index=True)
15 |     created_by = Column(String(36), ForeignKey('users.id'))
16 |     created_at = Column(DateTime, default=datetime.utcnow)
17 |     
18 |     # Relationships
19 |     role = relationship("Role", back_populates="role_permissions")
20 |     permission = relationship("Permission", back_populates="role_permissions")
21 |     creator = relationship("User", foreign_keys=[created_by])
22 |     
23 |     def to_dict(self, include_role=False, include_permission=False):
24 |         result = {
25 |             'id': self.id,
26 |             'role_id': self.role_id,
27 |             'permission_id': self.permission_id,
28 |             'created_by': self.created_by,
29 |             'created_at': self.created_at.isoformat() if self.created_at else None
30 |         }
31 |         
32 |         if include_role and self.role:
33 |             result['role'] = self.role.to_dict()
34 |         
35 |         if include_permission and self.permission:
36 |             result['permission'] = self.permission.to_dict()
37 |         
38 |         return result
39 |     
40 |     def __repr__(self):
41 |         return f'<RolePermission {self.role_id}-{self.permission_id}>'
42 |     
43 |     # 约束
44 |     __table_args__ = (
45 |         UniqueConstraint('role_id', 'permission_id', name='uk_role_permission'),
46 |     )


--------------------------------------------------------------------------------
/plugins/custom_parsers/my_custom_parser.py:
--------------------------------------------------------------------------------
 1 | # plugins/custom_parsers/my_custom_parser.py
 2 | 
 3 | # 假设 BaseParser 可以从后端应用中导入
 4 | # 这需要后端应用的 app 目录在 PYTHONPATH 中，或者通过相对路径正确引用
 5 | # 例如: from backend.app.plugins.parsers.base_parser import BaseParser
 6 | 
 7 | # For simplicity, a stand-in for the real BaseParser is defined here
 8 | class BaseParserMock:
 9 |     def parse(self, file_path, config=None):  # no default implementation; subclasses override this
10 |         raise NotImplementedError
11 |     def get_config_schema(self):  # the mock base declares no options (empty dict)
12 |         return {}
13 | 
14 | # 假设这是我们自定义的解析器
15 | class MyCustomTextParser(BaseParserMock): # 实际应继承真实的 BaseParser
16 |     def parse(self, file_path, config=None):
17 |         '''解析自定义的 .mytext 文件格式'''
18 |         text_blocks = []
19 |         try:
20 |             with open(file_path, 'r', encoding='utf-8') as f:
21 |                 content = f.read()
22 |             # 假设 .mytext 文件以 "---" 分割块
23 |             blocks = content.split("\n---\n")
24 |             for block in blocks:
25 |                 if block.strip():
26 |                     text_blocks.append(block.strip())
27 |             if not text_blocks:
28 |                 text_blocks.append(f"No text found in custom format file: {file_path} (mock)")
29 |         except Exception as e:
30 |             print(f"Error parsing .mytext file {file_path}: {e}")
31 |             text_blocks.append(f"Error parsing {file_path}: {e} (mock)")
32 |         return text_blocks
33 | 
34 |     def get_config_schema(self):
35 |         return {
36 |             "type": "object",
37 |             "properties": {
38 |                 "custom_option": {
39 |                     "type": "string",
40 |                     "title": "Custom Parser Option",
41 |                     "default": "default_value"
42 |                 }
43 |             }
44 |         }
45 | 
46 | # Used by the plugin system to discover this parser
47 | parser_name = "my_custom_text_parser" # name used when the plugin is registered
48 | parser_class = MyCustomTextParser      # class to register
49 | 
50 | print(f"Custom parser module {__name__} loaded (mock).") 


--------------------------------------------------------------------------------
/doc/tmp/muti.md:
--------------------------------------------------------------------------------
 1 |  Read 1493 lines (ctrl+r to expand)
 2 | 
 3 | ⏺ 基于对你的代码架构分析，我来详细回答你的需求分析：
 4 | 
 5 |   当前架构分析
 6 | 
 7 |   1. 原始数据支持情况
 8 | 
 9 |   ✅ 已经支持图片和视频上传
10 |   - RawData模型已经包含图片和视频的FileType枚举
11 |   - 支持的图片格式：JPG, PNG, GIF, BMP, SVG, WEBP
12 |   - 支持的视频格式：MP4, AVI, MOV, WMV, FLV, WEBM
13 |   - 模型包含图片/视频特定字段（尺寸、时长、编码等）
14 | 
15 |   2. 数据治理架构
16 | 
17 |   ✅ 治理框架已搭建
18 |   - DataGovernanceProject：治理项目管理
19 |   - GovernedData：治理后数据存储，支持标签、分类、质量评分
20 |   - KnowledgeItem：知识管理系统
21 |   - 前端有完整的数据治理界面
22 | 
23 |   3. 需要实现的功能（详见下方“实现方案”）
24 | 
25 |   实现方案
26 | 
27 |   阶段1：扩展数据治理模型支持多媒体标注
28 |   ⎿  ☐ 扩展GovernedData模型支持多媒体标注和问答数据结构
29 |      ☐ 创建多媒体标注API端点，支持图片问答、视频字幕等标注
30 |      ☐ 实现AI辅助标注服务，集成视觉和语言模型
31 |      ☐ 开发前端多媒体标注界面，支持手工和AI辅助标注
32 |      ☐ 在治理后数据展示中整合多媒体标注结果
33 | 
34 | ⏺ 核心改进建议：
35 | 
36 |   1. 数据模型扩展
37 |   需要在GovernedData模型中添加多媒体标注字段：
38 |   # 在governed_data.py中添加
39 |   annotation_data = Column(JSON)  # 存储标注数据
40 |   annotation_type = Column(String(50))  # 标注类型：qa, caption, transcript等
41 |   ai_annotations = Column(JSON)  # AI生成的标注
42 |   human_annotations = Column(JSON)  # 人工标注
43 |   annotation_confidence = Column(Float)  # 标注置信度
44 | 
45 |   2. 新增多媒体标注API
46 |   创建专门的标注端点：
47 |   - /api/v1/annotations/image-qa - 图片问答标注
48 |   - /api/v1/annotations/video-transcript - 视频字幕标注
49 |   - /api/v1/annotations/ai-assist - AI辅助标注
50 | 
51 |   3. AI服务集成
52 |   集成视觉和语言模型：
53 |   - 图片理解：GPT-4V, Claude 3 Vision
54 |   - 视频处理：Whisper（语音转文字）+ 视觉模型
55 |   - 标注质量评估：基于置信度的自动质检
56 | 
57 |   4. 前端标注界面
58 |   在/governance/projects/{id}的治理后数据标签页中：
59 |   - 图片标注器：支持区域选择+问答
60 |   - 视频标注器：时间轴+字幕编辑
61 |   - AI建议面板：显示AI生成的标注建议
62 | 
63 |   5. 工作流程设计
64 |   原始图片/视频 → AI预标注 → 人工审核修正 → 质量评估 → 治理后数据展示
65 | 
66 |   优势
67 | 
68 |   1. 无需重构：现有架构已支持多媒体文件和数据治理
69 |   2. 渐进实现：可按优先级逐步添加功能
70 |   3. 数据统一：所有标注数据通过GovernedData统一管理
71 |   4. AI+人工：结合自动化和人工质控
72 | 
73 |   下一步建议
74 | 
75 |   1. 先实现图片问答标注功能作为MVP
76 |   2. 再扩展到视频字幕标注
77 |   3. 最后完善AI辅助和质量评估
78 | 
79 |   你希望我开始实现哪个部分？我建议从扩展数据模型开始。
80 | 


--------------------------------------------------------------------------------
/frontend/public/vector---0-2.svg:
--------------------------------------------------------------------------------
1 | <svg width="18" height="17" viewBox="0 0 18 17" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M18 8.2502C18 8.66441 17.6642 9.00018 17.25 9.00018H9.00024C8.58604 9.00018 8.25026 8.66441 8.25026 8.2502C8.25026 7.836 8.58604 7.50022 9.00024 7.50022H17.25C17.6642 7.50022 18 7.836 18 8.2502ZM9.00024 3.00035H17.25C17.6642 3.00035 18 2.66457 18 2.25037C18 1.83616 17.6642 1.50039 17.25 1.50039H9.00024C8.58604 1.50039 8.25026 1.83616 8.25026 2.25037C8.25026 2.66457 8.58604 3.00035 9.00024 3.00035ZM17.25 13.5001H9.00024C8.58604 13.5001 8.25026 13.8358 8.25026 14.25C8.25026 14.6642 8.58604 15 9.00024 15H17.25C17.6642 15 18 14.6642 18 14.25C18 13.8358 17.6642 13.5001 17.25 13.5001ZM4.71974 0.219796L2.25043 2.69004L1.28108 1.71976C0.98803 1.42671 0.512905 1.42671 0.219857 1.71976C-0.0731908 2.0128 -0.0731908 2.48793 0.219857 2.78098L1.71982 4.28094C1.86049 4.42177 2.05138 4.5009 2.25043 4.5009C2.44948 4.5009 2.64037 4.42177 2.78104 4.28094L5.78096 1.28102C6.07401 0.987969 6.07401 0.512844 5.78096 0.219796C5.48791 -0.0732519 5.01278 -0.0732519 4.71974 0.219796ZM4.71974 6.21963L2.25043 8.68988L1.28108 7.71959C1.09151 7.53002 0.815207 7.45599 0.556251 7.52538C0.297295 7.59476 0.0950273 7.79703 0.0256403 8.05599C-0.0437467 8.31494 0.0302884 8.59125 0.219857 8.78081L1.71982 10.2808C1.86049 10.4216 2.05138 10.5007 2.25043 10.5007C2.44948 10.5007 2.64037 10.4216 2.78104 10.2808L5.78096 7.28086C6.07401 6.98781 6.07401 6.51268 5.78096 6.21963C5.48791 5.92658 5.01278 5.92658 4.71974 6.21963ZM4.71974 12.2195L2.25043 14.6897L1.28108 13.7194C1.09151 13.5299 0.815207 13.4558 0.556251 13.5252C0.297295 13.5946 0.0950273 13.7969 0.0256403 14.0558C-0.0437467 14.3148 0.0302884 14.5911 0.219857 14.7807L1.71982 16.2806C1.86049 16.4214 2.05138 16.5006 2.25043 16.5006C2.44948 16.5006 2.64037 16.4214 2.78104 16.2806L5.78096 13.2807C6.07401 12.9876 6.07401 12.5125 5.78096 12.2195C5.48791 11.9264 5.01278 11.9264 4.71974 12.2195Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/backend/app/api/v1/endpoints/llm_test.py:
--------------------------------------------------------------------------------
 1 | # backend/app/api/v1/endpoints/llm_test.py
 2 | from flask import Blueprint, request, jsonify
 3 | from app.services.llm_test_service import LLMTestService
 4 | 
 5 | bp = Blueprint('llm_test', __name__, url_prefix='/llms')
 6 | 
 7 | @bp.route('/test', methods=['POST'])
 8 | def test_llm():
 9 |     """
10 |     测试大模型配置
11 |     ---
12 |     tags:
13 |       - LLM Test
14 |     parameters:
15 |       - in: body
16 |         name: body
17 |         schema:
18 |           type: object
19 |           properties:
20 |             llm_config_id:
21 |               type: string
22 |               description: "要测试的大模型配置ID"
23 |             prompt:
24 |               type: string
25 |               description: "用户输入的提示"
26 |             image_url:
27 |               type: string
28 |               description: "（可选）用于多模态模型测试的图像URL"
29 |     responses:
30 |       200:
31 |         description: "模型测试成功"
32 |         schema:
33 |           type: object
34 |           properties:
35 |             success:
36 |               type: boolean
37 |             result:
38 |               type: object
39 |               description: "模型的结构化输出"
40 |       400:
41 |         description: "请求参数错误"
42 |       404:
43 |         description: "未找到指定的大模型配置"
44 |       500:
45 |         description: "模型测试失败"
46 |     """
47 |     data = request.get_json()
48 |     llm_config_id = data.get('llm_config_id')
49 |     prompt = data.get('prompt')
50 |     image_url = data.get('image_url')
51 | 
52 |     if not llm_config_id or not prompt:
53 |         return jsonify({"success": False, "message": "缺少必要的参数：llm_config_id 和 prompt"}), 400
54 | 
55 |     try:
56 |         result = LLMTestService.test_model(llm_config_id, prompt, image_url)
57 |         return jsonify({"success": True, "result": result})
58 |     except ValueError as e:
59 |         return jsonify({"success": False, "message": str(e)}), 404
60 |     except Exception as e:
61 |         return jsonify({"success": False, "message": f"模型测试时发生内部错误: {str(e)}"}), 500
62 | 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/ja/auth.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "login": {
 3 |     "title": "PinDataにログイン",
 4 |     "subtitle": "データガバナンス管理プラットフォーム",
 5 |     "rememberMe": "ログインを記憶する",
 6 |     "forgotPassword": "パスワードをお忘れですか？",
 7 |     "submit": "サインイン",
 8 |     "loggingIn": "サインイン中...",
 9 |     "noAccount": "アカウントをお持ちでないですか？",
10 |     "registerLink": "今すぐサインアップ"
11 |   },
12 |   "register": {
13 |     "title": "PinDataにサインアップ",
14 |     "subtitle": "データガバナンスアカウントを作成",
15 |     "agreeTerms": "以下に同意します",
16 |     "termsOfService": "利用規約",
17 |     "and": "および",
18 |     "privacyPolicy": "プライバシーポリシー",
19 |     "submit": "サインアップ",
20 |     "registering": "アカウントを作成中...",
21 |     "hasAccount": "既にアカウントをお持ちですか？",
22 |     "loginLink": "サインイン",
23 |     "success": "登録が完了しました。サインインしてください",
24 |     "firstUserSuccess": "ようこそ！あなたは最初のユーザーとして管理者権限が付与されました"
25 |   },
26 |   "logout": {
27 |     "button": "サインアウト",
28 |     "loggingOut": "サインアウト中..."
29 |   },
30 |   "fields": {
31 |     "username": "ユーザー名",
32 |     "email": "メール",
33 |     "password": "パスワード",
34 |     "confirmPassword": "パスワード確認",
35 |     "fullName": "フルネーム",
36 |     "phone": "電話番号"
37 |   },
38 |   "placeholders": {
39 |     "username": "ユーザー名またはメールを入力",
40 |     "email": "メールアドレスを入力",
41 |     "password": "パスワードを入力",
42 |     "confirmPassword": "パスワードを確認",
43 |     "fullName": "フルネームを入力",
44 |     "phone": "電話番号を入力"
45 |   },
46 |   "validation": {
47 |     "usernameRequired": "ユーザー名は必須です",
48 |     "usernameMinLength": "ユーザー名は3文字以上である必要があります",
49 |     "usernameFormat": "ユーザー名には英数字とアンダースコアのみ使用できます",
50 |     "emailRequired": "メールは必須です",
51 |     "emailInvalid": "メール形式が無効です",
52 |     "passwordRequired": "パスワードは必須です",
53 |     "passwordMinLength": "パスワードは6文字以上である必要があります",
54 |     "passwordMismatch": "パスワードが一致しません",
55 |     "agreeTermsRequired": "利用規約に同意してください"
56 |   },
57 |   "errors": {
58 |     "loginFailed": "ログインに失敗しました。認証情報を確認してください",
59 |     "registerFailed": "登録に失敗しました。再試行してください",
60 |     "unauthorized": "このページにアクセスする権限がありません"
61 |   },
62 |   "footer": {
63 |     "copyright": "© 2024 PinData. All rights reserved."
64 |   }
65 | }


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/libraryDetails.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "backToList": "Back to List",
 3 |   "uploadFiles": "Upload Files",
 4 |   "totalFiles": "Total Files",
 5 |   "processed": "Processed",
 6 |   "processing": "Processing",
 7 |   "pending": "Pending",
 8 |   "mdFiles": "MD Files",
 9 |   "fileList": "File List",
10 |   "totalFilesCount": "{count} files in total",
11 |   "selectedCount": "{count} selected",
12 |   "convertToMD": "Convert to MD",
13 |   "batchDelete": "Batch Delete",
14 |   "refresh": "Refresh",
15 |   "loadingFiles": "Loading file list...",
16 |   "loadFailed": "Failed to load file list",
17 |   "retry": "Retry",
18 |   "noFiles": "No files",
19 |   "uploadTip": "Click the \"Upload Files\" button above to start adding files",
20 |   "fileName": "File Name",
21 |   "type": "Type",
22 |   "size": "Size",
23 |   "status": "Status",
24 |   "uploadTime": "Upload Time",
25 |   "actions": "Actions",
26 |   "selectAll": "Select All",
27 |   "selectFile": "Select File",
28 |   "convertToMDAction": "Convert to MD",
29 |   "viewDetails": "View Details",
30 |   "deleteFileAction": "Delete File",
31 |   "downloadFileAction": "Download File",
32 |   "uploadSuccess": "Successfully uploaded {count} files",
33 |   "deleteSuccess": "File \"{fileName}\" deleted successfully",
34 |   "deleteFailed": "Failed to delete file \"{fileName}\"",
35 |   "downloadStart": "Download started for file \"{fileName}\"",
36 |   "downloadFailed": "Failed to download file \"{fileName}\"",
37 |   "deleteConfirm": "Are you sure you want to delete file \"{fileName}\"?",
38 |   "batchDeleteConfirm": "⚠️ Warning: You are about to delete {count} files!\n\nFile list:\n{fileNames}\n\nThis operation cannot be undone. Are you sure you want to continue?",
39 |   "batchDeleteSuccess": "Successfully deleted {count} files",
40 |   "batchDeletePartial": "Deletion completed: {successCount} successful, {failCount} failed",
41 |   "convertSubmitted": "Submitted conversion task for {count} files",
42 |   "convertFailed": "Failed to submit conversion task",
43 |   "cancelSuccess": "Conversion task cancelled",
44 |   "cancelFailed": "Failed to cancel task"
45 | }


--------------------------------------------------------------------------------
/frontend/src/components/ui/tabs.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import * as TabsPrimitive from "@radix-ui/react-tabs";
 3 | import { cn } from "../../lib/utils";
 4 | 
 5 | const Tabs = TabsPrimitive.Root;
 6 | 
 7 | /**
 8 |  * Wrapper around the Radix `Tabs.List` primitive: forwards the ref and any extra
 9 |  * props, and merges a caller-supplied `className` after the base classes.
10 |  */
11 | const TabsList = React.forwardRef<
12 |   React.ElementRef<typeof TabsPrimitive.List>,
13 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.List>
14 | >(({ className, ...rest }, forwardedRef) => {
15 |   const listClasses =
16 |     "inline-flex h-10 items-center justify-center rounded-md bg-muted p-1 text-muted-foreground";
17 | 
18 |   return <TabsPrimitive.List ref={forwardedRef} className={cn(listClasses, className)} {...rest} />;
19 | });
20 | TabsList.displayName = TabsPrimitive.List.displayName;
21 | 
22 | /**
23 |  * Wrapper around the Radix `Tabs.Trigger` primitive: forwards the ref and any extra
24 |  * props, and merges a caller-supplied `className` after the base classes.
25 |  */
26 | const TabsTrigger = React.forwardRef<
27 |   React.ElementRef<typeof TabsPrimitive.Trigger>,
28 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.Trigger>
29 | >(({ className, ...rest }, forwardedRef) => {
30 |   const triggerClasses =
31 |     "inline-flex items-center justify-center whitespace-nowrap px-3 py-1.5 text-sm font-medium ring-offset-background transition-all focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2 disabled:pointer-events-none disabled:opacity-50 data-[state=active]:bg-background data-[state=active]:text-foreground data-[state=active]:shadow-sm";
32 | 
33 |   return <TabsPrimitive.Trigger ref={forwardedRef} className={cn(triggerClasses, className)} {...rest} />;
34 | });
35 | TabsTrigger.displayName = TabsPrimitive.Trigger.displayName;
36 | 
37 | /**
38 |  * Wrapper around the Radix `Tabs.Content` primitive: forwards the ref and any extra
39 |  * props, and merges a caller-supplied `className` after the base classes.
40 |  */
41 | const TabsContent = React.forwardRef<
42 |   React.ElementRef<typeof TabsPrimitive.Content>,
43 |   React.ComponentPropsWithoutRef<typeof TabsPrimitive.Content>
44 | >(({ className, ...rest }, forwardedRef) => {
45 |   const contentClasses =
46 |     "mt-2 ring-offset-background focus-visible:outline-none focus-visible:ring-2 focus-visible:ring-ring focus-visible:ring-offset-2";
47 | 
48 |   return <TabsPrimitive.Content ref={forwardedRef} className={cn(contentClasses, className)} {...rest} />;
49 | });
50 | TabsContent.displayName = TabsPrimitive.Content.displayName;
51 | 
52 | export { Tabs, TabsList, TabsTrigger, TabsContent };


--------------------------------------------------------------------------------
/cleanup.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # PinData cleanup helper: frees port 8897, optionally stops run.py/celery, checks port 5173 and Redis.
 3 | 
 4 | echo "🧹 PinData 进程清理工具"
 5 | echo "========================"
 6 | 
 7 | # Check for, and stop, whatever is listening on port 8897
 8 | echo ""
 9 | echo "🔍 检查8897端口占用情况..."
10 | PORT_PROCESSES=$(lsof -i :8897 | grep LISTEN)
11 | 
12 | if [ -z "$PORT_PROCESSES" ]; then
13 |     echo "✅ 端口8897空闲"
14 | else
15 |     echo "⚠️  发现占用8897端口的进程："
16 |     echo "$PORT_PROCESSES"
17 | 
18 |     # Only LISTENING sockets are targeted: plain `lsof -t -i :8897` also returns
19 |     # clients that merely hold a connection to the port (e.g. a browser tab).
20 |     PIDS=$(lsof -t -iTCP:8897 -sTCP:LISTEN 2>/dev/null)
21 |     if [ -n "$PIDS" ]; then
22 |         echo ""
23 |         echo "🛑 正在停止这些进程..."
24 |         for pid in $PIDS; do
25 |             echo "   停止进程 $pid"
26 |             kill "$pid" 2>/dev/null   # SIGTERM first, so the process can exit cleanly
27 |         done
28 |         sleep 2
29 | 
30 |         # Escalate to SIGKILL only for listeners that ignored SIGTERM
31 |         STUBBORN=$(lsof -t -iTCP:8897 -sTCP:LISTEN 2>/dev/null)
32 |         for pid in $STUBBORN; do
33 |             kill -9 "$pid" 2>/dev/null
34 |         done
35 |         [ -n "$STUBBORN" ] && sleep 1
36 | 
37 |         # Check again
38 |         if [ -z "$(lsof -t -iTCP:8897 -sTCP:LISTEN 2>/dev/null)" ]; then
39 |             echo "✅ 端口8897已释放"
40 |         else
41 |             echo "❌ 某些进程可能仍在运行"
42 |         fi
43 |     fi
44 | fi
45 | 
46 | # Look for related Python processes
47 | echo ""
48 | echo "🔍 检查相关的Python进程..."
49 | PYTHON_PROCESSES=$(ps aux | grep -E "(run\.py|celery)" | grep -v grep)
50 | 
51 | if [ -z "$PYTHON_PROCESSES" ]; then
52 |     echo "✅ 没有发现相关的Python进程"
53 | else
54 |     echo "⚠️  发现相关进程："
55 |     echo "$PYTHON_PROCESSES"
56 | 
57 |     echo ""
58 |     read -p "是否要停止这些进程? (y/N): " -n 1 -r
59 |     echo
60 |     if [[ $REPLY =~ ^[Yy]$ ]]; then
61 |         # pkill -f takes a regex; the dot is escaped so it matches the same text as the listing above
62 |         pkill -f "run\.py"
63 |         # Stop celery processes
64 |         pkill -f "celery"
65 |         echo "✅ 进程已停止"
66 |     fi
67 | fi
68 | 
69 | # Check port 5173 (frontend); report only, nothing is stopped here
70 | echo ""
71 | echo "🔍 检查5173端口占用情况..."
72 | FRONTEND_PROCESSES=$(lsof -i :5173 | grep LISTEN)
73 | 
74 | if [ -z "$FRONTEND_PROCESSES" ]; then
75 |     echo "✅ 端口5173空闲"
76 | else
77 |     echo "⚠️  发现占用5173端口的进程："
78 |     echo "$FRONTEND_PROCESSES"
79 | fi
80 | 
81 | # Check the Redis connection
82 | echo ""
83 | echo "🔍 检查Redis连接..."
84 | if command -v redis-cli &> /dev/null; then
85 |     if redis-cli -p 16379 ping &> /dev/null; then
86 |         echo "✅ Redis (端口16379) 连接正常"
87 |     else
88 |         echo "❌ Redis (端口16379) 连接失败"
89 |     fi
90 | else
91 |     echo "⚠️  Redis CLI 未安装，无法测试连接"
92 | fi
93 | 
94 | echo ""
95 | echo "🎯 清理完成！现在可以运行 ./start.sh 启动服务" 


--------------------------------------------------------------------------------
/frontend/public/vector---0-3.svg:
--------------------------------------------------------------------------------
1 | <svg width="21" height="20" viewBox="0 0 21 20" fill="none" xmlns="http://www.w3.org/2000/svg">
2 | <path fill-rule="evenodd" clip-rule="evenodd" d="M20.4121 13.7058C20.1892 13.5652 19.9098 13.5485 19.6717 13.6615C18.8637 14.0437 17.8988 13.8187 17.3432 13.1186C16.7875 12.4184 16.7875 11.4277 17.3432 10.7276C17.8988 10.0274 18.8637 9.80247 19.6717 10.1846C19.91 10.2977 20.1897 10.2808 20.4127 10.1398C20.6357 9.99873 20.7708 9.75326 20.7707 9.48942V5.38462C20.7707 4.53495 20.0819 3.84615 19.2323 3.84615H15.7496C15.7639 3.71845 15.771 3.59004 15.7707 3.46154C15.7693 2.50604 15.3742 1.59336 14.6784 0.938461C13.7056 0.0247571 12.2941 -0.248251 11.0507 0.236788C9.80727 0.721828 8.95348 1.8785 8.85631 3.20962C8.84153 3.42177 8.84604 3.63482 8.86977 3.84615H5.38612C4.53645 3.84615 3.84766 4.53495 3.84766 5.38462V8.48269C3.71995 8.46838 3.59154 8.46132 3.46304 8.46154C2.50757 8.46308 1.59493 8.85818 0.939964 9.55385C0.280072 10.2509 -0.0578245 11.1924 0.00823314 12.15C0.1222 13.8743 1.48784 15.2519 3.21112 15.3808C3.42326 15.396 3.63635 15.3915 3.84766 15.3673V18.4615C3.84766 19.3112 4.53645 20 5.38612 20H19.2323C20.0819 20 20.7707 19.3112 20.7707 18.4615V14.3567C20.7709 14.0926 20.6355 13.8468 20.4121 13.7058ZM19.2323 18.4615H5.38612V14.3567C5.3862 14.0929 5.25107 13.8474 5.0281 13.7064C4.80513 13.5653 4.52543 13.5484 4.28708 13.6615C3.47904 14.0437 2.51423 13.8187 1.95857 13.1186C1.40291 12.4184 1.40291 11.4277 1.95857 10.7276C2.51423 10.0274 3.47904 9.80247 4.28708 10.1846C4.52543 10.2977 4.80513 10.2808 5.0281 10.1398C5.25107 9.99873 5.3862 9.75326 5.38612 9.48942V5.38462H9.87554C10.1394 5.3847 10.3848 5.24957 10.5259 5.0266C10.6669 4.80363 10.6839 4.52393 10.5707 4.28558C10.1886 3.47754 10.4136 2.51273 11.1137 1.95707C11.8138 1.40141 12.8045 1.40141 13.5047 1.95707C14.2048 2.51273 14.4298 3.47754 14.0477 4.28558C13.9345 4.52393 13.9515 4.80363 14.0925 5.0266C14.2335 5.24957 14.479 5.3847 14.7428 5.38462H19.2323V8.48365C19.021 8.45945 18.8079 8.45494 18.5957 8.47019C16.7348 8.60197 15.3131 10.1848 15.381 12.0491C15.4489 13.9135 16.9821 15.3886 18.8477 
15.3846C18.9762 15.3848 19.1046 15.3778 19.2323 15.3635V18.4615Z" fill="#0D141C"/>
3 | </svg>
4 | 


--------------------------------------------------------------------------------
/backend/app/models/role.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Text, DateTime, Enum as SQLEnum
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class RoleType(enum.Enum):  # value set for the Role.type column (Role rows default to CUSTOM)
11 |     SYSTEM = "system"
12 |     CUSTOM = "custom"
13 | 
14 | 
15 | class RoleStatus(enum.Enum):  # value set for the Role.status column (Role rows default to ACTIVE)
16 |     ACTIVE = "active"
17 |     INACTIVE = "inactive"
18 | 
19 | 
20 | class Role(db.Model):
21 |     __tablename__ = 'roles'
22 |     
23 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
24 |     name = Column(String(100), nullable=False)
25 |     code = Column(String(50), unique=True, nullable=False, index=True)
26 |     description = Column(Text)
27 |     type = Column(SQLEnum(RoleType), default=RoleType.CUSTOM, index=True)
28 |     status = Column(SQLEnum(RoleStatus), default=RoleStatus.ACTIVE, index=True)
29 |     created_at = Column(DateTime, default=datetime.utcnow)
30 |     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
31 |     created_by = Column(String(36))
32 |     updated_by = Column(String(36))
33 |     
34 |     # Relationships
35 |     user_roles = relationship("UserRole", back_populates="role", cascade="all, delete-orphan")
36 |     role_permissions = relationship("RolePermission", back_populates="role", cascade="all, delete-orphan")
37 |     
38 |     def to_dict(self, include_permissions=False):
39 |         result = {
40 |             'id': self.id,
41 |             'name': self.name,
42 |             'code': self.code,
43 |             'description': self.description,
44 |             'type': self.type.value,
45 |             'status': self.status.value,
46 |             'created_at': self.created_at.isoformat() if self.created_at else None,
47 |             'updated_at': self.updated_at.isoformat() if self.updated_at else None
48 |         }
49 |         
50 |         if include_permissions:
51 |             result['permissions'] = [rp.permission.to_dict() for rp in self.role_permissions]
52 |         
53 |         return result
54 |     
55 |     def __repr__(self):
56 |         return f'<Role {self.name}>'


--------------------------------------------------------------------------------
/frontend/src/components/auth/ProtectedRoute.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { Navigate, useLocation } from 'react-router-dom';
 3 | import { useAuthStore } from '../../store/authStore';
 4 | 
 5 | interface ProtectedRouteProps {
 6 |   children: React.ReactNode;
 7 |   requireAuth?: boolean; // default true; false marks a guest-only route (logged-in users are sent to "/")
 8 |   requiredPermissions?: string[]; // default []; checked only when non-empty and the user is authenticated
 9 |   requireAllPermissions?: boolean; // default false: uses hasAnyPermission; true: uses hasAllPermissions
10 |   fallbackPath?: string; // default '/auth/login'; redirect target for unauthenticated users
11 | }
12 | 
13 | /** Route wrapper that gates `children` on login state and, optionally, on permissions. */
14 | export const ProtectedRoute: React.FC<ProtectedRouteProps> = ({
15 |   children,
16 |   requireAuth = true,
17 |   requiredPermissions = [],
18 |   requireAllPermissions = false,
19 |   fallbackPath = '/auth/login'
20 | }) => {
21 |   const location = useLocation();
22 |   const { isAuthenticated, hasAnyPermission, hasAllPermissions } = useAuthStore();
23 | 
24 |   // Authentication is required but the user is not logged in: redirect, remembering the origin
25 |   if (requireAuth && !isAuthenticated) {
26 |     return <Navigate to={fallbackPath} state={{ from: location }} replace />;
27 |   }
28 | 
29 |   // Guest-only route (login/register pages) visited by a logged-in user: go to the home page
30 |   if (!requireAuth && isAuthenticated) {
31 |     return <Navigate to="/" replace />;
32 |   }
33 | 
34 |   // Permission check: all listed permissions or any one of them, per requireAllPermissions
35 |   if (requiredPermissions.length > 0 && isAuthenticated) {
36 |     const hasRequiredPermissions = requireAllPermissions
37 |       ? hasAllPermissions(requiredPermissions)
38 |       : hasAnyPermission(requiredPermissions);
39 |     if (!hasRequiredPermissions) {
40 |       return <Navigate to="/unauthorized" replace />;
41 |     }
42 |   }
43 | 
44 |   return <>{children}</>;
45 | };
46 | 
47 | // Permission check component
48 | interface PermissionGuardProps {
49 |   children: React.ReactNode;
50 |   permissions: string[];
51 |   requireAll?: boolean; // default false: uses hasAnyPermission; true: uses hasAllPermissions
52 |   fallback?: React.ReactNode; // default null; rendered instead of children when the check fails
53 | }
54 | 
55 | /**
56 |  * Renders `children` only when the current user passes the permission check,
57 |  * and `fallback` otherwise. The check is the auth store's `hasAllPermissions`
58 |  * when `requireAll` is set, and `hasAnyPermission` when it is not.
59 |  */
60 | export const PermissionGuard: React.FC<PermissionGuardProps> = ({
61 |   children,
62 |   permissions,
63 |   requireAll = false,
64 |   fallback = null
65 | }) => {
66 |   const { hasAnyPermission, hasAllPermissions } = useAuthStore();
67 |   const check = requireAll ? hasAllPermissions : hasAnyPermission;
68 |   const allowed = check(permissions);
69 | 
70 |   // Denied → fallback; allowed → children.
71 |   return <>{allowed ? children : fallback}</>;
72 | };


--------------------------------------------------------------------------------
/frontend/src/components/ui/button.tsx:
--------------------------------------------------------------------------------
 1 | import { Slot } from "@radix-ui/react-slot";
 2 | import { type VariantProps, cva } from "class-variance-authority";
 3 | import * as React from "react";
 4 | 
 5 | import { cn } from "../../lib/utils";
 6 | 
 7 | const buttonVariants = cva( // class-name builder for Button: base classes, then per-variant and per-size classes
 8 |   "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0",
 9 |   {
10 |     variants: { // two independent axes: `variant` (look) and `size` (dimensions)
11 |       variant: {
12 |         default:
13 |           "bg-primary text-primary-foreground shadow hover:bg-primary/90",
14 |         destructive:
15 |           "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90",
16 |         outline:
17 |           "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground",
18 |         secondary:
19 |           "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80",
20 |         ghost: "hover:bg-accent hover:text-accent-foreground",
21 |         link: "text-primary underline-offset-4 hover:underline",
22 |       },
23 |       size: {
24 |         default: "h-9 px-4 py-2",
25 |         sm: "h-8 rounded-md px-3 text-xs",
26 |         lg: "h-10 rounded-md px-8",
27 |         icon: "h-9 w-9",
28 |       },
29 |     },
30 |     defaultVariants: { // used when the caller passes no `variant` / `size`
31 |       variant: "default",
32 |       size: "default",
33 |     },
34 |   },
35 | );
36 | 
37 | export interface ButtonProps
38 |   extends React.ButtonHTMLAttributes<HTMLButtonElement>,
39 |     VariantProps<typeof buttonVariants> {
40 |   asChild?: boolean; // when true, Button renders `Slot` instead of a native <button> (default false)
41 | }
42 | 
43 | /**
44 |  * Button component: renders a native `<button>`, or `Slot` when `asChild` is set,
45 |  * with classes from `buttonVariants` for the given `variant`, `size` and `className`.
46 |  */
47 | const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
48 |   ({ className, variant, size, asChild = false, ...rest }, forwardedRef) => {
49 |     const Comp = asChild ? Slot : "button";
50 |     const classes = cn(buttonVariants({ variant, size, className }));
51 | 
52 |     return <Comp className={classes} ref={forwardedRef} {...rest} />;
53 |   },
54 | );
55 | Button.displayName = "Button";
56 | 
57 | export { Button, buttonVariants };
58 | 


--------------------------------------------------------------------------------
/frontend/src/services/overview.service.ts:
--------------------------------------------------------------------------------
 1 | import { apiClient } from '../lib/api-client';
 2 | 
 3 | // Overview statistics data type
 4 | export interface OverviewStats {
 5 |   datasets: {
 6 |     total: number;
 7 |   };
 8 |   tasks: {
 9 |     total: number;
10 |   };
11 |   plugins: {
12 |     total: number;
13 |     status: string;
14 |   };
15 |   storage: {
16 |     used: number;
17 |     total: number;
18 |     used_gb: number; // presumably `used` expressed in GB — confirm against the API
19 |     total_gb: number; // presumably `total` expressed in GB — confirm against the API
20 |   };
21 |   raw_data: {
22 |     total: number;
23 |   };
24 | }
25 | 
26 | // Activity item type
27 | export interface Activity {
28 |   id: string | number;
29 |   title: string;
30 |   time: string;
31 |   type: string;
32 |   icon: string;
33 | }
34 | 
35 | // Notification item type (same fields as Activity)
36 | export interface Notification {
37 |   id: string | number;
38 |   title: string;
39 |   time: string;
40 |   type: string;
41 |   icon: string;
42 | }
43 | 
44 | // Response type for recent activities
45 | export interface RecentActivitiesResponse {
46 |   activities: Activity[];
47 | }
48 | 
49 | // Response type for notifications
50 | export interface NotificationsResponse {
51 |   notifications: Notification[];
52 | }
53 | 
54 | /** Client for the `/api/v1/overview/*` endpoints, built on the shared `apiClient`. */
55 | class OverviewService {
56 |   /** Fetch the system statistics. */
57 |   async getStats(): Promise<OverviewStats> {
58 |     const stats = await apiClient.get<OverviewStats>('/api/v1/overview/stats');
59 |     return stats;
60 |   }
61 | 
62 |   /** Fetch the recent activities. */
63 |   async getRecentActivities(): Promise<RecentActivitiesResponse> {
64 |     const recent = await apiClient.get<RecentActivitiesResponse>('/api/v1/overview/recent-activities');
65 |     return recent;
66 |   }
67 | 
68 |   /** Fetch the system notifications. */
69 |   async getNotifications(): Promise<NotificationsResponse> {
70 |     const notices = await apiClient.get<NotificationsResponse>('/api/v1/overview/notifications');
71 |     return notices;
72 |   }
73 | 
74 |   /**
75 |    * Fetch all overview data. The three requests are issued together and
76 |    * awaited with `Promise.all`; the activity and notification responses are
77 |    * unwrapped to their arrays before being returned.
78 |    */
79 |   async getAllOverviewData() {
80 |     const [stats, recent, notices] = await Promise.all([
81 |       this.getStats(),
82 |       this.getRecentActivities(),
83 |       this.getNotifications()
84 |     ]);
85 | 
86 |     // Unwrap the list payloads so callers receive plain arrays.
87 |     const { activities } = recent;
88 |     const { notifications } = notices;
89 | 
90 |     return { stats, activities, notifications };
91 |   }
92 | }
93 | 
94 | export const overviewService = new OverviewService(); 


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/overview.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "welcome": "Welcome to PinData",
 3 |   "welcomeDescription": "Get started by exploring your datasets, managing tasks, and monitoring storage usage.",
 4 |   "systemOverview": "System Overview",
 5 |   "recentActivity": "Recent Activity",
 6 |   "quickActions": "Quick Actions",
 7 |   "systemNotifications": "System Notifications",
 8 |   "comingSoon": "Coming Soon",
 9 |   "pluginsDevelopment": "Plugin support is currently under development",
10 |   "features": {
11 |     "dataImport": {
12 |       "title": "Data Import and Processing",
13 |       "description": "Support for importing raw data in multiple formats including CSV, JSON, Excel, etc. Intelligent data preprocessing, automatic data type recognition, and generation of high-quality training datasets."
14 |     },
15 |     "datasetManagement": {
16 |       "title": "Multi-platform Dataset Management",
17 |       "description": "Seamless integration with mainstream platforms like Hugging Face Hub and ModelScope. One-click import of popular open-source datasets with version management and update synchronization."
18 |     },
19 |     "taskScheduling": {
20 |       "title": "Smart Task Scheduling",
21 |       "description": "Automated data processing pipeline supporting batch operations and scheduled tasks. Real-time task status monitoring with detailed execution logs and performance metrics."
22 |     },
23 |     "storageSharing": {
24 |       "title": "Storage and Sharing",
25 |       "description": "Cloud storage supporting secure storage and backup of large-scale datasets. Team collaboration features with dataset sharing and permission management."
26 |     }
27 |   },
28 |   "quickStart": {
29 |     "title": "Quick Start Guide",
30 |     "step1": {
31 |       "title": "Import Data",
32 |       "description": "From local files, Hugging Face, or ModelScope"
33 |     },
34 |     "step2": {
35 |       "title": "Process Data",
36 |       "description": "Cleaning, transformation and quality check"
37 |     },
38 |     "step3": {
39 |       "title": "Generate Dataset",
40 |       "description": "Build the training dataset and export it"
41 |     }
42 |   },
43 |   "error": {
44 |     "loadingOverview": "Error loading overview data: {{error}}"
45 |   }
46 | }


--------------------------------------------------------------------------------
/frontend/src/components/ui/card.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | 
 3 | import { cn } from "../../lib/utils";
 4 | 
 5 | const Card = React.forwardRef<
 6 |   HTMLDivElement,
 7 |   React.HTMLAttributes<HTMLDivElement>
 8 | >(({ className, ...props }, ref) => (
 9 |   <div
10 |     ref={ref}
11 |     className={cn(
12 |       "rounded-xl border bg-card text-card-foreground shadow",
13 |       className,
14 |     )}
15 |     {...props}
16 |   />
17 | ));
18 | Card.displayName = "Card";
19 | 
/**
 * Card header: a vertical flex `<div>` with padding.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const CardHeader = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("flex flex-col space-y-1.5 p-6", className);
  return <div ref={forwardedRef} className={classes} {...rest} />;
});
CardHeader.displayName = "CardHeader";
31 | 
/**
 * Card title: a `<div>` styled as a semibold heading line.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const CardTitle = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("font-semibold leading-none tracking-tight", className);
  return <div ref={forwardedRef} className={classes} {...rest} />;
});
CardTitle.displayName = "CardTitle";
43 | 
/**
 * Card description: a `<div>` with small, muted text.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const CardDescription = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("text-sm text-muted-foreground", className);
  return <div ref={forwardedRef} className={classes} {...rest} />;
});
CardDescription.displayName = "CardDescription";
55 | 
/**
 * Card body: a padded `<div>` without top padding.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const CardContent = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("p-6 pt-0", className);
  return <div ref={forwardedRef} className={classes} {...rest} />;
});
CardContent.displayName = "CardContent";
63 | 
/**
 * Card footer: a horizontally aligned flex `<div>` without top padding.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const CardFooter = React.forwardRef<
  HTMLDivElement,
  React.HTMLAttributes<HTMLDivElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("flex items-center p-6 pt-0", className);
  return <div ref={forwardedRef} className={classes} {...rest} />;
});
CardFooter.displayName = "CardFooter";
75 | 
76 | export {
77 |   Card,
78 |   CardHeader,
79 |   CardFooter,
80 |   CardTitle,
81 |   CardDescription,
82 |   CardContent,
83 | };
84 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/useFileDetails.ts:
--------------------------------------------------------------------------------
 1 | import { useState, useEffect } from 'react';
 2 | import { fileService } from '../services/file.service';
 3 | 
/**
 * Shape of the file-detail record held by `useFileDetails`
 * (the `data` field of the `fileService.getFileDetails` response).
 * Field meanings below are read from the field names only.
 * NOTE(review): confirm against the backend schema.
 */
interface FileData {
  id: string;
  filename: string;
  original_filename?: string;
  file_type: string;
  file_category: string;
  file_category_display: string;
  file_size?: number;
  // Processing state
  processing_status: string;
  processing_progress?: number;
  processing_error?: string;
  content_quality_score?: number;
  extraction_confidence?: number;
  // Timestamps are typed as strings; exact format not visible here.
  upload_at?: string;
  processed_at?: string;
  // Image attributes (optional)
  image_width?: number;
  image_height?: number;
  color_mode?: string;
  // Video / audio attributes (optional)
  video_width?: number;
  video_height?: number;
  duration?: number;
  frame_rate?: string;
  video_codec?: string;
  audio_codec?: string;
  mime_type?: string;
  encoding?: string;
  checksum?: string;
  // Storage and preview
  minio_object_name: string;
  thumbnail_path?: string;
  preview_content?: string;
  extracted_text?: string;
  // Untyped metadata blobs; structure is not defined in this file.
  file_metadata?: any;
  extraction_metadata?: any;
  is_supported_preview: boolean;
  preview_type: string;
  page_count?: number;
  word_count?: number;
}
42 | 
/**
 * Loads the details of one file in a library and exposes the request state.
 *
 * @param libraryId - id of the library that owns the file
 * @param fileId - id of the file to load
 * @returns `fileData` (null until loaded), `loading`, `error` (message or
 *          null) and `refreshFile`, which re-runs the request on demand.
 *
 * The request is re-issued whenever `libraryId` or `fileId` changes. Results
 * of a request that has been superseded (ids changed or component unmounted)
 * are discarded so they cannot overwrite newer state.
 */
export const useFileDetails = (libraryId: string, fileId: string) => {
  const [fileData, setFileData] = useState<FileData | null>(null);
  const [loading, setLoading] = useState(true);
  const [error, setError] = useState<string | null>(null);

  // `isStale` lets the effect below mark a request as superseded; manual
  // refreshes use the default and are never treated as stale.
  const fetchFileDetails = async (isStale: () => boolean = () => false) => {
    if (!libraryId || !fileId) {
      // Nothing to fetch: clear the initial `loading = true` so consumers
      // are not stuck on a spinner.
      setLoading(false);
      return;
    }

    setLoading(true);
    setError(null);

    try {
      const response = await fileService.getFileDetails(libraryId, fileId);
      if (isStale()) return;
      setFileData(response.data);
    } catch (err: any) {
      if (isStale()) return;
      console.error('获取文件详情失败:', err);
      // A rejection value may be null/undefined, so guard the property access.
      setError(err?.message || '获取文件详情失败');
    } finally {
      // A stale request must not flip the flag owned by the newer request.
      if (!isStale()) {
        setLoading(false);
      }
    }
  };

  const refreshFile = () => {
    fetchFileDetails();
  };

  useEffect(() => {
    let stale = false;
    fetchFileDetails(() => stale);
    // Cleanup runs before the next effect and on unmount.
    return () => {
      stale = true;
    };
  }, [libraryId, fileId]);

  return {
    fileData,
    loading,
    error,
    refreshFile
  };
};


--------------------------------------------------------------------------------
/frontend/src/screens/Settings/README.md:
--------------------------------------------------------------------------------
  1 | # Settings 组件重构说明
  2 | 
  3 | ## 概述
  4 | 
  5 | 本次重构将原来的单一大型 Settings 组件拆分为更模块化的结构，提高了代码的可维护性和复用性。
  6 | 
  7 | ## 文件结构
  8 | 
  9 | ```
 10 | frontend/src/screens/Settings/
 11 | ├── Settings.tsx              # 主设置页面组件
 12 | ├── components/
 13 | │   ├── index.ts              # 组件统一导出
 14 | │   ├── LLMConfig.tsx         # 大模型配置子组件
 15 | │   └── SystemLogs.tsx        # 系统日志子组件
 16 | └── README.md                 # 本说明文档
 17 | ```
 18 | 
 19 | ## 重构内容
 20 | 
 21 | ### 1. 组件拆分
 22 | 
 23 | - **LLMConfigComponent**: 负责大模型配置的所有功能
 24 |   - 添加新配置
 25 |   - 编辑现有配置
 26 |   - 删除配置（带确认对话框）
 27 |   - 设置默认配置
 28 |   - 测试配置连接
 29 |   - 启用/禁用配置
 30 | 
 31 | - **SystemLogs**: 负责系统日志管理
 32 |   - 日志搜索和过滤
 33 |   - 日志导出
 34 |   - 日志清理
 35 |   - 实时日志显示
 36 | 
 37 | ### 2. 功能完善
 38 | 
 39 | #### 大模型配置功能增强：
 40 | - ✅ 完整的编辑功能，支持修改所有配置参数
 41 | - ✅ 删除确认对话框，防止误删除
 42 | - ✅ 优化的用户体验，包括加载状态和错误处理
 43 | - ✅ 支持测试配置连接，实时反馈连接状态
 44 | 
 45 | #### 系统日志功能增强：
 46 | - ✅ 优化的日志搜索和过滤
 47 | - ✅ 日志统计信息显示
 48 | - ✅ 日志清理功能
 49 | - ✅ 日志导出功能
 50 | 
 51 | ### 3. 技术改进
 52 | 
 53 | - **类型安全**: 使用严格的 TypeScript 类型定义
 54 | - **错误处理**: 完善的错误捕获和用户反馈
 55 | - **代码复用**: 抽取公共逻辑到自定义 Hook
 56 | - **性能优化**: 使用 useCallback 优化函数缓存
 57 | 
 58 | ## 使用方法
 59 | 
 60 | ```tsx
 61 | import { Settings } from './screens/Settings/Settings';
 62 | 
 63 | // 在路由中使用
 64 | <Route path="/settings" element={<Settings />} />
 65 | ```
 66 | 
 67 | ## 子组件单独使用
 68 | 
 69 | ```tsx
 70 | import { LLMConfigComponent, SystemLogs } from './screens/Settings/components';
 71 | 
 72 | // 单独使用大模型配置组件
 73 | <LLMConfigComponent />
 74 | 
 75 | // 单独使用系统日志组件
 76 | <SystemLogs />
 77 | ```
 78 | 
 79 | ## API 对接
 80 | 
 81 | ### 大模型配置相关接口
 82 | - `GET /api/v1/llm/configs` - 获取配置列表
 83 | - `POST /api/v1/llm/configs` - 创建新配置
 84 | - `PUT /api/v1/llm/configs/{id}` - 更新配置
 85 | - `DELETE /api/v1/llm/configs/{id}` - 删除配置
 86 | - `POST /api/v1/llm/configs/set-default` - 设置默认配置
 87 | - `POST /api/v1/llm/configs/{id}/test` - 测试配置
 88 | 
 89 | ### 系统日志相关接口
 90 | - `GET /api/v1/system/logs` - 获取日志列表
 91 | - `GET /api/v1/system/logs/stats` - 获取日志统计
 92 | - `POST /api/v1/system/logs/cleanup` - 清理旧日志
 93 | - `POST /api/v1/system/logs/export` - 导出日志
 94 | 
 95 | ## 注意事项
 96 | 
 97 | 1. 所有组件都支持国际化（i18n）
 98 | 2. 使用统一的UI组件库（shadcn/ui）
 99 | 3. 遵循项目的设计规范和色彩主题
100 | 4. 包含完整的错误处理和用户反馈
101 | 5. 支持移动端响应式设计 


--------------------------------------------------------------------------------
/docker/start.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # PinData 快速启动脚本
 4 | 
 5 | set -e
 6 | 
 7 | # 颜色输出
 8 | RED='\033[0;31m'
 9 | GREEN='\033[0;32m'
10 | YELLOW='\033[1;33m'
11 | BLUE='\033[0;34m'
12 | NC='\033[0m' # No Color
13 | 
14 | echo -e "${BLUE}"
15 | echo "=================================================="
16 | echo "  🚀 PinData Docker 快速启动脚本"
17 | echo "=================================================="
18 | echo -e "${NC}"
19 | 
20 | # 检查 Docker 是否安装
21 | if ! command -v docker &> /dev/null; then
22 |     echo -e "${RED}❌ Docker 未安装，请先安装 Docker${NC}"
23 |     exit 1
24 | fi
25 | 
26 | # 检查 Docker Compose 是否安装
27 | if ! command -v docker compose &> /dev/null; then
28 |     echo -e "${RED}❌ Docker Compose 未安装，请先安装 Docker Compose${NC}"
29 |     exit 1
30 | fi
31 | 
32 | echo -e "${GREEN}✅ Docker 环境检查通过${NC}"
33 | 
34 | # 创建必要的目录
35 | mkdir -p postgres redis
36 | 
37 | # 停止现有服务（如果存在）
38 | echo -e "${YELLOW}🛑 停止现有服务...${NC}"
39 | docker compose down 2>/dev/null || true
40 | 
41 | # 拉取最新镜像
42 | echo -e "${YELLOW}📥 拉取基础镜像...${NC}"
43 | docker compose pull db redis minio
44 | 
45 | # 构建 PinData 镜像
46 | echo -e "${YELLOW}🔨 构建 PinData 镜像...${NC}"
47 | docker compose build pindata-api pindata-frontend
48 | 
49 | # 启动所有服务
50 | echo -e "${YELLOW}🚀 启动所有服务...${NC}"
51 | docker compose up -d
52 | 
53 | # 等待服务启动
54 | echo -e "${YELLOW}⏳ 等待服务启动...${NC}"
55 | sleep 10
56 | 
57 | # 检查服务状态
58 | echo -e "${GREEN}📊 检查服务状态:${NC}"
59 | docker compose ps
60 | 
61 | # 显示访问信息
62 | echo -e "${BLUE}"
63 | echo "=================================================="
64 | echo "  🎉 PinData 服务启动完成！"
65 | echo "=================================================="
66 | echo -e "${NC}"
67 | 
68 | echo -e "${GREEN}📱 服务访问地址:${NC}"
69 | echo "  • PinData 前端:    http://localhost:3000"
70 | echo "  • PinData API:     http://localhost:8897"
71 | echo "  • MinIO 控制台:    http://localhost:9001"
72 | echo "  • PostgreSQL:      localhost:5432"
73 | echo "  • Redis:           localhost:6379"
74 | 
75 | echo -e "${GREEN}🔑 默认凭据:${NC}"
76 | echo "  • MinIO 用户名:    minioadmin"
77 | echo "  • MinIO 密码:      minioadmin"
78 | echo "  • PostgreSQL 用户: postgres"
79 | echo "  • PostgreSQL 密码: password"
80 | 
81 | echo -e "${YELLOW}💡 常用命令:${NC}"
82 | echo "  • 查看日志:        docker compose logs -f"
83 | echo "  • 停止服务:        docker compose down"
84 | echo "  • 重启服务:        docker compose restart"
85 | 
86 | echo -e "${GREEN}✨ 部署完成！请检查上述服务是否正常运行。${NC}" 


--------------------------------------------------------------------------------
/frontend/src/screens/StitchDesign/sections/OverviewSection/OverviewSection.tsx:
--------------------------------------------------------------------------------
 1 | import {
 2 |   DatabaseIcon,
 3 |   HardDriveIcon,
 4 |   LayoutDashboardIcon,
 5 |   ListTodoIcon,
 6 |   PuzzleIcon,
 7 |   SettingsIcon,
 8 | } from "lucide-react";
 9 | import React from "react";
10 | import { useTranslation } from 'react-i18next';
11 | import { Button } from "../../../../components/ui/button";
12 | 
/** Props for `OverviewSection`. */
interface OverviewSectionProps {
  /** When true, the app title and the item labels are hidden and only icons render. */
  isCollapsed?: boolean;
}
16 | 
/**
 * Sidebar navigation listing the main application areas.
 *
 * Renders one button per entry. The overview entry is hard-coded as active;
 * labels and the app title are translated via `react-i18next`. In collapsed
 * mode only the icons are shown.
 */
export const OverviewSection = ({ isCollapsed = false }: OverviewSectionProps): JSX.Element => {
  const { t } = useTranslation();

  // Entries in display order.
  const navigationItems = [
    { icon: <LayoutDashboardIcon size={24} />, label: t('navigation.overview'), active: true },
    { icon: <DatabaseIcon size={24} />, label: t('navigation.datasets'), active: false },
    { icon: <ListTodoIcon size={24} />, label: t('navigation.tasks'), active: false },
    { icon: <HardDriveIcon size={24} />, label: t('navigation.rawData'), active: false },
    { icon: <PuzzleIcon size={24} />, label: t('navigation.plugins'), active: false },
    { icon: <SettingsIcon size={24} />, label: t('navigation.settings'), active: false },
  ];

  const titleClasses =
    "font-medium text-base text-[#0c141c] leading-6 " + (isCollapsed ? "hidden" : "");

  // Builds the button for a single navigation entry.
  const renderEntry = (entry: (typeof navigationItems)[number], position: number) => {
    const buttonClasses =
      "flex justify-start gap-3 px-3 py-2 h-auto w-full " +
      (entry.active ? "bg-[#e8edf2]" : "");

    return (
      <Button
        key={position}
        variant={entry.active ? "secondary" : "ghost"}
        className={buttonClasses}
      >
        <span className="w-6">{entry.icon}</span>
        {!isCollapsed && (
          <span className="font-medium text-sm text-[#0c141c] leading-[21px]">
            {entry.label}
          </span>
        )}
      </Button>
    );
  };

  return (
    <nav className="flex flex-col h-full bg-[#f7f9fc] p-4">
      <div className="flex flex-col gap-4 w-full">
        <div className="w-full">
          <h2 className={titleClasses}>{t('appName')}</h2>
        </div>

        <div className="flex flex-col gap-2 w-full">
          {navigationItems.map(renderEntry)}
        </div>
      </div>
    </nav>
  );
};


--------------------------------------------------------------------------------
/frontend/src/types/library.ts:
--------------------------------------------------------------------------------
 1 | import { QueryParams } from './api';
 2 | 
// Data type of a library (string-literal union, not a TS enum)
export type DataType = 'training' | 'evaluation' | 'mixed';
 5 | 
// Processing status of a file (string-literal union, not a TS enum)
export type ProcessStatus = 'pending' | 'processing' | 'completed' | 'failed';
 8 | 
// File library
export interface Library {
  id: string;
  name: string;
  description?: string;
  data_type: DataType;
  tags: string[];
  file_count: number;
  // Typed as a string, unlike the numeric counters; presumably pre-formatted
  // by the server. TODO confirm.
  total_size: string;
  processed_count: number;
  processing_count: number;
  pending_count: number;
  md_count: number;
  // Timestamps are typed as strings; exact format not visible here.
  created_at: string;
  updated_at: string;
  last_updated: string;
}
26 | 
// Library detail: every `Library` field plus the list of its files
export interface LibraryDetail extends Library {
  files: LibraryFile[];
}
31 | 
// A file stored in a library
export interface LibraryFile {
  id: string;
  filename: string;
  original_filename: string;
  file_type: string;
  // Size as a number plus a display string for the same value
  file_size: number;
  file_size_human: string;
  // Object-storage location
  minio_object_name: string;
  minio_bucket: string;
  // Processing state and its display label
  process_status: ProcessStatus;
  process_status_label: string;
  // Conversion result fields (all optional)
  converted_format?: string;
  converted_object_name?: string;
  converted_file_size?: number;
  conversion_method?: string;
  conversion_error?: string;
  page_count?: number;
  word_count?: number;
  language?: string;
  // Id of the owning library
  library_id: string;
  // Timestamps are typed as strings; exact format not visible here.
  uploaded_at: string;
  processed_at?: string;
  created_at: string;
  updated_at: string;
}
58 | 
// Request payload for creating a library; `name` and `data_type` are required
export interface CreateLibraryRequest {
  name: string;
  description?: string;
  data_type: DataType;
  tags?: string[];
}
66 | 
// Request payload for updating a library; every field is optional
export interface UpdateLibraryRequest {
  name?: string;
  description?: string;
  data_type?: DataType;
  tags?: string[];
}
74 | 
// Query parameters for listing libraries; extends the shared `QueryParams`
export interface LibraryQueryParams extends QueryParams {
  name?: string;
  data_type?: DataType;
  tags?: string[];
}
81 | 
// Query parameters for listing files; extends the shared `QueryParams`
export interface LibraryFileQueryParams extends QueryParams {
  filename?: string;
  file_type?: string;
  process_status?: ProcessStatus;
}
88 | 
// Aggregate statistics across libraries
export interface LibraryStatistics {
  total_libraries: number;
  total_files: number;
  total_processed: number;
  // Typed as a string; presumably pre-formatted by the server. TODO confirm.
  total_size: string;
  // NOTE(review): unit (fraction vs. percentage) is not visible here. Confirm.
  conversion_rate: number;
}


--------------------------------------------------------------------------------
/frontend/src/examples/DataPreviewExample.tsx:
--------------------------------------------------------------------------------
 1 | import React from 'react';
 2 | import { DataPreviewContainer } from '../components/DataPreview';
 3 | 
 4 | /**
 5 |  * DataPreview 使用示例
 6 |  * 
 7 |  * 这个示例展示了如何使用 DataPreviewContainer 组件来预览数据集
 8 |  * 并实现版本切换功能
 9 |  */
10 | 
11 | const DataPreviewExample: React.FC = () => {
12 |   // 示例数据集ID - 在实际使用中这应该从props或路由参数获取
13 |   const datasetId = 1;
14 | 
15 |   const handleError = (error: Error) => {
16 |     // 在实际应用中，你可能会显示一个toast通知或者错误对话框
17 |     console.error('数据预览错误:', error);
18 |     // 例如：showErrorToast(error.message);
19 |   };
20 | 
21 |   return (
22 |     <div className="container mx-auto py-6 space-y-6">
23 |       <div className="border-b pb-4">
24 |         <h1 className="text-2xl font-bold">数据集预览示例</h1>
25 |         <p className="text-gray-600 mt-2">
26 |           展示数据集预览组件的版本切换和文件管理功能
27 |         </p>
28 |       </div>
29 | 
30 |       {/* 使用容器组件 - 最简单的方式 */}
31 |       <div className="space-y-4">
32 |         <h2 className="text-xl font-semibold">方式一：使用容器组件（推荐）</h2>
33 |         <DataPreviewContainer
34 |           datasetId={datasetId}
35 |           onError={handleError}
36 |         />
37 |       </div>
38 | 
39 |       {/* 说明文档 */}
40 |       <div className="bg-gray-50 p-6 rounded-lg">
41 |         <h3 className="text-lg font-semibold mb-3">功能说明</h3>
42 |         <div className="space-y-2 text-sm">
43 |           <div className="flex items-start gap-2">
44 |             <span className="text-blue-600">🔄</span>
45 |             <div>
46 |               <strong>版本切换:</strong> 点击版本按钮可以查看和切换不同版本，支持显示版本状态和详细信息
47 |             </div>
48 |           </div>
49 |           <div className="flex items-start gap-2">
50 |             <span className="text-green-600">📁</span>
51 |             <div>
52 |               <strong>文件管理:</strong> 支持文件预览、批量选择、上传和删除操作
53 |             </div>
54 |           </div>
55 |           <div className="flex items-start gap-2">
56 |             <span className="text-purple-600">🔍</span>
57 |             <div>
58 |               <strong>过滤筛选:</strong> 可以按文件类型过滤，支持全选/取消全选
59 |             </div>
60 |           </div>
61 |           <div className="flex items-start gap-2">
62 |             <span className="text-orange-600">📊</span>
63 |             <div>
64 |               <strong>数据预览:</strong> 支持表格、JSON、文本、图像等多种文件类型的预览
65 |             </div>
66 |           </div>
67 |         </div>
68 |       </div>
69 |     </div>
70 |   );
71 | };
72 | 
73 | export default DataPreviewExample; 


--------------------------------------------------------------------------------
/frontend/src/components/ui/table.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react";
 2 | import { cn } from "../../lib/utils";
 3 | 
 4 | const Table = React.forwardRef<
 5 |   HTMLTableElement,
 6 |   React.HTMLAttributes<HTMLTableElement>
 7 | >(({ className, ...props }, ref) => (
 8 |   <div className="relative w-full overflow-auto">
 9 |     <table
10 |       ref={ref}
11 |       className={cn("w-full caption-bottom text-sm", className)}
12 |       {...props}
13 |     />
14 |   </div>
15 | ));
16 | Table.displayName = "Table";
17 | 
/**
 * Table header section: a `<thead>` whose rows get a bottom border.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const TableHeader = React.forwardRef<
  HTMLTableSectionElement,
  React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("[&_tr]:border-b", className);
  return <thead ref={forwardedRef} className={classes} {...rest} />;
});
TableHeader.displayName = "TableHeader";
25 | 
/**
 * Table body section: a `<tbody>` whose last row has no border.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const TableBody = React.forwardRef<
  HTMLTableSectionElement,
  React.HTMLAttributes<HTMLTableSectionElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("[&_tr:last-child]:border-0", className);
  return <tbody ref={forwardedRef} className={classes} {...rest} />;
});
TableBody.displayName = "TableBody";
37 | 
/**
 * Table row: a `<tr>` with a bottom border, hover background and a
 * selected-state background keyed off `data-state="selected"`.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const TableRow = React.forwardRef<
  HTMLTableRowElement,
  React.HTMLAttributes<HTMLTableRowElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn(
    "border-b transition-colors hover:bg-muted/50 data-[state=selected]:bg-muted",
    className
  );
  return <tr ref={forwardedRef} className={classes} {...rest} />;
});
TableRow.displayName = "TableRow";
52 | 
/**
 * Header cell: a left-aligned, muted `<th>`; right padding is removed when
 * the cell contains a checkbox-role element.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const TableHead = React.forwardRef<
  HTMLTableCellElement,
  React.ThHTMLAttributes<HTMLTableCellElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn(
    "h-12 px-4 text-left align-middle font-medium text-muted-foreground [&:has([role=checkbox])]:pr-0",
    className
  );
  return <th ref={forwardedRef} className={classes} {...rest} />;
});
TableHead.displayName = "TableHead";
67 | 
/**
 * Body cell: a `<td>` with horizontal padding; right padding is removed when
 * the cell contains a checkbox-role element.
 * `className` is merged through `cn`; ref and other props are forwarded.
 */
const TableCell = React.forwardRef<
  HTMLTableCellElement,
  React.TdHTMLAttributes<HTMLTableCellElement>
>(({ className, ...rest }, forwardedRef) => {
  const classes = cn("px-4 align-middle [&:has([role=checkbox])]:pr-0", className);
  return <td ref={forwardedRef} className={classes} {...rest} />;
});
TableCell.displayName = "TableCell";
79 | 
80 | export {
81 |   Table,
82 |   TableHeader,
83 |   TableBody,
84 |   TableRow,
85 |   TableHead,
86 |   TableCell,
87 | };


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/en/auth.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "login": {
 3 |     "title": "Login to PinData",
 4 |     "subtitle": "Data Governance Management Platform",
 5 |     "rememberMe": "Remember me",
 6 |     "forgotPassword": "Forgot password?",
 7 |     "submit": "Sign In",
 8 |     "loggingIn": "Signing in...",
 9 |     "noAccount": "Don't have an account?",
10 |     "registerLink": "Sign up now"
11 |   },
12 |   "register": {
13 |     "title": "Sign up for PinData",
14 |     "subtitle": "Create your data governance account",
15 |     "agreeTerms": "I agree to the",
16 |     "termsOfService": "Terms of Service",
17 |     "and": "and",
18 |     "privacyPolicy": "Privacy Policy",
19 |     "submit": "Sign Up",
20 |     "registering": "Creating account...",
21 |     "hasAccount": "Already have an account?",
22 |     "loginLink": "Sign in",
23 |     "success": "Registration successful, please sign in",
24 |     "firstUserSuccess": "Welcome! You are the first user and have been granted administrator privileges"
25 |   },
26 |   "logout": {
27 |     "button": "Sign Out",
28 |     "loggingOut": "Signing out..."
29 |   },
30 |   "fields": {
31 |     "username": "Username",
32 |     "email": "Email",
33 |     "password": "Password",
34 |     "confirmPassword": "Confirm Password",
35 |     "fullName": "Full Name",
36 |     "phone": "Phone"
37 |   },
38 |   "placeholders": {
39 |     "username": "Enter username or email",
40 |     "email": "Enter email address",
41 |     "password": "Enter password",
42 |     "confirmPassword": "Confirm your password",
43 |     "fullName": "Enter your full name",
44 |     "phone": "Enter phone number"
45 |   },
46 |   "validation": {
47 |     "usernameRequired": "Username is required",
48 |     "usernameMinLength": "Username must be at least 3 characters",
49 |     "usernameFormat": "Username can only contain letters, numbers, and underscores",
50 |     "emailRequired": "Email is required",
51 |     "emailInvalid": "Invalid email format",
52 |     "passwordRequired": "Password is required",
53 |     "passwordMinLength": "Password must be at least 6 characters",
54 |     "passwordMismatch": "Passwords do not match",
55 |     "agreeTermsRequired": "Please agree to the terms of service"
56 |   },
57 |   "errors": {
58 |     "loginFailed": "Login failed, please check your credentials",
59 |     "registerFailed": "Registration failed, please try again",
60 |     "unauthorized": "You do not have permission to access this page"
61 |   },
62 |   "footer": {
63 |     "copyright": "© 2024 PinData. All rights reserved."
64 |   }
65 | }


--------------------------------------------------------------------------------
/backend/app/models/conversion_file_detail.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from sqlalchemy import Column, String, Text, DateTime, Enum, JSON, Integer, ForeignKey
 3 | from sqlalchemy.orm import relationship
 4 | import uuid
 5 | from app.db import db
 6 | from .conversion_job import ConversionStatus
 7 | 
 8 | class ConversionFileDetail(db.Model):
 9 |     """文档转换文件详情模型"""
10 |     __tablename__ = 'conversion_file_details'
11 |     
12 |     id = Column(String(36), primary_key=True)  # UUID
13 |     conversion_job_id = Column(String(36), ForeignKey('conversion_jobs.id'), nullable=False)
14 |     library_file_id = Column(String(36), ForeignKey('library_files.id'), nullable=False)
15 |     
16 |     # 转换状态
17 |     status = Column(Enum(ConversionStatus), default=ConversionStatus.PENDING)
18 |     
19 |     # 转换结果
20 |     converted_object_name = Column(String(500))  # 转换后文件在MinIO中的对象名
21 |     converted_file_size = Column(Integer)  # 转换后文件大小
22 |     
23 |     # 页面处理信息（用于分批处理）
24 |     total_pages = Column(Integer)  # 总页数
25 |     processed_pages = Column(Integer, default=0)  # 已处理页数
26 |     current_batch = Column(Integer, default=0)  # 当前批次
27 |     
28 |     # 错误信息
29 |     error_message = Column(Text)
30 |     
31 |     # 时间戳
32 |     started_at = Column(DateTime)
33 |     completed_at = Column(DateTime)
34 |     
35 |     # 关系
36 |     conversion_job = relationship('ConversionJob', backref='file_details')
37 |     library_file = relationship('LibraryFile', backref='conversion_details')
38 |     
39 |     def __init__(self, **kwargs):
40 |         super().__init__(**kwargs)
41 |         if not self.id:
42 |             self.id = str(uuid.uuid4())
43 |     
44 |     def to_dict(self):
45 |         return {
46 |             'id': self.id,
47 |             'conversion_job_id': self.conversion_job_id,
48 |             'library_file_id': self.library_file_id,
49 |             'status': self.status.value if self.status else None,
50 |             'converted_object_name': self.converted_object_name,
51 |             'converted_file_size': self.converted_file_size,
52 |             'total_pages': self.total_pages,
53 |             'processed_pages': self.processed_pages,
54 |             'current_batch': self.current_batch,
55 |             'error_message': self.error_message,
56 |             'started_at': self.started_at.isoformat() if self.started_at else None,
57 |             'completed_at': self.completed_at.isoformat() if self.completed_at else None,
58 |             'library_file': self.library_file.to_dict() if self.library_file else None
59 |         } 


--------------------------------------------------------------------------------
/backend/app/models/user_session.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, DateTime, Text, Enum as SQLEnum, ForeignKey
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
class SessionStatus(enum.Enum):
    """Status values stored in ``UserSession.status``."""
    ACTIVE = "active"
    REVOKED = "revoked"  # assigned by UserSession.revoke()
    EXPIRED = "expired"
14 | 
15 | 
class UserSession(db.Model):
    """A login session belonging to a user.

    Stores hashed access/refresh tokens, client information, expiry and
    activity timestamps, and a ``SessionStatus``.
    """
    __tablename__ = 'user_sessions'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), ForeignKey('users.id'), nullable=False, index=True)
    access_token_hash = Column(String(255), nullable=False, index=True)
    refresh_token_hash = Column(String(255))
    device_info = Column(Text)  # device information
    ip_address = Column(String(45))  # IPv4/IPv6
    user_agent = Column(Text)
    created_at = Column(DateTime, default=datetime.utcnow)
    expires_at = Column(DateTime, nullable=False, index=True)
    last_activity_at = Column(DateTime, default=datetime.utcnow)
    status = Column(SQLEnum(SessionStatus), default=SessionStatus.ACTIVE, index=True)

    # Relationships
    user = relationship("User", back_populates="user_sessions")

    def to_dict(self, include_user=False):
        """Return a plain-dict view of the session; token hashes are omitted.

        Args:
            include_user: when true and a user is attached, add its
                ``to_dict()`` output under ``'user'``.

        Returns:
            dict with ISO-8601 timestamp strings (or None) and the status
            value (or None when the status has not been set yet).
        """
        result = {
            'id': self.id,
            'user_id': self.user_id,
            'device_info': self.device_info,
            'ip_address': self.ip_address,
            'user_agent': self.user_agent,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'expires_at': self.expires_at.isoformat() if self.expires_at else None,
            'last_activity_at': self.last_activity_at.isoformat() if self.last_activity_at else None,
            # The column default is applied at flush time, so an instance that
            # has not been flushed may still have status None.
            'status': self.status.value if self.status else None
        }

        if include_user and self.user:
            result['user'] = self.user.to_dict()

        return result

    def is_expired(self):
        """Return True when the current naive UTC time is past ``expires_at``."""
        return datetime.utcnow() > self.expires_at

    def is_active(self):
        """Return True when the status is ACTIVE and the session has not expired."""
        return self.status == SessionStatus.ACTIVE and not self.is_expired()

    def revoke(self):
        """Mark the session as revoked (does not commit)."""
        self.status = SessionStatus.REVOKED

    def update_activity(self):
        """Set ``last_activity_at`` to the current naive UTC time (does not commit)."""
        self.last_activity_at = datetime.utcnow()

    def __repr__(self):
        return f'<UserSession {self.user_id}-{self.id}>'


--------------------------------------------------------------------------------
/backend/app/models/user_organization.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Boolean, Date, DateTime, Enum as SQLEnum, ForeignKey, UniqueConstraint
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
class UserOrgStatus(enum.Enum):
    """Status values stored in ``UserOrganization.status``."""
    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
13 | 
14 | 
class UserOrganization(db.Model):
    """Association row linking a user to an organization.

    Each (user_id, organization_id) pair is unique.
    """
    __tablename__ = 'user_organizations'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    user_id = Column(String(36), ForeignKey('users.id'), nullable=False, index=True)
    organization_id = Column(String(36), ForeignKey('organizations.id'), nullable=False, index=True)
    is_primary = Column(Boolean, default=False, index=True)  # whether this is the primary organization
    position = Column(String(100))  # job position
    joined_at = Column(DateTime, default=datetime.utcnow)
    status = Column(SQLEnum(UserOrgStatus, native_enum=True), default=UserOrgStatus.ACTIVE)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # Relationships
    user = relationship("User", back_populates="user_organizations")
    organization = relationship("Organization", back_populates="user_organizations")

    def to_dict(self, include_user=False, include_organization=False):
        """Return a plain-dict view of the membership.

        Args:
            include_user: when true and a user is attached, add its
                ``to_dict()`` output under ``'user'``.
            include_organization: when true and an organization is attached,
                add its ``to_dict()`` output under ``'organization'``.

        Returns:
            dict with ISO-8601 timestamp strings (or None) and the status
            value (or None when the status has not been set yet).
        """
        result = {
            'id': self.id,
            'user_id': self.user_id,
            'organization_id': self.organization_id,
            'is_primary': self.is_primary,
            'position': self.position,
            'joined_at': self.joined_at.isoformat() if self.joined_at else None,
            # The column default is applied at flush time, so an instance that
            # has not been flushed may still have status None.
            'status': self.status.value if self.status else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }

        if include_user and self.user:
            result['user'] = self.user.to_dict()

        if include_organization and self.organization:
            result['organization'] = self.organization.to_dict()

        return result

    def __repr__(self):
        return f'<UserOrganization {self.user_id}-{self.organization_id}>'

    # Constraints
    __table_args__ = (
        UniqueConstraint('user_id', 'organization_id', name='uk_user_org'),
    )


--------------------------------------------------------------------------------
/frontend/src/components/ui/dropdown-menu.tsx:
--------------------------------------------------------------------------------
 1 | import * as React from "react"
 2 | import * as DropdownMenuPrimitive from "@radix-ui/react-dropdown-menu"
 3 | import { cn } from "../../lib/utils"
 4 | 
 5 | const DropdownMenu = DropdownMenuPrimitive.Root // this and the aliases below re-expose Radix primitives unchanged
 6 | const DropdownMenuTrigger = DropdownMenuPrimitive.Trigger
 7 | const DropdownMenuGroup = DropdownMenuPrimitive.Group
 8 | const DropdownMenuPortal = DropdownMenuPrimitive.Portal
 9 | const DropdownMenuSub = DropdownMenuPrimitive.Sub
10 | const DropdownMenuRadioGroup = DropdownMenuPrimitive.RadioGroup
11 | 
12 | const DropdownMenuContent = React.forwardRef<
13 |   React.ElementRef<typeof DropdownMenuPrimitive.Content>,
14 |   React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Content>
15 | >(({ className, sideOffset = 4, ...props }, ref) => ( // sideOffset falls back to 4 when the caller omits it
16 |   <DropdownMenuPrimitive.Portal>
17 |     <DropdownMenuPrimitive.Content
18 |       ref={ref}
19 |       sideOffset={sideOffset}
20 |       className={cn(
21 |         "z-50 min-w-[8rem] overflow-hidden rounded-md border bg-popover p-1 text-popover-foreground shadow-md",
22 |         "data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[side=bottom]:slide-in-from-top-2 data-[side=left]:slide-in-from-right-2 data-[side=right]:slide-in-from-left-2 data-[side=top]:slide-in-from-bottom-2",
23 |         className // caller-supplied classes are passed last
24 |       )}
25 |       {...props}
26 |     />
27 |   </DropdownMenuPrimitive.Portal>
28 | ))
29 | DropdownMenuContent.displayName = DropdownMenuPrimitive.Content.displayName // reuse the Radix primitive's displayName
30 | 
31 | const DropdownMenuItem = React.forwardRef<
32 |   React.ElementRef<typeof DropdownMenuPrimitive.Item>,
33 |   React.ComponentPropsWithoutRef<typeof DropdownMenuPrimitive.Item> & {
34 |     inset?: boolean // when true, the item gets the extra left padding class "pl-8"
35 |   }
36 | >(({ className, inset, ...props }, ref) => ( // inset is consumed here and not forwarded to the primitive
37 |   <DropdownMenuPrimitive.Item
38 |     ref={ref}
39 |     className={cn(
40 |       "relative flex cursor-default select-none items-center rounded-sm px-2 py-1.5 text-sm outline-none transition-colors focus:bg-accent focus:text-accent-foreground data-[disabled]:pointer-events-none data-[disabled]:opacity-50",
41 |       inset && "pl-8",
42 |       className // caller-supplied classes are passed last
43 |     )}
44 |     {...props}
45 |   />
46 | ))
47 | DropdownMenuItem.displayName = DropdownMenuPrimitive.Item.displayName // reuse the Radix primitive's displayName
48 | 
49 | export {
50 |   DropdownMenu,
51 |   DropdownMenuTrigger,
52 |   DropdownMenuContent,
53 |   DropdownMenuItem,
54 |   DropdownMenuGroup,
55 |   DropdownMenuPortal,
56 |   DropdownMenuSub,
57 |   DropdownMenuRadioGroup,
58 | }


--------------------------------------------------------------------------------
/frontend/src/screens/RawData/LibraryDetails/README.md:
--------------------------------------------------------------------------------
  1 | # LibraryDetails 组件
  2 | 
  3 | LibraryDetails 组件负责显示单个文件库的详细信息，包括文件列表、转换功能和任务进度管理。
  4 | 
  5 | ## 功能特性
  6 | 
  7 | ### 1. 文件管理
  8 | - ✅ 文件列表显示（支持分页）
  9 | - ✅ 批量选择文件
 10 | - ✅ 单个/批量删除文件
 11 | - ✅ 文件上传功能
 12 | - ✅ 文件详情查看
 13 | 
 14 | ### 2. 转换功能
 15 | - ✅ **快速转换（markitdown）**: 使用 markitdown 库进行快速文档转换
 16 | - ✅ **AI 智能转换（vision_llm）**: 使用视觉大模型进行高质量转换
 17 | - ✅ 批量转换支持
 18 | - ✅ 单文件转换支持
 19 | - ✅ 转换配置选项（OCR、格式保持、表格提取、图片提取）
 20 | 
 21 | ### 3. 转换进度监控
 22 | - ✅ 实时显示转换任务进度
 23 | - ✅ 任务状态管理（等待、处理中、已完成、失败、已取消）
 24 | - ✅ 进度条和百分比显示
 25 | - ✅ 任务取消功能
 26 | - ✅ 错误信息显示
 27 | 
 28 | ## 组件架构
 29 | 
 30 | ```
 31 | LibraryDetails/
 32 | ├── LibraryDetails.tsx          # 主组件
 33 | ├── components/
 34 | │   ├── ConvertToMarkdownDialog.tsx  # 转换配置弹窗
 35 | │   └── ConversionProgress.tsx       # 转换进度显示
 36 | ├── index.ts                    # 导出文件
 37 | └── README.md                   # 说明文档
 38 | ```
 39 | 
 40 | ## API 接口
 41 | 
 42 | ### 文件转换接口
 43 | - `POST /api/v1/libraries/{library_id}/files/convert-to-markdown` - 提交转换任务
 44 | - `GET /api/v1/conversion-jobs/{job_id}` - 获取转换任务状态
 45 | - `POST /api/v1/conversion-jobs/{job_id}/cancel` - 取消转换任务
 46 | 
 47 | ### 转换配置
 48 | ```typescript
 49 | interface ConversionConfig {
 50 |   method: 'markitdown' | 'vision_llm';
 51 |   llmConfigId?: string;          // AI转换时的模型配置ID
 52 |   customPrompt?: string;         // 自定义提示词
 53 |   enableOCR?: boolean;           // 启用OCR
 54 |   preserveFormatting?: boolean;  // 保持格式
 55 |   extractTables?: boolean;       // 提取表格
 56 |   extractImages?: boolean;       // 提取图片
 57 | }
 58 | ```
 59 | 
 60 | ## 转换方法对比
 61 | 
 62 | | 功能 | 快速转换 (markitdown) | AI 智能转换 (vision_llm) |
 63 | |------|---------------------|------------------------|
 64 | | 转换速度 | ⚡ 快速 | 🐌 较慢 |
 65 | | 转换质量 | 📄 标准 | 🎯 高质量 |
 66 | | 支持格式 | PDF, DOCX, PPT, TXT | PDF, 图片, 复杂布局文档 |
 67 | | 成本 | 💰 免费 | 💸 消耗 API 调用 |
 68 | | 复杂布局 | ❌ 有限支持 | ✅ 智能识别 |
 69 | | 图表理解 | ❌ 基础提取 | ✅ 智能解析 |
 70 | | 自定义 | ❌ 固定规则 | ✅ 可配置提示词 |
 71 | 
 72 | ## 使用方式
 73 | 
 74 | ### 1. 单文件转换
 75 | 点击文件列表中任意文件行的"转换为MD"按钮
 76 | 
 77 | ### 2. 批量转换
 78 | 1. 选择多个文件（使用复选框）
 79 | 2. 点击"转换为MD (N)"按钮
 80 | 3. 在弹窗中选择转换方法和配置
 81 | 4. 点击"开始转换"
 82 | 
 83 | ### 3. 进度监控
 84 | 转换任务提交后，会在文件列表上方显示转换进度组件，包括：
 85 | - 正在执行的任务（带进度条）
 86 | - 最近完成的任务
 87 | - 任务取消功能
 88 | 
 89 | ## 注意事项
 90 | 
 91 | 1. **AI 智能转换**需要配置支持视觉功能的 LLM 模型
 92 | 2. **转换任务**为异步处理，可能需要较长时间
 93 | 3. **批量操作**建议分批处理，避免一次选择过多文件
 94 | 4. **转换失败**时会显示详细错误信息，便于调试
 95 | 
 96 | ## 未来改进
 97 | 
 98 | - [ ] 转换历史记录
 99 | - [ ] 转换模板管理
100 | - [ ] 转换结果预览
101 | - [ ] 批量下载转换结果
102 | - [ ] 转换质量评估
103 | - [ ] 转换时间预估 


--------------------------------------------------------------------------------
/frontend/tailwind.config.js:
--------------------------------------------------------------------------------
 1 | // Tailwind CSS configuration: exposes the app's CSS custom properties as theme tokens.
 2 | module.exports = {
 3 |   // Files scanned for class names.
 4 |   // NOTE(review): the "app/" and "components/" globs are resolved from the project root — confirm those directories exist.
 5 |   content: [
 6 |     "./src/**/*.{html,js,ts,jsx,tsx}",
 7 |     "app/**/*.{ts,tsx}",
 8 |     "components/**/*.{ts,tsx}",
 9 |   ],
10 |   theme: {
11 |     extend: {
12 |       // Every color wraps a CSS variable in hsl(); the variables themselves are not defined in this file.
13 |       colors: {
14 |         border: "hsl(var(--border))",
15 |         input: "hsl(var(--input))",
16 |         ring: "hsl(var(--ring))",
17 |         background: "hsl(var(--background))",
18 |         foreground: "hsl(var(--foreground))",
19 |         primary: {
20 |           DEFAULT: "hsl(var(--primary))",
21 |           foreground: "hsl(var(--primary-foreground))",
22 |         },
23 |         secondary: {
24 |           DEFAULT: "hsl(var(--secondary))",
25 |           foreground: "hsl(var(--secondary-foreground))",
26 |         },
27 |         destructive: {
28 |           DEFAULT: "hsl(var(--destructive))",
29 |           foreground: "hsl(var(--destructive-foreground))",
30 |         },
31 |         muted: {
32 |           DEFAULT: "hsl(var(--muted))",
33 |           foreground: "hsl(var(--muted-foreground))",
34 |         },
35 |         accent: {
36 |           DEFAULT: "hsl(var(--accent))",
37 |           foreground: "hsl(var(--accent-foreground))",
38 |         },
39 |         popover: {
40 |           DEFAULT: "hsl(var(--popover))",
41 |           foreground: "hsl(var(--popover-foreground))",
42 |         },
43 |         card: {
44 |           DEFAULT: "hsl(var(--card))",
45 |           foreground: "hsl(var(--card-foreground))",
46 |         },
47 |       },
48 |       // All radii are derived from the single --radius variable.
49 |       borderRadius: {
50 |         lg: "var(--radius)",
51 |         md: "calc(var(--radius) - 2px)",
52 |         sm: "calc(var(--radius) - 4px)",
53 |       },
54 |       fontFamily: {
55 |         sans: [
56 |           "ui-sans-serif",
57 |           "system-ui",
58 |           "sans-serif",
59 |           '"Apple Color Emoji"',
60 |           '"Segoe UI Emoji"',
61 |           '"Segoe UI Symbol"',
62 |           '"Noto Color Emoji"',
63 |         ],
64 |       },
65 |       // Height animations between 0 and the --radix-accordion-content-height variable.
66 |       keyframes: {
67 |         "accordion-down": {
68 |           from: { height: "0" },
69 |           to: { height: "var(--radix-accordion-content-height)" },
70 |         },
71 |         "accordion-up": {
72 |           from: { height: "var(--radix-accordion-content-height)" },
73 |           to: { height: "0" },
74 |         },
75 |       },
76 |       animation: {
77 |         "accordion-down": "accordion-down 0.2s ease-out",
78 |         "accordion-up": "accordion-up 0.2s ease-out",
79 |       },
80 |     },
81 |     container: { center: true, padding: "2rem", screens: { "2xl": "1400px" } },
82 |   },
83 |   // NOTE(review): dropdown-menu.tsx uses `animate-in`, `fade-in-0`, `zoom-in-95` and `slide-in-from-*`
84 |   // utilities that this config does not define; presumably they come from a plugin such as
85 |   // tailwindcss-animate, which is not registered here — confirm.
86 |   plugins: [],
87 |   // Dark variants are toggled by a class instead of the prefers-color-scheme media query.
88 |   darkMode: ["class"],
89 | };
90 | 


--------------------------------------------------------------------------------
/backend/app/models/user.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Boolean, DateTime, Text, Enum as SQLEnum
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class UserStatus(enum.Enum):  # account states stored in the users.status column
11 |     ACTIVE = "ACTIVE"
12 |     INACTIVE = "INACTIVE"
13 |     SUSPENDED = "SUSPENDED"
14 | 
15 | 
16 | class User(db.Model):
17 |     __tablename__ = 'users'
18 |     
19 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
20 |     username = Column(String(100), unique=True, nullable=False, index=True)
21 |     email = Column(String(255), unique=True, nullable=False, index=True)
22 |     password_hash = Column(String(255), nullable=False)
23 |     full_name = Column(String(255))
24 |     avatar_url = Column(Text)
25 |     phone = Column(String(50))
26 |     status = Column(SQLEnum(UserStatus), default=UserStatus.ACTIVE, index=True)
27 |     last_login_at = Column(DateTime)
28 |     created_at = Column(DateTime, default=datetime.utcnow, index=True)
29 |     updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
30 |     created_by = Column(String(36))
31 |     updated_by = Column(String(36))
32 |     
33 |     # Relationships
34 |     user_organizations = relationship("UserOrganization", back_populates="user", cascade="all, delete-orphan")
35 |     user_roles = relationship("UserRole", foreign_keys="UserRole.user_id", back_populates="user", cascade="all, delete-orphan")
36 |     resource_permissions = relationship("ResourcePermission", foreign_keys="ResourcePermission.user_id", back_populates="user", cascade="all, delete-orphan")
37 |     user_sessions = relationship("UserSession", back_populates="user", cascade="all, delete-orphan")
38 |     
39 |     # Audit relationships
40 |     created_audit_logs = relationship("AuditLog", foreign_keys="AuditLog.user_id", back_populates="user")
41 |     
42 |     def to_dict(self):
43 |         return {
44 |             'id': self.id,
45 |             'username': self.username,
46 |             'email': self.email,
47 |             'full_name': self.full_name,
48 |             'avatar_url': self.avatar_url,
49 |             'phone': self.phone,
50 |             'status': self.status.value,
51 |             'last_login_at': self.last_login_at.isoformat() if self.last_login_at else None,
52 |             'created_at': self.created_at.isoformat() if self.created_at else None,
53 |             'updated_at': self.updated_at.isoformat() if self.updated_at else None
54 |         }
55 |     
56 |     def __repr__(self):
57 |         return f'<User {self.username}>'


--------------------------------------------------------------------------------
/frontend/nginx.conf:
--------------------------------------------------------------------------------
 1 | events {
 2 |     worker_connections 1024;  # maximum simultaneous connections per worker process
 3 | }
 4 | 
 5 | http {
 6 |     include       /etc/nginx/mime.types;
 7 |     default_type  application/octet-stream;
 8 | 
 9 |     # Log format
10 |     log_format  main  '$remote_addr - $remote_user [$time_local] "$request" '
11 |                       '$status $body_bytes_sent "$http_referer" '
12 |                       '"$http_user_agent" "$http_x_forwarded_for"';
13 | 
14 |     # Access log
15 |     access_log  /var/log/nginx/access.log  main;
16 | 
17 |     # Basic settings
18 |     sendfile        on;
19 |     tcp_nopush      on;
20 |     tcp_nodelay     on;
21 |     keepalive_timeout  65;
22 |     types_hash_max_size 2048;
23 | 
24 |     # Gzip compression
25 |     gzip on;
26 |     gzip_vary on;
27 |     gzip_min_length 1024;
28 |     gzip_proxied any;
29 |     gzip_comp_level 6;
30 |     gzip_types
31 |         text/plain
32 |         text/css
33 |         text/xml
34 |         text/javascript
35 |         application/json
36 |         application/javascript
37 |         application/xml+rss
38 |         application/atom+xml
39 |         image/svg+xml;
40 | 
41 |     server {
42 |         listen 80;
43 |         server_name localhost;
44 |         root /usr/share/nginx/html;
45 |         index index.html;
46 | 
47 |         # Static asset caching
48 |         location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg|woff|woff2|ttf|eot)$ {
49 |             expires 1y;
50 |             add_header Cache-Control "public, immutable";
51 |             try_files $uri =404;
52 |         }
53 | 
54 |         # Proxy API requests to the backend.
55 |         # "^~" stops nginx from testing the regex location above, so API URLs that end in
56 |         # .js/.png/... are proxied instead of being looked up as static files (and 404ing).
57 |         # NOTE(review): because proxy_pass carries a URI ("/"), the matched "/api/" prefix is
58 |         # replaced by "/" before the request reaches the backend — confirm the backend
59 |         # routes do not expect the "/api" prefix.
60 |         location ^~ /api/ {
61 |             proxy_pass http://pindata-api:8897/;
62 |             proxy_set_header Host $host;
63 |             proxy_set_header X-Real-IP $remote_addr;
64 |             proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
65 |             proxy_set_header X-Forwarded-Proto $scheme;
66 |             
67 |             # Timeouts
68 |             proxy_connect_timeout 60s;
69 |             proxy_send_timeout 60s;
70 |             proxy_read_timeout 60s;
71 |         }
72 | 
73 |         # SPA routing: unknown paths fall back to index.html, which must not be cached
74 |         location / {
75 |             try_files $uri $uri/ /index.html;
76 |             add_header Cache-Control "no-cache, no-store, must-revalidate";
77 |             add_header Pragma "no-cache";
78 |             add_header Expires "0";
79 |         }
80 | 
81 |         # Health check
82 |         location /health {
83 |             access_log off;
84 |             # Set the type with default_type: add_header would emit a second Content-Type
85 |             # header next to the one derived from the http-level default_type.
86 |             default_type text/plain;
87 |             return 200 "healthy\n";
88 |         }
89 | 
90 |         # Error pages
91 |         error_page   500 502 503 504  /50x.html;
92 |         location = /50x.html {
93 |             root   /usr/share/nginx/html;
94 |         }
95 |     }
96 | }


--------------------------------------------------------------------------------
/doc/tmp/design.md:
--------------------------------------------------------------------------------
  1 | 基于你的代码和需求，我来帮你分析一下项目的整体架构设计思路：
  2 | 
  3 | ## 1. 数据分层架构设计
  4 | 
  5 | ### 原始数据层 vs 治理后数据层
  6 | 你的想法是对的，应该按数据类型和治理状态来区分：
  7 | 
  8 | ```
  9 | 原始数据层 (Raw Data)
 10 | ├── 数据库表 (未治理)
 11 | ├── 文档文件 (.md, .pdf, .docx等)
 12 | ├── 图片文件 (.jpg, .png等) 
 13 | ├── 视频文件 (.mp4, .avi等)
 14 | └── API数据源
 15 | 
 16 | 治理后数据层 (Governed Data)  
 17 | ├── 结构化数据
 18 | │   ├── 清洗后的数据库表
 19 | │   ├── 标准化的主题域表
 20 | │   └── 数据质量检查后的表
 21 | ├── 半结构化数据
 22 | │   ├── 解析后的markdown内容
 23 | │   ├── 提取的文档元数据
 24 | │   └── OCR后的图片文本
 25 | └── 向量化数据
 26 |     ├── 文档embedding
 27 |     ├── 图片特征向量
 28 |     └── 语义索引
 29 | ```
 30 | 
 31 | ## 2. 知识管理架构
 32 | 
 33 | ### 知识的多层次分类
 34 | ```
 35 | 知识层 (Knowledge Layer)
 36 | ├── 元数据知识
 37 | │   ├── 数据字典
 38 | │   ├── 数据血缘
 39 | │   └── 业务术语表
 40 | ├── 结构化知识
 41 | │   ├── 业务规则
 42 | │   ├── 数据模型
 43 | │   └── 标准规范
 44 | ├── 语义知识 (RAG向量库)
 45 | │   ├── 文档语义向量
 46 | │   ├── 知识图谱
 47 | │   └── 问答对向量
 48 | └── 多媒体知识
 49 |     ├── 图文说明
 50 |     ├── 操作视频
 51 |     └── 培训材料
 52 | ```
 53 | 
 54 | ## 3. 数据质量评价体系
 55 | 
 56 | ### 基于大模型的多维度评价
 57 | ```
 58 | 数据质量评价 (AI-Powered Quality Assessment)
 59 | ├── 完整性评价
 60 | │   ├── 字段完整度检查
 61 | │   ├── 记录完整性验证
 62 | │   └── 关联数据完整性
 63 | ├── 准确性评价
 64 | │   ├── 数据格式验证
 65 | │   ├── 业务规则校验  
 66 | │   └── LLM语义准确性检查
 67 | ├── 一致性评价
 68 | │   ├── 跨系统数据一致性
 69 | │   ├── 历史数据一致性
 70 | │   └── 标准化程度评估
 71 | └── 时效性评价
 72 |     ├── 数据更新频率
 73 |     ├── 数据延迟监控
 74 |     └── 实时性要求匹配度
 75 | ```
 76 | 
 77 | ## 4. 建议的技术架构
 78 | 
 79 | ### 数据处理管道设计
 80 | ```
 81 | 数据治理管道 (Data Governance Pipeline)
 82 | ├── 数据接入层
 83 | │   ├── 文件上传处理
 84 | │   ├── 数据库连接器
 85 | │   └── API数据采集
 86 | ├── 数据治理层
 87 | │   ├── 数据清洗引擎
 88 | │   ├── 数据标准化
 89 | │   ├── 质量检查引擎
 90 | │   └── 向量化处理
 91 | ├── 知识提取层
 92 | │   ├── NLP文本处理
 93 | │   ├── OCR图像识别
 94 | │   ├── 语义理解
 95 | │   └── 知识图谱构建
 96 | └── 服务访问层
 97 |     ├── 结构化数据API
 98 |     ├── 向量检索服务
 99 |     ├── 知识问答接口
100 |     └── 质量评分服务
101 | ```
102 | 
103 | ## 5. 具体实施建议
104 | 
105 | ### Phase 1: 数据分类和治理
106 | 1. **数据类型识别器**：自动识别上传数据的类型
107 | 2. **治理规则引擎**：根据数据类型应用不同治理策略
108 | 3. **质量评分模型**：集成大模型进行智能质量评估
109 | 
110 | ### Phase 2: 知识体系构建
111 | 1. **多模态知识管理**：支持文本、图片、视频等多种知识形式
112 | 2. **RAG向量系统**：构建企业私有的向量知识库
113 | 3. **知识关联图谱**：建立知识间的语义关联
114 | 
115 | ### Phase 3: 智能分析服务
116 | 1. **AI质量评估**：基于LLM的数据质量持续监控
117 | 2. **智能推荐**：根据数据使用模式推荐优化建议
118 | 3. **自动化治理**：逐步实现治理流程自动化
119 | 
120 | 这样的架构既保持了数据的原始状态，又提供了高质量的治理数据，同时构建了丰富的知识体系。你觉得这个方向如何？需要我详细展开某个部分吗？
121 | 
122 | ## 6. 🚀 实施策略
123 | 
124 | ### 优先级排序
125 | 
126 | - **P0 (立即开始)**：扩展数据模型，完善 ProjectDetail 页面交互
127 | - **P1 (第2周)**：实现数据治理 API，集成质量评估
128 | - **P2 (第3-4周)**：知识提取和向量化功能
129 | - **P3 (第5-6周)**：AI 质量评估和分析报告
130 | 
131 | ### 技术选型建议
132 | 
133 | - **向量数据库**：集成 Qdrant 或 Weaviate 用于 RAG
134 | - **任务队列**：使用现有的 Celery 处理长时间任务
135 | - **缓存层**：Redis 缓存频繁查询的质量指标
136 | - **监控告警**：集成现有的 SystemLog 体系


--------------------------------------------------------------------------------
/backend/app/models/project_team_member.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from sqlalchemy import Column, String, DateTime, Enum, ForeignKey, UniqueConstraint
 3 | from sqlalchemy.orm import relationship
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class ProjectRole(enum.Enum):
11 |     """Project role enumeration."""
12 |     OWNER = "owner"           # project owner
13 |     ADMIN = "admin"          # administrator
14 |     EDITOR = "editor"        # editor
15 |     VIEWER = "viewer"        # viewer
16 | 
17 | 
18 | class MemberStatus(enum.Enum):
19 |     """Member status enumeration."""
20 |     ACTIVE = "active"        # active
21 |     INACTIVE = "inactive"    # inactive
22 |     INVITED = "invited"      # invited
23 |     REMOVED = "removed"      # removed
24 | 
25 | 
26 | class ProjectTeamMember(db.Model):
27 |     """项目团队成员模型"""
28 |     __tablename__ = 'project_team_members'
29 |     __table_args__ = (
30 |         UniqueConstraint('project_id', 'user_id', name='uq_project_user'),
31 |     )
32 |     
33 |     # 基础信息
34 |     id = Column(String(36), primary_key=True)
35 |     project_id = Column(String(36), ForeignKey('data_governance_projects.id'), nullable=False)
36 |     user_id = Column(String(36), ForeignKey('users.id'), nullable=False)
37 |     
38 |     # 角色信息
39 |     role = Column(Enum(ProjectRole), nullable=False)
40 |     status = Column(Enum(MemberStatus), default=MemberStatus.ACTIVE)
41 |     
42 |     # 时间戳
43 |     joined_at = Column(DateTime, default=datetime.utcnow)
44 |     invited_at = Column(DateTime)
45 |     removed_at = Column(DateTime)
46 |     
47 |     # 邀请信息
48 |     invited_by = Column(String(36), ForeignKey('users.id'))
49 |     invitation_message = Column(String(500))
50 |     
51 |     # 关系
52 |     project = relationship("DataGovernanceProject", back_populates="team_members")
53 |     user = relationship("User", foreign_keys=[user_id])
54 |     inviter = relationship("User", foreign_keys=[invited_by])
55 |     
56 |     def __init__(self, **kwargs):
57 |         super().__init__(**kwargs)
58 |         if not self.id:
59 |             self.id = str(uuid.uuid4())
60 |     
61 |     def to_dict(self):
62 |         return {
63 |             'id': self.id,
64 |             'project_id': self.project_id,
65 |             'user_id': self.user_id,
66 |             'role': self.role.value if self.role else None,
67 |             'status': self.status.value if self.status else None,
68 |             'joined_at': self.joined_at.isoformat() if self.joined_at else None,
69 |             'invited_at': self.invited_at.isoformat() if self.invited_at else None,
70 |             'removed_at': self.removed_at.isoformat() if self.removed_at else None,
71 |             'invited_by': self.invited_by,
72 |             'invitation_message': self.invitation_message,
73 |         } 


--------------------------------------------------------------------------------
/backend/test/test_minio_fix.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | 测试MinIO配置和bucket设置
 4 | """
 5 | import os
 6 | from minio import Minio
 7 | from minio.error import S3Error
 8 | from dotenv import load_dotenv
 9 | 
10 | # 加载环境变量
11 | load_dotenv()
12 | 
13 | def test_minio_setup():
14 |     """测试MinIO配置"""
15 |     # 配置
16 |     endpoint = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
17 |     access_key = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
18 |     secret_key = os.getenv('MINIO_SECRET_KEY', 'minioadmin')
19 |     secure = os.getenv('MINIO_SECURE', 'false').lower() == 'true'
20 |     
21 |     try:
22 |         # 创建MinIO客户端
23 |         client = Minio(
24 |             endpoint=endpoint,
25 |             access_key=access_key,
26 |             secret_key=secret_key,
27 |             secure=secure
28 |         )
29 |         
30 |         print(f"✅ 连接到MinIO: {endpoint}")
31 |         
32 |         # 获取所有bucket
33 |         buckets = client.list_buckets()
34 |         bucket_names = [bucket.name for bucket in buckets]
35 |         print(f"📁 现有buckets: {bucket_names}")
36 |         
37 |         # 检查raw-data bucket
38 |         raw_data_exists = client.bucket_exists('raw-data')
39 |         print(f"🔍 raw-data bucket存在: {raw_data_exists}")
40 |         
41 |         # 如果不存在，创建它
42 |         if not raw_data_exists:
43 |             print("🆕 创建raw-data bucket...")
44 |             client.make_bucket('raw-data')
45 |             print("✅ raw-data bucket创建成功")
46 |         
47 |         # 测试文件操作
48 |         test_content = "测试文件内容"
49 |         test_object = "test/upload_download_test.txt"
50 |         test_bytes = test_content.encode('utf-8')
51 |         
52 |         print(f"📤 测试上传文件到raw-data bucket...")
53 |         from io import BytesIO
54 |         client.put_object(
55 |             'raw-data', 
56 |             test_object, 
57 |             BytesIO(test_bytes),
58 |             len(test_bytes),
59 |             content_type='text/plain'
60 |         )
61 |         print("✅ 测试上传成功")
62 |         
63 |         print("📥 测试下载文件...")
64 |         response = client.get_object('raw-data', test_object)
65 |         downloaded_content = response.data.decode('utf-8')
66 |         if downloaded_content == test_content:
67 |             print("✅ 测试下载成功")
68 |         else:
69 |             print("❌ 测试下载失败: 内容不匹配")
70 |             return False
71 |         
72 |         # 清理测试文件
73 |         client.remove_object('raw-data', test_object)
74 |         print("🧹 清理测试文件成功")
75 |         
76 |         print("\n🎉 MinIO配置测试通过!")
77 |         return True
78 |         
79 |     except S3Error as e:
80 |         print(f"❌ MinIO操作失败: {e}")
81 |         return False
82 |     except Exception as e:
83 |         print(f"❌ 测试失败: {e}")
84 |         return False
85 | 
86 | if __name__ == "__main__":
87 |     test_minio_setup() 


--------------------------------------------------------------------------------
/backend/app/models/user_role.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, DateTime, Enum as SQLEnum, ForeignKey, UniqueConstraint
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class UserRoleStatus(enum.Enum):  # states stored in the user_roles.status column
11 |     ACTIVE = "ACTIVE"
12 |     INACTIVE = "INACTIVE"
13 |     EXPIRED = "EXPIRED"
14 | 
15 | 
16 | class UserRole(db.Model):
17 |     __tablename__ = 'user_roles'
18 |     
19 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
20 |     user_id = Column(String(36), ForeignKey('users.id'), nullable=False, index=True)
21 |     role_id = Column(String(36), ForeignKey('roles.id'), nullable=False, index=True)
22 |     organization_id = Column(String(36), ForeignKey('organizations.id'), index=True)  # 在特定组织下的角色
23 |     granted_by = Column(String(36), ForeignKey('users.id'))  # 授权人
24 |     granted_at = Column(DateTime, default=datetime.utcnow)
25 |     expires_at = Column(DateTime)  # 角色过期时间（可选）
26 |     status = Column(SQLEnum(UserRoleStatus), default=UserRoleStatus.ACTIVE)
27 |     
28 |     # Relationships
29 |     user = relationship("User", foreign_keys=[user_id], back_populates="user_roles")
30 |     role = relationship("Role", back_populates="user_roles")
31 |     organization = relationship("Organization", back_populates="user_roles")
32 |     granter = relationship("User", foreign_keys=[granted_by])
33 |     
34 |     def to_dict(self, include_user=False, include_role=False, include_organization=False):
35 |         result = {
36 |             'id': self.id,
37 |             'user_id': self.user_id,
38 |             'role_id': self.role_id,
39 |             'organization_id': self.organization_id,
40 |             'granted_by': self.granted_by,
41 |             'granted_at': self.granted_at.isoformat() if self.granted_at else None,
42 |             'expires_at': self.expires_at.isoformat() if self.expires_at else None,
43 |             'status': self.status.value
44 |         }
45 |         
46 |         if include_user and self.user:
47 |             result['user'] = self.user.to_dict()
48 |         
49 |         if include_role and self.role:
50 |             result['role'] = self.role.to_dict()
51 |         
52 |         if include_organization and self.organization:
53 |             result['organization'] = self.organization.to_dict()
54 |         
55 |         return result
56 |     
57 |     def is_expired(self):
58 |         if self.expires_at:
59 |             return datetime.utcnow() > self.expires_at
60 |         return False
61 |     
62 |     def __repr__(self):
63 |         return f'<UserRole {self.user_id}-{self.role_id}>'
64 |     
65 |     # 约束
66 |     __table_args__ = (
67 |         UniqueConstraint('user_id', 'role_id', 'organization_id', name='uk_user_role_org'),
68 |     )


--------------------------------------------------------------------------------
/backend/app/models/resource_permission.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, DateTime, Enum as SQLEnum, ForeignKey, UniqueConstraint
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class ResourcePermissionType(enum.Enum):  # values stored in resource_permissions.permission_type
11 |     OWNER = "owner"
12 |     ADMIN = "admin"
13 |     WRITE = "write"
14 |     READ = "read"
15 | 
16 | 
17 | class ResourcePermissionStatus(enum.Enum):  # values stored in resource_permissions.status
18 |     ACTIVE = "active"
19 |     INACTIVE = "inactive"
20 |     EXPIRED = "expired"
21 | 
22 | 
23 | class ResourcePermission(db.Model):
24 |     __tablename__ = 'resource_permissions'
25 |     
26 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
27 |     user_id = Column(String(36), ForeignKey('users.id'), nullable=False, index=True)
28 |     resource_type = Column(String(50), nullable=False)  # dataset, library, task等
29 |     resource_id = Column(String(255), nullable=False)   # 资源ID
30 |     permission_type = Column(SQLEnum(ResourcePermissionType), nullable=False, index=True)
31 |     granted_by = Column(String(36), ForeignKey('users.id'))
32 |     granted_at = Column(DateTime, default=datetime.utcnow)
33 |     expires_at = Column(DateTime)
34 |     status = Column(SQLEnum(ResourcePermissionStatus), default=ResourcePermissionStatus.ACTIVE)
35 |     
36 |     # Relationships
37 |     user = relationship("User", foreign_keys=[user_id], back_populates="resource_permissions")
38 |     granter = relationship("User", foreign_keys=[granted_by])
39 |     
40 |     def to_dict(self, include_user=False):
41 |         result = {
42 |             'id': self.id,
43 |             'user_id': self.user_id,
44 |             'resource_type': self.resource_type,
45 |             'resource_id': self.resource_id,
46 |             'permission_type': self.permission_type.value,
47 |             'granted_by': self.granted_by,
48 |             'granted_at': self.granted_at.isoformat() if self.granted_at else None,
49 |             'expires_at': self.expires_at.isoformat() if self.expires_at else None,
50 |             'status': self.status.value
51 |         }
52 |         
53 |         if include_user and self.user:
54 |             result['user'] = self.user.to_dict()
55 |         
56 |         return result
57 |     
58 |     def is_expired(self):
59 |         if self.expires_at:
60 |             return datetime.utcnow() > self.expires_at
61 |         return False
62 |     
63 |     def __repr__(self):
64 |         return f'<ResourcePermission {self.user_id}-{self.resource_type}:{self.resource_id}>'
65 |     
66 |     # 索引和约束
67 |     __table_args__ = (
68 |         UniqueConstraint('user_id', 'resource_type', 'resource_id', name='uk_user_resource'),
69 |         db.Index('resource_permissions_idx_resource', 'resource_type', 'resource_id'),
70 |     )


--------------------------------------------------------------------------------
/backend/app/models/audit_log.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, DateTime, Text, JSON, Enum as SQLEnum, ForeignKey
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
10 | class AuditStatus(enum.Enum):  # values stored in audit_logs.status
11 |     SUCCESS = "success"
12 |     FAILED = "failed"
13 | 
14 | 
15 | class AuditLog(db.Model):
16 |     __tablename__ = 'audit_logs'
17 |     
18 |     id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
19 |     user_id = Column(String(36), ForeignKey('users.id'), index=True)
20 |     organization_id = Column(String(36), ForeignKey('organizations.id'), index=True)
21 |     action = Column(String(100), nullable=False, index=True)  # 操作类型
22 |     resource_type = Column(String(50))  # 资源类型
23 |     resource_id = Column(String(255))   # 资源ID
24 |     old_values = Column(JSON)           # 修改前的值
25 |     new_values = Column(JSON)           # 修改后的值
26 |     ip_address = Column(String(45))
27 |     user_agent = Column(Text)
28 |     request_id = Column(String(36), index=True)  # 关联系统日志
29 |     status = Column(SQLEnum(AuditStatus), default=AuditStatus.SUCCESS)
30 |     error_message = Column(Text)
31 |     created_at = Column(DateTime, default=datetime.utcnow, index=True)
32 |     
33 |     # Relationships
34 |     user = relationship("User", foreign_keys=[user_id], back_populates="created_audit_logs")
35 |     organization = relationship("Organization", foreign_keys=[organization_id])
36 |     
37 |     def to_dict(self, include_user=False, include_organization=False):
38 |         result = {
39 |             'id': self.id,
40 |             'user_id': self.user_id,
41 |             'organization_id': self.organization_id,
42 |             'action': self.action,
43 |             'resource_type': self.resource_type,
44 |             'resource_id': self.resource_id,
45 |             'old_values': self.old_values,
46 |             'new_values': self.new_values,
47 |             'ip_address': self.ip_address,
48 |             'user_agent': self.user_agent,
49 |             'request_id': self.request_id,
50 |             'status': self.status.value,
51 |             'error_message': self.error_message,
52 |             'created_at': self.created_at.isoformat() if self.created_at else None
53 |         }
54 |         
55 |         if include_user and self.user:
56 |             result['user'] = self.user.to_dict()
57 |         
58 |         if include_organization and self.organization:
59 |             result['organization'] = self.organization.to_dict()
60 |         
61 |         return result
62 |     
63 |     def __repr__(self):
64 |         return f'<AuditLog {self.action}-{self.resource_type}:{self.resource_id}>'
65 |     
66 |     # 索引
67 |     __table_args__ = (
68 |         db.Index('audit_logs_idx_resource', 'resource_type', 'resource_id'),
69 |     )


--------------------------------------------------------------------------------
/frontend/src/types/llm.ts:
--------------------------------------------------------------------------------
 1 | import { ApiResponse, PaginatedResponse } from './api';
 2 | 
/** Provider identifiers accepted in the `provider` field of an LLM configuration. */
export type ProviderType = 'openai' | 'claude' | 'gemini' | 'ollama' | 'custom';
/** Allowed values for `reasoning_extraction_method` (presumably how reasoning text is pulled out of a response — confirm against the backend). */
export type ReasoningExtractionMethod = 'tag_based' | 'json_field';
 5 | 
/**
 * An LLM configuration record as exposed to the frontend.
 * Optional members may be absent from the payload.
 */
export interface LLMConfig {
  id: string;
  name: string;
  provider: ProviderType;
  model_name: string;
  api_key: string; // masked (desensitized) API key
  base_url?: string;
  temperature: number;
  max_tokens: number;
  supports_vision: boolean;
  supports_reasoning: boolean;
  reasoning_extraction_method?: ReasoningExtractionMethod;
  reasoning_extraction_config?: Record<string, any>;
  is_active: boolean;
  is_default: boolean;
  custom_headers?: Record<string, string>;
  provider_config?: Record<string, any>;
  usage_count: number;
  total_tokens_used: number;
  // NOTE(review): timestamp fields are plain strings; format is not visible here — confirm.
  last_used_at?: string;
  created_at?: string;
  updated_at?: string;
}
29 | 
/**
 * Payload for creating an LLM configuration.
 * Only `name`, `provider`, `model_name` and `api_key` are required.
 */
export interface CreateLLMConfigRequest {
  name: string;
  provider: ProviderType;
  model_name: string;
  api_key: string;
  base_url?: string;
  temperature?: number;
  max_tokens?: number;
  supports_vision?: boolean;
  supports_reasoning?: boolean;
  reasoning_extraction_method?: ReasoningExtractionMethod;
  reasoning_extraction_config?: Record<string, any>;
  is_active?: boolean;
  custom_headers?: Record<string, string>;
  provider_config?: Record<string, any>;
}

/**
 * Payload for updating an LLM configuration.
 *
 * Has exactly the fields of `CreateLLMConfigRequest`, all optional. Deriving
 * it with `Partial` keeps the two shapes in sync instead of maintaining a
 * second hand-written field list.
 */
export type UpdateLLMConfigRequest = Partial<CreateLLMConfigRequest>;
63 | 
/** Optional paging, filter and search parameters for listing LLM configurations. */
export interface LLMConfigQueryParams {
  page?: number;
  per_page?: number;
  provider?: ProviderType;
  is_active?: boolean;
  supports_vision?: boolean;
  supports_reasoning?: boolean;
  search?: string;
}
73 | 
/** Request body carrying the id of the configuration to mark as default. */
export interface SetDefaultConfigRequest {
  config_id: string;
}
77 | 
/** Result of testing an LLM configuration. */
export interface TestConfigResponse {
  // NOTE(review): unit of latency (ms vs s) is not visible here — confirm against the backend.
  latency: number;
  status: string;
  model_info?: {
    model: string;
    provider: string;
    response_preview?: string;
  };
  test_time?: string;
  error_detail?: string;
}
89 | 
/** Descriptor of a model provider: identity, type, icon, optional base URL and its model names. */
export interface ModelProvider {
  id: string;
  name: string;
  type: ProviderType;
  icon: string;
  baseUrl?: string;
  models: string[];
} 


--------------------------------------------------------------------------------
/backend/debug_llm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | """
 3 | 调试LLM配置问题的脚本
 4 | """
 5 | import os
 6 | import sys
 7 | sys.path.append(os.path.dirname(os.path.abspath(__file__)))
 8 | 
 9 | from app import create_app
10 | from app.models import LLMConfig, ProviderType
11 | from app.services.llm_conversion_service import LLMConversionService
12 | from app.db import db
13 | 
def _report_api_key_shape(api_key):
    """Print a character-level diagnosis of ``api_key`` without echoing the whole secret."""
    # Evaluate the membership tests outside the f-strings: a backslash inside
    # an f-string expression is a SyntaxError before Python 3.12, and the
    # checks must use the real newline/tab characters, not the two-character
    # sequences backslash-n / backslash-t.
    has_newline = '\n' in api_key
    has_tab = '\t' in api_key

    print("API Key字符分析:")
    print(f"  - 长度: {len(api_key)}")
    print(f"  - 前10个字符: {repr(api_key[:10])}")
    print(f"  - 后10个字符: {repr(api_key[-10:])}")
    print(f"  - 是否包含换行符: {has_newline}")
    print(f"  - 是否包含制表符: {has_tab}")
    print(f"  - 是否包含空格: {' ' in api_key}")
    # Report whether stripping would change the key instead of printing the
    # complete stripped key, which would leak the secret to the terminal/logs.
    print(f"  - 首尾是否包含空白字符: {api_key != api_key.strip()}")


def debug_llm_configs():
    """List every stored LLM configuration and smoke-test the active Gemini ones.

    Creates the application, enters its app context, prints the main fields of
    each ``LLMConfig`` row, and for every active Gemini configuration builds a
    client through ``LLMConversionService`` and sends one short prompt. When
    the failure message points at the API key, a character-level analysis of
    the key is printed. All output goes to stdout; nothing is returned.
    """
    app = create_app()

    with app.app_context():
        print("=== 调试LLM配置 ===")

        # 1. List all LLM configurations
        configs = LLMConfig.query.all()
        print(f"\n找到 {len(configs)} 个LLM配置:")

        for config in configs:
            # A row may have no key stored; treat that as an empty string so
            # the slicing/len() below cannot raise on None.
            api_key = config.api_key or ""

            print(f"\n配置ID: {config.id}")
            print(f"名称: {config.name}")
            print(f"提供商: {config.provider}")
            print(f"模型: {config.model_name}")
            print(f"API Key前缀: {api_key[:10]}... (长度: {len(api_key)})")
            print(f"激活状态: {config.is_active}")
            print(f"默认配置: {config.is_default}")
            print(f"支持视觉: {config.supports_vision}")

            # Only active Gemini configurations are exercised end to end.
            if not (config.provider == ProviderType.GEMINI and config.is_active):
                continue

            print(f"\n--- 测试Gemini配置: {config.name} ---")
            try:
                llm_service = LLMConversionService()
                client = llm_service.get_llm_client(config)
                print("✅ Gemini客户端创建成功")

                # Try one minimal call
                from langchain.schema import HumanMessage
                response = client.invoke([HumanMessage(content="Hi, please reply 'OK'")])
                print(f"✅ Gemini API调用成功: {response.content}")

            except Exception as e:
                message = str(e)
                print(f"❌ Gemini客户端创建/调用失败: {message}")
                print(f"错误类型: {type(e).__name__}")

                # Does the failure look like an API key problem?
                if "API_KEY_INVALID" in message or "api key" in message.lower():
                    print("🔍 这是API Key问题！")
                    _report_api_key_shape(api_key)


if __name__ == "__main__":
    debug_llm_configs()


--------------------------------------------------------------------------------
/backend/alembic/env.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | from logging.config import fileConfig
 4 | 
 5 | from sqlalchemy import engine_from_config
 6 | from sqlalchemy import pool
 7 | from dotenv import load_dotenv
 8 | 
 9 | from alembic import context
10 | 
# Resolve the backend directory once: it is both the import root for the
# `app` package and the location of the .env file.
backend_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), '..'))

# Put the backend directory first on the Python path
sys.path.insert(0, backend_dir)

# Load environment variables
env_path = os.path.join(backend_dir, '.env')
load_dotenv(env_path)

# this is the Alembic Config object, which provides
# access to the values within the .ini file in use.
config = context.config

# Read the database URL from the environment; fall back to the value
# configured in alembic.ini when it is not set.
database_url = os.getenv('DATABASE_URL')
if database_url:
    # Config options go through ConfigParser interpolation, so a literal "%"
    # (e.g. a URL-encoded character in the password) must be doubled or
    # set_main_option / get_main_option raise an interpolation error.
    config.set_main_option('sqlalchemy.url', database_url.replace('%', '%%'))

# Interpret the config file for Python logging.
# This line sets up loggers basically.
if config.config_file_name is not None:
    fileConfig(config.config_file_name)

# add your model's MetaData object here
# for 'autogenerate' support
from app.db import Base
from app.models import *  # make sure every model module is imported
target_metadata = Base.metadata
38 | 
39 | # other values from the config, defined by the needs of env.py,
40 | # can be acquired:
41 | # my_important_option = config.get_main_option("my_important_option")
42 | # ... etc.
43 | 
44 | 
def run_migrations_offline() -> None:
    """Run migrations in 'offline' mode.

    The context is configured from the ``sqlalchemy.url`` option alone, with
    no Engine and no connection, and with literal binds and the "named"
    paramstyle. The migrations are then run inside
    ``context.begin_transaction()``.
    """
    offline_options = {
        "url": config.get_main_option("sqlalchemy.url"),
        "target_metadata": target_metadata,
        "literal_binds": True,
        "dialect_opts": {"paramstyle": "named"},
    }
    context.configure(**offline_options)

    with context.begin_transaction():
        context.run_migrations()
67 | 
68 | 
def run_migrations_online() -> None:
    """Run migrations in 'online' mode.

    Builds an Engine from the ``sqlalchemy.``-prefixed options of the main ini
    section (using ``NullPool``), opens a connection, binds it to the
    migration context and runs the migrations inside a transaction.
    """
    ini_section = config.get_section(config.config_ini_section, {})
    engine = engine_from_config(
        ini_section,
        prefix="sqlalchemy.",
        poolclass=pool.NullPool,
    )

    with engine.connect() as db_connection:
        context.configure(connection=db_connection, target_metadata=target_metadata)

        with context.begin_transaction():
            context.run_migrations()
89 | 
90 | 
# Pick the runner that matches the mode Alembic was invoked in.
if not context.is_offline_mode():
    run_migrations_online()
else:
    run_migrations_offline()
95 | 


--------------------------------------------------------------------------------
/frontend/src/hooks/useFileConversion.ts:
--------------------------------------------------------------------------------
 1 | import { useState, useCallback } from 'react';
 2 | import { apiClient } from '../lib/api-client';
 3 | import { ConversionConfig } from '../screens/RawData/LibraryDetails/components/ConvertToMarkdownDialog';
 4 | 
/** Request body posted to the convert-to-markdown endpoint: the file ids plus the conversion settings. */
interface ConvertFilesRequest {
  file_ids: string[];
  conversion_config: ConversionConfig;
}
 9 | 
/** A conversion job as returned in the `data` field of the conversion endpoints' responses. */
interface ConversionJob {
  id: string;
  status: 'pending' | 'processing' | 'completed' | 'failed';
  file_count: number;
  completed_count: number;
  failed_count: number;
  // NOTE(review): timestamps arrive as strings; exact format is not visible here.
  created_at: string;
  updated_at: string;
}
19 | 
/**
 * Hook exposing the file-conversion API calls.
 *
 * `loading` and `error` track `convertFiles` only; the other two calls just
 * log failures to the console and return a fallback value.
 */
export const useFileConversion = () => {
  const [loading, setLoading] = useState(false);
  const [error, setError] = useState<string | null>(null);

  // Response envelope shared by the endpoints that return a job.
  type JobEnvelope = { success: boolean; data: ConversionJob };

  /** Start a markdown conversion for `fileIds` in a library; resolves to the job, or null on failure. */
  const convertFiles = useCallback(
    async (
      libraryId: string,
      fileIds: string[],
      config: ConversionConfig
    ): Promise<ConversionJob | null> => {
      try {
        setLoading(true);
        setError(null);

        const payload: ConvertFilesRequest = { file_ids: fileIds, conversion_config: config };
        const endpoint = `/api/v1/libraries/${libraryId}/files/convert-to-markdown`;
        const result = await apiClient.post<JobEnvelope>(endpoint, payload);

        if (!(result.success && result.data)) {
          throw new Error('转换请求失败');
        }
        return result.data;
      } catch (err) {
        setError(err instanceof Error ? err.message : '转换失败');
        console.error('Failed to convert files:', err);
        return null;
      } finally {
        setLoading(false);
      }
    },
    []
  );

  /** Fetch one conversion job by id; resolves to null when the request fails. */
  const getConversionJob = useCallback(
    async (jobId: string): Promise<ConversionJob | null> => {
      try {
        const result = await apiClient.get<JobEnvelope>(`/api/v1/conversion-jobs/${jobId}`);

        if (!(result.success && result.data)) {
          throw new Error('获取转换任务失败');
        }
        return result.data;
      } catch (err) {
        console.error('Failed to get conversion job:', err);
        return null;
      }
    },
    []
  );

  /** Ask the backend to cancel a job; resolves to the response's `success` flag, or false on error. */
  const cancelConversionJob = useCallback(
    async (jobId: string): Promise<boolean> => {
      try {
        const { success } = await apiClient.post<{ success: boolean }>(
          `/api/v1/conversion-jobs/${jobId}/cancel`
        );
        return success;
      } catch (err) {
        console.error('Failed to cancel conversion job:', err);
        return false;
      }
    },
    []
  );

  return {
    convertFiles,
    getConversionJob,
    cancelConversionJob,
    loading,
    error,
  };
};


--------------------------------------------------------------------------------
/frontend/src/screens/Datasets/SmartDatasetCreator/index.tsx:
--------------------------------------------------------------------------------
 1 | import React, { useEffect } from 'react';
 2 | import { useTranslation } from 'react-i18next';
 3 | import { Link, useNavigate } from 'react-router-dom';
 4 | import { Button } from '../../../components/ui/button';
 5 | import { ArrowLeft, Wand2 } from 'lucide-react';
 6 | import { useSmartDatasetCreatorStore } from './store/useSmartDatasetCreatorStore';
 7 | import {
 8 |   StepIndicator,
 9 |   Step1DataSelection,
10 |   Step2DatasetConfig,
11 |   Step3ModelConfig,
12 |   Step4PreviewConfirm,
13 |   Step5Generation,
14 |   NavigationButtons,
15 |   ErrorMessage
16 | } from './components';
17 | 
/**
 * Wizard screen for the smart dataset creator.
 *
 * Renders the step that matches `currentStep` from the store, navigates to
 * `/datasets` two seconds after `progress` reaches 100, and resets the store
 * when the screen unmounts before generation has completed.
 */
export const SmartDatasetCreator: React.FC = () => {
  const { t } = useTranslation();
  const navigate = useNavigate();
  const { currentStep, progress, resetState } = useSmartDatasetCreatorStore();

  // Mirror the latest progress in a ref so the unmount cleanup below can read
  // it without listing `progress` as a dependency.
  const progressRef = React.useRef(progress);
  useEffect(() => {
    progressRef.current = progress;
  }, [progress]);

  // Reset the store on unmount unless generation finished. With `progress` in
  // the dependency list the cleanup would also run on every progress change
  // and wipe the state in the middle of a generation.
  useEffect(() => {
    return () => {
      if (progressRef.current !== 100) {
        resetState();
      }
    };
  }, [resetState]);

  // Redirect to the dataset list shortly after generation completes
  useEffect(() => {
    if (progress === 100) {
      const timer = setTimeout(() => {
        navigate('/datasets');
      }, 2000);
      return () => clearTimeout(timer);
    }
  }, [progress, navigate]);

  // Map the current step number to its component; unknown values fall back to step 1.
  const renderStepContent = () => {
    switch (currentStep) {
      case 1:
        return <Step1DataSelection />;
      case 2:
        return <Step2DatasetConfig />;
      case 3:
        return <Step3ModelConfig />;
      case 4:
        return <Step4PreviewConfirm />;
      case 5:
        return <Step5Generation />;
      default:
        return <Step1DataSelection />;
    }
  };

  return (
    <div className="w-full max-w-[1000px] p-6">
      {/* Back Button */}
      <div className="mb-6">
        <Link to="/datasets">
          <Button variant="outline" className="border-[#d1dbe8] flex items-center gap-2">
            <ArrowLeft className="w-4 h-4" />
            {t('datasets.create.backToList')}
          </Button>
        </Link>
      </div>

      {/* Header */}
      <div className="mb-8">
        <div className="flex items-center gap-3 mb-4">
          <Wand2 className="w-8 h-8 text-[#1977e5]" />
          <h1 className="text-2xl font-bold text-[#0c141c]">{t('smartDatasetCreator.title')}</h1>
        </div>
        <p className="text-[#4f7096] text-lg max-w-3xl">
          {t('smartDatasetCreator.description')}
        </p>
      </div>

      {/* Error Message */}
      <ErrorMessage />

      {/* Step Indicator */}
      <StepIndicator />

      {/* Step Content */}
      <div className="mb-8">
        {renderStepContent()}
      </div>

      {/* Navigation Buttons */}
      <NavigationButtons />
    </div>
  );
};


--------------------------------------------------------------------------------
/backend/app/models/project_data_source.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | from sqlalchemy import Column, String, Text, DateTime, Enum, JSON, ForeignKey, Boolean, Integer
 3 | from sqlalchemy.orm import relationship
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
class DataSourceType(enum.Enum):
    """Kinds of data source; used as the type of ``ProjectDataSource.source_type``."""
    UPLOAD = "upload"
    DATABASE = "database"
    API = "api"
    STORAGE = "storage"
    URL = "url"
17 | 
18 | 
class DataSourceStatus(enum.Enum):
    """Data source states; used as the type of ``ProjectDataSource.status``."""
    CONNECTED = "connected"
    DISCONNECTED = "disconnected"
    ERROR = "error"
    SYNCING = "syncing"
25 | 
26 | 
class ProjectDataSource(db.Model):
    """Data source configuration attached to a data governance project."""
    __tablename__ = 'project_data_sources'

    # --- Basic information ---
    id = Column(String(36), primary_key=True)
    project_id = Column(String(36), ForeignKey('data_governance_projects.id'), nullable=False)
    name = Column(String(255), nullable=False)
    description = Column(Text)

    # --- Data source configuration ---
    source_type = Column(Enum(DataSourceType), nullable=False)
    status = Column(Enum(DataSourceStatus), default=DataSourceStatus.DISCONNECTED)
    config = Column(JSON)  # source-specific configuration
    connection_string = Column(Text)  # connection string (noted as stored encrypted)

    # --- Synchronisation ---
    last_sync_at = Column(DateTime)
    sync_frequency = Column(String(50))  # sync frequency (hourly, daily, weekly, manual)
    auto_sync_enabled = Column(Boolean, default=False)

    # --- Statistics ---
    file_count = Column(Integer, default=0)
    # NOTE(review): a byte count in a plain Integer column may overflow on
    # backends with 32-bit integers — confirm whether BigInteger is needed.
    total_size = Column(Integer, default=0)  # total size in bytes

    # --- Timestamps ---
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)

    # --- Relationships ---
    project = relationship("DataGovernanceProject", back_populates="data_sources")
    raw_data_items = relationship("RawData", foreign_keys="RawData.data_source_id", cascade="all, delete-orphan", overlaps="data_source")

    def __init__(self, **kwargs):
        """Build the instance from keyword arguments, generating a UUID id when none is given."""
        super().__init__(**kwargs)
        has_id = bool(self.id)
        if not has_id:
            self.id = str(uuid.uuid4())

    def to_dict(self):
        """Return a plain-dict view of the data source.

        Enum members are reduced to their values and datetimes to ISO-8601
        strings (``None`` when unset). ``connection_string`` is not included.
        """
        def _enum_value(member):
            return member.value if member else None

        def _iso(moment):
            return moment.isoformat() if moment else None

        payload = {
            'id': self.id,
            'project_id': self.project_id,
            'name': self.name,
            'description': self.description,
            'source_type': _enum_value(self.source_type),
            'status': _enum_value(self.status),
            'config': self.config,
            'last_sync_at': _iso(self.last_sync_at),
            'sync_frequency': self.sync_frequency,
            'auto_sync_enabled': self.auto_sync_enabled,
            'file_count': self.file_count,
            'total_size': self.total_size,
            'created_at': _iso(self.created_at),
            'updated_at': _iso(self.updated_at),
        }
        return payload


--------------------------------------------------------------------------------
/frontend/src/i18n/locales/zh/datasets.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "title": "数据集",
 3 |   "totalDatasets": "{{count}} 个数据集",
 4 |   "smartCreate": "智能创建",
 5 |   "createDataset": "创建数据集",
 6 |   "searchPlaceholder": "搜索数据集名称或描述...",
 7 |   "taskType": "任务类型",
 8 |   "allTaskTypes": "全部",
 9 |   "nlp": "自然语言处理",
10 |   "qa": "问答系统",
11 |   "textClassification": "文本分类",
12 |   "loading": "加载中...",
13 |   "error": "错误",
14 |   "retry": "重试",
15 |   "noDatasets": "没有找到数据集",
16 |   "recommended": "推荐",
17 |   "detail": {
18 |     "backToList": "返回列表",
19 |     "dataPreview": "数据预览",
20 |     "versionManagement": "版本管理",
21 |     "detailInfo": "详细信息",
22 |     "settings": "设置",
23 |     "datasetId": "数据集ID",
24 |     "name": "名称",
25 |     "status": "状态",
26 |     "published": "已发布",
27 |     "createdAt": "创建时间"
28 |   },
29 |   "create": {
30 |     "title": "创建数据集",
31 |     "description": "数据集描述",
32 |     "backToList": "返回数据集列表",
33 |     "subtitle": "选择数据集的创建方式，并录入相关信息。",
34 |     "creationFailed": "创建失败",
35 |     "selectMethod": "选择创建方式",
36 |     "createEmpty": "创建空数据集",
37 |     "createEmptyDesc": "创建一个新的空数据集目录，后续可以上传和管理数据文件。",
38 |     "importHuggingface": "从 Hugging Face 导入",
39 |     "importHuggingfaceDesc": "从 Hugging Face Hub 导入现有的数据集，支持 URL 或数据集路径。",
40 |     "importModelscope": "从 ModelScope 导入",
41 |     "importModelscopeDesc": "从魔搭社区导入面向中文优化的数据集，专注于中文AI应用场景。",
42 |     "importInfo": "导入信息",
43 |     "datasetUrl": "数据集URL或路径",
44 |     "required": "必填",
45 |     "urlPlaceholder": "例如: squad 或 https://huggingface.co/datasets/squad",
46 |     "modelscopeUrlPlaceholder": "例如: open-r1/Mixture-of-Thoughts 或完整URL",
47 |     "urlHint": "支持数据集名称 (例如 open-r1/Mixture-of-Thoughts) 或完整URL",
48 |     "modelscopeUrlHint": "支持路径格式 (例如 open-r1/Mixture-of-Thoughts) 或完整URL，系统将自动获取数据集信息",
49 |     "visitHuggingface": "访问 Hugging Face",
50 |     "visitModelscope": "访问魔搭社区",
51 |     "basicInfo": "基本信息",
52 |     "datasetName": "数据集名称",
53 |     "datasetNamePlaceholder": "输入数据集名称...",
54 |     "owner": "所有者",
55 |     "ownerPlaceholder": "输入所有者名称或组织名称...",
56 |     "ownerHint": "所有者和数据集名称的组合必须唯一",
57 |     "license": "许可证",
58 |     "taskType": "任务类型",
59 |     "tags": "标签",
60 |     "tagsPlaceholder": "使用逗号分隔标签, 例如 nlp, japanese, qa...",
61 |     "descriptionPlaceholder": "描述数据集的内容、用途和特点...",
62 |     "createExplanation": "创建说明",
63 |     "importExplanation": "导入说明",
64 |     "emptyDatasetSteps": [
65 |       "• 系统将创建一个新的空数据集目录",
66 |       "• 您可以稍后从文件管理界面上传数据文件",
67 |       "• 支持多种格式: JSON, CSV, Parquet, TXT等"
68 |     ],
69 |     "huggingfaceSteps": [
70 |       "• 系统将从Hugging Face Hub下载指定的数据集",
71 |       "• 数据集信息（名称、描述等）将自动从源获取",
72 |       "• 导入过程可能需要几分钟，具体取决于数据集的大小"
73 |     ],
74 |     "modelscopeSteps": [
75 |       "• 系统将从魔搭社区下载指定的数据集",
76 |       "• 数据集信息（名称、描述等）将自动从源获取",
77 |       "• 导入过程可能需要几分钟，具体取决于数据集的大小"
78 |     ],
79 |     "reselect": "重新选择",
80 |     "creating": "创建中...",
81 |     "importing": "导入中...",
82 |     "success": {
83 |       "title": "数据集创建成功！",
84 |       "redirecting": "正在重定向到数据集列表...",
85 |       "importStarted": "导入任务已开始！",
86 |       "importMessage": "正在从 {{source}} 导入数据集。您可以在任务管理页面查看进度。",
87 |       "viewTaskProgress": "查看任务进度",
88 |       "backToDatasetList": "返回数据集列表"
89 |     }
90 |   }
91 | }


--------------------------------------------------------------------------------
/backend/app/api/v1/schemas/library_schemas.py:
--------------------------------------------------------------------------------
 1 | from marshmallow import Schema, fields, validate, validates_schema, ValidationError
 2 | import re
 3 | 
 4 | def validate_library_name(name):
 5 |     """验证文件库名称"""
 6 |     if not name or not name.strip():
 7 |         raise ValidationError('文件库名称不能为空')
 8 |     if len(name.strip()) > 255:
 9 |         raise ValidationError('文件库名称不能超过255个字符')
10 |     return name.strip()
11 | 
class LibraryCreateSchema(Schema):
    """Validation schema for creating a file library.

    ``name`` and ``data_type`` are required; ``description`` defaults to
    ``None`` and ``tags`` to an empty list.
    """
    name = fields.Str(required=True, validate=[validate.Length(min=1, max=255), validate_library_name])
    description = fields.Str(missing=None, validate=validate.Length(max=1000))
    data_type = fields.Str(
        required=True,
        validate=validate.OneOf(['training', 'evaluation', 'mixed'])
    )
    # Use a callable default: a literal [] would be the same list object for
    # every load, so one caller mutating its tags would leak into later loads.
    tags = fields.List(fields.Str(), missing=list)
21 | 
class LibraryUpdateSchema(Schema):
    """Validation schema for updating a file library; every field is optional."""
    # Apply the same name check as LibraryCreateSchema so a whitespace-only
    # name cannot be set through an update either.
    name = fields.Str(validate=[validate.Length(min=1, max=255), validate_library_name])
    description = fields.Str(validate=validate.Length(max=1000), allow_none=True)
    data_type = fields.Str(validate=validate.OneOf(['training', 'evaluation', 'mixed']))
    tags = fields.List(fields.Str())
28 | 
class LibraryQuerySchema(Schema):
    """Validation schema for library list query parameters.

    Defaults: page 1, 20 items per page (max 100), sorted by ``created_at``
    in descending order.
    """
    page = fields.Int(missing=1, validate=validate.Range(min=1))
    per_page = fields.Int(missing=20, validate=validate.Range(min=1, max=100))
    name = fields.Str(missing=None)
    data_type = fields.Str(missing=None, validate=validate.OneOf(['training', 'evaluation', 'mixed']))
    # Callable default so each load gets its own list rather than one shared
    # mutable [] instance.
    tags = fields.List(fields.Str(), missing=list)
    sort_by = fields.Str(missing='created_at', validate=validate.OneOf([
        'created_at', 'updated_at', 'name', 'file_count', 'total_size'
    ]))
    sort_order = fields.Str(missing='desc', validate=validate.OneOf(['asc', 'desc']))
40 | 
class LibraryFileUploadSchema(Schema):
    """Validation schema for file uploads: a required list of raw file entries."""
    files = fields.List(fields.Raw(), required=True)
44 | 
class LibraryFileQuerySchema(Schema):
    """Validation schema for file list query parameters.

    Defaults: page 1, 20 items per page (max 100), sorted by ``uploaded_at``
    in descending order.
    """
    page = fields.Int(missing=1, validate=validate.Range(min=1))
    per_page = fields.Int(missing=20, validate=validate.Range(min=1, max=100))
    filename = fields.Str(missing=None)
    file_type = fields.Str(missing=None)
    process_status = fields.Str(missing=None, validate=validate.OneOf([
        'pending', 'processing', 'completed', 'failed'
    ]))
    sort_by = fields.Str(missing='uploaded_at', validate=validate.OneOf([
        'uploaded_at', 'filename', 'file_size', 'process_status'
    ]))
    sort_order = fields.Str(missing='desc', validate=validate.OneOf(['asc', 'desc']))
58 | 
class LibraryFileUpdateSchema(Schema):
    """Validation schema for updating a file: optional names of 1-255 characters."""
    filename = fields.Str(validate=validate.Length(min=1, max=255))
    original_filename = fields.Str(validate=validate.Length(min=1, max=255))
63 | 
class LibraryStatisticsSchema(Schema):
    """Schema describing the statistics payload that is returned."""
    total_libraries = fields.Int()
    total_files = fields.Int()
    total_processed = fields.Int()
    # Declared as a string field, not a number. NOTE(review): presumably a
    # pre-formatted size — confirm against the producer.
    total_size = fields.Str()
    conversion_rate = fields.Float() 


--------------------------------------------------------------------------------
/backend/app/models/organization.py:
--------------------------------------------------------------------------------
 1 | from sqlalchemy import Column, String, Integer, Text, DateTime, Enum as SQLEnum, ForeignKey
 2 | from sqlalchemy.orm import relationship
 3 | from datetime import datetime
 4 | import enum
 5 | import uuid
 6 | 
 7 | from app.db import db
 8 | 
 9 | 
class OrganizationStatus(enum.Enum):
    """Organization states; used as the type of ``Organization.status``."""
    ACTIVE = "ACTIVE"
    INACTIVE = "INACTIVE"
13 | 
14 | 
class Organization(db.Model):
    """Organization node in a self-referential hierarchy (parent / children)."""
    __tablename__ = 'organizations'

    id = Column(String(36), primary_key=True, default=lambda: str(uuid.uuid4()))
    name = Column(String(255), nullable=False)
    code = Column(String(100), unique=True, index=True)
    description = Column(Text)
    parent_id = Column(String(36), ForeignKey('organizations.id'), index=True)
    path = Column(String(1000), index=True)  # hierarchy path, e.g. /root/dept1/team1
    level = Column(Integer, default=1, index=True)
    sort_order = Column(Integer, default=0)
    status = Column(SQLEnum(OrganizationStatus, native_enum=True), default=OrganizationStatus.ACTIVE)
    created_at = Column(DateTime, default=datetime.utcnow)
    updated_at = Column(DateTime, default=datetime.utcnow, onupdate=datetime.utcnow)
    created_by = Column(String(36))
    updated_by = Column(String(36))

    # Self-referential relationship
    parent = relationship("Organization", remote_side="Organization.id", back_populates="children")
    children = relationship("Organization", back_populates="parent", cascade="all, delete-orphan")

    # Relationships
    user_organizations = relationship("UserOrganization", back_populates="organization", cascade="all, delete-orphan")
    user_roles = relationship("UserRole", back_populates="organization")

    def to_dict(self, include_children=False):
        """Serialize the organization into a plain dictionary.

        Args:
            include_children: When true, add a ``'children'`` list holding
                ``to_dict()`` of each direct child (without their children).

        Returns:
            dict: Column values, with ``status`` reduced to the enum value and
            timestamps to ISO-8601 strings (``None`` when unset).
        """
        result = {
            'id': self.id,
            'name': self.name,
            'code': self.code,
            'description': self.description,
            'parent_id': self.parent_id,
            'path': self.path,
            'level': self.level,
            'sort_order': self.sort_order,
            # status can still be None (e.g. before the column default has
            # been applied), so guard it like the timestamps below.
            'status': self.status.value if self.status else None,
            'created_at': self.created_at.isoformat() if self.created_at else None,
            'updated_at': self.updated_at.isoformat() if self.updated_at else None
        }

        if include_children:
            result['children'] = [child.to_dict() for child in self.children]

        return result

    def get_ancestors(self):
        """Return all ancestor organizations, nearest parent first.

        The walk stops if it reaches an organization it has already seen, so
        a corrupted parent chain that contains a cycle cannot loop forever.
        """
        ancestors = []
        seen = {id(self)}
        current = self.parent
        while current is not None and id(current) not in seen:
            seen.add(id(current))
            ancestors.append(current)
            current = current.parent
        return ancestors

    def get_descendants(self):
        """Return all descendant organizations in depth-first, parent-before-children order."""
        descendants = []
        for child in self.children:
            descendants.append(child)
            descendants.extend(child.get_descendants())
        return descendants

    def __repr__(self):
        return f'<Organization {self.name}>'


--------------------------------------------------------------------------------