├── .env.example ├── .github └── workflows │ └── publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── README_PYPI.md ├── README_en.md ├── app ├── __init__.py ├── api │ ├── __init__.py │ └── dataset.py ├── core │ ├── __init__.py │ ├── config.py │ ├── dataset.py │ ├── document.py │ ├── llm.py │ ├── logger.py │ ├── prompt.py │ └── storage.py ├── main.py ├── services │ ├── __init__.py │ └── dataset_service.py └── utils │ └── __init__.py ├── data ├── dataflow.png ├── flow.png └── test_llm.png ├── docs ├── custom_data_conversion.md └── knowledge_distillation.md ├── fastdatasets.png ├── fastdatasets ├── __init__.py ├── api.py └── cli.py ├── pyproject.toml ├── requirements.txt ├── response.json ├── scripts ├── dataset_generator.py ├── distill_dataset.py ├── sample_high_quality.py ├── test_document.py └── test_llm.py ├── setup.py ├── space ├── README.md ├── app.py ├── requirements.txt └── samples │ ├── mini.txt │ └── precomputed │ └── dataset-alpaca.json ├── tests ├── AttentionIsAllYouNeed.pdf ├── 1706.03762v7.pdf ├── BERT:Pre-training of Deep Bidirectional Transformers for Language Understanding.pdf ├── __init__.py ├── test.txt └── test_dataset.py ├── web.png └── web ├── README.md ├── __init__.py ├── config.py ├── demo.py ├── requirements.txt ├── results.json ├── run.py ├── status_manager.py ├── tasks.json ├── tasks_status.json ├── utils.py └── web_app.py /.env.example: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/.env.example -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/.github/workflows/publish.yml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/README.md -------------------------------------------------------------------------------- /README_PYPI.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/README_PYPI.md -------------------------------------------------------------------------------- /README_en.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/README_en.md -------------------------------------------------------------------------------- /app/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/__init__.py -------------------------------------------------------------------------------- /app/api/__init__.py: -------------------------------------------------------------------------------- 1 | # API路由模块 2 | from .dataset import router -------------------------------------------------------------------------------- /app/api/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/api/dataset.py -------------------------------------------------------------------------------- /app/core/__init__.py: -------------------------------------------------------------------------------- 1 | # FastDatasets 核心模块 -------------------------------------------------------------------------------- /app/core/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/config.py -------------------------------------------------------------------------------- /app/core/dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/dataset.py -------------------------------------------------------------------------------- /app/core/document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/document.py -------------------------------------------------------------------------------- /app/core/llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/llm.py -------------------------------------------------------------------------------- /app/core/logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/logger.py -------------------------------------------------------------------------------- /app/core/prompt.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/prompt.py -------------------------------------------------------------------------------- /app/core/storage.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/core/storage.py -------------------------------------------------------------------------------- /app/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/main.py -------------------------------------------------------------------------------- /app/services/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /app/services/dataset_service.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/app/services/dataset_service.py -------------------------------------------------------------------------------- /app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /data/dataflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/data/dataflow.png -------------------------------------------------------------------------------- /data/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/data/flow.png -------------------------------------------------------------------------------- /data/test_llm.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/data/test_llm.png -------------------------------------------------------------------------------- /docs/custom_data_conversion.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/docs/custom_data_conversion.md -------------------------------------------------------------------------------- /docs/knowledge_distillation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/docs/knowledge_distillation.md -------------------------------------------------------------------------------- /fastdatasets.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/fastdatasets.png -------------------------------------------------------------------------------- /fastdatasets/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/fastdatasets/__init__.py -------------------------------------------------------------------------------- /fastdatasets/api.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/fastdatasets/api.py -------------------------------------------------------------------------------- /fastdatasets/cli.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/fastdatasets/cli.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/requirements.txt -------------------------------------------------------------------------------- /response.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/response.json -------------------------------------------------------------------------------- /scripts/dataset_generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/scripts/dataset_generator.py -------------------------------------------------------------------------------- /scripts/distill_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/scripts/distill_dataset.py -------------------------------------------------------------------------------- /scripts/sample_high_quality.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/scripts/sample_high_quality.py -------------------------------------------------------------------------------- /scripts/test_document.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/scripts/test_document.py -------------------------------------------------------------------------------- /scripts/test_llm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/scripts/test_llm.py -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/setup.py -------------------------------------------------------------------------------- /space/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/space/README.md -------------------------------------------------------------------------------- /space/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/space/app.py -------------------------------------------------------------------------------- /space/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/space/requirements.txt -------------------------------------------------------------------------------- /space/samples/mini.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/space/samples/mini.txt -------------------------------------------------------------------------------- /space/samples/precomputed/dataset-alpaca.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/space/samples/precomputed/dataset-alpaca.json -------------------------------------------------------------------------------- /tests/ AttentionIsAllYouNeed.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/tests/ AttentionIsAllYouNeed.pdf -------------------------------------------------------------------------------- /tests/1706.03762v7.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/tests/1706.03762v7.pdf -------------------------------------------------------------------------------- /tests/BERT:Pre-training of Deep Bidirectional Transformers for Language Understanding.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/tests/BERT:Pre-training of Deep Bidirectional Transformers for Language Understanding.pdf -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/test.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/tests/test.txt -------------------------------------------------------------------------------- /tests/test_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/tests/test_dataset.py -------------------------------------------------------------------------------- /web.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web.png -------------------------------------------------------------------------------- /web/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/README.md -------------------------------------------------------------------------------- /web/__init__.py: -------------------------------------------------------------------------------- 1 | # Web界面模块 -------------------------------------------------------------------------------- /web/config.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/config.py -------------------------------------------------------------------------------- /web/demo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/demo.py -------------------------------------------------------------------------------- /web/requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/requirements.txt -------------------------------------------------------------------------------- /web/results.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/results.json -------------------------------------------------------------------------------- /web/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/run.py -------------------------------------------------------------------------------- /web/status_manager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/status_manager.py -------------------------------------------------------------------------------- /web/tasks.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/tasks.json -------------------------------------------------------------------------------- /web/tasks_status.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/tasks_status.json -------------------------------------------------------------------------------- /web/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/utils.py -------------------------------------------------------------------------------- /web/web_app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ZhuLinsen/FastDatasets/HEAD/web/web_app.py --------------------------------------------------------------------------------