├── .dockerignore ├── .env.local.template ├── .gitattributes ├── .gitignore ├── LICENSE ├── README.md ├── app.py ├── assets ├── argilla.png ├── flow.png ├── logo.png ├── logo.svg ├── ui-full.png └── ui.png ├── docker-compose.yml ├── docker ├── .env.docker.template ├── Dockerfile ├── README.md ├── argilla │ └── compose.yml └── ollama │ ├── compose.yml │ └── entrypoint.sh ├── examples ├── argilla-deployment.py ├── blog_private_synthetic_data_generation.md ├── fine-tune-deepseek-reasoning-sft.ipynb ├── fine-tune-modernbert-classifier.ipynb ├── fine-tune-modernbert-rag.ipynb ├── fine-tune-smollm2-on-synthetic-data.ipynb ├── hf-dedicated-or-tgi-deployment.py ├── hf-serverless-deployment-deepseek.py ├── hf-serverless-deployment.py ├── hf-serverless-different-model-for-completion.py ├── ollama-deployment.py ├── ollama-different-model-for-completion.py ├── openai-deployment.py └── vllm-deployment.py ├── packages.txt ├── pdm.lock ├── pyproject.toml ├── requirements.txt ├── src └── synthetic_dataset_generator │ ├── __init__.py │ ├── __main__.py │ ├── _distiset.py │ ├── _inference_endpoints.py │ ├── _tabbedinterface.py │ ├── app.py │ ├── apps │ ├── __init__.py │ ├── about.py │ ├── base.py │ ├── chat.py │ ├── eval.py │ ├── rag.py │ └── textcat.py │ ├── constants.py │ ├── pipelines │ ├── __init__.py │ ├── base.py │ ├── chat.py │ ├── embeddings.py │ ├── eval.py │ ├── rag.py │ └── textcat.py │ └── utils.py └── tests └── __init__.py /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/.dockerignore -------------------------------------------------------------------------------- /.env.local.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/.env.local.template -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/.gitattributes -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/README.md -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/app.py -------------------------------------------------------------------------------- /assets/argilla.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/argilla.png -------------------------------------------------------------------------------- /assets/flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/flow.png -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/logo.svg -------------------------------------------------------------------------------- /assets/ui-full.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/ui-full.png -------------------------------------------------------------------------------- /assets/ui.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/assets/ui.png -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /docker/.env.docker.template: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/.env.docker.template -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/Dockerfile -------------------------------------------------------------------------------- /docker/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/README.md -------------------------------------------------------------------------------- /docker/argilla/compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/argilla/compose.yml -------------------------------------------------------------------------------- /docker/ollama/compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/ollama/compose.yml -------------------------------------------------------------------------------- /docker/ollama/entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/docker/ollama/entrypoint.sh -------------------------------------------------------------------------------- /examples/argilla-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/argilla-deployment.py -------------------------------------------------------------------------------- /examples/blog_private_synthetic_data_generation.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/blog_private_synthetic_data_generation.md -------------------------------------------------------------------------------- /examples/fine-tune-deepseek-reasoning-sft.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/fine-tune-deepseek-reasoning-sft.ipynb -------------------------------------------------------------------------------- /examples/fine-tune-modernbert-classifier.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/fine-tune-modernbert-classifier.ipynb -------------------------------------------------------------------------------- /examples/fine-tune-modernbert-rag.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/fine-tune-modernbert-rag.ipynb -------------------------------------------------------------------------------- /examples/fine-tune-smollm2-on-synthetic-data.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/fine-tune-smollm2-on-synthetic-data.ipynb -------------------------------------------------------------------------------- /examples/hf-dedicated-or-tgi-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/hf-dedicated-or-tgi-deployment.py -------------------------------------------------------------------------------- /examples/hf-serverless-deployment-deepseek.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/hf-serverless-deployment-deepseek.py -------------------------------------------------------------------------------- /examples/hf-serverless-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/hf-serverless-deployment.py -------------------------------------------------------------------------------- /examples/hf-serverless-different-model-for-completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/hf-serverless-different-model-for-completion.py -------------------------------------------------------------------------------- /examples/ollama-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/ollama-deployment.py -------------------------------------------------------------------------------- /examples/ollama-different-model-for-completion.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/ollama-different-model-for-completion.py -------------------------------------------------------------------------------- /examples/openai-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/openai-deployment.py -------------------------------------------------------------------------------- /examples/vllm-deployment.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/examples/vllm-deployment.py -------------------------------------------------------------------------------- /packages.txt: -------------------------------------------------------------------------------- 1 | poppler-utils 2 | tesseract-ocr -------------------------------------------------------------------------------- /pdm.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/pdm.lock -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/pyproject.toml -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/requirements.txt -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/__init__.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/__main__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/__main__.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/_distiset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/_distiset.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/_inference_endpoints.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/_inference_endpoints.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/_tabbedinterface.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/_tabbedinterface.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/app.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/about.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/about.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/base.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/chat.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/eval.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/rag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/rag.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/apps/textcat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/apps/textcat.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/constants.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/base.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/chat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/chat.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/embeddings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/embeddings.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/eval.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/rag.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/rag.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/pipelines/textcat.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/pipelines/textcat.py -------------------------------------------------------------------------------- /src/synthetic_dataset_generator/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argilla-io/synthetic-data-generator/HEAD/src/synthetic_dataset_generator/utils.py -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | --------------------------------------------------------------------------------