├── .github ├── ISSUE_TEMPLATE │ ├── bug_report.md │ ├── config.yml │ └── feature_request.md ├── PULL_REQUEST_TEMPLATE.md └── workflows │ ├── build-push-codespace.yml │ ├── code-formatting.yml │ ├── markdown-link-check.yml │ ├── markdown_check_config.json │ ├── pull_request.yml │ └── readme-projects-check.yml ├── .gitignore ├── .gitmodules ├── .typos.toml ├── CODE-OF-CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── _assets ├── community_meetup.png ├── demo.gif ├── zenfiles.png └── zenml_project.gif ├── bank_subscription_prediction ├── .dockerignore ├── Dockerfile.codespace ├── README.md ├── __init__.py ├── assets │ ├── eval_vis.png │ └── training_dag.png ├── configs │ ├── baseline.yaml │ ├── deeper_trees.yaml │ └── more_trees.yaml ├── data │ └── bank.csv ├── pipelines │ ├── __init__.py │ └── training_pipeline.py ├── predict_bank_cd_subs_by_xgboost_clf_for_imbalance_dataset.ipynb ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── data_cleaner.py │ ├── data_loader.py │ ├── data_preprocessor.py │ ├── data_splitter.py │ ├── model_evaluator.py │ └── model_trainer.py └── utils │ ├── __init__.py │ └── model_utils.py ├── credit-scorer ├── .gitignore ├── README.md ├── assets │ ├── compliance-dashboard.png │ ├── deployment-dag.png │ ├── e2e.png │ ├── feature-engineering-dag.png │ ├── modal-deployment.png │ ├── streamlit-app.png │ └── training-dag.png ├── docs │ ├── guides │ │ ├── cloud_deployment.md │ │ ├── compliance_data_sources.md │ │ ├── interdependencies.md │ │ ├── pipeline_to_articles.md │ │ └── zenml_eu_act_features.md │ ├── pipeline_logs │ │ └── log_config.json │ ├── releases │ │ └── cf52cd4e-5bc2-45e9-b24d-ad5f2902829f │ │ │ ├── README.md │ │ │ ├── annex_iv.md │ │ │ ├── approval_record.json │ │ │ ├── compliance_dashboard.html │ │ │ ├── eval_visualization.html │ │ │ ├── evaluation_results.yaml │ │ │ ├── git_info.md │ │ │ ├── log_metadata.json │ │ │ ├── model_card.md │ │ │ ├── monitoring_plan.json │ │ │ ├── risk_scores.yaml │ │ │ ├── sbom.json │ │ │ └── whylogs_profile.html │ ├── risk │ │ ├── incident_log.json │ │ └── risk_register.xlsx │ └── templates │ │ ├── annex_iv_template.j2 │ │ ├── declaration_of_conformity.md │ │ ├── qms │ │ ├── audit_plan.md │ │ ├── qms_template.md │ │ ├── roles_and_responsibilities.md │ │ └── sops │ │ │ ├── data_ingestion_sop.md │ │ │ ├── drift_monitoring_sop.md │ │ │ ├── incident_response_sop.md │ │ │ ├── model_release_sop.md │ │ │ └── risk_mitigation_sop.md │ │ └── sample_inputs.json ├── modal_app │ ├── __init__.py │ ├── api_guide.md │ ├── modal_deployment.py │ └── schemas.py ├── requirements.txt ├── run.py ├── run_dashboard.py ├── scripts │ ├── collect_log_metadata.py │ └── test_compliance_tracker.py ├── src │ ├── __init__.py │ ├── configs │ │ ├── deployment.yaml │ │ ├── feature_engineering.yaml │ │ └── training.yaml │ ├── constants │ │ ├── __init__.py │ │ ├── annotations.py │ │ ├── config.py │ │ └── risk.py │ ├── data │ │ └── credit_scoring.csv │ ├── pipelines │ │ ├── __init__.py │ │ ├── deployment.py │ │ ├── feature_engineering.py │ │ └── training.py │ ├── steps │ │ ├── __init__.py │ │ ├── deployment │ │ │ ├── __init__.py │ │ │ ├── approve.py │ │ │ ├── deploy.py │ │ │ ├── generate_dashboard.py │ │ │ ├── generate_sbom.py │ │ │ ├── post_market_monitoring.py │ │ │ └── post_run_annex.py │ │ ├── feature_engineering │ │ │ ├── __init__.py │ │ │ ├── data_preprocessor.py │ │ │ ├── data_profiler.py │ │ │ ├── data_splitter.py │ │ │ └── ingest.py │ │ └── training │ │ │ ├── __init__.py │ │ │ ├── evaluate.py │ │ │ ├── risk_assessment.py │ │ │ └── 
train.py │ └── utils │ │ ├── __init__.py │ │ ├── compliance │ │ ├── __init__.py │ │ ├── annex_iv.py │ │ ├── compliance_articles.yaml │ │ ├── compliance_calculator.py │ │ ├── compliance_constants.py │ │ ├── data_loader.py │ │ ├── exceptions.py │ │ ├── orchestrator.py │ │ ├── schemas.py │ │ └── template.py │ │ ├── eval.py │ │ ├── preprocess.py │ │ ├── storage.py │ │ └── visualizations │ │ ├── __init__.py │ │ ├── dashboard.py │ │ ├── eval.py │ │ └── whylogs.py └── streamlit_app │ ├── __init__.py │ ├── components │ ├── __init__.py │ ├── api_dashboard.py │ ├── data_profile.py │ ├── documentation.py │ ├── executive_summary.py │ ├── header.py │ └── risks.py │ ├── config.py │ ├── data │ ├── __init__.py │ ├── compliance_utils.py │ ├── loader.py │ ├── processor.py │ └── validator.py │ ├── main.py │ ├── styling.py │ └── utils │ ├── __init__.py │ └── export.py ├── databricks-production-qa-demo ├── .assets │ └── 00_pipelines_composition.png ├── .copier-answers.yml ├── .dockerignore ├── LICENSE ├── Makefile ├── README.md ├── configs │ ├── deployer_config.yaml │ ├── inference_config.yaml │ └── train_config.yaml ├── pipelines │ ├── __init__.py │ ├── batch_inference.py │ ├── deployment.py │ └── training.py ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── alerts │ │ ├── __init__.py │ │ └── notify_on.py │ ├── data_quality │ │ ├── __init__.py │ │ └── drift_quality_gate.py │ ├── deployment │ │ ├── __init__.py │ │ └── deployment_deploy.py │ ├── etl │ │ ├── __init__.py │ │ ├── data_loader.py │ │ ├── inference_data_preprocessor.py │ │ ├── train_data_preprocessor.py │ │ └── train_data_splitter.py │ ├── explainability │ │ ├── __init__.py │ │ ├── shap_explainer.py │ │ └── shap_visualization.py │ ├── hp_tuning │ │ ├── __init__.py │ │ ├── hp_tuning_select_best_model.py │ │ └── hp_tuning_single_search.py │ ├── inference │ │ ├── __init__.py │ │ └── inference_predict.py │ ├── promotion │ │ ├── __init__.py │ │ ├── compute_performance_metrics_on_current_data.py │ │ └── promote_with_metric_compare.py │ └── training │ │ ├── __init__.py │ │ ├── model_evaluator.py │ │ └── model_trainer.py └── utils │ ├── __init__.py │ ├── get_model_from_config.py │ ├── preprocess.py │ └── promote_in_model_registry.py ├── deep_research ├── README.md ├── __init__.py ├── assets │ ├── pipeline_visualization.png │ ├── sample_report.gif │ └── styles.css ├── configs │ ├── balanced_research.yaml │ ├── deep_research.yaml │ ├── enhanced_research.yaml │ ├── enhanced_research_with_approval.yaml │ ├── quick_research.yaml │ └── rapid_research.yaml ├── logging_config.py ├── materializers │ ├── __init__.py │ ├── analysis_data_materializer.py │ ├── approval_decision_materializer.py │ ├── final_report_materializer.py │ ├── mcp_result_materializer.py │ ├── prompt_materializer.py │ ├── query_context_materializer.py │ ├── search_data_materializer.py │ ├── synthesis_data_materializer.py │ └── tracing_metadata_materializer.py ├── pipelines │ ├── __init__.py │ └── parallel_research_pipeline.py ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── approval_step.py │ ├── collect_tracing_metadata_step.py │ ├── cross_viewpoint_step.py │ ├── execute_approved_searches_step.py │ ├── generate_reflection_step.py │ ├── initialize_prompts_step.py │ ├── mcp_step.py │ ├── merge_results_step.py │ ├── process_sub_question_step.py │ ├── pydantic_final_report_step.py │ └── query_decomposition_step.py ├── tests │ ├── __init__.py │ ├── conftest.py │ ├── test_approval_utils.py │ ├── test_artifact_models.py │ ├── test_prompt_models.py │ ├── 
test_pydantic_final_report_step.py │ └── test_pydantic_models.py └── utils │ ├── __init__.py │ ├── approval_utils.py │ ├── config_utils.py │ ├── css_utils.py │ ├── llm_utils.py │ ├── prompt_models.py │ ├── prompts.py │ ├── pydantic_models.py │ ├── search_utils.py │ └── tracing_metadata_utils.py ├── end-to-end-computer-vision ├── .dockerignore ├── .gitignore ├── README.md ├── _assets │ ├── diagram.png │ ├── labeling_interface.png │ ├── labeling_setup.png │ └── project_creation_label_studio.png ├── bus.jpg ├── configs │ ├── data_export.yaml │ ├── inference_pipeline.yaml │ ├── ingest_data.yaml │ ├── training_pipeline.yaml │ ├── training_pipeline_remote_eks_example.yaml │ └── training_pipeline_remote_gpu.yaml ├── data │ ├── .gitignore │ └── README.md ├── ls_export.py ├── materializers │ ├── __init__.py │ ├── label_studio_export_materializer.py │ ├── label_studio_yolo_dataset_materializer.py │ ├── ultralytics_materializer.py │ └── yolo_materializer.py ├── notebooks │ ├── fiftyone_prelabelling.ipynb │ ├── huggingface.ipynb │ ├── label_studio_annotation.ipynb │ ├── scratchpad.ipynb │ └── ultralytics.ipynb ├── pipelines │ ├── __init__.py │ ├── data_export.py │ ├── data_ingestion.py │ ├── inference.py │ └── training.py ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── export_label_studio.py │ ├── fiftyone_inference.py │ ├── load_model.py │ ├── predict_image.py │ ├── process_hf_dataset.py │ ├── promote_model.py │ ├── train_model.py │ └── upload_to_label_studio.py └── utils │ ├── __init__.py │ ├── constants.py │ ├── dataset_utils.py │ └── split_data.py ├── eurorate-predictor ├── .assets │ └── zenml_airflow_vertex_gcp_mlops.png ├── .dockerignore ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── configs │ ├── etl_develop.yaml │ ├── etl_production.yaml │ ├── feature_engineering_develop.yaml │ ├── feature_engineering_production.yaml │ ├── training_develop.yaml │ └── training_production.yaml ├── data │ └── raw_data.csv ├── demo.py ├── materializers │ ├── __init__.py │ ├── bq_dataset.py │ ├── bq_dataset_materializer.py │ ├── csv_dataset.py │ ├── csv_dataset_materializer.py │ └── dataset.py ├── pipelines │ ├── __init__.py │ ├── etl.py │ ├── feature_engineering.py │ └── training.py ├── requirements.txt ├── run.py ├── script.sh └── steps │ ├── __init__.py │ ├── etl │ ├── __init__.py │ ├── extract_data_local.py │ ├── extract_data_remote.py │ └── transform.py │ ├── feature_engineering │ ├── __init__.py │ └── augment.py │ ├── promotion │ ├── __init__.py │ └── promote.py │ └── training │ ├── __init__.py │ └── model_trainer.py ├── gamesense ├── .assets │ ├── model.png │ └── pipeline.png ├── .dockerignore ├── LICENSE ├── README.md ├── configs │ ├── llama3-1_finetune_local.yaml │ ├── llama3-1_finetune_remote.yaml │ ├── orchestrator_finetune.yaml │ ├── phi3.5_finetune_cpu.yaml │ ├── phi3.5_finetune_local.yaml │ ├── phi3.5_finetune_remote.yaml │ ├── phi3_finetune.yaml │ └── remote_finetune.yaml ├── materializers │ ├── __init__.py │ └── directory_materializer.py ├── pipelines │ ├── __init__.py │ ├── train.py │ └── train_accelerated.py ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── evaluate_model.py │ ├── finetune.py │ ├── log_metadata.py │ ├── prepare_datasets.py │ └── promote.py └── utils │ ├── __init__.py │ ├── callbacks.py │ ├── loaders.py │ ├── logging.py │ └── tokenizer.py ├── huggingface-sagemaker ├── .dockerignore ├── .gitignore ├── Dockerfile ├── Dockerfile.codespace ├── LICENSE ├── Makefile ├── README.md ├── assets │ ├── deploying_pipeline_overview.png │ ├── 
header.png │ ├── hf_repo_commit.png │ ├── mcp_1.png │ ├── mcp_2.png │ ├── nlp_zenml_demo.png │ ├── pipelines_feature_eng.png │ ├── pipelines_overview.png │ ├── promoting_pipeline_overview.png │ ├── training_pipeline_overview.png │ └── training_pipeline_with_hf.png ├── configs │ ├── deploying_config.yaml │ ├── feature_engineering_config.yaml │ ├── promoting_config.yaml │ └── trainer_config.yaml ├── gradio │ ├── .dockerignore │ ├── Dockerfile │ ├── __init__.py │ ├── app.py │ ├── aws_helper.py │ ├── requirements.txt │ └── serve.yaml ├── pipelines │ ├── __init__.py │ ├── deploying.py │ ├── feature_engineering.py │ ├── promoting.py │ └── training.py ├── requirements.txt ├── run.ipynb ├── run.py ├── run_delete_endpoint.py ├── steps │ ├── __init__.py │ ├── alerts │ │ ├── __init__.py │ │ └── notify_on.py │ ├── dataset_loader │ │ ├── __init__.py │ │ ├── data_loader.py │ │ └── generate_reference_and_comparison_datasets.py │ ├── deploying │ │ ├── __init__.py │ │ ├── huggingface_deployment.py │ │ ├── sagemaker_deployment.py │ │ └── save_model.py │ ├── promotion │ │ ├── __init__.py │ │ ├── promote_get_metrics.py │ │ └── promote_metric_compare_promoter.py │ ├── registerer │ │ ├── __init__.py │ │ └── model_log_register.py │ ├── tokenization │ │ ├── __init__.py │ │ └── tokenization.py │ ├── tokenizer_loader │ │ ├── __init__.py │ │ └── tokenizer_loader.py │ └── training │ │ ├── __init__.py │ │ └── model_trainer.py └── utils │ └── misc.py ├── llm-complete-guide ├── .assets │ ├── argilla_secret.png │ ├── huggingface-space-rag-deployment.png │ ├── rag-pipeline-zenml-cloud.png │ ├── supabase-connection-string.png │ ├── supabase-create-project.png │ ├── tsne.png │ └── umap.png ├── .dockerignore ├── LICENSE ├── README.md ├── ZENML_VERSION.txt ├── configs │ ├── dev │ │ ├── embeddings.yaml │ │ ├── rag.yaml │ │ ├── rag_eval.yaml │ │ └── synthetic.yaml │ ├── production │ │ ├── embeddings.yaml │ │ ├── eval.yaml │ │ ├── rag.yaml │ │ └── synthetic.yaml │ └── staging │ │ ├── embeddings.yaml │ │ ├── eval.yaml │ │ ├── rag.yaml │ │ └── synthetic.yaml ├── constants.py ├── data │ ├── test_dataset.json │ └── train_dataset.json ├── deployment_hf.py ├── gh_action_rag.py ├── materializers │ ├── __init__.py │ └── document_materializer.py ├── most_basic_eval.py ├── most_basic_rag_pipeline.py ├── notebooks │ ├── __init__.py │ ├── argilla_embeddings.ipynb │ ├── finetune_embeddings.ipynb │ ├── finetune_embeddings.py │ ├── reranking.ipynb │ └── visualise_embeddings.ipynb ├── pipelines │ ├── __init__.py │ ├── distilabel_generation.py │ ├── finetune_embeddings.py │ ├── generate_chunk_questions.py │ ├── llm_basic_rag.py │ ├── llm_eval.py │ ├── llm_index_and_evaluate.py │ ├── llm_langfuse_evals.py │ └── rag_deployment.py ├── requirements-argilla.txt ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── chunk_documents.py │ ├── create_prompt.py │ ├── distilabel_generate_queries.py │ ├── eval_e2e.py │ ├── eval_langfuse.py │ ├── eval_pii.py │ ├── eval_retrieval.py │ ├── eval_visualisation.py │ ├── finetune_embeddings.py │ ├── finetune_embeddings_legacy.py │ ├── generate_questions.py │ ├── hf_dataset_loader.py │ ├── huggingface_dataset_upload.py │ ├── markdown_loader.py │ ├── populate_index.py │ ├── preprocess_markdown.py │ ├── push_to_argilla.py │ ├── push_to_hf.py │ ├── rag_deployment.py │ ├── synthetic_data.py │ ├── url_scraper.py │ ├── url_scraping_utils.py │ └── web_url_loader.py ├── structures.py ├── tests │ ├── __init__.py │ └── test_url_scraping_utils.py └── utils │ ├── __init__.py │ ├── hf_utils.py │ ├── llm_utils.py │ 
├── openai_utils.py │ └── visualization_utils.py ├── magic-photobooth ├── .dockerignore ├── .gitignore ├── Dockerfile.codespace ├── README.md ├── assets │ ├── app_screenshot_1.png │ ├── app_screenshot_2.png │ ├── app_screenshot_3.png │ ├── batch-dreambooth.png │ ├── blupus-demo.png │ └── hamza_superman.mp4 ├── configs │ └── k8s_run_refactored_multi_video.yaml ├── constants.py ├── frontend.py ├── k8s_run.py ├── k8s_run_refactored.py ├── k8s_run_refactored_multi_video.py ├── modal_run.py ├── modal_run_using_azure_data.py ├── paris_run.py ├── requirements-frontend.txt ├── requirements-modal.txt ├── requirements.txt ├── run.py ├── test_examples_utils.py ├── train_dreambooth.py ├── train_dreambooth_lora_flux.py └── walkthrough.ipynb ├── nightwatch-ai ├── .dockerignore ├── .flake8 ├── .gitignore ├── README.md ├── assets │ └── youtldr_summarizer_slack.png ├── pyproject.toml └── src │ ├── __init__.py │ ├── pipelines │ ├── __init__.py │ └── supabase_summary.py │ ├── requirements.txt │ ├── run.py │ └── steps │ ├── __init__.py │ ├── alerters.py │ ├── importers.py │ └── summarizers.py ├── omni-reader ├── .dockerignore ├── .env.example ├── .gitignore ├── Dockerfile.codespace ├── LICENSE ├── README.md ├── app.py ├── assets │ ├── docs │ │ ├── metrics.png │ │ ├── pipeline_dags.png │ │ ├── streamlit.png │ │ └── visualization.png │ ├── logos │ │ ├── default.svg │ │ ├── gemma.svg │ │ ├── microsoft.svg │ │ ├── mistral.svg │ │ ├── ollama.svg │ │ └── openai.svg │ ├── omni-reader-blog-cover.png │ └── samples_for_ocr │ │ ├── handwritten │ │ ├── easy_example.jpeg │ │ ├── education_article_excerpt.webp │ │ ├── incomplete_sentence.png │ │ └── reporter_notes.png │ │ ├── numbers │ │ ├── lexus_vin_number.webp │ │ └── tire_serial_number.jpg │ │ ├── rx_prescriptions │ │ ├── rx_prescription_clear.jpg │ │ └── rx_prescription_unclear.png │ │ └── street_signs │ │ ├── montreal_signs.jpg │ │ └── paris_signs.jpg ├── configs │ ├── batch_pipeline.yaml │ └── evaluation_pipeline.yaml ├── ground_truth_texts │ ├── education_article_excerpt.txt │ ├── incomplete_sentence.txt │ ├── lexus_vin_number.txt │ ├── montreal_signs.txt │ ├── paris_signs.txt │ ├── reporter_notes.txt │ ├── rx_prescription_clear.txt │ ├── rx_prescription_unclear.txt │ └── tire_serial_number.txt ├── pipelines │ ├── __init__.py │ ├── batch_pipeline.py │ └── evaluation_pipeline.py ├── requirements.txt ├── run.py ├── schemas │ ├── __init__.py │ ├── image_description.py │ └── ocr_result.py ├── settings.json ├── steps │ ├── __init__.py │ ├── evaluate_models.py │ ├── loaders.py │ └── run_ocr.py └── utils │ ├── __init__.py │ ├── config.py │ ├── encode_image.py │ ├── extract_json.py │ ├── metrics.py │ ├── model_configs.py │ ├── ocr_processing.py │ ├── prompt.py │ └── visualizations.py ├── oncoclear ├── .assets │ ├── cloud_mcp.png │ ├── deployment_architecture.png │ ├── deployment_pipeline.png │ ├── fastapi_docs.png │ ├── feature_engineering_pipeline.png │ ├── inference_pipeline.png │ ├── pipeline_overview.png │ └── training_pipeline.png ├── .dockerignore ├── Dockerfile.codespace ├── README.md ├── api │ ├── .dockerignore │ ├── Dockerfile │ ├── main.py │ ├── requirements.txt │ └── utils │ │ ├── __init__.py │ │ ├── __init__.py:Zone.Identifier │ │ ├── preprocess.py │ │ └── preprocess.py:Zone.Identifier ├── configs │ ├── feature_engineering.yaml │ ├── inference.yaml │ ├── training_rf.yaml │ └── training_sgd.yaml ├── pipelines │ ├── __init__.py │ ├── deployment.py │ ├── feature_engineering.py │ ├── inference.py │ └── training.py ├── requirements.txt ├── run.py ├── steps │ ├── 
__init__.py │ ├── data_loader.py │ ├── data_preprocessor.py │ ├── data_splitter.py │ ├── deployment.py │ ├── inference_predict.py │ ├── inference_preprocessor.py │ ├── model_evaluator.py │ ├── model_promoter.py │ └── model_trainer.py └── utils │ ├── __init__.py │ └── preprocess.py ├── pyproject.toml ├── research-radar ├── .dockerignore ├── .env.example ├── .gitignore ├── LICENSE ├── README.md ├── assets │ ├── ft_model_comparison.png │ ├── modernbert_claude_haiku_comparison.png │ ├── resuming-from-checkpoint.png │ ├── starting-new-run.png │ └── training_pipeline.png ├── classification_results │ ├── README.md │ └── compute_stats.py ├── configs │ ├── base_config.yaml │ └── remote_finetune.yaml ├── data │ ├── README.md │ ├── composite_dataset.jsonl │ └── unclassified_dataset.jsonl ├── materializers │ ├── __init__.py │ ├── dataset_materializer.py │ ├── datetime_materializer.py │ ├── lazyframe_materializer.py │ ├── register_materializers.py │ └── url_materializer.py ├── model_compare_metrics │ └── README.md ├── pipelines │ ├── README.md │ ├── __init__.py │ ├── classification.py │ ├── deployment.py │ ├── model_comparison.py │ └── training.py ├── prompts │ ├── README.md │ ├── room_to_think_prompt.txt │ ├── system_prompt.txt │ └── user_prompt.txt ├── requirements.txt ├── run.py ├── schemas │ ├── README.md │ ├── __init__.py │ ├── classification_output.py │ ├── claude_response.py │ ├── config_models.py │ ├── input_article.py │ ├── training_config.py │ └── zenml_project.py ├── steps │ ├── README.md │ ├── __init__.py │ ├── classify_articles.py │ ├── compare_models.py │ ├── data_loader.py │ ├── data_preprocessor.py │ ├── data_splitter.py │ ├── finetune_modernbert.py │ ├── load_test_set.py │ ├── merge_classifications.py │ ├── push_model_to_huggingface.py │ ├── save_classifications.py │ ├── save_comparison_metrics.py │ ├── save_model_local.py │ └── save_test_set.py └── utils │ ├── README.md │ ├── __init__.py │ ├── checkpoint.py │ ├── classification_helpers.py │ ├── classification_metrics.py │ ├── claude_evaluator.py │ ├── docker_settings.py │ ├── json_parser.py │ ├── load_config.py │ ├── logger.py │ ├── merge.py │ ├── model_comparison_metrics.py │ ├── model_loaders.py │ ├── prompt.py │ ├── remote_setup.py │ ├── setup_environment.py │ └── training_eval_metrics.py ├── retail-forecast ├── .dockerignore ├── Dockerfile.codespace ├── README.md ├── assets │ ├── data_visualization.gif │ ├── forecast_dashboard.png │ ├── inference_pipeline.png │ ├── training_pipeline.png │ └── zenml_dashboard.png ├── configs │ ├── inference.yaml │ └── training.yaml ├── data │ ├── calendar.csv │ └── sales.csv ├── materializers │ ├── __init__.py │ └── prophet_materializer.py ├── pipelines │ ├── inference_pipeline.py │ └── training_pipeline.py ├── requirements.txt ├── run.py └── steps │ ├── data_loader.py │ ├── data_preprocessor.py │ ├── data_validator.py │ ├── data_visualizer.py │ ├── model_evaluator.py │ ├── model_trainer.py │ └── predictor.py ├── scripts ├── check-readme-projects.sh ├── check-spelling.sh ├── check_readme_projects.py ├── docstring.sh ├── format.sh ├── generate_codespace_dockerfile.py └── lint.sh ├── sign-language-detection-yolov5 ├── .dockerignore ├── README.md ├── materializer │ ├── __init__.py │ ├── dataset_materializer.py │ └── yolo_model_materializer.py ├── model.py ├── pipelines │ ├── __init__.py │ ├── deployment_pipeline.py │ ├── inference_pipeline.py │ └── train_pipeline.py ├── requirements.txt ├── run.py ├── service.py └── steps │ ├── __init__.py │ ├── bento_builder.py │ ├── bento_deployer.py │ ├── 
data_loader.py │ ├── deployment_trigger.py │ ├── detector.py │ ├── inference_loader.py │ ├── model_loader.py │ ├── prediction_service_loader.py │ ├── predictor.py │ ├── train_augmenter.py │ ├── trainer.py │ └── valid_augmenter.py ├── vertex-registry-and-deployer ├── .assets │ ├── cloud_mcp.png │ ├── cloud_mcp_predictions.png │ ├── cloud_mcp_screenshot.png │ ├── feature_engineering_pipeline.png │ ├── inference_pipeline.png │ ├── pipeline_overview.png │ └── training_pipeline.png ├── .copier-answers.yml ├── .dockerignore ├── Dockerfile.codespace ├── LICENSE ├── README.md ├── configs │ ├── inference.yaml │ └── training_sgd.yaml ├── pipelines │ ├── __init__.py │ ├── inference.py │ └── training.py ├── requirements.txt ├── run.py └── steps │ ├── __init__.py │ ├── model_deployer.py │ ├── model_promoter.py │ ├── model_register.py │ └── model_trainer.py ├── zencoder ├── .assets │ ├── zencoder_header.png │ ├── zencoder_mcp_1.png │ └── zencoder_mcp_2.png ├── .copier-answers.yml ├── .dockerignore ├── README.md ├── configs │ ├── deployment_a10.yaml │ ├── deployment_a100.yaml │ ├── deployment_t4.yaml │ ├── finetune_aws.yaml │ ├── finetune_gcp.yaml │ ├── finetune_local.yaml │ └── generate_code_dataset.yaml ├── license ├── license_header ├── materializers │ ├── __init__.py │ └── huggingface_model_materializer.py ├── pipelines │ ├── __init__.py │ ├── _generate_code_dataset_by_gpt4turbo.py │ ├── deployment.py │ ├── finetune.py │ └── generate_code_dataset.py ├── requirements.txt ├── run.py ├── steps │ ├── __init__.py │ ├── deployment.py │ ├── parallel_clones.py │ ├── prepare_dataset.py │ ├── push_dataset_to_hub.py │ ├── trainer.py │ └── vllm_deployer.py ├── test_starcoder_bigcode.py └── test_zencoder.py └── zenml-support-agent ├── .dockerignore ├── .flake8 ├── .gitignore ├── README.md ├── __init__.py ├── agent ├── agent_executor_materializer.py └── prompt.py ├── assets ├── llm-agent │ ├── LLM Agent pipeline.png │ ├── image.jpg │ ├── model_promotion.png │ ├── model_version_metadata.png │ └── model_versions.png └── slackbot │ ├── big-picture-workflow.png │ ├── slack-automated-redeployment.png │ ├── slackbot-small.png │ ├── slackbot.png │ ├── slackbot_pipeline_project.png │ └── slackbot_support_redacted.png ├── configs └── agent_config.yaml ├── materializers └── faiss_materializer.py ├── pipelines ├── __init__.py └── agent_creator.py ├── pyproject.toml ├── requirements.txt ├── run.ipynb ├── run.py ├── scripts └── production_deploy.sh └── steps ├── __init__.py ├── agent_creator.py ├── index_generator.py ├── url_scraper.py ├── url_scraping_utils.py └── web_url_loader.py /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Bug Report" 3 | about: "Create a report to help us improve" 4 | title: "[Bug] " 5 | labels: bug 6 | assignees: '' 7 | --- 8 | 9 | **Describe the bug** 10 | A clear and concise description of what the bug is. 11 | 12 | **To Reproduce** 13 | Steps to reproduce the behavior: 14 | 1. Go to '...' 15 | 2. Click on '....' 16 | 3. Scroll down to '....' 17 | 4. See error 18 | 19 | **Expected behavior** 20 | A clear and concise description of what you expected to happen. 21 | 22 | **Screenshots** 23 | If applicable, add screenshots to help explain your problem. 24 | 25 | **Environment (please complete the following information):** 26 | - OS: [e.g. Ubuntu 22.04] 27 | - Python version: [e.g. 3.11] 28 | - ZenML version: [e.g. 0.56.0] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 
32 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Ask a question 4 | url: https://zenml.io/slack-invite/ 5 | about: Please ask and answer questions here. 6 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: "Feature Request" 3 | about: "Suggest an idea for this project" 4 | title: "[Feature] " 5 | labels: enhancement 6 | assignees: '' 7 | --- 8 | 9 | **Is your feature request related to a problem? Please describe.** 10 | A clear and concise description of what the problem is. Ex: I'm always frustrated when [...] 11 | 12 | **Describe the solution you'd like** 13 | A clear and concise description of what you want to happen. 14 | 15 | **Describe alternatives you've considered** 16 | A clear and concise description of any alternative solutions or features you've considered. 17 | 18 | **Additional context** 19 | Add any other context or screenshots about the feature request here. 20 | -------------------------------------------------------------------------------- /.github/PULL_REQUEST_TEMPLATE.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | Please provide a short summary explaining the motivation behind these changes. 4 | 5 | # Checklist 6 | - [ ] I have read the [contributing guidelines](../CONTRIBUTING.md). 7 | - [ ] I have run the necessary tests and linters. 8 | - [ ] I have updated relevant documentation where applicable. 9 | 10 | # Related Issues 11 | Please link to any relevant issues or discussions. 12 | -------------------------------------------------------------------------------- /.github/workflows/code-formatting.yml: -------------------------------------------------------------------------------- 1 | name: Code Formatting 2 | 3 | on: 4 | workflow_call: 5 | push: 6 | branches: 7 | - main 8 | 9 | jobs: 10 | formatting-check: 11 | name: Code Formatting Check 12 | runs-on: ubuntu-latest 13 | if: github.event.pull_request.draft == false 14 | env: 15 | ZENML_DEBUG: 1 16 | ZENML_ANALYTICS_OPT_IN: false 17 | steps: 18 | - name: Checkout repository 19 | uses: actions/checkout@v3 20 | 21 | - name: Set up Python 22 | uses: actions/setup-python@v4 23 | with: 24 | python-version: '3.9' 25 | 26 | - name: Install latest ruff 27 | run: pip install --upgrade ruff 28 | 29 | - name: Run formatting script 30 | run: bash scripts/format.sh 31 | 32 | - name: Check for changes 33 | id: git-check 34 | run: | 35 | git diff --exit-code || echo "changes=true" >> $GITHUB_OUTPUT 36 | 37 | - name: Fail if changes were made 38 | if: steps.git-check.outputs.changes == 'true' 39 | run: | 40 | echo "::error::Formatting check failed. Please run 'scripts/format.sh' locally and commit the changes." 
41 | exit 1 42 | -------------------------------------------------------------------------------- /.github/workflows/markdown-link-check.yml: -------------------------------------------------------------------------------- 1 | name: Check Markdown Links 2 | 3 | on: workflow_call 4 | 5 | jobs: 6 | markdown-link-check: 7 | runs-on: ubuntu-latest 8 | env: 9 | ZENML_DEBUG: 1 10 | ZENML_ANALYTICS_OPT_IN: false 11 | steps: 12 | - uses: actions/checkout@main 13 | - uses: gaurav-nelson/github-action-markdown-link-check@v1 14 | with: 15 | use-quiet-mode: 'yes' 16 | use-verbose-mode: 'no' 17 | config-file: .github/workflows/markdown_check_config.json 18 | -------------------------------------------------------------------------------- /.github/workflows/markdown_check_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "ignorePatterns": [ 3 | { 4 | "pattern": "^http://0.0.0.0" 5 | }, 6 | { 7 | "pattern": "^http://127.0.0.1" 8 | }, 9 | { 10 | "pattern": "^http://localhost" 11 | } 12 | ], 13 | "aliveStatusCodes": [ 14 | 999, 15 | 403, 16 | 503, 17 | 200, 18 | 0 19 | ], 20 | "timeout": "20s", 21 | "retryOn429": true, 22 | "retryCount": 5, 23 | "fallbackRetryDelay": "30s" 24 | } -------------------------------------------------------------------------------- /.github/workflows/pull_request.yml: -------------------------------------------------------------------------------- 1 | name: Pull Request Checks 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | 7 | jobs: 8 | spell-check: 9 | name: spell-check 10 | runs-on: ubuntu-latest 11 | if: github.event.pull_request.draft == false 12 | env: 13 | ZENML_DEBUG: 1 14 | ZENML_ANALYTICS_OPT_IN: false 15 | steps: 16 | - name: Checkout Actions Repository 17 | uses: actions/checkout@v2 18 | 19 | - name: Spelling checker 20 | uses: crate-ci/typos@master 21 | with: 22 | files: "." 
23 | config: ./.typos.toml 24 | 25 | markdown-link-check: 26 | uses: ./.github/workflows/markdown-link-check.yml 27 | if: github.event.pull_request.draft == false 28 | 29 | code-formatting-check: 30 | uses: ./.github/workflows/code-formatting.yml 31 | if: github.event.pull_request.draft == false 32 | 33 | readme-projects-check: 34 | uses: ./.github/workflows/readme-projects-check.yml 35 | if: github.event.pull_request.draft == false 36 | -------------------------------------------------------------------------------- /.github/workflows/readme-projects-check.yml: -------------------------------------------------------------------------------- 1 | name: README Projects Check 2 | 3 | on: 4 | workflow_call: 5 | 6 | jobs: 7 | readme-projects-check: 8 | name: Check Projects in README 9 | runs-on: ubuntu-latest 10 | if: github.event.pull_request.draft == false 11 | steps: 12 | - name: Checkout repository 13 | uses: actions/checkout@v3 14 | 15 | - name: Set up Python 16 | uses: actions/setup-python@v4 17 | with: 18 | python-version: '3.9' 19 | 20 | - name: Run README projects check 21 | run: python3 scripts/check_readme_projects.py -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "sign-language-detection-yolov5/yolov5"] 2 | path = sign-language-detection-yolov5/yolov5 3 | url = https://github.com/safoinme/yolov5.git 4 | -------------------------------------------------------------------------------- /_assets/community_meetup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/_assets/community_meetup.png -------------------------------------------------------------------------------- /_assets/demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/_assets/demo.gif -------------------------------------------------------------------------------- /_assets/zenfiles.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/_assets/zenfiles.png -------------------------------------------------------------------------------- /_assets/zenml_project.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/_assets/zenml_project.gif -------------------------------------------------------------------------------- /bank_subscription_prediction/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | 
*.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | .vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /bank_subscription_prediction/__init__.py: -------------------------------------------------------------------------------- 1 | """Bank Subscription Prediction Project using ZenML.""" 2 | -------------------------------------------------------------------------------- /bank_subscription_prediction/assets/eval_vis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/bank_subscription_prediction/assets/eval_vis.png -------------------------------------------------------------------------------- /bank_subscription_prediction/assets/training_dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/bank_subscription_prediction/assets/training_dag.png -------------------------------------------------------------------------------- /bank_subscription_prediction/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/bank_subscription_prediction/pipelines/__init__.py -------------------------------------------------------------------------------- /bank_subscription_prediction/requirements.txt: -------------------------------------------------------------------------------- 1 | pandas 2 | numpy 3 | scikit-learn 4 | xgboost 5 | matplotlib 6 | plotly 7 | zenml>=0.82.1 8 | click 9 | requests 10 | pyarrow -------------------------------------------------------------------------------- /bank_subscription_prediction/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/bank_subscription_prediction/steps/__init__.py -------------------------------------------------------------------------------- /bank_subscription_prediction/steps/data_cleaner.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from zenml import step 3 | 4 | 5 | @step 6 | def clean_data_step(df: pd.DataFrame) -> pd.DataFrame: 7 | """Cleans the input DataFrame. 8 | 9 | Args: 10 | df: Pandas DataFrame to clean. 11 | 12 | Returns: 13 | Cleaned Pandas DataFrame. 
14 | """ 15 | # Drop rows with missing data 16 | data = df.dropna() 17 | 18 | # Convert the 'day' column type to object as 'day' is categorical 19 | # Ensure the column exists before trying to modify it 20 | if "day" in data.columns: 21 | data["day"] = data["day"].astype("object") 22 | else: 23 | # Handle the case where 'day' column might be missing, perhaps log a warning 24 | # For now, we'll proceed without this conversion if the column isn't there 25 | pass 26 | 27 | return data 28 | -------------------------------------------------------------------------------- /bank_subscription_prediction/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility functions for the Bank Subscription Prediction project.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/assets/compliance-dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/compliance-dashboard.png -------------------------------------------------------------------------------- /credit-scorer/assets/deployment-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/deployment-dag.png -------------------------------------------------------------------------------- /credit-scorer/assets/e2e.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/e2e.png -------------------------------------------------------------------------------- /credit-scorer/assets/feature-engineering-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/feature-engineering-dag.png -------------------------------------------------------------------------------- /credit-scorer/assets/modal-deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/modal-deployment.png -------------------------------------------------------------------------------- /credit-scorer/assets/streamlit-app.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/streamlit-app.png -------------------------------------------------------------------------------- /credit-scorer/assets/training-dag.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/assets/training-dag.png -------------------------------------------------------------------------------- /credit-scorer/docs/pipeline_logs/log_config.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": 1, 3 | "formatters": { 4 | "standard": { 5 | "format": "%(asctime)s [%(levelname)s] %(name)s: %(message)s" 6 | } 7 | }, 8 | 
"handlers": { 9 | "file": { 10 | "class": "logging.handlers.RotatingFileHandler", 11 | "formatter": "standard", 12 | "filename": "docs/pipeline_logs/application.log", 13 | "maxBytes": 10485760, 14 | "backupCount": 10 15 | } 16 | }, 17 | "root": { 18 | "level": "INFO", 19 | "handlers": ["file"] 20 | }, 21 | "retention": { 22 | "days": 3650, 23 | "policy": "preserve_all", 24 | "description": "EU AI Act Article 12 compliance - 10-year retention period for all logs", 25 | "backup_location": "secure_archive", 26 | "compliance_version": "EU AI Act 2025" 27 | } 28 | } -------------------------------------------------------------------------------- /credit-scorer/docs/releases/cf52cd4e-5bc2-45e9-b24d-ad5f2902829f/git_info.md: -------------------------------------------------------------------------------- 1 | # Git Information 2 | 3 | **Commit SHA:** d0912589685ee746ffe5cf2ecda59ad13eb5425f 4 | 5 | **Commit Date:** 2025-05-22T16:58:33 6 | 7 | **Author:** marwan37 8 | 9 | **Message:** 10 | ``` 11 | remove extra html file 12 | 13 | ``` 14 | -------------------------------------------------------------------------------- /credit-scorer/docs/releases/cf52cd4e-5bc2-45e9-b24d-ad5f2902829f/log_metadata.json: -------------------------------------------------------------------------------- 1 | { 2 | "log_uri": "s3://zenml-dev/pipeline_runs/logs/a8cb0ed7-c0b1-46fc-bc95-00169663dc62.log", 3 | "pipeline_name": "deployment", 4 | "run_id": "cf52cd4e-5bc2-45e9-b24d-ad5f2902829f", 5 | "created": "2025-05-23 00:09:57" 6 | } -------------------------------------------------------------------------------- /credit-scorer/docs/releases/cf52cd4e-5bc2-45e9-b24d-ad5f2902829f/monitoring_plan.json: -------------------------------------------------------------------------------- 1 | { 2 | "plan_id": "monitoring_plan_2025-05-22T19-12-38.288688", 3 | "model_id": "unknown", 4 | "created_at": "2025-05-22T19:12:38.288688", 5 | "description": "Post-market monitoring plan for credit scoring model", 6 | "monitoring_frequency": { 7 | "data_drift": "daily", 8 | "performance_evaluation": "weekly", 9 | "fairness_audit": "monthly" 10 | }, 11 | "alert_thresholds": { 12 | "accuracy_drop": 0.7398571428571429, 13 | "data_drift_score": 0.15, 14 | "fairness_metrics": { 15 | "disparate_impact_min": 0.8, 16 | "disparate_impact_max": 1.25 17 | } 18 | }, 19 | "response_procedures": { 20 | "minor_drift": "Log and monitor more frequently", 21 | "significant_drift": "Alert data science team and investigate", 22 | "critical_issue": "Trigger incident response and model reevaluation" 23 | }, 24 | "responsible_parties": { 25 | "monitoring_owner": "data_science_team@example.com", 26 | "escalation_contact": "compliance_officer@example.com" 27 | } 28 | } -------------------------------------------------------------------------------- /credit-scorer/docs/releases/cf52cd4e-5bc2-45e9-b24d-ad5f2902829f/risk_scores.yaml: -------------------------------------------------------------------------------- 1 | hazards: 2 | - description: Unfair bias against protected demographic groups 3 | id: BIAS_PROTECTED_GROUPS 4 | mitigation: Re-sample training data; add fairness constraints or post-processing 5 | techniques 6 | severity: high 7 | - description: ROC-AUC risk proxy > 0.3 indicates drift fragility 8 | id: DRIFT_VULNERABILITY 9 | mitigation: Enable drift monitoring; schedule periodic retraining 10 | severity: medium 11 | overall: 0.65 12 | risk_auc: 0.5 13 | risk_bias: 0.8 14 | risk_register_path: docs/risk/risk_register.xlsx 15 | 
-------------------------------------------------------------------------------- /credit-scorer/docs/risk/risk_register.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/credit-scorer/docs/risk/risk_register.xlsx -------------------------------------------------------------------------------- /credit-scorer/modal_app/__init__.py: -------------------------------------------------------------------------------- 1 | """Modal app for the project.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/requirements.txt: -------------------------------------------------------------------------------- 1 | cyclonedx-python-lib>=10.0.1 2 | fairlearn>=0.12.0 3 | lightgbm>=4.6.0 4 | markdown>=3.8 5 | matplotlib>=3.10.3 6 | modal>=0.74.55 7 | openpyxl>=3.1.5 8 | pandas>=2.2.3 9 | plotly>=6.0.1 10 | scikit-learn>=1.6.1 11 | seaborn>=0.13.2 12 | slack-sdk>=3.35.0 13 | streamlit>=1.45.1 14 | streamlit-option-menu>=0.4.0 15 | tabulate>=0.9.0 16 | weasyprint>=65.1 17 | xlsxwriter>=3.2.3 18 | zenml>=0.82.1 -------------------------------------------------------------------------------- /credit-scorer/run_dashboard.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | """Entry point script to run the Streamlit dashboard application.""" 3 | 4 | import os 5 | import subprocess 6 | import sys 7 | 8 | # Ensure the project root is in the Python path 9 | PROJECT_ROOT = os.path.dirname(os.path.abspath(__file__)) 10 | sys.path.insert(0, PROJECT_ROOT) 11 | 12 | 13 | def run_dashboard(): 14 | """Run the Streamlit dashboard.""" 15 | # Set PYTHONPATH environment variable to include the project root 16 | env = os.environ.copy() 17 | 18 | # Prepend the project root to PYTHONPATH if it is already set, 19 | # otherwise set PYTHONPATH to the project root 20 | if "PYTHONPATH" in env: 21 | env["PYTHONPATH"] = f"{PROJECT_ROOT}:{env['PYTHONPATH']}" 22 | else: 23 | env["PYTHONPATH"] = PROJECT_ROOT 24 | 25 | script_path = os.path.join(PROJECT_ROOT, "streamlit_app", "main.py") 26 | cmd = ["streamlit", "run", script_path] 27 | 28 | print(f"Starting dashboard at: {script_path}") 29 | print(f"PYTHONPATH: {env['PYTHONPATH']}") 30 | subprocess.run(cmd, env=env) 31 | 32 | 33 | if __name__ == "__main__": 34 | run_dashboard() 35 | -------------------------------------------------------------------------------- /credit-scorer/src/__init__.py: -------------------------------------------------------------------------------- 1 | """Main module for the project.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/src/configs/feature_engineering.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | enable_artifact_metadata: True 3 | enable_artifact_visualization: True 4 | enable_step_logs: True 5 | 6 | settings: 7 | docker: 8 | required_integrations: 9 | - s3 10 | - aws 11 | - sklearn 12 | requirements: requirements.txt 13 | python_package_installer: "uv" 14 | resources: 15 | cpu_count: 2 16 | memory: "1GB" 17 | 18 | parameters: 19 | test_size: 0.35 20 | sample_fraction: null 21 | dataset_path: "src/data/credit_scoring.csv" 22 | target: "TARGET" 23 | sensitive_attributes: 24 | - "CODE_GENDER" 25 | - "DAYS_BIRTH" 26 | - "AGE_YEARS" # derived from DAYS_BIRTH in data_preprocessor 27 | - 
"NAME_EDUCATION_TYPE" 28 | - "NAME_FAMILY_STATUS" 29 | - "NAME_HOUSING_TYPE" 30 | 31 | compliance: 32 | data_governance: 33 | data_quality_profile: True # Enable WhyLogs profiling 34 | data_schema_validation: True # Enable schema validation 35 | minimum_feature_coverage: 0.95 # Minimum required feature coverage 36 | -------------------------------------------------------------------------------- /credit-scorer/src/constants/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .annotations import * 19 | from .config import * 20 | from .risk import * 21 | -------------------------------------------------------------------------------- /credit-scorer/src/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """Pipelines for the project.""" 19 | 20 | from .deployment import deployment 21 | from .feature_engineering import feature_engineering 22 | from .training import training 23 | -------------------------------------------------------------------------------- /credit-scorer/src/steps/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | """Deployment steps for the project.""" 19 | 20 | from .approve import approve_deployment 21 | from .deploy import modal_deployment 22 | from .generate_dashboard import generate_compliance_dashboard 23 | from .generate_sbom import generate_sbom 24 | from .post_market_monitoring import post_market_monitoring 25 | from .post_run_annex import generate_annex_iv_documentation 26 | -------------------------------------------------------------------------------- /credit-scorer/src/steps/feature_engineering/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """Feature engineering steps for the project.""" 19 | 20 | from .data_preprocessor import data_preprocessor 21 | from .data_profiler import data_profiler 22 | from .data_splitter import data_splitter 23 | from .ingest import ingest 24 | 25 | __all__ = ["data_preprocessor", "data_splitter", "ingest", "data_profiler"] 26 | -------------------------------------------------------------------------------- /credit-scorer/src/steps/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """Training steps for the project.""" 19 | 20 | from .evaluate import evaluate_model 21 | from .risk_assessment import risk_assessment 22 | from .train import train_model 23 | -------------------------------------------------------------------------------- /credit-scorer/src/utils/compliance/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """Utility functions for the compliance module.""" 19 | 20 | from .compliance_constants import ( 21 | ARTICLE_DESCRIPTIONS, 22 | COMPLIANCE_DATA_SOURCES, 23 | DEFAULT_COMPLIANCE_PATHS, 24 | EU_AI_ACT_ARTICLES, 25 | ) 26 | from .exceptions import ( 27 | ComplianceCalculationError, 28 | ComplianceDataError, 29 | ComplianceError, 30 | ) 31 | from .schemas import ( 32 | ComplianceThresholds, 33 | EUArticle, 34 | RiskCategory, 35 | RiskStatus, 36 | ) 37 | -------------------------------------------------------------------------------- /credit-scorer/src/utils/visualizations/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | """HTML component utilities for rendering compliance dashboards.""" 19 | 20 | from .dashboard import generate_compliance_dashboard_html 21 | from .eval import generate_eval_visualization 22 | from .whylogs import generate_whylogs_visualization 23 | 24 | __all__ = [ 25 | "generate_eval_visualization", 26 | "generate_whylogs_visualization", 27 | "generate_compliance_dashboard_html", 28 | ] 29 | -------------------------------------------------------------------------------- /credit-scorer/streamlit_app/__init__.py: -------------------------------------------------------------------------------- 1 | """Streamlit dashboard application for EU AI Act compliance monitoring.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/streamlit_app/components/__init__.py: -------------------------------------------------------------------------------- 1 | """Dashboard UI components.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/streamlit_app/data/__init__.py: -------------------------------------------------------------------------------- 1 | """Data access and processing modules.""" 2 | -------------------------------------------------------------------------------- /credit-scorer/streamlit_app/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """Utility modules for the dashboard.""" 2 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/.assets/00_pipelines_composition.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/databricks-production-qa-demo/.assets/00_pipelines_composition.png -------------------------------------------------------------------------------- /databricks-production-qa-demo/.copier-answers.yml: 
-------------------------------------------------------------------------------- 1 | # Changes here will be overwritten by Copier 2 | _commit: 2024.06.06 3 | _src_path: gh:zenml-io/template-e2e-batch 4 | data_quality_checks: true 5 | email: info@zenml.io 6 | full_name: ZenML GmbH 7 | hyperparameters_tuning: true 8 | metric_compare_promotion: true 9 | notify_on_failures: true 10 | notify_on_successes: false 11 | open_source_license: apache 12 | product_name: production_line_qa 13 | project_name: ZenML E2E project 14 | target_environment: staging 15 | version: 0.0.1 16 | zenml_server_url: '' 17 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* -------------------------------------------------------------------------------- /databricks-production-qa-demo/LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/Makefile: -------------------------------------------------------------------------------- 1 | stack_name ?= e2e_template_stack 2 | setup: 3 | pip install -r requirements.txt 4 | zenml integration install aws sklearn mlflow slack evidently kubeflow kubernetes -y 5 | 6 | install-stack-local: 7 | @echo "Specify stack name [$(stack_name)]: " && read input && [ -n "$$input" ] && stack_name="$$input" || stack_name="$(stack_name)" && \ 8 | zenml experiment-tracker register -f mlflow mlflow_local_$${stack_name} && \ 9 | zenml model-registry register -f mlflow mlflow_local_$${stack_name} && \ 10 | zenml model-deployer register -f mlflow mlflow_local_$${stack_name} && \ 11 | zenml data-validator register -f evidently evidently_$${stack_name} && \ 12 | zenml stack register -a default -o default -r mlflow_local_$${stack_name} \ 13 | -d mlflow_local_$${stack_name} -e mlflow_local_$${stack_name} -dv \ 14 | evidently_$${stack_name} $${stack_name} && \ 15 | zenml stack set $${stack_name} && \ 16 | zenml stack up 17 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .batch_inference import production_line_qa_batch_inference 20 | from .deployment import production_line_qa_deployment 21 | from .training import production_line_qa_training 22 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server]>=0.70.0 2 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/alerts/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .notify_on import notify_on_failure, notify_on_success 20 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/data_quality/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .drift_quality_gate import drift_quality_gate 20 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/deployment/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .deployment_deploy import deployment_deploy 20 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/etl/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .data_loader import data_loader 20 | from .inference_data_preprocessor import inference_data_preprocessor 21 | from .train_data_preprocessor import train_data_preprocessor 22 | from .train_data_splitter import train_data_splitter 23 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/explainability/__init__.py: -------------------------------------------------------------------------------- 1 | from .shap_explainer import explain_model 2 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/hp_tuning/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .hp_tuning_select_best_model import hp_tuning_select_best_model 20 | from .hp_tuning_single_search import hp_tuning_single_search 21 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/inference/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .inference_predict import inference_predict 20 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/promotion/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from .compute_performance_metrics_on_current_data import ( 18 | compute_performance_metrics_on_current_data, 19 | ) 20 | from .promote_with_metric_compare import promote_with_metric_compare 21 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/steps/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .model_evaluator import model_evaluator 20 | from .model_trainer import model_trainer 21 | -------------------------------------------------------------------------------- /databricks-production-qa-demo/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .get_model_from_config import get_model_from_config 20 | from .promote_in_model_registry import promote_in_model_registry 21 | -------------------------------------------------------------------------------- /deep_research/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/deep_research/__init__.py -------------------------------------------------------------------------------- /deep_research/assets/pipeline_visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/deep_research/assets/pipeline_visualization.png -------------------------------------------------------------------------------- /deep_research/assets/sample_report.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/deep_research/assets/sample_report.gif -------------------------------------------------------------------------------- /deep_research/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Materializers package for the ZenML Deep Research project. 3 | 4 | This package contains custom ZenML materializers that handle serialization and 5 | deserialization of complex data types used in the research pipeline. 6 | """ 7 | 8 | from .analysis_data_materializer import AnalysisDataMaterializer 9 | from .approval_decision_materializer import ApprovalDecisionMaterializer 10 | from .final_report_materializer import FinalReportMaterializer 11 | from .mcp_result_materializer import MCPResultMaterializer 12 | from .prompt_materializer import PromptMaterializer 13 | from .query_context_materializer import QueryContextMaterializer 14 | from .search_data_materializer import SearchDataMaterializer 15 | from .synthesis_data_materializer import SynthesisDataMaterializer 16 | from .tracing_metadata_materializer import TracingMetadataMaterializer 17 | 18 | __all__ = [ 19 | "ApprovalDecisionMaterializer", 20 | "PromptMaterializer", 21 | "TracingMetadataMaterializer", 22 | "QueryContextMaterializer", 23 | "SearchDataMaterializer", 24 | "SynthesisDataMaterializer", 25 | "AnalysisDataMaterializer", 26 | "FinalReportMaterializer", 27 | "MCPResultMaterializer", 28 | ] 29 | -------------------------------------------------------------------------------- /deep_research/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Pipelines package for the ZenML Deep Research project. 3 | 4 | This package contains the ZenML pipeline definitions for running deep research 5 | workflows. Each pipeline orchestrates a sequence of steps for comprehensive 6 | research on a given query topic. 
7 | """ 8 | 9 | from .parallel_research_pipeline import parallelized_deep_research_pipeline 10 | 11 | __all__ = ["parallelized_deep_research_pipeline"] 12 | -------------------------------------------------------------------------------- /deep_research/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml>=0.82.0 2 | litellm>=1.70.0,<2.0.0 3 | tavily-python>=0.2.8 4 | exa-py>=1.0.0 5 | PyYAML>=6.0 6 | click>=8.0.0 7 | pydantic>=2.0.0 8 | typing_extensions>=4.0.0 9 | requests 10 | langfuse>=2.0.0 11 | anthropic>=0.52.2 12 | -------------------------------------------------------------------------------- /deep_research/steps/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Steps package for the ZenML Deep Research project. 3 | 4 | This package contains individual ZenML steps used in the research pipelines. 5 | Each step is responsible for a specific part of the research process, such as 6 | query decomposition, searching, synthesis, and report generation. 7 | """ 8 | -------------------------------------------------------------------------------- /deep_research/tests/__init__.py: -------------------------------------------------------------------------------- 1 | """Test package for ZenML Deep Research project.""" 2 | -------------------------------------------------------------------------------- /deep_research/tests/conftest.py: -------------------------------------------------------------------------------- 1 | """Test configuration for pytest. 2 | 3 | This file sets up the proper Python path for importing modules in tests. 4 | """ 5 | 6 | import os 7 | import sys 8 | 9 | # Add the project root directory to the Python path 10 | project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) 11 | sys.path.insert(0, project_root) 12 | -------------------------------------------------------------------------------- /deep_research/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utilities package for the ZenML Deep Research project. 3 | 4 | This package contains various utility functions and helpers used throughout the project, 5 | including data models, LLM interaction utilities, search functionality, and common helper 6 | functions for text processing and state management. 7 | """ 8 | -------------------------------------------------------------------------------- /deep_research/utils/prompt_models.py: -------------------------------------------------------------------------------- 1 | """Pydantic models for prompt tracking and management. 2 | 3 | This module contains models for tracking prompts as artifacts 4 | in the ZenML pipeline, enabling better observability and version control. 
5 | """ 6 | 7 | from pydantic import BaseModel, Field 8 | 9 | 10 | class PromptTemplate(BaseModel): 11 | """Represents a single prompt template with metadata.""" 12 | 13 | name: str = Field(..., description="Unique identifier for the prompt") 14 | content: str = Field(..., description="The actual prompt template content") 15 | description: str = Field( 16 | "", description="Human-readable description of what this prompt does" 17 | ) 18 | version: str = Field("1.0.0", description="Version of the prompt template") 19 | tags: list[str] = Field( 20 | default_factory=list, description="Tags for categorizing prompts" 21 | ) 22 | 23 | model_config = { 24 | "extra": "ignore", 25 | "frozen": False, 26 | "validate_assignment": True, 27 | } 28 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* 3 | **/tmp* 4 | data/* 5 | *.jpg 6 | *.pt 7 | notebooks/* 8 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/.gitignore: -------------------------------------------------------------------------------- 1 | *.pt 2 | data/* 3 | !data/.gitkeep 4 | images 5 | loaded-images 6 | runs/ 7 | **/tmp* 8 | runs_dir 9 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/_assets/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/_assets/diagram.png -------------------------------------------------------------------------------- /end-to-end-computer-vision/_assets/labeling_interface.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/_assets/labeling_interface.png -------------------------------------------------------------------------------- /end-to-end-computer-vision/_assets/labeling_setup.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/_assets/labeling_setup.png -------------------------------------------------------------------------------- /end-to-end-computer-vision/_assets/project_creation_label_studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/_assets/project_creation_label_studio.png -------------------------------------------------------------------------------- /end-to-end-computer-vision/bus.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/bus.jpg -------------------------------------------------------------------------------- /end-to-end-computer-vision/configs/data_export.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # pipeline configuration 4 | parameters: 5 | dataset_name: "ship_detection_gcp" # This is the name of the dataset 
in label studio 6 | 7 | # Configuration of the Model Control Plane 8 | model: 9 | name: ShipDetector 10 | license: Apache 2.0 11 | description: Object Detection Model. 12 | tags: ["object detection"] 13 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/configs/inference_pipeline.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | settings: 4 | docker: 5 | apt_packages: 6 | - ffmpeg 7 | - libsm6 8 | - libxext6 9 | - libcurl4 10 | required_integrations: 11 | - gcp # For AWS use "s3" instead of "gcp" 12 | - github 13 | requirements: 14 | - ultralytics 15 | - fiftyone 16 | 17 | steps: 18 | create_fiftyone_dataset: 19 | enable_cache: False 20 | enable_step_logs: False 21 | parameters: 22 | inference_data_source: # Set this to the path to a data source, for example "gs://zenml-20219041791-054405/ship_detection_ds" 23 | 24 | # configuration of the Model Control Plane 25 | model: 26 | name: ShipDetector 27 | license: Apache 2.0 28 | description: Object Detection Model. 29 | tags: ["object detection"] 30 | version: production -------------------------------------------------------------------------------- /end-to-end-computer-vision/configs/ingest_data.yaml: -------------------------------------------------------------------------------- 1 | steps: 2 | process_hf_dataset: 3 | enable_cache: True 4 | enable_step_logs: False 5 | parameters: 6 | dataset: "datadrivenscience/ship-detection" 7 | data_source: # Set this to the path to a data source, for example "gs://zenml-20219041791-054405/ship_detection_ds" 8 | upload_labels_to_label_studio: 9 | enable_cache: False 10 | parameters: 11 | ls_project_id: 1 # Adjust to reflect the id of your label studio project 12 | ls_storage_id: 1 # Adjust to reflect the id of your label studio storage 13 | storage_type: gcs # For AWS use "s3" instead of "gcs" (the default value) 14 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/configs/training_pipeline.yaml: -------------------------------------------------------------------------------- 1 | parameters: 2 | model_checkpoint: yolov8l.pt 3 | 4 | steps: 5 | load_model: 6 | enable_cache: True 7 | train_model: 8 | enable_cache: False 9 | parameters: 10 | data_source: # Insert your bucket path here where the training images lives e.g. "gs://foo/bar" 11 | batch_size: 8 12 | imgsz: 720 13 | epochs: 1 14 | is_apple_silicon_env: False 15 | 16 | settings: 17 | docker: 18 | apt_packages: 19 | - ffmpeg 20 | - libsm6 21 | - libxext6 22 | required_integrations: 23 | - gcp 24 | - github 25 | requirements: 26 | - ultralytics 27 | 28 | # configuration of the Model Control Plane 29 | model: 30 | name: ShipDetector 31 | license: Apache 2.0 32 | description: Object Detection Model. 33 | tags: ["object detection"] 34 | version: staging 35 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/data/.gitignore: -------------------------------------------------------------------------------- 1 | # Ignore everything in this directory 2 | * 3 | # Except this file 4 | !.gitignore 5 | !README.md -------------------------------------------------------------------------------- /end-to-end-computer-vision/data/README.md: -------------------------------------------------------------------------------- 1 | This directory serves as a place to store and access temporary datafiles. 
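The YAML files above (data_export.yaml, ingest_data.yaml, training_pipeline.yaml, inference_pipeline.yaml) carry the pipeline parameters, Docker settings, and Model Control Plane block for the end-to-end-computer-vision project. Such a config is normally attached to a pipeline at run time through ZenML's with_options; the following is a minimal sketch only, assuming a run.py-style entry point and the config path shown above — the project's actual run.py is not part of this section.

# Minimal sketch (not the project's run.py): apply one of the YAML configs
# above to the training pipeline and trigger a run.
from pipelines import training_pipeline  # exported by pipelines/__init__.py, listed just below

if __name__ == "__main__":
    # with_options(config_path=...) loads the parameters, Docker settings,
    # and Model Control Plane block defined in the YAML file before running.
    training_pipeline.with_options(
        config_path="configs/training_pipeline.yaml"
    )()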
-------------------------------------------------------------------------------- /end-to-end-computer-vision/materializers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/materializers/__init__.py -------------------------------------------------------------------------------- /end-to-end-computer-vision/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .data_export import data_export_pipeline 19 | from .data_ingestion import data_ingestion_pipeline 20 | from .inference import inference_pipeline 21 | from .training import training_pipeline 22 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/pipelines/data_export.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from steps.export_label_studio import load_data_from_label_studio 18 | from zenml import pipeline 19 | from zenml.logger import get_logger 20 | 21 | logger = get_logger(__name__) 22 | 23 | 24 | @pipeline 25 | def data_export_pipeline(dataset_name: str = "polution"): 26 | """Loads data from Label studio. 27 | 28 | Args: 29 | dataset_name: Name of the dataset to load. 30 | """ 31 | load_data_from_label_studio(dataset_name=dataset_name) 32 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/pipelines/data_ingestion.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from steps.process_hf_dataset import process_hf_dataset 18 | from steps.upload_to_label_studio import upload_labels_to_label_studio 19 | from zenml import pipeline 20 | from zenml.logger import get_logger 21 | 22 | logger = get_logger(__name__) 23 | 24 | 25 | @pipeline 26 | def data_ingestion_pipeline(): 27 | labels_dict = process_hf_dataset() 28 | upload_labels_to_label_studio(labels_dict) 29 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/pipelines/inference.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from steps.fiftyone_inference import create_fiftyone_dataset 18 | from zenml import pipeline 19 | from zenml.logger import get_logger 20 | 21 | logger = get_logger(__name__) 22 | 23 | 24 | @pipeline 25 | def inference_pipeline(): 26 | """Uses FiftyOne for inference on a dataset.""" 27 | create_fiftyone_dataset() 28 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server]>=0.70.0 2 | notebook 3 | scikit-learn<1.3 4 | pyarrow 5 | seaborn 6 | xgboost 7 | ultralytics 8 | torch 9 | huggingface_hub>=0.20.0 10 | fiftyone 11 | datasets 12 | albumentations 13 | pillow>=10.0.0 14 | dill -------------------------------------------------------------------------------- /end-to-end-computer-vision/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/steps/__init__.py -------------------------------------------------------------------------------- /end-to-end-computer-vision/steps/predict_image.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from ultralytics import YOLO 18 | from zenml import step 19 | 20 | 21 | @step 22 | def predict_image(model: YOLO): 23 | """Predicts an image using the model. 24 | 25 | Args: 26 | model: YOLO model to use for prediction. 27 | """ 28 | results = model("https://ultralytics.com/images/bus.jpg") 29 | print(results) 30 | -------------------------------------------------------------------------------- /end-to-end-computer-vision/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/end-to-end-computer-vision/utils/__init__.py -------------------------------------------------------------------------------- /end-to-end-computer-vision/utils/constants.py: -------------------------------------------------------------------------------- 1 | # Dataset export from labelstudio 2 | LABELED_DATASET_NAME = "ship_od_dataset" 3 | 4 | # Trained Model 5 | TRAINED_MODEL_NAME = "Trained_YOLO" 6 | 7 | # Name of Model in ZenML Model Control Plane 8 | ZENML_MODEL_NAME = "ShipDetector" 9 | 10 | # Constants for inference pipeline 11 | PREDICTIONS_DATASET_ARTIFACT_NAME = "predictions_dataset_json" 12 | DATASET_NAME = "ships" 13 | DATASET_DIR = "data/ships/subset" 14 | -------------------------------------------------------------------------------- /eurorate-predictor/.assets/zenml_airflow_vertex_gcp_mlops.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/eurorate-predictor/.assets/zenml_airflow_vertex_gcp_mlops.png -------------------------------------------------------------------------------- /eurorate-predictor/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* -------------------------------------------------------------------------------- /eurorate-predictor/.gitignore: -------------------------------------------------------------------------------- 1 | .zen 2 | tmp/ -------------------------------------------------------------------------------- /eurorate-predictor/LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | -------------------------------------------------------------------------------- /eurorate-predictor/Makefile: -------------------------------------------------------------------------------- 1 | stack_name ?= e2e_template_stack 2 | setup: 3 | pip install -r requirements.txt 4 | zenml integration install aws sklearn mlflow slack evidently kubeflow kubernetes -y 5 | 6 | install-stack-local: 7 | @echo "Specify stack name [$(stack_name)]: " && read input && [ -n "$$input" ] && stack_name="$$input" || stack_name="$(stack_name)" && \ 8 | zenml experiment-tracker register -f mlflow mlflow_local_$${stack_name} && \ 9 | zenml model-registry register -f mlflow mlflow_local_$${stack_name} && \ 10 | zenml model-deployer register -f mlflow mlflow_local_$${stack_name} && \ 11 | zenml data-validator register -f evidently evidently_$${stack_name} && \ 12 | zenml stack register -a default -o default -r mlflow_local_$${stack_name} \ 13 | -d mlflow_local_$${stack_name} -e mlflow_local_$${stack_name} -dv \ 14 | evidently_$${stack_name} $${stack_name} && \ 15 | zenml stack set $${stack_name} && \ 16 | zenml stack up 17 | -------------------------------------------------------------------------------- /eurorate-predictor/configs/etl_develop.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | enable_cache: false 3 | settings: 4 | docker: 5 | required_integrations: 6 | - xgboost 7 | requirements: 8 | - zenml[server] 9 | - pandas 10 | - xgboost 11 | - google-cloud-bigquery 12 | - pyarrow 13 | - db-dtypes 14 | 15 | # pipeline configuration 16 | model: 17 | name: ecb_interest_rate_model 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 21 | 22 | steps: 23 | extract_data_local: 24 | parameters: 25 | data_path: data/raw_data.csv 26 | -------------------------------------------------------------------------------- /eurorate-predictor/configs/etl_production.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | enable_cache: false 3 | settings: 4 | docker: 5 | required_integrations: 6 | - xgboost 7 | requirements: 8 | - zenml[server] 9 | - pandas 10 | - xgboost 11 | - google-cloud-bigquery 12 | - pyarrow 13 | - db-dtypes 14 | 15 | # pipeline configuration 16 | model: 17 | name: ecb_interest_rate_model 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 21 | 22 | steps: 23 | extract_data_remote: 24 | parameters: 25 | data_path: gs://zenml-internal-artifact-store/raw_data.csv 26 | 27 | transform_bq: 28 | parameters: 29 | table_id: zenml-core.ecb_experiments.ecb_transformed_dataset 30 | -------------------------------------------------------------------------------- /eurorate-predictor/configs/feature_engineering_develop.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - xgboost 6 | requirements: 7 | - zenml[server] 8 | - pandas 9 | - xgboost 10 | - google-cloud-bigquery 11 | - pyarrow 12 | - db-dtypes 13 | 14 | # configuration of the Model Control Plane 15 | model: 16 | name: ecb_interest_rate_model 17 | version: latest 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 
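The eurorate-predictor configs above come in paired *_develop.yaml / *_production.yaml variants, and script.sh (later in this listing) invokes them as `python run.py --etl --mode develop` and so on. The sketch below shows one plausible way such a mode flag could select the matching config file; the project's real run.py is not included in this section, so the argument names and wiring are assumptions.

# Hypothetical sketch of a run.py-style entry point that maps a --mode flag
# onto the *_develop.yaml / *_production.yaml config pairs shown above.
import argparse

from pipelines import ecb_predictor_etl_pipeline  # exported by pipelines/__init__.py below


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--etl", action="store_true", help="Run the ETL pipeline")
    parser.add_argument(
        "--mode", choices=["develop", "production"], default="develop"
    )
    args = parser.parse_args()

    if args.etl:
        # Resolves to configs/etl_develop.yaml or configs/etl_production.yaml.
        ecb_predictor_etl_pipeline.with_options(
            config_path=f"configs/etl_{args.mode}.yaml"
        )()


if __name__ == "__main__":
    main()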
-------------------------------------------------------------------------------- /eurorate-predictor/configs/feature_engineering_production.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - xgboost 6 | requirements: 7 | - zenml[server] 8 | - pandas 9 | - xgboost 10 | - google-cloud-bigquery 11 | - pyarrow 12 | - db-dtypes 13 | 14 | # configuration of the Model Control Plane 15 | model: 16 | name: ecb_interest_rate_model 17 | version: latest 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 21 | steps: 22 | augment_bq: 23 | parameters: 24 | table_id: zenml-core.ecb_experiments.ecb_augmented_dataset 25 | -------------------------------------------------------------------------------- /eurorate-predictor/configs/training_develop.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - xgboost 6 | requirements: 7 | - zenml[server] 8 | - pandas 9 | - xgboost 10 | - google-cloud-bigquery 11 | - pyarrow 12 | - db-dtypes 13 | 14 | # configuration of the Model Control Plane 15 | model: 16 | name: ecb_interest_rate_model 17 | version: latest 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 21 | steps: 22 | train_xgboost_model: 23 | step_operator: gcp-airflow-step-operator 24 | settings: 25 | step_operator.vertex: 26 | accelerator_type: "NVIDIA_TESLA_P100" 27 | accelerator_count: 1 28 | machine_type: "n1-standard-8" 29 | -------------------------------------------------------------------------------- /eurorate-predictor/configs/training_production.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - xgboost 6 | requirements: 7 | - zenml[server] 8 | - pandas 9 | - xgboost 10 | - google-cloud-bigquery 11 | - pyarrow 12 | - db-dtypes 13 | 14 | # configuration of the Model Control Plane 15 | model: 16 | name: ecb_interest_rate_model 17 | version: latest 18 | description: An ECB interest rate prediction model 19 | tags: ["ecb", "interest_rate", "prediction", "xgboost"] 20 | 21 | steps: 22 | train_xgboost_model: 23 | step_operator: gcp-airflow-step-operator 24 | settings: 25 | step_operator.vertex: 26 | accelerator_type: "NVIDIA_TESLA_P100" 27 | accelerator_count: 1 28 | machine_type: "n1-standard-8" -------------------------------------------------------------------------------- /eurorate-predictor/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | 19 | from .bq_dataset import BigQueryDataset 20 | from .bq_dataset_materializer import BigQueryDatasetMaterializer 21 | from .csv_dataset import CSVDataset 22 | from .csv_dataset_materializer import CSVDatasetMaterializer 23 | from .dataset import Dataset 24 | -------------------------------------------------------------------------------- /eurorate-predictor/materializers/csv_dataset.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from typing import Optional 18 | 19 | import pandas as pd 20 | 21 | from materializers.dataset import Dataset 22 | 23 | 24 | class CSVDataset(Dataset): 25 | def __init__(self, data_path: str, df: Optional[pd.DataFrame] = None): 26 | self.data_path = data_path 27 | if df is not None: 28 | self.df = df 29 | else: 30 | self.df = self.read_data() 31 | 32 | def read_data(self) -> pd.DataFrame: 33 | return pd.read_csv(self.data_path) 34 | -------------------------------------------------------------------------------- /eurorate-predictor/materializers/dataset.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | 17 | from abc import ABC, abstractmethod 18 | 19 | import pandas as pd 20 | 21 | 22 | class Dataset(ABC): 23 | @abstractmethod 24 | def __init__(self, *args, **kwargs): 25 | pass 26 | 27 | @abstractmethod 28 | def read_data(self) -> pd.DataFrame: 29 | pass 30 | -------------------------------------------------------------------------------- /eurorate-predictor/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .etl import ecb_predictor_etl_pipeline 19 | from .feature_engineering import ecb_predictor_feature_engineering_pipeline 20 | from .training import ecb_predictor_model_training_pipeline 21 | -------------------------------------------------------------------------------- /eurorate-predictor/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server] 2 | pandas 3 | xgboost 4 | google-cloud-bigquery 5 | pyarrow 6 | gradio 7 | db-dtypes -------------------------------------------------------------------------------- /eurorate-predictor/script.sh: -------------------------------------------------------------------------------- 1 | zenml model delete ecb_interest_rate_model 2 | 3 | zenml stack set local-gcp-step-operator 4 | 5 | python run.py --etl --mode develop 6 | 7 | python run.py --feature --mode develop 8 | 9 | python run.py --training --mode develop -------------------------------------------------------------------------------- /eurorate-predictor/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .etl import ( 20 | extract_data_local, 21 | extract_data_remote, 22 | transform_bq, 23 | transform_csv, 24 | ) 25 | from .feature_engineering import ( 26 | augment_bq, 27 | augment_csv, 28 | ) 29 | from .promotion import promote_model 30 | from .training import train_xgboost_model 31 | -------------------------------------------------------------------------------- /eurorate-predictor/steps/etl/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | 19 | from .extract_data_local import extract_data_local 20 | from .extract_data_remote import extract_data_remote 21 | from .transform import transform_bq, transform_csv 22 | -------------------------------------------------------------------------------- /eurorate-predictor/steps/feature_engineering/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .augment import augment_bq, augment_csv 20 | -------------------------------------------------------------------------------- /eurorate-predictor/steps/promotion/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .promote import promote_model 19 | -------------------------------------------------------------------------------- /eurorate-predictor/steps/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | 19 | from .model_trainer import train_xgboost_model 20 | -------------------------------------------------------------------------------- /gamesense/.assets/model.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/gamesense/.assets/model.png -------------------------------------------------------------------------------- /gamesense/.assets/pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/gamesense/.assets/pipeline.png -------------------------------------------------------------------------------- /gamesense/.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !/materializers/** 3 | !/pipelines/** 4 | !/steps/** 5 | !/utils/** 6 | -------------------------------------------------------------------------------- /gamesense/LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /gamesense/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /gamesense/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /gamesense/requirements.txt: -------------------------------------------------------------------------------- 1 | datasets>=2.19.1 2 | transformers>=4.43.1 3 | peft 4 | bitsandbytes>=0.41.3 5 | scipy 6 | evaluate 7 | rouge_score 8 | nltk 9 | accelerate>=0.30.0 10 | urllib3<2 11 | zenml>=0.62.0 12 | torch>=2.2.0 13 | sentencepiece 14 | huggingface_hub 15 | -------------------------------------------------------------------------------- /gamesense/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .evaluate_model import evaluate_model 19 | from .finetune import finetune 20 | from .log_metadata import log_metadata_from_step_artifact 21 | from .prepare_datasets import prepare_data 22 | from .promote import promote 23 | -------------------------------------------------------------------------------- /gamesense/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /huggingface-sagemaker/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* 3 | gradio/* 4 | .ruff_cache 5 | 6 | !gradio/__init__.py 7 | !gradio/aws_helper.py -------------------------------------------------------------------------------- /huggingface-sagemaker/Dockerfile: -------------------------------------------------------------------------------- 1 | # Use the zenmldocker/zenml image as the base image 2 | FROM zenmldocker/zenml:0.47.0 3 | 4 | # Run 'apt update' to update the package list 5 | RUN apt-get update 6 | 7 | # Install curl without any prompts (assume the default 'yes' to all prompts) 8 | RUN apt-get install -y curl 9 | 10 | # Download the Git LFS installation script and execute it 11 | RUN curl -s https://packagecloud.io/install/repositories/github/git-lfs/script.deb.sh | bash 12 | 13 | # Install Git LFS 14 | RUN apt-get install -y git-lfs 15 | 16 | # Clear out the local repository of retrieved package files to reduce the image size 17 | RUN apt-get clean && rm -rf /var/lib/apt/lists/* 18 | 19 | # Set the working directory to /app 20 | WORKDIR /app 21 | 22 | # Initialize Git in the current directory 23 | # Needed to speed up HF push 24 | RUN git init 25 | 26 | # Install Git LFS 27 | # Needed to speed up HF push 28 | RUN git lfs install -------------------------------------------------------------------------------- /huggingface-sagemaker/Makefile: -------------------------------------------------------------------------------- 1 | stack_name ?= huggingface_local_stack 2 | setup: 3 | pip install -r requirements.txt 4 | zenml integration install pytorch mlflow huggingface aws s3 kubeflow slack github evidently -y 5 | 6 | install-stack: 7 | @echo "Specify stack name [$(stack_name)]: " && read input && [ -n "$$input" ] && stack_name="$$input" || stack_name="$(stack_name)" && \ 8 | zenml experiment-tracker register -f mlflow mlflow_local_$${stack_name} && \ 9 | zenml data-validator register -f evidently evidently_local_$${stack_name} && \ 10 | zenml model-registry register -f mlflow mlflow_local_$${stack_name} && \ 11 | zenml model-deployer register -f mlflow mlflow_local_$${stack_name} && \ 12 | zenml stack register -a default -o default -r mlflow_local_$${stack_name} \ 13 | -d mlflow_local_$${stack_name} -e mlflow_local_$${stack_name} $${stack_name} && \ 14 | zenml stack set $${stack_name} && \ 15 | zenml stack up 16 | -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/deploying_pipeline_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/deploying_pipeline_overview.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/header.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/hf_repo_commit.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/hf_repo_commit.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/mcp_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/mcp_1.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/mcp_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/mcp_2.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/nlp_zenml_demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/nlp_zenml_demo.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/pipelines_feature_eng.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/pipelines_feature_eng.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/pipelines_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/pipelines_overview.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/promoting_pipeline_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/promoting_pipeline_overview.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/training_pipeline_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/training_pipeline_overview.png -------------------------------------------------------------------------------- /huggingface-sagemaker/assets/training_pipeline_with_hf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/huggingface-sagemaker/assets/training_pipeline_with_hf.png -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | 
.eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | *.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | .vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/Dockerfile: -------------------------------------------------------------------------------- 1 | # read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker 2 | # you will also find guides on how best to write your Dockerfile 3 | 4 | FROM python:3.9 5 | 6 | WORKDIR /code 7 | 8 | COPY ./requirements.txt /code/requirements.txt 9 | 10 | RUN pip install --no-cache-dir --upgrade -r /code/requirements.txt 11 | 12 | # Set up a new user named "user" with user ID 1000 13 | RUN useradd -m -u 1000 user 14 | # Switch to the "user" user 15 | USER user 16 | # Set home to the user's home directory 17 | ENV HOME=/home/user \ 18 | PATH=/home/user/.local/bin:$PATH 19 | 20 | # Set the working directory to the user's home directory 21 | WORKDIR $HOME/app 22 | 23 | # Copy the current directory contents into the container at $HOME/app setting the owner to the user 24 | COPY --chown=user . $HOME/app 25 | 26 | CMD ["python", "app.py", "--server.port=7860", "--server.address=0.0.0.0"] -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/aws_helper.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import boto3 4 | import sagemaker 5 | 6 | # Assign default value if env variable not found 7 | REGION_NAME = os.getenv("AWS_REGION", "us-east-1") 8 | ROLE_NAME = os.getenv("AWS_ROLE_NAME", "zenml-connectors") 9 | os.environ["AWS_DEFAULT_REGION"] = REGION_NAME 10 | 11 | auth_arguments = { 12 | "aws_access_key_id": os.getenv("AWS_ACCESS_KEY_ID", None), 13 | "aws_secret_access_key": os.getenv("AWS_SECRET_ACCESS_KEY", None), 14 | "aws_session_token": os.getenv("AWS_SESSION_TOKEN", None), 15 | "region_name": REGION_NAME, 16 | } 17 | 18 | 19 | def get_sagemaker_role(): 20 | iam = boto3.client("iam", **auth_arguments) 21 | role = iam.get_role(RoleName=ROLE_NAME)["Role"]["Arn"] 22 | return role 23 | 24 | 25 | def get_sagemaker_session(): 26 | session = sagemaker.Session(boto3.Session(**auth_arguments)) 27 | return session 28 | -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/requirements.txt: -------------------------------------------------------------------------------- 1 | nltk 2 | torch 3 | torchvision 4 | torchaudio 5 | gradio 6 | datasets==2.12.0 7 | numpy==1.22.4 8 | pandas==1.5.3 9 | session_info==1.0.0 10 | scikit-learn==1.2.2 11 | transformers==4.28.1 12 | IPython==7.34.0 -------------------------------------------------------------------------------- /huggingface-sagemaker/gradio/serve.yaml: -------------------------------------------------------------------------------- 1 | # Task name (optional), used for display purposes. 2 | name: ZenML NLP project 3 | 4 | resources: 5 | cloud: gcp # The cloud to use (optional). 6 | # Working directory (optional), synced to ~/sky_workdir on the remote cluster 7 | # each time launch or exec is run with the yaml file. 8 | # 9 | # Commands in "setup" and "run" will be executed under it. 10 | # 11 | # If a .gitignore file (or a .git/info/exclude file) exists in the working 12 | # directory, files and directories listed in it will be excluded from syncing. 13 | workdir: ./gradio 14 | 15 | setup: | 16 | echo "Begin setup." 17 | pip install -r requirements.txt 18 | echo "Setup complete." 19 | 20 | run: | 21 | echo 'Starting gradio app...' 22 | python app.py -------------------------------------------------------------------------------- /huggingface-sagemaker/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | 19 | from .deploying import sentinment_analysis_deploy_pipeline 20 | from .feature_engineering import ( 21 | sentinment_analysis_feature_engineering_pipeline, 22 | ) 23 | from .promoting import sentinment_analysis_promote_pipeline 24 | from .training import sentinment_analysis_training_pipeline 25 | -------------------------------------------------------------------------------- /huggingface-sagemaker/requirements.txt: -------------------------------------------------------------------------------- 1 | torch==2.1.1 2 | torchvision==0.16.1 3 | zenml[server]>=0.72.0 4 | sagemaker==2.117.0 5 | cuda-python==12.3.0 6 | nvidia-cuda-cupti-cu12==12.1.105 7 | nvidia-cuda-nvrtc-cu12==12.1.105 8 | nvidia-cuda-runtime-cu12==12.1.105 9 | datasets==2.14.7 10 | transformers==4.31.0 11 | accelerate==0.24.1 -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/alerts/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .notify_on import notify_on_failure, notify_on_success 20 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/dataset_loader/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .data_loader import data_loader 19 | from .generate_reference_and_comparison_datasets import ( 20 | generate_reference_and_comparison_datasets, 21 | ) 22 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/deploying/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .huggingface_deployment import deploy_to_huggingface 20 | from .sagemaker_deployment import deploy_hf_to_sagemaker 21 | from .save_model import save_model_to_deploy 22 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/promotion/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | from .promote_get_metrics import promote_get_metrics 18 | from .promote_metric_compare_promoter import promote_metric_compare_promoter 19 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/registerer/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .model_log_register import register_model 20 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/tokenization/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .tokenization import tokenization_step 20 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/tokenizer_loader/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | 19 | from .tokenizer_loader import tokenizer_loader 20 | -------------------------------------------------------------------------------- /huggingface-sagemaker/steps/training/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2023. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | from .model_trainer import model_trainer 19 | -------------------------------------------------------------------------------- /llm-complete-guide/.assets/argilla_secret.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/argilla_secret.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/huggingface-space-rag-deployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/huggingface-space-rag-deployment.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/rag-pipeline-zenml-cloud.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/rag-pipeline-zenml-cloud.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/supabase-connection-string.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/supabase-connection-string.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/supabase-create-project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/supabase-create-project.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/tsne.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/tsne.png -------------------------------------------------------------------------------- /llm-complete-guide/.assets/umap.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/.assets/umap.png -------------------------------------------------------------------------------- /llm-complete-guide/.dockerignore: -------------------------------------------------------------------------------- 1 | * 2 | !/pipelines/** 3 | !/steps/** 4 | !/materializers/** 5 | !/evaluate/** 6 | !/finetune/** 7 | !/generate/** 8 | !/lit_gpt/** 9 | !/scripts/** 10 | -------------------------------------------------------------------------------- /llm-complete-guide/LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 
7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /llm-complete-guide/ZENML_VERSION.txt: -------------------------------------------------------------------------------- 1 | 0.75.0 2 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/dev/embeddings.yaml: -------------------------------------------------------------------------------- 1 | # enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | python_package_installer: "uv" 7 | requirements: 8 | - langchain-community 9 | - ratelimit 10 | - langchain>=0.0.325 11 | - langchain-openai 12 | - pgvector 13 | - psycopg2-binary 14 | - beautifulsoup4 15 | - unstructured 16 | - pandas 17 | - numpy 18 | - sentence-transformers>=3 19 | - transformers[torch]==4.43.1 20 | - litellm 21 | - ollama 22 | - tiktoken 23 | - umap-learn 24 | - matplotlib 25 | - pyarrow 26 | - rerankers[flashrank] 27 | - datasets 28 | - torch 29 | - pygithub 30 | - openai 31 | environment: 32 | ZENML_PROJECT_SECRET_NAME: llm_complete -------------------------------------------------------------------------------- /llm-complete-guide/configs/dev/rag.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | settings: 4 | docker: 5 | requirements: 6 | - unstructured 7 | - sentence-transformers>=3 8 | - pgvector 9 | - datasets 10 | - litellm 11 | - numpy 12 | - psycopg2-binary 13 | - tiktoken 14 | - ratelimit 15 | - rerankers 16 | - pygithub 17 | - rerankers[flashrank] 18 | - matplotlib 19 | - elasticsearch 20 | 21 | environment: 22 | ZENML_PROJECT_SECRET_NAME: llm_complete 23 | ZENML_ENABLE_RICH_TRACEBACK: FALSE 24 | ZENML_LOGGING_VERBOSITY: INFO 25 | python_package_installer: "uv" 26 | steps: 27 | url_scraper: 28 | parameters: 29 | docs_url: https://docs.zenml.io/ 30 | use_dev_set: true 31 | index_generator: 32 | parameters: 33 | index_type: pinecone 34 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/dev/rag_eval.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - unstructured 8 | - sentence-transformers>=3 9 | - pgvector 10 | - datasets 11 | - litellm 12 | - numpy 13 | - psycopg2-binary 14 | - tiktoken 15 | - pygithub 16 | - elasticsearch 17 | python_package_installer: "uv" 18 | 19 | steps: 20 | url_scraper: 21 | parameters: 22 | docs_url: https://docs.zenml.io/ 23 | use_dev_set: true 24 | index_generator: 25 | parameters: 26 | index_type: pinecone -------------------------------------------------------------------------------- /llm-complete-guide/configs/dev/synthetic.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: 5 | - langchain-community 6 | - ratelimit 7 | - langchain>=0.0.325 8 | - langchain-openai 9 | - pgvector 10 | - psycopg2-binary 11 | - 
beautifulsoup4 12 | - unstructured 13 | - pandas 14 | - numpy 15 | - sentence-transformers>=3 16 | - transformers==4.43.1 17 | - litellm 18 | - ollama 19 | - tiktoken 20 | - umap-learn 21 | - matplotlib 22 | - pyarrow 23 | - rerankers[flashrank] 24 | - datasets 25 | - torch 26 | - distilabel 27 | - pygithub 28 | - openai 29 | environment: 30 | ZENML_PROJECT_SECRET_NAME: llm_complete 31 | python_package_installer: "uv" -------------------------------------------------------------------------------- /llm-complete-guide/configs/production/embeddings.yaml: -------------------------------------------------------------------------------- 1 | # enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - ratelimit 8 | - pgvector 9 | - psycopg2-binary 10 | - beautifulsoup4 11 | - unstructured 12 | - pandas 13 | - numpy 14 | - sentence-transformers>=3 15 | - transformers[torch]==4.43.1 16 | - litellm 17 | - ollama 18 | - tiktoken 19 | - umap-learn 20 | - matplotlib 21 | - pyarrow 22 | - rerankers[flashrank] 23 | - datasets 24 | - torch 25 | - pygithub 26 | - openai 27 | environment: 28 | ZENML_PROJECT_SECRET_NAME: llm_complete 29 | python_package_installer: "uv" 30 | 31 | steps: 32 | finetune: 33 | step_operator: "gcp_a100" 34 | settings: 35 | step_operator.vertex: 36 | accelerator_count: 1 37 | accelerator_type: NVIDIA_TESLA_A100 -------------------------------------------------------------------------------- /llm-complete-guide/configs/production/eval.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - unstructured 8 | - sentence-transformers>=3 9 | - pgvector 10 | - datasets 11 | - litellm 12 | - numpy 13 | - psycopg2-binary 14 | - tiktoken 15 | - ratelimit 16 | - rerankers[flashrank] 17 | - matplotlib 18 | - pillow 19 | - pygithub 20 | - elasticsearch 21 | environment: 22 | ZENML_PROJECT_SECRET_NAME: llm_complete 23 | ZENML_ENABLE_RICH_TRACEBACK: FALSE 24 | ZENML_LOGGING_VERBOSITY: INFO 25 | python_package_installer: "uv" 26 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/production/rag.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - unstructured 8 | - sentence-transformers>=3 9 | - pgvector 10 | - datasets 11 | - litellm 12 | - numpy 13 | - psycopg2-binary 14 | - tiktoken 15 | - ratelimit 16 | - rerankers 17 | - pygithub 18 | - rerankers[flashrank] 19 | - matplotlib 20 | - elasticsearch 21 | 22 | environment: 23 | ZENML_PROJECT_SECRET_NAME: llm_complete 24 | ZENML_ENABLE_RICH_TRACEBACK: FALSE 25 | ZENML_LOGGING_VERBOSITY: INFO 26 | python_package_installer: "uv" 27 | steps: 28 | url_scraper: 29 | parameters: 30 | docs_url: https://docs.zenml.io 31 | use_dev_set: false 32 | # generate_embeddings: 33 | # step_operator: "sagemaker" 34 | # settings: 35 | # step_operator.sagemaker: 36 | # accelerator_count: 1 37 | # accelerator_type: NVIDIA_TESLA_A100 -------------------------------------------------------------------------------- /llm-complete-guide/configs/production/synthetic.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: 5 | - ratelimit 6 | - pgvector 7 | - psycopg2-binary 8 | - 
beautifulsoup4 9 | - unstructured 10 | - pandas 11 | - numpy 12 | - sentence-transformers>=3 13 | - transformers==4.43.1 14 | - litellm 15 | - ollama 16 | - tiktoken 17 | - umap-learn 18 | - matplotlib 19 | - pyarrow 20 | - rerankers[flashrank] 21 | - datasets 22 | - torch 23 | - distilabel 24 | - argilla 25 | - pygithub 26 | - openai 27 | environment: 28 | ZENML_PROJECT_SECRET_NAME: llm_complete 29 | python_package_installer: "uv" 30 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/staging/embeddings.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - ratelimit 8 | - pgvector 9 | - psycopg2-binary 10 | - beautifulsoup4 11 | - unstructured 12 | - pandas 13 | - numpy 14 | - sentence-transformers>=3 15 | - transformers[torch]==4.43.1 16 | - litellm 17 | - ollama 18 | - tiktoken 19 | - umap-learn 20 | - matplotlib 21 | - pyarrow 22 | - rerankers[flashrank] 23 | - datasets 24 | - torch 25 | - pygithub 26 | - openai 27 | environment: 28 | ZENML_PROJECT_SECRET_NAME: llm_complete 29 | python_package_installer: "uv" 30 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/staging/eval.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - unstructured 8 | - sentence-transformers>=3 9 | - pgvector 10 | - datasets 11 | - litellm 12 | - numpy 13 | - psycopg2-binary 14 | - tiktoken 15 | - ratelimit 16 | - rerankers[flashrank] 17 | - matplotlib 18 | - pillow 19 | - pygithub 20 | - elasticsearch 21 | environment: 22 | ZENML_PROJECT_SECRET_NAME: llm_complete 23 | ZENML_ENABLE_RICH_TRACEBACK: FALSE 24 | ZENML_LOGGING_VERBOSITY: INFO 25 | python_package_installer: "uv" 26 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/staging/rag.yaml: -------------------------------------------------------------------------------- 1 | enable_cache: False 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | requirements: 7 | - unstructured 8 | - sentence-transformers>=3 9 | - pgvector 10 | - datasets 11 | - litellm 12 | - numpy 13 | - psycopg2-binary 14 | - tiktoken 15 | - ratelimit 16 | - rerankers 17 | - pygithub 18 | - rerankers[flashrank] 19 | - matplotlib 20 | - elasticsearch 21 | 22 | environment: 23 | ZENML_PROJECT_SECRET_NAME: llm_complete 24 | ZENML_ENABLE_RICH_TRACEBACK: FALSE 25 | ZENML_LOGGING_VERBOSITY: INFO 26 | python_package_installer: "uv" 27 | parent_image: "339712793861.dkr.ecr.eu-central-1.amazonaws.com/zenml:llm_index_and_evaluate-orchestrator" 28 | skip_build: true 29 | 30 | steps: 31 | url_scraper: 32 | parameters: 33 | docs_url: https://docs.zenml.io 34 | use_dev_set: false 35 | -------------------------------------------------------------------------------- /llm-complete-guide/configs/staging/synthetic.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: 5 | - ratelimit 6 | - pgvector 7 | - psycopg2-binary 8 | - beautifulsoup4 9 | - unstructured 10 | - pandas 11 | - numpy 12 | - sentence-transformers>=3 13 | - transformers==4.43.1 14 | - litellm 15 | - ollama 16 | - tiktoken 17 | - umap-learn 18 | - matplotlib 19 | - 
pyarrow 20 | - rerankers[flashrank] 21 | - datasets 22 | - torch 23 | - distilabel 24 | - argilla 25 | - pygithub 26 | - openai 27 | environment: 28 | ZENML_PROJECT_SECRET_NAME: llm_complete 29 | python_package_installer: "uv" 30 | -------------------------------------------------------------------------------- /llm-complete-guide/materializers/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/materializers/__init__.py -------------------------------------------------------------------------------- /llm-complete-guide/notebooks/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | -------------------------------------------------------------------------------- /llm-complete-guide/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | from pipelines.distilabel_generation import generate_synthetic_data 18 | from pipelines.finetune_embeddings import finetune_embeddings 19 | from pipelines.generate_chunk_questions import generate_chunk_questions 20 | from pipelines.llm_basic_rag import llm_basic_rag 21 | from pipelines.llm_eval import llm_eval 22 | from pipelines.llm_index_and_evaluate import llm_index_and_evaluate 23 | from pipelines.llm_langfuse_evals import llm_langfuse_evaluation 24 | from pipelines.rag_deployment import rag_deployment 25 | -------------------------------------------------------------------------------- /llm-complete-guide/pipelines/llm_langfuse_evals.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from steps.create_prompt import create_prompt 4 | from steps.eval_langfuse import fast_eval, visualize_fast_eval_results 5 | from zenml import pipeline 6 | 7 | 8 | @pipeline(enable_cache=False) 9 | def llm_langfuse_evaluation(after: Optional[str] = None) -> None: 10 | """Evaluate the LLM using Langfuse.""" 11 | # create prompt 12 | prompt = create_prompt() 13 | results = fast_eval(after=after, prompt=prompt) 14 | visualize_fast_eval_results(results) 15 | 16 | 17 | if __name__ == "__main__": 18 | llm_langfuse_evaluation() 19 | -------------------------------------------------------------------------------- /llm-complete-guide/pipelines/rag_deployment.py: -------------------------------------------------------------------------------- 1 | from steps.rag_deployment import gradio_rag_deployment 2 | from zenml import pipeline 3 | 4 | 5 | @pipeline(enable_cache=False) 6 | def rag_deployment(): 7 | gradio_rag_deployment() 8 | -------------------------------------------------------------------------------- /llm-complete-guide/requirements-argilla.txt: -------------------------------------------------------------------------------- 1 | zenml[server] 2 | sentence-transformers>=3,<=3.0.1 3 | transformers<=4.44.0 4 | litellm 5 | ollama 6 | polars<=1.4.1 7 | datasets<=2.20.0 8 | argilla<=2.0.0 9 | distilabel<=1.3.1 10 | accelerate<=0.33.0 11 | -------------------------------------------------------------------------------- /llm-complete-guide/requirements.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/zenml-io/zenml.git@develop#egg=zenml[server] # will work for zenml>=0.75.0 2 | ratelimit 3 | pgvector 4 | psycopg2-binary 5 | beautifulsoup4 6 | unstructured 7 | pandas 8 | openai 9 | numpy 10 | sentence-transformers>=3 11 | transformers 12 | litellm 13 | ollama 14 | tiktoken 15 | umap-learn 16 | matplotlib 17 | pyarrow 18 | rerankers[flashrank] 19 | datasets 20 | torch 21 | gradio>=5.13.0 22 | huggingface-hub 23 | elasticsearch 24 | tenacity 25 | langfuse 26 | pinecone 27 | 28 | # optional requirements for S3 artifact store 29 | # s3fs>2022.3.0 30 | # boto3 31 | # aws-profile-manager 32 | -------------------------------------------------------------------------------- /llm-complete-guide/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /llm-complete-guide/tests/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/llm-complete-guide/tests/__init__.py -------------------------------------------------------------------------------- /llm-complete-guide/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | -------------------------------------------------------------------------------- /llm-complete-guide/utils/hf_utils.py: -------------------------------------------------------------------------------- 1 | from constants import SECRET_NAME 2 | from zenml.client import Client 3 | 4 | 5 | def get_hf_token() -> str: 6 | api_key = Client().get_secret(SECRET_NAME).secret_values["hf_token"] 7 | 8 | return api_key 9 | -------------------------------------------------------------------------------- /llm-complete-guide/utils/openai_utils.py: -------------------------------------------------------------------------------- 1 | from constants import SECRET_NAME 2 | from zenml.client import Client 3 | 4 | 5 | def get_openai_api_key() -> str: 6 | api_key = Client().get_secret(SECRET_NAME).secret_values["openai_api_key"] 7 | 8 | return api_key 9 | -------------------------------------------------------------------------------- /magic-photobooth/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | *.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | 
.vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /magic-photobooth/assets/app_screenshot_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/app_screenshot_1.png -------------------------------------------------------------------------------- /magic-photobooth/assets/app_screenshot_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/app_screenshot_2.png -------------------------------------------------------------------------------- /magic-photobooth/assets/app_screenshot_3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/app_screenshot_3.png -------------------------------------------------------------------------------- /magic-photobooth/assets/batch-dreambooth.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/batch-dreambooth.png -------------------------------------------------------------------------------- /magic-photobooth/assets/blupus-demo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/blupus-demo.png -------------------------------------------------------------------------------- /magic-photobooth/assets/hamza_superman.mp4: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/assets/hamza_superman.mp4 -------------------------------------------------------------------------------- /magic-photobooth/constants.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/magic-photobooth/constants.py -------------------------------------------------------------------------------- /magic-photobooth/requirements-frontend.txt: -------------------------------------------------------------------------------- 1 | streamlit -------------------------------------------------------------------------------- /magic-photobooth/requirements-modal.txt: -------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/accelerate.git@main 2 | datasets 3 | transformers~=4.41.2 4 | sentencepiece>=0.1.91,!=0.1.92 5 | torch~=2.2.0 6 | torchvision~=0.16 7 | peft 8 | git+https://github.com/zenml-io/zenml.git@feature/modal-step-operator 9 | git+https://github.com/huggingface/diffusers.git@main 10 | pillow 11 | tensorboard 12 | Jinja2 13 | bitsandbytes 14 | opencv-python 15 | imageio 16 | imageio-ffmpeg 17 | modal 18 | -------------------------------------------------------------------------------- /magic-photobooth/requirements.txt: 
-------------------------------------------------------------------------------- 1 | git+https://github.com/huggingface/accelerate.git@v0.33.0 2 | datasets 3 | ftfy~=6.1.0 4 | transformers~=4.41.2 5 | sentencepiece>=0.1.91,!=0.1.92 6 | torch~=2.2.0 7 | torchvision~=0.16 8 | peft 9 | smart_open 10 | git+https://github.com/zenml-io/zenml.git@main 11 | git+https://github.com/huggingface/diffusers.git@v0.30.2 12 | pillow 13 | tensorboard 14 | Jinja2 15 | bitsandbytes 16 | opencv-python 17 | imageio 18 | imageio-ffmpeg 19 | -------------------------------------------------------------------------------- /nightwatch-ai/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | *.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | .vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /nightwatch-ai/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 79 3 | max-complexity = 18 4 | select = B,C,E,F,W,T4,B9 5 | ignore = E203, E266, E501, W503, F403, F401 6 | -------------------------------------------------------------------------------- /nightwatch-ai/assets/youtldr_summarizer_slack.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/nightwatch-ai/assets/youtldr_summarizer_slack.png -------------------------------------------------------------------------------- /nightwatch-ai/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "supabase-openai-summary" 3 | version = "0.0.1" 4 | description = "A Supabase summary pipeline for OpenAI's GPT-4 model." 
5 | authors = ["ZenML"] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.8.1,<3.11" 9 | zenml = { extras = ["server"], version = "0.38.0" } 10 | openai = ">=0.27.5" 11 | slack-sdk = ">=3.16.1" 12 | aiohttp = ">=3.8.1" 13 | supabase = ">=0.5.0" 14 | gcsfs = ">=2023.4.0" 15 | 16 | [tool.poetry.dev-dependencies] 17 | black = "^21.9b0" 18 | isort = "^5.9.3" 19 | pytest = "^6.2.5" 20 | 21 | [build-system] 22 | requires = ["poetry-core>=1.0.0"] 23 | build-backend = "poetry.core.masonry.api" 24 | 25 | [tool.isort] 26 | profile = "black" 27 | known_third_party = [] 28 | skip_glob = [] 29 | line_length = 79 30 | 31 | [tool.black] 32 | line-length = 79 33 | include = '\.pyi?$' 34 | exclude = ''' 35 | /( 36 | \.git 37 | | \.hg 38 | | \.mypy_cache 39 | | \.tox 40 | | \.venv 41 | | _build 42 | | buck-out 43 | | build 44 | )/ 45 | ''' 46 | -------------------------------------------------------------------------------- /nightwatch-ai/src/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2023. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | -------------------------------------------------------------------------------- /nightwatch-ai/src/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2023. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | -------------------------------------------------------------------------------- /nightwatch-ai/src/requirements.txt: -------------------------------------------------------------------------------- 1 | openai 2 | zenml==0.39.1 3 | slack-sdk>=3.16.1 4 | aiohttp>=3.8.1 5 | supabase>=0.5.0 6 | gcsfs -------------------------------------------------------------------------------- /nightwatch-ai/src/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/nightwatch-ai/src/steps/__init__.py -------------------------------------------------------------------------------- /nightwatch-ai/src/steps/alerters.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2023. All Rights Reserved. 
2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | 15 | from zenml.steps import step 16 | 17 | 18 | @step(enable_cache=True) 19 | def print_alerter(message: str) -> bool: 20 | print("\n\n***** SUPABASE SUMMARY *****\n\n") 21 | print(message) 22 | return True 23 | -------------------------------------------------------------------------------- /omni-reader/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | -------------------------------------------------------------------------------- /omni-reader/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=your_openai_api_key 2 | MISTRAL_API_KEY=your_mistral_api_key 3 | OLLAMA_HOST=base_url_for_ollama_host # defaults to "http://localhost:11434/api/generate" if not set 4 | -------------------------------------------------------------------------------- /omni-reader/assets/docs/metrics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/docs/metrics.png -------------------------------------------------------------------------------- /omni-reader/assets/docs/pipeline_dags.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/docs/pipeline_dags.png -------------------------------------------------------------------------------- /omni-reader/assets/docs/streamlit.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/docs/streamlit.png -------------------------------------------------------------------------------- /omni-reader/assets/docs/visualization.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/docs/visualization.png -------------------------------------------------------------------------------- /omni-reader/assets/logos/microsoft.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /omni-reader/assets/omni-reader-blog-cover.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/omni-reader-blog-cover.png -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/handwritten/easy_example.jpeg: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/handwritten/easy_example.jpeg -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/handwritten/education_article_excerpt.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/handwritten/education_article_excerpt.webp -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/handwritten/incomplete_sentence.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/handwritten/incomplete_sentence.png -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/handwritten/reporter_notes.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/handwritten/reporter_notes.png -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/numbers/lexus_vin_number.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/numbers/lexus_vin_number.webp -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/numbers/tire_serial_number.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/numbers/tire_serial_number.jpg -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/rx_prescriptions/rx_prescription_clear.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/rx_prescriptions/rx_prescription_clear.jpg -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/rx_prescriptions/rx_prescription_unclear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/rx_prescriptions/rx_prescription_unclear.png -------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/street_signs/montreal_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/street_signs/montreal_signs.jpg 
-------------------------------------------------------------------------------- /omni-reader/assets/samples_for_ocr/street_signs/paris_signs.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/omni-reader/assets/samples_for_ocr/street_signs/paris_signs.jpg -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/education_article_excerpt.txt: -------------------------------------------------------------------------------- 1 | Only a matter of style? 2 | 3 | For educational purposes we analyse the opening pages of an 11-page article that appeared in The American Mathematical Monthly, Volume 102 Number 2 / February 1995. We have added line numbers in the right margin. 4 | 5 | line 4: Since in this article, squares don’t get alternating colours, it could be argued that the term “chessboard” is misplaced. 6 | 7 | line 4: The introduction of the name “B” seems unnecessary; it is used in the combination “the board B” in the text for Figure 1 and in line 7; in both cases just “the board” would have done fine. In line 77 occurs the last use of B, viz. in “X⊂B”, which is dubious since B was a board and not a set; in line 77, I would have preferred “Given a set X of cells”. 8 | 9 | line 7/8: The first move, being a move like any other, does not deserve a separate description. The term “step” is redundant. 10 | 11 | line 8: Why not “a move consists of”? 12 | 13 | line 10/11: At this stage the italics are puzzling, since a move is possible if, 14 | -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/incomplete_sentence.txt: -------------------------------------------------------------------------------- 1 | In mid-April Anglesey moved his family and entourage from Rome to Naples, there to await the arrival of 2 | -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/lexus_vin_number.txt: -------------------------------------------------------------------------------- 1 | JTHBH5D2405012812 -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/montreal_signs.txt: -------------------------------------------------------------------------------- 1 | Basilique Notre-Dame 2 | Place Royale 3 | Place d’Armes -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/paris_signs.txt: -------------------------------------------------------------------------------- 1 | Palais Royal 2 | Les Arts Décoratifs 3 | Musée du LOUVRE 4 | Église ST GERMAIN l’AUXERROIS 5 | Musée Eugène DELACROIX -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/reporter_notes.txt: -------------------------------------------------------------------------------- 1 | An attempt to get more information about the Admiralty House meeting will be made in the House of Commons this afternoon. Labour M.P.s already have many questions to the Prime Minister asking for a statement. President Kennedy flew from London Airport last night to arrive in Washington this morning. He is to make a 30-minute nation-wide broadcast and television report on his talks with Mr. Khrushchev this evening. 
2 | -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/rx_prescription_clear.txt: -------------------------------------------------------------------------------- 1 | Rx 2 | Amoxicillin + Clavulanic acid (Co-Amoxiclav) 500/125 mg/tab #21 3 | Sig: Take one with food every 8 hours for 7 days 4 | 5 | Paracetamol 500 mg/tab #5 6 | Sig: Take one with food every 4 hours as needed for fever (temp. ≥ 37.8°C) 7 | -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/rx_prescription_unclear.txt: -------------------------------------------------------------------------------- 1 | Rx 2 | 3 | Tab. Pansec 20 4 | 1 + 0 + 1 5 | 6 | Tab. Apitez 160 7 | 0 + 1 + 0 8 | 9 | Tab. Linagliptin / Linita 5mg 10 | 2 + 0 + 0 -------------------------------------------------------------------------------- /omni-reader/ground_truth_texts/tire_serial_number.txt: -------------------------------------------------------------------------------- 1 | 3702692432 -------------------------------------------------------------------------------- /omni-reader/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """OCR pipelines""" 17 | 18 | from pipelines.batch_pipeline import batch_ocr_pipeline, run_batch_ocr_pipeline 19 | from pipelines.evaluation_pipeline import ( 20 | ocr_evaluation_pipeline, 21 | run_ocr_evaluation_pipeline, 22 | ) 23 | -------------------------------------------------------------------------------- /omni-reader/requirements.txt: -------------------------------------------------------------------------------- 1 | instructor 2 | jiwer 3 | jiter 4 | importlib-metadata<7.0,>=1.4.0 5 | litellm 6 | mistralai==1.0.3 7 | numpy<2.0,>=1.9.0 8 | openai==1.69.0 9 | Pillow==11.1.0 10 | polars==1.26.0 11 | pyarrow>=7.0.0 12 | python-dotenv 13 | streamlit==1.44.0 14 | pydantic>=2.8.2,<2.9.0 15 | tqdm==4.66.4 16 | zenml>=0.80.0 17 | -------------------------------------------------------------------------------- /omni-reader/schemas/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | from .image_description import ImageDescription 17 | from .ocr_result import ( 18 | OCRResult, 19 | OCRResultMapping, 20 | ) 21 | -------------------------------------------------------------------------------- /omni-reader/schemas/image_description.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | """Schemas for LLM responses.""" 17 | 18 | from typing import Optional 19 | 20 | from pydantic import BaseModel 21 | 22 | 23 | class ImageDescription(BaseModel): 24 | """Base model for OCR results.""" 25 | 26 | raw_text: str 27 | confidence: Optional[float] = None 28 | -------------------------------------------------------------------------------- /omni-reader/schemas/ocr_result.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | """Schemas for OCR results.""" 17 | 18 | from typing import Dict, List 19 | 20 | from pydantic import BaseModel, RootModel 21 | 22 | 23 | class OCRResult(BaseModel): 24 | """OCR result for a single image.""" 25 | 26 | id: int 27 | image_name: str 28 | raw_text: str 29 | processing_time: float 30 | confidence: float 31 | 32 | 33 | class OCRResultMapping(RootModel): 34 | """Each model name maps to a list of OCRResult entries.""" 35 | 36 | root: Dict[str, List[OCRResult]] 37 | -------------------------------------------------------------------------------- /omni-reader/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.formatOnSave": true, 3 | "python.defaultInterpreterPath": "/opt/venv/bin/python", 4 | "python.analysis.extraPaths": ["/workspace"], 5 | "workbench.startupEditor": "none", 6 | "terminal.integrated.defaultProfile.linux": "bash", 7 | "workbench.colorTheme": "Default Dark Modern", 8 | "python.linting.enabled": true, 9 | "python.linting.pylintEnabled": true, 10 | "files.exclude": { 11 | "**/__pycache__": true, 12 | "**/.pytest_cache": true 13 | } 14 | } -------------------------------------------------------------------------------- /omni-reader/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | from .evaluate_models import evaluate_models 17 | from .loaders import ( 18 | load_ground_truth_texts, 19 | load_images, 20 | load_ocr_results, 21 | ) 22 | from .run_ocr import run_ocr 23 | -------------------------------------------------------------------------------- /oncoclear/.assets/cloud_mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/cloud_mcp.png -------------------------------------------------------------------------------- /oncoclear/.assets/deployment_architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/deployment_architecture.png -------------------------------------------------------------------------------- /oncoclear/.assets/deployment_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/deployment_pipeline.png -------------------------------------------------------------------------------- /oncoclear/.assets/fastapi_docs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/fastapi_docs.png -------------------------------------------------------------------------------- /oncoclear/.assets/feature_engineering_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/feature_engineering_pipeline.png -------------------------------------------------------------------------------- /oncoclear/.assets/inference_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/inference_pipeline.png -------------------------------------------------------------------------------- /oncoclear/.assets/pipeline_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/pipeline_overview.png -------------------------------------------------------------------------------- /oncoclear/.assets/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/.assets/training_pipeline.png -------------------------------------------------------------------------------- /oncoclear/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* -------------------------------------------------------------------------------- /oncoclear/Dockerfile.codespace: -------------------------------------------------------------------------------- 1 | # Sandbox base image 2 | FROM zenmldocker/zenml-sandbox:latest 3 | 4 | # Install uv from official distroless image 5 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ 6 | 7 | # 
Set uv environment variables for optimization 8 | ENV UV_SYSTEM_PYTHON=1 9 | ENV UV_COMPILE_BYTECODE=1 10 | 11 | # Project metadata 12 | LABEL project_name="oncoclear" 13 | LABEL project_version="0.1.0" 14 | 15 | # Install dependencies with uv and cache optimization 16 | RUN --mount=type=cache,target=/root/.cache/uv \ 17 | uv pip install --system \ 18 | "zenml[server]>=0.50.0" \ 19 | "notebook" \ 20 | "scikit-learn" \ 21 | "pyarrow" \ 22 | "pandas" 23 | 24 | # Set workspace directory 25 | WORKDIR /workspace 26 | 27 | # Clone only the project directory and reorganize 28 | RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \ 29 | cp -r /tmp/zenml-projects/oncoclear/* /workspace/ && \ 30 | rm -rf /tmp/zenml-projects 31 | 32 | # VSCode settings 33 | RUN mkdir -p /workspace/.vscode && \ 34 | printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json 35 | 36 | 37 | -------------------------------------------------------------------------------- /oncoclear/api/.dockerignore: -------------------------------------------------------------------------------- 1 | # Python 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.so 6 | .Python 7 | env/ 8 | build/ 9 | develop-eggs/ 10 | dist/ 11 | downloads/ 12 | eggs/ 13 | .eggs/ 14 | lib/ 15 | lib64/ 16 | parts/ 17 | sdist/ 18 | var/ 19 | *.egg-info/ 20 | .installed.cfg 21 | *.egg 22 | 23 | # Environment and editor files 24 | .env 25 | .venv 26 | venv/ 27 | ENV/ 28 | .idea/ 29 | .vscode/ 30 | *.swp 31 | *.swo 32 | *.swn 33 | *~ 34 | 35 | # Documentation or local development files 36 | README.md 37 | CHANGELOG.md 38 | docs/ 39 | tests/ 40 | 41 | # Git and GitHub 42 | .git/ 43 | .github/ 44 | .gitignore 45 | 46 | # Docker 47 | Dockerfile.* 48 | docker-compose*.yml 49 | 50 | # Miscellaneous 51 | .DS_Store -------------------------------------------------------------------------------- /oncoclear/api/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn[standard] 3 | scikit-learn # Replace with specific version later if needed 4 | zenml # Replace with specific version later if needed 5 | python-dotenv 6 | numpy 7 | pandas 8 | 9 | # AWS dependencies for S3 artifact store 10 | aws-profile-manager 11 | boto3 12 | s3fs>=2022.3.0 -------------------------------------------------------------------------------- /oncoclear/api/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /oncoclear/api/utils/__init__.py:Zone.Identifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/api/utils/__init__.py:Zone.Identifier -------------------------------------------------------------------------------- /oncoclear/api/utils/preprocess.py:Zone.Identifier: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/oncoclear/api/utils/preprocess.py:Zone.Identifier -------------------------------------------------------------------------------- /oncoclear/configs/feature_engineering.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # pipeline configuration 11 | test_size: 0.35 -------------------------------------------------------------------------------- /oncoclear/configs/inference.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # configuration of the Model Control Plane 11 | model: 12 | name: "breast_cancer_classifier" 13 | version: "production" 14 | license: Apache 2.0 15 | description: A breast cancer classifier 16 | tags: ["breast_cancer", "classifier"] -------------------------------------------------------------------------------- /oncoclear/configs/training_rf.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # configuration of the Model Control Plane 11 | model: 12 | name: breast_cancer_classifier 13 | version: rf 14 | license: Apache 2.0 15 | description: A breast cancer classifier 16 | tags: ["breast_cancer", "classifier"] 17 | 18 | # Configure the pipeline 19 | parameters: 20 | model_type: "rf" # Choose between rf/sgd 21 | -------------------------------------------------------------------------------- /oncoclear/configs/training_sgd.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # configuration of the Model Control Plane 11 | model: 12 | name: breast_cancer_classifier 13 | version: sgd 14 | license: Apache 2.0 15 | description: A breast cancer classifier 16 | tags: ["breast_cancer", "classifier"] 17 | 18 | # Configure the pipeline 19 | parameters: 20 | model_type: "sgd" # Choose between rf/sgd -------------------------------------------------------------------------------- /oncoclear/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .deployment import local_deployment 19 | from .feature_engineering import feature_engineering 20 | from .inference import inference 21 | from .training import training 22 | -------------------------------------------------------------------------------- /oncoclear/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server]>=0.50.0 2 | notebook 3 | scikit-learn 4 | pyarrow 5 | pandas 6 | -------------------------------------------------------------------------------- /oncoclear/utils/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | -------------------------------------------------------------------------------- /research-radar/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | -------------------------------------------------------------------------------- /research-radar/.env.example: -------------------------------------------------------------------------------- 1 | ANTHROPIC_API_KEY="your_anthropic_api_key" 2 | HF_TOKEN="your_hugging_face_api_token" 3 | -------------------------------------------------------------------------------- /research-radar/assets/ft_model_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/research-radar/assets/ft_model_comparison.png -------------------------------------------------------------------------------- /research-radar/assets/modernbert_claude_haiku_comparison.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/research-radar/assets/modernbert_claude_haiku_comparison.png -------------------------------------------------------------------------------- /research-radar/assets/resuming-from-checkpoint.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/research-radar/assets/resuming-from-checkpoint.png -------------------------------------------------------------------------------- /research-radar/assets/starting-new-run.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/research-radar/assets/starting-new-run.png -------------------------------------------------------------------------------- /research-radar/assets/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/research-radar/assets/training_pipeline.png -------------------------------------------------------------------------------- /research-radar/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | # materializers/__init__.py 19 | from .dataset_materializer import DatasetMaterializer 20 | from .datetime_materializer import DatetimeMaterializer 21 | from .lazyframe_materializer import LazyFrameMaterializer 22 | from .url_materializer import UrlMaterializer 23 | 24 | __all__ = [ 25 | "LazyFrameMaterializer", 26 | "DatetimeMaterializer", 27 | "DatasetMaterializer", 28 | "UrlMaterializer", 29 | ] 30 | -------------------------------------------------------------------------------- /research-radar/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .classification import classification_pipeline 19 | from .deployment import deployment_pipeline 20 | from .model_comparison import model_comparison_pipeline 21 | from .training import training_pipeline 22 | -------------------------------------------------------------------------------- /research-radar/prompts/room_to_think_prompt.txt: -------------------------------------------------------------------------------- 1 | Evaluate whether the following article provides concrete evidence of a valid case study for a GenAI system. 2 | 3 | Return your evaluation strictly as a JSON object with ONLY these keys: 4 | - is_accepted: a **boolean** indicating whether you accept or reject the article 5 | - confidence: a **float** between 0 and 1 representing the confidence score for the evaluation 6 | - reason: a **string** describing the reasoning you used to come to your conclusion 7 | 8 | Example JSON output: 9 | ```json 10 | { 11 | "is_accepted": false, 12 | "confidence": 0.75, 13 | "reason": "The article does not demonstrate a fully deployed system with sufficient operational details and real-world impact." 14 | } 15 | ``` 16 | 17 | **Room to Think:** 18 | Before finalizing your answer, take a moment to internally reason through the evidence and criteria. Use your internal chain-of-thought to assess the article thoroughly. *Do not include any of this internal reasoning in your final output.* 19 | 20 | **Final Answer:** 21 | After thoroughly considering the evidence and criteria, provide your final answer. 22 | 23 | Article to evaluate: {{article_text}} 24 | 25 | -------------------------------------------------------------------------------- /research-radar/prompts/user_prompt.txt: -------------------------------------------------------------------------------- 1 | Evaluate whether the following article provides concrete evidence of a valid case study for a GenAI system. 
2 | 3 | Return your evaluation strictly as a JSON object with ONLY these keys: 4 | - is_accepted: a **boolean** indicating whether you accept or reject the article 5 | - confidence: a **float** between 0 and 1 representing the confidence score for the evaluation 6 | - reason: a **string** describing the reasoning you used to come to your conclusion 7 | 8 | Example JSON output: 9 | ```json 10 | { 11 | "is_accepted": false, 12 | "confidence": 0.75, 13 | "reason": "The article does not demonstrate a fully deployed system with sufficient operational details and real-world impact." 14 | } 15 | ``` 16 | 17 | Article to evaluate: 18 | {{article_text}} -------------------------------------------------------------------------------- /research-radar/requirements.txt: -------------------------------------------------------------------------------- 1 | accelerate==1.5.2 2 | anthropic==0.46.0 3 | beautifulsoup4==4.13.3 4 | colorama==0.4.6 5 | datasets==3.2.0 6 | evaluate==0.4.3 7 | huggingface_hub==0.28.1 8 | numpy==2.2.3 9 | pandas==2.2.3 10 | polars==1.22.0 11 | psutil==6.1.1 12 | pydantic==2.8.2 13 | pydantic_core==2.20.1 14 | python-dotenv==1.0.1 15 | scikit_learn==1.6.1 16 | tenacity==9.0.0 17 | torch==2.6.0 18 | tqdm>=4.64.0 19 | transformers==4.48.0 20 | typing_extensions==4.12.2 21 | zenml>=0.80.0 -------------------------------------------------------------------------------- /research-radar/schemas/classification_output.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from pydantic import BaseModel, Field 19 | 20 | 21 | class ClassificationOutput(BaseModel): 22 | """ 23 | Schema for LLM generated classification outputs 24 | """ 25 | 26 | is_accepted: bool 27 | confidence: float = Field(ge=0.0, le=1.0) 28 | reason: str 29 | -------------------------------------------------------------------------------- /research-radar/steps/save_test_set.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2025. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | from pathlib import Path 19 | 20 | from datasets import Dataset 21 | from schemas import zenml_project 22 | from zenml import step 23 | 24 | 25 | @step(model=zenml_project) 26 | def save_test_set(test_set: Dataset, artifact_path: str) -> str: 27 | """ 28 | Saves the Hugging Face Dataset to disk and returns the path. 29 | """ 30 | 31 | artifact_path = Path(artifact_path) 32 | artifact_path.mkdir(parents=True, exist_ok=True) 33 | test_set.save_to_disk(str(artifact_path)) 34 | return str(artifact_path) 35 | -------------------------------------------------------------------------------- /retail-forecast/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | .coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | *.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | .vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /retail-forecast/assets/data_visualization.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/retail-forecast/assets/data_visualization.gif -------------------------------------------------------------------------------- /retail-forecast/assets/forecast_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/retail-forecast/assets/forecast_dashboard.png -------------------------------------------------------------------------------- /retail-forecast/assets/inference_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/retail-forecast/assets/inference_pipeline.png -------------------------------------------------------------------------------- /retail-forecast/assets/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/retail-forecast/assets/training_pipeline.png -------------------------------------------------------------------------------- /retail-forecast/assets/zenml_dashboard.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/retail-forecast/assets/zenml_dashboard.png -------------------------------------------------------------------------------- /retail-forecast/configs/inference.yaml: 
-------------------------------------------------------------------------------- 1 | # Inference configuration for retail forecasting 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | required_integrations: 7 | - pandas 8 | - numpy 9 | requirements: 10 | - matplotlib>=3.5.0 11 | - plotly 12 | - prophet>=3.5.0 13 | - pyarrow 14 | - fastparquet 15 | - typing_extensions>=4.0.0 16 | 17 | # configuration of the Model Control Plane 18 | model: 19 | name: retail_forecast_model 20 | version: 0.1.0 21 | license: MIT 22 | description: A retail forecast model for inference 23 | tags: ["retail", "forecasting", "prophet", "inference"] 24 | 25 | # Step-specific parameters 26 | steps: 27 | # Data loading parameters 28 | load_data: 29 | # No specific parameters needed for this step 30 | 31 | # Data preprocessing parameters 32 | preprocess_data: 33 | test_size: 0.05 # Small test set for visualization only 34 | 35 | # Forecasting parameters 36 | generate_forecasts: 37 | forecast_periods: 30 -------------------------------------------------------------------------------- /retail-forecast/configs/training.yaml: -------------------------------------------------------------------------------- 1 | # Training configuration for retail forecasting 2 | 3 | # environment configuration 4 | settings: 5 | docker: 6 | required_integrations: 7 | - pandas 8 | - numpy 9 | requirements: 10 | - matplotlib>=3.5.0 11 | - plotly 12 | - prophet>=3.5.0 13 | - pyarrow 14 | - fastparquet 15 | - typing_extensions>=4.0.0 16 | 17 | # configuration of the Model Control Plane 18 | model: 19 | name: retail_forecast_model 20 | version: 0.1.0 21 | license: MIT 22 | description: A retail forecast model with enhanced seasonality 23 | tags: ["retail", "forecasting", "prophet", "seasonal"] 24 | 25 | # Step-specific parameters 26 | steps: 27 | # Data loading parameters 28 | load_data: 29 | # No specific parameters needed for this step 30 | 31 | # Data preprocessing parameters 32 | preprocess_data: 33 | test_size: 0.15 34 | 35 | # Model training parameters 36 | train_model: 37 | weekly_seasonality: true 38 | yearly_seasonality: true 39 | daily_seasonality: true 40 | seasonality_mode: "additive" 41 | 42 | # Forecasting parameters 43 | generate_forecasts: 44 | forecast_periods: 60 -------------------------------------------------------------------------------- /retail-forecast/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | from .prophet_materializer import ProphetMaterializer 2 | 3 | __all__ = ["ProphetMaterializer"] 4 | -------------------------------------------------------------------------------- /retail-forecast/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml~=0.82.0 2 | numpy>=1.20.0 3 | pandas>=1.3.0 4 | matplotlib>=3.5.0 5 | prophet>=3.5.0 6 | typing_extensions>=4.0.0 7 | pyarrow 8 | fastparquet 9 | plotly -------------------------------------------------------------------------------- /scripts/check-readme-projects.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # Check if all project directories are listed in the README table 3 | python scripts/check_readme_projects.py -------------------------------------------------------------------------------- /scripts/check-spelling.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh -e 2 | set -x 3 | 4 | export ZENML_DEBUG=1 5 | export 
ZENML_ANALYTICS_OPT_IN=false 6 | pyspelling 7 | -------------------------------------------------------------------------------- /scripts/docstring.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | 5 | DOCSTRING_SRC=${1:-"."} 6 | 7 | export ZENML_DEBUG=1 8 | export ZENML_ANALYTICS_OPT_IN=false 9 | 10 | darglint -v 2 $DOCSTRING_SRC 11 | -------------------------------------------------------------------------------- /scripts/format.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -x 3 | 4 | export ZENML_DEBUG=1 5 | export ZENML_ANALYTICS_OPT_IN=false 6 | 7 | # Initialize default source directories 8 | default_src="." 9 | # Collect any source directories passed as arguments 10 | SRC="$*" 11 | 12 | # If no source directories were provided, use the default 13 | if [ -z "$SRC" ]; then 14 | SRC="$default_src" 15 | fi 16 | 17 | export ZENML_DEBUG=1 18 | export ZENML_ANALYTICS_OPT_IN=false 19 | 20 | # Print ruff version for debugging 21 | echo "Using ruff version:" 22 | ruff --version 23 | 24 | # autoflake replacement: removes unused imports and variables 25 | ruff check $SRC --select F401,F841 --fix --exclude "__init__.py" --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --exclude "*.ipynb" --isolated 26 | 27 | # sorts imports 28 | ruff check $SRC --exclude "llm-finetuning/" --exclude "sign-language-detection-yolov5/model.py" --select I --fix --ignore D 29 | ruff format $SRC --exclude "sign-language-detection-yolov5/model.py" --exclude "llm-finetuning/" 30 | 31 | -------------------------------------------------------------------------------- /scripts/lint.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | set -e 3 | set -x 4 | set -o pipefail 5 | 6 | LINT_FILES=${1:-"."} 7 | 8 | export ZENML_DEBUG=1 9 | export ZENML_ANALYTICS_OPT_IN=false 10 | 11 | ruff $LINT_FILES 12 | # TODO: Fix docstrings in tests and examples and remove the `--extend-ignore D` flag 13 | ruff $LINT_FILES --extend-ignore D 14 | 15 | # autoflake replacement: checks for unused imports and variables 16 | ruff $LINT_FILES --select F401,F841 --exclude "__init__.py" --isolated 17 | 18 | black $LINT_FILES --check 19 | 20 | # check type annotations 21 | mypy $LINT_FILES 22 | -------------------------------------------------------------------------------- /sign-language-detection-yolov5/.dockerignore: -------------------------------------------------------------------------------- 1 | American-Sign-Language-Letters-6 2 | augment 3 | inference -------------------------------------------------------------------------------- /sign-language-detection-yolov5/materializer/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2022. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License.
14 | 15 | from materializer.dataset_materializer import DatasetMaterializer 16 | from materializer.yolo_model_materializer import Yolov5ModelMaterializer 17 | 18 | __all__ = ["DatasetMaterializer", "Yolov5ModelMaterializer"] 19 | -------------------------------------------------------------------------------- /sign-language-detection-yolov5/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2022. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | -------------------------------------------------------------------------------- /sign-language-detection-yolov5/pipelines/deployment_pipeline.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2022. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | from steps import ( 15 | bento_builder, 16 | bentoml_model_deployer, 17 | deployment_trigger, 18 | model_loader, 19 | ) 20 | from zenml.pipelines import pipeline 21 | 22 | 23 | @pipeline(enable_cache=False) 24 | def sign_language_detection_deployment_pipeline(): 25 | model_path, model = model_loader() 26 | decision = deployment_trigger(model_path) 27 | bento = bento_builder(model=model) 28 | bentoml_model_deployer(deploy_decision=decision, bento=bento) 29 | -------------------------------------------------------------------------------- /sign-language-detection-yolov5/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml>=0.73.0 2 | roboflow==0.2.18 3 | albumentations==1.3.0 4 | albumentations[imgaug] 5 | # mlflow[extras]==2.16.0 6 | itsdangerous>=2.2 7 | Werkzeug>=3.1 8 | bentoml==1.3.5 9 | torch==2.0.1 10 | numpy==1.26.4 11 | yolov5==7.0.10 -------------------------------------------------------------------------------- /sign-language-detection-yolov5/steps/bento_deployer.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2022. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 
5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | from zenml.integrations.bentoml.steps import ( 15 | bentoml_model_deployer_step, 16 | ) 17 | 18 | bentoml_model_deployer = bentoml_model_deployer_step.with_options( 19 | parameters=dict( 20 | model_name="sign_language_yolov5", 21 | port=3001, 22 | production=True, 23 | timeout=30, 24 | ) 25 | ) 26 | -------------------------------------------------------------------------------- /sign-language-detection-yolov5/steps/deployment_trigger.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2022. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | import os 15 | 16 | from zenml.steps import step 17 | 18 | 19 | @step 20 | def deployment_trigger( 21 | model_path: str, 22 | ) -> bool: 23 | """Implements a simple model deployment trigger that checks 24 | whether the model checkpoint is saved in the model directory.""" 25 | if os.path.isfile(model_path): 26 | return True 27 | return False 28 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/cloud_mcp.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/cloud_mcp.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/cloud_mcp_predictions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/cloud_mcp_predictions.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/cloud_mcp_screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/cloud_mcp_screenshot.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/feature_engineering_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/feature_engineering_pipeline.png
-------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/inference_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/inference_pipeline.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/pipeline_overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/pipeline_overview.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.assets/training_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/vertex-registry-and-deployer/.assets/training_pipeline.png -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.copier-answers.yml: -------------------------------------------------------------------------------- 1 | # Changes here will be overwritten by Copier 2 | _commit: 2024.09.24 3 | _src_path: gh:zenml-io/template-starter 4 | email: info@zenml.io 5 | full_name: ZenML GmbH 6 | open_source_license: apache 7 | project_name: ZenML Starter 8 | version: 0.1.0 9 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* -------------------------------------------------------------------------------- /vertex-registry-and-deployer/Dockerfile.codespace: -------------------------------------------------------------------------------- 1 | # Sandbox base image 2 | FROM zenmldocker/zenml-sandbox:latest 3 | 4 | # Install uv from official distroless image 5 | COPY --from=ghcr.io/astral-sh/uv:latest /uv /uvx /bin/ 6 | 7 | # Set uv environment variables for optimization 8 | ENV UV_SYSTEM_PYTHON=1 9 | ENV UV_COMPILE_BYTECODE=1 10 | 11 | # Project metadata 12 | LABEL project_name="vertex-registry-and-deployer" 13 | LABEL project_version="0.1.0" 14 | 15 | # Install dependencies with uv and cache optimization 16 | RUN --mount=type=cache,target=/root/.cache/uv \ 17 | uv pip install --system \ 18 | "zenml[server]>=0.70.1" \ 19 | "notebook" \ 20 | "scikit-learn" \ 21 | "pyarrow" \ 22 | "pandas" 23 | 24 | # Set workspace directory 25 | WORKDIR /workspace 26 | 27 | # Clone only the project directory and reorganize 28 | RUN git clone --depth 1 https://github.com/zenml-io/zenml-projects.git /tmp/zenml-projects && \ 29 | cp -r /tmp/zenml-projects/vertex-registry-and-deployer/* /workspace/ && \ 30 | rm -rf /tmp/zenml-projects 31 | 32 | # VSCode settings 33 | RUN mkdir -p /workspace/.vscode && \ 34 | printf '{\n "workbench.colorTheme": "Default Dark Modern"\n}' > /workspace/.vscode/settings.json 35 | 36 | 37 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/LICENSE: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 
4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 16 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/configs/inference.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # configuration of the Model Control Plane 11 | model: 12 | name: "breast_cancer_classifier" 13 | version: "production" 14 | license: Apache 2.0 15 | description: A breast cancer classifier 16 | tags: ["breast_cancer", "classifier"] -------------------------------------------------------------------------------- /vertex-registry-and-deployer/configs/training_sgd.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | required_integrations: 5 | - sklearn 6 | - pandas 7 | requirements: 8 | - pyarrow 9 | 10 | # configuration of the Model Control Plane 11 | model: 12 | name: breast_cancer_classifier 13 | version: sgd 14 | license: Apache 2.0 15 | description: A breast cancer classifier 16 | tags: ["breast_cancer", "classifier"] -------------------------------------------------------------------------------- /vertex-registry-and-deployer/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .inference import inference 19 | from .training import training 20 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server]>=0.70.1 2 | notebook 3 | scikit-learn 4 | pyarrow 5 | pandas 6 | -------------------------------------------------------------------------------- /vertex-registry-and-deployer/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .model_deployer import ( 19 | model_deployer, 20 | ) 21 | from .model_promoter import ( 22 | model_promoter, 23 | ) 24 | from .model_register import ( 25 | model_register, 26 | ) 27 | from .model_trainer import ( 28 | model_trainer, 29 | ) 30 | -------------------------------------------------------------------------------- /zencoder/.assets/zencoder_header.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zencoder/.assets/zencoder_header.png -------------------------------------------------------------------------------- /zencoder/.assets/zencoder_mcp_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zencoder/.assets/zencoder_mcp_1.png -------------------------------------------------------------------------------- /zencoder/.assets/zencoder_mcp_2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zencoder/.assets/zencoder_mcp_2.png -------------------------------------------------------------------------------- /zencoder/.copier-answers.yml: -------------------------------------------------------------------------------- 1 | # Changes here will be overwritten by Copier 2 | _commit: 2023.12.18 3 | _src_path: gh:zenml-io/template-starter 4 | email: '' 5 | full_name: ZenML GmbH 6 | open_source_license: apache 7 | project_name: ZenML Starter 8 | version: 0.1.0 9 | -------------------------------------------------------------------------------- /zencoder/.dockerignore: -------------------------------------------------------------------------------- 1 | .venv* 2 | .requirements* -------------------------------------------------------------------------------- /zencoder/configs/deployment_a10.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: requirements.txt 5 | python_package_installer: "uv" 6 | 7 | model: 8 | name: peft-lora-zencoder15B-personal-copilot 9 | version: production 10 | 11 | steps: 12 | deploy_model_to_hf_hub: 13 | parameters: 14 | hf_endpoint_cfg: 15 | framework: pytorch 16 | task: text-generation 17 | accelerator: gpu 18 | vendor: aws 19 | region: us-east-1 20 | max_replica: 1 21 | instance_size: xxlarge 22 | instance_type: g5.12xlarge 23 | namespace: zenml 24 | custom_image: 25 | health_route: /health 26 | env: 27 | MAX_BATCH_PREFILL_TOKENS: "2048" 28 | MAX_INPUT_LENGTH: "1024" 29 | MAX_TOTAL_TOKENS: "1512" 30 | QUANTIZE: bitsandbytes 31 | MODEL_ID: /repository 32 | url: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-564f2a3 33 | -------------------------------------------------------------------------------- /zencoder/configs/deployment_a100.yaml: 
-------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: requirements.txt 5 | python_package_installer: "uv" 6 | 7 | model: 8 | name: "peft-lora-zencoder15B-personal-copilot" 9 | version: production 10 | 11 | steps: 12 | deploy_model_to_hf_hub: 13 | parameters: 14 | hf_endpoint_cfg: 15 | framework: pytorch 16 | task: text-generation 17 | accelerator: gpu 18 | vendor: aws 19 | region: us-east-1 20 | max_replica: 1 21 | instance_size: xlarge 22 | instance_type: p4de 23 | namespace: zenml 24 | custom_image: 25 | health_route: /health 26 | env: 27 | MAX_BATCH_PREFILL_TOKENS: "2048" 28 | MAX_INPUT_LENGTH: "1024" 29 | MAX_TOTAL_TOKENS: "1512" 30 | QUANTIZE: bitsandbytes 31 | MODEL_ID: /repository 32 | url: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-564f2a3 33 | -------------------------------------------------------------------------------- /zencoder/configs/deployment_t4.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | python_package_installer: "uv" 5 | requirements: requirements.txt 6 | 7 | model: 8 | name: "peft-lora-zencoder15B-personal-copilot" 9 | version: production 10 | 11 | steps: 12 | deploy_model_to_hf_hub: 13 | parameters: 14 | hf_endpoint_cfg: 15 | framework: pytorch 16 | task: text-generation 17 | accelerator: gpu 18 | vendor: aws 19 | region: us-east-1 20 | max_replica: 1 21 | instance_size: large 22 | instance_type: g4dn.12xlarge 23 | namespace: zenml 24 | custom_image: 25 | health_route: /health 26 | env: 27 | MAX_BATCH_PREFILL_TOKENS: "2048" 28 | MAX_INPUT_LENGTH: "1024" 29 | MAX_TOTAL_TOKENS: "1512" 30 | QUANTIZE: bitsandbytes 31 | MODEL_ID: /repository 32 | url: registry.internal.huggingface.tech/api-inference/community/text-generation-inference:sha-564f2a3 33 | -------------------------------------------------------------------------------- /zencoder/configs/generate_code_dataset.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | python_package_installer: "uv" 5 | requirements: requirements.txt 6 | apt_packages: 7 | - git 8 | environment: 9 | HF_HOME: "/tmp/huggingface" 10 | HF_HUB_CACHE: "/tmp/huggingface" 11 | 12 | # pipeline configuration 13 | parameters: 14 | dataset_id: zenml/zenml-codegen-v1 15 | 16 | steps: 17 | mirror_repositories: 18 | parameters: 19 | repositories: 20 | - zenml 21 | -------------------------------------------------------------------------------- /zencoder/license: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | 3 | Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | 5 | Licensed under the Apache License, Version 2.0 (the "License"); 6 | you may not use this file except in compliance with the License. 7 | You may obtain a copy of the License at 8 | 9 | http://www.apache.org/licenses/LICENSE-2.0 10 | 11 | Unless required by applicable law or agreed to in writing, software 12 | distributed under the License is distributed on an "AS IS" BASIS, 13 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | See the License for the specific language governing permissions and 15 | limitations under the License. 
16 | -------------------------------------------------------------------------------- /zencoder/license_header: -------------------------------------------------------------------------------- 1 | Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # -------------------------------------------------------------------------------- /zencoder/materializers/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2024. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | """Implementation of the Huggingface Trainer materializer.""" 15 | 16 | from .huggingface_model_materializer import ( 17 | DEFAULT_TRAINER_MODEL_DIR, 18 | HFTrainerMaterializer, 19 | ) 20 | -------------------------------------------------------------------------------- /zencoder/pipelines/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from .deployment import deployment_pipeline 19 | from .finetune import finetune_starcoder 20 | from .generate_code_dataset import generate_code_dataset 21 | -------------------------------------------------------------------------------- /zencoder/pipelines/deployment.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 
7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 16 | # 17 | 18 | from steps import deploy_model_to_hf_hub, vllm_model_deployer_step 19 | from zenml import pipeline 20 | from zenml.logger import get_logger 21 | 22 | logger = get_logger(__name__) 23 | 24 | 25 | @pipeline 26 | def deployment_pipeline(target: str = "huggingface"): 27 | """Deploys the finetuned model either to the Hugging Face Hub or via vLLM, depending on the target.""" 28 | if target == "huggingface": 29 | deploy_model_to_hf_hub() 30 | elif target == "vllm": 31 | vllm_model_deployer_step() 32 | else: 33 | raise ValueError(f"Invalid target: {target}") 34 | -------------------------------------------------------------------------------- /zencoder/requirements.txt: -------------------------------------------------------------------------------- 1 | zenml[server]>=0.73.0 2 | packaging 3 | notebook 4 | datasets 5 | nbformat 6 | pandas 7 | PyGithub 8 | ninja 9 | 10 | # gcp 11 | kfp>=2.6.0 12 | gcsfs 13 | google-cloud-secret-manager 14 | google-cloud-container>=2.21.0 15 | google-cloud-artifact-registry>=1.11.3 16 | google-cloud-storage>=2.9.0 17 | google-cloud-aiplatform>=1.34.0 18 | google-cloud-build>=3.11.0 19 | kubernetes 20 | 21 | git+https://github.com/huggingface/transformers 22 | git+https://github.com/huggingface/accelerate 23 | git+https://github.com/huggingface/peft 24 | trl 25 | huggingface-hub 26 | bitsandbytes 27 | evaluate 28 | einops 29 | wandb>=0.12.12 30 | tiktoken 31 | deepspeed 32 | tqdm==4.65.0 33 | safetensors 34 | -------------------------------------------------------------------------------- /zencoder/steps/__init__.py: -------------------------------------------------------------------------------- 1 | # Apache Software License 2.0 2 | # 3 | # Copyright (c) ZenML GmbH 2024. All rights reserved. 4 | # 5 | # Licensed under the Apache License, Version 2.0 (the "License"); 6 | # you may not use this file except in compliance with the License. 7 | # You may obtain a copy of the License at 8 | # 9 | # http://www.apache.org/licenses/LICENSE-2.0 10 | # 11 | # Unless required by applicable law or agreed to in writing, software 12 | # distributed under the License is distributed on an "AS IS" BASIS, 13 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 | # See the License for the specific language governing permissions and 15 | # limitations under the License. 
16 | # 17 | 18 | from .deployment import deploy_model_to_hf_hub 19 | from .parallel_clones import mirror_repositories 20 | from .prepare_dataset import prepare_dataset 21 | from .push_dataset_to_hub import push_to_hub 22 | from .trainer import merge_and_push, trainer 23 | from .vllm_deployer import vllm_model_deployer_step 24 | -------------------------------------------------------------------------------- /zencoder/test_starcoder_bigcode.py: -------------------------------------------------------------------------------- 1 | # Write a zenml pipeline that loads sklearn iris dataset and builds a sklearn classifier 2 | 3 | from zenml.pipelines import pipeline 4 | from zenml.steps.evaluator import TFMAEvaluator 5 | from zenml.steps.preprocesser import StandardPreprocesser 6 | from zenml.steps.preprocesser.standard_preprocesser.standard_preprocesser import ( 7 | StandardPreprocesser, 8 | ) 9 | from zenml.steps.trainer import TFFeed 10 | 11 | 12 | @pipeline 13 | def tf_mnist_pipeline(epochs: int = 5, lr: float = 0.001): 14 | """Links all the steps together in a pipeline.""" 15 | # Link all the steps together by calling them and passing the output 16 | # of one step as the input 17 | 18 | # x_train, x_test, y_train, y_test = RandomSplit(test_size=0.2)( 19 | # dataset=iris_data_loader() 20 | # ) 21 | x_train, x_test, y_train, y_test = StandardPreprocesser( 22 | test_size=0.2, 23 | random_state=42, 24 | )(dataset=iris_data_loader()) 25 | model = TFFeed(epochs=epochs, lr=lr)(x_train=x_train, y_train=y_train) 26 | 27 | # Complete the pipeline with evaluation or deployment steps 28 | metrics = TFMAEvaluator()(model=model, x_test=x_test, y_test=y_test) 29 | 30 | return model, metrics 31 | -------------------------------------------------------------------------------- /zencoder/test_zencoder.py: -------------------------------------------------------------------------------- 1 | # Write a zenml pipeline that loads sklearn iris dataset and builds a sklearn classifier 2 | 3 | from sklearn.datasets import load_iris 4 | from sklearn.ensemble import RandomForestClassifier 5 | from sklearn.model_selection import train_test_split 6 | from zenml import pipeline, step 7 | 8 | 9 | @step 10 | def importer() -> pd.DataFrame: 11 | """Load the iris dataset.""" 12 | df = load_iris(as_frame=True)["data"] 13 | return df 14 | 15 | 16 | @step 17 | def trainer(df: pd.DataFrame) -> Any: 18 | """Train a model on the dataset.""" 19 | X_train, X_test, y_train, y_test = train_test_split( 20 | df.to_numpy()[:, :2], 21 | df.to_numpy()[:, 2], 22 | test_size=0.2, 23 | random_state=42, 24 | ) 25 | model = RandomForestClassifier(n_estimators=10) 26 | model.fit(X_train, y_train) 27 | return model 28 | 29 | 30 | @pipeline 31 | def sklearn_pipeline(): 32 | df = importer() 33 | trainer(df) 34 | -------------------------------------------------------------------------------- /zenml-support-agent/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore version control metadata 2 | .git 3 | .gitignore 4 | .github 5 | 6 | # Python cache and artifacts 7 | __pycache__/ 8 | *.py[cod] 9 | *$py.class 10 | *.so 11 | .Python 12 | 13 | # Build outputs 14 | build/ 15 | dist/ 16 | develop-eggs/ 17 | downloads/ 18 | .eggs/ 19 | lib/ 20 | lib64/ 21 | parts/ 22 | sdist/ 23 | var/ 24 | wheels/ 25 | pip-wheel-metadata/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | 31 | # Testing and coverage reports 32 | .pytest_cache/ 33 | .nox/ 34 | .tox/ 35 | .coverage 36 | 
.coverage.* 37 | .cache 38 | nosetests.xml 39 | coverage.xml 40 | *.cover 41 | *.py,cover 42 | .hypothesis/ 43 | 44 | # Virtual environments 45 | .env 46 | .venv* 47 | venv/ 48 | ENV/ 49 | env/ 50 | venv.bak/ 51 | env.bak/ 52 | 53 | # IDE directories 54 | .vscode/ 55 | .idea/ 56 | .spyderproject 57 | .spyproject 58 | .ropeproject 59 | 60 | # OS files 61 | .DS_Store 62 | Thumbs.db 63 | -------------------------------------------------------------------------------- /zenml-support-agent/.flake8: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 79 3 | max-complexity = 18 4 | select = B,C,E,F,W,T4,B9 5 | ignore = E203, E266, E501, W503, F403, F401 6 | -------------------------------------------------------------------------------- /zenml-support-agent/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) ZenML GmbH 2024. All Rights Reserved. 2 | # 3 | # Licensed under the Apache License, Version 2.0 (the "License"); 4 | # you may not use this file except in compliance with the License. 5 | # You may obtain a copy of the License at: 6 | # 7 | # https://www.apache.org/licenses/LICENSE-2.0 8 | # 9 | # Unless required by applicable law or agreed to in writing, software 10 | # distributed under the License is distributed on an "AS IS" BASIS, 11 | # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express 12 | # or implied. See the License for the specific language governing 13 | # permissions and limitations under the License. 14 | -------------------------------------------------------------------------------- /zenml-support-agent/assets/llm-agent/LLM Agent pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/llm-agent/LLM Agent pipeline.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/llm-agent/image.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/llm-agent/image.jpg -------------------------------------------------------------------------------- /zenml-support-agent/assets/llm-agent/model_promotion.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/llm-agent/model_promotion.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/llm-agent/model_version_metadata.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/llm-agent/model_version_metadata.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/llm-agent/model_versions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/llm-agent/model_versions.png -------------------------------------------------------------------------------- 
/zenml-support-agent/assets/slackbot/big-picture-workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/big-picture-workflow.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/slackbot/slack-automated-redeployment.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/slack-automated-redeployment.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/slackbot/slackbot-small.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/slackbot-small.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/slackbot/slackbot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/slackbot.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/slackbot/slackbot_pipeline_project.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/slackbot_pipeline_project.png -------------------------------------------------------------------------------- /zenml-support-agent/assets/slackbot/slackbot_support_redacted.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/assets/slackbot/slackbot_support_redacted.png -------------------------------------------------------------------------------- /zenml-support-agent/configs/agent_config.yaml: -------------------------------------------------------------------------------- 1 | # environment configuration 2 | settings: 3 | docker: 4 | requirements: requirements.txt 5 | required_integrations: ["langchain", "openai", "pillow"] 6 | 7 | model: 8 | name: zenml_agent 9 | license: Apache 2.0 10 | description: "ZenML Agent with a vector store tool." 
11 | tags: ["llm", "agent", "rag"] 12 | 13 | steps: 14 | url_scraper: 15 | enable_cache: False 16 | parameters: 17 | docs_url: "https://docs.zenml.io" 18 | website_url: "https://zenml.io" 19 | repo_url: "https://github.com/zenml-io/zenml/tree/0.55.0/examples" 20 | agent_creator: 21 | enable_cache: False -------------------------------------------------------------------------------- /zenml-support-agent/pipelines/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/pipelines/__init__.py -------------------------------------------------------------------------------- /zenml-support-agent/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "llm-agents" 3 | version = "0.0.1" 4 | description = "A slackbot for ZenML documentation" 5 | authors = ["ZenML"] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.8.1,<3.11" 9 | zenml = { extras = ["server"], version = "0.43.0" } 10 | faiss-cpu = ">=1.7.3" 11 | langchain = "0.0.263" 12 | openai = "0.27.2" 13 | slack-bolt = "1.16.2" 14 | slack-sdk = "3.20.0" 15 | gcsfs = "2023.5.0" 16 | unstructured = "0.5.7" 17 | fastapi = "*" 18 | flask = "*" 19 | uvicorn = "*" 20 | 21 | [tool.poetry.dev-dependencies] 22 | black = "^21.9b0" 23 | isort = "^5.9.3" 24 | pytest = "^6.2.5" 25 | 26 | [build-system] 27 | requires = ["poetry-core>=1.0.0"] 28 | build-backend = "poetry.core.masonry.api" 29 | 30 | [tool.isort] 31 | profile = "black" 32 | known_third_party = [] 33 | skip_glob = [] 34 | line_length = 79 35 | 36 | [tool.black] 37 | line-length = 79 38 | include = '\.pyi?$' 39 | exclude = ''' 40 | /( 41 | \.git 42 | | \.hg 43 | | \.mypy_cache 44 | | \.tox 45 | | \.venv 46 | | _build 47 | | buck-out 48 | | build 49 | )/ 50 | ''' 51 | -------------------------------------------------------------------------------- /zenml-support-agent/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.3.15 2 | langchain-openai==0.3.1 3 | langchain-community 4 | zenml==0.73.0 5 | fastapi 6 | flask 7 | uvicorn 8 | gcsfs==2023.5.0 9 | faiss-cpu==1.7.3 10 | unstructured==0.16.5 11 | tiktoken 12 | bs4 13 | typing_extensions 14 | -------------------------------------------------------------------------------- /zenml-support-agent/steps/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zenml-io/zenml-projects/411d2fbef0b4cff4e04dc387b20e72d36c70fcb1/zenml-support-agent/steps/__init__.py --------------------------------------------------------------------------------