├── .codecov.yml ├── .dockerignore ├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ ├── changelog.yaml │ ├── dependabot-reviewer.yml │ ├── release.yml │ ├── snyk.yml │ └── stale.yml ├── .gitignore ├── .golangci.yml ├── .snyk ├── CHANGELOG.md ├── Dockerfile ├── LICENSE ├── Makefile ├── PROJECT ├── Procfile ├── README.md ├── USERS.md ├── api └── v1alpha1 │ ├── abstract_step.go │ ├── abstract_volume_source.go │ ├── abstract_volume_source_test.go │ ├── aws_credentials.go │ ├── aws_endpoint.go │ ├── backoff.go │ ├── cat.go │ ├── cat_test.go │ ├── code.go │ ├── code_test.go │ ├── const.go │ ├── container.go │ ├── container_builder.go │ ├── container_builder_test.go │ ├── container_supplier.go │ ├── container_test.go │ ├── cron.go │ ├── cron_test.go │ ├── db.go │ ├── db_sink.go │ ├── db_source.go │ ├── db_test.go │ ├── dedupe.go │ ├── expand.go │ ├── expand_test.go │ ├── filter.go │ ├── filter_test.go │ ├── flatten.go │ ├── flatten_test.go │ ├── generated.pb.go │ ├── generated.proto │ ├── get_pod_req.go │ ├── git.go │ ├── git_test.go │ ├── group.go │ ├── group_format.go │ ├── group_test.go │ ├── groupversion_info.go │ ├── http.go │ ├── http_header.go │ ├── http_header_source.go │ ├── http_sink.go │ ├── http_source.go │ ├── http_source_test.go │ ├── interface.go │ ├── jetstream.go │ ├── jetstream_sink.go │ ├── jetstream_source.go │ ├── kafka.go │ ├── kafka_offset.go │ ├── kafka_offset_test.go │ ├── kafka_sink.go │ ├── kafka_sink_test.go │ ├── kafka_source.go │ ├── kafka_source_test.go │ ├── kafka_test.go │ ├── log.go │ ├── map.go │ ├── map_test.go │ ├── meta.go │ ├── meta_test.go │ ├── metadata.go │ ├── pipeline_phase.go │ ├── pipeline_phase_test.go │ ├── pipeline_spec.go │ ├── pipeline_status.go │ ├── pipeline_types.go │ ├── runtime.go │ ├── s3.go │ ├── s3_sink.go │ ├── s3_source.go │ ├── s3_test.go │ ├── sasl.go │ ├── scale.go │ ├── sidecar.go │ ├── sink.go │ ├── source.go │ ├── sources.go │ ├── stan.go │ ├── stan_test.go │ ├── step_phase.go │ ├── step_phase_message.go │ ├── step_phase_message_test.go │ ├── step_spec.go │ ├── step_spec_test.go │ ├── step_status.go │ ├── step_types.go │ ├── step_types_test.go │ ├── string.go │ ├── string_test.go │ ├── subject_prefix.go │ ├── subject_prefix_test.go │ ├── tls.go │ ├── trunc.go │ ├── trunc_test.go │ ├── urner.go │ ├── volume_sink.go │ ├── volume_source.go │ └── zz_generated.deepcopy.go ├── config ├── apps │ ├── argo-server.yaml │ ├── argo-server │ │ ├── argo-server-deploy.yaml │ │ ├── argo-server-role.yaml │ │ ├── argo-server-rolebinding.yaml │ │ └── kustomization.yaml │ ├── jaeger.yaml │ ├── jaeger │ │ ├── kustomization.yaml │ │ └── simplist.yaml │ ├── jetstream.yaml │ ├── jetstream │ │ ├── kustomization.yaml │ │ └── nats-js.yml │ ├── kafka.yaml │ ├── kafka │ │ ├── kafka-minimal.yaml │ │ └── kustomization.yaml │ ├── metrics-server │ │ ├── kustomization.yaml │ │ └── metrics-server-deploy.yaml │ ├── moto.yaml │ ├── moto │ │ ├── kustomization.yaml │ │ ├── moto-statefulset.yaml │ │ └── moto-svc.yaml │ ├── mysql.yaml │ ├── mysql │ │ ├── kustomization.yaml │ │ ├── mysql-secret.yaml │ │ ├── mysql-statefulset.yaml │ │ └── mysql-svc.yaml │ ├── nats │ │ ├── kustomization.yaml │ │ ├── nats-statefulset.yaml │ │ └── single-server-nats.yml │ ├── prometheus │ │ ├── kustomization.yaml │ │ └── monitor.yaml │ ├── stan.yaml │ ├── stan │ │ ├── kustomization.yaml │ │ ├── single-server-stan.yml │ │ └── stan-statefulset.yaml │ └── testapi │ │ ├── kustomization.yaml │ │ ├── test.yaml │ │ ├── testapi-statefulset.yaml │ │ └── testapi-svc.yaml ├── base-patch │ 
├── lead-replica-priorityclass.yaml │ ├── manager_auth_proxy_patch.yaml │ ├── manager_webhook_patch.yaml │ ├── ssh-configmap.yaml │ └── webhookcainjection_patch.yaml ├── certmanager │ ├── certificate.yaml │ ├── kustomization.yaml │ └── kustomizeconfig.yaml ├── ci.yaml ├── ci │ ├── controller-manager-deploy.yaml │ └── kustomization.yaml ├── cluster-quick-start.yaml ├── cluster-quick-start │ ├── controller-manager-deploy.yaml │ └── kustomization.yaml ├── crd │ ├── bases │ │ ├── dataflow.argoproj.io_pipelines.yaml │ │ └── dataflow.argoproj.io_steps.yaml │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── patches │ │ ├── cainjection_in_pipelines.yaml │ │ ├── cainjection_in_steps.yaml │ │ ├── webhook_in_pipelines.yaml │ │ └── webhook_in_steps.yaml ├── default-cluster.yaml ├── default-cluster │ ├── kustomization.yaml │ ├── pipeline-role-binding.yaml │ ├── pipeline-role.yaml │ ├── pipeline-sa.yaml │ └── rbac │ │ ├── cluster_role.yaml │ │ └── cluster_role_binding.yaml ├── default.yaml ├── default │ ├── controller-manager-deploy.yaml │ └── kustomization.yaml ├── dev.yaml ├── dev │ ├── controller-manager-deploy.yaml │ ├── kustomization.yaml │ └── ssh-configmap.yaml ├── kafka │ └── kustomization.yaml ├── manager │ ├── kustomization.yaml │ ├── manager-sa.yaml │ └── manager.yaml ├── quick-start.yaml ├── quick-start │ ├── controller-manager-deploy.yaml │ └── kustomization.yaml ├── rbac │ ├── auth_proxy_client_clusterrole.yaml │ ├── auth_proxy_role.yaml │ ├── auth_proxy_role_binding.yaml │ ├── auth_proxy_service.yaml │ ├── kustomization.yaml │ ├── leader_election_role.yaml │ ├── leader_election_role_binding.yaml │ ├── pipeline-role.yaml │ ├── pipeline-rolebinding.yaml │ ├── pipeline-sa.yaml │ ├── pipeline_editor_role.yaml │ ├── pipeline_viewer_role.yaml │ ├── role.yaml │ ├── role_binding.yaml │ ├── step_editor_role.yaml │ └── step_viewer_role.yaml └── webhook │ ├── kustomization.yaml │ ├── kustomizeconfig.yaml │ └── service.yaml ├── docs ├── BIG_PAYLOAD.md ├── CLI.md ├── CLUSTER_QUICK_START.md ├── CODE.md ├── CONCEPTS.md ├── CONFIGURATION.md ├── CONTRIBUTING.md ├── CUSTOM_IMAGE.md ├── DATAFLOW_VS_X.md ├── EVENTS_INTEROP.md ├── EXAMPLES.md ├── EXPRESSIONS.md ├── FEATURES.md ├── FILES.md ├── GC.md ├── GIT.md ├── IDEMPOTENCE.md ├── IMAGE_CONTRACT.md ├── JAEGER.md ├── KAKFA.md ├── KUBECTL.md ├── LIMITATIONS.md ├── META.md ├── METRICS.md ├── PROCESSORS.md ├── PROPOSAL.md ├── QUICK_START.md ├── READING.md ├── RELEASING.md ├── RELIABILITY.md ├── SCALING.md ├── SECURITY.md ├── SINKS.md ├── SOURCES.md ├── STAN.md ├── STRESS.md ├── VERSIONING.md ├── WORKFLOW_INTEROP.md └── assets │ ├── architecture.png │ └── screenshot.png ├── dsls └── python │ ├── .gitignore │ ├── LICENSE.txt │ ├── MANIFEST │ ├── Makefile │ ├── README │ ├── argo_dataflow │ ├── __init__.py │ └── pipeline.py │ └── setup.py ├── examples ├── 101-hello-pipeline.py ├── 101-hello-pipeline.yaml ├── 101-two-node-pipeline.py ├── 101-two-node-pipeline.yaml ├── 102-dedupe-pipeline.py ├── 102-dedupe-pipeline.yaml ├── 102-filter-pipeline.py ├── 102-filter-pipeline.yaml ├── 102-flatten-expand-pipeline.py ├── 102-flatten-expand-pipeline.yaml ├── 102-map-pipeline.py ├── 102-map-pipeline.yaml ├── 103-autoscaling-pipeline.py ├── 103-autoscaling-pipeline.yaml ├── 103-scaling-pipeline.py ├── 103-scaling-pipeline.yaml ├── 104-golang1-17-pipeline.py ├── 104-golang1-17-pipeline.yaml ├── 104-java16-pipeline.py ├── 104-java16-pipeline.yaml ├── 104-node16-pipeline.py ├── 104-node16-pipeline.yaml ├── 104-python3-9-pipeline.py ├── 104-python3-9-pipeline.yaml ├── 
106-git-go-pipeline.py ├── 106-git-go-pipeline.yaml ├── 106-git-nodejs-pipeline.py ├── 106-git-nodejs-pipeline.yaml ├── 106-git-python-generator-pipeline.py ├── 106-git-python-generator-pipeline.yaml ├── 106-git-python-pipeline.py ├── 106-git-python-pipeline.yaml ├── 107-completion-pipeline.py ├── 107-completion-pipeline.yaml ├── 107-terminator-pipeline.py ├── 107-terminator-pipeline.yaml ├── 108-container-pipeline.py ├── 108-container-pipeline.yaml ├── 108-fifos-pipeline.py ├── 108-fifos-pipeline.yaml ├── 109-group-pipeline.py ├── 109-group-pipeline.yaml ├── 201-vetinary-pipeline.py ├── 201-vetinary-pipeline.yaml ├── 201-word-count-pipeline.py ├── 201-word-count-pipeline.yaml ├── 301-cron-log-pipeline.py ├── 301-cron-log-pipeline.yaml ├── 301-erroring-pipeline.py ├── 301-erroring-pipeline.yaml ├── 301-http-pipeline.py ├── 301-http-pipeline.yaml ├── 301-jetstream-pipeline.py ├── 301-jetstream-pipeline.yaml ├── 301-kafka-pipeline.py ├── 301-kafka-pipeline.yaml ├── 301-stan-pipeline.py ├── 301-stan-pipeline.yaml ├── 301-two-sinks-pipeline.py ├── 301-two-sinks-pipeline.yaml ├── 301-two-source-pipeline.py ├── 301-two-sources-pipeline.yaml ├── dataflow-103-http-main-source-default-secret.yaml ├── dataflow-jetstream-default-secret.yaml ├── dataflow-kafka-default-secret.yaml ├── dataflow-s3-default-secret.yaml ├── dataflow-stan-default-secret.yaml ├── example-hpa.yaml ├── git-nodejs │ ├── handler.js │ ├── index.js │ ├── package.json │ └── start.sh ├── git-python-generator-step │ ├── handler.py │ ├── main.py │ ├── requirements.txt │ └── start.sh ├── git-python │ ├── handler.py │ ├── main.py │ ├── requirements.txt │ └── start.sh ├── git │ ├── handler.go │ └── main.go ├── jupyter │ ├── .gitignore │ ├── Makefile │ ├── example.ipynb │ └── requirements.txt ├── kafka-two-step-pipeline.py ├── main.go ├── pets-configmap.yaml ├── python │ ├── Makefile │ ├── example.py │ └── requirements.txt └── word-count-input-configmap.yaml ├── go.mod ├── go.sum ├── hack ├── boilerplate.go.txt └── changelog.sh ├── kill └── kill.go ├── manager.env ├── manager ├── controllers │ ├── config.go │ ├── infer.go │ ├── infer_test.go │ ├── pipeline_controller.go │ ├── pipeline_controller_test.go │ ├── scaling │ │ ├── funcs.go │ │ ├── funcs_test.go │ │ ├── metrics_cache.go │ │ ├── scaling.go │ │ └── scaling_test.go │ ├── step_controller.go │ └── suite_test.go └── main.go ├── prestop └── main.go ├── runner ├── init │ ├── errors.go │ └── init.go ├── main.go ├── sidecar │ ├── backoff.go │ ├── backoff_test.go │ ├── in.go │ ├── jetstream.go │ ├── kafka.go │ ├── kafka_test.go │ ├── lifecycle.go │ ├── out.go │ ├── s3.go │ ├── shared │ │ ├── kafka │ │ │ ├── kafka.go │ │ │ └── kafka_test.go │ │ ├── nats │ │ │ └── nats.go │ │ └── stan │ │ │ └── stan_conn.go │ ├── sidecar.go │ ├── sink │ │ ├── db │ │ │ └── db.go │ │ ├── http │ │ │ └── http.go │ │ ├── interface.go │ │ ├── jetstream │ │ │ └── jetstream.go │ │ ├── kafka │ │ │ └── kafka.go │ │ ├── log │ │ │ └── log.go │ │ ├── s3 │ │ │ └── s3.go │ │ ├── stan │ │ │ └── stan.go │ │ └── volume │ │ │ └── volume.go │ ├── sinks.go │ ├── source │ │ ├── cron │ │ │ └── cron.go │ │ ├── db │ │ │ └── db.go │ │ ├── http │ │ │ └── http.go │ │ ├── jetstream │ │ │ └── jetstream.go │ │ ├── kafka │ │ │ ├── kafka.go │ │ │ └── stats.go │ │ ├── loadbalanced │ │ │ └── loadbalanced.go │ │ ├── s3 │ │ │ └── s3.go │ │ ├── source.go │ │ ├── stan │ │ │ └── stan.go │ │ └── volume │ │ │ └── volume.go │ ├── sources.go │ ├── stan.go │ └── tls │ │ ├── tls.go │ │ └── tls_test.go └── util │ ├── expr_env.go │ ├── expr_env_test.go │ 
├── sha1.go │ └── sha1_test.go ├── runtimes ├── golang1-17 │ ├── entrypoint.sh │ ├── handler.go │ └── main.go ├── java16 │ ├── .dockerignore │ ├── .gitignore │ ├── Handler.java │ ├── Main.java │ └── entrypoint.sh ├── node16 │ ├── entrypoint.sh │ ├── handler.js │ ├── index.js │ └── package.json └── python3-9 │ ├── entrypoint.sh │ ├── handler.py │ └── main.py ├── sdks ├── golang │ ├── crash.go │ ├── gen.sh │ ├── meta.go │ └── start.go ├── nodejs │ ├── index.js │ ├── package-lock.json │ └── package.json └── python │ ├── .gitignore │ ├── LICENSE │ ├── MANIFEST │ ├── Makefile │ ├── README.md │ ├── build │ └── lib │ │ └── argo_dataflow_sdk │ │ └── __init__.py │ ├── pyproject.toml │ ├── requirements.txt │ ├── setup.cfg │ ├── src │ └── argo_dataflow_sdk │ │ ├── __init__.py │ │ └── main.py │ └── tests │ ├── fixtures │ ├── default_step_async_error_handler │ │ └── app.py │ ├── default_step_async_handler │ │ └── app.py │ ├── default_step_error_handler │ │ └── app.py │ ├── default_step_handler │ │ └── app.py │ ├── default_step_termination_handler │ │ └── app.py │ ├── generator_step_async_error_handler │ │ └── app.py │ ├── generator_step_async_handler │ │ └── app.py │ ├── generator_step_error_handler │ │ └── app.py │ └── generator_step_handler │ │ └── app.py │ └── integration │ └── test_integration.py ├── shared ├── builtin │ ├── cat │ │ ├── cat.go │ │ └── cat_test.go │ ├── dedupe │ │ ├── dedupe.go │ │ ├── dedupe_test.go │ │ ├── item.go │ │ ├── items.go │ │ ├── uniq_items.go │ │ └── uniq_items_test.go │ ├── exec.go │ ├── expand │ │ ├── expand.go │ │ └── expand_test.go │ ├── filter │ │ ├── filter.go │ │ └── filter_test.go │ ├── flatten │ │ ├── flatten.go │ │ └── flatten_test.go │ ├── group │ │ ├── group.go │ │ └── group_test.go │ └── map │ │ ├── map.go │ │ └── map_test.go ├── containerkiller │ └── container_killer.go ├── debug │ ├── debug.go │ └── debug_test.go ├── podexec │ └── podexec.go ├── symbol │ └── symbol.go └── util │ ├── .gitignore │ ├── cpu.go │ ├── env.go │ ├── env_test.go │ ├── equal.go │ ├── error.go │ ├── func.go │ ├── func_test.go │ ├── hash.go │ ├── hash_test.go │ ├── io.go │ ├── json.go │ ├── json_test.go │ ├── log.go │ ├── log_test.go │ ├── print.go │ ├── print_test.go │ ├── process.go │ ├── rand.go │ ├── rand_test.go │ ├── resource.go │ ├── retry │ └── retry.go │ ├── uid.go │ ├── uid_test.go │ └── version.go ├── test ├── configmap.go ├── count.go ├── db-e2e │ ├── db_sink_test.go │ └── db_source_test.go ├── e2e │ ├── cat_step_test.go │ ├── completion_test.go │ ├── container_step_test.go │ ├── cron_test.go │ ├── dedupe_test.go │ ├── dlq_test.go │ ├── expand_step_test.go │ ├── filter_step_test.go │ ├── flatten_step_test.go │ ├── git_step_test.go │ ├── golang_code_step_test.go │ ├── http_test.go │ ├── java_code_step_test.go │ ├── map_step_test.go │ ├── messages_test.go │ ├── metrics_test.go │ ├── python_code_step_test.go │ ├── volume_sink_test.go │ └── volume_source_test.go ├── examples │ └── examples_test.go ├── fixtures.go ├── http-fmea │ └── http_fmea_test.go ├── http-stress │ ├── http_stress_test.go │ └── test-results.json ├── http.go ├── jetstream-e2e │ └── jetstream_test.go ├── jetstream-fmea │ └── jetstream_fmea_test.go ├── jetstream-stress │ ├── jetstream_stress_test.go │ └── test-results.json ├── jetstream.go ├── kafka-e2e │ └── kafka_test.go ├── kafka-fmea │ └── kafka_fmea_test.go ├── kafka-stress │ ├── kafka_stress_test.go │ └── test-results.json ├── kafka.go ├── log.go ├── log_sink.go ├── matchers.go ├── metric.go ├── panic.go ├── pipeline.go ├── pod.go ├── port_forward.go ├── 
s3-e2e │ ├── s3.go │ ├── s3_sink_test.go │ └── s3_source_test.go ├── secrets.go ├── service.go ├── stall.go ├── stan-e2e │ └── stan_test.go ├── stan-fmea │ └── stan_fmea_test.go ├── stan-stress │ ├── stan_stress_test.go │ └── test-results.json ├── stan.go ├── statefulset.go ├── stress │ ├── context.go │ ├── params.go │ ├── results.go │ ├── tps.go │ └── tps_test.go ├── testapi.go ├── unstructured.go └── wait.go └── testapi ├── count.go ├── funny_animals.go ├── http.go ├── jetstream.go ├── kafka.go ├── kafka_stats.go ├── main.go ├── message_factory.go ├── ready.go └── stan.go /.codecov.yml: -------------------------------------------------------------------------------- 1 | ignore: 2 | - .github 3 | - api/v1alpha1/generated.pb.go 4 | - api/v1alpha1/zz_generated.deepcopy.go 5 | - bin 6 | - config 7 | - docs 8 | - test 9 | - examples 10 | - hack 11 | - kill 12 | - manager 13 | - runner 14 | - runtimes 15 | - shared/containerkiller 16 | - shared/podexec 17 | - shared/util/error.go 18 | - shared/util/io.go 19 | coverage: 20 | status: 21 | # we've found this not to be useful 22 | patch: off 23 | project: 24 | default: 25 | # allow test coverage to drop by 2%, assume that it's typically due to CI problems 26 | threshold: 2 -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | *.iml 2 | .github 3 | .idea 4 | .snyk 5 | cover.out 6 | LICENSE 7 | Makefile 8 | Procfile 9 | PROJECT 10 | bin 11 | config 12 | docs 13 | examples 14 | hack 15 | test -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "gomod" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | ignore: 8 | - dependency-name: k8s.io/* 9 | open-pull-requests-limit: 2 10 | 11 | - package-ecosystem: "github-actions" 12 | directory: "/" 13 | schedule: 14 | interval: "daily" 15 | -------------------------------------------------------------------------------- /.github/workflows/changelog.yaml: -------------------------------------------------------------------------------- 1 | name: Changelog 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | - "!v0.0.0" 8 | jobs: 9 | generate_changelog: 10 | runs-on: ubuntu-latest 11 | name: Generate changelog 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | ref: main 16 | fetch-depth: 0 17 | - run: git fetch --prune --prune-tags 18 | - run: git tag -l 'v*' 19 | # avoid invoking `make` to reduce the risk of a Makefile bug failing this workflow 20 | - run: ./hack/changelog.sh > CHANGELOG.md 21 | - uses: peter-evans/create-pull-request@v5 22 | with: 23 | title: 'docs: updated CHANGELOG.md' 24 | commit-message: 'docs: updated CHANGELOG.md' 25 | signoff: true -------------------------------------------------------------------------------- /.github/workflows/dependabot-reviewer.yml: -------------------------------------------------------------------------------- 1 | # https://docs.github.com/en/code-security/dependabot/working-with-dependabot/automating-dependabot-with-github-actions 2 | name: Approve and enable auto-merge for dependabot 3 | on: pull_request 4 | 5 | permissions: 6 | pull-requests: write 7 | contents: write 8 | 9 | jobs: 10 | review: 11 | runs-on: ubuntu-latest 12 | if: ${{ github.actor == 'dependabot[bot]' }} 13 | steps: 14 | - name: Dependabot metadata 15 | id: metadata 16 | 
uses: dependabot/fetch-metadata@v1.5.1 17 | with: 18 | github-token: "${{ secrets.GITHUB_TOKEN }}" 19 | - name: Approve PR 20 | run: gh pr review --approve "$PR_URL" 21 | env: 22 | PR_URL: ${{github.event.pull_request.html_url}} 23 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} 24 | - name: Enable auto-merge for Dependabot PRs 25 | run: gh pr merge --auto --squash "$PR_URL" 26 | env: 27 | PR_URL: ${{github.event.pull_request.html_url}} 28 | GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}} -------------------------------------------------------------------------------- /.github/workflows/snyk.yml: -------------------------------------------------------------------------------- 1 | name: Snyk 2 | on: 3 | schedule: 4 | - cron: "30 2 * * *" 5 | jobs: 6 | security: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v3 10 | - name: Run Snyk to check for vulnerabilities 11 | uses: snyk/actions/golang@master 12 | env: 13 | SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} 14 | with: 15 | args: --severity-threshold=high 16 | -------------------------------------------------------------------------------- /.github/workflows/stale.yml: -------------------------------------------------------------------------------- 1 | name: Mark stale issues and pull requests 2 | 3 | on: 4 | schedule: 5 | - cron: "30 1 * * *" 6 | 7 | jobs: 8 | stale: 9 | 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/stale@v8 14 | with: 15 | repo-token: ${{ secrets.GITHUB_TOKEN }} 16 | stale-issue-message: 'Stale issue message' 17 | stale-pr-message: 'Stale pull request message' 18 | stale-issue-label: 'no-issue-activity' 19 | stale-pr-label: 'no-pr-activity' 20 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | 2 | # Binaries for programs and plugins 3 | *.exe 4 | *.exe~ 5 | *.dll 6 | *.so 7 | *.dylib 8 | bin 9 | 10 | # Test binary, build with `go test -c` 11 | *.test 12 | 13 | # Output of the go coverage tool, specifically when used with LiteIDE 14 | *.out 15 | 16 | # Kubernetes Generated files - skip generated files, except for vendored files 17 | 18 | !vendor/**/zz_generated.* 19 | 20 | # editor and IDE paraphernalia 21 | .idea 22 | *.iml 23 | *.swp 24 | *.swo 25 | *~ 26 | .vscode 27 | 28 | # Used in nodejs SDK 29 | node_modules 30 | 31 | # Used in Python SDK 32 | venv 33 | *.egg-info 34 | sdks/python/build/* 35 | dsls/python/build/* 36 | -------------------------------------------------------------------------------- /.golangci.yml: -------------------------------------------------------------------------------- 1 | # https://golangci-lint.run/usage/quick-start/ 2 | run: 3 | concurrency: 4 4 | timeout: 5m 5 | build-tags: 6 | - test 7 | linters: 8 | enable: 9 | - bodyclose 10 | - deadcode 11 | - errcheck 12 | # only minor issues 13 | # - errorlint 14 | - exportloopref 15 | - gci 16 | - godot 17 | # too many false-positives 18 | # - gosec 19 | - gosimple 20 | - govet 21 | - ineffassign 22 | - misspell 23 | - rowserrcheck 24 | - sqlclosecheck 25 | - staticcheck 26 | - structcheck 27 | - typecheck 28 | - unparam 29 | - unused 30 | - whitespace 31 | - varcheck 32 | issues: 33 | exclude-rules: 34 | - linters: 35 | - staticcheck 36 | # SA1029: should not use built-in type string as key for value; define your own type to avoid collisions 37 | text: "SA1029:" -------------------------------------------------------------------------------- /.snyk: 
-------------------------------------------------------------------------------- 1 | # Snyk (https://snyk.io) policy file, patches or ignores known vulnerabilities. 2 | version: v1.19.0 3 | # ignores vulnerabilities until expiry date; change duration by modifying expiry date 4 | ignore: 5 | SNYK-GOLANG-GITHUBCOMDGRIJALVAJWTGO-596515: 6 | - '*': 7 | reason: None Given 8 | expires: 2021-10-17T22:25:16.559Z 9 | created: 2021-07-18T22:25:16.562Z 10 | patch: {} 11 | -------------------------------------------------------------------------------- /PROJECT: -------------------------------------------------------------------------------- 1 | domain: argoproj.io 2 | repo: github.com/argoproj-labs/argo-dataflow 3 | resources: 4 | - group: dataflow 5 | kind: Pipeline 6 | version: v1alpha1 7 | - group: dataflow 8 | kind: Step 9 | version: v1alpha1 10 | version: "2" 11 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | controller: source manager.env && go run -race -ldflags="-X 'github.com/argoproj-labs/argo-dataflow/shared/util.version=v0.0.0-latest-0'" ./manager 2 | argocli: [ "$UI" = true ] && make argocli 3 | ui: [ "$UI" = true ] && make ui 4 | jaeger: [ "$JAEGER_DISABLED" = false ] && make jaeger || make nojaeger -------------------------------------------------------------------------------- /USERS.md: -------------------------------------------------------------------------------- 1 | ## Who uses Dataflow? 2 | 3 | As the community grows, we'd like to keep track of our users. Please send a pull request with your organization or 4 | project. 5 | 6 | 1. [Intuit](https://www.intuit.com/) -------------------------------------------------------------------------------- /api/v1alpha1/abstract_step.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 | type AbstractStep struct { 6 | // +kubebuilder:default={limits: {"cpu": "500m", "memory": "256Mi"}, requests: {"cpu": "100m", "memory": "64Mi"}} 7 | Resources corev1.ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,1,opt,name=resources"` 8 | } 9 | -------------------------------------------------------------------------------- /api/v1alpha1/abstract_volume_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | 7 | corev1 "k8s.io/api/core/v1" 8 | ) 9 | 10 | type AbstractVolumeSource corev1.VolumeSource 11 | 12 | func (in AbstractVolumeSource) getURNParts() (kind string, name string) { 13 | if v := in.ConfigMap; v != nil { 14 | return "configmap", v.Name 15 | } else if v := in.PersistentVolumeClaim; v != nil { 16 | return "persistentvolumeclaim", v.ClaimName 17 | } else if v := in.Secret; v != nil { 18 | return "secret", v.SecretName 19 | } 20 | panic(fmt.Errorf("unsupported volume source %v", in)) 21 | } 22 | 23 | func (in AbstractVolumeSource) GenURN(cluster, namespace string) string { 24 | kind, name := in.getURNParts() 25 | return fmt.Sprintf("urn:dataflow:volume:%s:%s.%s.%s.%s", strings.ToLower(kind), name, kind, namespace, cluster) 26 | } 27 | -------------------------------------------------------------------------------- /api/v1alpha1/aws_credentials.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 |
type AWSCredentials struct { 6 | AccessKeyID corev1.SecretKeySelector `json:"accessKeyId" protobuf:"bytes,1,opt,name=accessKeyId"` 7 | SecretAccessKey corev1.SecretKeySelector `json:"secretAccessKey" protobuf:"bytes,2,opt,name=secretAccessKey"` 8 | SessionToken corev1.SecretKeySelector `json:"sessionToken" protobuf:"bytes,3,opt,name=sessionToken"` 9 | } 10 | -------------------------------------------------------------------------------- /api/v1alpha1/aws_endpoint.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type AWSEndpoint struct { 4 | URL string `json:"url" protobuf:"bytes,1,opt,name=url"` 5 | } 6 | -------------------------------------------------------------------------------- /api/v1alpha1/backoff.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type Backoff struct { 8 | // +kubebuilder:default="100ms" 9 | Duration *metav1.Duration `json:"duration,omitempty" protobuf:"bytes,4,opt,name=duration"` 10 | // +kubebuilder:default=200 11 | FactorPercentage uint32 `json:"factorPercentage,omitempty" protobuf:"varint,5,opt,name=FactorPercentage"` 12 | // the number of backoff steps, zero means no retries 13 | // +kubebuilder:default=20 14 | Steps uint64 `json:"steps,omitempty" protobuf:"varint,1,opt,name=steps"` 15 | // +kubebuilder:default="0ms" 16 | Cap *metav1.Duration `json:"cap,omitempty" protobuf:"bytes,2,opt,name=cap"` 17 | // the amount of jitter per step, typically 10-20%, >100% is valid, but strange 18 | // +kubebuilder:default=10 19 | JitterPercentage uint32 `json:"jitterPercentage,omitempty" protobuf:"varint,3,opt,name=jitterPercentage"` 20 | } 21 | -------------------------------------------------------------------------------- /api/v1alpha1/cat.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | type Cat struct { 8 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 9 | } 10 | 11 | func (m Cat) getContainer(req getContainerReq) corev1.Container { 12 | return containerBuilder{}. 13 | init(req). 14 | args("cat"). 15 | resources(m.Resources). 
16 | build() 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/cat_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | v1 "k8s.io/api/core/v1" 8 | "k8s.io/apimachinery/pkg/api/resource" 9 | ) 10 | 11 | func TestCat_getContainer(t *testing.T) { 12 | x := Cat{} 13 | c := x.getContainer(getContainerReq{}) 14 | assert.Equal(t, []string{"cat"}, c.Args) 15 | assert.Equal(t, x.Resources, c.Resources) 16 | 17 | resource := v1.ResourceRequirements{ 18 | Requests: v1.ResourceList{ 19 | 20 | v1.ResourceMemory: resource.MustParse("2Gi"), 21 | }, 22 | } 23 | x = Cat{AbstractStep: AbstractStep{Resources: resource}} 24 | c = x.getContainer(getContainerReq{}) 25 | assert.Equal(t, resource.Requests.Memory(), c.Resources.Requests.Memory()) 26 | } 27 | -------------------------------------------------------------------------------- /api/v1alpha1/code.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | 6 | corev1 "k8s.io/api/core/v1" 7 | ) 8 | 9 | type Code struct { 10 | Runtime Runtime `json:"runtime,omitempty" protobuf:"bytes,4,opt,name=runtime,casttype=Runtime"` 11 | // Image is used in preference to Runtime. 12 | Image string `json:"image,omitempty" protobuf:"bytes,5,opt,name=image"` 13 | Source string `json:"source" protobuf:"bytes,3,opt,name=source"` 14 | } 15 | 16 | func (in Code) getContainer(req getContainerReq) corev1.Container { 17 | image := in.Image 18 | if image == "" { 19 | image = fmt.Sprintf(req.imageFormat, "dataflow-"+in.Runtime) 20 | } 21 | return containerBuilder{}. 22 | init(req). 23 | image(image). 24 | build() 25 | } 26 | -------------------------------------------------------------------------------- /api/v1alpha1/code_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestCode_getContainer(t *testing.T) { 10 | t.Run("Runtime", func(t *testing.T) { 11 | x := Code{Runtime: "my-runtime"} 12 | c := x.getContainer(getContainerReq{imageFormat: "fmt-%s"}) 13 | assert.Equal(t, "fmt-dataflow-my-runtime", c.Image) 14 | }) 15 | t.Run("Image", func(t *testing.T) { 16 | x := Code{Image: "my-image"} 17 | c := x.getContainer(getContainerReq{}) 18 | assert.Equal(t, "my-image", c.Image) 19 | }) 20 | } 21 | -------------------------------------------------------------------------------- /api/v1alpha1/container_builder_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_containerBuilder(t *testing.T) { 10 | c := containerBuilder{}. 11 | init(getContainerReq{}).
12 | build() 13 | assert.Equal(t, "main", c.Name) 14 | assert.Len(t, c.VolumeMounts, 0) 15 | assert.Equal(t, standardResources, c.Resources) 16 | } 17 | -------------------------------------------------------------------------------- /api/v1alpha1/container_supplier.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 | type getContainerReq struct { 6 | env []corev1.EnvVar 7 | imageFormat string 8 | imagePullPolicy corev1.PullPolicy 9 | lifecycle *corev1.Lifecycle 10 | runnerImage string 11 | securityContext *corev1.SecurityContext 12 | volumeMounts []corev1.VolumeMount 13 | } 14 | 15 | type containerSupplier interface { 16 | getContainer(req getContainerReq) corev1.Container 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/cron.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type Cron struct { 8 | Schedule string `json:"schedule" protobuf:"bytes,1,opt,name=schedule"` 9 | // +kubebuilder:default="2006-01-02T15:04:05Z07:00" 10 | Layout string `json:"layout,omitempty" protobuf:"bytes,2,opt,name=layout"` 11 | } 12 | 13 | func (in Cron) GenURN(cluster, namespace string) string { 14 | return fmt.Sprintf("urn:dataflow:cron:%s", in.Schedule) 15 | } 16 | -------------------------------------------------------------------------------- /api/v1alpha1/cron_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestCron_GenURN(t *testing.T) { 10 | urn := Cron{Schedule: "* * * * *"}.GenURN(cluster, namespace) 11 | assert.Equal(t, "urn:dataflow:cron:* * * * *", urn) 12 | } 13 | -------------------------------------------------------------------------------- /api/v1alpha1/db_sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type DBSink struct { 4 | Database `json:",inline" protobuf:"bytes,1,opt,name=database"` 5 | Actions []SQLAction `json:"actions,omitempty" protobuf:"bytes,2,rep,name=actions"` 6 | } 7 | -------------------------------------------------------------------------------- /api/v1alpha1/db_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type DBSource struct { 8 | Database `json:",inline" protobuf:"bytes,1,opt,name=database"` 9 | Query string `json:"query,omitempty" protobuf:"bytes,2,opt,name=query"` 10 | OffsetColumn string `json:"offsetColumn,omitempty" protobuf:"bytes,3,opt,name=offsetColumn"` 11 | // +kubebuilder:default="1s" 12 | PollInterval metav1.Duration `json:"pollInterval,omitempty" protobuf:"bytes,4,opt,name=pollInterval"` 13 | // +kubebuilder:default="5s" 14 | CommitInterval metav1.Duration `json:"commitInterval,omitempty" protobuf:"bytes,5,opt,name=commitInterval"` 15 | // +kubebuilder:default=true 16 | InitSchema bool `json:"initSchema,omitempty" protobuf:"bytes,6,opt,name=initSchema"` 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/db_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | 
"github.com/stretchr/testify/assert" 7 | corev1 "k8s.io/api/core/v1" 8 | ) 9 | 10 | func TestDatabase_GenURN(t *testing.T) { 11 | t.Run("Value", func(t *testing.T) { 12 | urn := Database{DataSource: &DBDataSource{Value: "my-value"}}.GenURN(cluster, namespace) 13 | assert.Equal(t, "urn:dataflow:db:my-value", urn) 14 | }) 15 | t.Run("ValueFrom", func(t *testing.T) { 16 | urn := Database{DataSource: &DBDataSource{ValueFrom: &DBDataSourceFrom{SecretKeyRef: &corev1.SecretKeySelector{ 17 | LocalObjectReference: corev1.LocalObjectReference{Name: "my-name"}, 18 | Key: "my-key", 19 | }}}}.GenURN(cluster, namespace) 20 | assert.Equal(t, "urn:dataflow:db:my-name.secret.my-ns.my-cluster:my-key", urn) 21 | }) 22 | } 23 | -------------------------------------------------------------------------------- /api/v1alpha1/dedupe.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | "k8s.io/apimachinery/pkg/api/resource" 6 | ) 7 | 8 | // https://segment.com/blog/exactly-once-delivery/ 9 | 10 | type Dedupe struct { 11 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 12 | 13 | // +kubebuilder:default="sha1(msg)" 14 | UID string `json:"uid,omitempty" protobuf:"bytes,2,opt,name=uid"` 15 | // MaxSize is the maximum number of entries to keep in the in-memory database used to store recent UIDs. 16 | // Larger number mean bigger windows of time for dedupe, but greater memory usage. 17 | // +kubebuilder:default="1M" 18 | MaxSize resource.Quantity `json:"maxSize,omitempty" protobuf:"bytes,3,opt,name=maxSize"` 19 | } 20 | 21 | func (d Dedupe) getContainer(req getContainerReq) corev1.Container { 22 | return containerBuilder{}. 23 | init(req). 24 | args("dedupe", d.UID, d.MaxSize.String()). 25 | enablePrometheus(). 26 | resources(d.Resources). 27 | build() 28 | } 29 | -------------------------------------------------------------------------------- /api/v1alpha1/expand.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | type Expand struct { 8 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 9 | } 10 | 11 | func (m Expand) getContainer(req getContainerReq) corev1.Container { 12 | return containerBuilder{}. 13 | init(req). 14 | args("expand"). 15 | resources(m.Resources). 
16 | build() 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/expand_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestExpand_getContainer(t *testing.T) { 10 | x := Expand{ 11 | AbstractStep{Resources: standardResources}, 12 | } 13 | c := x.getContainer(getContainerReq{}) 14 | assert.Equal(t, []string{"expand"}, c.Args) 15 | assert.Equal(t, c.Resources, standardResources) 16 | } 17 | -------------------------------------------------------------------------------- /api/v1alpha1/filter.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | type Filter struct { 8 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 9 | Expression string `json:"expression" protobuf:"bytes,2,opt,name=expression"` 10 | } 11 | 12 | func (m Filter) getContainer(req getContainerReq) corev1.Container { 13 | return containerBuilder{}. 14 | init(req). 15 | args("filter", m.Expression). 16 | resources(m.Resources). 17 | build() 18 | } 19 | -------------------------------------------------------------------------------- /api/v1alpha1/filter_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestFilter_getContainer(t *testing.T) { 10 | x := &Filter{ 11 | Expression: "my-filter", 12 | AbstractStep: AbstractStep{ 13 | Resources: standardResources, 14 | }, 15 | } 16 | c := x.getContainer(getContainerReq{}) 17 | assert.Equal(t, []string{"filter", "my-filter"}, c.Args) 18 | assert.Equal(t, c.Resources, standardResources) 19 | } 20 | -------------------------------------------------------------------------------- /api/v1alpha1/flatten.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | type Flatten struct { 8 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 9 | } 10 | 11 | func (m *Flatten) getContainer(req getContainerReq) corev1.Container { 12 | return containerBuilder{}. 13 | init(req). 14 | args("flatten"). 15 | resources(m.Resources). 
16 | build() 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/flatten_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestFlatten_getContainer(t *testing.T) { 10 | x := Flatten{ 11 | AbstractStep{Resources: standardResources}, 12 | } 13 | c := x.getContainer(getContainerReq{}) 14 | assert.Equal(t, []string{"flatten"}, c.Args) 15 | assert.Equal(t, c.Resources, standardResources) 16 | } 17 | -------------------------------------------------------------------------------- /api/v1alpha1/git_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | corev1 "k8s.io/api/core/v1" 8 | ) 9 | 10 | func TestGit_getContainer(t *testing.T) { 11 | x := Git{ 12 | Image: "my-image", 13 | Command: []string{"my-command"}, 14 | Env: []corev1.EnvVar{{Name: "my-env"}}, 15 | } 16 | c := x.getContainer(getContainerReq{}) 17 | 18 | assert.Equal(t, x.Image, c.Image) 19 | assert.Equal(t, x.Command, c.Command) 20 | assert.Equal(t, x.Env, c.Env) 21 | assert.Equal(t, PathWorkingDir, c.WorkingDir) 22 | } 23 | -------------------------------------------------------------------------------- /api/v1alpha1/group_format.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum="";JSONBytesArray;JSONStringArray 4 | type GroupFormat string 5 | 6 | const ( 7 | GroupFormatUnknown GroupFormat = "" // all messages are sent one by one - probably not what you want 8 | GroupFormatJSONBytesArray GroupFormat = "JSONBytesArray" // messages are sent as an array where each element is a base 64 encoded string 9 | GroupFormatJSONStringArray GroupFormat = "JSONStringArray" // messages are sent as an array where each element is a string 10 | ) 11 | -------------------------------------------------------------------------------- /api/v1alpha1/group_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | corev1 "k8s.io/api/core/v1" 8 | ) 9 | 10 | func TestGroup_getContainer(t *testing.T) { 11 | x := Group{ 12 | Key: "my-key", 13 | EndOfGroup: "my-eog", 14 | Format: "my-fmt", 15 | Storage: &Storage{ 16 | Name: "my-storage", 17 | SubPath: "my-sub-path", 18 | }, 19 | } 20 | c := x.getContainer(getContainerReq{}) 21 | assert.Equal(t, []string{"group", "my-key", "my-eog", "my-fmt"}, c.Args) 22 | assert.Contains(t, c.VolumeMounts, corev1.VolumeMount{Name: "my-storage", MountPath: "/var/run/argo-dataflow/groups", SubPath: "my-sub-path"}) 23 | } 24 | -------------------------------------------------------------------------------- /api/v1alpha1/http.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type HTTP struct{} 4 | -------------------------------------------------------------------------------- /api/v1alpha1/http_header.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type HTTPHeader struct { 4 | Name string `json:"name" protobuf:"bytes,1,opt,name=name"` 5 | Value string `json:"value,omitempty" protobuf:"bytes,2,opt,name=value"` 6 | ValueFrom *HTTPHeaderSource
`json:"valueFrom,omitempty" protobuf:"bytes,3,opt,name=valueFrom"` 7 | } 8 | -------------------------------------------------------------------------------- /api/v1alpha1/http_header_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 | type HTTPHeaderSource struct { 6 | SecretKeyRef corev1.SecretKeySelector `json:"secretKeyRef" protobuf:"bytes,1,opt,name=secretKeyRef"` 7 | } 8 | -------------------------------------------------------------------------------- /api/v1alpha1/http_sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type HTTPSink struct { 4 | URL string `json:"url" protobuf:"bytes,1,opt,name=url"` 5 | Headers []HTTPHeader `json:"headers,omitempty" protobuf:"bytes,2,rep,name=headers"` 6 | InsecureSkipVerify bool `json:"insecureSkipVerify,omitempty" protobuf:"varint,3,opt,name=insecureSkipVerify"` 7 | } 8 | -------------------------------------------------------------------------------- /api/v1alpha1/http_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type HTTPSource struct { 8 | ServiceName string `json:"serviceName,omitempty" protobuf:"bytes,1,opt,name=serviceName"` // the service name to create, defaults to `${pipelineName}-${stepName}`. 9 | } 10 | 11 | func (in HTTPSource) GenURN(cluster, namespace string) string { 12 | return fmt.Sprintf("urn:dataflow:http:https://%s.svc.%s.%s", in.ServiceName, namespace, cluster) 13 | } 14 | -------------------------------------------------------------------------------- /api/v1alpha1/http_source_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestHTTPSource_GenURN(t *testing.T) { 10 | urn := HTTPSource{ 11 | ServiceName: "my-name", 12 | }.GenURN(cluster, namespace) 13 | assert.Equal(t, "urn:dataflow:http:https://my-name.svc.my-ns.my-cluster", urn) 14 | } 15 | -------------------------------------------------------------------------------- /api/v1alpha1/interface.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Interface struct { 4 | FIFO bool `json:"fifo,omitempty" protobuf:"varint,1,opt,name=fifo"` 5 | HTTP *HTTP `json:"http,omitempty" protobuf:"bytes,2,opt,name=http"` 6 | } 7 | 8 | var DefaultInterface = &Interface{HTTP: &HTTP{}} 9 | -------------------------------------------------------------------------------- /api/v1alpha1/jetstream.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type JetStream struct { 4 | // +kubebuilder:default=default 5 | Name string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"` 6 | NATSURL string `json:"natsUrl,omitempty" protobuf:"bytes,2,opt,name=natsUrl"` 7 | Subject string `json:"subject" protobuf:"bytes,3,opt,name=subject"` 8 | Auth *NATSAuth `json:"auth,omitempty" protobuf:"bytes,4,opt,name=auth"` 9 | } 10 | -------------------------------------------------------------------------------- /api/v1alpha1/jetstream_sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type JetStreamSink struct { 4 | JetStream `json:",inline" protobuf:"bytes,1,opt,name=jetstream"` 5 | 
} 6 | -------------------------------------------------------------------------------- /api/v1alpha1/jetstream_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import "fmt" 4 | 5 | type JetStreamSource struct { 6 | JetStream `json:",inline" protobuf:"bytes,1,opt,name=jetstream"` 7 | } 8 | 9 | func (j JetStreamSource) GenURN(cluster, namespace string) string { 10 | return fmt.Sprintf("urn:dataflow:jetstream:%s:%s", j.NATSURL, j.Subject) 11 | } 12 | -------------------------------------------------------------------------------- /api/v1alpha1/kafka_offset.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum=First;Last 4 | type KafkaOffset string 5 | 6 | func (k KafkaOffset) GetAutoOffsetReset() string { 7 | switch k { 8 | case "First": 9 | return "earliest" 10 | default: 11 | return "latest" 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /api/v1alpha1/kafka_offset_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestKafkaOffset_GetAutoOffsetReset(t *testing.T) { 10 | t.Run("First", func(t *testing.T) { 11 | assert.Equal(t, "earliest", KafkaOffset("First").GetAutoOffsetReset()) 12 | }) 13 | t.Run("Last", func(t *testing.T) { 14 | assert.Equal(t, "latest", KafkaOffset("Last").GetAutoOffsetReset()) 15 | }) 16 | } 17 | -------------------------------------------------------------------------------- /api/v1alpha1/kafka_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestKafka_GenURN(t *testing.T) { 10 | urn := Kafka{ 11 | KafkaConfig: KafkaConfig{ 12 | Brokers: []string{"my-broker"}, 13 | }, 14 | Topic: "my-topic", 15 | }.GenURN(cluster, namespace) 16 | assert.Equal(t, "urn:dataflow:kafka:my-broker:my-topic", urn) 17 | } 18 | 19 | func TestKafkaNET_GetSecurityProtocol(t *testing.T) { 20 | t.Run("plaintext", func(t *testing.T) { 21 | n := KafkaNET{} 22 | assert.Equal(t, "plaintext", n.GetSecurityProtocol()) 23 | }) 24 | t.Run("ssl", func(t *testing.T) { 25 | n := KafkaNET{TLS: &TLS{}} 26 | assert.Equal(t, "ssl", n.GetSecurityProtocol()) 27 | }) 28 | t.Run("sasl_plaintext", func(t *testing.T) { 29 | n := KafkaNET{SASL: &SASL{}} 30 | assert.Equal(t, "sasl_plaintext", n.GetSecurityProtocol()) 31 | }) 32 | t.Run("sasl_ssl", func(t *testing.T) { 33 | n := KafkaNET{TLS: &TLS{}, SASL: &SASL{}} 34 | assert.Equal(t, "sasl_ssl", n.GetSecurityProtocol()) 35 | }) 36 | } 37 | -------------------------------------------------------------------------------- /api/v1alpha1/log.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Log struct { 4 | Truncate *uint64 `json:"truncate,omitempty" protobuf:"varint,1,opt,name=truncate"` 5 | } 6 | -------------------------------------------------------------------------------- /api/v1alpha1/map.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | corev1 "k8s.io/api/core/v1" 5 | ) 6 | 7 | type Map struct { 8 | AbstractStep `json:",inline" protobuf:"bytes,1,opt,name=abstractStep"` 9 | Expression string
`json:"expression" protobuf:"bytes,2,opt,name=expression"` 10 | } 11 | 12 | func (m Map) getContainer(req getContainerReq) corev1.Container { 13 | return containerBuilder{}. 14 | init(req). 15 | args("map", m.Expression). 16 | resources(m.Resources). 17 | build() 18 | } 19 | -------------------------------------------------------------------------------- /api/v1alpha1/map_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMap_getContainer(t *testing.T) { 10 | x := &Map{ 11 | Expression: "my-expr", 12 | AbstractStep: AbstractStep{Resources: standardResources}, 13 | } 14 | c := x.getContainer(getContainerReq{}) 15 | assert.Equal(t, []string{"map", "my-expr"}, c.Args) 16 | assert.Equal(t, c.Resources, standardResources) 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/meta_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestContextWithMeta(t *testing.T) { 11 | var timestamp int64 12 | ctx := ContextWithMeta(context.Background(), Meta{Source: "my-source", ID: "my-id", Time: timestamp}) 13 | m, err := MetaFromContext(ctx) 14 | assert.NoError(t, err) 15 | assert.Equal(t, "my-source", m.Source) 16 | assert.Equal(t, "my-id", m.ID) 17 | assert.Equal(t, timestamp, m.Time) 18 | } 19 | -------------------------------------------------------------------------------- /api/v1alpha1/metadata.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Metadata struct { 4 | Annotations map[string]string `json:"annotations,omitempty" protobuf:"bytes,1,rep,name=annotations"` 5 | Labels map[string]string `json:"labels,omitempty" protobuf:"bytes,2,rep,name=labels"` 6 | } 7 | -------------------------------------------------------------------------------- /api/v1alpha1/pipeline_phase.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum="";Pending;Running;Succeeded;Failed 4 | type PipelinePhase string 5 | 6 | func (p PipelinePhase) Completed() bool { 7 | return p == PipelineSucceeded || p == PipelineFailed 8 | } 9 | 10 | const ( 11 | PipelineUnknown PipelinePhase = "" 12 | PipelinePending PipelinePhase = "Pending" 13 | PipelineRunning PipelinePhase = "Running" 14 | PipelineSucceeded PipelinePhase = "Succeeded" 15 | PipelineFailed PipelinePhase = "Failed" 16 | ) 17 | 18 | func MinPipelinePhase(v ...PipelinePhase) PipelinePhase { 19 | for _, p := range []PipelinePhase{PipelineFailed, PipelinePending, PipelineRunning, PipelineSucceeded} { 20 | for _, x := range v { 21 | if x == p { 22 | return p 23 | } 24 | } 25 | } 26 | return PipelineUnknown 27 | } 28 | -------------------------------------------------------------------------------- /api/v1alpha1/pipeline_phase_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMinPipelinePhase(t *testing.T) { 10 | assert.Equal(t, PipelineFailed, MinPipelinePhase(PipelineFailed, PipelineRunning)) 11 | } 12 | -------------------------------------------------------------------------------- 
/api/v1alpha1/pipeline_spec.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type PipelineSpec struct { 8 | // +patchStrategy=merge 9 | // +patchMergeKey=name 10 | Steps []StepSpec `json:"steps,omitempty" protobuf:"bytes,1,rep,name=steps"` 11 | // +kubebuilder:default="72h" 12 | DeletionDelay *metav1.Duration `json:"deletionDelay,omitempty" protobuf:"bytes,2,opt,name=deletionDelay"` 13 | } 14 | 15 | func (in *PipelineSpec) HasStep(name string) bool { 16 | for _, step := range in.Steps { 17 | if step.Name == name { 18 | return true 19 | } 20 | } 21 | return false 22 | } 23 | -------------------------------------------------------------------------------- /api/v1alpha1/pipeline_status.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 4 | 5 | type PipelineStatus struct { 6 | Phase PipelinePhase `json:"phase,omitempty" protobuf:"bytes,1,opt,name=phase,casttype=PipelinePhase"` 7 | Message string `json:"message,omitempty" protobuf:"bytes,2,opt,name=message"` 8 | Conditions []metav1.Condition `json:"conditions,omitempty" protobuf:"bytes,3,rep,name=conditions"` 9 | LastUpdated metav1.Time `json:"lastUpdated,omitempty" protobuf:"bytes,4,opt,name=lastUpdated"` 10 | } 11 | -------------------------------------------------------------------------------- /api/v1alpha1/runtime.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum=golang1-16;golang1-17;java16;python3-9;node16 4 | type Runtime string 5 | -------------------------------------------------------------------------------- /api/v1alpha1/s3.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | ) 6 | 7 | type S3 struct { 8 | // +kubebuilder:default=default 9 | Name string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"` 10 | Bucket string `json:"bucket" protobuf:"bytes,2,opt,name=bucket"` 11 | Region string `json:"region,omitempty" protobuf:"bytes,3,opt,name=region"` 12 | Credentials *AWSCredentials `json:"credentials,omitempty" protobuf:"bytes,4,opt,name=credentials"` 13 | Endpoint *AWSEndpoint `json:"endpoint,omitempty" protobuf:"bytes,5,opt,name=endpoint"` 14 | } 15 | 16 | func (in S3) GenURN(cluster, namespace string) string { 17 | // An Amazon S3 bucket name is globally unique, and the namespace is shared by all AWS accounts. 
18 | return fmt.Sprintf("urn:dataflow:s3:%s", in.Bucket) 19 | } 20 | -------------------------------------------------------------------------------- /api/v1alpha1/s3_sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type S3Sink struct { 4 | S3 `json:",inline" protobuf:"bytes,4,opt,name=s3"` 5 | } 6 | -------------------------------------------------------------------------------- /api/v1alpha1/s3_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type S3Source struct { 8 | S3 `json:",inline" protobuf:"bytes,7,opt,name=s3"` 9 | // +kubebuilder:default="1m" 10 | PollPeriod *metav1.Duration `json:"pollPeriod,omitempty" protobuf:"bytes,6,opt,name=pollPeriod"` 11 | // +kubebuilder:default=1 12 | Concurrency uint32 `json:"concurrency,omitempty" protobuf:"varint,8,opt,name=concurrency"` 13 | } 14 | -------------------------------------------------------------------------------- /api/v1alpha1/s3_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestS3_GenURN(t *testing.T) { 10 | urn := S3{Bucket: "my-bucket"}.GenURN(cluster, namespace) 11 | assert.Equal(t, "urn:dataflow:s3:my-bucket", urn) 12 | } 13 | -------------------------------------------------------------------------------- /api/v1alpha1/scale.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Scale struct { 4 | // An expression to determine the number of replicas. Must evaluate to an `int`. 5 | DesiredReplicas string `json:"desiredReplicas,omitempty" protobuf:"bytes,1,opt,name=desiredReplicas"` 6 | // An expression to determine the delay for peeking. May be a string or a duration, e.g. `"4m"` 7 | // +kubebuilder:default="defaultPeekDelay" 8 | PeekDelay string `json:"peekDelay,omitempty" protobuf:"bytes,2,opt,name=peekDelay"` 9 | // An expression to determine the delay for scaling. May be a string or a duration, e.g.
`"1m"` 10 | // +kubebuilder:default="defaultScalingDelay" 11 | ScalingDelay string `json:"scalingDelay,omitempty" protobuf:"bytes,3,opt,name=scalingDelay"` 12 | } 13 | -------------------------------------------------------------------------------- /api/v1alpha1/sidecar.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 | type Sidecar struct { 6 | // +kubebuilder:default={limits: {"cpu": "500m", "memory": "256Mi"}, requests: {"cpu": "100m", "memory": "64Mi"}} 7 | Resources corev1.ResourceRequirements `json:"resources,omitempty" protobuf:"bytes,1,opt,name=resources"` 8 | } 9 | -------------------------------------------------------------------------------- /api/v1alpha1/sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Sink struct { 4 | // +kubebuilder:default=default 5 | Name string `json:"name,omitempty" protobuf:"bytes,1,opt,name=name"` 6 | STAN *STAN `json:"stan,omitempty" protobuf:"bytes,2,opt,name=stan"` 7 | Kafka *KafkaSink `json:"kafka,omitempty" protobuf:"bytes,3,opt,name=kafka"` 8 | Log *Log `json:"log,omitempty" protobuf:"bytes,4,opt,name=log"` 9 | HTTP *HTTPSink `json:"http,omitempty" protobuf:"bytes,5,opt,name=http"` 10 | S3 *S3Sink `json:"s3,omitempty" protobuf:"bytes,6,opt,name=s3"` 11 | DB *DBSink `json:"db,omitempty" protobuf:"bytes,7,opt,name=db"` 12 | Volume *VolumeSink `json:"volume,omitempty" protobuf:"bytes,8,opt,name=volume"` 13 | JetStream *JetStreamSink `json:"jetstream,omitempty" protobuf:"bytes,9,opt,name=jetstream"` 14 | DeadLetterQueue bool `json:"deadLetterQueue,omitempty" protobuf:"varint,10,opt,name=deadLetterQueue"` 15 | } 16 | -------------------------------------------------------------------------------- /api/v1alpha1/sources.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type Sources []Source 4 | -------------------------------------------------------------------------------- /api/v1alpha1/stan_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestSTAN_GenURN(t *testing.T) { 10 | urn := STAN{NATSURL: "my-url", Subject: "my-subject"}.GenURN(cluster, namespace) 11 | assert.Equal(t, "urn:dataflow:stan:my-url:my-subject", urn) 12 | } 13 | -------------------------------------------------------------------------------- /api/v1alpha1/step_phase.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum="";Pending;Running;Succeeded;Failed 4 | type StepPhase string 5 | 6 | func (p StepPhase) Completed() bool { 7 | return p == StepSucceeded || p == StepFailed 8 | } 9 | 10 | const ( 11 | StepUnknown StepPhase = "" 12 | StepPending StepPhase = "Pending" 13 | StepRunning StepPhase = "Running" 14 | StepSucceeded StepPhase = "Succeeded" 15 | StepFailed StepPhase = "Failed" 16 | ) 17 | -------------------------------------------------------------------------------- /api/v1alpha1/step_phase_message.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "fmt" 5 | "strings" 6 | ) 7 | 8 | type StepPhaseMessage string 9 | 10 | func (m StepPhaseMessage) GetPhase() StepPhase { 11 | return StepPhase(strings.Split(string(m), 
"/")[0]) 12 | } 13 | 14 | func (m StepPhaseMessage) GetReason() string { 15 | return strings.Split(string(m), "/")[1] 16 | } 17 | 18 | func (m StepPhaseMessage) GetMessage() string { 19 | return strings.Split(string(m), "/")[2] 20 | } 21 | 22 | func NewStepPhaseMessage(phase StepPhase, reason, message string) StepPhaseMessage { 23 | return StepPhaseMessage(fmt.Sprintf("%s/%s/%s", phase, reason, message)) 24 | } 25 | 26 | func MinStepPhaseMessage(v ...StepPhaseMessage) StepPhaseMessage { 27 | for _, p := range []StepPhase{StepFailed, StepPending, StepRunning, StepSucceeded} { 28 | for _, x := range v { 29 | if x.GetPhase() == p { 30 | return x 31 | } 32 | } 33 | } 34 | return NewStepPhaseMessage(StepUnknown, "", "") 35 | } 36 | -------------------------------------------------------------------------------- /api/v1alpha1/step_phase_message_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMinStepPhaseMessage(t *testing.T) { 10 | x := MinStepPhaseMessage(NewStepPhaseMessage(StepFailed, "baz", "foo"), NewStepPhaseMessage(StepRunning, "qux", "bar")) 11 | assert.Equal(t, StepFailed, x.GetPhase()) 12 | assert.Equal(t, "baz", x.GetReason()) 13 | assert.Equal(t, "foo", x.GetMessage()) 14 | } 15 | -------------------------------------------------------------------------------- /api/v1alpha1/step_spec_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestStepSpec_WithOutReplicas(t *testing.T) { 10 | in := StepSpec{Replicas: 1, Name: "foo"}.WithOutReplicas() 11 | assert.Zero(t, in.Replicas) 12 | assert.Equal(t, "foo", in.Name) 13 | } 14 | -------------------------------------------------------------------------------- /api/v1alpha1/step_status.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type StepStatus struct { 8 | Phase StepPhase `json:"phase" protobuf:"bytes,1,opt,name=phase,casttype=StepPhase"` 9 | Reason string `json:"reason,omitempty" protobuf:"bytes,6,opt,name=reason"` 10 | Message string `json:"message,omitempty" protobuf:"bytes,2,opt,name=message"` 11 | Replicas uint32 `json:"replicas" protobuf:"varint,3,opt,name=replicas"` 12 | Selector string `json:"selector,omitempty" protobuf:"bytes,5,opt,name=selector"` 13 | LastScaledAt metav1.Time `json:"lastScaledAt,omitempty" protobuf:"bytes,4,opt,name=lastScaledAt"` 14 | } 15 | 16 | func (m StepStatus) GetReplicas() int { 17 | return int(m.Replicas) 18 | } 19 | -------------------------------------------------------------------------------- /api/v1alpha1/string.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | func StringOr(a, b string) string { 4 | if a != "" { 5 | return a 6 | } 7 | return b 8 | } 9 | 10 | func StringsOr(a, b []string) []string { 11 | if len(a) > 0 { 12 | return a 13 | } 14 | return b 15 | } 16 | -------------------------------------------------------------------------------- /api/v1alpha1/string_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestStringOr(t 
*testing.T) { 10 | assert.Equal(t, "bar", StringOr("", "bar")) 11 | assert.Equal(t, "foo", StringOr("foo", "bar")) 12 | } 13 | 14 | func TestStringsOr(t *testing.T) { 15 | assert.Equal(t, []string{"bar"}, StringsOr(nil, []string{"bar"})) 16 | assert.Equal(t, []string{"bar"}, StringsOr([]string{}, []string{"bar"})) 17 | assert.Equal(t, []string{"foo"}, StringsOr([]string{"foo"}, []string{"bar"})) 18 | } 19 | -------------------------------------------------------------------------------- /api/v1alpha1/subject_prefix.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // +kubebuilder:validation:Enum="";None;NamespaceName;NamespacedPipelineName 4 | type SubjectPrefix string 5 | 6 | const ( 7 | SubjectPrefixNone SubjectPrefix = "None" 8 | SubjectPrefixNamespaceName SubjectPrefix = "NamespaceName" 9 | SubjectPrefixNamespacedPipelineName SubjectPrefix = "NamespacedPipelineName" 10 | ) 11 | 12 | func SubjectPrefixOr(a, b SubjectPrefix) SubjectPrefix { 13 | if a != "" { 14 | return a 15 | } 16 | return b 17 | } 18 | -------------------------------------------------------------------------------- /api/v1alpha1/subject_prefix_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestSubjectPrefixOr(t *testing.T) { 10 | assert.Equal(t, SubjectPrefixNone, SubjectPrefixOr("", SubjectPrefixNone)) 11 | } 12 | -------------------------------------------------------------------------------- /api/v1alpha1/tls.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import corev1 "k8s.io/api/core/v1" 4 | 5 | type TLS struct { 6 | // CACertSecret refers to the secret that contains the CA cert 7 | CACertSecret *corev1.SecretKeySelector `json:"caCertSecret,omitempty" protobuf:"bytes,1,opt,name=caCertSecret"` 8 | // CertSecret refers to the secret that contains the cert 9 | CertSecret *corev1.SecretKeySelector `json:"clientCertSecret,omitempty" protobuf:"bytes,2,opt,name=certSecret"` 10 | // KeySecret refers to the secret that contains the key 11 | KeySecret *corev1.SecretKeySelector `json:"clientKeySecret,omitempty" protobuf:"bytes,3,opt,name=keySecret"` 12 | } 13 | -------------------------------------------------------------------------------- /api/v1alpha1/trunc.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | func trunc(msg string) string { 4 | return truncN(msg, 64) 5 | } 6 | 7 | func truncN(msg string, n int) string { 8 | x := n / 2 9 | if len(msg) > n { 10 | return msg[0:x-1] + "..." 
+ msg[len(msg)-x+2:] 11 | } 12 | return msg 13 | } 14 | -------------------------------------------------------------------------------- /api/v1alpha1/trunc_test.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func Test_trunc(t *testing.T) { 11 | t.Run("63", func(t *testing.T) { 12 | x := trunc(strings.Repeat("x", 63)) 13 | assert.Len(t, x, 63) 14 | assert.Equal(t, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", x) 15 | }) 16 | t.Run("64", func(t *testing.T) { 17 | x := trunc(strings.Repeat("x", 64)) 18 | assert.Len(t, x, 64) 19 | assert.Equal(t, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", x) 20 | }) 21 | t.Run("65", func(t *testing.T) { 22 | x := trunc(strings.Repeat("x", 65)) 23 | assert.Len(t, x, 64) 24 | assert.Equal(t, "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx...xxxxxxxxxxxxxxxxxxxxxxxxxxxxxx", x) 25 | }) 26 | } 27 | -------------------------------------------------------------------------------- /api/v1alpha1/urner.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | // what a strange name - "URN" + "er" 4 | // why? https://golang.org/doc/effective_go#interface-names 5 | type urner interface { // only private to defeat codegen 6 | GenURN(cluster, namespace string) string 7 | } 8 | -------------------------------------------------------------------------------- /api/v1alpha1/volume_sink.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | type VolumeSink struct { 4 | AbstractVolumeSource `json:",inline" protobuf:"bytes,1,opt,name=abstractVolumeSource"` 5 | } 6 | -------------------------------------------------------------------------------- /api/v1alpha1/volume_source.go: -------------------------------------------------------------------------------- 1 | package v1alpha1 2 | 3 | import ( 4 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 5 | ) 6 | 7 | type VolumeSource struct { 8 | AbstractVolumeSource `json:",inline" protobuf:"bytes,9,opt,name=abstractVolumeSource"` 9 | // +kubebuilder:default="1m" 10 | PollPeriod *metav1.Duration `json:"pollPeriod,omitempty" protobuf:"bytes,6,opt,name=pollPeriod"` 11 | // +kubebuilder:default=1 12 | Concurrency uint32 `json:"concurrency,omitempty" protobuf:"varint,8,opt,name=concurrency"` 13 | ReadOnly bool `json:"readOnly,omitempty" protobuf:"varint,10,opt,name=readOnly"` 14 | } 15 | -------------------------------------------------------------------------------- /config/apps/argo-server/argo-server-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: argo-server 5 | spec: 6 | replicas: 1 7 | template: 8 | spec: 9 | securityContext: 10 | runAsUser: 8737 11 | containers: 12 | - name: argo-server 13 | image: quay.io/argoproj/argocli:latest 14 | imagePullPolicy: Always 15 | args: [ server, --namespaced=true, --auth-mode=server, --secure=false ] 16 | resources: 17 | requests: 18 | cpu: 100m 19 | memory: 20Mi 20 | readinessProbe: 21 | httpGet: 22 | scheme: HTTP 23 | -------------------------------------------------------------------------------- /config/apps/argo-server/argo-server-rolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | 
kind: RoleBinding 3 | metadata: 4 | name: argo-server-binding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: argo-server-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: argo-server 12 | -------------------------------------------------------------------------------- /config/apps/argo-server/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | resources: 3 | - github.com/argoproj/argo-workflows/manifests/base/argo-server 4 | - argo-server-role.yaml 5 | - argo-server-rolebinding.yaml 6 | patchesStrategicMerge: 7 | - argo-server-deploy.yaml 8 | -------------------------------------------------------------------------------- /config/apps/jaeger/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - simplist.yaml 3 | -------------------------------------------------------------------------------- /config/apps/jetstream/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - nats-js.yml 3 | - ../../../examples/dataflow-jetstream-default-secret.yaml 4 | patchesStrategicMerge: 5 | -------------------------------------------------------------------------------- /config/apps/kafka/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | resources: 3 | - kafka-minimal.yaml 4 | - ../../../examples/dataflow-kafka-default-secret.yaml 5 | -------------------------------------------------------------------------------- /config/apps/metrics-server/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: kube-system 2 | resources: 3 | - https://github.com/kubernetes-sigs/metrics-server/releases/download/v0.5.0/components.yaml 4 | patchesStrategicMerge: 5 | - metrics-server-deploy.yaml -------------------------------------------------------------------------------- /config/apps/metrics-server/metrics-server-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: metrics-server 5 | namespace: kube-system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - args: 11 | - --cert-dir=/tmp 12 | - --secure-port=443 13 | - --kubelet-preferred-address-types=InternalIP,ExternalIP,Hostname 14 | - --kubelet-use-node-status-port 15 | - --metric-resolution=15s 16 | - --kubelet-insecure-tls 17 | name: metrics-server -------------------------------------------------------------------------------- /config/apps/moto/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | 3 | resources: 4 | - moto-statefulset.yaml 5 | - moto-svc.yaml 6 | - ../../../examples/dataflow-s3-default-secret.yaml 7 | -------------------------------------------------------------------------------- /config/apps/moto/moto-statefulset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: moto 5 | labels: 6 | app: moto 7 | spec: 8 | selector: 9 | matchLabels: 10 | app: moto 11 | template: 12 | metadata: 13 | labels: 14 | app: moto 15 | spec: 16 | containers: 17 | - name: main 18 | image: motoserver/moto:2.2.4 19 | ports: 20 | - containerPort: 5000 21 | command: [ 
"/usr/local/bin/moto_server", "-H", "0.0.0.0"] 22 | readinessProbe: 23 | httpGet: 24 | path: / 25 | port: 5000 26 | initialDelaySeconds: 15 27 | periodSeconds: 10 28 | failureThreshold: 5 29 | timeoutSeconds: 10 30 | livenessProbe: 31 | httpGet: 32 | path: / 33 | port: 5000 34 | initialDelaySeconds: 15 35 | periodSeconds: 10 36 | failureThreshold: 5 37 | timeoutSeconds: 10 38 | serviceName: moto -------------------------------------------------------------------------------- /config/apps/moto/moto-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: moto 5 | labels: 6 | app: moto 7 | spec: 8 | selector: 9 | app: moto 10 | ports: 11 | - protocol: TCP 12 | port: 5000 13 | targetPort: 5000 14 | -------------------------------------------------------------------------------- /config/apps/mysql.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: mysql-data-source 5 | namespace: argo-dataflow-system 6 | stringData: 7 | dataSource: root:password@tcp(mysql)/test 8 | --- 9 | apiVersion: v1 10 | kind: Service 11 | metadata: 12 | name: mysql 13 | namespace: argo-dataflow-system 14 | spec: 15 | clusterIP: None 16 | ports: 17 | - port: 3306 18 | selector: 19 | app: mysql 20 | --- 21 | apiVersion: apps/v1 22 | kind: StatefulSet 23 | metadata: 24 | name: mysql 25 | namespace: argo-dataflow-system 26 | spec: 27 | selector: 28 | matchLabels: 29 | app: mysql 30 | serviceName: mysql 31 | template: 32 | metadata: 33 | labels: 34 | app: mysql 35 | spec: 36 | containers: 37 | - env: 38 | - name: MYSQL_ROOT_PASSWORD 39 | value: password 40 | - name: MYSQL_DATABASE 41 | value: test 42 | image: mysql:5.7 43 | name: mysql 44 | ports: 45 | - containerPort: 3306 46 | name: mysql 47 | -------------------------------------------------------------------------------- /config/apps/mysql/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | 3 | resources: 4 | - mysql-secret.yaml 5 | - mysql-statefulset.yaml 6 | - mysql-svc.yaml 7 | -------------------------------------------------------------------------------- /config/apps/mysql/mysql-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: mysql-data-source 5 | stringData: 6 | dataSource: root:password@tcp(mysql)/test 7 | -------------------------------------------------------------------------------- /config/apps/mysql/mysql-statefulset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: mysql 5 | spec: 6 | selector: 7 | matchLabels: 8 | app: mysql 9 | serviceName: mysql 10 | template: 11 | metadata: 12 | labels: 13 | app: mysql 14 | spec: 15 | containers: 16 | - image: mysql:5.7 17 | name: mysql 18 | env: 19 | # Use secret in real usage 20 | - name: MYSQL_ROOT_PASSWORD 21 | value: password 22 | - name: "MYSQL_DATABASE" 23 | value: "test" 24 | ports: 25 | - containerPort: 3306 26 | name: mysql 27 | -------------------------------------------------------------------------------- /config/apps/mysql/mysql-svc.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: mysql 5 | spec: 6 | ports: 7 | - port: 3306 8 
| selector: 9 | app: mysql 10 | clusterIP: None 11 | -------------------------------------------------------------------------------- /config/apps/nats/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - single-server-nats.yml 3 | patchesStrategicMerge: 4 | - nats-statefulset.yaml -------------------------------------------------------------------------------- /config/apps/nats/nats-statefulset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: nats 5 | spec: 6 | template: 7 | spec: 8 | containers: 9 | - name: nats 10 | ports: 11 | - containerPort: 4222 12 | hostPort: 0 13 | - containerPort: 7422 14 | hostPort: 0 -------------------------------------------------------------------------------- /config/apps/prometheus/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - monitor.yaml 3 | -------------------------------------------------------------------------------- /config/apps/prometheus/monitor.yaml: -------------------------------------------------------------------------------- 1 | 2 | # Prometheus Monitor Service (Metrics) 3 | apiVersion: monitoring.coreos.com/v1 4 | kind: ServiceMonitor 5 | metadata: 6 | labels: 7 | control-plane: controller-manager 8 | name: controller-manager-metrics-monitor 9 | namespace: system 10 | spec: 11 | endpoints: 12 | - path: /metrics 13 | port: https 14 | selector: 15 | matchLabels: 16 | control-plane: controller-manager 17 | -------------------------------------------------------------------------------- /config/apps/stan/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | resources: 3 | - ../nats 4 | - single-server-stan.yml 5 | - ../../../examples/dataflow-stan-default-secret.yaml 6 | patchesStrategicMerge: 7 | - stan-statefulset.yaml -------------------------------------------------------------------------------- /config/apps/stan/stan-statefulset.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: StatefulSet 3 | metadata: 4 | name: stan 5 | spec: 6 | template: 7 | spec: 8 | # stan depends on nats, and will go into CrashloopBackoff if that is not ready 9 | initContainers: 10 | - name: wait 11 | image: golang:1.17 12 | imagePullPolicy: IfNotPresent 13 | command: 14 | - curl 15 | args: 16 | - -fvN 17 | - nats:8222 18 | containers: 19 | - name: stan 20 | readinessProbe: 21 | httpGet: 22 | port: 8222 23 | path: /streaming/channelsz -------------------------------------------------------------------------------- /config/apps/testapi/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | resources: 3 | - testapi-statefulset.yaml 4 | - testapi-svc.yaml -------------------------------------------------------------------------------- /config/apps/testapi/test.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: testapi 5 | namespace: argo-dataflow-system 6 | spec: 7 | ports: 8 | - port: 80 9 | targetPort: 8378 10 | selector: 11 | app: testapi 12 | --- 13 | apiVersion: apps/v1 14 | kind: StatefulSet 15 | metadata: 16 | name: testapi 17 | namespace: argo-dataflow-system 18 | spec: 19 | 
selector: 20 | matchLabels: 21 | app: testapi 22 | serviceName: testapi 23 | template: 24 | metadata: 25 | labels: 26 | app: testapi 27 | spec: 28 | containers: 29 | - image: quay.io/argoproj/dataflow-testapi 30 | imagePullPolicy: IfNotPresent 31 | name: main 32 | ports: 33 | - containerPort: 8378 34 | readinessProbe: 35 | httpGet: 36 | path: /ready 37 | port: 8378 38 | -------------------------------------------------------------------------------- /config/apps/testapi/testapi-statefulset.yaml: -------------------------------------------------------------------------------- 1 | kind: StatefulSet 2 | apiVersion: apps/v1 3 | metadata: 4 | name: testapi 5 | spec: 6 | template: 7 | metadata: 8 | labels: 9 | app: testapi 10 | spec: 11 | containers: 12 | - name: main 13 | image: quay.io/argoprojlabs/dataflow-testapi 14 | imagePullPolicy: IfNotPresent 15 | ports: 16 | - containerPort: 8378 17 | readinessProbe: 18 | httpGet: 19 | port: 8378 20 | path: /ready 21 | selector: 22 | matchLabels: 23 | app: testapi 24 | serviceName: testapi -------------------------------------------------------------------------------- /config/apps/testapi/testapi-svc.yaml: -------------------------------------------------------------------------------- 1 | kind: Service 2 | apiVersion: v1 3 | metadata: 4 | name: testapi 5 | spec: 6 | ports: 7 | - port: 80 8 | targetPort: 8378 9 | selector: 10 | app: testapi -------------------------------------------------------------------------------- /config/base-patch/lead-replica-priorityclass.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: scheduling.k8s.io/v1 2 | kind: PriorityClass 3 | metadata: 4 | name: lead-replica 5 | value: 1 6 | description: "This priority class is used to ensure that lead replicas are prioritized over other replicas." -------------------------------------------------------------------------------- /config/base-patch/manager_auth_proxy_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch injects a sidecar container that is an HTTP proxy for the 2 | # controller manager; it performs RBAC authorization against the Kubernetes API using SubjectAccessReviews.
3 | apiVersion: apps/v1 4 | kind: Deployment 5 | metadata: 6 | name: controller-manager 7 | namespace: system 8 | spec: 9 | template: 10 | spec: 11 | containers: 12 | - name: kube-rbac-proxy 13 | image: gcr.io/kubebuilder/kube-rbac-proxy:v0.5.0 14 | args: 15 | - "--secure-listen-address=0.0.0.0:8443" 16 | - "--upstream=http://127.0.0.1:9090/" 17 | - "--logtostderr=true" 18 | - "--v=10" 19 | ports: 20 | - containerPort: 8443 21 | name: https 22 | - name: manager 23 | args: 24 | - "--metrics-addr=127.0.0.1:9090" 25 | - "--enable-leader-election" 26 | -------------------------------------------------------------------------------- /config/base-patch/manager_webhook_patch.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | ports: 12 | - containerPort: 9443 13 | name: webhook-server 14 | protocol: TCP 15 | volumeMounts: 16 | - mountPath: /tmp/k8s-webhook-server/serving-certs 17 | name: cert 18 | readOnly: true 19 | volumes: 20 | - name: cert 21 | secret: 22 | defaultMode: 420 23 | secretName: webhook-server-cert 24 | -------------------------------------------------------------------------------- /config/base-patch/ssh-configmap.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: ssh 5 | stringData: 6 | # add new hosts using `ssh-keyscan github.com` 7 | known_hosts: "" -------------------------------------------------------------------------------- /config/base-patch/webhookcainjection_patch.yaml: -------------------------------------------------------------------------------- 1 | # This patch adds annotations to the admission webhook configs, and 2 | # the variables $(CERTIFICATE_NAMESPACE) and $(CERTIFICATE_NAME) will be substituted by kustomize.
3 | apiVersion: admissionregistration.k8s.io/v1beta1 4 | kind: MutatingWebhookConfiguration 5 | metadata: 6 | name: mutating-webhook-configuration 7 | annotations: 8 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 9 | --- 10 | apiVersion: admissionregistration.k8s.io/v1beta1 11 | kind: ValidatingWebhookConfiguration 12 | metadata: 13 | name: validating-webhook-configuration 14 | annotations: 15 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 16 | -------------------------------------------------------------------------------- /config/certmanager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - certificate.yaml 3 | 4 | configurations: 5 | - kustomizeconfig.yaml 6 | -------------------------------------------------------------------------------- /config/certmanager/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This configuration is for teaching kustomize how to update name ref and var substitution 2 | nameReference: 3 | - kind: Issuer 4 | group: cert-manager.io 5 | fieldSpecs: 6 | - kind: Certificate 7 | group: cert-manager.io 8 | path: spec/issuerRef/name 9 | 10 | varReference: 11 | - kind: Certificate 12 | group: cert-manager.io 13 | path: spec/commonName 14 | - kind: Certificate 15 | group: cert-manager.io 16 | path: spec/dnsNames 17 | -------------------------------------------------------------------------------- /config/ci/controller-manager-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | replicas: 1 8 | template: 9 | spec: 10 | containers: 11 | - name: manager 12 | imagePullPolicy: Never 13 | env: 14 | - name: ARGO_DATAFLOW_PULL_POLICY 15 | value: IfNotPresent 16 | - name: ARGO_DATAFLOW_UPDATE_INTERVAL 17 | value: 5s 18 | - name: ARGO_DATAFLOW_CLUSTER 19 | value: ci 20 | -------------------------------------------------------------------------------- /config/ci/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../dev 3 | 4 | patchesStrategicMerge: 5 | - controller-manager-deploy.yaml 6 | -------------------------------------------------------------------------------- /config/cluster-quick-start/controller-manager-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | env: 12 | - name: ARGO_DATAFLOW_CLUSTER 13 | value: quick-start 14 | -------------------------------------------------------------------------------- /config/cluster-quick-start/kustomization.yaml: -------------------------------------------------------------------------------- 1 | bases: 2 | - ../default-cluster 3 | patchesStrategicMerge: 4 | - controller-manager-deploy.yaml 5 | -------------------------------------------------------------------------------- /config/crd/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # This file is for teaching kustomize how to substitute name and namespace reference in CRD 2 | nameReference: 3 | - kind: Service 4 | version: v1 5 | fieldSpecs: 6 | - kind: 
CustomResourceDefinition 7 | group: apiextensions.k8s.io 8 | path: spec/conversion/webhookClientConfig/service/name 9 | 10 | namespace: 11 | - kind: CustomResourceDefinition 12 | group: apiextensions.k8s.io 13 | path: spec/conversion/webhookClientConfig/service/namespace 14 | create: false 15 | 16 | varReference: 17 | - path: metadata/annotations 18 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_pipelines.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: pipelines.dataflow.argoproj.io 9 | -------------------------------------------------------------------------------- /config/crd/patches/cainjection_in_steps.yaml: -------------------------------------------------------------------------------- 1 | # The following patch adds a directive for certmanager to inject CA into the CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | annotations: 7 | cert-manager.io/inject-ca-from: $(CERTIFICATE_NAMESPACE)/$(CERTIFICATE_NAME) 8 | name: steps.dataflow.argoproj.io 9 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_pipelines.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: pipelines.dataflow.argoproj.io 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /config/crd/patches/webhook_in_steps.yaml: -------------------------------------------------------------------------------- 1 | # The following patch enables conversion webhook for CRD 2 | # CRD conversion requires k8s 1.13 or later. 
3 | apiVersion: apiextensions.k8s.io/v1beta1 4 | kind: CustomResourceDefinition 5 | metadata: 6 | name: steps.dataflow.argoproj.io 7 | spec: 8 | conversion: 9 | strategy: Webhook 10 | webhookClientConfig: 11 | # this is "\n" used as a placeholder, otherwise it will be rejected by the apiserver for being blank, 12 | # but we're going to set it later using the cert-manager (or potentially a patch if not using cert-manager) 13 | caBundle: Cg== 14 | service: 15 | namespace: system 16 | name: webhook-service 17 | path: /convert 18 | -------------------------------------------------------------------------------- /config/default-cluster/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | bases: 3 | - ../crd 4 | - ../manager 5 | resources: 6 | - rbac/cluster_role.yaml 7 | - rbac/cluster_role_binding.yaml 8 | - ../rbac/leader_election_role.yaml 9 | - ../rbac/leader_election_role_binding.yaml 10 | # Comment the following 4 lines if you want to disable 11 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 12 | # which protects your /metrics endpoint. 13 | - ../rbac/auth_proxy_service.yaml 14 | - ../rbac/auth_proxy_role.yaml 15 | - ../rbac/auth_proxy_role_binding.yaml 16 | - ../rbac/auth_proxy_client_clusterrole.yaml 17 | - ../rbac/pipeline-sa.yaml 18 | - ../rbac/pipeline-role.yaml 19 | - ../rbac/pipeline-rolebinding.yaml 20 | - ../base-patch/ssh-configmap.yaml 21 | - ../base-patch/lead-replica-priorityclass.yaml 22 | 23 | patchesStrategicMerge: 24 | - ../base-patch/manager_auth_proxy_patch.yaml 25 | 26 | -------------------------------------------------------------------------------- /config/default-cluster/pipeline-role-binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: pipeline 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: pipeline 9 | subjects: 10 | - kind: ServiceAccount 11 | name: pipeline 12 | namespace: default 13 | -------------------------------------------------------------------------------- /config/default-cluster/pipeline-sa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: pipeline 5 | namespace: default 6 | -------------------------------------------------------------------------------- /config/default-cluster/rbac/cluster_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: dataflow-manager 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: dataflow-manager 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: argo-dataflow-system 13 | -------------------------------------------------------------------------------- /config/default/controller-manager-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | env: 12 | - name: ARGO_DATAFLOW_NAMESPACE 13 | valueFrom: 14 | fieldRef: 15 | fieldPath: metadata.namespace 16 | -------------------------------------------------------------------------------- 
/config/dev/controller-manager-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | replicas: 0 -------------------------------------------------------------------------------- /config/dev/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | 3 | resources: 4 | - ../default 5 | - ../apps/testapi 6 | 7 | patchesStrategicMerge: 8 | - controller-manager-deploy.yaml 9 | - ssh-configmap.yaml 10 | 11 | -------------------------------------------------------------------------------- /config/dev/ssh-configmap.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: ssh 5 | stringData: 6 | # add new hosts using `ssh-keyscan github.com` 7 | known_hosts: | 8 | # github.com:22 SSH-2.0-babeld-83b59434 9 | # github.com:22 SSH-2.0-babeld-83b59434 10 | github.com ssh-rsa AAAAB3NzaC1yc2EAAAABIwAAAQEAq2A7hRGmdnm9tUDbO9IDSwBK6TbQa+PXYPCPy6rbTrTtw7PHkccKrpp0yVhp5HdEIcKr6pLlVDBfOLX9QUsyCOV0wzfjIJNlGEYsdlLJizHhbn2mUjvSAHQqZETYP81eFzLQNnPHt4EVVUh7VfDESU84KezmD5QlWpXLmvU31/yMf+Se8xhHTvKSCZIFImWwoG6mbUoWf9nzpIoaSjB+weqqUUmpaaasXVal72J+UX2B+2RPW3RcT0eOzQgqlJL3RKrTJvdsjE3JEAvGq3lGHSZXy28G3skua2SmVi/w4yCE6gbODqnTWlg7+wC604ydGXA8VJiS5ap43JXiUFFAaQ== 11 | # github.com:22 SSH-2.0-babeld-83b59434 -------------------------------------------------------------------------------- /config/kafka/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - ../apps/kafka -------------------------------------------------------------------------------- /config/manager/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manager-sa.yaml 3 | - manager.yaml 4 | -------------------------------------------------------------------------------- /config/manager/manager-sa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: manager 5 | namespace: system 6 | -------------------------------------------------------------------------------- /config/quick-start/controller-manager-deploy.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: controller-manager 5 | namespace: system 6 | spec: 7 | template: 8 | spec: 9 | containers: 10 | - name: manager 11 | env: 12 | - name: ARGO_DATAFLOW_CLUSTER 13 | value: quick-start -------------------------------------------------------------------------------- /config/quick-start/kustomization.yaml: -------------------------------------------------------------------------------- 1 | namespace: argo-dataflow-system 2 | resources: 3 | - ../default 4 | 5 | patchesStrategicMerge: 6 | - controller-manager-deploy.yaml -------------------------------------------------------------------------------- /config/rbac/auth_proxy_client_clusterrole.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: metrics-reader 5 | rules: 6 | - nonResourceURLs: ["/metrics"] 7 | verbs: ["get"] 8 | 
-------------------------------------------------------------------------------- /config/rbac/auth_proxy_role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRole 3 | metadata: 4 | name: proxy-role 5 | rules: 6 | - apiGroups: ["authentication.k8s.io"] 7 | resources: 8 | - tokenreviews 9 | verbs: ["create"] 10 | - apiGroups: ["authorization.k8s.io"] 11 | resources: 12 | - subjectaccessreviews 13 | verbs: ["create"] 14 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: ClusterRoleBinding 3 | metadata: 4 | name: proxy-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: ClusterRole 8 | name: proxy-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: argo-dataflow-system 13 | -------------------------------------------------------------------------------- /config/rbac/auth_proxy_service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | labels: 5 | control-plane: controller-manager 6 | name: controller-manager-metrics-service 7 | namespace: system 8 | spec: 9 | ports: 10 | - name: https 11 | port: 8443 12 | targetPort: https 13 | selector: 14 | control-plane: controller-manager 15 | -------------------------------------------------------------------------------- /config/rbac/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - role.yaml 3 | - role_binding.yaml 4 | - leader_election_role.yaml 5 | - leader_election_role_binding.yaml 6 | # Comment the following 4 lines if you want to disable 7 | # the auth proxy (https://github.com/brancz/kube-rbac-proxy) 8 | # which protects your /metrics endpoint. 9 | - auth_proxy_service.yaml 10 | - auth_proxy_role.yaml 11 | - auth_proxy_role_binding.yaml 12 | - auth_proxy_client_clusterrole.yaml 13 | - pipeline-sa.yaml 14 | - pipeline-role.yaml 15 | - pipeline-rolebinding.yaml 16 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions to do leader election. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: Role 4 | metadata: 5 | name: leader-election-role 6 | rules: 7 | - apiGroups: 8 | - coordination.k8s.io 9 | resources: 10 | - leases 11 | verbs: 12 | - create 13 | - get 14 | - update 15 | - apiGroups: 16 | - "" 17 | resources: 18 | - configmaps 19 | verbs: 20 | - get 21 | - list 22 | - watch 23 | - create 24 | - update 25 | - patch 26 | - delete 27 | - apiGroups: 28 | - "" 29 | resources: 30 | - configmaps/status 31 | verbs: 32 | - get 33 | - update 34 | - patch 35 | - apiGroups: 36 | - "" 37 | resources: 38 | - events 39 | verbs: 40 | - create 41 | -------------------------------------------------------------------------------- /config/rbac/leader_election_role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: leader-election-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: leader-election-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: argo-dataflow-system 13 | -------------------------------------------------------------------------------- /config/rbac/pipeline-role.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: Role 3 | metadata: 4 | name: pipeline 5 | rules: 6 | - apiGroups: 7 | - "" 8 | resources: 9 | - secrets 10 | verbs: 11 | - create 12 | - get 13 | -------------------------------------------------------------------------------- /config/rbac/pipeline-rolebinding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: pipeline 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: pipeline 9 | subjects: 10 | - kind: ServiceAccount 11 | name: pipeline 12 | 13 | -------------------------------------------------------------------------------- /config/rbac/pipeline-sa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ServiceAccount 3 | metadata: 4 | name: pipeline 5 | -------------------------------------------------------------------------------- /config/rbac/pipeline_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit pipelines. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: pipeline-editor-role 6 | rules: 7 | - apiGroups: 8 | - dataflow.argoproj.io 9 | resources: 10 | - pipelines 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - dataflow.argoproj.io 21 | resources: 22 | - pipelines/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/pipeline_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view pipelines. 
2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: pipeline-viewer-role 6 | rules: 7 | - apiGroups: 8 | - dataflow.argoproj.io 9 | resources: 10 | - pipelines 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - dataflow.argoproj.io 17 | resources: 18 | - pipelines/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/rbac/role_binding.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: rbac.authorization.k8s.io/v1 2 | kind: RoleBinding 3 | metadata: 4 | name: manager-rolebinding 5 | roleRef: 6 | apiGroup: rbac.authorization.k8s.io 7 | kind: Role 8 | name: manager-role 9 | subjects: 10 | - kind: ServiceAccount 11 | name: manager 12 | namespace: argo-dataflow-system 13 | -------------------------------------------------------------------------------- /config/rbac/step_editor_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to edit steps. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: func-editor-role 6 | rules: 7 | - apiGroups: 8 | - dataflow.argoproj.io 9 | resources: 10 | - steps 11 | verbs: 12 | - create 13 | - delete 14 | - get 15 | - list 16 | - patch 17 | - update 18 | - watch 19 | - apiGroups: 20 | - dataflow.argoproj.io 21 | resources: 22 | - steps/status 23 | verbs: 24 | - get 25 | -------------------------------------------------------------------------------- /config/rbac/step_viewer_role.yaml: -------------------------------------------------------------------------------- 1 | # permissions for end users to view steps. 2 | apiVersion: rbac.authorization.k8s.io/v1 3 | kind: ClusterRole 4 | metadata: 5 | name: func-viewer-role 6 | rules: 7 | - apiGroups: 8 | - dataflow.argoproj.io 9 | resources: 10 | - steps 11 | verbs: 12 | - get 13 | - list 14 | - watch 15 | - apiGroups: 16 | - dataflow.argoproj.io 17 | resources: 18 | - steps/status 19 | verbs: 20 | - get 21 | -------------------------------------------------------------------------------- /config/webhook/kustomization.yaml: -------------------------------------------------------------------------------- 1 | resources: 2 | - manifests.yaml 3 | - service.yaml 4 | 5 | configurations: 6 | - kustomizeconfig.yaml 7 | -------------------------------------------------------------------------------- /config/webhook/kustomizeconfig.yaml: -------------------------------------------------------------------------------- 1 | # the following config is for teaching kustomize where to look when substituting vars. 2 | # It requires kustomize v2.1.0 or newer to work properly.
3 | nameReference: 4 | - kind: Service 5 | version: v1 6 | fieldSpecs: 7 | - kind: MutatingWebhookConfiguration 8 | group: admissionregistration.k8s.io 9 | path: webhooks/clientConfig/service/name 10 | - kind: ValidatingWebhookConfiguration 11 | group: admissionregistration.k8s.io 12 | path: webhooks/clientConfig/service/name 13 | 14 | namespace: 15 | - kind: MutatingWebhookConfiguration 16 | group: admissionregistration.k8s.io 17 | path: webhooks/clientConfig/service/namespace 18 | create: true 19 | - kind: ValidatingWebhookConfiguration 20 | group: admissionregistration.k8s.io 21 | path: webhooks/clientConfig/service/namespace 22 | create: true 23 | 24 | varReference: 25 | - path: metadata/annotations 26 | -------------------------------------------------------------------------------- /config/webhook/service.yaml: -------------------------------------------------------------------------------- 1 | 2 | apiVersion: v1 3 | kind: Service 4 | metadata: 5 | name: webhook-service 6 | namespace: system 7 | spec: 8 | ports: 9 | - port: 443 10 | targetPort: 9443 11 | selector: 12 | control-plane: controller-manager 13 | -------------------------------------------------------------------------------- /docs/CLI.md: -------------------------------------------------------------------------------- 1 | # CLI 2 | 3 | List pipelines: 4 | 5 | ``` 6 | kubectl get pipeline 7 | ``` 8 | 9 | List steps for a pipeline: 10 | 11 | ``` 12 | kubectl get step -l dataflow.argoproj.io/pipeline-name=my-pipeline 13 | ``` 14 | 15 | List pods for a pipeline step: 16 | 17 | ``` 18 | kubectl get pod -l dataflow.argoproj.io/pipeline-name=my-pipeline,dataflow.argoproj.io/step-name=my-step 19 | ``` 20 | 21 | Restart a pipeline: 22 | 23 | ``` 24 | kubectl delete pod -l dataflow.argoproj.io/pipeline-name=my-pipeline 25 | ``` 26 | 27 | Restart a step: 28 | 29 | ``` 30 | kubectl delete pod -l dataflow.argoproj.io/pipeline-name=my-pipeline,dataflow.argoproj.io/step-name=my-step 31 | ``` 32 | -------------------------------------------------------------------------------- /docs/FILES.md: -------------------------------------------------------------------------------- 1 | # Files 2 | 3 | Initially, Dataflow was aimed at processing messages from streaming sources such as Kafka or NATS Streaming. However, it 4 | has quickly become clear that there are other types of data users want to process that are based on some other core 5 | concept (files, database records). 6 | 7 | Files are not really suitable for bundling into a message: a file could be 1GB, while messages should be smaller 8 | (10KB, maybe 64KB), and that is too much data to keep in memory. 9 | 10 | Instead, when we work with files, we use a shared volume and FIFOs. E.g. when an S3 source gets a message, it'll create 11 | a FIFO at `/var/run/argo-dataflow/sources/default/the-file`, and use the S3 API `GetObject` to write data to this FIFO 12 | for consumption by the main container (a minimal consumer sketch follows the GC notes below). 13 | -------------------------------------------------------------------------------- /docs/GC.md: -------------------------------------------------------------------------------- 1 | # Garbage Collection 2 | 3 | The controller will, by default, try to delete any pipelines 720h (~30d) after they complete. But, by default, the controller does not have permission to do this. 4 | 5 | You need to add the permission `delete pipelines` if you want it to do this. 6 | 7 | To prevent this for a single pipeline, [add a finalizer](https://kubernetes.io/blog/2021/05/14/using-finalizers-to-control-deletion/).
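For example, you can add a finalizer with `kubectl`. The finalizer name `dataflow.argoproj.io/keep` below is only illustrative, not a name Dataflow defines; any non-empty finalizer blocks deletion until it is removed:

```
kubectl patch pipeline my-pipeline --type merge -p '{"metadata":{"finalizers":["dataflow.argoproj.io/keep"]}}'
```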
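As an illustration of the FIFO mechanism described in FILES.md above, here is a minimal Go sketch of a main container consuming one file from the conventional source path. The path is the one quoted in FILES.md; the line-counting "processing" is a placeholder, and real code would need its own error handling and limits:

```go
package main

import (
	"bufio"
	"fmt"
	"log"
	"os"
)

func main() {
	// The S3 source writes the object's bytes into this FIFO (path per FILES.md).
	path := "/var/run/argo-dataflow/sources/default/the-file"

	// Opening a FIFO for reading blocks until the writer opens the other end.
	f, err := os.Open(path)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	// Stream the data rather than buffering the whole (possibly 1GB) file in memory.
	lines := 0
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		lines++ // placeholder processing: count lines
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("processed %d lines\n", lines)
}
```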
-------------------------------------------------------------------------------- /docs/JAEGER.md: -------------------------------------------------------------------------------- 1 | # Jaeger 2 | 3 | We use conventional configuration for Jaeger, for example: 4 | 5 | ``` 6 | export JAEGER_DISABLED=false 7 | export JAEGER_ENDPOINT=http://my-jaeger-collector:14268/api/traces 8 | # export JAEGER_REPORTER_LOG_SPANS=true 9 | # rate-limit sampling to 0.2 traces per second 10 | export JAEGER_SAMPLER_TYPE=ratelimiting 11 | export JAEGER_SAMPLER_PARAM=0.2 12 | ``` -------------------------------------------------------------------------------- /docs/KUBECTL.md: -------------------------------------------------------------------------------- 1 | # `kubectl` 2 | 3 | Dataflow is designed to work well with `kubectl`, rather than needing its own CLI (though maybe we'll add one 4 | someday 😀). 5 | 6 | Tasks you can do with `kubectl`: 7 | 8 | Create a pipeline: 9 | 10 | ``` 11 | kubectl apply -f examples/101-hello-pipeline.yaml 12 | ``` 13 | 14 | Wait for the pipeline to be running: 15 | 16 | ``` 17 | kubectl wait pipeline/101-hello --for=condition=running 18 | ``` 19 | 20 | Restart a pipeline: 21 | 22 | ``` 23 | kubectl delete pod -l dataflow.argoproj.io/pipeline-name=xxx 24 | ``` -------------------------------------------------------------------------------- /docs/LIMITATIONS.md: -------------------------------------------------------------------------------- 1 | # Limitations 2 | 3 | ## Message Size 4 | 5 | Messages are handled in memory by Dataflow, so message size is limited by the amount of memory available and the number of messages cached in memory. 6 | 7 | * HTTP messages must be < 4GB. 8 | * Kafka messages are typically < 1MB. 9 | * NATS Streaming messages are < 1MB. 10 | * NATS JetStream messages are < 1MB. 11 | 12 | This creates a practical message-size limit of 1MB. 13 | 14 | ## Message Throughput 15 | 16 | * HTTP source tested to 2k TPS 17 | * Kafka source tested to 12k TPS 18 | * Sinking is limited by the rate of the sink. It will typically be slower. 19 | -------------------------------------------------------------------------------- /docs/RELEASING.md: -------------------------------------------------------------------------------- 1 | # Releasing 2 | 3 | Run 4 | 5 | ``` 6 | make pre-commit -B 7 | ``` 8 | 9 | Update the tag, then push the tag. Wait. 10 | 11 | ## Publishing Python 12 | 13 | https://medium.com/@joel.barmettler/how-to-upload-your-python-package-to-pypi-65edc5fe9c56 -------------------------------------------------------------------------------- /docs/RELIABILITY.md: -------------------------------------------------------------------------------- 1 | # Reliability 2 | 3 | Dataflow has to run on Kubernetes, which means that pods can be deleted and processes killed at any time. It avoids 4 | using its own storage, and relies on the source or sink for storage. 5 | 6 | Dataflow aims for **at-least-once** message delivery semantics. 7 | 8 | The following disruptions are tolerated: 9 | 10 | * Loss of network connection to source or sink. 11 | * Pod deletion. 12 | * Pipeline deletion (metrics will be lost, but no messages). 13 | 14 | Under disruption, no messages should be lost and up to 20 messages may be duplicated. 15 | 16 | ## NATS JetStream 17 | 18 | No message loss or duplication is seen under the following disruptions: 19 | 20 | * Loss of network connection to source or sink. 21 | * Pod deletion. 22 | * Pipeline deletion.
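More generally, because delivery is at least once, downstream handlers should be idempotent or should deduplicate; Dataflow itself ships a dedupe step (see `api/v1alpha1/dedupe.go` in the tree). Purely to illustrate the idea, here is a minimal Go sketch that drops redelivered messages by remembering recently seen IDs. The `handle` API is hypothetical, not part of Dataflow, and a real implementation would bound memory with a TTL or LRU policy:

```go
package main

import (
	"fmt"
	"sync"
)

// dedupe remembers message IDs it has already seen and reports duplicates.
type dedupe struct {
	mu   sync.Mutex
	seen map[string]bool
}

func newDedupe() *dedupe { return &dedupe{seen: map[string]bool{}} }

// handle returns true if the message should be processed, false if it is a duplicate.
func (d *dedupe) handle(id string) bool {
	d.mu.Lock()
	defer d.mu.Unlock()
	if d.seen[id] {
		return false
	}
	d.seen[id] = true
	return true
}

func main() {
	d := newDedupe()
	for _, id := range []string{"a", "b", "a"} { // "a" is redelivered
		fmt.Println(id, d.handle(id))
	}
}
```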
-------------------------------------------------------------------------------- /docs/SCALING.md: -------------------------------------------------------------------------------- 1 | # Scaling 2 | 3 | You can scale in the following ways: 4 | 5 | * Using the built-in scaling, as shown in 103-scaling-pipeline.yaml. 6 | * Using `kubectl scale step/{pipelineName}-{stepName} --replicas 1`. 7 | * Using a [Horizontal Pod Autoscaler](https://kubernetes.io/docs/tasks/run-application/horizontal-pod-autoscale/). 8 | 9 | Not all source or step types will scale linearly. Some cannot be scaled. See [examples](EXAMPLES.md). -------------------------------------------------------------------------------- /docs/SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security 2 | 3 | ## Supply Chain 4 | 5 | * For base images, prefer `scratch`, then `distroless`, then `alpine`. 6 | * Snyk is used to scan images. 7 | * Snyk is used to scan imported Go modules. 8 | 9 | ## Configuration 10 | 11 | * Step pods run with `runAsNonRoot: true` as user `9653`. 12 | * Step pods have `automountServiceAccountToken: true`, but the `pipeline` service account has only `get secrets` 13 | and `patch steps/status`. 14 | 15 | ## Inter-container/process Communication (IPC) 16 | 17 | Messages are shared between containers using HTTP. As the pod gets its own network namespace, no other Linux network 18 | namespace can see the packets. 19 | 20 | Data is also shared using a Kubernetes empty-dir. -------------------------------------------------------------------------------- /docs/STAN.md: -------------------------------------------------------------------------------- 1 | # STAN 2 | 3 | If you want to experiment with STAN, install STAN: 4 | 5 | ```bash 6 | kubectl apply -f https://raw.githubusercontent.com/argoproj-labs/argo-dataflow/main/config/apps/stan.yaml 7 | ``` 8 | 9 | Configure Dataflow to use that STAN by default: 10 | 11 | ```bash 12 | kubectl apply -f https://raw.githubusercontent.com/argoproj-labs/argo-dataflow/main/examples/dataflow-stan-default-secret.yaml 13 | ``` 14 | 15 | Wait for the statefulsets to be available (ctrl+c when available): 16 | 17 | ```bash 18 | kubectl get statefulset -w 19 | ``` 20 | -------------------------------------------------------------------------------- /docs/VERSIONING.md: -------------------------------------------------------------------------------- 1 | # Versioning 2 | 3 | We use Semantic Versioning, with a "v" prefix. Semantic versions are not actually prefixed with "v", but basically everyone 4 | tolerates this. 5 | 6 | We also build the tip of the `main` branch, tagging it `:latest` with version `v0.0.0-latest-0`.
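7 | 
8 | As a quick sketch, you can list the published release tags (newest first) with the same filter the project's changelog script uses, skipping the rolling `v0.0.0-*` tags:
9 | 
10 | ```bash
11 | git tag -l 'v*' | grep -v 0.0.0 | sort -rV
12 | ```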
-------------------------------------------------------------------------------- /docs/assets/architecture.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argoproj-labs/old-argo-dataflow/b842e3b9d4f0816dd1f29d1b87faea78aa1d3c18/docs/assets/architecture.png -------------------------------------------------------------------------------- /docs/assets/screenshot.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argoproj-labs/old-argo-dataflow/b842e3b9d4f0816dd1f29d1b87faea78aa1d3c18/docs/assets/screenshot.png -------------------------------------------------------------------------------- /dsls/python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist -------------------------------------------------------------------------------- /dsls/python/MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README 3 | __init__.py 4 | pipeline.py 5 | setup.py 6 | -------------------------------------------------------------------------------- /dsls/python/Makefile: -------------------------------------------------------------------------------- 1 | tag=$(shell git describe --tags --abbrev=0) 2 | 3 | build: 4 | sed -i "" "s/version='.*'/version='$(tag)'/" setup.py 5 | 6 | publish: build 7 | rm -Rf dist 8 | python setup.py sdist 9 | twine upload dist/* -------------------------------------------------------------------------------- /dsls/python/README: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/argoproj-labs/old-argo-dataflow/b842e3b9d4f0816dd1f29d1b87faea78aa1d3c18/dsls/python/README -------------------------------------------------------------------------------- /dsls/python/argo_dataflow/__init__.py: -------------------------------------------------------------------------------- 1 | from .pipeline import * 2 | -------------------------------------------------------------------------------- /dsls/python/setup.py: -------------------------------------------------------------------------------- 1 | from distutils.core import setup 2 | 3 | setup( 4 | name='argo_dataflow', 5 | packages=['argo_dataflow'], 6 | install_requires=['kubernetes'], 7 | version='v0.0.62', 8 | license='apache-2.0', 9 | description='Argo Dataflow', 10 | author='Alex Collins', 11 | author_email='alex_collins@intuit.com', 12 | url='https://github.com/argoproj-labs/argo-dataflow', 13 | keywords=['Argo', 'Kubernetes'], 14 | classifiers=[ 15 | 'Development Status :: 3 - Alpha', 16 | 'Intended Audience :: Developers', 17 | 'Topic :: Software Development :: Build Tools', 18 | 'License :: OSI Approved :: Apache Software License', 19 | 'Programming Language :: Python :: 3', 20 | 'Programming Language :: Python :: 3.4', 21 | 'Programming Language :: Python :: 3.5', 22 | 'Programming Language :: Python :: 3.6', 23 | ], 24 | ) 25 | -------------------------------------------------------------------------------- /examples/101-hello-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import cron, pipeline 2 | 3 | if __name__ == '__main__': 4 | (pipeline("101-hello") 5 | .owner('argoproj-labs') 6 | .namespace('argo-dataflow-system') 7 | .describe("""This is the hello world of pipelines. 
8 | 9 | It uses a cron schedule as a source and then just cat the message to a log""") 10 | .annotate('dataflow.argoproj.io/test', "true") 11 | .step( 12 | (cron('*/3 * * * * *') 13 | .cat() 14 | .log()) 15 | ) 16 | .save()) 17 | -------------------------------------------------------------------------------- /examples/101-hello-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This is the hello world of pipelines. 7 | 8 | It uses a cron schedule as a source and then just cat the message to a log 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | dataflow.argoproj.io/test: 'true' 11 | name: 101-hello 12 | namespace: argo-dataflow-system 13 | spec: 14 | steps: 15 | - cat: {} 16 | name: main 17 | sinks: 18 | - log: {} 19 | sources: 20 | - cron: 21 | schedule: '*/3 * * * * *' 22 | -------------------------------------------------------------------------------- /examples/101-two-node-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import kafka, pipeline, stan 2 | 3 | if __name__ == '__main__': 4 | (pipeline("101-two-node") 5 | .owner('argoproj-labs') 6 | .describe("""This example shows an example of having two nodes in a pipeline. 7 | 8 | While they read from Kafka, they are connected by a NATS Streaming subject.""") 9 | .step( 10 | (kafka('input-topic') 11 | .cat('a') 12 | .stan('a-b')) 13 | ) 14 | .step( 15 | (stan('a-b') 16 | .cat('b') 17 | .kafka('output-topic')) 18 | ) 19 | .save()) 20 | -------------------------------------------------------------------------------- /examples/101-two-node-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example shows an example of having two nodes in a pipeline. 7 | 8 | While they read from Kafka, they are connected by a NATS Streaming subject. 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 101-two-node 11 | spec: 12 | steps: 13 | - cat: {} 14 | name: a 15 | sinks: 16 | - stan: 17 | subject: a-b 18 | sources: 19 | - kafka: 20 | topic: input-topic 21 | - cat: {} 22 | name: b 23 | sinks: 24 | - kafka: 25 | topic: output-topic 26 | sources: 27 | - stan: 28 | subject: a-b 29 | -------------------------------------------------------------------------------- /examples/102-dedupe-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("102-dedupe") 5 | .owner('argoproj-labs') 6 | .describe("""This is an example of built-in de-duplication step.""") 7 | .step( 8 | (kafka('input-topic') 9 | .dedupe() 10 | .kafka('output-topic')) 11 | ) 12 | .save()) 13 | -------------------------------------------------------------------------------- /examples/102-dedupe-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: This is an example of built-in de-duplication 6 | step. 
7 | dataflow.argoproj.io/owner: argoproj-labs 8 | name: 102-dedupe 9 | spec: 10 | steps: 11 | - dedupe: {} 12 | name: main 13 | sinks: 14 | - kafka: 15 | topic: output-topic 16 | sources: 17 | - kafka: 18 | topic: input-topic 19 | -------------------------------------------------------------------------------- /examples/102-filter-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import kafka, pipeline 2 | 3 | if __name__ == '__main__': 4 | (pipeline("102-filter") 5 | .owner('argoproj-labs') 6 | .describe("""This is an example of built-in filtering. 7 | 8 | Filters are written using expression syntax and must return a boolean. 9 | 10 | They have a single variable, `msg`, which is a byte array. 11 | 12 | [Learn about expressions](../docs/EXPRESSIONS.md)""") 13 | .step( 14 | kafka('input-topic') 15 | .filter(expression='string(msg) contains "-"') 16 | .kafka('output-topic') 17 | ) 18 | .save()) 19 | -------------------------------------------------------------------------------- /examples/102-filter-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This is an example of built-in filtering. 7 | 8 | Filters are written using expression syntax and must return a boolean. 9 | 10 | They have a single variable, `msg`, which is a byte array. 11 | 12 | [Learn about expressions](../docs/EXPRESSIONS.md) 13 | dataflow.argoproj.io/owner: argoproj-labs 14 | name: 102-filter 15 | spec: 16 | steps: 17 | - filter: 18 | expression: |- 19 | string(msg) contains "-" 20 | name: main 21 | sinks: 22 | - kafka: 23 | topic: output-topic 24 | sources: 25 | - kafka: 26 | topic: input-topic 27 | -------------------------------------------------------------------------------- /examples/102-flatten-expand-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import cron, pipeline, stan 2 | 3 | if __name__ == '__main__': 4 | (pipeline("102-flatten-expand") 5 | .owner('argoproj-labs') 6 | .describe("""This is an example of built-in flattening and expanding.""") 7 | .step( 8 | cron('*/3 * * * * *') 9 | .map('generate', """bytes('{"foo": {"bar": "' + string(msg) + '"}}')""") 10 | .stan('data')) 11 | .step( 12 | stan('data') 13 | .flatten('flatten') 14 | .stan('flattened') 15 | ) 16 | .step( 17 | stan('flattened') 18 | .expand('expand') 19 | .log() 20 | ) 21 | .save()) 22 | -------------------------------------------------------------------------------- /examples/102-flatten-expand-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: This is an example of built-in flattening and 6 | expanding. 
7 | dataflow.argoproj.io/owner: argoproj-labs 8 | name: 102-flatten-expand 9 | spec: 10 | steps: 11 | - map: 12 | expression: |- 13 | bytes('{"foo": {"bar": "' + string(msg) + '"}}') 14 | name: generate 15 | sinks: 16 | - stan: 17 | subject: data 18 | sources: 19 | - cron: 20 | schedule: '*/3 * * * * *' 21 | - flatten: {} 22 | name: flatten 23 | sinks: 24 | - stan: 25 | subject: flattened 26 | sources: 27 | - stan: 28 | subject: data 29 | - expand: {} 30 | name: expand 31 | sinks: 32 | - log: {} 33 | sources: 34 | - stan: 35 | subject: flattened 36 | -------------------------------------------------------------------------------- /examples/102-map-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("102-map") 5 | .owner('argoproj-labs') 6 | .describe("""This is an example of built-in mapping. 7 | 8 | Maps are written using expression syntax and must return a byte array. 9 | 10 | They have a single variable, `msg`, which is a byte array. 11 | 12 | [Learn about expressions](../docs/EXPRESSIONS.md)""") 13 | .step( 14 | (kafka('input-topic') 15 | .map(expression="bytes('hi ' + string(msg))") 16 | .kafka('output-topic')) 17 | ) 18 | .save()) 19 | -------------------------------------------------------------------------------- /examples/102-map-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This is an example of built-in mapping. 7 | 8 | Maps are written using expression syntax and must return a byte array. 9 | 10 | They have a single variable, `msg`, which is a byte array. 11 | 12 | [Learn about expressions](../docs/EXPRESSIONS.md) 13 | dataflow.argoproj.io/owner: argoproj-labs 14 | name: 102-map 15 | spec: 16 | steps: 17 | - map: 18 | expression: |- 19 | bytes('hi ' + string(msg)) 20 | name: main 21 | sinks: 22 | - kafka: 23 | topic: output-topic 24 | sources: 25 | - kafka: 26 | topic: input-topic 27 | -------------------------------------------------------------------------------- /examples/103-scaling-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("103-scaling") 5 | .owner('argoproj-labs') 6 | .describe("""This is an example of having multiple replicas for a single step. 7 | 8 | Steps can be manually scaled using `kubectl`: 9 | 10 | ``` 11 | kubectl scale step/scaling-main --replicas 3 12 | ```""") 13 | .step( 14 | (kafka('input-topic') 15 | .cat() 16 | .kafka('output-topic')) 17 | ) 18 | .save()) 19 | -------------------------------------------------------------------------------- /examples/103-scaling-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This is an example of having multiple replicas for a single step. 
7 | 8 | Steps can be manually scaled using `kubectl`: 9 | 10 | ``` 11 | kubectl scale step/scaling-main --replicas 3 12 | ``` 13 | dataflow.argoproj.io/owner: argoproj-labs 14 | name: 103-scaling 15 | spec: 16 | steps: 17 | - cat: {} 18 | name: main 19 | sinks: 20 | - kafka: 21 | topic: output-topic 22 | sources: 23 | - kafka: 24 | topic: input-topic 25 |
-------------------------------------------------------------------------------- /examples/104-golang1-17-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | 4 | def handler(msg): 5 | return msg 6 | 7 | 8 | if __name__ == '__main__': 9 | (pipeline("104-golang1-17") 10 | .owner('argoproj-labs') 11 | .describe("""This example is of the Go 1.17 handler. 12 | 13 | [Learn about handlers](../docs/HANDLERS.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .code(code="""package main 17 | 18 | import "context" 19 | 20 | func Handler(ctx context.Context, m []byte) ([]byte, error) { 21 | return []byte("hi " + string(m)), nil 22 | }""", runtime='golang1-17') 23 | .log() 24 | )) 25 | .save()) 26 |
-------------------------------------------------------------------------------- /examples/104-golang1-17-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example is of the Go 1.17 handler. 7 | 8 | [Learn about handlers](../docs/HANDLERS.md) 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 104-golang1-17 11 | spec: 12 | steps: 13 | - code: 14 | runtime: golang1-17 15 | source: |- 16 | package main 17 | 18 | import "context" 19 | 20 | func Handler(ctx context.Context, m []byte) ([]byte, error) { 21 | return []byte("hi " + string(m)), nil 22 | } 23 | name: main 24 | sinks: 25 | - log: {} 26 | sources: 27 | - kafka: 28 | topic: input-topic 29 |
-------------------------------------------------------------------------------- /examples/104-java16-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | 4 | def handler(msg): 5 | return msg 6 | 7 | 8 | if __name__ == '__main__': 9 | (pipeline("104-java16") 10 | .owner('argoproj-labs') 11 | .describe("""This example is of the Java 16 handler. 12 | 13 | [Learn about handlers](../docs/HANDLERS.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .code(code="""import java.util.Map; 17 | 18 | public class Handler { 19 | public static byte[] Handle(byte[] msg, Map context) throws Exception { 20 | return msg; 21 | } 22 | }""", runtime='java16') 23 | .kafka('output-topic') 24 | )) 25 | .save()) 26 |
-------------------------------------------------------------------------------- /examples/104-java16-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example is of the Java 16 handler.
7 | 8 | [Learn about handlers](../docs/HANDLERS.md) 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 104-java16 11 | spec: 12 | steps: 13 | - code: 14 | runtime: java16 15 | source: |- 16 | import java.util.Map; 17 | 18 | public class Handler { 19 | public static byte[] Handle(byte[] msg, Map context) throws Exception { 20 | return msg; 21 | } 22 | } 23 | name: main 24 | sinks: 25 | - kafka: 26 | topic: output-topic 27 | sources: 28 | - kafka: 29 | topic: input-topic 30 | -------------------------------------------------------------------------------- /examples/104-node16-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | 4 | def handler(msg, context): 5 | return ("hi! " + msg.decode("UTF-8")).encode("UTF-8") 6 | 7 | 8 | if __name__ == '__main__': 9 | (pipeline("104-node16") 10 | .owner('argoproj-labs') 11 | .describe("""This example is of the NodeJS 16 handler. 12 | 13 | [Learn about handlers](../docs/HANDLERS.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .code(code="""module.exports = async function (messageBuf, context) { 17 | const msg = messageBuf.toString('utf8') 18 | return Buffer.from('hi ' + msg) 19 | }""", runtime='node16') 20 | .kafka('output-topic') 21 | )) 22 | .save()) 23 | -------------------------------------------------------------------------------- /examples/104-node16-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example is of the NodeJS 16 handler. 7 | 8 | [Learn about handlers](../docs/HANDLERS.md) 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 104-node16 11 | spec: 12 | steps: 13 | - code: 14 | runtime: node16 15 | source: |- 16 | module.exports = async function (messageBuf, context) { 17 | const msg = messageBuf.toString('utf8') 18 | return Buffer.from('hi ' + msg) 19 | } 20 | name: main 21 | sinks: 22 | - kafka: 23 | topic: output-topic 24 | sources: 25 | - kafka: 26 | topic: input-topic 27 | -------------------------------------------------------------------------------- /examples/104-python3-9-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | 4 | def a(msg, context): 5 | return ("hi! " + msg.decode("UTF-8")).encode("UTF-8") 6 | 7 | 8 | def b(msg, context): 9 | return ("bye! " + msg.decode("UTF-8")).encode("UTF-8") 10 | 11 | 12 | if __name__ == '__main__': 13 | (pipeline("104-python3-9") 14 | .owner('argoproj-labs') 15 | .describe("""This example is of the Python 3.9 handler. 16 | 17 | [Learn about handlers](../docs/HANDLERS.md)""") 18 | .step( 19 | (kafka('input-topic') 20 | .code('a', source=a) 21 | .kafka('middle-topic')) 22 | ) 23 | .step( 24 | (kafka('middle-topic') 25 | .code('b', source=b) 26 | .kafka('output-topic')) 27 | ) 28 | .save()) 29 | -------------------------------------------------------------------------------- /examples/104-python3-9-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example is of the Python 3.9 handler. 
7 | 8 | [Learn about handlers](../docs/HANDLERS.md) 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 104-python3-9 11 | spec: 12 | steps: 13 | - code: 14 | runtime: python3-9 15 | source: | 16 | def handler(msg, context): 17 | return ("hi! " + msg.decode("UTF-8")).encode("UTF-8") 18 | name: a 19 | sinks: 20 | - kafka: 21 | topic: middle-topic 22 | sources: 23 | - kafka: 24 | topic: input-topic 25 | - code: 26 | runtime: python3-9 27 | source: | 28 | def handler(msg, context): 29 | return ("bye! " + msg.decode("UTF-8")).encode("UTF-8") 30 | name: b 31 | sinks: 32 | - kafka: 33 | topic: output-topic 34 | sources: 35 | - kafka: 36 | topic: middle-topic 37 |
-------------------------------------------------------------------------------- /examples/106-git-go-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("106-git-go") 5 | .owner('argoproj-labs') 6 | .describe("""This example is of a pipeline using Git. 7 | 8 | The Git handler allows you to check your application source code into Git. Dataflow will check out and build 9 | your code when the step starts. This example presents how one can use the Go runtime Git step. 10 | 11 | [Link to the directory that will be cloned with Git](../examples/git/) 12 | 13 | [Learn about Git steps](../docs/GIT.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .git('main', 'https://github.com/argoproj-labs/argo-dataflow', 'main', 'examples/git', 17 | 'quay.io/argoprojlabs/dataflow-golang1-17', 18 | env=[{'name': "GOCACHE", 'value': "/tmp/.gocache"}], command=["/dumb-init", "--", "go", "run", "."]) 19 | .kafka('output-topic') 20 | )) 21 | .save()) 22 |
-------------------------------------------------------------------------------- /examples/106-git-nodejs-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("106-git-nodejs") 5 | .owner('argoproj-labs') 6 | .describe("""This example is of a pipeline using Git with NodeJS. 7 | 8 | The Git handler allows you to check your application source code into Git. Dataflow will check out and build 9 | your code when the step starts. This example presents how one can use the NodeJS runtime Git step. 10 | 11 | [Link to the directory that will be cloned with Git](../examples/git-nodejs/) 12 | 13 | [Learn about Git steps](../docs/GIT.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .git('main', 'https://github.com/argoproj-labs/argo-dataflow', 'main', 'examples/git-nodejs', 17 | 'quay.io/argoprojlabs/dataflow-node16', command=["/dumb-init", "--", "./start.sh"]) 18 | .kafka('output-topic') 19 | )) 20 | .save()) 21 |
-------------------------------------------------------------------------------- /examples/106-git-python-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("106-git-python") 5 | .owner('argoproj-labs') 6 | .describe("""This example is of a pipeline using Git. 7 | 8 | The Git handler allows you to check your application source code into Git. Dataflow will check out and build 9 | your code when the step starts. This example presents how one can use the Python runtime Git step.
10 | 11 | [Link to the directory that will be cloned with Git](../examples/git-python/) 12 | 13 | [Learn about Git steps](../docs/GIT.md)""") 14 | .step( 15 | (kafka('input-topic') 16 | .git('main', 'https://github.com/argoproj-labs/argo-dataflow', 'main', 'examples/git-python', 17 | 'quay.io/argoprojlabs/dataflow-python3-9', 18 | command=["/dumb-init", "--", "./start.sh"]) 19 | .kafka('output-topic') 20 | )) 21 | .save()) 22 |
-------------------------------------------------------------------------------- /examples/107-completion-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, container 2 | 3 | if __name__ == '__main__': 4 | (pipeline("107-completion") 5 | .owner('argoproj-labs') 6 | .describe("""This example shows a pipeline running to completion. 7 | 8 | A pipeline that runs to completion (aka "terminating") is one that will finish. 9 | 10 | For a pipeline to terminate, one of two things must happen: 11 | 12 | * Every step exits successfully (i.e. with exit code 0). 13 | * One step exits successfully, and is marked with `terminator: true`. When this happens, all other steps are killed.""") 14 | .annotate('dataflow.argoproj.io/wait-for', 'Completed') 15 | .step( 16 | (container('main', 17 | args=['sh', '-c', 'exit 0'], 18 | image='golang:1.17' 19 | ) 20 | )) 21 | .save()) 22 |
-------------------------------------------------------------------------------- /examples/107-completion-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example shows a pipeline running to completion. 7 | 8 | A pipeline that runs to completion (aka "terminating") is one that will finish. 9 | 10 | For a pipeline to terminate, one of two things must happen: 11 | 12 | * Every step exits successfully (i.e. with exit code 0). 13 | * One step exits successfully, and is marked with `terminator: true`. When this happens, all other steps are killed.
14 | dataflow.argoproj.io/owner: argoproj-labs 15 | dataflow.argoproj.io/wait-for: Completed 16 | name: 107-completion 17 | spec: 18 | steps: 19 | - container: 20 | args: 21 | - sh 22 | - -c 23 | - exit 0 24 | image: golang:1.17 25 | name: main 26 | -------------------------------------------------------------------------------- /examples/107-terminator-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, container 2 | 3 | if __name__ == '__main__': 4 | (pipeline("107-terminator") 5 | .owner('argoproj-labs') 6 | .describe("""This example demonstrates having a terminator step, and then terminating other steps 7 | using different terminations strategies.""") 8 | .annotate('dataflow.argoproj.io/wait-for', 'Completed') 9 | .step( 10 | (container('main', 11 | args=['sh', '-c', 'cat'], 12 | image='golang:1.17' 13 | ) 14 | )) 15 | .step( 16 | (container('terminator', 17 | args=['sh', '-c', 'exit 0'], 18 | image='golang:1.17', 19 | terminator=True 20 | ) 21 | )) 22 | .save()) 23 | -------------------------------------------------------------------------------- /examples/107-terminator-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: |- 6 | This example demonstrates having a terminator step, and then terminating other steps 7 | using different terminations strategies. 8 | dataflow.argoproj.io/owner: argoproj-labs 9 | dataflow.argoproj.io/wait-for: Completed 10 | name: 107-terminator 11 | spec: 12 | steps: 13 | - container: 14 | args: 15 | - sh 16 | - -c 17 | - cat 18 | image: golang:1.17 19 | name: main 20 | - container: 21 | args: 22 | - sh 23 | - -c 24 | - exit 0 25 | image: golang:1.17 26 | name: terminator 27 | terminator: true 28 | -------------------------------------------------------------------------------- /examples/108-container-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, container 2 | 3 | if __name__ == '__main__': 4 | (pipeline("108-container") 5 | .owner('argoproj-labs') 6 | .describe("""This example showcases container options.""") 7 | .annotate('dataflow.argoproj.io/wait-for', 'Completed') 8 | .step( 9 | (container('main', 10 | args=['sh', '-c', 'exit 0'], 11 | image='golang:1.17', 12 | env={'FOO': 'bar'}, 13 | resources={'requests': {'cpu': 1}} 14 | ) 15 | .annotations({'my-annotation': 'my-value'}) 16 | )) 17 | .save()) 18 | -------------------------------------------------------------------------------- /examples/108-container-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: This example showcases container options. 
6 | dataflow.argoproj.io/owner: argoproj-labs 7 | dataflow.argoproj.io/wait-for: Completed 8 | name: 108-container 9 | spec: 10 | steps: 11 | - container: 12 | args: 13 | - sh 14 | - -c 15 | - exit 0 16 | env: 17 | - name: FOO 18 | value: bar 19 | image: golang:1.17 20 | resources: 21 | requests: 22 | cpu: 1 23 | metadata: 24 | annotations: 25 | my-annotation: my-value 26 | name: main 27 |
-------------------------------------------------------------------------------- /examples/108-fifos-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("108-fifos") 5 | .owner('argoproj-labs') 6 | .describe("""This example uses named pipes to send and receive messages. 7 | 8 | Two named pipes are made available: 9 | 10 | * The container can read lines from `/var/run/argo-dataflow/in`. Each line will be a single message. 11 | * The container can write to `/var/run/argo-dataflow/out`. Each line MUST be a single message. 12 | 13 | You MUST escape new lines.""") 14 | .step( 15 | (kafka('input-topic') 16 | .container('main', 17 | args=['sh', '-c', """cat /var/run/argo-dataflow/in | while read line ; do 18 | echo "hi $line" 19 | done > /var/run/argo-dataflow/out"""], 20 | image='golang:1.17', 21 | fifo=True 22 | ) 23 | .kafka('output-topic') 24 | )) 25 | .save()) 26 |
-------------------------------------------------------------------------------- /examples/301-cron-log-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, cron 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-cron-log") 5 | .owner('argoproj-labs') 6 | .describe("""This example uses a cron source and a log sink. 7 | 8 | ## Cron 9 | 10 | You can format dates using a "layout": 11 | 12 | https://golang.org/pkg/time/#Time.Format 13 | 14 | By default, the layout is RFC3339. 15 | 16 | * Cron sources are **unreliable**. Messages will not be sent when a pod is not running, which can happen at any time in Kubernetes. 17 | * Cron sources must not be scaled to zero. They will stop working. 18 | 19 | ## Log 20 | 21 | This logs the message. 22 | 23 | * Log sinks are totally reliable. 24 | """) 25 | .step( 26 | (cron('*/3 * * * * *', layout='15:04:05') 27 | .cat() 28 | .log()) 29 | ).save()) 30 |
-------------------------------------------------------------------------------- /examples/301-cron-log-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This example uses a cron source and a log sink. 7 | 8 | ## Cron 9 | 10 | You can format dates using a "layout": 11 | 12 | https://golang.org/pkg/time/#Time.Format 13 | 14 | By default, the layout is RFC3339. 15 | 16 | * Cron sources are **unreliable**. Messages will not be sent when a pod is not running, which can happen at any time in Kubernetes. 17 | * Cron sources must not be scaled to zero. They will stop working. 18 | 19 | ## Log 20 | 21 | This logs the message. 22 | 23 | * Log sinks are totally reliable.
24 | dataflow.argoproj.io/owner: argoproj-labs 25 | name: 301-cron-log 26 | spec: 27 | steps: 28 | - cat: {} 29 | name: main 30 | sinks: 31 | - log: {} 32 | sources: 33 | - cron: 34 | layout: '15:04:05' 35 | schedule: '*/3 * * * * *' 36 | -------------------------------------------------------------------------------- /examples/301-erroring-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, cron 2 | 3 | 4 | def handler(msg, context): 5 | import random 6 | if random.randint(0, 4) == 1: 7 | raise Exception("random error") 8 | return msg 9 | 10 | 11 | if __name__ == '__main__': 12 | (pipeline("301-erroring") 13 | .owner('argoproj-labs') 14 | .describe("""This example showcases retry policies.""") 15 | .annotate('dataflow.argoproj.io/test', 'false') 16 | .step( 17 | (cron('*/3 * * * * *', retry={'steps': 99999999}) 18 | .code('always', source=handler) 19 | .log()) 20 | ) 21 | .step( 22 | (cron('*/3 * * * * *', retry={'steps': 0}) 23 | .code('never', source=handler) 24 | .log()) 25 | ).save()) 26 | -------------------------------------------------------------------------------- /examples/301-jetstream-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, jetstream 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-jetstream") 5 | .owner('argoproj-labs') 6 | .describe("""This example shows reading and writing to a JetStream subject. 7 | 8 | * Adding replicas will nearly linearly increase throughput. 9 | """) 10 | .annotate('dataflow.argoproj.io/test', 'false') 11 | .step( 12 | (jetstream('input-subject') 13 | .cat() 14 | .jetstream('output-subject') 15 | )) 16 | .save()) 17 | -------------------------------------------------------------------------------- /examples/301-jetstream-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: "This example shows reading and writing to a\ 6 | \ JetStream subject.\n\n* Adding replicas will nearly linearly increase throughput.\ 7 | \ \n" 8 | dataflow.argoproj.io/owner: argoproj-labs 9 | dataflow.argoproj.io/test: 'false' 10 | name: 301-jetstream 11 | spec: 12 | steps: 13 | - cat: {} 14 | name: main 15 | sinks: 16 | - jetstream: 17 | subject: output-subject 18 | sources: 19 | - jetstream: 20 | subject: input-subject 21 | -------------------------------------------------------------------------------- /examples/301-kafka-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, kafka 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-kafka") 5 | .owner('argoproj-labs') 6 | .describe("""This example shows reading and writing to a Kafka topic 7 | 8 | * Kafka topics are typically partitioned. Dataflow will process each partition simultaneously. 9 | * Adding replicas will nearly linearly increase throughput. 10 | * If you scale beyond the number of partitions, those additional replicas will be idle. 
11 | """) 12 | .annotate("dataflow.argoproj.io/test", "true") 13 | .step( 14 | (kafka('input-topic', groupId='my-group') 15 | .cat() 16 | .kafka('output-topic', a_sync=True) 17 | )) 18 | .save()) 19 | -------------------------------------------------------------------------------- /examples/301-kafka-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: "This example shows reading and writing to a\ 6 | \ Kafka topic\n \n* Kafka topics are typically partitioned. Dataflow will\ 7 | \ process each partition simultaneously.\n* Adding replicas will nearly linearly\ 8 | \ increase throughput.\n* If you scale beyond the number of partitions, those\ 9 | \ additional replicas will be idle.\n " 10 | dataflow.argoproj.io/owner: argoproj-labs 11 | dataflow.argoproj.io/test: 'true' 12 | name: 301-kafka 13 | spec: 14 | steps: 15 | - cat: {} 16 | name: main 17 | sinks: 18 | - kafka: 19 | async: true 20 | topic: output-topic 21 | sources: 22 | - kafka: 23 | groupId: my-group 24 | topic: input-topic 25 | -------------------------------------------------------------------------------- /examples/301-stan-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, stan 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-stan") 5 | .owner('argoproj-labs') 6 | .describe("""This example shows reading and writing to a STAN subject. 7 | 8 | * Adding replicas will nearly linearly increase throughput. 9 | """) 10 | .annotate('dataflow.argoproj.io/test', 'false') 11 | .step( 12 | (stan('input-subject') 13 | .cat() 14 | .stan('output-subject') 15 | )) 16 | .save()) 17 | -------------------------------------------------------------------------------- /examples/301-stan-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: "This example shows reading and writing to a\ 6 | \ STAN subject.\n\n* Adding replicas will nearly linearly increase throughput.\ 7 | \ \n" 8 | dataflow.argoproj.io/owner: argoproj-labs 9 | dataflow.argoproj.io/test: 'false' 10 | name: 301-stan 11 | spec: 12 | steps: 13 | - cat: {} 14 | name: main 15 | sinks: 16 | - stan: 17 | subject: output-subject 18 | sources: 19 | - stan: 20 | subject: input-subject 21 | -------------------------------------------------------------------------------- /examples/301-two-sinks-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, CatStep, KafkaSource, LogSink 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-two-sinks") 5 | .owner('argoproj-labs') 6 | .describe("""This example has two sinks. 7 | 8 | * When using two sinks, you should put the most reliable first in the list, if the message cannot be delivered, then subsequent sinks will not get the message. 
9 | """) 10 | .step( 11 | CatStep( 12 | sources=[KafkaSource('input-topic')], 13 | sinks=[LogSink('a'), LogSink('b')] 14 | ) 15 | ) 16 | .save()) 17 | -------------------------------------------------------------------------------- /examples/301-two-sinks-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This example has two sinks. 7 | 8 | * When using two sinks, you should put the most reliable first in the list, if the message cannot be delivered, then subsequent sinks will not get the message. 9 | dataflow.argoproj.io/owner: argoproj-labs 10 | name: 301-two-sinks 11 | spec: 12 | steps: 13 | - cat: {} 14 | name: main 15 | sinks: 16 | - log: {} 17 | name: a 18 | - log: {} 19 | name: b 20 | sources: 21 | - kafka: 22 | topic: input-topic 23 | -------------------------------------------------------------------------------- /examples/301-two-source-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import pipeline, CatStep, KafkaSource, CronSource, LogSink 2 | 3 | if __name__ == '__main__': 4 | (pipeline("301-two-sources") 5 | .owner('argoproj-labs') 6 | .describe("""This example has two sources 7 | """) 8 | .step( 9 | CatStep( 10 | sources=[KafkaSource( 11 | 'input-topic'), CronSource(schedule='*/3 * * * * *', layout="15:04:05")], 12 | sinks=[LogSink()] 13 | ) 14 | ) 15 | .save()) 16 | -------------------------------------------------------------------------------- /examples/301-two-sources-pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: dataflow.argoproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This example has two sources 7 | dataflow.argoproj.io/owner: argoproj-labs 8 | name: 301-two-sources 9 | spec: 10 | steps: 11 | - cat: {} 12 | name: main 13 | sinks: 14 | - log: {} 15 | sources: 16 | - kafka: 17 | topic: input-topic 18 | - cron: 19 | layout: '15:04:05' 20 | schedule: '*/3 * * * * *' 21 | -------------------------------------------------------------------------------- /examples/dataflow-103-http-main-source-default-secret.yaml: -------------------------------------------------------------------------------- 1 | kind: Secret 2 | apiVersion: v1 3 | metadata: 4 | name: dataflow-http-main-source-default 5 | annotations: 6 | dataflow.argoproj.io/description: | 7 | This is an example of providing a bearer token for a HTTP source. 8 | stringData: 9 | authorization: Bearer my-bearer-token -------------------------------------------------------------------------------- /examples/dataflow-jetstream-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This is an example of providing a namespace named NATS Streaming configuration. 7 | 8 | The secret must be named `dataflow-jetstream-${name}`. 
9 | 10 | [Learn about configuration](../docs/CONFIGURATION.md) 11 | name: dataflow-jetstream-default 12 | stringData: 13 | natsUrl: nats-js 14 | authToken: testingtokentestingtoken 15 | -------------------------------------------------------------------------------- /examples/dataflow-kafka-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This is an example of providing a namespace named Kafka configuration. 7 | 8 | The secret must be named `dataflow-kafka-${name}`. 9 | 10 | # Brokers as a comma-separated list 11 | brokers: broker.a,broker.b 12 | # Kafka version 13 | version: "2.0.0" 14 | 15 | # Enable TLS 16 | net.tls.caCert: "" 17 | net.tls.cert: "" 18 | net.tls.key: "" 19 | 20 | # Enable SASL 21 | net.sasl.mechanism: PLAIN 22 | net.sasl.user: "" 23 | net.sasl.password: "" 24 | 25 | [Learn about configuration](../docs/CONFIGURATION.md) 26 | name: dataflow-kafka-default 27 | stringData: 28 | brokers: kafka-broker:9092 29 | -------------------------------------------------------------------------------- /examples/dataflow-s3-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This is an example of providing a namespace named S3 configuration. 7 | 8 | The secret must be named `dataflow-s3-${name}`. 9 | [Learn about configuration](../docs/CONFIGURATION.md) 10 | name: dataflow-s3-default 11 | stringData: 12 | region: us-west-2 13 | endpoint.url: http://moto:5000 14 | credentials.accessKeyId.name: dataflow-s3-default 15 | credentials.accessKeyId.key: accessKeyId 16 | credentials.secretAccessKey.name: dataflow-s3-default 17 | credentials.secretAccessKey.key: secretAccessKey 18 | credentials.sessionToken.name: dataflow-s3-default 19 | credentials.sessionToken.key: sessionToken 20 | accessKeyId: admin 21 | secretAccessKey: password 22 | sessionToken: "" -------------------------------------------------------------------------------- /examples/dataflow-stan-default-secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | annotations: 5 | dataflow.argoproj.io/description: | 6 | This is an example of providing a namespace named NATS Streaming configuration. 7 | 8 | The secret must be named `dataflow-stan-${name}`. 
9 | 10 | [Learn about configuration](../docs/CONFIGURATION.md) 11 | name: dataflow-stan-default 12 | stringData: 13 | clusterId: stan 14 | natsUrl: nats 15 | natsMonitoringUrl: http://stan:8222 16 | subjectPrefix: NamespacedPipelineName 17 | authToken: testingtokentestingtoken 18 | maxInflight: "15" 19 | -------------------------------------------------------------------------------- /examples/example-hpa.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: autoscaling/v1 2 | kind: HorizontalPodAutoscaler 3 | metadata: 4 | name: example-hpa 5 | spec: 6 | maxReplicas: 2 7 | minReplicas: 1 8 | scaleTargetRef: 9 | apiVersion: dataflow.argoproj.io/v1alpha1 10 | kind: Step 11 | name: replicas-main 12 | -------------------------------------------------------------------------------- /examples/git-nodejs/handler.js: -------------------------------------------------------------------------------- 1 | module.exports = async function (messageBuf, context) { 2 | const msg = messageBuf.toString('utf8') 3 | console.log('Got message', msg) 4 | return Buffer.from('hi ' + msg) 5 | } -------------------------------------------------------------------------------- /examples/git-nodejs/index.js: -------------------------------------------------------------------------------- 1 | const ProcessHandler = require('argo-dataflow-sdk') 2 | 3 | const handler = require('./handler') 4 | 5 | async function main() { 6 | ProcessHandler.start(handler) 7 | } 8 | 9 | main() 10 | -------------------------------------------------------------------------------- /examples/git-nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "git-nodejs", 3 | "version": "0.0.0", 4 | "description": "An example project showing how to use Argo-Dataflow-Sdk to run a handler.", 5 | "main": "index.js", 6 | "author": "Dom Deren (@domderen)", 7 | "license": "Apache-2.0", 8 | "dependencies": { 9 | "argo-dataflow-sdk": "https://gitpkg.now.sh/argoproj-labs/argo-dataflow/sdks/nodejs?main" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /examples/git-nodejs/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Installing dependencies" 4 | mkdir -p ./cache 5 | npm install --cache ./cache 6 | echo "Running handler" 7 | node index.js 8 | -------------------------------------------------------------------------------- /examples/git-python-generator-step/handler.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | 3 | 4 | async def handler(): 5 | i = 0 6 | while True: 7 | print('running generator fn', i) 8 | yield f'Some Value {i}'.encode('UTF-8') 9 | i = i + 1 10 | await asyncio.sleep(1) 11 | -------------------------------------------------------------------------------- /examples/git-python-generator-step/main.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | from handler import handler 4 | 5 | if __name__ == '__main__': 6 | processHandler = ProcessHandler() 7 | processHandler.start_generator(handler) 8 | -------------------------------------------------------------------------------- /examples/git-python-generator-step/requirements.txt: -------------------------------------------------------------------------------- 1 | # Argo_Dataflow_sdk python package is built into the runtime 
image, so there is no need to download it again. -------------------------------------------------------------------------------- /examples/git-python-generator-step/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Installing dependencies in virtual environment" 4 | pip3 --no-cache-dir install -r requirements.txt 5 | echo "Running handler" 6 | python3 main.py -------------------------------------------------------------------------------- /examples/git-python/handler.py: -------------------------------------------------------------------------------- 1 | def handler(message, context): 2 | msg = message.decode("UTF-8") 3 | print('Got message', msg) 4 | return ("hi " + msg).encode('UTF-8') 5 | -------------------------------------------------------------------------------- /examples/git-python/main.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | from handler import handler 4 | 5 | if __name__ == '__main__': 6 | processHandler = ProcessHandler() 7 | processHandler.start(handler) 8 | -------------------------------------------------------------------------------- /examples/git-python/requirements.txt: -------------------------------------------------------------------------------- 1 | # Argo_Dataflow_sdk python package is built into the runtime image, so there is no need to download it again. -------------------------------------------------------------------------------- /examples/git-python/start.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo "Installing dependencies in virtual environment" 4 | pip3 --no-cache-dir install -r requirements.txt 5 | echo "Running handler" 6 | python3 main.py -------------------------------------------------------------------------------- /examples/git/handler.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "context" 4 | 5 | func Handler(context context.Context, m []byte) ([]byte, error) { 6 | return []byte("hi " + string(m)), nil 7 | } 8 | -------------------------------------------------------------------------------- /examples/git/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "github.com/argoproj-labs/argo-dataflow/sdks/golang" 4 | 5 | func main() { 6 | golang.Start(Handler) 7 | } 8 | -------------------------------------------------------------------------------- /examples/jupyter/.gitignore: -------------------------------------------------------------------------------- 1 | .ipynb_checkpoints -------------------------------------------------------------------------------- /examples/jupyter/Makefile: -------------------------------------------------------------------------------- 1 | run: 2 | pip3 install -r requirements.txt 3 | jupyter notebook -------------------------------------------------------------------------------- /examples/jupyter/requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | git+https://github.com/argoproj-labs/argo-dataflow#subdirectory=dsls/python -------------------------------------------------------------------------------- /examples/kafka-two-step-pipeline.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import kafka, pipeline, stan 2 | 3 | if __name__ == 
'__main__': 4 | (pipeline("kafka-two-step") 5 | .owner('acollins8') 6 | .step( 7 | (kafka('input-topic') 8 | .cat('a') 9 | .stan('a-b')) 10 | ) 11 | .step( 12 | (stan('a-b') 13 | .cat('b') 14 | .kafka('output-topic')) 15 | ) 16 | .save()) 17 |
-------------------------------------------------------------------------------- /examples/pets-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | text: | 4 | {"type": "dog", "name": "Emma"} 5 | {"type": "cat", "name": "Dino"} 6 | kind: ConfigMap 7 | metadata: 8 | name: pets 9 |
-------------------------------------------------------------------------------- /examples/python/Makefile: -------------------------------------------------------------------------------- 1 | test: 2 | pip3 install -r requirements.txt 3 | python3 example.py -------------------------------------------------------------------------------- /examples/python/example.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow import cron, pipeline 2 | 3 | if __name__ == '__main__': 4 | (pipeline('hello') 5 | .namespace('argo-dataflow-system') 6 | .step( 7 | (cron('*/3 * * * * *') 8 | .cat() 9 | .log()) 10 | ) 11 | .run()) 12 |
-------------------------------------------------------------------------------- /examples/python/requirements.txt: -------------------------------------------------------------------------------- 1 | 2 | git+https://github.com/argoproj-labs/argo-dataflow#subdirectory=dsls/python -------------------------------------------------------------------------------- /examples/word-count-input-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | data: 3 | text: | 4 | Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et dolore magna aliqua. 5 | Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat. 6 | Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur. 7 | Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum. 8 | kind: ConfigMap 9 | metadata: 10 | name: word-count-input 11 |
-------------------------------------------------------------------------------- /hack/boilerplate.go.txt: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | 4 | Licensed under the Apache License, Version 2.0 (the "License"); 5 | you may not use this file except in compliance with the License. 6 | You may obtain a copy of the License at 7 | 8 | http://www.apache.org/licenses/LICENSE-2.0 9 | 10 | Unless required by applicable law or agreed to in writing, software 11 | distributed under the License is distributed on an "AS IS" BASIS, 12 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 | See the License for the specific language governing permissions and 14 | limitations under the License.
15 | */ -------------------------------------------------------------------------------- /hack/changelog.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -eu 3 | 4 | echo '# Changelog' 5 | echo 6 | 7 | tag= 8 | # we skip v0.0.0 tags, so these can be used on branches without updating release notes 9 | git tag -l 'v*' | grep -v 0.0.0 | sort -rV | while read last; do 10 | if [ "$tag" != "" ]; then 11 | echo "## $tag ($(git log $tag -n1 --format=%as))" 12 | echo 13 | git_log='git --no-pager log --no-merges --invert-grep --grep=^\(build\|chore\|ci\|docs\|test\):' 14 | $git_log --format=' * [%h](https://github.com/argoproj-labs/argo-dataflow/commit/%H) %s' $last..$tag 15 | echo 16 | echo "### Contributors" 17 | echo 18 | $git_log --format=' * %an' $last..$tag | sort -u 19 | echo 20 | fi 21 | tag=$last 22 | done -------------------------------------------------------------------------------- /kill/kill.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "log" 5 | "os" 6 | "strconv" 7 | "syscall" 8 | ) 9 | 10 | func main() { 11 | pid, err := strconv.Atoi(os.Args[1]) 12 | if err != nil { 13 | panic(err) 14 | } 15 | if err := mainE(pid); err != nil { 16 | panic(err) 17 | } 18 | } 19 | 20 | func mainE(pid int) error { 21 | p, err := os.FindProcess(pid) 22 | if err != nil { 23 | return err 24 | } 25 | log.Printf("signaling pid %d with SIGTERM\n", pid) 26 | if err := p.Signal(syscall.SIGTERM); err != nil { 27 | return err 28 | } 29 | return nil 30 | } 31 | -------------------------------------------------------------------------------- /manager.env: -------------------------------------------------------------------------------- 1 | export ARGO_DATAFLOW_DEBUG=pprof,kafka.generic 2 | export ARGO_DATAFLOW_DELETION_DELAY=5m 3 | export ARGO_DATAFLOW_CLUSTER=default 4 | export ARGO_DATAFLOW_NAMESPACE=argo-dataflow-system 5 | export ARGO_DATAFLOW_PULL_POLICY=IfNotPresent 6 | export ARGO_DATAFLOW_UPDATE_INTERVAL=5s 7 | # export ARGO_DATAFLOW_UNIX_DOMAIN_SOCKET=false 8 | export GODEBUG= 9 | # export JAEGER_DISABLED=false 10 | export JAEGER_ENDPOINT=http://my-jaeger-collector:14268/api/traces 11 | export JAEGER_REPORTER_LOG_SPANS=true 12 | # sample one message per second 13 | export JAEGER_SAMPLER_TYPE=ratelimiting 14 | export JAEGER_SAMPLER_PARAM=0.2 -------------------------------------------------------------------------------- /manager/controllers/scaling/funcs.go: -------------------------------------------------------------------------------- 1 | package scaling 2 | 3 | func minmax(v, min, max int) int { 4 | if v < min { 5 | return min 6 | } else if v > max { 7 | return max 8 | } else { 9 | return v 10 | } 11 | } 12 | 13 | func limit(c int) func(v, min, max, delta int) int { 14 | return func(v, min, max, delta int) int { 15 | return minmax(minmax(v, c-delta, c+delta), min, max) 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /manager/controllers/scaling/funcs_test.go: -------------------------------------------------------------------------------- 1 | package scaling 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_limit(t *testing.T) { 10 | t.Run("MinMax", func(t *testing.T) { 11 | assert.Equal(t, 0, limit(0)(-1, 0, 1, 1)) 12 | assert.Equal(t, 0, limit(0)(0, -1, 1, 1)) 13 | assert.Equal(t, 0, limit(0)(1, -1, 0, 1)) 14 | }) 15 | t.Run("Delta", func(t *testing.T) { 16 | 
-------------------------------------------------------------------------------- /prestop/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "net/http" 4 | 5 | func main() { 6 | resp, err := http.Get("http://localhost:3569/pre-stop?source=main") 7 | if err != nil { 8 | panic(err) 9 | } 10 | defer func() { _ = resp.Body.Close() }() 11 | if resp.StatusCode >= 300 { 12 | panic(resp.Status) 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /runner/init/errors.go: -------------------------------------------------------------------------------- 1 | package init 2 | 3 | import ( 4 | "github.com/go-git/go-git/v5" 5 | ) 6 | 7 | func IgnoreErrRepositoryAlreadyExists(err error) error { 8 | if err == git.ErrRepositoryAlreadyExists { 9 | return nil 10 | } 11 | return err 12 | } 13 | -------------------------------------------------------------------------------- /runner/sidecar/backoff.go: -------------------------------------------------------------------------------- 1 | package sidecar 2 | 3 | import ( 4 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 5 | "k8s.io/apimachinery/pkg/util/wait" 6 | ) 7 | 8 | func newBackoff(backoff dfv1.Backoff) wait.Backoff { 9 | return wait.Backoff{ 10 | Duration: backoff.Duration.Duration, 11 | Factor: float64(backoff.FactorPercentage) / 100, 12 | Jitter: float64(backoff.JitterPercentage) / 100, 13 | Steps: int(backoff.Steps), 14 | Cap: backoff.Cap.Duration, 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /runner/sidecar/sink/interface.go: -------------------------------------------------------------------------------- 1 | package sink 2 | 3 | import "context" 4 | 5 | type Interface interface { 6 | Sink(ctx context.Context, msg []byte) error 7 | } 8 | -------------------------------------------------------------------------------- /runner/sidecar/sink/volume/volume.go: -------------------------------------------------------------------------------- 1 | package volume 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/argoproj-labs/argo-dataflow/runner/sidecar/sink" 8 | "github.com/opentracing/opentracing-go" 9 | ) 10 | 11 | type volumeSink struct { 12 | sinkName string 13 | } 14 | 15 | func New(sinkName string) (sink.Interface, error) { 16 | return volumeSink{sinkName}, nil 17 | } 18 | 19 | func (s volumeSink) Sink(ctx context.Context, msg []byte) error { 20 | span, _ := opentracing.StartSpanFromContext(ctx, fmt.Sprintf("volume-sink-%s", s.sinkName)) 21 | defer span.Finish() 22 | return nil 23 | } 24 | -------------------------------------------------------------------------------- /runner/sidecar/source/kafka/stats.go: -------------------------------------------------------------------------------- 1 | package kafka 2 | 3 | type Stats struct { 4 | Topics map[string]struct { 5 | Partitions map[string]struct { 6 | ConsumerLag int64 `json:"consumer_lag"` 7 | } `json:"partitions"` 8 | } `json:"topics"` 9 | } 10 | 11 | func (s Stats) totalLag(topic string) int64 { 12 | var totalLag int64 13 | for _, p := range s.Topics[topic].Partitions { 14 | totalLag += p.ConsumerLag 15 | } 16 | return totalLag 17 | } 18 |
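A sketch (not part of the repository) of the statistics JSON the Stats type above is meant to decode; the topics/partitions/consumer_lag layout follows librdkafka-style client statistics, and the payload below is invented for illustration.

package kafka

import (
	"encoding/json"
	"fmt"
)

// ExampleStats_totalLag is illustrative only: totalLag sums consumer_lag
// across all partitions of one topic.
func ExampleStats_totalLag() {
	raw := []byte(`{"topics":{"my-topic":{"partitions":{"0":{"consumer_lag":3},"1":{"consumer_lag":4}}}}}`)
	var s Stats
	if err := json.Unmarshal(raw, &s); err != nil {
		panic(err)
	}
	fmt.Println(s.totalLag("my-topic"))
	// Output: 7
}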
-------------------------------------------------------------------------------- /runner/sidecar/source/source.go: -------------------------------------------------------------------------------- 1 | package source 2 | 3 | import ( 4 | "context" 5 | "errors" 6 | "io" 7 | ) 8 | 9 | type Interface interface { 10 | io.Closer 11 | } 12 | 13 | type Process func(ctx context.Context, msg []byte) error 14 | 15 | var ErrPendingUnavailable = errors.New("pending not available") 16 | 17 | type HasPending interface { 18 | Interface 19 | // GetPending returns the number of pending messages. 20 | // It may return ErrPendingUnavailable if this is not available yet. 21 | GetPending(ctx context.Context) (uint64, error) 22 | } 23 | -------------------------------------------------------------------------------- /runner/util/sha1.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "crypto/sha1" 5 | "encoding/base64" 6 | ) 7 | 8 | func _sha1(data interface{}) string { 9 | switch v := data.(type) { 10 | case []byte: 11 | h := sha1.New() 12 | _, err := h.Write(v) 13 | if err != nil { 14 | panic(err) 15 | } 16 | return base64.StdEncoding.EncodeToString(h.Sum(nil)) 17 | default: 18 | return _sha1(_bytes(data)) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /runner/util/sha1_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test__sha1(t *testing.T) { 10 | assert.Equal(t, "2jmj7l5rSw0yVb/vlWAYkK/YBwk=", _sha1(nil)) 11 | assert.Equal(t, "v4tFMNjSRt10rFOhNHG7oXlB3/c=", _sha1([]byte{1})) 12 | } 13 | -------------------------------------------------------------------------------- /runtimes/golang1-17/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | pwd 5 | 6 | cp /var/run/argo-dataflow/handler handler.go 7 | 8 | go env 9 | go run . -------------------------------------------------------------------------------- /runtimes/golang1-17/handler.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import "context" 4 | 5 | func Handler(ctx context.Context, m []byte) ([]byte, error) { 6 | return []byte("hi! " + string(m)), nil 7 | } 8 | -------------------------------------------------------------------------------- /runtimes/golang1-17/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "github.com/argoproj-labs/argo-dataflow/sdks/golang" 5 | ) 6 | 7 | func main() { 8 | golang.Start(Handler) 9 | } 10 | -------------------------------------------------------------------------------- /runtimes/java16/.dockerignore: -------------------------------------------------------------------------------- 1 | *.class -------------------------------------------------------------------------------- /runtimes/java16/.gitignore: -------------------------------------------------------------------------------- 1 | *.class -------------------------------------------------------------------------------- /runtimes/java16/Handler.java: -------------------------------------------------------------------------------- 1 | import java.util.Map; 2 | 3 | public class Handler { 4 | public static byte[] Handle(byte[] msg, Map context) throws Exception { 5 | return ("hi!
" + new String(msg)).getBytes("UTF-8"); 6 | } 7 | } -------------------------------------------------------------------------------- /runtimes/java16/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | cp /var/run/argo-dataflow/handler Handler.java 5 | 6 | javac *.java 7 | java -cp . Main -------------------------------------------------------------------------------- /runtimes/node16/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | cp /var/run/argo-dataflow/handler handler.js 5 | 6 | node index.js 7 | -------------------------------------------------------------------------------- /runtimes/node16/handler.js: -------------------------------------------------------------------------------- 1 | module.exports = async function (messageBuf, context) { 2 | const msg = messageBuf.toString('utf8') 3 | return Buffer.from('hi ' + msg) 4 | } -------------------------------------------------------------------------------- /runtimes/node16/index.js: -------------------------------------------------------------------------------- 1 | const ProcessHandler = require('argo-dataflow-sdk') 2 | 3 | const handler = require('./handler') 4 | 5 | async function main() { 6 | ProcessHandler.start(handler) 7 | } 8 | 9 | main() 10 | -------------------------------------------------------------------------------- /runtimes/node16/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "git-nodejs", 3 | "version": "0.0.0", 4 | "description": "An example project showing how to use Argo-Dataflow-Sdk to run a handler.", 5 | "main": "index.js", 6 | "author": "Dom Deren (@domderen)", 7 | "license": "Apache-2.0", 8 | "dependencies": { 9 | "argo-dataflow-sdk": "https://gitpkg.now.sh/argoproj-labs/argo-dataflow/sdks/nodejs?main" 10 | } 11 | } 12 | -------------------------------------------------------------------------------- /runtimes/python3-9/entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | cp /var/run/argo-dataflow/handler handler.py 5 | 6 | python3 main.py -------------------------------------------------------------------------------- /runtimes/python3-9/handler.py: -------------------------------------------------------------------------------- 1 | def handler(msg, context): 2 | return ("hi! " + msg.decode("UTF-8")).encode("UTF-8") 3 | -------------------------------------------------------------------------------- /runtimes/python3-9/main.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | from handler import handler 4 | 5 | if __name__ == '__main__': 6 | processHandler = ProcessHandler() 7 | processHandler.start(handler) 8 | -------------------------------------------------------------------------------- /sdks/golang/crash.go: -------------------------------------------------------------------------------- 1 | package golang 2 | 3 | import "log" 4 | 5 | func HandleCrash() { 6 | if r := recover(); r != nil { 7 | log.Printf("recovered from crash: %v\n", r) 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /sdks/golang/gen.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | echo '// Code generated by gen.sh. DO NOT EDIT.' 
> meta.go 4 | sed 's/package v1alpha1/package golang/' < ../../api/v1alpha1/meta.go >> meta.go -------------------------------------------------------------------------------- /sdks/nodejs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "argo-dataflow-sdk", 3 | "version": "0.0.77", 4 | "description": "Argo Dataflow SDK. Can be used to fulfill Argo-Dataflow's IMAGE CONTRACT: https://github.com/argoproj-labs/argo-dataflow/blob/main/docs/IMAGE_CONTRACT.md", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "standard" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/argoproj-labs/argo-dataflow.git" 12 | }, 13 | "keywords": [ 14 | "Argo", 15 | "Kubernetes" 16 | ], 17 | "author": "Dom Deren (@domderen)", 18 | "license": "Apache-2.0", 19 | "bugs": { 20 | "url": "https://github.com/argoproj-labs/argo-dataflow/issues" 21 | }, 22 | "homepage": "https://github.com/argoproj-labs/argo-dataflow#readme", 23 | "devDependencies": { 24 | "standard": "^16.0.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /sdks/python/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | dist -------------------------------------------------------------------------------- /sdks/python/MANIFEST: -------------------------------------------------------------------------------- 1 | # file GENERATED by distutils, do NOT edit 2 | README 3 | __init__.py 4 | main.py 5 | setup.py 6 | -------------------------------------------------------------------------------- /sdks/python/Makefile: -------------------------------------------------------------------------------- 1 | install: env 2 | ./venv/bin/pip3 install . 3 | ./venv/bin/pip3 install -r ./requirements.txt 4 | 5 | env: 6 | python3 -m venv venv 7 | 8 | test: 9 | ./venv/bin/python3 -m pytest 10 | 11 | publish: 12 | rm -Rf dist 13 | python3 -m build 14 | python3 -m twine upload dist/* -------------------------------------------------------------------------------- /sdks/python/build/lib/argo_dataflow_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import ProcessHandler 2 | -------------------------------------------------------------------------------- /sdks/python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = [ 3 | "setuptools>=42", 4 | "wheel" 5 | ] 6 | build-backend = "setuptools.build_meta" -------------------------------------------------------------------------------- /sdks/python/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.7.4.post0 2 | async-timeout==3.0.1 3 | attrs==21.2.0 4 | chardet==4.0.0 5 | idna==3.2 6 | iniconfig==1.1.1 7 | multidict==5.1.0 8 | packaging==21.0 9 | pluggy==1.0.0 10 | py==1.10.0 11 | pyparsing==2.4.7 12 | pytest==6.2.5 13 | pytest-aiohttp==0.3.0 14 | pytest-asyncio==0.15.1 15 | toml==0.10.2 16 | typing-extensions==3.10.0.2 17 | yarl==1.6.3 18 | -------------------------------------------------------------------------------- /sdks/python/setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name = argo_dataflow_sdk 3 | version = 0.0.1 4 | author = Dom Deren (@domderen) 5 | author_email = dominik.deren@live.com 6 | description = Argo Dataflow SDK.
Can be used to fulfill Argo-Dataflow's IMAGE CONTRACT: https://github.com/argoproj-labs/argo-dataflow/blob/main/docs/IMAGE_CONTRACT.md 7 | long_description = file: README.md 8 | long_description_content_type = text/markdown 9 | url = https://github.com/argoproj-labs/argo-dataflow 10 | project_urls = 11 | Bug Tracker = https://github.com/argoproj-labs/argo-dataflow/issues 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | Programming Language :: Python :: 3.7 15 | Programming Language :: Python :: 3.8 16 | Programming Language :: Python :: 3.9 17 | License :: OSI Approved :: Apache Software License 18 | Operating System :: OS Independent 19 | 20 | [options] 21 | package_dir = 22 | = src 23 | packages = find: 24 | install_requires = 25 | aiohttp >=3 26 | python_requires = >=3.6 27 | 28 | [options.packages.find] 29 | where = src -------------------------------------------------------------------------------- /sdks/python/src/argo_dataflow_sdk/__init__.py: -------------------------------------------------------------------------------- 1 | from .main import ProcessHandler 2 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/default_step_async_error_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | from aiohttp import ClientSession 3 | 4 | 5 | async def handler(message, _): 6 | msg = message.decode("UTF-8") 7 | async with ClientSession() as session: 8 | async with session.get('http://localhost:8080/ready') as response: 9 | body = (await response.content.read()).decode('utf-8') 10 | assert response.status == 203 11 | assert body == '' 12 | return ("Hi " + msg).encode('UTF-8') 13 | 14 | if __name__ == '__main__': 15 | processHandler = ProcessHandler() 16 | processHandler.start(handler) 17 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/default_step_async_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | from aiohttp import ClientSession 3 | 4 | 5 | async def handler(message, _): 6 | msg = message.decode("UTF-8") 7 | async with ClientSession() as session: 8 | async with session.get('http://localhost:8080/ready') as response: 9 | body = (await response.content.read()).decode('utf-8') 10 | assert response.status == 204 11 | assert body == '' 12 | return ("Hi " + msg).encode('UTF-8') 13 | 14 | if __name__ == '__main__': 15 | processHandler = ProcessHandler() 16 | processHandler.start(handler) 17 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/default_step_error_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | 4 | def handler(message, _): 5 | raise ValueError('Some error') 6 | 7 | 8 | if __name__ == '__main__': 9 | processHandler = ProcessHandler() 10 | processHandler.start(handler) 11 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/default_step_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | 4 | def handler(message, _): 5 | msg = message.decode("UTF-8") 6 | return ("Hi " + msg).encode('UTF-8') 7 | 8 | 9 | if __name__ == '__main__': 10 |
processHandler = ProcessHandler() 11 | processHandler.start(handler) 12 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/default_step_termination_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | import time 3 | 4 | 5 | def handler(message, _): 6 | msg = message.decode("UTF-8") 7 | time.sleep(0.2) 8 | return ("Hi " + msg).encode('UTF-8') 9 | 10 | 11 | if __name__ == '__main__': 12 | processHandler = ProcessHandler() 13 | processHandler.start(handler) 14 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/generator_step_async_error_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | import asyncio 3 | 4 | 5 | async def generator_handler(): 6 | i = 0 7 | while True: 8 | print('running generator fn', i) 9 | yield f'Some Value {i}'.encode('UTF-8') 10 | await asyncio.sleep(0.1) 11 | raise ValueError('Some error') 12 | 13 | if __name__ == '__main__': 14 | processHandler = ProcessHandler() 15 | processHandler.start_generator(generator_handler) 16 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/generator_step_async_handler/app.py: -------------------------------------------------------------------------------- 1 | from asyncio.exceptions import CancelledError 2 | from argo_dataflow_sdk import ProcessHandler 3 | import asyncio 4 | 5 | 6 | async def generator_handler(): 7 | try: 8 | i = 0 9 | while True: 10 | print('running generator fn', i) 11 | yield f'Some Value {i}'.encode('UTF-8') 12 | i = i + 1 13 | await asyncio.sleep(1) 14 | except CancelledError: 15 | print('Generator function got cancelled, time to cleanup.') 16 | 17 | if __name__ == '__main__': 18 | processHandler = ProcessHandler() 19 | processHandler.start_generator(generator_handler) 20 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/generator_step_error_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | 3 | 4 | def generator_handler(): 5 | i = 0 6 | while True: 7 | print('running generator fn', i) 8 | yield f'Some Value {i}'.encode('UTF-8') 9 | 10 | raise ValueError('Some error') 11 | 12 | 13 | if __name__ == '__main__': 14 | processHandler = ProcessHandler() 15 | processHandler.start_generator(generator_handler) 16 | -------------------------------------------------------------------------------- /sdks/python/tests/fixtures/generator_step_handler/app.py: -------------------------------------------------------------------------------- 1 | from argo_dataflow_sdk import ProcessHandler 2 | import time 3 | 4 | 5 | def generator_handler(): 6 | i = 0 7 | while True: 8 | print('running generator fn', i) 9 | yield f'Some Value {i}'.encode('UTF-8') 10 | i = i + 1 11 | time.sleep(1) 12 | 13 | 14 | if __name__ == '__main__': 15 | processHandler = ProcessHandler() 16 | processHandler.start_generator(generator_handler) 17 | -------------------------------------------------------------------------------- /shared/builtin/cat/cat.go: -------------------------------------------------------------------------------- 1 | package cat 2 | 3 | import ( 4 | "context" 5 | 6 | 
"github.com/argoproj-labs/argo-dataflow/shared/builtin" 7 | ) 8 | 9 | func New() builtin.Process { 10 | return func(ctx context.Context, msg []byte) ([]byte, error) { 11 | return msg, nil 12 | } 13 | } 14 | -------------------------------------------------------------------------------- /shared/builtin/cat/cat_test.go: -------------------------------------------------------------------------------- 1 | package cat 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestNew(t *testing.T) { 11 | ctx := context.Background() 12 | p := New() 13 | req := []byte{0} 14 | resp, err := p(ctx, req) 15 | assert.NoError(t, err) 16 | assert.Equal(t, req, resp) 17 | } 18 | -------------------------------------------------------------------------------- /shared/builtin/dedupe/dedupe_test.go: -------------------------------------------------------------------------------- 1 | package dedupe 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 8 | "github.com/stretchr/testify/assert" 9 | "k8s.io/apimachinery/pkg/api/resource" 10 | ) 11 | 12 | func TestNew(t *testing.T) { 13 | ctx := dfv1.ContextWithMeta(context.Background(), dfv1.Meta{Source: "my-source", ID: "my-id"}) 14 | p, err := New(ctx, `"1"`, resource.MustParse("1")) 15 | assert.NoError(t, err) 16 | resp, err := p(ctx, []byte{0}) 17 | assert.NoError(t, err) 18 | assert.NotNil(t, resp) 19 | resp, err = p(ctx, []byte{0}) 20 | assert.NoError(t, err) 21 | assert.Nil(t, resp) 22 | } 23 | -------------------------------------------------------------------------------- /shared/builtin/dedupe/item.go: -------------------------------------------------------------------------------- 1 | package dedupe 2 | 3 | import "time" 4 | 5 | type item struct { 6 | id string 7 | lastObserved time.Time 8 | index int 9 | } 10 | -------------------------------------------------------------------------------- /shared/builtin/dedupe/items.go: -------------------------------------------------------------------------------- 1 | package dedupe 2 | 3 | type items []*item 4 | 5 | func (is items) Len() int { return len(is) } 6 | 7 | func (is items) Less(i, j int) bool { 8 | return is[i].lastObserved.After(is[j].lastObserved) 9 | } 10 | 11 | func (is items) Swap(i, j int) { 12 | is[i], is[j] = is[j], is[i] 13 | is[i].index = i 14 | is[j].index = j 15 | } 16 | 17 | func (is *items) Push(x interface{}) { 18 | n := len(*is) 19 | item := x.(*item) 20 | item.index = n 21 | *is = append(*is, item) 22 | } 23 | 24 | func (is *items) Pop() interface{} { 25 | old := *is 26 | n := len(old) 27 | item := old[n-1] 28 | old[n-1] = nil // avoid memory leak 29 | item.index = -1 // for safety 30 | *is = old[0 : n-1] 31 | return item 32 | } 33 | -------------------------------------------------------------------------------- /shared/builtin/dedupe/uniq_items.go: -------------------------------------------------------------------------------- 1 | package dedupe 2 | 3 | import ( 4 | "container/heap" 5 | "time" 6 | ) 7 | 8 | type uniqItems struct { 9 | ids map[string]*item 10 | items 11 | } 12 | 13 | func (is *uniqItems) update(id string) bool { 14 | i, ok := is.ids[id] 15 | if ok { 16 | i.lastObserved = time.Now() 17 | heap.Fix(&is.items, i.index) 18 | } else { 19 | i = &item{id: id, lastObserved: time.Now()} 20 | heap.Push(&is.items, i) 21 | is.ids[id] = i 22 | } 23 | return ok 24 | } 25 | 26 | func (is *uniqItems) shrink() { 27 | i := heap.Pop(&is.items).(*item) 28 | delete(is.ids, i.id) 
29 | } 30 | 31 | func (is *uniqItems) size() int { 32 | return len(is.ids) 33 | } 34 | -------------------------------------------------------------------------------- /shared/builtin/dedupe/uniq_items_test.go: -------------------------------------------------------------------------------- 1 | package dedupe 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_uniqItems(t *testing.T) { 10 | x := &uniqItems{ids: map[string]*item{}} 11 | assert.False(t, x.update("foo")) 12 | assert.True(t, x.update("foo")) 13 | assert.False(t, x.update("bar")) 14 | } 15 | -------------------------------------------------------------------------------- /shared/builtin/exec.go: -------------------------------------------------------------------------------- 1 | package builtin 2 | 3 | import ( 4 | "context" 5 | ) 6 | 7 | type Process func(ctx context.Context, msg []byte) ([]byte, error) 8 | -------------------------------------------------------------------------------- /shared/builtin/expand/expand.go: -------------------------------------------------------------------------------- 1 | package expand 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | 7 | "github.com/argoproj-labs/argo-dataflow/shared/builtin" 8 | "github.com/doublerebel/bellows" 9 | ) 10 | 11 | func New() builtin.Process { 12 | return func(ctx context.Context, msg []byte) ([]byte, error) { 13 | v := make(map[string]interface{}) 14 | if err := json.Unmarshal(msg, &v); err != nil { 15 | return nil, err 16 | } 17 | if data, err := json.Marshal(bellows.Expand(v)); err != nil { 18 | return nil, err 19 | } else { 20 | return data, nil 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /shared/builtin/expand/expand_test.go: -------------------------------------------------------------------------------- 1 | package expand 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestNew(t *testing.T) { 11 | ctx := context.Background() 12 | p := New() 13 | req := []byte(`{"a.b":1}`) 14 | resp, err := p(ctx, req) 15 | assert.NoError(t, err) 16 | assert.Equal(t, `{"a":{"b":1}}`, string(resp)) 17 | } 18 | -------------------------------------------------------------------------------- /shared/builtin/filter/filter.go: -------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/antonmedv/expr" 8 | "github.com/argoproj-labs/argo-dataflow/runner/util" 9 | "github.com/argoproj-labs/argo-dataflow/shared/builtin" 10 | ) 11 | 12 | func New(expression string) (builtin.Process, error) { 13 | prog, err := expr.Compile(expression) 14 | if err != nil { 15 | return nil, fmt.Errorf("failed to compile %q: %w", expression, err) 16 | } 17 | return func(ctx context.Context, msg []byte) ([]byte, error) { 18 | env, err := util.ExprEnv(ctx, msg) 19 | if err != nil { 20 | return nil, fmt.Errorf("failed to create expr env: %w", err) 21 | } 22 | res, err := expr.Run(prog, env) 23 | if err != nil { 24 | return nil, fmt.Errorf("failed to run program: %w", err) 25 | } 26 | accept, ok := res.(bool) 27 | if !ok { 28 | return nil, fmt.Errorf("must return bool") 29 | } 30 | if accept { 31 | return msg, nil 32 | } else { 33 | return nil, nil 34 | } 35 | }, nil 36 | } 37 | -------------------------------------------------------------------------------- /shared/builtin/filter/filter_test.go: 
-------------------------------------------------------------------------------- 1 | package filter 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestNew(t *testing.T) { 12 | ctx := dfv1.ContextWithMeta(context.Background(), dfv1.Meta{Source: "my-source", ID: "my-id", Time: 0}) 13 | p, err := New(`string(msg) == "accept"`) 14 | assert.NoError(t, err) 15 | resp, err := p(ctx, []byte("accept")) 16 | assert.NoError(t, err) 17 | assert.NotNil(t, resp) 18 | resp, err = p(ctx, []byte("deny")) 19 | assert.NoError(t, err) 20 | assert.Nil(t, resp) 21 | } 22 | -------------------------------------------------------------------------------- /shared/builtin/flatten/flatten.go: -------------------------------------------------------------------------------- 1 | package flatten 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | 7 | "github.com/argoproj-labs/argo-dataflow/shared/builtin" 8 | "github.com/doublerebel/bellows" 9 | ) 10 | 11 | func New() builtin.Process { 12 | return func(ctx context.Context, msg []byte) ([]byte, error) { 13 | v := make(map[string]interface{}) 14 | if err := json.Unmarshal(msg, &v); err != nil { 15 | return nil, err 16 | } 17 | if data, err := json.Marshal(bellows.Flatten(v)); err != nil { 18 | return nil, err 19 | } else { 20 | return data, nil 21 | } 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /shared/builtin/flatten/flatten_test.go: -------------------------------------------------------------------------------- 1 | package flatten 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | "github.com/stretchr/testify/assert" 8 | ) 9 | 10 | func TestNew(t *testing.T) { 11 | ctx := context.Background() 12 | p := New() 13 | req := []byte(`{"a":{"b":1}}`) 14 | resp, err := p(ctx, req) 15 | assert.NoError(t, err) 16 | assert.Equal(t, `{"a.b":1}`, string(resp)) 17 | } 18 | -------------------------------------------------------------------------------- /shared/builtin/group/group_test.go: -------------------------------------------------------------------------------- 1 | package group 2 | 3 | import ( 4 | "context" 5 | "encoding/json" 6 | "os" 7 | "testing" 8 | 9 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | "github.com/stretchr/testify/assert" 11 | ) 12 | 13 | func TestNew(t *testing.T) { 14 | tmp, err := os.MkdirTemp("/tmp", "test") 15 | assert.NoError(t, err) 16 | ctx := dfv1.ContextWithMeta(context.Background(), dfv1.Meta{Source: "my-source", ID: "my-id"}) 17 | p, err := New(tmp, `"1"`, `string(msg) == "end"`, dfv1.GroupFormatJSONStringArray) 18 | assert.NoError(t, err) 19 | resp, err := p(ctx, []byte("1")) 20 | assert.NoError(t, err) 21 | assert.Nil(t, resp) 22 | resp, err = p(ctx, []byte(`end`)) 23 | assert.NoError(t, err) 24 | items := make([]string, 0) 25 | err = json.Unmarshal(resp, &items) 26 | assert.NoError(t, err) 27 | assert.ElementsMatch(t, []string{"1", "end"}, items) 28 | } 29 | -------------------------------------------------------------------------------- /shared/builtin/map/map.go: -------------------------------------------------------------------------------- 1 | package _map 2 | 3 | import ( 4 | "context" 5 | "fmt" 6 | 7 | "github.com/antonmedv/expr" 8 | "github.com/argoproj-labs/argo-dataflow/runner/util" 9 | "github.com/argoproj-labs/argo-dataflow/shared/builtin" 10 | ) 11 | 12 | func New(expression string) (builtin.Process, error) { 13 | prog, err := 
expr.Compile(expression) 14 | if err != nil { 15 | return nil, fmt.Errorf("failed to compile %q: %w", expression, err) 16 | } 17 | return func(ctx context.Context, msg []byte) ([]byte, error) { 18 | env, err := util.ExprEnv(ctx, msg) 19 | if err != nil { 20 | return nil, fmt.Errorf("failed to create expr env: %w", err) 21 | } 22 | res, err := expr.Run(prog, env) 23 | if err != nil { 24 | return nil, err 25 | } 26 | b, ok := res.([]byte) 27 | if !ok { 28 | return nil, fmt.Errorf("must return []byte") 29 | } 30 | return b, nil 31 | }, nil 32 | } 33 | -------------------------------------------------------------------------------- /shared/builtin/map/map_test.go: -------------------------------------------------------------------------------- 1 | package _map 2 | 3 | import ( 4 | "context" 5 | "testing" 6 | 7 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func TestNew(t *testing.T) { 12 | ctx := dfv1.ContextWithMeta(context.Background(), dfv1.Meta{Source: "my-source", ID: "my-id"}) 13 | p, err := New(`bytes("hi " + string(msg))`) 14 | assert.NoError(t, err) 15 | resp, err := p(ctx, []byte("foo")) 16 | assert.NoError(t, err) 17 | assert.Equal(t, "hi foo", string(resp)) 18 | } 19 | -------------------------------------------------------------------------------- /shared/debug/debug.go: -------------------------------------------------------------------------------- 1 | package debug 2 | 3 | import ( 4 | "os" 5 | "strings" 6 | 7 | dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 8 | ) 9 | 10 | var v = os.Getenv(dfv1.EnvDebug) 11 | 12 | func Enabled(flag string) bool { 13 | switch v { 14 | case "true": 15 | return true 16 | case "false": 17 | return false 18 | } 19 | for _, s := range strings.Split(v, ",") { 20 | if flag == s { 21 | return true 22 | } 23 | } 24 | return false 25 | } 26 | 27 | func EnabledFlags(prefix string) []string { 28 | var flags []string 29 | for _, s := range strings.Split(v, ",") { 30 | if strings.HasPrefix(s, prefix) { 31 | flags = append(flags, strings.TrimPrefix(s, prefix)) 32 | } 33 | } 34 | return flags 35 | } 36 | -------------------------------------------------------------------------------- /shared/debug/debug_test.go: -------------------------------------------------------------------------------- 1 | package debug 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_Enabled(t *testing.T) { 10 | t.Run("true", func(t *testing.T) { 11 | v = "true" 12 | assert.True(t, Enabled("foo")) 13 | }) 14 | t.Run("empty", func(t *testing.T) { 15 | v = "" 16 | assert.False(t, Enabled("foo")) 17 | }) 18 | t.Run("false", func(t *testing.T) { 19 | v = "false" 20 | assert.False(t, Enabled("foo")) 21 | }) 22 | t.Run("foo,bar", func(t *testing.T) { 23 | v = "foo,bar" 24 | assert.True(t, Enabled("foo")) 25 | assert.True(t, Enabled("bar")) 26 | assert.False(t, Enabled("baz")) 27 | }) 28 | } 29 | 30 | func Test_EnabledFlags(t *testing.T) { 31 | t.Run("empty", func(t *testing.T) { 32 | v = "" 33 | assert.Nil(t, EnabledFlags("foo")) 34 | }) 35 | t.Run("foo.bar", func(t *testing.T) { 36 | v = "foo.bar" 37 | assert.Equal(t, []string{"bar"}, EnabledFlags("foo.")) 38 | }) 39 | } 40 |
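A sketch (not part of the repository) of how the debug flags above combine with the ARGO_DATAFLOW_DEBUG value set in manager.env earlier in this dump; assigning to the package variable v directly mirrors what the tests do.

package debug

import "fmt"

// ExampleEnabled is illustrative only: with ARGO_DATAFLOW_DEBUG set to
// "pprof,kafka.generic" (as in manager.env), Enabled matches whole flags
// and EnabledFlags strips a prefix from the matching ones.
func ExampleEnabled() {
	v = "pprof,kafka.generic"
	fmt.Println(Enabled("pprof"))
	fmt.Println(Enabled("kafka"))
	fmt.Println(EnabledFlags("kafka."))
	// Output:
	// true
	// false
	// [generic]
}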
-------------------------------------------------------------------------------- /shared/symbol/symbol.go: -------------------------------------------------------------------------------- 1 | package symbol 2 | 3 | const ( 4 | Error = "⚠" 5 | // Failed = "✖". 6 | Pending = "◷" 7 | Total = "Σ" 8 | // Running = "●" 9 | // Succeeded = "✔" 10 | // Unknown = "?". 11 | ) 12 | -------------------------------------------------------------------------------- /shared/util/.gitignore: -------------------------------------------------------------------------------- 1 | message -------------------------------------------------------------------------------- /shared/util/cpu.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "runtime" 5 | ) 6 | 7 | func init() { 8 | logger.Info("cpu", "numCPU", runtime.NumCPU()) 9 | } 10 | -------------------------------------------------------------------------------- /shared/util/env.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | "os" 6 | "strconv" 7 | "strings" 8 | "time" 9 | ) 10 | 11 | func GetEnvDuration(key string, def time.Duration) time.Duration { 12 | if x, ok := os.LookupEnv(key); ok { 13 | if v, err := time.ParseDuration(x); err != nil { 14 | panic(fmt.Errorf("%s=%s; value must be duration: %w", key, x, err)) 15 | } else { 16 | return v 17 | } 18 | } 19 | return def 20 | } 21 | 22 | func GetEnvInt(key string, def int) int { 23 | if x, ok := os.LookupEnv(key); ok { 24 | if v, err := strconv.Atoi(x); err != nil { 25 | panic(fmt.Errorf("%s=%s; value must be int: %w", key, x, err)) 26 | } else { 27 | return v 28 | } 29 | } 30 | return def 31 | } 32 | 33 | func GetEnvStringArr(key string, def []string) []string { 34 | if x, ok := os.LookupEnv(key); ok { 35 | return strings.Split(x, ",") 36 | } 37 | return def 38 | } 39 | -------------------------------------------------------------------------------- /shared/util/env_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "os" 5 | "testing" 6 | "time" 7 | 8 | "github.com/stretchr/testify/assert" 9 | ) 10 | 11 | func Test_GetEnvDuration(t *testing.T) { 12 | defer os.Unsetenv("FOO") 13 | assert.Equal(t, time.Minute, GetEnvDuration("FOO", time.Minute)) 14 | _ = os.Setenv("FOO", "2m") 15 | assert.Equal(t, 2*time.Minute, GetEnvDuration("FOO", 0)) 16 | _ = os.Setenv("FOO", "xx") 17 | 18 | assert.Panics(t, func() { 19 | _ = GetEnvDuration("FOO", 0) 20 | }) 21 | } 22 | -------------------------------------------------------------------------------- /shared/util/equal.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import jsonpatch "github.com/evanphx/json-patch" 4 | 5 | func NotEqual(a, b interface{}) (notEqual bool, patch string) { 6 | x := MustJSON(a) 7 | y := MustJSON(b) 8 | if x != y { 9 | patch, _ := jsonpatch.CreateMergePatch([]byte(x), []byte(y)) 10 | return true, string(patch) 11 | } else { 12 | return false, "" 13 | } 14 | } 15 |
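A sketch (not part of the repository) of NotEqual in use: the second return value is a JSON merge patch describing the difference, which is handy in controller logs. The maps below are invented for illustration.

package util

import "fmt"

// ExampleNotEqual is illustrative only: comparing two values that differ
// in one key yields notEqual=true plus a merge patch for the change.
func ExampleNotEqual() {
	notEqual, patch := NotEqual(map[string]int{"a": 1}, map[string]int{"a": 2})
	fmt.Println(notEqual, patch)
	// Output: true {"a":2}
}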
-------------------------------------------------------------------------------- /shared/util/error.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "errors" 5 | "os" 6 | "strings" 7 | 8 | apierr "k8s.io/apimachinery/pkg/api/errors" 9 | ) 10 | 11 | func IgnorePermission(err error) error { 12 | if errors.Is(err, os.ErrPermission) { 13 | return nil 14 | } 15 | return err 16 | } 17 | 18 | func IgnoreExist(err error) error { 19 | if errors.Is(err, os.ErrExist) { 20 | return nil 21 | } 22 | return err 23 | } 24 | 25 | func IgnoreAlreadyExists(err error) error { 26 | if apierr.IsAlreadyExists(err) { 27 | return nil 28 | } 29 | return err 30 | } 31 | 32 | func IgnoreNotFound(err error) error { 33 | if apierr.IsNotFound(err) { 34 | return nil 35 | } 36 | return err 37 | } 38 | 39 | func IgnoreContainerNotFound(err error) error { 40 | if err != nil && strings.Contains(err.Error(), "container not found") { 41 | return nil 42 | } 43 | return err 44 | } 45 | 46 | func IgnoreConflict(err error) error { 47 | if apierr.IsConflict(err) { 48 | return nil 49 | } 50 | return err 51 | } 52 | -------------------------------------------------------------------------------- /shared/util/func.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "reflect" 5 | "runtime" 6 | "strings" 7 | ) 8 | 9 | func GetFuncName(i interface{}) string { 10 | ptr := runtime.FuncForPC(reflect.ValueOf(i).Pointer()) 11 | parts := strings.SplitN(ptr.Name(), ".", 3) 12 | return parts[2] 13 | } 14 | -------------------------------------------------------------------------------- /shared/util/func_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func namedFunc() {} 10 | 11 | var varFunc = func() {} 12 | 13 | func Test_GetFuncName(t *testing.T) { 14 | assert.Equal(t, "namedFunc", GetFuncName(namedFunc)) 15 | assert.Equal(t, "glob..func1", GetFuncName(varFunc)) 16 | assert.Equal(t, "Test_GetFuncName.func1", GetFuncName(func() {})) 17 | } 18 | -------------------------------------------------------------------------------- /shared/util/hash.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "crypto/sha256" 5 | "encoding/hex" 6 | ) 7 | 8 | func MustHash(v interface{}) string { 9 | switch data := v.(type) { 10 | case []byte: 11 | hash := sha256.New() 12 | if _, err := hash.Write(data); err != nil { 13 | panic(err) 14 | } 15 | return hex.EncodeToString(hash.Sum(nil)) 16 | case string: 17 | return MustHash([]byte(data)) 18 | default: 19 | return MustHash([]byte(MustJSON(v))) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /shared/util/hash_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMustHash(t *testing.T) { 10 | assert.Equal(t, "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae", MustHash([]byte("foo"))) 11 | } 12 | -------------------------------------------------------------------------------- /shared/util/json.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import "encoding/json" 4 | 5 | func MustJSON(in interface{}) string { 6 | if data, err := json.Marshal(in); err != nil { 7 | panic(err) 8 | } else { 9 | return string(data) 10 | } 11 | } 12 | 13 | // MustUnJSON unmarshals JSON or panics. 14 | // v - must be []byte or string 15 | // in - must be a pointer.
16 | func MustUnJSON(v interface{}, in interface{}) { 17 | switch data := v.(type) { 18 | case []byte: 19 | if err := json.Unmarshal(data, in); err != nil { 20 | panic(err) 21 | } 22 | case string: 23 | MustUnJSON([]byte(data), in) 24 | default: 25 | panic("unknown type") 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /shared/util/json_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestMustJson(t *testing.T) { 10 | assert.Equal(t, "1", MustJSON(1)) 11 | } 12 | 13 | func TestUnJSON(t *testing.T) { 14 | var in int 15 | MustUnJSON("1", &in) 16 | assert.Equal(t, 1, in) 17 | } 18 | -------------------------------------------------------------------------------- /shared/util/log.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "bytes" 5 | "os" 6 | 7 | logrusr "github.com/bombsimon/logrusr/v2" 8 | "github.com/go-logr/logr" 9 | log "github.com/sirupsen/logrus" 10 | "github.com/weaveworks/promrus" 11 | ) 12 | 13 | type splitter int 14 | 15 | func (splitter) Write(p []byte) (n int, err error) { 16 | if bytes.Contains(p, []byte("level=error")) { 17 | return os.Stderr.Write(p) 18 | } 19 | return os.Stdout.Write(p) 20 | } 21 | 22 | var logger = newLogger() 23 | 24 | func newLogger() logr.Logger { 25 | l := log.New() 26 | l.SetOutput(splitter(0)) 27 | l.AddHook(promrus.MustNewPrometheusHook()) 28 | return logrusr.New(l) 29 | } 30 | 31 | func NewLogger() logr.Logger { 32 | return logger 33 | } 34 | -------------------------------------------------------------------------------- /shared/util/log_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestNewLogger(t *testing.T) { 8 | NewLogger().Info("test", "a", 1, "b", "c") 9 | } 10 | -------------------------------------------------------------------------------- /shared/util/print.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "encoding/base64" 5 | "strconv" 6 | ) 7 | 8 | func IsPrint(x string) bool { 9 | for _, y := range x { 10 | if !strconv.IsPrint(y) { 11 | return false 12 | } 13 | } 14 | return true 15 | } 16 | 17 | // return a printable string. 
18 | func Printable(x string) string { 19 | if IsPrint(x) { 20 | return x 21 | } 22 | return base64.StdEncoding.EncodeToString([]byte(x)) 23 | } 24 | -------------------------------------------------------------------------------- /shared/util/print_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestIsPrint(t *testing.T) { 10 | assert.True(t, IsPrint("")) 11 | assert.True(t, IsPrint("abc")) 12 | assert.False(t, IsPrint("\000")) 13 | } 14 | 15 | func TestPrintable(t *testing.T) { 16 | assert.Equal(t, "", Printable("")) 17 | assert.Equal(t, "abc", Printable("abc")) 18 | assert.Equal(t, "AA==", Printable("\000")) 19 | } 20 | -------------------------------------------------------------------------------- /shared/util/process.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import "os" 4 | 5 | func init() { 6 | logger.Info("process", "pid", os.Getpid()) 7 | } 8 | -------------------------------------------------------------------------------- /shared/util/rand.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "encoding/base64" 5 | "fmt" 6 | "math/rand" 7 | "time" 8 | ) 9 | 10 | func init() { 11 | rand.Seed(time.Now().UnixNano()) 12 | } 13 | 14 | func RandString() string { 15 | return base64.URLEncoding.EncodeToString([]byte(fmt.Sprintf("%v", rand.Uint64()))) 16 | } 17 | -------------------------------------------------------------------------------- /shared/util/rand_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func Test_RandString(t *testing.T) { 10 | assert.NotEmpty(t, RandString()) 11 | } 12 | -------------------------------------------------------------------------------- /shared/util/resource.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import "strings" 4 | 5 | func Resource(kind string) string { 6 | return strings.ToLower(kind) + "s" 7 | } 8 | -------------------------------------------------------------------------------- /shared/util/retry/retry.go: -------------------------------------------------------------------------------- 1 | package retry 2 | 3 | import ( 4 | "k8s.io/apimachinery/pkg/api/errors" 5 | "k8s.io/apimachinery/pkg/util/wait" 6 | k8sRetry "k8s.io/client-go/util/retry" 7 | ) 8 | 9 | func retryableErrors(err error) bool { 10 | return errors.IsTimeout(err) || errors.IsServerTimeout(err) || errors.IsTooManyRequests(err) 11 | } 12 | 13 | func WithDefaultRetry(fn func() error) error { 14 | return WithRetry(k8sRetry.DefaultBackoff, fn) 15 | } 16 | 17 | func WithRetry(back wait.Backoff, fn func() error) error { 18 | return k8sRetry.OnError(back, retryableErrors, fn) 19 | } 20 |
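A sketch (not part of the repository) of wrapping a flaky Kubernetes API call with the retry helpers above; the operation body is a stand-in for something like an update that can hit a server timeout.

package retry

import "fmt"

// ExampleWithDefaultRetry is illustrative only: the function is retried
// with client-go's default backoff while it returns a retryable API error
// (timeout, server timeout, or too many requests); success and
// non-retryable errors return immediately.
func ExampleWithDefaultRetry() {
	attempts := 0
	err := WithDefaultRetry(func() error {
		attempts++ // e.g. perform a Kubernetes update here
		return nil
	})
	fmt.Println(err, attempts)
	// Output: <nil> 1
}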
-------------------------------------------------------------------------------- /shared/util/uid.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "fmt" 5 | 6 | "k8s.io/utils/strings" 7 | ) 8 | 9 | func GetSourceUID(cluster, namespace, pipelineName, stepName, sourceName string) string { 10 | hash := MustHash(fmt.Sprintf("%s.%s.%s.%s.sources.%s", cluster, namespace, pipelineName, stepName, sourceName)) 11 | return fmt.Sprintf("dataflow-%s-%s-%s-%s-%s-%s", strings.ShortenString(cluster, 3), strings.ShortenString(namespace, 3), strings.ShortenString(pipelineName, 3), strings.ShortenString(stepName, 3), strings.ShortenString(sourceName, 3), hash) 12 | } 13 | -------------------------------------------------------------------------------- /shared/util/uid_test.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/stretchr/testify/assert" 7 | ) 8 | 9 | func TestGetSourceUID(t *testing.T) { 10 | uniqueID := GetSourceUID("cluster", "default", "pipeline", "stepName", "source") 11 | assert.Equal(t, "dataflow-clu-def-pip-ste-sou-7c07c91b03ebf978f5dda8b77130662e016493600b8ca4e6ffe12ec5183e3d25", uniqueID) 12 | } 13 | -------------------------------------------------------------------------------- /shared/util/version.go: -------------------------------------------------------------------------------- 1 | package util 2 | 3 | import ( 4 | "github.com/Masterminds/semver" 5 | ) 6 | 7 | var ( 8 | // The version MUST be "v"+semanticVersion; it should be one of the following options: 9 | // * "vX.Y.Z" for released versions, e.g. "v1.2.3" 10 | // * "v0.0.0-X-Y" for unreleased versions, e.g. 11 | // - "v0.0.0-latest-0" ("latest" version, i.e. latest build on the "main" branch or local dev build) 12 | version = "v0.0.0-latest-0" 13 | Version semver.Version 14 | ) 15 | 16 | func init() { 17 | logger.Info("version", "version", version) 18 | Version = *semver.MustParse(version) 19 | } 20 | -------------------------------------------------------------------------------- /test/configmap.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package test 5 | 6 | import ( 7 | "context" 8 | "log" 9 | 10 | "github.com/argoproj-labs/argo-dataflow/shared/util" 11 | corev1 "k8s.io/api/core/v1" 12 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 13 | ) 14 | 15 | func CreateConfigMap(x corev1.ConfigMap) { 16 | log.Printf("creating config map %q\n", x.Name) 17 | _, err := kubernetesInterface.CoreV1().ConfigMaps(namespace).Create(context.Background(), &x, metav1.CreateOptions{}) 18 | if util.IgnoreAlreadyExists(err) != nil { 19 | panic(err) 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /test/e2e/completion_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | .
"github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestCompletion(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "completion"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{{ 21 | Name: "main", 22 | Container: &Container{ 23 | Image: "golang:1.17", 24 | Command: []string{"sh"}, 25 | Args: []string{"-c", "exit 0"}, 26 | }, 27 | }}, 28 | }, 29 | }) 30 | 31 | WaitForPipeline(UntilSucceeded) 32 | DeletePipelines() 33 | WaitForPodsToBeDeleted() 34 | } 35 | -------------------------------------------------------------------------------- /test/e2e/cron_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . "github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestCronSource(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "cron"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{{ 21 | Name: "main", 22 | Cat: &Cat{}, 23 | Sources: []Source{{Cron: &Cron{Schedule: "*/3 * * * * *"}}}, 24 | Sinks: []Sink{DefaultLogSink}, 25 | }}, 26 | }, 27 | }) 28 | WaitForPipeline() 29 | WaitForPod() 30 | defer StartPortForward("cron-main-0")() 31 | WaitForSunkMessages() 32 | 33 | DeletePipelines() 34 | WaitForPodsToBeDeleted() 35 | } 36 | -------------------------------------------------------------------------------- /test/e2e/expand_step_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . "github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestExpandStep(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "expand"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{ 21 | { 22 | Name: "main", 23 | Expand: &Expand{}, 24 | Sources: []Source{{HTTP: &HTTPSource{}}}, 25 | Sinks: []Sink{DefaultLogSink}, 26 | }, 27 | }, 28 | }, 29 | }) 30 | 31 | WaitForPod() 32 | 33 | defer StartPortForward("expand-main-0")() 34 | 35 | SendMessageViaHTTP(`{"foo.bar": "baz"}`) 36 | 37 | WaitForSunkMessages() 38 | WaitForTotalSunkMessages(1) 39 | 40 | ExpectLogLine("main", `"foo\\":`) 41 | 42 | DeletePipelines() 43 | WaitForPodsToBeDeleted() 44 | } 45 | -------------------------------------------------------------------------------- /test/e2e/flatten_step_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . 
"github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestFlattenStep(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "flatten"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{ 21 | { 22 | Name: "main", 23 | Flatten: &Flatten{}, 24 | Sources: []Source{{HTTP: &HTTPSource{}}}, 25 | Sinks: []Sink{DefaultLogSink}, 26 | }, 27 | }, 28 | }, 29 | }) 30 | 31 | WaitForPod() 32 | 33 | defer StartPortForward("flatten-main-0")() 34 | 35 | SendMessageViaHTTP(`{"foo": {"bar": "baz"}}`) 36 | 37 | WaitForSunkMessages() 38 | WaitForTotalSunkMessages(1) 39 | 40 | ExpectLogLine("main", `foo.bar`) 41 | 42 | DeletePipelines() 43 | WaitForPodsToBeDeleted() 44 | } 45 | -------------------------------------------------------------------------------- /test/e2e/http_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . "github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestHTTP(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "http"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{ 21 | { 22 | Name: "main", 23 | Cat: &Cat{}, 24 | Sources: []Source{{HTTP: &HTTPSource{ServiceName: "in"}}}, 25 | Sinks: []Sink{{HTTP: &HTTPSink{URL: "http://testapi/count/incr"}}}, 26 | }, 27 | }, 28 | }, 29 | }) 30 | 31 | WaitForPipeline() 32 | WaitForPod() 33 | 34 | defer StartPortForward("http-main-0")() 35 | 36 | SendMessageViaHTTP("my-msg") 37 | 38 | WaitForSunkMessages() 39 | WaitForCounter(1, 1) 40 | 41 | DeletePipelines() 42 | WaitForPodsToBeDeleted() 43 | } 44 | -------------------------------------------------------------------------------- /test/e2e/map_step_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . "github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestMapStep(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "map"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{ 21 | { 22 | Name: "main", 23 | Map: &Map{Expression: "bytes('hi! ' + string(msg))"}, 24 | Sources: []Source{{HTTP: &HTTPSource{}}}, 25 | Sinks: []Sink{DefaultLogSink}, 26 | }, 27 | }, 28 | }, 29 | }) 30 | 31 | WaitForPod() 32 | 33 | defer StartPortForward("map-main-0")() 34 | 35 | SendMessageViaHTTP("foo-bar") 36 | 37 | WaitForSunkMessages() 38 | WaitForTotalSunkMessages(1) 39 | 40 | ExpectLogLine("main", `hi! foo-bar`) 41 | 42 | DeletePipelines() 43 | WaitForPodsToBeDeleted() 44 | } 45 | -------------------------------------------------------------------------------- /test/e2e/messages_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package e2e 5 | 6 | import ( 7 | "testing" 8 | 9 | . "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 10 | . 
"github.com/argoproj-labs/argo-dataflow/test" 11 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 12 | ) 13 | 14 | func TestMessagesEndpoint(t *testing.T) { 15 | defer Setup(t)() 16 | 17 | CreatePipeline(Pipeline{ 18 | ObjectMeta: metav1.ObjectMeta{Name: "messages"}, 19 | Spec: PipelineSpec{ 20 | Steps: []StepSpec{ 21 | { 22 | Name: "main", 23 | Container: &Container{ 24 | Image: "golang:1.17", 25 | Command: []string{"bash", "-c"}, 26 | Args: []string{` 27 | set -eux -o pipefail 28 | curl -H "Authorization: $(cat /var/run/argo-dataflow/authorization)" http://localhost:3569/messages -d 'foo-bar' 29 | `}, 30 | }, 31 | Sinks: []Sink{DefaultLogSink}, 32 | }, 33 | }, 34 | }, 35 | }) 36 | 37 | WaitForPipeline() 38 | ExpectLogLine("main", `foo-bar`) 39 | } 40 | -------------------------------------------------------------------------------- /test/http-stress/test-results.json: -------------------------------------------------------------------------------- 1 | { 2 | "TestHTTPSinkStress/.tps": 1100, 3 | "TestHTTPSinkStress/N=10,messageSize=100.tps": 0, 4 | "TestHTTPSinkStress/N=10,messageSize=1000.tps": 500, 5 | "TestHTTPSinkStress/N=50000.tps": 1000, 6 | "TestHTTPSinkStress/messageSize=1000.tps": 450, 7 | "TestHTTPSinkStress/replicas=2.tps": 1150, 8 | "TestHTTPSourceStress/.tps": 1150, 9 | "TestHTTPSourceStress/N=10,messageSize=100.tps": 0, 10 | "TestHTTPSourceStress/N=10,messageSize=1000.tps": 1050, 11 | "TestHTTPSourceStress/N=100000.tps": 1000, 12 | "TestHTTPSourceStress/N=50000.tps": 1450, 13 | "TestHTTPSourceStress/messageSize=1000.tps": 550, 14 | "TestHTTPSourceStress/messageSize=1000000.tps": 600, 15 | "TestHTTPSourceStress/replicas=2.tps": 1500 16 | } -------------------------------------------------------------------------------- /test/jetstream-stress/test-results.json: -------------------------------------------------------------------------------- 1 | { 2 | "TestJetStreamSourceStress/.tps": 600 3 | } -------------------------------------------------------------------------------- /test/kafka-stress/test-results.json: -------------------------------------------------------------------------------- 1 | { 2 | "TestKafkaAsyncSinkStress/.tps": 1250, 3 | "TestKafkaSinkStress/.tps": 450, 4 | "TestKafkaSinkStress/N=10,messageSize=100.tps": 200, 5 | "TestKafkaSinkStress/N=10,messageSize=1000.tps": 150, 6 | "TestKafkaSinkStress/N=50000.tps": 750, 7 | "TestKafkaSinkStress/async=true.tps": 400, 8 | "TestKafkaSinkStress/messageSize=1000.tps": 300, 9 | "TestKafkaSinkStress/replicas=2.tps": 400, 10 | "TestKafkaSourceStress/.tps": 1200, 11 | "TestKafkaSourceStress/N=10,messageSize=100.tps": 450, 12 | "TestKafkaSourceStress/N=10,messageSize=1000.tps": 650, 13 | "TestKafkaSourceStress/N=50000.tps": 3150, 14 | "TestKafkaSourceStress/messageSize=1000.tps": 850, 15 | "TestKafkaSourceStress/replicas=2.tps": 500 16 | } -------------------------------------------------------------------------------- /test/log_sink.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import dfv1 "github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 4 | 5 | var ( 6 | truncate = uint64(32) 7 | truncatePtr = &truncate 8 | DefaultLogSink = dfv1.Sink{Name: "log", Log: &dfv1.Log{Truncate: truncatePtr}} 9 | ) 10 | -------------------------------------------------------------------------------- /test/matchers.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package test 5 | 6 | import "fmt" 7 | 8 
--------------------------------------------------------------------------------
/test/panic.go:
--------------------------------------------------------------------------------
package test

import (
	"fmt"
	"log"
)

// ExpectPanic runs f and panics if f itself does not panic.
func ExpectPanic(f func()) {
	defer func() {
		r := recover()
		if r == nil {
			panic("expected panic")
		} else {
			log.Printf("ignoring panic %v", r)
		}
	}()
	f()
}

// CatchPanic runs try and routes any panic to catch as an error. Non-error
// panic values are wrapped rather than re-panicking on the type assertion.
func CatchPanic(try func(), catch func(error)) {
	defer func() {
		if r := recover(); r != nil {
			if err, ok := r.(error); ok {
				catch(err)
			} else {
				catch(fmt.Errorf("%v", r))
			}
		}
	}()
	try()
}
--------------------------------------------------------------------------------
/test/secrets.go:
--------------------------------------------------------------------------------
//go:build test
// +build test

package test

import (
	"context"
	"fmt"

	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var secretsInterface = kubernetesInterface.CoreV1().Secrets(namespace)

// GetAuthorization returns the bearer token for the pipeline's first HTTP
// source, read from the step's secret.
func GetAuthorization() string {
	ctx := context.Background()
	pl := GetPipeline()
	for _, step := range pl.Spec.Steps {
		for _, source := range step.Sources {
			if source.HTTP != nil {
				secret, err := secretsInterface.Get(ctx, fmt.Sprintf("%s-%s", pl.Name, step.Name), metav1.GetOptions{})
				if err != nil {
					panic(err)
				}
				data, ok := secret.Data[fmt.Sprintf("sources.%s.http.authorization", source.Name)]
				if !ok {
					panic(fmt.Errorf("authorization for source %q not found in secret", source.Name))
				}
				return string(data)
			}
		}
	}
	panic(fmt.Errorf("pipeline has no HTTP source"))
}
--------------------------------------------------------------------------------
/test/service.go:
--------------------------------------------------------------------------------
//go:build test
// +build test

package test

import (
	"context"
	"fmt"
	"log"
	"net/url"
	"time"

	. "github.com/argoproj-labs/argo-dataflow/api/v1alpha1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

var serviceInterface = kubernetesInterface.CoreV1().Services(namespace)

func WaitForService() {
	WaitForPod()
	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
	defer cancel()
	list, err := serviceInterface.List(ctx, metav1.ListOptions{})
	if err != nil {
		panic(fmt.Errorf("failed to list services: %w", err))
	}
	for _, x := range list.Items {
		if x.Spec.ClusterIP == "None" {
			continue
		}
		if _, ok := x.Spec.Selector[KeyPipelineName]; ok {
			log.Printf("waiting for service %q\n", x.Name)
			InvokeTestAPI("/http/wait-for?url=%s", url.QueryEscape("https://"+x.Name))
		}
	}
}
--------------------------------------------------------------------------------
/test/stall.go:
--------------------------------------------------------------------------------
package test

import "fmt"

// stall panics if it sees the same value 10 times in a row, which indicates
// that a metric being polled has stopped making progress.
type stall struct {
	stalls int
	last   int
}

func (s *stall) accept(v int) {
	if v == s.last {
		s.stalls++
	} else {
		s.stalls = 0
	}
	s.last = v
	if s.stalls >= 10 {
		panic(fmt.Errorf("stalled at %d", v))
	}
}
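The stall detector above underpins the matchers: a value that repeats ten times in a row is treated as stuck. A sketch of a test demonstrating that behavior (this test does not exist in the repo; it reuses ExpectPanic from panic.go above):

//go:build test
// +build test

package test

import "testing"

func TestStallPanicsAfterTenRepeats(t *testing.T) {
	s := &stall{}
	// Changing values keep resetting the counter.
	for v := 1; v <= 5; v++ {
		s.accept(v)
	}
	// Ten identical readings in a row must panic.
	ExpectPanic(func() {
		for i := 0; i < 10; i++ {
			s.accept(5)
		}
	})
}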
"github.com/argoproj-labs/argo-dataflow/api/v1alpha1" 14 | metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 15 | ) 16 | 17 | var serviceInterface = kubernetesInterface.CoreV1().Services(namespace) 18 | 19 | func WaitForService() { 20 | WaitForPod() 21 | ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second) 22 | defer cancel() 23 | list, err := serviceInterface.List(ctx, metav1.ListOptions{}) 24 | if err != nil { 25 | panic(fmt.Errorf("failed to watch services: %w", err)) 26 | } 27 | for _, x := range list.Items { 28 | if x.Spec.ClusterIP == "None" { 29 | continue 30 | } 31 | if _, ok := x.Spec.Selector[KeyPipelineName]; ok { 32 | log.Printf("waiting for service %q\n", x.Name) 33 | InvokeTestAPI("/http/wait-for?url=%s", url.QueryEscape("https://"+x.Name)) 34 | } 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /test/stall.go: -------------------------------------------------------------------------------- 1 | package test 2 | 3 | import "fmt" 4 | 5 | type stall struct { 6 | stalls int 7 | last int 8 | } 9 | 10 | func (s *stall) accept(v int) { 11 | if v == s.last { 12 | s.stalls++ 13 | } else { 14 | s.stalls = 0 15 | } 16 | s.last = v 17 | if s.stalls >= 10 { 18 | panic(fmt.Errorf("stalled at %d", v)) 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /test/stress/context.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package stress 5 | 6 | import ( 7 | "log" 8 | "os" 9 | 10 | "k8s.io/client-go/tools/clientcmd" 11 | ) 12 | 13 | var currentContext string 14 | 15 | func init() { 16 | home, _ := os.UserHomeDir() 17 | path := home + "/.kube/config" 18 | _, err := os.Stat(path) 19 | if os.IsNotExist(err) { 20 | path = "" 21 | } 22 | r, err := clientcmd.NewNonInteractiveDeferredLoadingClientConfig(&clientcmd.ClientConfigLoadingRules{ 23 | ExplicitPath: path, 24 | }, &clientcmd.ConfigOverrides{}).RawConfig() 25 | if err != nil { 26 | panic(err) 27 | } 28 | currentContext = r.CurrentContext 29 | log.Printf("currentContext=%s\n", currentContext) 30 | } 31 | -------------------------------------------------------------------------------- /test/stress/results.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package stress 5 | 6 | import ( 7 | "encoding/json" 8 | "fmt" 9 | "io/ioutil" 10 | "log" 11 | ) 12 | 13 | func setTestResult(testName string, key string, value int) { 14 | log.Printf("saving test result %q %q %v", testName, key, value) 15 | filename := "test-results.json" 16 | data, err := ioutil.ReadFile(filename) 17 | if err != nil { 18 | panic(err) 19 | } 20 | x := make(map[string]int) 21 | if err := json.Unmarshal(data, &x); err != nil { 22 | panic(fmt.Errorf("failed to unmarshall JSON results: %w", err)) 23 | } 24 | x[fmt.Sprintf("%s.%s", testName, key)] = value 25 | if data, err := json.MarshalIndent(x, "", " "); err != nil { 26 | panic(fmt.Errorf("failed to marshall JSON results: %w", err)) 27 | } else { 28 | if err := ioutil.WriteFile(filename, data, 0o600); err != nil { 29 | panic(fmt.Errorf("failed to write results file: %w", err)) 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /test/stress/tps_test.go: -------------------------------------------------------------------------------- 1 | //go:build test 2 | // +build test 3 | 4 | package stress 5 | 6 | 
--------------------------------------------------------------------------------
/test/stress/tps_test.go:
--------------------------------------------------------------------------------
//go:build test
// +build test

package stress

import (
	"testing"

	"github.com/stretchr/testify/assert"
)

func Test_roundToNearest50(t *testing.T) {
	assert.Equal(t, 0, roundToNearest50(0))
	assert.Equal(t, 0, roundToNearest50(24))
	assert.Equal(t, 50, roundToNearest50(25))
	assert.Equal(t, 100, roundToNearest50(75))
}
--------------------------------------------------------------------------------
/test/testapi.go:
--------------------------------------------------------------------------------
//go:build test
// +build test

package test

import (
	"bufio"
	"errors"
	"fmt"
	"log"
	"net/http"
	"strings"
)

func InvokeTestAPI(format string, args ...interface{}) string {
	url := "http://localhost:8378" + fmt.Sprintf(format, args...)
	log.Printf("GET %s\n", url)
	resp, err := http.Get(url)
	if err != nil {
		panic(err)
	}
	log.Printf("> %s\n", resp.Status)
	body := ""
	defer resp.Body.Close()
	for s := bufio.NewScanner(resp.Body); s.Scan(); {
		x := s.Text()
		if strings.Contains(x, "ERROR") { // hacky way to return an error from an octet-stream
			panic(errors.New(x))
		}
		log.Printf("> %s\n", x)
		body += x
	}
	if resp.StatusCode >= 300 {
		panic(errors.New(resp.Status))
	}
	return body
}
--------------------------------------------------------------------------------
/test/wait.go:
--------------------------------------------------------------------------------
package test

import "log"

func WaitForever() {
	log.Printf("waiting forever\n")
	select {}
}
--------------------------------------------------------------------------------
/testapi/count.go:
--------------------------------------------------------------------------------
package main

import (
	"net/http"
	"strconv"
	"sync/atomic"
)

// count is read and written by concurrent HTTP handlers, so it is accessed
// atomically.
var count int64

func init() {
	http.HandleFunc("/count/reset", func(w http.ResponseWriter, r *http.Request) {
		atomic.StoreInt64(&count, 0)
		w.WriteHeader(204)
	})
	http.HandleFunc("/count/incr", func(w http.ResponseWriter, r *http.Request) {
		atomic.AddInt64(&count, 1)
		w.WriteHeader(204)
	})
	http.HandleFunc("/count/get", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(200)
		_, _ = w.Write([]byte(strconv.FormatInt(atomic.LoadInt64(&count), 10)))
	})
}
--------------------------------------------------------------------------------
/testapi/funny_animals.go:
--------------------------------------------------------------------------------
package main

import (
	"fmt"
	"math/rand"
)

var moods = []string{
	"blissful",
	"determined",
	"devious",
	"excited",
	"ecstatic",
	"gleeful",
	"happy",
	"surprised",
}

var animals = []string{
	"aardvark",
	"bear",
	"capybara",
	"doge",
	"elephant",
	"flamingo",
	"giraffe",
	"hippo",
}

// FunnyAnimal returns a random human-friendly name such as "happy-capybara",
// used as a default message prefix.
func FunnyAnimal() string {
	return fmt.Sprintf("%s-%s", moods[rand.Int()%len(moods)], animals[rand.Int()%len(animals)])
}
--------------------------------------------------------------------------------
/testapi/kafka_stats.go:
--------------------------------------------------------------------------------
package main

// KafkaStats models the subset of a Kafka client's statistics payload that
// the test API needs: per-topic, per-partition low/high watermark offsets.
type KafkaStats struct {
	Topics map[string]struct {
		Partitions map[string]struct {
			LoOffset int64 `json:"lo_offset"`
			HiOffset int64 `json:"hi_offset"`
		} `json:"partitions"`
	} `json:"topics"`
}

// count returns the number of messages in a topic, i.e. the sum over all
// partitions of (high watermark - low watermark).
func (s KafkaStats) count(topic string) int64 {
	var count int64
	for _, p := range s.Topics[topic].Partitions {
		count += p.HiOffset - p.LoOffset
	}
	return count
}
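KafkaStats.count computes a topic's backlog as the sum over partitions of hi_offset minus lo_offset. A minimal sketch of feeding it; the payload here is hypothetical, shaped only to match the struct tags above:

package main

import (
	"encoding/json"
	"fmt"
)

func exampleKafkaStats() {
	// Hypothetical stats payload with two partitions of topic "my-topic".
	data := []byte(`{"topics": {"my-topic": {"partitions": {
		"0": {"lo_offset": 100, "hi_offset": 150},
		"1": {"lo_offset": 0, "hi_offset": 50}
	}}}}`)
	var stats KafkaStats
	if err := json.Unmarshal(data, &stats); err != nil {
		panic(err)
	}
	fmt.Println(stats.count("my-topic")) // 100 = (150-100) + (50-0)
}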
--------------------------------------------------------------------------------
/testapi/main.go:
--------------------------------------------------------------------------------
package main

import (
	"net/http"
)

func main() {
	if err := http.ListenAndServe(":8378", nil); err != nil {
		panic(err)
	}
}
--------------------------------------------------------------------------------
/testapi/message_factory.go:
--------------------------------------------------------------------------------
package main

import (
	"fmt"
	"net/url"
	"strconv"

	"k8s.io/apimachinery/pkg/util/rand"
)

type messageFactory struct {
	prefix string
	size   int
}

func newMessageFactory(v url.Values) messageFactory {
	prefix := v.Get("prefix")
	if prefix == "" {
		prefix = FunnyAnimal()
	}
	size, _ := strconv.Atoi(v.Get("size"))
	return messageFactory{prefix: prefix, size: size}
}

func (f messageFactory) newMessage(i int) string {
	y := fmt.Sprintf("%s-%d", f.prefix, i)
	if f.size > 0 {
		y += "-"
		y += rand.String(f.size)
	}
	return y
}
--------------------------------------------------------------------------------
/testapi/ready.go:
--------------------------------------------------------------------------------
package main

import (
	"net/http"
)

func init() {
	http.HandleFunc("/ready", func(w http.ResponseWriter, r *http.Request) {
		w.WriteHeader(200)
	})
}
--------------------------------------------------------------------------------
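Finally, a hypothetical sketch of the message factory's output format (prefix-index, plus random padding when a size is requested); the printed values are examples only, since both the prefix and the padding are random:

package main

import (
	"fmt"
	"net/url"
)

func exampleMessageFactory() {
	// No prefix given: a random "mood-animal" prefix is chosen via FunnyAnimal.
	f := newMessageFactory(url.Values{})
	fmt.Println(f.newMessage(0)) // e.g. "happy-capybara-0"

	// Explicit prefix and size: 8 random characters are appended.
	f = newMessageFactory(url.Values{"prefix": {"stress"}, "size": {"8"}})
	fmt.Println(f.newMessage(3)) // e.g. "stress-3-a1b2c3d4"
}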