├── packages ├── pynumaflow │ ├── tests │ │ ├── map │ │ │ └── __init__.py │ │ ├── sink │ │ │ └── __init__.py │ │ ├── batchmap │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── errors │ │ │ └── __init__.py │ │ ├── mapstream │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── reduce │ │ │ └── __init__.py │ │ ├── sideinput │ │ │ ├── __init__.py │ │ │ └── test_responses.py │ │ ├── source │ │ │ └── __init__.py │ │ ├── accumulator │ │ │ ├── __init__.py │ │ │ └── utils.py │ │ ├── reducestreamer │ │ │ ├── __init__.py │ │ │ └── test_messages.py │ │ ├── sourcetransform │ │ │ └── __init__.py │ │ └── __init__.py │ ├── pynumaflow │ │ ├── info │ │ │ ├── __init__.py │ │ │ └── server.py │ │ ├── proto │ │ │ ├── __init__.py │ │ │ ├── mapper │ │ │ │ └── __init__.py │ │ │ ├── reducer │ │ │ │ └── __init__.py │ │ │ ├── sideinput │ │ │ │ ├── __init__.py │ │ │ │ ├── sideinput_pb2.pyi │ │ │ │ └── sideinput.proto │ │ │ ├── sinker │ │ │ │ └── __init__.py │ │ │ ├── sourcer │ │ │ │ └── __init__.py │ │ │ ├── accumulator │ │ │ │ └── __init__.py │ │ │ ├── sourcetransformer │ │ │ │ └── __init__.py │ │ │ └── common │ │ │ │ ├── metadata_pb2_grpc.py │ │ │ │ └── metadata.proto │ │ ├── sinker │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── accumulator │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── batchmapper │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── mapper │ │ │ ├── _servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── mapstreamer │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── reducer │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── sideinput │ │ │ ├── servicer │ │ │ │ ├── __init__.py │ │ │ │ └── servicer.py │ │ │ └── __init__.py │ │ ├── sourcer │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── reducestreamer │ │ │ ├── servicer │ │ │ │ └── __init__.py │ │ │ └── __init__.py │ │ ├── sourcetransformer │ │ │ ├── servicer │ │ │ │ └── __init__.py │ 
│ │ └── __init__.py │ │ ├── shared │ │ │ ├── __init__.py │ │ │ ├── synciter.py │ │ │ └── asynciter.py │ │ ├── errors │ │ │ ├── __init__.py │ │ │ └── _dtypes.py │ │ ├── types.py │ │ ├── exceptions.py │ │ └── __init__.py │ ├── examples │ │ ├── map │ │ │ ├── flatmap │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── example.py │ │ │ │ └── pipeline.yaml │ │ │ ├── even_odd │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── example.py │ │ │ │ ├── pipeline.yaml │ │ │ │ └── Makefile.optimized │ │ │ ├── forward_message │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ │ └── multiproc_map │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── README.md │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ ├── sink │ │ │ ├── log │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ │ └── async_log │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ ├── batchmap │ │ │ └── flatmap │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ ├── reduce │ │ │ ├── counter │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ │ └── asyncio_reduce │ │ │ │ ├── entry.sh │ │ │ │ ├── testing_data │ │ │ │ ├── async │ │ │ │ │ ├── async_10_0.png │ │ │ │ │ ├── async_100_0.png │ │ │ │ │ ├── async_100_1s.png │ │ │ │ │ ├── async_10_1s.png │ │ │ │ │ ├── async_10_200.png │ │ │ │ │ └── async_100_200.png │ │ │ │ └── threadpool │ │ │ │ │ ├── thread_100_0.png │ │ │ │ │ ├── thread_10_0.png │ │ │ │ │ ├── thread_10_1.png │ │ │ │ │ ├── thread_100_1s.png │ │ │ │ │ ├── thread_100_200.png │ │ │ │ │ └── thread_10_200.png │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ └── 
async_exec.py │ │ ├── reducestream │ │ │ ├── sum │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ │ └── counter │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ ├── source │ │ │ └── simple_source │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── README.md │ │ │ │ ├── pipeline.yaml │ │ │ │ └── Makefile │ │ ├── accumulator │ │ │ └── streamsorter │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── README.md │ │ │ │ ├── Makefile.optimized │ │ │ │ └── pipeline.yaml │ │ ├── mapstream │ │ │ └── flatmap_stream │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── pipeline.yaml │ │ │ │ └── example.py │ │ ├── sideinput │ │ │ └── simple_sideinput │ │ │ │ ├── entry.sh │ │ │ │ ├── udf │ │ │ │ ├── entry.sh │ │ │ │ ├── pyproject.toml │ │ │ │ └── Makefile │ │ │ │ ├── pyproject.toml │ │ │ │ ├── Makefile │ │ │ │ ├── example.py │ │ │ │ └── pipeline.yaml │ │ └── sourcetransform │ │ │ ├── event_time_filter │ │ │ ├── entry.sh │ │ │ ├── pyproject.toml │ │ │ ├── Makefile │ │ │ └── example.py │ │ │ └── async_event_time_filter │ │ │ ├── entry.sh │ │ │ ├── pyproject.toml │ │ │ └── Makefile │ ├── pytest.ini │ └── .coveragerc └── pynumaflow-lite │ ├── pynumaflow_lite │ ├── py.typed │ ├── _sink_dtypes.py │ ├── __init__.pyi │ ├── _reduce_dtypes.py │ ├── _batchmapper_dtypes.py │ ├── _mapstream_dtypes.py │ ├── _map_dtypes.py │ ├── _session_reduce_dtypes.py │ ├── _sideinput_dtypes.py │ ├── _accumulator_dtypes.py │ ├── _reducestreamer_dtypes.py │ ├── _sourcetransformer_dtypes.py │ ├── session_reducer.pyi │ ├── sideinputer.pyi │ ├── mapstreamer.pyi │ ├── mapper.pyi │ ├── sourcetransformer.pyi │ ├── batchmapper.pyi │ └── reducer.pyi │ ├── manifests │ ├── map │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── mapstream │ │ ├── pyproject.toml │ │ ├── 
pipeline.yaml │ │ ├── README.md │ │ ├── Dockerfile │ │ └── mapstream_cat.py │ ├── batchmap │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ ├── Dockerfile │ │ └── batchmap_cat.py │ ├── reduce │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── sink │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── source │ │ ├── pyproject.toml │ │ ├── pipeline.yaml │ │ ├── README.md │ │ └── Dockerfile │ ├── accumulator │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── reducestream │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── session_reduce │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── sourcetransform │ │ ├── pyproject.toml │ │ ├── pipeline.yaml │ │ ├── README.md │ │ └── Dockerfile │ └── sideinput │ │ ├── pyproject.toml │ │ ├── README.md │ │ ├── pipeline.yaml │ │ └── Dockerfile │ ├── pyproject.toml │ ├── tests │ ├── test_map.py │ ├── test_source.py │ ├── test_sideinput.py │ ├── test_sink.py │ ├── test_batchmap.py │ ├── test_mapstream.py │ ├── test_reduce.py │ ├── test_reducestream.py │ ├── test_sourcetransform.py │ ├── test_accumulator.py │ ├── test_session_reduce.py │ └── examples │ │ ├── map_cat.py │ │ ├── batchmap_cat.py │ │ ├── mapstream_cat.py │ │ ├── sink_log.py │ │ ├── reduce_counter_func.py │ │ ├── mapstream_cat_class.py │ │ └── sideinput_example.py │ ├── README.md │ ├── Makefile │ └── Cargo.toml ├── .github ├── pull_request_template.md ├── CODEOWNERS ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── changelog.yml │ ├── lint.yml │ ├── run-tests.yml │ ├── pypi.yaml │ └── coverage.yml ├── CONTRIBUTING.md ├── CODE_OF_CONDUCT.md ├── .codecov.yml ├── README.md ├── hack └── changelog.sh └── .pre-commit-config.yaml /packages/pynumaflow/tests/map/__init__.py: -------------------------------------------------------------------------------- 
1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/sink/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/info/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/batchmap/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/errors/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/mapstream/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/reduce/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/sideinput/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/source/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /packages/pynumaflow/tests/accumulator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/reducestreamer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/sourcetransform/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/mapper/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/reducer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/sideinput/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/sinker/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/sourcer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sinker/servicer/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/accumulator/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/batchmapper/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/mapper/_servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/mapstreamer/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/accumulator/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/reducer/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sideinput/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sourcer/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/sourcetransformer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/reducestreamer/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: -------------------------------------------------------------------------------- 1 | Kindly explain what this PR does. 2 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sourcetransformer/servicer/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/flatmap/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/log/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/batchmap/flatmap/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/even_odd/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set 
-eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/counter/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/sum/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/async_log/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/forward_message/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/multiproc_map/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python async_exec.py -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/counter/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | 
-------------------------------------------------------------------------------- /packages/pynumaflow/examples/source/simple_source/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/accumulator/streamsorter/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/mapstream/flatmap_stream/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/udf/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/event_time_filter/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/async_event_time_filter/entry.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -eux 3 | 4 | 
python example.py 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | # in test lets turn on DEBUG 4 | os.environ["PYTHONASYNCIODEBUG"] = "1" 5 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributing 2 | 3 | Please refer to [Contributing](https://github.com/numaproj/numaproj/blob/main/CONTRIBUTING.md) 4 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/shared/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.shared.server import NumaflowServer 2 | 3 | 4 | __all__ = ["NumaflowServer"] 5 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/errors/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.errors.errors import persist_critical_error 2 | 3 | __all__ = ["persist_critical_error"] 4 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | # These owners will be the default owners for everything in 2 | # the repo. 
Unless a later match takes precedence 3 | * @ab93 @vigith @whynowy 4 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | Please refer to [Code of Conduct](https://github.com/numaproj/numaproj/blob/main/CODE_OF_CONDUCT.md) 4 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/py.typed: -------------------------------------------------------------------------------- 1 | # This is generated by AI Agent, we will have to redo when Typing Toolset is integrated with `maturin`. 2 | # More details in: https://pyo3.rs/main/type-stub.html -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_0.png -------------------------------------------------------------------------------- /packages/pynumaflow/pytest.ini: -------------------------------------------------------------------------------- 1 | # pytest.ini 2 | [pytest] 3 | log_cli = 1 4 | log_cli_level = DEBUG 5 | log_cli_format = %(asctime)s [%(levelname)8s] %(message)s (%(filename)s:%(lineno)s) 6 | log_cli_date_format=%Y-%m-%d %H:%M:%S -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_0.png 
-------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_1s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_1s.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_1s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_1s.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_10_200.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/async/async_100_200.png -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/types.py: -------------------------------------------------------------------------------- 1 | from typing import Union, NewType 2 | 3 | import grpc 4 | 5 | NumaflowServicerContext = NewType( 6 | "NumaflowServicerContext", Union[grpc.aio.ServicerContext, grpc.ServicerContext] 7 
| ) 8 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_0.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_0.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_1.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_1s.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_1s.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_200.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_100_200.png -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/numaproj/numaflow-python/HEAD/packages/pynumaflow/examples/reduce/asyncio_reduce/testing_data/threadpool/thread_10_200.png -------------------------------------------------------------------------------- /.codecov.yml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: auto 6 | threshold: 3% 7 | patch: 8 | default: 9 | target: auto 10 | threshold: 5% 11 | 12 | ignore: 13 | - "examples/" 14 | - "pynumaflow/proto/*" 15 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # numaflow-python 2 | 3 | Python SDK for Numaflow. 
4 | 5 | ## `pynumaflow` 6 | Pure Python SDK implementation for Numaflow - [pynumaflow](packages/pynumaflow/README.md) 7 | 8 | ## `pynumaflow-lite` 9 | 10 | Coming shortly (Rust based Python SDK) with better performance 11 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sideinput/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow._constants import SIDE_INPUT_DIR_PATH 2 | from pynumaflow.sideinput._dtypes import Response, SideInput 3 | from pynumaflow.sideinput.server import SideInputServer 4 | 5 | __all__ = ["Response", "SideInput", "SideInputServer", "SIDE_INPUT_DIR_PATH"] 6 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/accumulator/streamsorter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "stream-sorter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [build-system] 12 | requires = ["poetry-core>=1.0.0"] 13 | build-backend = "poetry.core.masonry.api" 14 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/exceptions.py: -------------------------------------------------------------------------------- 1 | class NoPublicConstructorError(TypeError): 2 | """Raise when using ClassName() to create objects while public constructor is not supported""" 3 | 4 | 5 | class SocketError(Exception): 6 | """To raise an error while creating socket or setting its property""" 7 | 8 | 9 | class UDFError(Exception): 10 | """To Raise an error while executing a UDF call""" 11 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/mapstreamer/__init__.py: 
-------------------------------------------------------------------------------- 1 | from pynumaflow._constants import DROP 2 | 3 | from pynumaflow.mapstreamer._dtypes import Message, Messages, Datum, MapStreamer 4 | from pynumaflow.mapstreamer.async_server import MapStreamAsyncServer 5 | 6 | __all__ = [ 7 | "Message", 8 | "Messages", 9 | "Datum", 10 | "DROP", 11 | "MapStreamAsyncServer", 12 | "MapStreamer", 13 | ] 14 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/even_odd/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "even-odd" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/flatmap/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-flatmap" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/counter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "reduce-counter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 
8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/log/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "example-sink" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.10,<3.13" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/batchmap/flatmap/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "batch-map-flatmap" 3 | version = "0.1.0" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/multiproc_map/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-forward-message" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | 
build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/sum/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "reduce-stream-sum" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/async_log/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "example-sink" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.10,<3.13" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/map/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "map-cat" 3 | version = "0.1.0" 4 | description = "Testing Python FFI" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- 
/packages/pynumaflow/examples/map/forward_message/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-forward-message" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/counter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "reduce-stream-counter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/source/simple_source/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "simple-source" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = ">=3.10,<3.13" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/mapstream/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 
| name = "mapstream-cat" 3 | version = "0.1.0" 4 | description = "Testing Python FFI" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" -------------------------------------------------------------------------------- /packages/pynumaflow/examples/mapstream/flatmap_stream/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-flatmap-stream" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-forward-message" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../"} 10 | 11 | [tool.poetry.dev-dependencies] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=1.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/batchmap/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "batchmap-cat" 3 | version = "0.1.0" 4 | description = "Testing Python FFI" 5 | authors = [ 6 | { name = "Vigith Maurice", email = 
"vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/map/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ```bash 12 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2 13 | ``` 14 | 15 | ## Run the pipeline 16 | 17 | ```bash 18 | kubectl apply -f pipeline.yaml 19 | ``` -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/errors/_dtypes.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass, asdict 2 | 3 | 4 | @dataclass 5 | class _RuntimeErrorEntry: 6 | """Represents a runtime error entry to be persisted.""" 7 | 8 | container: str 9 | timestamp: int 10 | code: str 11 | message: str 12 | details: str 13 | 14 | def to_dict(self) -> dict: 15 | """Return this entry's fields as a dict built with dataclasses.asdict().""" 16 | return asdict(self) 17 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/batchmap/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . 
-t quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ```bash 12 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2 13 | ``` 14 | 15 | ## Run the pipeline 16 | 17 | ```bash 18 | kubectl apply -f pipeline.yaml 19 | ``` -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reduce/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "reduce-counter" 3 | version = "0.1.0" 4 | description = "Reduce counter example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=2.0.0,<3.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | 17 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sink/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sink-log" 3 | version = "0.1.0" 4 | description = "User-defined sink example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | 18 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/source/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "simple-source" 3 | version = "0.1.0" 4 | description = "User-defined source example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = 
"vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | 18 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/accumulator/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.accumulator._dtypes import ( 2 | Message, 3 | Datum, 4 | IntervalWindow, 5 | DROP, 6 | KeyedWindow, 7 | Accumulator, 8 | ) 9 | from pynumaflow.accumulator.async_server import AccumulatorAsyncServer 10 | 11 | __all__ = [ 12 | "Message", 13 | "Datum", 14 | "IntervalWindow", 15 | "DROP", 16 | "AccumulatorAsyncServer", 17 | "KeyedWindow", 18 | "Accumulator", 19 | ] 20 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/accumulator/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "stream-sorter" 3 | version = "0.1.0" 4 | description = "Stream sorter accumulator example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=2.0.0,<3.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | 17 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/reducer/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.reducer._dtypes import ( 2 | Message, 3 | Messages, 4 | Datum, 5 | IntervalWindow, 6 | Metadata, 7 | DROP, 8 | Reducer, 9 | ) 10 | from pynumaflow.reducer.async_server import ReduceAsyncServer 11 | 12 | __all__ = [ 13 | "Message", 14 | "Messages", 15 
| "Datum", 16 | "IntervalWindow", 17 | "Metadata", 18 | "DROP", 19 | "ReduceAsyncServer", 20 | "Reducer", 21 | ] 22 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reducestream/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "reducestream-counter" 3 | version = "0.1.0" 4 | description = "Reduce streaming counter example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | [build-system] 14 | requires = ["poetry-core>=2.0.0,<3.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | 17 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/event_time_filter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "mapt-event-time-filter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | readme = "README.md" 7 | packages = [{include = "mapt_event_time_filter"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.9, <3.12" 11 | pynumaflow = { path = "../../../"} 12 | 13 | [build-system] 14 | requires = ["poetry-core"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/session_reduce/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "session-reduce-counter" 3 | version = "0.1.0" 4 | description = "Session reduce counter example using pynumaflow-lite" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 
13 | [build-system] 14 | requires = ["poetry-core>=2.0.0,<3.0.0"] 15 | build-backend = "poetry.core.masonry.api" 16 | 17 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sourcetransform/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sourcetransform-event-filter" 3 | version = "0.1.0" 4 | description = "Source Transformer Event Filter Example" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | ] 12 | 13 | 14 | [build-system] 15 | requires = ["poetry-core>=2.0.0,<3.0.0"] 16 | build-backend = "poetry.core.masonry.api" 17 | 18 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "reduce-counter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = "~0.6.0" 10 | aiorun = ">=2023.7,<2024.0" 11 | aiohttp = "~3.8.4" 12 | asyncio = "~3.4.3" 13 | 14 | [tool.poetry.dev-dependencies] 15 | 16 | [build-system] 17 | requires = ["poetry-core>=1.0.0"] 18 | build-backend = "poetry.core.masonry.api" 19 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/udf/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "map-forward-message" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | 7 | [tool.poetry.dependencies] 8 | python = "~3.10" 9 | pynumaflow = { path = "../../../../"} 10 | watchfiles = "~0.20.0" 11 | watchdog = "~0.8.2" 12 | 13 | 
[tool.poetry.dev-dependencies] 14 | 15 | [build-system] 16 | requires = ["poetry-core>=1.0.0"] 17 | build-backend = "poetry.core.masonry.api" 18 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/source/simple_source/README.md: -------------------------------------------------------------------------------- 1 | # Example Python User-Defined Source 2 | A simple example of a user-defined source. The source maintains an array of messages and implements the `Read`, 3 | `Ack`, and `Pending` methods: 4 | - The `Read(x)` method returns the next `x` messages in the array. 5 | - The `Ack()` method acknowledges the last batch of messages returned by `Read()`. 6 | - The `Pending()` method returns 0 to indicate that the simple source always has 0 pending messages. -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/async_event_time_filter/pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "async-mapt-event-time-filter" 3 | version = "0.2.4" 4 | description = "" 5 | authors = ["Numaflow developers"] 6 | readme = "README.md" 7 | packages = [{include = "mapt_event_time_filter"}] 8 | 9 | [tool.poetry.dependencies] 10 | python = ">=3.9, <3.12" 11 | pynumaflow = { path = "../../../"} 12 | 13 | [build-system] 14 | requires = ["poetry-core"] 15 | build-backend = "poetry.core.masonry.api" 16 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/batchmapper/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow._constants import DROP 2 | 3 | from pynumaflow.batchmapper._dtypes import ( 4 | Message, 5 | Datum, 6 | BatchMapper, 7 | BatchResponses, 8 | BatchResponse, 9 | ) 10 | from pynumaflow.batchmapper.async_server import BatchMapAsyncServer 11 | 12 | 
__all__ = [ 13 | "Message", 14 | "Datum", 15 | "DROP", 16 | "BatchMapAsyncServer", 17 | "BatchMapper", 18 | "BatchResponses", 19 | "BatchResponse", 20 | ] 21 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sinker/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.sinker.async_server import SinkAsyncServer 2 | 3 | from pynumaflow.sinker.server import SinkServer 4 | 5 | from pynumaflow._metadata import UserMetadata, SystemMetadata 6 | from pynumaflow.sinker._dtypes import Response, Responses, Datum, Sinker 7 | 8 | __all__ = [ 9 | "Response", 10 | "Responses", 11 | "Datum", 12 | "Sinker", 13 | "SinkAsyncServer", 14 | "SinkServer", 15 | "UserMetadata", 16 | "SystemMetadata", 17 | ] 18 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/asyncio_reduce/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = "quay.io/numaio/numaflow-python/asyncio-reduce:${TAG}" 4 | 5 | .PHONY: update 6 | update: 7 | poetry update -vv 8 | 9 | .PHONY: image-push 10 | image-push: update 11 | docker buildx build -t ${IMAGE_REGISTRY} --platform linux/amd64,linux/arm64 . --push 12 | 13 | .PHONY: image 14 | image: update 15 | docker build -t ${IMAGE_REGISTRY} . 
16 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 17 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sideinput/pyproject.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | name = "sideinput-example" 3 | version = "0.1.0" 4 | description = "Side Input Example with Retriever and Mapper" 5 | authors = [ 6 | { name = "Vigith Maurice", email = "vigith@gmail.com" } 7 | ] 8 | readme = "README.md" 9 | requires-python = ">=3.11" 10 | dependencies = [ 11 | "watchfiles", 12 | "watchdog", 13 | ] 14 | 15 | 16 | [build-system] 17 | requires = ["poetry-core>=2.0.0,<3.0.0"] 18 | build-backend = "poetry.core.masonry.api" 19 | 20 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/reducestreamer/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.reducestreamer._dtypes import ( 2 | Message, 3 | Datum, 4 | IntervalWindow, 5 | Metadata, 6 | DROP, 7 | ReduceStreamer, 8 | ReduceWindow, 9 | ) 10 | from pynumaflow.reducestreamer.async_server import ReduceStreamAsyncServer 11 | 12 | __all__ = [ 13 | "Message", 14 | "Datum", 15 | "IntervalWindow", 16 | "Metadata", 17 | "DROP", 18 | "ReduceStreamAsyncServer", 19 | "ReduceStreamer", 20 | "ReduceWindow", 21 | ] 22 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/source/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-source 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | udsource: 10 | container: 11 | image: quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1 12 | imagePullPolicy: Never 13 | limits: 14 | readBatchSize: 5 15 | - name: out 16 | sink: 17 | 
log: {} 18 | edges: 19 | - from: in 20 | to: out 21 | 22 | -------------------------------------------------------------------------------- /packages/pynumaflow/.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | branch = True 3 | parallel = True 4 | source = pynumaflow 5 | omit = 6 | pynumaflow/tests/* 7 | examples/* 8 | pynumaflow/proto/* 9 | pynumaflow/shared/server.py 10 | 11 | [report] 12 | exclude_lines = 13 | def sync_server_start 14 | def _run_server 15 | def start_multiproc_server 16 | async def start_async_server 17 | def _reserve_port 18 | if os.getenv("PYTHONDEBUG"): 19 | _LOGGER.setLevel(logging.DEBUG) 20 | def exec_multiproc 21 | def exec 22 | async def aexec 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["maturin>=1.8,<2.0"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "pynumaflow-lite" 7 | requires-python = ">=3.8" 8 | classifiers = [ 9 | "Programming Language :: Rust", 10 | "Programming Language :: Python :: Implementation :: CPython", 11 | "Programming Language :: Python :: Implementation :: PyPy", 12 | ] 13 | dynamic = ["version"] 14 | [tool.maturin] 15 | features = ["pyo3/extension-module"] 16 | 17 | [dependency-groups] 18 | dev = [ 19 | "pytest>=8.3.5", 20 | ] 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Propose an enhancement for this project 4 | labels: 'enhancement' 5 | --- 6 | # Summary 7 | 8 | What change needs making? 9 | 10 | # Use Cases 11 | 12 | When would you use this? 
13 | 14 | --- 15 | 16 | **Message from the maintainers**: 17 | 18 | If you wish to see this enhancement implemented please add a 👍 reaction to this issue! We often sort issues this way to know what to prioritize. 19 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/source/simple_source/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-source 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | udsource: 10 | container: 11 | # A simple user-defined async source 12 | image: quay.io/numaio/numaflow-python/simple-source:stable 13 | imagePullPolicy: Always 14 | limits: 15 | readBatchSize: 2 16 | - name: out 17 | sink: 18 | log: {} 19 | edges: 20 | - from: in 21 | to: out 22 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sink/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . 
-t quay.io/numaio/numaflow/pynumaflow-lite-sink-log:v1 --load 7 | ``` 8 | 9 | ### `k3d` 10 | 11 | Load it now to `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-sink-log:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-sink-log:v1 21 | ``` 22 | 23 | ## Run the pipeline 24 | 25 | ```bash 26 | kubectl apply -f pipeline.yaml 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sideinput/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 --load 7 | ``` 8 | 9 | ### `k3d` 10 | 11 | Load it now to `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 21 | ``` 22 | 23 | ## Run the pipeline 24 | 25 | ```bash 26 | kubectl apply -f pipeline.yaml 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/mapper/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.mapper.async_server import MapAsyncServer 2 | from pynumaflow.mapper.multiproc_server import MapMultiprocServer 3 | from pynumaflow.mapper.sync_server import MapServer 4 | 5 | from pynumaflow.mapper._dtypes import Message, Messages, Datum, DROP, Mapper 6 | from pynumaflow._metadata import UserMetadata, SystemMetadata 7 | 8 | __all__ = [ 9 | "Message", 10 | "Messages", 11 | "Datum", 12 | "DROP", 13 | "Mapper", 14 | "MapServer", 15 | "MapAsyncServer", 16 | "MapMultiprocServer", 17 | "UserMetadata", 18 | 
"SystemMetadata", 19 | ] 20 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/source/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1 --load 7 | ``` 8 | 9 | ### `k3d` 10 | 11 | Load it now to `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-simple-source:v1 21 | ``` 22 | 23 | ## Run the pipeline 24 | 25 | ```bash 26 | kubectl apply -f pipeline.yaml 27 | ``` 28 | 29 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/shared/synciter.py: -------------------------------------------------------------------------------- 1 | from queue import Queue 2 | 3 | from pynumaflow._constants import STREAM_EOF 4 | 5 | 6 | class SyncIterator: 7 | """A synchronous iterator backed by a queue.Queue; read_iterator() stops at STREAM_EOF.""" 8 | 9 | __slots__ = "_queue" 10 | 11 | def __init__(self, size=0): 12 | self._queue = Queue(maxsize=size) 13 | 14 | def read_iterator(self): 15 | item = self._queue.get() 16 | while True: 17 | if item == STREAM_EOF: 18 | break 19 | yield item 20 | item = self._queue.get() 21 | 22 | def put(self, item): 23 | self._queue.put(item) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sourcetransform/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: sourcetransform-event-filter 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # HTTP Source to control the event time 10 | http: { 
} 11 | transformer: 12 | container: 13 | image: quay.io/numaio/numaflow/pynumaflow-lite-sourcetransform-event-filter:v1 14 | imagePullPolicy: Never 15 | - name: sink 16 | scale: 17 | min: 1 18 | sink: 19 | log: { } 20 | edges: 21 | - from: in 22 | to: sink 23 | 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sink/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-sink-log 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 100 12 | duration: 1s 13 | msgSize: 8 14 | - name: log-sink 15 | scale: 16 | min: 1 17 | sink: 18 | udsink: 19 | container: 20 | image: quay.io/numaio/numaflow/pynumaflow-lite-sink-log:v1 21 | imagePullPolicy: Never 22 | edges: 23 | - from: in 24 | to: log-sink 25 | 26 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/log/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/sink-log:${TAG} 4 | DOCKER_FILE_PATH = examples/sink/log/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/even_odd/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/even-odd:${TAG} 4 | DOCKER_FILE_PATH = examples/map/even_odd/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/flatmap/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/map-flatmap:${TAG} 4 | DOCKER_FILE_PATH = examples/map/flatmap/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/multiproc_map/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/multiproc:${TAG} 4 | DOCKER_FILE_PATH = examples/map/multiproc_map/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/counter/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/reduce-counter:${TAG} 4 | DOCKER_FILE_PATH = examples/reduce/counter/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/async_log/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/async-sink-log:${TAG} 4 | DOCKER_FILE_PATH = examples/sink/async_log/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_map.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/map.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") 9 | 10 | SCRIPTS = [ 11 | "map_cat.py", 12 | "map_cat_class.py", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("script", SCRIPTS) 17 | def test_python_server_and_rust_client(script: str, tmp_path: Path): 18 | run_python_server_with_rust_client( 19 | script=script, 20 | sock_path=SOCK_PATH, 21 | server_info_path=SERVER_INFO, 22 | rust_bin_name="test_map", 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_source.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 
3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/source.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/sourcer-server-info") 9 | 10 | SCRIPTS = [ 11 | "source_simple.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_source_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_source", 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/batchmap/flatmap/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/batch-map-flatmap:${TAG} 4 | DOCKER_FILE_PATH = examples/batchmap/flatmap/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/sum/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/reduce-stream-sum:${TAG} 4 | DOCKER_FILE_PATH = examples/reducestream/sum/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_sideinput.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/sideinput.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/sideinput-server-info") 9 | 10 | SCRIPTS = [ 11 | "sideinput_example.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_sideinput", 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/source/simple_source/Makefile: -------------------------------------------------------------------------------- 1 
| TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/simple-source:${TAG} 4 | DOCKER_FILE_PATH = examples/source/simple_source/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_sink_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.sinker import Datum, Responses 3 | from collections.abc import AsyncIterable 4 | 5 | 6 | class Sinker(metaclass=ABCMeta): 7 | """ 8 | Provides an interface to write a Sink servicer. 9 | """ 10 | 11 | def __call__(self, *args, **kwargs): 12 | return self.handler(*args, **kwargs) 13 | 14 | @abstractmethod 15 | async def handler(self, datums: AsyncIterable[Datum]) -> Responses: 16 | """ 17 | Implement this handler function for sink. 18 | Process the stream of datums and return responses. 
19 | """ 20 | pass 21 | 22 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/accumulator/streamsorter/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/streamsorter:${TAG} 4 | DOCKER_FILE_PATH = examples/accumulator/streamsorter/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/forward_message/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/map-forward-message:${TAG} 4 | DOCKER_FILE_PATH = examples/map/forward_message/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sourcetransformer/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.sourcetransformer._dtypes import ( 2 | Message, 3 | Messages, 4 | Datum, 5 | DROP, 6 | SourceTransformer, 7 | ) 8 | from pynumaflow.sourcetransformer.multiproc_server import SourceTransformMultiProcServer 9 | from pynumaflow.sourcetransformer.server import SourceTransformServer 10 | from pynumaflow.sourcetransformer.async_server import SourceTransformAsyncServer 11 | 12 | __all__ = [ 13 | "Message", 14 | "Messages", 15 | "Datum", 16 | "DROP", 17 | "SourceTransformServer", 18 | "SourceTransformer", 19 | "SourceTransformMultiProcServer", 20 | "SourceTransformAsyncServer", 21 | ] 22 | -------------------------------------------------------------------------------- /hack/changelog.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | set -eu 3 | 4 | echo '# Changelog' 5 | echo 6 | 7 | tag= 8 | git tag -l 'v*' | sed 's/-rc/~/' | sort -rV | sed 's/~/-rc/' | while read last; do 9 | if [ "$tag" != "" ]; then 10 | echo "## $(git for-each-ref --format='%(refname:strip=2) (%(creatordate:short))' refs/tags/${tag})" 11 | echo 12 | git_log='git --no-pager log --no-merges --invert-grep --grep=^\(build\|chore\|ci\|docs\|test\):' 13 | $git_log --format=' * [%h](https://github.com/numaproj/numaflow-python/commit/%H) %s' $last..$tag 14 | echo 15 | echo "### Contributors" 16 | echo 17 | $git_log --format=' * %an' $last..$tag | sort -u 18 | echo 19 | fi 20 | tag=$last 21 | done 22 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_sink.py: -------------------------------------------------------------------------------- 1 | from pathlib import 
Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/sink.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/sinker-server-info") 9 | 10 | SCRIPTS = [ 11 | "sink_log.py", 12 | "sink_log_class.py", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("script", SCRIPTS) 17 | def test_python_sink_server_and_rust_client(script: str, tmp_path: Path): 18 | run_python_server_with_rust_client( 19 | script=script, 20 | sock_path=SOCK_PATH, 21 | server_info_path=SERVER_INFO, 22 | rust_bin_name="test_sink", 23 | ) 24 | 25 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/counter/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/reduce-stream-counter:${TAG} 4 | DOCKER_FILE_PATH = examples/reducestream/counter/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/mapstream/flatmap_stream/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/map-flatmap-stream:${TAG} 4 | DOCKER_FILE_PATH = examples/mapstream/flatmap_stream/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/sideinput-example:${TAG} 4 | DOCKER_FILE_PATH = examples/sideinput/simple_sideinput/Dockerfile 5 | 6 | 7 | .PHONY: update 8 | update: 9 | poetry update -vv 10 | 11 | .PHONY: image-push 12 | image-push: update 13 | cd ../../../ && docker buildx build \ 14 | -f ${DOCKER_FILE_PATH} \ 15 | -t ${IMAGE_REGISTRY} \ 16 | --platform linux/amd64,linux/arm64 . --push 17 | 18 | .PHONY: image 19 | image: update 20 | cd ../../../ && docker build \ 21 | -f ${DOCKER_FILE_PATH} \ 22 | -t ${IMAGE_REGISTRY} . 
23 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_batchmap.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/batchmap.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") 9 | 10 | SCRIPTS = [ 11 | "batchmap_cat.py", 12 | "batchmap_cat_class.py", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("script", SCRIPTS) 17 | def test_python_batch_server_and_rust_client(script: str, tmp_path: Path): 18 | run_python_server_with_rust_client( 19 | script=script, 20 | sock_path=SOCK_PATH, 21 | server_info_path=SERVER_INFO, 22 | rust_bin_name="test_batchmap", 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_mapstream.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/mapstream.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/mapper-server-info") 9 | 10 | SCRIPTS = [ 11 | "mapstream_cat.py", 12 | "mapstream_cat_class.py", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("script", SCRIPTS) 17 | def test_python_stream_server_and_rust_client(script: str, tmp_path: Path): 18 | run_python_server_with_rust_client( 19 | script=script, 20 | sock_path=SOCK_PATH, 21 | server_info_path=SERVER_INFO, 22 | rust_bin_name="test_mapstream", 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_reduce.py: -------------------------------------------------------------------------------- 1 
| from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/reduce.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/reducer-server-info") 9 | 10 | SCRIPTS = [ 11 | "reduce_counter_class.py", 12 | "reduce_counter_func.py", 13 | ] 14 | 15 | 16 | @pytest.mark.parametrize("script", SCRIPTS) 17 | def test_python_reduce_server_and_rust_client(script: str, tmp_path: Path): 18 | run_python_server_with_rust_client( 19 | script=script, 20 | sock_path=SOCK_PATH, 21 | server_info_path=SERVER_INFO, 22 | rust_bin_name="test_reduce", 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_reducestream.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/reducestream.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/reducestreamer-server-info") 9 | 10 | SCRIPTS = [ 11 | "reducestream_counter.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_reducestream_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_reducestream", 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /.github/workflows/changelog.yml: -------------------------------------------------------------------------------- 1 | name: Changelog 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | jobs: 8 | generate_changelog: 9 | if: github.repository == 'numaproj/numaflow-python' 10 | runs-on: ubuntu-latest 11 | name: Generate changelog 12 | steps: 13 | - uses: actions/checkout@v3 14 | with: 15 | ref: main 16 | 
fetch-depth: 0 17 | - run: git fetch --prune --prune-tags 18 | - run: git tag -l 'v*' 19 | - run: ./hack/changelog.sh > CHANGELOG.md 20 | - uses: peter-evans/create-pull-request@v3 21 | with: 22 | title: 'docs: updated CHANGELOG.md' 23 | commit-message: 'docs: updated CHANGELOG.md' 24 | signoff: true 25 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/__init__.pyi: -------------------------------------------------------------------------------- 1 | import _typeshed 2 | 3 | def __getattr__(name: str) -> _typeshed.Incomplete: ... 4 | 5 | from . import mapper as mapper 6 | from . import batchmapper as batchmapper 7 | from . import mapstreamer as mapstreamer 8 | from . import reducer as reducer 9 | from . import session_reducer as session_reducer 10 | from . import accumulator as accumulator 11 | from . import sinker as sinker 12 | from . import sourcer as sourcer 13 | from . import sourcetransformer as sourcetransformer 14 | from . import sideinputer as sideinputer 15 | 16 | __all__ = ['mapper', 'batchmapper', 'mapstreamer', 'reducer', 'session_reducer', 'accumulator', 'sinker', 'sourcer', 'sourcetransformer', 'sideinputer'] 17 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/udf/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/udf-sideinput-example:${TAG} 4 | DOCKER_FILE_PATH = examples/sideinput/simple_sideinput/udf/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . 
--push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_sourcetransform.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/sourcetransform.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/sourcetransformer-server-info") 9 | 10 | SCRIPTS = [ 11 | "sourcetransform_event_filter.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_sourcetransform", 22 | ) 23 | 24 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/event_time_filter/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/mapt-event-time-filter:${TAG} 4 | DOCKER_FILE_PATH = examples/sourcetransform/event_time_filter/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/map/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: map-cat 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 500 12 | duration: 1s 13 | - name: map 14 | partitions: 2 15 | scale: 16 | min: 1 17 | udf: 18 | container: 19 | image: quay.io/numaio/numaflow/pynumaflow-lite-map-cat:v2 20 | imagePullPolicy: Never 21 | - name: sink 22 | scale: 23 | min: 1 24 | sink: 25 | log: { } 26 | edges: 27 | - from: in 28 | to: map 29 | - from: map 30 | to: sink -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_reduce_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.reducer import Datum, Messages, Metadata 3 | from collections.abc import AsyncIterable 4 | 5 | 6 | class Reducer(metaclass=ABCMeta): 7 | """ 8 | Interface for reduce handlers. A new instance will be created per window. 9 | """ 10 | 11 | def __call__(self, *args, **kwargs): 12 | return self.handler(*args, **kwargs) 13 | 14 | @abstractmethod 15 | async def handler(self, keys: list[str], datums: AsyncIterable[Datum], md: Metadata) -> Messages: 16 | """ 17 | Implement this handler; consume `datums` async iterable and return Messages. 
18 | """ 19 | pass 20 | 21 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_batchmapper_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.batchmapper import Datum, BatchResponse 3 | from collections.abc import AsyncIterable 4 | 5 | 6 | class BatchMapper(metaclass=ABCMeta): 7 | """ 8 | Provides an interface to write a BatchMap servicer. 9 | """ 10 | 11 | def __call__(self, *args, **kwargs): 12 | return self.handler(*args, **kwargs) 13 | 14 | @abstractmethod 15 | async def handler(self, batch: AsyncIterable[Datum]) -> list[BatchResponse]: 16 | """ 17 | Implement this handler function for batch mapping. 18 | The returned list length should equal the input batch size. 19 | """ 20 | pass 21 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sourcetransform/async_event_time_filter/Makefile: -------------------------------------------------------------------------------- 1 | TAG ?= stable 2 | PUSH ?= false 3 | IMAGE_REGISTRY = quay.io/numaio/numaflow-python/async-mapt-event-time-filter:${TAG} 4 | DOCKER_FILE_PATH = examples/sourcetransform/async_event_time_filter/Dockerfile 5 | 6 | .PHONY: update 7 | update: 8 | poetry update -vv 9 | 10 | .PHONY: image-push 11 | image-push: update 12 | cd ../../../ && docker buildx build \ 13 | -f ${DOCKER_FILE_PATH} \ 14 | -t ${IMAGE_REGISTRY} \ 15 | --platform linux/amd64,linux/arm64 . --push 16 | 17 | .PHONY: image 18 | image: update 19 | cd ../../../ && docker build \ 20 | -f ${DOCKER_FILE_PATH} \ 21 | -t ${IMAGE_REGISTRY} . 
22 | @if [ "$(PUSH)" = "true" ]; then docker push ${IMAGE_REGISTRY}; fi 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_accumulator.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/accumulator.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/accumulator-server-info") 9 | 10 | SCRIPTS = [ 11 | "accumulator_stream_sorter.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_accumulator_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_accumulator", 22 | rust_bin_args=[str(SOCK_PATH)], 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/sourcer/__init__.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.sourcer._dtypes import ( 2 | Message, 3 | ReadRequest, 4 | PendingResponse, 5 | AckRequest, 6 | NackRequest, 7 | Offset, 8 | PartitionsResponse, 9 | get_default_partitions, 10 | Sourcer, 11 | SourceCallable, 12 | ) 13 | from pynumaflow._metadata import UserMetadata 14 | from pynumaflow.sourcer.async_server import SourceAsyncServer 15 | 16 | __all__ = [ 17 | "Message", 18 | "ReadRequest", 19 | "PendingResponse", 20 | "AckRequest", 21 | "NackRequest", 22 | "Offset", 23 | "PartitionsResponse", 24 | "get_default_partitions", 25 | "Sourcer", 26 | "SourceAsyncServer", 27 | "SourceCallable", 28 | "UserMetadata", 29 | ] 30 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/batchmap/pipeline.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: batchmap-cat 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 500 12 | duration: 1s 13 | - name: batchmap 14 | partitions: 2 15 | scale: 16 | min: 1 17 | udf: 18 | container: 19 | image: quay.io/numaio/numaflow/pynumaflow-lite-batchmap-cat:v2 20 | imagePullPolicy: Never 21 | - name: sink 22 | scale: 23 | min: 1 24 | sink: 25 | log: { } 26 | edges: 27 | - from: in 28 | to: batchmap 29 | - from: batchmap 30 | to: sink -------------------------------------------------------------------------------- /packages/pynumaflow-lite/tests/test_session_reduce.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | 5 | from _test_utils import run_python_server_with_rust_client 6 | 7 | SOCK_PATH = Path("/tmp/var/run/numaflow/sessionreduce.sock") 8 | SERVER_INFO = Path("/tmp/var/run/numaflow/sessionreducer-server-info") 9 | 10 | SCRIPTS = [ 11 | "session_reduce_counter_class.py", 12 | ] 13 | 14 | 15 | @pytest.mark.parametrize("script", SCRIPTS) 16 | def test_python_session_reduce_server_and_rust_client(script: str, tmp_path: Path): 17 | run_python_server_with_rust_client( 18 | script=script, 19 | sock_path=SOCK_PATH, 20 | server_info_path=SERVER_INFO, 21 | rust_bin_name="test_session_reduce", 22 | rust_bin_args=[str(SOCK_PATH)], 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/mapstream/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: mapstream-cat 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | 
rpu: 500 12 | duration: 1s 13 | - name: mapstream 14 | partitions: 2 15 | scale: 16 | min: 1 17 | udf: 18 | container: 19 | image: quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 20 | imagePullPolicy: Never 21 | - name: sink 22 | scale: 23 | min: 1 24 | sink: 25 | log: { } 26 | edges: 27 | - from: in 28 | to: mapstream 29 | - from: mapstream 30 | to: sink 31 | 32 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/batchmap/flatmap/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: flatmap 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 500 12 | duration: 1s 13 | - name: batch-flatmap 14 | partitions: 2 15 | scale: 16 | min: 1 17 | udf: 18 | container: 19 | image: quay.io/numaio/numaflow-python/batch-map-flatmap:stable 20 | imagePullPolicy: Always 21 | - name: sink 22 | scale: 23 | min: 1 24 | sink: 25 | log: {} 26 | edges: 27 | - from: in 28 | to: batch-flatmap 29 | - from: batch-flatmap 30 | to: sink 31 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/accumulator/utils.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime, timezone 2 | from pynumaflow.accumulator import Datum 3 | 4 | 5 | def create_test_datum(keys, value, event_time=None, watermark=None, id_=None, headers=None): 6 | """Create a test Datum object with default values""" 7 | if event_time is None: 8 | event_time = datetime.fromtimestamp(1662998400, timezone.utc) 9 | if watermark is None: 10 | watermark = datetime.fromtimestamp(1662998460, timezone.utc) 11 | if id_ is None: 12 | id_ = "test_id" 13 | if headers is None: 14 | headers = {} 15 | 16 | return Datum( 17 | keys=keys, 18 | value=value, 19 | 
event_time=event_time, 20 | watermark=watermark, 21 | id_=id_, 22 | headers=headers, 23 | ) 24 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_mapstream_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.mapstreamer import Datum, Message 3 | from collections.abc import AsyncIterator 4 | 5 | 6 | class MapStreamer(metaclass=ABCMeta): 7 | """ 8 | Provides an interface to write a streaming map servicer. 9 | The handler yields outputs incrementally as an async iterator. 10 | """ 11 | 12 | def __call__(self, *args, **kwargs): 13 | return self.handler(*args, **kwargs) 14 | 15 | @abstractmethod 16 | async def handler(self, keys: list[str], datum: Datum) -> AsyncIterator[Message]: 17 | """ 18 | Implement this handler function for streaming mapping. 19 | It should be an async generator yielding Message objects. 20 | """ 21 | pass 22 | 23 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_map_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.mapper import Datum, Messages 3 | 4 | 5 | class Mapper(metaclass=ABCMeta): 6 | """ 7 | Provides an interface to write a SyncMapServicer 8 | which will be exposed over a Synchronous gRPC server. 9 | """ 10 | 11 | def __call__(self, *args, **kwargs): 12 | """ 13 | This allows to execute the handler function directly if 14 | class instance is sent as a callable. 15 | """ 16 | return self.handler(*args, **kwargs) 17 | 18 | @abstractmethod 19 | async def handler(self, keys: list[str], payload: Datum) -> Messages: 20 | """ 21 | Implement this handler function which implements the MapAsyncCallable interface. 
22 | """ 23 | pass 24 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/shared/asynciter.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | from typing import Generic, TypeVar 3 | from collections.abc import AsyncIterator 4 | 5 | from pynumaflow._constants import STREAM_EOF 6 | 7 | T = TypeVar("T") 8 | 9 | 10 | class NonBlockingIterator(Generic[T]): 11 | """An Async Iterator backed by a queue""" 12 | 13 | __slots__ = "_queue" 14 | 15 | def __init__(self, size: int = 0) -> None: 16 | self._queue: asyncio.Queue[T] = asyncio.Queue(maxsize=size) 17 | 18 | async def read_iterator(self) -> AsyncIterator[T]: 19 | item = await self._queue.get() 20 | while True: 21 | if item == STREAM_EOF: 22 | break 23 | yield item 24 | item = await self._queue.get() 25 | 26 | async def put(self, item: T) -> None: 27 | await self._queue.put(item) 28 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/README.md: -------------------------------------------------------------------------------- 1 | ## Development Setup 2 | 3 | ```bash 4 | # new venv 5 | uv venv 6 | 7 | # activate venv 8 | source .venv/bin/activate 9 | 10 | uv pip install maturin 11 | 12 | # install dependencies 13 | uv sync 14 | ``` 15 | 16 | ### Testing 17 | 18 | ```bash 19 | make test 20 | ``` 21 | 22 | ### HOWTO create .whl 23 | 24 | Go to the `pynumaflow-lite` (top-level) directory and run the command below. 25 | 26 | ```bash 27 | docker run --rm -v $(pwd):/io ghcr.io/pyo3/maturin build -i python3.11 --release 28 | ``` 29 | 30 | This will create the `wheel` file in the `target/wheels/` directory. You should copy it over to where we 31 | are writing the Python code referencing this library. 
32 | 33 | e.g., 34 | 35 | ```bash 36 | cp target/wheels/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl manifests/simple-async-map/ 37 | ``` -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/forward_message/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: local-kafka 5 | spec: 6 | vertices: 7 | - name: input 8 | source: 9 | kafka: 10 | brokers: 11 | - kafka-broker:9092 12 | topic: input-topic 13 | consumerGroup: test 14 | - name: forward-message 15 | udf: 16 | container: 17 | args: 18 | - python 19 | - example.py 20 | image: quay.io/numaio/numaflow-python/map-forward-message:stable 21 | imagePullPolicy: Always 22 | - name: log-output 23 | sink: 24 | log: {} 25 | - name: log-kafka-output 26 | sink: 27 | log: {} 28 | edges: 29 | - from: input 30 | to: forward-message 31 | - from: input 32 | to: log-kafka-output 33 | - from: forward-message 34 | to: log-output 35 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/mapstream/utils.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.proto.mapper import map_pb2 2 | from tests.testing_utils import get_time_args, mock_message, mock_headers 3 | 4 | 5 | def request_generator(count, session=1, handshake=True): 6 | event_time_timestamp, watermark_timestamp = get_time_args() 7 | 8 | if handshake: 9 | yield map_pb2.MapRequest(handshake=map_pb2.Handshake(sot=True)) 10 | 11 | for j in range(session): 12 | for i in range(count): 13 | req = map_pb2.MapRequest( 14 | request=map_pb2.MapRequest.Request( 15 | value=mock_message(), 16 | event_time=event_time_timestamp, 17 | watermark=watermark_timestamp, 18 | headers=mock_headers(), 19 | ), 20 | id="test-id-" + str(i), 21 | ) 22 | yield req 23 | 
-------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reduce/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ### `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 21 | ``` 22 | 23 | #### Delete image from minikube 24 | 25 | `minikube` doesn't like pushing the same image over, delete and load if you are using 26 | the same tag. 27 | 28 | ```bash 29 | minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 30 | ``` 31 | 32 | ## Run the pipeline 33 | 34 | ```bash 35 | kubectl apply -f pipeline.yaml 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: [ "main", "release/*" ] 6 | pull_request: 7 | branches: [ "main", "release/*" ] 8 | 9 | jobs: 10 | black: 11 | name: Black format 12 | runs-on: ubuntu-latest 13 | defaults: 14 | run: 15 | working-directory: packages/pynumaflow/ 16 | steps: 17 | - uses: actions/checkout@v3 18 | - uses: psf/black@stable 19 | with: 20 | src: packages/pynumaflow/ 21 | options: "--check --verbose --exclude=pynumaflow/proto/" 22 | version: "~= 23.3" 23 | 24 | ruff: 25 | name: Ruff lint 26 | runs-on: ubuntu-latest 27 | defaults: 28 | run: 29 | working-directory: packages/pynumaflow/ 30 | steps: 31 | - uses: actions/checkout@v3 32 | - uses: chartboost/ruff-action@v1 33 | with: 34 | src: 
packages/pynumaflow/ 35 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/mapstream/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 --load 7 | ``` 8 | 9 | Load it now to `k3d` or `minikube`. 10 | 11 | ### `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 21 | ``` 22 | 23 | #### Delete image from minikube 24 | 25 | `minikube` doesn't like pushing the same image over, delete and load if you are using 26 | the same tag. 27 | 28 | ```bash 29 | minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-mapstream-cat:v2 30 | ``` 31 | 32 | ## Run the pipeline 33 | 34 | ```bash 35 | kubectl apply -f pipeline.yaml 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reduce/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: reduce-counter 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 500 12 | duration: 1s 13 | - name: reduce 14 | partitions: 1 # keyed: false 15 | udf: 16 | container: 17 | image: quay.io/numaio/numaflow/pynumaflow-lite-reduce-counter:v1 18 | imagePullPolicy: Never 19 | groupBy: 20 | window: 21 | fixed: 22 | length: 10s 23 | keyed: false 24 | storage: 25 | emptyDir: { } 26 | - name: sink 27 | scale: 28 | min: 1 29 | sink: 30 | log: { } 31 | edges: 32 | - from: in 33 | to: reduce 34 | - from: reduce 35 | to: 
sink 36 | 37 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reducestream/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-reducestream-counter:v1 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ### `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-reducestream-counter:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-reducestream-counter:v1 21 | ``` 22 | 23 | #### Delete image from minikube 24 | 25 | `minikube` doesn't like pushing the same image over, delete and load if you are using 26 | the same tag. 27 | 28 | ```bash 29 | minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-reducestream-counter:v1 30 | ``` 31 | 32 | ## Run the pipeline 33 | 34 | ```bash 35 | kubectl apply -f pipeline.yaml 36 | ``` 37 | 38 | 39 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/session_reduce/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . 
-t quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ### `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 21 | ``` 22 | 23 | #### Delete image from minikube 24 | 25 | `minikube` doesn't like pushing the same image over, delete and load if you are using 26 | the same tag. 27 | 28 | ```bash 29 | minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 30 | ``` 31 | 32 | ## Run the pipeline 33 | 34 | ```bash 35 | kubectl apply -f pipeline.yaml 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/accumulator/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . -t quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ### `k3d` 12 | 13 | ```bash 14 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 15 | ``` 16 | 17 | ### Minikube 18 | 19 | ```bash 20 | minikube image load quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 21 | ``` 22 | 23 | #### Delete image from minikube 24 | 25 | `minikube` doesn't like pushing the same image over, delete and load if you are using 26 | the same tag. 
27 | 28 | ```bash 29 | minikube image rm quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 30 | ``` 31 | 32 | ## Run the pipeline 33 | 34 | ```bash 35 | kubectl apply -f pipeline.yaml 36 | ``` 37 | 38 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/batchmap/utils.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.proto.mapper import map_pb2 2 | from tests.testing_utils import get_time_args, mock_message, mock_headers 3 | 4 | 5 | def request_generator(count, session=1, handshake=True): 6 | event_time_timestamp, watermark_timestamp = get_time_args() 7 | 8 | if handshake: 9 | yield map_pb2.MapRequest(handshake=map_pb2.Handshake(sot=True)) 10 | 11 | for j in range(session): 12 | for i in range(count): 13 | req = map_pb2.MapRequest( 14 | request=map_pb2.MapRequest.Request( 15 | value=mock_message(), 16 | event_time=event_time_timestamp, 17 | watermark=watermark_timestamp, 18 | headers=mock_headers(), 19 | ), 20 | id="test-id-" + str(i), 21 | ) 22 | yield req 23 | 24 | yield map_pb2.MapRequest(status=map_pb2.TransmissionStatus(eot=True)) 25 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reducestream/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: reducestream-counter 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | # A self data generating source 10 | generator: 11 | rpu: 10 12 | duration: 1s 13 | - name: reducestream 14 | partitions: 1 15 | udf: 16 | container: 17 | image: quay.io/numaio/numaflow/pynumaflow-lite-reducestream-counter:v1 18 | imagePullPolicy: Never 19 | groupBy: 20 | window: 21 | fixed: 22 | length: 10s 23 | streaming: true 24 | keyed: true 25 | storage: 26 | emptyDir: { } 27 | - name: sink 28 | scale: 29 | min: 
1 30 | sink: 31 | log: { } 32 | edges: 33 | - from: in 34 | to: reducestream 35 | - from: reducestream 36 | to: sink 37 | 38 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- 1 | default_language_version: 2 | python: python3.9 3 | repos: 4 | - repo: https://github.com/psf/black 5 | rev: 23.3.0 6 | hooks: 7 | - id: black 8 | language_version: python3.9 9 | args: [--config=pyproject.toml, --diff, --color ] 10 | - repo: https://github.com/charliermarsh/ruff-pre-commit 11 | rev: 'v0.0.264' 12 | hooks: 13 | - id: ruff 14 | - repo: https://github.com/adamchainz/blacken-docs 15 | rev: "1.13.0" 16 | hooks: 17 | - id: blacken-docs 18 | additional_dependencies: 19 | - black==22.12.0 20 | - repo: https://github.com/pre-commit/pre-commit-hooks 21 | rev: v4.4.0 22 | hooks: 23 | - id: end-of-file-fixer 24 | - id: trailing-whitespace 25 | - id: check-toml 26 | - id: check-added-large-files 27 | - id: check-ast 28 | - id: check-case-conflict 29 | - id: check-docstring-first 30 | - repo: https://github.com/python-poetry/poetry 31 | rev: "1.6" 32 | hooks: 33 | - id: poetry-check 34 | -------------------------------------------------------------------------------- /.github/workflows/run-tests.yml: -------------------------------------------------------------------------------- 1 | name: Build 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | name: Python version 12 | runs-on: ubuntu-latest 13 | defaults: 14 | run: 15 | working-directory: packages/pynumaflow/ 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9", "3.10", "3.11", "3.12", "3.13", "3.14"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | 24 | - name: Install poetry 25 | run: pipx install poetry 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: 
actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'poetry' 32 | 33 | - name: Install dependencies 34 | run: | 35 | poetry env use ${{ matrix.python-version }} 36 | poetry install --with dev --no-root 37 | 38 | - name: Run tests 39 | run: make test 40 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/log/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: udsink-pipeline 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | generator: 10 | rpu: 1 11 | duration: 1s 12 | msgSize: 10 13 | - name: p1 14 | udf: 15 | builtin: 16 | name: cat 17 | - name: out 18 | sink: 19 | udsink: 20 | container: 21 | args: 22 | - python 23 | - example.py 24 | image: quay.io/numaio/numaflow-python/sink-log:stable 25 | imagePullPolicy: Always 26 | env: 27 | - name: PYTHONDEBUG 28 | value: "true" 29 | - name: INVOKE 30 | value: "func_handler" 31 | - name: log-output 32 | sink: 33 | log: {} 34 | edges: 35 | - from: in 36 | to: p1 37 | - from: p1 38 | to: out 39 | - from: p1 40 | to: log-output 41 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/multiproc_map/README.md: -------------------------------------------------------------------------------- 1 | # Multiprocessing Map 2 | 3 | `pynumaflow` supports only asyncio-based Reduce UDFs because we found that procedural Python is not able to handle 4 | any substantial traffic. 5 | 6 | This feature enables the `pynumaflow` developer to utilise multiprocessing capabilities while 7 | writing UDFs using the map function. It is particularly useful for CPU-intensive operations, 8 | as it allows for better resource utilisation. 
9 | 10 | In this mode we spawn N (N = CPU count) gRPC servers in different processes, each of them 11 | listening on multiple TCP sockets. 12 | 13 | To enable multiprocessing mode, start the multiproc server in the UDF using the following command, 14 | providing the optional argument `server_count` to specify the number of 15 | servers to be forked (defaults to `os.cpu_count()` if not provided): 16 | ```python 17 | if __name__ == "__main__": 18 | grpc_server = MapMultiProcServer(handler, server_count = 3) 19 | grpc_server.start() 20 | ``` -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sink/async_log/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: udsink-pipeline 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | generator: 10 | rpu: 1 11 | duration: 1s 12 | msgSize: 10 13 | - name: p1 14 | udf: 15 | builtin: 16 | name: cat 17 | - name: out 18 | sink: 19 | udsink: 20 | container: 21 | args: 22 | - python 23 | - example.py 24 | image: quay.io/numaio/numaflow-python/async-sink-log:stable 25 | imagePullPolicy: Always 26 | env: 27 | - name: PYTHONDEBUG 28 | value: "true" 29 | - name: INVOKE 30 | value: "func_handler" 31 | - name: log-output 32 | sink: 33 | log: {} 34 | edges: 35 | - from: in 36 | to: p1 37 | - from: p1 38 | to: out 39 | - from: p1 40 | to: log-output 41 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/session_reduce/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-session-reduce 5 | spec: 6 | watermark: 7 | idleSource: 8 | threshold: 5s 9 | incrementBy: 3s 10 | stepInterval: 2s 11 | vertices: 12 | - name: in 13 | source: 
14 | http: { } 15 | - name: session-counter 16 | udf: 17 | container: 18 | # count element in sessions 19 | image: quay.io/numaio/numaflow/pynumaflow-lite-session-reduce-counter:v1 20 | groupBy: 21 | window: 22 | session: 23 | timeout: 30s 24 | keyed: true 25 | storage: 26 | persistentVolumeClaim: 27 | volumeSize: 1Gi 28 | accessMode: ReadWriteOnce 29 | - name: out 30 | sink: 31 | # A simple log printing sink 32 | log: { } 33 | edges: 34 | - from: in 35 | to: session-counter 36 | - from: session-counter 37 | to: out 38 | 39 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/flatmap/example.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.mapper import Messages, Message, Datum, MapServer, Mapper 2 | 3 | 4 | class Flatmap(Mapper): 5 | """ 6 | This is a class that inherits from the Mapper class. 7 | It implements the handler method that is called for each datum. 8 | """ 9 | 10 | def handler(self, keys: list[str], datum: Datum) -> Messages: 11 | val = datum.value 12 | _ = datum.event_time 13 | _ = datum.watermark 14 | strs = val.decode("utf-8").split(",") 15 | messages = Messages() 16 | if len(strs) == 0: 17 | messages.append(Message.to_drop()) 18 | return messages 19 | for s in strs: 20 | messages.append(Message(str.encode(s))) 21 | return messages 22 | 23 | 24 | if __name__ == "__main__": 25 | """ 26 | This example shows how to use the Flatmap mapper. 27 | We use a class as handler, but a function can be used as well. 28 | """ 29 | grpc_server = MapServer(Flatmap()) 30 | grpc_server.start() 31 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/common/metadata_pb2_grpc.py: -------------------------------------------------------------------------------- 1 | # Generated by the gRPC Python protocol compiler plugin. DO NOT EDIT! 
2 | """Client and server classes corresponding to protobuf-defined services.""" 3 | import grpc 4 | import warnings 5 | 6 | 7 | GRPC_GENERATED_VERSION = '1.75.0' 8 | GRPC_VERSION = grpc.__version__ 9 | _version_not_supported = False 10 | 11 | try: 12 | from grpc._utilities import first_version_is_lower 13 | _version_not_supported = first_version_is_lower(GRPC_VERSION, GRPC_GENERATED_VERSION) 14 | except ImportError: 15 | _version_not_supported = True 16 | 17 | if _version_not_supported: 18 | raise RuntimeError( 19 | f'The grpc package installed is at version {GRPC_VERSION},' 20 | + f' but the generated code in pynumaflow/proto/common/metadata_pb2_grpc.py depends on' 21 | + f' grpcio>={GRPC_GENERATED_VERSION}.' 22 | + f' Please upgrade your grpc module to grpcio>={GRPC_GENERATED_VERSION}' 23 | + f' or downgrade your generated code using grpcio-tools<={GRPC_VERSION}.' 24 | ) 25 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | labels: "bug" 5 | --- 6 | 7 | **Describe the bug** 8 | A clear and concise description of what the bug is. 9 | 10 | **To Reproduce** 11 | Steps to reproduce the behavior: 12 | 13 | 1. .... 14 | 2. .... 15 | 3. .... 16 | 17 | **Expected behavior** 18 | A clear and concise description of what you expected to happen. 19 | 20 | **Screenshots** 21 | If applicable, add screenshots to help explain your problem. 22 | 23 | **Environment (please complete the following information):** 24 | 25 | - Kubernetes: [e.g. v1.18.6] 26 | - Numaflow: [e.g. v0.5.1] 27 | - Numalogic: [e.g. v0.2.0] 28 | - Numaflow-python: [e.g. v0.1.1] 29 | 30 | **Additional context** 31 | Add any other context about the problem here. 32 | 33 | --- 34 | 35 | 36 | 37 | **Message from the maintainers**: 38 | 39 | Impacted by this bug? Give it a 👍. 
We often sort issues this way to know what to prioritize. 40 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sideinput/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-sideinput 5 | spec: 6 | sideInputs: 7 | - name: myticker 8 | container: 9 | image: quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 10 | imagePullPolicy: Never 11 | trigger: 12 | schedule: "*/10 * * * * *" 13 | vertices: 14 | - name: in 15 | source: 16 | # A self data generating source 17 | generator: 18 | rpu: 1 19 | duration: 1s 20 | - name: si-map 21 | udf: 22 | container: 23 | image: quay.io/numaio/numaflow/pynumaflow-lite-sideinput:v1 24 | imagePullPolicy: Never 25 | env: 26 | - name: MAPPER 27 | value: "true" 28 | sideInputs: 29 | - myticker 30 | - name: out 31 | sink: 32 | # A simple log printing sink 33 | log: { } 34 | edges: 35 | - from: in 36 | to: si-map 37 | - from: si-map 38 | to: out 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sourcetransform/README.md: -------------------------------------------------------------------------------- 1 | To create the `wheel` file, refer [root](../../README.md) 2 | 3 | ## HOWTO build Image 4 | 5 | ```bash 6 | docker build . 
-t quay.io/numaio/numaflow/pynumaflow-lite-sourcetransform-event-filter:v1 --load 7 | ``` 8 | 9 | Load it now to `k3d` 10 | 11 | ```bash 12 | k3d image import quay.io/numaio/numaflow/pynumaflow-lite-sourcetransform-event-filter:v1 13 | ``` 14 | 15 | ## Run the pipeline 16 | 17 | ```bash 18 | kubectl apply -f pipeline.yaml 19 | ``` 20 | 21 | ## About this example 22 | 23 | This source transformer filters and routes messages based on their event time: 24 | 25 | - **Messages before 2022**: Dropped 26 | - **Messages within 2022**: Tagged with `within_year_2022` and event time set to Jan 1, 2022 27 | - **Messages after 2022**: Tagged with `after_year_2022` and event time set to Jan 1, 2023 28 | 29 | This demonstrates how source transformers can be used to: 30 | 1. Filter out old/stale data 31 | 2. Normalize event times 32 | 3. Route messages to different downstream vertices based on conditions 33 | 34 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/example.py: -------------------------------------------------------------------------------- 1 | import datetime 2 | from pynumaflow.sideinput import Response, SideInputServer, SideInput 3 | 4 | 5 | class ExampleSideInput(SideInput): 6 | def __init__(self): 7 | self.counter = 0 8 | 9 | def retrieve_handler(self) -> Response: 10 | """ 11 | This function is called every time the side input is requested. 
12 | """ 13 | time_now = datetime.datetime.now() 14 | # val is the value to be broadcasted 15 | val = f"an example: {str(time_now)}" 16 | self.counter += 1 17 | # broadcast every other time 18 | if self.counter % 2 == 0: 19 | # no_broadcast_message() is used to indicate that there is no broadcast 20 | return Response.no_broadcast_message() 21 | # broadcast_message() is used to indicate that there is a broadcast 22 | return Response.broadcast_message(val.encode("utf-8")) 23 | 24 | 25 | if __name__ == "__main__": 26 | grpc_server = SideInputServer(ExampleSideInput()) 27 | grpc_server.start() 28 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/accumulator/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: accumulator-stream-sorter 5 | spec: 6 | watermark: 7 | idleSource: 8 | threshold: 5s 9 | incrementBy: 3s 10 | stepInterval: 2s 11 | vertices: 12 | - name: in 13 | source: 14 | # A self data generating source 15 | http: { } 16 | - name: stream-sorter 17 | udf: 18 | container: 19 | # count element in sessions 20 | image: quay.io/numaio/numaflow/pynumaflow-lite-accumulator-stream-sorter:v1 21 | groupBy: 22 | window: 23 | accumulator: 24 | timeout: 10s 25 | keyed: true 26 | storage: 27 | persistentVolumeClaim: 28 | volumeSize: 1Gi 29 | accessMode: ReadWriteOnce 30 | - name: out 31 | sink: 32 | # A simple log printing sink 33 | log: { } 34 | edges: 35 | - from: in 36 | to: stream-sorter 37 | - from: stream-sorter 38 | to: out 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/even_odd/example.py: -------------------------------------------------------------------------------- 1 | from pynumaflow.mapper import Messages, Message, Datum, MapServer 2 | 3 | 4 | def my_handler(keys: list[str], datum: 
Datum) -> Messages: 5 | val = datum.value 6 | output_keys = keys 7 | output_tags = [] 8 | _ = datum.event_time 9 | _ = datum.watermark 10 | messages = Messages() 11 | num = int.from_bytes(val, "little") 12 | 13 | if num % 2 == 0: 14 | output_keys = ["even"] 15 | output_tags = ["even-tag"] 16 | else: 17 | output_keys = ["odd"] 18 | output_tags = ["odd-tag"] 19 | 20 | messages.append(Message(val, keys=output_keys, tags=output_tags)) 21 | return messages 22 | 23 | 24 | if __name__ == "__main__": 25 | """ 26 | This example shows how to create a simple map function that takes in a 27 | number and outputs it to the "even" or "odd" key depending on whether it 28 | is even or odd. 29 | We use a function as handler, but a class that implements 30 | a Mapper can be used as well. 31 | """ 32 | grpc_server = MapServer(my_handler) 33 | grpc_server.start() 34 | -------------------------------------------------------------------------------- /.github/workflows/pypi.yaml: -------------------------------------------------------------------------------- 1 | name: PyPi Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - v* 7 | jobs: 8 | pypi_publish: 9 | if: github.repository == 'numaproj/numaflow-python' 10 | runs-on: ubuntu-latest 11 | environment: production 12 | defaults: 13 | run: 14 | working-directory: packages/pynumaflow/ 15 | strategy: 16 | fail-fast: false 17 | matrix: 18 | python-version: [ "3.9" ] 19 | 20 | name: Publish to PyPi 21 | steps: 22 | - uses: actions/checkout@v3 23 | 24 | - name: Install poetry 25 | run: pipx install poetry 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'poetry' 32 | 33 | - name: Install dependencies 34 | run: | 35 | poetry env use ${{ matrix.python-version }} 36 | poetry install --no-root 37 | 38 | - name: Build dist 39 | run: poetry build 40 | 41 | - name: Publish 42 | run: poetry publish -u __token__ -p ${{ 
class StdoutFilter(logging.Filter):
    """
    Filter logs with level less than logging.ERROR so they will go to stdout instead
    of default stderr
    """

    def filter(self, record: logging.LogRecord) -> bool:
        # Only records strictly below ERROR pass; ERROR and above are left to stderr.
        return record.levelno < logging.ERROR


def setup_logging(name):
    """
    Return the logger called `name`, wired to stdout and stderr.

    Two stream handlers are attached:
      * stdout: records from INFO up to (but excluding) ERROR
      * stderr: records at ERROR and above

    The call is idempotent. If the logger already carries a handler installed by an
    earlier call (recognised by its StdoutFilter), no further handlers are added, so
    records are never emitted more than once per stream.

    Args:
        name: name of the logger to configure.

    Returns:
        The configured logging.Logger.
    """
    logger = logging.getLogger(name)

    # A handler carrying a StdoutFilter means this function already ran for `name`;
    # adding another pair would duplicate every log line.
    already_configured = any(
        isinstance(flt, StdoutFilter)
        for handler in logger.handlers
        for flt in handler.filters
    )
    if already_configured:
        return logger

    formatter = logging.Formatter(
        fmt="%(asctime)s %(levelname)-8s %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    stdout_handler.addFilter(StdoutFilter())
    stdout_handler.setLevel(logging.INFO)
    logger.addHandler(stdout_handler)

    stderr_handler = logging.StreamHandler(sys.stderr)
    stderr_handler.setFormatter(formatter)
    stderr_handler.setLevel(logging.ERROR)
    logger.addHandler(stderr_handler)

    return logger
# Type stub for the SideInputResponse protobuf message: it declares the attributes
# and constructor signature that type checkers see for the message class.
class SideInputResponse(_message.Message):
    __slots__ = ("value", "no_broadcast", "metadata")
    # One integer class constant per declared field.
    VALUE_FIELD_NUMBER: _ClassVar[int]
    NO_BROADCAST_FIELD_NUMBER: _ClassVar[int]
    METADATA_FIELD_NUMBER: _ClassVar[int]
    # Raw payload bytes of the response.
    value: bytes
    # NOTE(review): presumably set when the value should not be broadcast - confirm
    # against sideinput.proto.
    no_broadcast: bool
    # Metadata message imported from pynumaflow.proto.common.
    metadata: _metadata_pb2.Metadata
    # All arguments are optional; `metadata` accepts either a Metadata message or a mapping.
    def __init__(self, value: _Optional[bytes] = ..., no_broadcast: bool = ..., metadata: _Optional[_Union[_metadata_pb2.Metadata, _Mapping]] = ...) -> None: ...

# Type stub for the ReadyResponse protobuf message, which carries a single boolean field.
class ReadyResponse(_message.Message):
    __slots__ = ("ready",)
    READY_FIELD_NUMBER: _ClassVar[int]
    ready: bool
    def __init__(self, ready: bool = ...) -> None: ...
WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 32 | 33 | RUN poetry lock 34 | RUN poetry install --no-cache --no-root && \ 35 | rm -rf ~/.cache/pypoetry/ 36 | 37 | CMD ["python", "batchmap_cat.py"] -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/sink/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 32 | 33 | RUN poetry lock 34 | RUN poetry install --no-cache --no-root && \ 35 | rm -rf ~/.cache/pypoetry/ 36 | 37 | CMD ["python", "sink_log.py"] 38 | 39 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/source/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | 
PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 32 | 33 | RUN poetry lock 34 | RUN poetry install --no-cache --no-root && \ 35 | rm -rf ~/.cache/pypoetry/ 36 | 37 | CMD ["python", "simple_source.py"] 38 | 39 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/sideinput/simple_sideinput/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: my-pipeline 5 | spec: 6 | sideInputs: 7 | - name: myticker 8 | container: 9 | image: quay.io/numaio/numaflow-python/sideinput-example:stable 10 | imagePullPolicy: Always 11 | trigger: 12 | schedule: "0 */2 * * * *" 13 | # timezone: America/Los_Angeles 14 | vertices: 15 | - name: in 16 | source: 17 | # A self data generating source 18 | generator: 19 | rpu: 1 20 | duration: 1s 21 | - name: si-log 22 | udf: 23 | container: 24 | image: quay.io/numaio/numaflow-python/udf-sideinput-example:stable 25 | imagePullPolicy: Always 26 | containerTemplate: 27 | env: 28 | - name: NUMAFLOW_DEBUG 29 | value: "true" # DO NOT forget the double quotes!!! 
class SessionReducer(metaclass=ABCMeta):
    """
    Abstract base class for session reduce handlers.

    A fresh instance is created for every keyed window. Subclasses must implement
    all three coroutine methods below.
    """

    @abstractmethod
    async def session_reduce(
        self,
        keys: list[str],
        datums: AsyncIterator[Datum],
    ) -> AsyncIterator[Message]:
        """
        Handle one session window: consume the `datums` async iterable and yield
        Messages.

        Args:
            keys: the keys of the window being reduced.
            datums: async iterator over the Datum objects of the session.
        """
        ...

    @abstractmethod
    async def accumulator(self) -> bytes:
        """
        Return this session's current state serialized as bytes.

        Called when this session is merged with another one.
        """
        ...

    @abstractmethod
    async def merge_accumulator(self, accumulator: bytes) -> None:
        """
        Fold the state of another session into this session's state.

        Args:
            accumulator: the other session's state, as bytes.
        """
        ...
class SideInput(metaclass=ABCMeta):
    """
    Interface for writing a SideInput retriever that is exposed over a gRPC server.

    A SideInput periodically retrieves data that can be broadcast to other
    vertices in the pipeline.
    """

    async def __call__(self, *args, **kwargs):
        """
        Make the instance awaitable as a callable by delegating to
        `retrieve_handler` with the same arguments.
        """
        response = await self.retrieve_handler(*args, **kwargs)
        return response

    @abstractmethod
    async def retrieve_handler(self) -> Response:
        """
        Produce the side input response; subclasses must implement this.

        Invoked every time the side input is requested.

        Returns:
            Response: Response.broadcast_message(value) to broadcast a value, or
            Response.no_broadcast_message() to skip broadcasting.
        """
        ...
def my_handler(keys: list[str], datum: Datum) -> Messages:
    """
    Function-based handler that forwards the incoming value unchanged.

    Args:
        keys: keys of the incoming message, reused for the outgoing message.
        datum: the incoming datum.

    Returns:
        Messages wrapping one Message with the same value and keys.
    """
    payload = datum.value
    # event_time and watermark are read here but not otherwise used
    _ = datum.event_time
    _ = datum.watermark
    forwarded = Message(value=payload, keys=keys)
    return Messages(forwarded)
36 | 37 | - name: out 38 | sink: 39 | # A simple log printing sink 40 | log: {} 41 | edges: 42 | - from: in 43 | to: mult 44 | - from: mult 45 | to: out 46 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reduce/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | # NOTE: place the built wheel in this directory before building the image 32 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 33 | 34 | RUN poetry lock 35 | RUN poetry install --no-cache --no-root && \ 36 | rm -rf ~/.cache/pypoetry/ 37 | 38 | CMD ["python", "reduce_counter_class.py"] 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/reducestream/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | 
PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | # NOTE: place the built wheel in this directory before building the image 32 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 33 | 34 | RUN poetry lock 35 | RUN poetry install --no-cache --no-root && \ 36 | rm -rf ~/.cache/pypoetry/ 37 | 38 | CMD ["python", "reducestream_counter.py"] 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/accumulator/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | 
COPY ./ ./ 30 | 31 | # NOTE: place the built wheel in this directory before building the image 32 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 33 | 34 | RUN poetry lock 35 | RUN poetry install --no-cache --no-root && \ 36 | rm -rf ~/.cache/pypoetry/ 37 | 38 | CMD ["python", "accumulator_stream_sorter.py"] 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/manifests/session_reduce/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.11-slim-bullseye AS builder 2 | 3 | ENV PYTHONFAULTHANDLER=1 \ 4 | PYTHONUNBUFFERED=1 \ 5 | PYTHONHASHSEED=random \ 6 | PIP_NO_CACHE_DIR=on \ 7 | PIP_DISABLE_PIP_VERSION_CHECK=on \ 8 | PIP_DEFAULT_TIMEOUT=100 \ 9 | POETRY_HOME="/opt/poetry" \ 10 | POETRY_VIRTUALENVS_IN_PROJECT=true \ 11 | POETRY_NO_INTERACTION=1 \ 12 | PYSETUP_PATH="/opt/pysetup" 13 | 14 | ENV PATH="$POETRY_HOME/bin:$PATH" 15 | 16 | RUN apt-get update \ 17 | && apt-get install --no-install-recommends -y \ 18 | curl \ 19 | wget \ 20 | # deps for building python deps 21 | build-essential \ 22 | && apt-get install -y git \ 23 | && apt-get clean && rm -rf /var/lib/apt/lists/* \ 24 | && curl -sSL https://install.python-poetry.org | python3 - 25 | 26 | FROM builder AS udf 27 | 28 | WORKDIR $PYSETUP_PATH 29 | COPY ./ ./ 30 | 31 | # NOTE: place the built wheel in this directory before building the image 32 | RUN pip install $PYSETUP_PATH/pynumaflow_lite-0.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl 33 | 34 | RUN poetry lock 35 | RUN poetry install --no-cache --no-root && \ 36 | rm -rf ~/.cache/pypoetry/ 37 | 38 | CMD ["python", "session_reduce_counter_class.py"] 39 | 40 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/mapstream/flatmap_stream/pipeline.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: simple-pipeline 5 | spec: 6 | limits: 7 | readBatchSize: 2 8 | vertices: 9 | - name: in 10 | source: 11 | # A self data generating source 12 | generator: 13 | rpu: 10 14 | duration: 1s 15 | - name: flatmap 16 | limits: 17 | readBatchSize: 1 18 | udf: 19 | container: 20 | image: quay.io/numaio/numaflow-python/map-flatmap-stream:stable 21 | imagePullPolicy: Always 22 | env: 23 | - name: PYTHONDEBUG 24 | value: "true" 25 | - name : INVOKE 26 | value: "func_handler" 27 | containerTemplate: 28 | resources: 29 | limits: 30 | cpu: "1" 31 | memory: 2Gi 32 | requests: 33 | cpu: "500m" 34 | memory: 1Gi 35 | env: 36 | - name: NUMAFLOW_DEBUG 37 | value: "true" # DO NOT forget the double quotes!!! 38 | - name: out 39 | sink: 40 | # A simple log printing sink 41 | log: {} 42 | edges: 43 | - from: in 44 | to: flatmap 45 | - from: flatmap 46 | to: out 47 | -------------------------------------------------------------------------------- /.github/workflows/coverage.yml: -------------------------------------------------------------------------------- 1 | name: Coverage 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | build: 11 | name: Codecov 12 | runs-on: ubuntu-latest 13 | defaults: 14 | run: 15 | working-directory: packages/pynumaflow/ 16 | strategy: 17 | fail-fast: false 18 | matrix: 19 | python-version: ["3.9"] 20 | 21 | steps: 22 | - uses: actions/checkout@v3 23 | 24 | - name: Install poetry 25 | run: pipx install poetry 26 | 27 | - name: Set up Python ${{ matrix.python-version }} 28 | uses: actions/setup-python@v4 29 | with: 30 | python-version: ${{ matrix.python-version }} 31 | cache: 'poetry' 32 | 33 | - name: Install dependencies 34 | run: | 35 | poetry env use ${{ matrix.python-version }} 36 | poetry install --with dev --no-root 37 | 38 | - name: 
class Accumulator(metaclass=ABCMeta):
    """
    Interface for writing an Accumulator that is exposed over a gRPC server.
    """

    def __call__(self, *args, **kwargs):
        """
        Let an instance be used as a callable: the call is forwarded to `handler`
        and whatever `handler` returns is handed back unchanged.
        """
        outcome = self.handler(*args, **kwargs)
        return outcome

    @abstractmethod
    async def handler(self, datums: AsyncIterator[Datum]) -> AsyncIterator[Message]:
        """
        Read (possibly unordered) data from the input stream and emit ordered data
        to the output stream.

        Once the watermark (WM) of the output stream progresses, the data held in
        the WAL up to that WM is garbage collected.
        NOTE: a message may be silently dropped if need be; it is cleared from the
        WAL when the WM progresses.

        Args:
            datums: An async iterator of Datum objects

        Yields:
            Message objects to be sent to the next vertex
        """
        ...
26 | 27 | Args: 28 | keys: List of keys for this window 29 | datums: An async iterator of Datum objects 30 | md: Metadata containing window information 31 | 32 | Yields: 33 | Message objects to be sent to the next vertex 34 | """ 35 | pass 36 | 37 | -------------------------------------------------------------------------------- /packages/pynumaflow-lite/pynumaflow_lite/_sourcetransformer_dtypes.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | from pynumaflow_lite.sourcetransformer import Datum, Messages 3 | 4 | 5 | class SourceTransformer(metaclass=ABCMeta): 6 | """ 7 | Provides an interface to write a SourceTransformer 8 | which will be exposed over a gRPC server. 9 | 10 | A SourceTransformer is used for transforming and assigning event time 11 | to input messages from a source. 12 | """ 13 | 14 | def __call__(self, *args, **kwargs): 15 | """ 16 | This allows to execute the handler function directly if 17 | class instance is sent as a callable. 18 | """ 19 | return self.handler(*args, **kwargs) 20 | 21 | @abstractmethod 22 | async def handler(self, keys: list[str], datum: Datum) -> Messages: 23 | """ 24 | Implement this handler function which implements the SourceTransformer interface. 25 | 26 | Args: 27 | keys: The keys associated with the message. 28 | datum: The input datum containing value, event_time, watermark, and headers. 29 | 30 | Returns: 31 | Messages: A collection of transformed messages with potentially modified 32 | event times and tags for conditional forwarding. 
def get_metadata_env(envs: list[tuple[str, str]]) -> dict[str, Any]:
    """
    Build a metadata dict from environment variables.

    For each (key, env_var) pair the environment variable is looked up; when it is
    set to a non-empty value, that value is stored under `key`. Unset or empty
    variables are skipped.

    args:
        envs: List of tuples (key, env_var)

    returns:
        dict mapping each key to the value of its environment variable
    """
    return {
        key: value
        for key, env_name in envs
        if (value := os.getenv(env_name))
    }
async def start(f: callable):
    """
    Run the async map server with handler ``f`` until it stops or the task is cancelled.

    SIGINT and SIGTERM are wired to ``server.stop()`` on the running loop so a
    signal requests a graceful shutdown. If the surrounding task is cancelled,
    the server is stopped on a best-effort basis.
    """
    sock_file = "/tmp/var/run/numaflow/map.sock"
    server_info_file = "/tmp/var/run/numaflow/mapper-server-info"
    server = mapper.MapAsyncServer(sock_file, server_info_file)

    # Install the shutdown hooks; loops that cannot register signal handlers are tolerated.
    loop = asyncio.get_running_loop()
    try:
        for signum in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(signum, lambda: server.stop())
    except (NotImplementedError, RuntimeError):
        pass

    try:
        await server.start(f)
    except asyncio.CancelledError:
        # Cancelled from outside: try to stop the server, ignoring any failure while doing so.
        try:
            server.stop()
        except Exception:
            pass
        return
    else:
        print("Shutting down gracefully...")
def is_prime(n):
    """
    Return True if ``n`` is a prime number, False otherwise.

    Args:
        n: The integer to test.

    Returns:
        bool: True only for integers >= 2 that have no divisor other than 1
        and themselves. 0, 1 and negative numbers are not prime.
    """
    if n < 2:
        return False
    # Trial division must include floor(sqrt(n)) itself, otherwise squares of
    # primes (4, 9, 25, ...) slip through; isqrt is exact for large integers.
    return all(n % i for i in range(2, math.isqrt(n) + 1))
class SessionReduceAsyncServer:
    """
    Type stub for the async session-reduce server class.

    Only signatures are declared here; every body is a ``...`` placeholder.
    """

    # Both paths are optional and may be passed as None.
    def __init__(
        self,
        sock_file: str | None = ...,
        info_file: str | None = ...,
    ) -> None: ...

    # NOTE(review): `py_creator` is typed as a class and `init_args` as an optional tuple;
    # presumably the server instantiates the class with these args. Confirm against the extension.
    # The returned awaitable resolves to None.
    def start(self, py_creator: type, init_args: tuple | None = ...) -> Awaitable[None]: ...

    def stop(self) -> None: ...
class Response:
    """Response from the side input retrieve handler."""

    # Payload bytes carried by the response.
    value: bytes
    # Broadcast flag. NOTE(review): presumably True for broadcast_message() and
    # False for no_broadcast_message(); confirm against the extension.
    broadcast: bool

    @staticmethod
    def broadcast_message(value: bytes) -> Response:
        """Create a response that broadcasts the given value."""
        ...

    @staticmethod
    def no_broadcast_message() -> Response:
        """Create a response that does not broadcast any value."""
        ...

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
42 | 43 | 44 | DIR_PATH: str 45 | """Default directory path where side input files are stored.""" 46 | 47 | __all__ = [ 48 | "Response", 49 | "SideInputAsyncServer", 50 | "SideInput", 51 | "DIR_PATH", 52 | ] 53 | 54 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/map/even_odd/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: local-kafka 5 | spec: 6 | vertices: 7 | - name: input 8 | source: 9 | kafka: 10 | brokers: 11 | - kafka-broker:9092 12 | topic: input-topic 13 | consumerGroup: test 14 | - name: even-or-odd 15 | udf: 16 | container: 17 | args: 18 | - python 19 | - example.py 20 | image: quay.io/numaio/numaflow-python/even-odd:stable 21 | imagePullPolicy: Always 22 | - name: even-sink 23 | scale: 24 | min: 1 25 | sink: 26 | # A simple log printing sink 27 | log: {} 28 | - name: odd-sink 29 | scale: 30 | min: 1 31 | sink: 32 | log: {} 33 | - name: number-sink 34 | scale: 35 | min: 1 36 | sink: 37 | log: {} 38 | edges: 39 | - from: input 40 | to: even-or-odd 41 | - from: even-or-odd 42 | to: even-sink 43 | conditions: 44 | tags: 45 | values: 46 | - even-tag 47 | - from: even-or-odd 48 | to: odd-sink 49 | conditions: 50 | tags: 51 | operator: or 52 | values: 53 | - odd-tag 54 | - from: even-or-odd 55 | to: number-sink 56 | conditions: 57 | tags: 58 | operator: or 59 | values: 60 | - even-tag 61 | - odd-tag 62 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reduce/counter/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: even-odd-sum 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | http: {} 10 | - name: atoi 11 | scale: 12 | min: 3 13 | udf: 14 | container: 15 | # 
Tell the input number is even or odd, see https://github.com/numaproj/numaflow-go/tree/main/pkg/mapper/examples/even_odd 16 | image: quay.io/numaio/numaflow-go/map-even-odd:stable 17 | imagePullPolicy: Always 18 | - name: compute-sum 19 | udf: 20 | container: 21 | # compute the sum 22 | image: quay.io/numaio/numaflow-python/reduce-counter:stable 23 | imagePullPolicy: Always 24 | env: 25 | - name: PYTHONDEBUG 26 | value: "true" 27 | - name: INVOKE 28 | value: "class" 29 | groupBy: 30 | window: 31 | fixed: 32 | length: 60s 33 | keyed: true 34 | storage: 35 | persistentVolumeClaim: 36 | volumeSize: 10Gi 37 | accessMode: ReadWriteOnce 38 | partitions: 1 39 | - name: sink 40 | scale: 41 | min: 1 42 | sink: 43 | log: {} 44 | edges: 45 | - from: in 46 | to: atoi 47 | - from: atoi 48 | to: compute-sum 49 | - from: compute-sum 50 | to: sink 51 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/reducestream/counter/pipeline.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: numaflow.numaproj.io/v1alpha1 2 | kind: Pipeline 3 | metadata: 4 | name: even-odd-sum 5 | spec: 6 | vertices: 7 | - name: in 8 | source: 9 | http: {} 10 | - name: atoi 11 | scale: 12 | min: 3 13 | udf: 14 | container: 15 | # Tell the input number is even or odd, see https://github.com/numaproj/numaflow-go/tree/main/pkg/mapper/examples/even_odd 16 | image: quay.io/numaio/numaflow-go/map-even-odd:stable 17 | imagePullPolicy: Always 18 | - name: compute-sum 19 | udf: 20 | container: 21 | # compute the sum 22 | image: quay.io/numaio/numaflow-python/reduce-stream-counter:stable 23 | imagePullPolicy: Always 24 | env: 25 | - name: PYTHONDEBUG 26 | value: "true" 27 | - name: INVOKE 28 | value: "class" 29 | groupBy: 30 | window: 31 | fixed: 32 | length: 60s 33 | keyed: true 34 | storage: 35 | persistentVolumeClaim: 36 | volumeSize: 10Gi 37 | accessMode: ReadWriteOnce 38 | partitions: 1 39 | - name: 
class Flatmap(BatchMapper):
    """
    BatchMapper implementation of a flatmap: every datum of the batch is split
    on "," and each piece is emitted as its own message.
    """

    async def handler(
        self,
        datums: AsyncIterable[Datum],
    ) -> BatchResponses:
        """
        Build one BatchResponse per incoming datum, keyed by the datum id.

        Args:
            datums: Async stream of the datums that make up the batch.

        Returns:
            BatchResponses holding, for each datum, one message per
            comma-separated piece of its UTF-8 decoded value.
        """
        results = BatchResponses()
        async for datum in datums:
            payload = datum.value
            _ = datum.event_time
            _ = datum.watermark
            pieces = payload.decode("utf-8").split(",")
            response = BatchResponse.from_id(datum.id)
            if pieces:
                for piece in pieces:
                    response.append(Message(piece.encode()))
            else:
                # NOTE: str.split(",") always returns at least one element, so this
                # to-drop branch is not reached in practice.
                response.append(Message.to_drop())
            results.append(response)

        return results
class Message:
    """
    Type stub for the message type yielded by map-stream handlers.

    Only the attribute types and signatures are declared here.
    """

    # `keys` and `tags` are optional; `value` is the payload bytes.
    keys: Optional[List[str]]
    value: bytes
    tags: Optional[List[str]]

    def __init__(
        self,
        value: bytes,
        keys: Optional[List[str]] = ...,
        tags: Optional[List[str]] = ...,
    ) -> None: ...

    # NOTE(review): message_to_drop() and to_drop() have identical signatures;
    # presumably they are aliases. Confirm against the extension.
    @staticmethod
    def message_to_drop() -> Message: ...

    @staticmethod
    def to_drop() -> Message: ...
async def async_handler(batch: collections.abc.AsyncIterator[batchmapper.Datum]) -> batchmapper.BatchResponses:
    """
    Echo every datum of the batch back as a message, dropping "bad world".

    Args:
        batch: Async iterator over the datums of the current batch.

    Returns:
        BatchResponses with exactly one BatchResponse per datum id: a to-drop
        message when the value is b"bad world", otherwise the original value
        forwarded with the datum's keys.
    """
    responses = batchmapper.BatchResponses()
    async for d in batch:
        resp = batchmapper.BatchResponse.from_id(d.id)
        if d.value == b"bad world":
            resp.append(batchmapper.Message.message_to_drop())
        else:
            resp.append(batchmapper.Message(d.value, d.keys))
        # Register the response on both paths. Previously a `continue` on the
        # drop path skipped this line, so the dropped datum's id had no response.
        responses.append(resp)
    return responses
async def async_handler(keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]:
    """
    Stream the comma-separated pieces of the datum value as individual messages.

    The value is decoded as UTF-8 and split on `,`; every piece is re-encoded
    and yielded with the incoming keys. A to-drop message is yielded only if
    the split produced no pieces.
    """
    tokens = datum.value.decode("utf-8").split(",")
    if tokens:
        for token in tokens:
            yield Message(token.encode(), keys)
    else:
        yield Message.to_drop()
async def map_stream_handler(_: list[str], datum: Datum) -> AsyncIterable[Message]:
    """
    Function-style map-stream handler: split the datum value on `,` and emit
    every piece as its own message.

    The incoming keys are ignored. A to-drop message is emitted only when the
    split yields no pieces.
    """
    payload = datum.value
    _ = datum.event_time
    _ = datum.watermark
    pieces = payload.decode("utf-8").split(",")

    if pieces:
        for piece in pieces:
            yield Message(piece.encode())
    else:
        yield Message.to_drop()
async def http_request(session, url):
    """
    GET ``url`` through ``session`` and return the "message" field of the JSON body.

    The body is read in full and parsed as JSON. If parsing or the field lookup
    fails, the error is logged and the literal string "Error" is returned.
    """
    async with session.get(url) as response:
        body = await response.read()
        try:
            return json.loads(body)["message"]
        except Exception as e:
            _LOGGER.error("HTTP request error: %s", e)
            return "Error"
async def udsink_handler(datums: AsyncIterable[Datum]) -> Responses:
    """
    Function-style async sink handler: log every datum and acknowledge it.

    Each datum value is decoded as UTF-8 and logged, and a success response
    carrying the datum id is collected.
    """
    acks = Responses()
    async for datum in datums:
        payload = datum.value.decode("utf-8")
        _LOGGER.info("User Defined Sink %s", payload)
        acks.append(Response.as_success(datum.id))
        # When a write fails and a fallback sink is configured,
        # Response.as_fallback(datum.id) can be used to route the message there instead.
    return acks
class Datum:
    """
    Type stub for the input element handed to a map handler.

    Declares attribute types and the string conversions only.
    """

    # Read-only attributes provided by the extension
    keys: List[str]
    value: bytes  # payload bytes
    watermark: _dt.datetime
    # NOTE(review): spelled `eventtime` here, without an underscore. Confirm this
    # matches the attribute name exposed by the extension.
    eventtime: _dt.datetime
    headers: Dict[str, str]

    def __repr__(self) -> str: ...

    def __str__(self) -> str: ...
class MapAsyncServer:
    """
    Type stub for the async map server class.

    The tests/examples/map_cat.py example constructs it with a socket path and
    a server-info file path, awaits ``start(handler)``, and calls ``stop()``
    from signal handlers to request a graceful shutdown.
    """

    # Both paths are optional and may be passed as None.
    def __init__(
        self,
        sock_file: str | None = ...,
        info_file: str | None = ...,
    ) -> None: ...

    # NOTE(review): `py_func` is typed loosely as Callable[..., Any]; presumably an
    # async (keys, datum) -> Messages handler. Confirm against the extension.
    def start(self, py_func: Callable[..., Any]) -> Awaitable[None]: ...

    def stop(self) -> None: ...
50 | 51 | [[bin]] 52 | name = "test_reducestream" 53 | path = "tests/bin/reducestream.rs" 54 | 55 | [[bin]] 56 | name = "test_sink" 57 | path = "tests/bin/sink.rs" 58 | 59 | [[bin]] 60 | name = "test_source" 61 | path = "tests/bin/source.rs" 62 | 63 | [[bin]] 64 | name = "test_sourcetransform" 65 | path = "tests/bin/sourcetransform.rs" 66 | 67 | [[bin]] 68 | name = "test_sideinput" 69 | path = "tests/bin/sideinput.rs" 70 | -------------------------------------------------------------------------------- /packages/pynumaflow/examples/accumulator/streamsorter/README.md: -------------------------------------------------------------------------------- 1 | # Stream Sorter 2 | 3 | An example User Defined Function that sorts the incoming stream by event time. 4 | 5 | ### Applying the Pipeline 6 | 7 | To apply the pipeline, use the following command: 8 | 9 | ```shell 10 | kubectl apply -f pipeline.yaml 11 | ``` 12 | 13 | ### Publish messages 14 | 15 | Port-forward the HTTP endpoint, and make POST requests using curl. Remember to replace xxxx with the appropriate pod names. 
16 | 17 | ```shell 18 | kubectl port-forward stream-sorter-http-one-0-xxxx 8444:8443 19 | 20 | # Post data to the HTTP endpoint 21 | curl -kq -X POST -d "101" https://localhost:8444/vertices/http-one -H "X-Numaflow-Event-Time: 60000" 22 | curl -kq -X POST -d "102" https://localhost:8444/vertices/http-one -H "X-Numaflow-Event-Time: 61000" 23 | curl -kq -X POST -d "103" https://localhost:8444/vertices/http-one -H "X-Numaflow-Event-Time: 62000" 24 | curl -kq -X POST -d "104" https://localhost:8444/vertices/http-one -H "X-Numaflow-Event-Time: 63000" 25 | ``` 26 | 27 | ```shell 28 | kubectl port-forward stream-sorter-http-two-0-xxxx 8445:8443 29 | 30 | # Post data to the HTTP endpoint 31 | curl -kq -X POST -d "105" https://localhost:8445/vertices/http-two -H "X-Numaflow-Event-Time: 70000" 32 | curl -kq -X POST -d "106" https://localhost:8445/vertices/http-two -H "X-Numaflow-Event-Time: 71000" 33 | curl -kq -X POST -d "107" https://localhost:8445/vertices/http-two -H "X-Numaflow-Event-Time: 72000" 34 | curl -kq -X POST -d "108" https://localhost:8445/vertices/http-two -H "X-Numaflow-Event-Time: 73000" 35 | ``` 36 | 37 | ### Verify the output 38 | 39 | ```shell 40 | kubectl logs -f stream-sorter-log-sink-0-xxxx 41 | ``` 42 | 43 | The output should be sorted by event time. 
class ReduceCounter(ReduceStreamer):
    """Reduce-stream example that counts datums and streams the count out.

    The running count is kept on the instance. Each time it goes above 10 an
    early ``counter:<n>`` message is emitted and the count restarts at zero;
    once the input is exhausted the remaining count is emitted as well.
    """

    def __init__(self, counter):
        # Starting value of the running count (the example passes 0 via init_args).
        self.counter = counter

    async def handler(
        self,
        keys: list[str],
        datums: AsyncIterable[Datum],
        output: NonBlockingIterator,
        md: Metadata,
    ):
        """Count the datums of one stream and put the results on ``output``."""
        async for _datum in datums:
            self.counter += 1
            if self.counter <= 10:
                continue
            # Early result: emitted before the input stream is exhausted. Only
            # rely on this when the next vertex can cope with receiving both
            # early and final results.
            early_payload = f"counter:{self.counter}".encode()
            await output.put(Message(early_payload, keys=keys))
            self.counter = 0

        # Final result for whatever has been counted since the last reset.
        final_payload = f"counter:{self.counter}".encode()
        await output.put(Message(final_payload, keys=keys))
45 | grpc_server = ReduceStreamAsyncServer(ReduceCounter, init_args=(0,)) 46 | grpc_server.start() 47 | -------------------------------------------------------------------------------- /packages/pynumaflow/pynumaflow/proto/sideinput/sideinput.proto: -------------------------------------------------------------------------------- 1 | syntax = "proto3"; 2 | 3 | import "google/protobuf/empty.proto"; 4 | import "pynumaflow/proto/common/metadata.proto"; 5 | 6 | package sideinput.v1; 7 | 8 | // SideInput is the gRPC service for user-defined Side Inputs. 9 | // It is used to propagate changes in the values of the provided Side Inputs 10 | // which allows access to slowly updated data or configuration without needing to retrieve 11 | // it during each message processing. 12 | // Through this service we should be able to:- 13 | // 1) Invoke retrieval request for a single Side Input parameter, which in turn should 14 | // check for updates and return its latest value. 15 | // 2) Provide a health check endpoint to indicate whether the service is ready to be used. 16 | service SideInput { 17 | // RetrieveSideInput is the endpoint to retrieve the latest value of a given Side Input. 18 | rpc RetrieveSideInput(google.protobuf.Empty) returns (SideInputResponse); 19 | 20 | // IsReady is the health check endpoint to indicate whether the service is ready to be used. 21 | rpc IsReady(google.protobuf.Empty) returns (ReadyResponse); 22 | } 23 | 24 | /** 25 | * SideInputResponse represents a response to a given side input retrieval request. 
class SimpleStreamCat(mapstreamer.MapStreamer):
    """Map-stream example that splits a comma-separated payload into messages."""

    async def handler(self, keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]:
        """Yield one message per comma-separated field of ``datum.value``.

        The payload is decoded as UTF-8. An empty payload yields a single drop
        message; otherwise every field is re-encoded and yielded with the
        incoming ``keys``.
        """
        text = datum.value.decode("utf-8")
        # "".split(",") returns [""] rather than [], so emptiness has to be
        # checked on the payload itself for the drop branch to be reachable.
        if not text:
            yield Message.to_drop()
            return
        for part in text.split(","):
            yield Message(part.encode(), keys)
async def async_handler(datums: collections.abc.AsyncIterator[sinker.Datum]) -> sinker.Responses:
    """Log every datum's payload and acknowledge each one as a success.

    Returns a ``sinker.Responses`` holding one success response per datum, in
    the order the datums were received.
    """
    acks = sinker.Responses()
    async for datum in datums:
        payload = datum.value.decode("utf-8")
        _LOGGER.info("User Defined Sink %s", payload)
        # When a write fails and a fallback sink is configured,
        # Response.as_fallback(datum.id) can be used here instead.
        ack = sinker.Response.as_success(datum.id)
        acks.append(ack)
    return acks
class Message:
    """Typed stub for the message type of the sourcetransformer module.

    The constructor requires ``value`` and ``event_time``; ``keys`` and
    ``tags`` are optional.
    """

    keys: Optional[List[str]]
    value: bytes
    event_time: _dt.datetime
    tags: Optional[List[str]]

    def __init__(
        self,
        value: bytes,
        event_time: _dt.datetime,
        keys: Optional[List[str]] = ...,
        tags: Optional[List[str]] = ...,
    ) -> None: ...

    # Alternate constructor that takes only an event time.
    # NOTE(review): presumably builds a message marking the datum as dropped --
    # confirm against the extension implementation.
    @staticmethod
    def message_to_drop(event_time: _dt.datetime) -> Message: ...
class Message:
    """Typed stub for the message type of the batchmapper module.

    Only ``value`` is required by the constructor; ``keys`` and ``tags`` are
    optional.
    """

    keys: Optional[List[str]]
    value: bytes
    tags: Optional[List[str]]

    def __init__(
        self,
        value: bytes,
        keys: Optional[List[str]] = ...,
        tags: Optional[List[str]] = ...,
    ) -> None: ...

    # Argument-less alternate constructor.
    # NOTE(review): presumably builds a message marking the datum as dropped --
    # confirm against the extension implementation.
    @staticmethod
    def message_to_drop() -> Message: ...
class BatchResponse:
    """Typed stub for the per-datum response of a batch-map handler.

    As used in manifests/batchmap/batchmap_cat.py: an instance is created from
    a datum's ``id``, messages are added to it with ``append``, and the
    instance is then appended to a ``BatchResponses`` collection.
    """

    id: str

    def __init__(self, id: str) -> None: ...

    # Static alternative to the constructor, taking the same `id` argument.
    @staticmethod
    def from_id(id: str) -> BatchResponse: ...

    def append(self, message: Message) -> None: ...
async def reduce_handler(
    keys: list[str], datums: AsyncIterable[reducer.Datum], md: reducer.Metadata
) -> reducer.Messages:
    """Count the datums of one window and report the count with the window bounds.

    Returns a ``reducer.Messages`` holding a single message whose payload is
    ``counter:<n> interval_window_start:<start> interval_window_end:<end>``,
    tagged with the incoming ``keys``.
    """
    window = md.interval_window

    seen = 0
    async for _datum in datums:
        seen += 1

    fields = (
        f"counter:{seen}",
        f"interval_window_start:{window.start}",
        f"interval_window_end:{window.end}",
    )
    payload = " ".join(fields).encode()

    result = reducer.Messages()
    result.append(reducer.Message(payload, keys=keys))
    return result
async def start(handler: callable):
    """Serve ``handler`` on the reduce server until the server stops.

    SIGINT and SIGTERM are routed to ``server.stop()`` through the running
    event loop when the loop accepts signal handlers. If this coroutine is
    cancelled, the server is stopped on a best-effort basis.
    """
    server = reducer.ReduceAsyncServer(
        "/tmp/var/run/numaflow/reduce.sock",
        "/tmp/var/run/numaflow/reducer-server-info",
    )

    def _request_stop():
        server.stop()

    event_loop = asyncio.get_running_loop()
    try:
        for signum in (signal.SIGINT, signal.SIGTERM):
            event_loop.add_signal_handler(signum, _request_stop)
    except (NotImplementedError, RuntimeError):
        # The loop cannot install signal handlers; keep serving without them.
        pass

    try:
        await server.start(handler)
    except asyncio.CancelledError:
        try:
            server.stop()
        except Exception:
            # Best effort only: a failure to stop must not mask the cancellation path.
            pass
        return
    else:
        print("Shutting down gracefully...")
class ReduceCounter(Reducer):
    """Reduce example that counts the datums of a window.

    The count is kept on the instance and restarted from zero at the beginning
    of every ``handler`` call.
    """

    def __init__(self, counter):
        # Initial value; handler() overwrites it with 0 before counting.
        self.counter = counter

    async def handler(
        self, keys: list[str], datums: AsyncIterable[Datum], md: Metadata
    ) -> Messages:
        """Return one message carrying the datum count and the window bounds."""
        window = md.interval_window

        self.counter = 0
        async for _datum in datums:
            self.counter += 1

        fields = (
            f"counter:{self.counter}",
            f"interval_window_start:{window.start}",
            f"interval_window_end:{window.end}",
        )
        payload = " ".join(fields).encode()
        return Messages(Message(payload, keys=keys))
class SimpleStreamCat(mapstreamer.MapStreamer):
    """Map-stream example that splits a comma-separated payload into messages."""

    async def handler(self, keys: list[str], datum: mapstreamer.Datum) -> AsyncIterator[Message]:
        """Yield one message per comma-separated field of ``datum.value``.

        The payload is decoded as UTF-8. An empty payload yields a single drop
        message; otherwise every field is re-encoded and yielded with the
        incoming ``keys``.
        """
        text = datum.value.decode("utf-8")
        # "".split(",") returns [""] rather than [], so emptiness has to be
        # checked on the payload itself for the drop branch to be reachable.
        if not text:
            yield Message.to_drop()
            return
        for part in text.split(","):
            yield Message(part.encode(), keys)
async def start(f: callable):
    """Serve ``f`` on the map-stream server until the server stops.

    SIGINT and SIGTERM are routed to ``server.stop()`` through the running
    event loop when the loop accepts signal handlers, so shutdown is driven
    from here. If this coroutine is cancelled, the server is stopped on a
    best-effort basis.
    """
    server = mapstreamer.MapStreamAsyncServer(
        "/tmp/var/run/numaflow/mapstream.sock",
        "/tmp/var/run/numaflow/mapper-server-info",
    )

    def _request_stop():
        server.stop()

    event_loop = asyncio.get_running_loop()
    try:
        for signum in (signal.SIGINT, signal.SIGTERM):
            event_loop.add_signal_handler(signum, _request_stop)
    except (NotImplementedError, RuntimeError):
        # The loop cannot install signal handlers; keep serving without them.
        pass

    try:
        await server.start(f)
    except asyncio.CancelledError:
        try:
            server.stop()
        except Exception:
            # Best effort only: a failure to stop must not mask the cancellation path.
            pass
        return
    else:
        print("Shutting down gracefully...")
class SimpleBatchCat(batchmapper.BatchMapper):
    """Batch-map example that echoes every datum, dropping ``b"bad world"`` payloads."""

    async def handler(self, batch: AsyncIterable[batchmapper.Datum]) -> batchmapper.BatchResponses:
        """Build one ``BatchResponse`` per datum in ``batch`` and return them all.

        A datum whose value is ``b"bad world"`` gets a drop message; any other
        datum is forwarded unchanged with its own keys.
        """
        responses = batchmapper.BatchResponses()
        async for d in batch:
            resp = batchmapper.BatchResponse(d.id)
            if d.value == b"bad world":
                resp.append(Message.message_to_drop())
            else:
                resp.append(Message(d.value, d.keys))
            # Record the response on both paths; skipping this for dropped
            # datums would discard the drop marker built above and leave that
            # datum without any response in the returned batch.
            responses.append(resp)
        return responses
async def start(f: callable):
    """Serve ``f`` on a default-configured batch-map server until it stops.

    SIGINT and SIGTERM are routed to ``server.stop()`` through the running
    event loop when the loop accepts signal handlers, so shutdown is driven
    from here. If this coroutine is cancelled, the server is stopped on a
    best-effort basis.
    """
    server = batchmapper.BatchMapAsyncServer()

    def _request_stop():
        server.stop()

    event_loop = asyncio.get_running_loop()
    try:
        for signum in (signal.SIGINT, signal.SIGTERM):
            event_loop.add_signal_handler(signum, _request_stop)
    except (NotImplementedError, RuntimeError):
        # The loop cannot install signal handlers; keep serving without them.
        pass

    try:
        await server.start(f)
    except asyncio.CancelledError:
        try:
            server.stop()
        except Exception:
            # Best effort only: a failure to stop must not mask the cancellation path.
            pass
        return
    else:
        print("Shutting down gracefully...")
class ReduceAsyncServer:
    """Typed stub for the asynchronous reduce server.

    Both constructor arguments are optional; ``start`` returns an awaitable
    and ``stop`` returns nothing.
    """

    def __init__(
        self,
        sock_file: str | None = ...,
        info_file: str | None = ...,
    ) -> None: ...

    # NOTE(review): `py_creator` is annotated as `type`, but
    # tests/examples/reduce_counter_func.py passes a plain async function to
    # start(). Confirm whether this annotation should be widened.
    def start(self, py_creator: type, init_args: tuple | None = ...) -> Awaitable[None]: ...

    def stop(self) -> None: ...
async def main():
    """Run the example side-input server until it stops.

    The server is created with test-specific socket and info-file paths and
    serves an ``ExampleSideInput`` instance. SIGINT and SIGTERM are routed to
    ``server.stop()`` when the running event loop accepts signal handlers.
    """
    # Create the server with custom socket paths for testing
    server = sideinputer.SideInputAsyncServer(
        sock_file="/tmp/var/run/numaflow/sideinput.sock",
        info_file="/tmp/var/run/numaflow/sideinput-server-info",
    )

    # Create the side input instance
    side_input = ExampleSideInput()

    # Set up signal handling for graceful shutdown
    loop = asyncio.get_running_loop()

    def handle_signal():
        server.stop()

    try:
        for sig in (signal.SIGINT, signal.SIGTERM):
            loop.add_signal_handler(sig, handle_signal)
    except (NotImplementedError, RuntimeError):
        # Same guard as the other example entry points: not every event loop
        # can install signal handlers, and that should not stop the server
        # from starting.
        pass

    # Start the server
    await server.start(side_input)
If determined idle then the watermark is progressed by `incrementBy` 16 | threshold: 5s 17 | # Duration after which, if source doesn't produce any data (from the pipeline's inception), the watermark is initialized with the current wall clock time. 18 | initSourceDelay: 30s 19 | vertices: 20 | - name: http-one 21 | scale: 22 | min: 1 23 | max: 1 24 | source: 25 | http: {} 26 | - name: http-two 27 | scale: 28 | min: 1 29 | max: 1 30 | source: 31 | http: {} 32 | - name: py-accum 33 | udf: 34 | container: 35 | image: quay.io/numaio/numaflow-python/streamsorter:stable 36 | imagePullPolicy: Always 37 | env: 38 | - name: PYTHONDEBUG 39 | value: "true" 40 | groupBy: 41 | window: 42 | accumulator: 43 | timeout: 10s 44 | keyed: true 45 | storage: 46 | persistentVolumeClaim: 47 | volumeSize: 1Gi 48 | - name: py-sink 49 | scale: 50 | min: 1 51 | max: 1 52 | sink: 53 | log: {} 54 | edges: 55 | - from: http-one 56 | to: py-accum 57 | - from: http-two 58 | to: py-accum 59 | - from: py-accum 60 | to: py-sink 61 | -------------------------------------------------------------------------------- /packages/pynumaflow/tests/sideinput/test_responses.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from pynumaflow.sideinput import Response, SideInput 4 | 5 | 6 | class TestResponse(unittest.TestCase): 7 | """ 8 | Test the Response class for SideInput 9 | """ 10 | 11 | def test_broadcast_message(self): 12 | """ 13 | Test the broadcast_message method, 14 | where we expect the no_broadcast flag to be False. 15 | """ 16 | succ_response = Response.broadcast_message(b"2") 17 | self.assertFalse(succ_response.no_broadcast) 18 | self.assertEqual(b"2", succ_response.value) 19 | 20 | def test_no_broadcast_message(self): 21 | """ 22 | Test the no_broadcast_message method, 23 | where we expect the no_broadcast flag to be True. 
class ReduceSum(ReduceStreamer):
    """Reduce-stream example that sums integer payloads and streams the sum out.

    The running sum is kept on the instance. Each time it reaches 100 or more
    an early message carrying the sum is emitted and the sum restarts at zero;
    once the input is exhausted the remaining sum is emitted as well.
    """

    def __init__(self, counter):
        # Starting value of the running sum (the example passes 0 via init_args).
        self.counter = counter

    async def handler(
        self,
        keys: list[str],
        datums: AsyncIterable[Datum],
        output: NonBlockingIterator,
        md: Metadata,
    ):
        """Add up ``int(datum.value)`` for one stream and put the results on ``output``."""
        async for datum in datums:
            self.counter += int(datum.value)
            if self.counter < 100:
                continue
            # Early result: emitted before the input stream is exhausted. Only
            # rely on this when the next vertex can cope with receiving both
            # early and final results.
            early_payload = f"{self.counter}".encode()
            await output.put(Message(early_payload, keys=keys))
            self.counter = 0

        # Final result for whatever has accumulated since the last reset.
        final_payload = f"{self.counter}".encode()
        await output.put(Message(final_payload, keys=keys))
def my_handler(keys: list[str], datum: Datum) -> Messages:
    """Route a datum by its event time and return exactly one message.

    * event time before ``january_first_2022``: a drop message that keeps the
      original event time;
    * event time before ``january_first_2023``: the value is forwarded with
      tag ``within_year_2022`` and event time ``january_first_2022``;
    * otherwise: the value is forwarded with tag ``after_year_2022`` and
      event time ``january_first_2023``.

    ``keys`` is accepted but not used.
    """
    # NOTE(review): the module-level boundaries are built with
    # ``datetime.fromtimestamp`` without a tz argument (naive, local time) --
    # confirm ``datum.event_time`` is comparable with them.
    payload = datum.value
    observed_at = datum.event_time
    result = Messages()

    if observed_at < january_first_2022:
        logging.info("Got event time:%s, it is before 2022, so dropping", observed_at)
        outgoing = Message.to_drop(observed_at)
    elif observed_at < january_first_2023:
        logging.info(
            "Got event time:%s, it is within year 2022, so forwarding to within_year_2022",
            observed_at,
        )
        outgoing = Message(
            value=payload, event_time=january_first_2022, tags=["within_year_2022"]
        )
    else:
        logging.info(
            "Got event time:%s, it is after year 2022, so forwarding to after_year_2022",
            observed_at,
        )
        outgoing = Message(
            value=payload, event_time=january_first_2023, tags=["after_year_2022"]
        )

    result.append(outgoing)
    return result
def test_message_to_all(self):
    """A ``Message`` built from a value alone has empty keys and empty tags."""
    expected = {"Keys": [], "Value": mock_message(), "Tags": []}
    built = Message(expected["Value"])

    self.assertEqual(Message, type(built))
    self.assertEqual(expected["Keys"], built.keys)
    self.assertEqual(expected["Value"], built.value)
    self.assertEqual(expected["Tags"], built.tags)