├── .gitmodules ├── python ├── erdos │ ├── internal │ │ ├── py.typed │ │ └── __init__.pyi │ ├── profile.py │ ├── config.py │ ├── utils.py │ ├── message.py │ ├── timestamp.py │ └── context.py ├── doc │ ├── source │ │ ├── _static │ │ │ └── .gitignore │ │ ├── _literalinclude │ │ │ └── python_examples │ │ ├── index.rst │ │ ├── package_reference.rst │ │ ├── messages.rst │ │ ├── streams.rst │ │ ├── operators.rst │ │ └── intro.rst │ ├── requirements-doc.txt │ ├── README.md │ ├── Makefile │ └── make.bat ├── Cargo.toml ├── pyproject.toml ├── src │ ├── py_stream │ │ ├── py_loop_stream.rs │ │ ├── py_ingest_stream.rs │ │ ├── py_operator_stream.rs │ │ ├── py_extract_stream.rs │ │ ├── py_write_stream.rs │ │ ├── py_read_stream.rs │ │ └── mod.rs │ ├── py_message.rs │ ├── py_operators │ │ ├── mod.rs │ │ └── py_source.rs │ └── py_timestamp.rs ├── README.md └── examples │ ├── close_streams.py │ ├── ingest_extract.py │ ├── loop.py │ ├── linq.py │ ├── simple_pipeline.py │ ├── join_streams.py │ └── watermarks.py ├── rust-toolchain.toml ├── Cargo.toml ├── setup.cfg ├── erdos ├── src │ ├── scheduler │ │ ├── mod.rs │ │ └── endpoints_manager.rs │ ├── dataflow │ │ ├── operators │ │ │ ├── ros │ │ │ │ ├── mod.rs │ │ │ │ ├── from_ros_operator.rs │ │ │ │ └── to_ros_operator.rs │ │ │ ├── mod.rs │ │ │ ├── concat.rs │ │ │ ├── filter.rs │ │ │ ├── split.rs │ │ │ ├── map.rs │ │ │ └── join.rs │ │ ├── mod.rs │ │ ├── stream │ │ │ ├── loop_stream.rs │ │ │ ├── errors.rs │ │ │ ├── mod.rs │ │ │ ├── read_stream.rs │ │ │ └── ingest_stream.rs │ │ ├── time.rs │ │ ├── message.rs │ │ ├── state.rs │ │ └── graph │ │ │ ├── default_graph.rs │ │ │ └── mod.rs │ ├── node │ │ ├── mod.rs │ │ └── operator_executors │ │ │ └── source_executor.rs │ ├── communication │ │ ├── endpoints.rs │ │ ├── pusher.rs │ │ ├── control_message_codec.rs │ │ ├── errors.rs │ │ ├── serializable.rs │ │ └── message_codec.rs │ └── configuration.rs └── Cargo.toml ├── scripts ├── install_rust.sh └── plot_erdos_graph.sh ├── .readthedocs.yaml ├── 
.gitignore ├── examples ├── Cargo.toml ├── ros_to_erdos.rs ├── erdos_to_ros.rs └── linq.rs ├── Dockerfile ├── .github └── workflows │ ├── docs.yaml │ └── ci.yaml └── README.md /.gitmodules: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/erdos/internal/py.typed: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/doc/source/_static/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /python/doc/source/_literalinclude/python_examples: -------------------------------------------------------------------------------- 1 | ../../../examples/ -------------------------------------------------------------------------------- /rust-toolchain.toml: -------------------------------------------------------------------------------- 1 | [toolchain] 2 | channel = "nightly-2022-02-09" -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | 3 | members = [ 4 | "erdos", 5 | "python", 6 | "examples", 7 | ] -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | max-line-length = 88 3 | extend-ignore = E203 4 | 5 | [isort] 6 | known_first_party = erdos 7 | -------------------------------------------------------------------------------- /python/doc/requirements-doc.txt: -------------------------------------------------------------------------------- 1 | sphinx==4.5.0 2 | sphinx-click==4.0.3 3 | sphinx_rtd_theme==1.0.0 4 | 
sphinx-autodoc-typehints==1.18.1 -------------------------------------------------------------------------------- /erdos/src/scheduler/mod.rs: -------------------------------------------------------------------------------- 1 | // Crate-wide visible submodules 2 | pub(crate) mod endpoints_manager; 3 | 4 | // Public exports 5 | pub mod channel_manager; 6 | -------------------------------------------------------------------------------- /python/doc/source/index.rst: -------------------------------------------------------------------------------- 1 | .. include:: intro.rst 2 | 3 | .. toctree:: 4 | :maxdepth: 1 5 | :caption: Overview 6 | 7 | intro.rst 8 | streams.rst 9 | operators.rst 10 | messages.rst 11 | package_reference.rst 12 | -------------------------------------------------------------------------------- /scripts/install_rust.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Install pip packages 4 | sudo apt-get install -y curl 5 | 6 | curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 7 | export PATH=$PATH:$HOME/.cargo/bin 8 | rustup default nightly 9 | -------------------------------------------------------------------------------- /.readthedocs.yaml: -------------------------------------------------------------------------------- 1 | version: 2 2 | 3 | sphinx: 4 | builder: html 5 | configuration: python/doc/source/conf.py 6 | 7 | build: 8 | os: "ubuntu-20.04" 9 | tools: 10 | python: "3.9" 11 | 12 | python: 13 | install: 14 | - requirements: python/doc/requirements-doc.txt 15 | -------------------------------------------------------------------------------- /python/doc/source/package_reference.rst: -------------------------------------------------------------------------------- 1 | ERDOS Package Reference 2 | ======================= 3 | 4 | .. 
automodule:: erdos 5 | :members: connect_source, connect_sink, connect_one_in_one_out, 6 | connect_two_in_one_out, connect_one_in_two_out, 7 | reset, run, run_async, profile_method, NodeHandle 8 | -------------------------------------------------------------------------------- /scripts/plot_erdos_graph.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | # $1 path to the gv file 3 | # 4 | # Example: 5 | # $ plot_erdos_graph.sh erdos.gv # plots erdos.png 6 | 7 | filename=$(basename -- "$1") 8 | extension="${filename##*.}" 9 | filename="${filename%.*}" 10 | 11 | filepath=$(dirname "$1") 12 | dot -Tpng $1 > $filepath/$filename.png 13 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/ros/mod.rs: -------------------------------------------------------------------------------- 1 | //! Library of ROS operators for building ERDOS applications. 2 | 3 | // Private submodules 4 | mod from_ros_operator; 5 | mod to_ros_operator; 6 | 7 | // Public exports 8 | pub use from_ros_operator::FromRosOperator; 9 | pub use to_ros_operator::ToRosOperator; 10 | 11 | // Constants 12 | const ROS_QUEUE_SIZE: usize = 1024; 13 | -------------------------------------------------------------------------------- /python/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "python" 3 | version = "0.4.0" 4 | edition = "2018" 5 | 6 | [dependencies] 7 | erdos = { path = "../erdos" } 8 | pyo3 = { version = "0.16.2", features = ["extension-module"] } 9 | tracing = "0.1.29" 10 | 11 | [lib] 12 | name = "erdos_python" 13 | crate-type = ["cdylib"] 14 | 15 | [package.metadata.maturin] 16 | name = "erdos.internal" 17 | -------------------------------------------------------------------------------- /python/pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires 
= ["maturin>=0.12,<0.13"] 3 | build-backend = "maturin" 4 | 5 | [project] 6 | name = "erdos" 7 | requires-python = ">=3.6" 8 | dependencies = ["numpy>=1.19.0"] 9 | classifiers = [ 10 | "Programming Language :: Rust", 11 | "Programming Language :: Python :: Implementation :: CPython", 12 | "Programming Language :: Python :: Implementation :: PyPy", 13 | ] -------------------------------------------------------------------------------- /python/doc/README.md: -------------------------------------------------------------------------------- 1 | # ERDOS Documentation 2 | 3 | To compile the documentation, run the following commands from this directory. 4 | 5 | ``` 6 | pip install -r requirements-doc.txt 7 | make html 8 | open build/html/index.html 9 | ``` 10 | 11 | To test if there are any build errors with the documentation, do the following. 12 | 13 | ``` 14 | sphinx-build -W -b html -d build/doctrees source build/html 15 | ``` 16 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Rust files 2 | /target 3 | **/*.rs.bk 4 | *~ 5 | Cargo.lock 6 | .criterion 7 | 8 | # Library files 9 | *.so 10 | 11 | # Python files 12 | *.egg-info 13 | __pycache__ 14 | *.pyc 15 | dist 16 | /build 17 | 18 | # Environments 19 | .env 20 | .venv 21 | env/ 22 | venv/ 23 | ENV/ 24 | env.bak/ 25 | venv.bak/ 26 | 27 | # Editor files 28 | .vscode 29 | .ipynb_checkpoints 30 | *~ 31 | .idea/ 32 | 33 | # Configuration files 34 | .pre-commit-config.yaml 35 | 36 | # Generated documentation files 37 | /python/doc/build 38 | /python/doc/source/_build 39 | -------------------------------------------------------------------------------- /erdos/src/dataflow/mod.rs: -------------------------------------------------------------------------------- 1 | //! Functions and structures for building an ERDOS application. 
2 | 3 | // Public submodules 4 | pub mod connect; 5 | pub mod context; 6 | pub mod deadlines; 7 | pub mod graph; 8 | pub mod message; 9 | pub mod operator; 10 | pub mod operators; 11 | pub mod state; 12 | pub mod stream; 13 | pub mod time; 14 | 15 | // Public exports 16 | pub use deadlines::TimestampDeadline; 17 | pub use message::{Data, Message, TimestampedData}; 18 | pub use operator::OperatorConfig; 19 | pub use state::{AppendableState, State}; 20 | pub use stream::{LoopStream, ReadStream, Stream, WriteStream}; 21 | pub use time::Timestamp; 22 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/mod.rs: -------------------------------------------------------------------------------- 1 | //! Library of generic operators for building ERDOS applications. 2 | 3 | // Public submodules 4 | #[cfg(feature = "ros")] 5 | pub mod ros; 6 | 7 | // Private submodules 8 | // mod join_operator; 9 | mod concat; 10 | mod filter; 11 | mod join; 12 | mod map; 13 | mod split; 14 | 15 | // Public exports 16 | // pub use crate::dataflow::operators::join_operator::JoinOperator; 17 | pub use concat::{Concat, ConcatOperator}; 18 | pub use filter::{Filter, FilterOperator}; 19 | pub use join::{Join, TimestampJoinOperator}; 20 | pub use map::{FlatMapOperator, Map}; 21 | pub use split::{Split, SplitOperator}; 22 | -------------------------------------------------------------------------------- /python/doc/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line. 5 | SPHINXOPTS = 6 | SPHINXBUILD = sphinx-build 7 | SOURCEDIR = source 8 | BUILDDIR = build 9 | 10 | # Put it first so that "make" without argument is like "make help". 
11 | help: 12 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 13 | 14 | .PHONY: help Makefile 15 | 16 | # Catch-all target: route all unknown targets to Sphinx using the new 17 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 18 | %: Makefile 19 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 20 | -------------------------------------------------------------------------------- /examples/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "examples" 3 | version = "0.4.0" 4 | authors = ["The ERDOS Team"] 5 | edition = "2018" 6 | publish = false 7 | 8 | [dependencies] 9 | erdos = { path = "../erdos" } 10 | 11 | rosrust_msg = { version = "0.1", optional = true } 12 | tracing = "0.1.29" 13 | 14 | [features] 15 | default = [] 16 | ros = ["erdos/ros", "rosrust_msg"] 17 | 18 | [[example]] 19 | name = "erdos_to_ros" 20 | path = "erdos_to_ros.rs" 21 | required-features = ["ros"] 22 | 23 | [[example]] 24 | name = "ros_to_erdos" 25 | path = "ros_to_erdos.rs" 26 | required-features = ["ros"] 27 | 28 | [[example]] 29 | name = "full_pipeline" 30 | path = "full_pipeline.rs" 31 | 32 | [[example]] 33 | name = "linq" 34 | path = "linq.rs" 35 | -------------------------------------------------------------------------------- /python/doc/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=source 11 | set BUILDDIR=build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. 
Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.http://sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /python/src/py_stream/py_loop_stream.rs: -------------------------------------------------------------------------------- 1 | use erdos::dataflow::{stream::StreamId, LoopStream, Stream}; 2 | use pyo3::prelude::*; 3 | 4 | use super::{PyOperatorStream, PyStream}; 5 | 6 | /// The internal Python abstraction over a `LoopStream`. 7 | /// 8 | /// This class is exposed on the Python interface as `erdos.streams.LoopStream`. 9 | #[pyclass(extends=PyStream)] 10 | pub struct PyLoopStream { 11 | loop_stream: LoopStream>, 12 | } 13 | 14 | #[pymethods] 15 | impl PyLoopStream { 16 | #[new] 17 | fn new() -> (Self, PyStream) { 18 | let loop_stream = LoopStream::new(); 19 | let id = loop_stream.id(); 20 | (Self { loop_stream }, PyStream { id }) 21 | } 22 | 23 | fn connect_loop(&self, stream: &PyOperatorStream) { 24 | self.loop_stream.connect_loop(&stream.stream); 25 | } 26 | } 27 | 28 | impl Stream> for PyLoopStream { 29 | fn id(&self) -> StreamId { 30 | self.loop_stream.id() 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /erdos/src/node/mod.rs: -------------------------------------------------------------------------------- 1 | //! Data structures for executing and ERDOS application. 2 | //! 3 | //! ERDOS applications may run across one or several nodes connected via TCP, 4 | //! as set in the [`Configuration`](crate::Configuration). 5 | //! The [`new_app`](crate::new_app) helper function may be useful in scaling 6 | //! from one node to many via command line arguments. 7 | //! 
8 | //! Currently, operators are manually scheduled to a [`Node`] via the 9 | //! [`OperatorConfig`](crate::dataflow::OperatorConfig). By default, they are 10 | //! scheduled on node 0. We are looking into more elegant solutions for 11 | //! scheduling operators, and hope to provide a versatile solution. 12 | 13 | // Private submodules 14 | #[allow(clippy::module_inception)] 15 | mod node; 16 | 17 | // Crate-wide visible submodules 18 | pub(crate) mod lattice; 19 | pub(crate) mod operator_event; 20 | pub(crate) mod worker; 21 | 22 | // Public submodules 23 | #[doc(hidden)] 24 | pub mod operator_executors; 25 | 26 | // Public exports 27 | pub use node::{Node, NodeHandle, NodeId}; 28 | -------------------------------------------------------------------------------- /python/README.md: -------------------------------------------------------------------------------- 1 | # ERDOS Python Interface 2 | 3 | ## Building from Source 4 | 5 | First, run `pip install maturin` to install [Maturin](https://github.com/PyO3/maturin), 6 | which aids in building Rust crates with Python bindings. 7 | 8 | ### Debug Build 9 | 10 | From the `python/` directory, run `maturin develop`. 11 | 12 | This will build the debug version of the rust backend resulting in more debug messages, faster compile times, and slower performance. 13 | 14 | ### Release Build 15 | 16 | Run `maturin develop --release`. 17 | 18 | This will build the release version of the Rust backend resulting in less debug messages, slower compile times, and faster performance. 19 | 20 | ## Building Wheels 21 | 22 | To generate wheels, run `maturin build`. 23 | 24 | This will generate wheels and store them in `target/wheels`. 25 | 26 | For more information, including on building portable wheels that comply with manylinux, see [Maturin's README](https://github.com/PyO3/maturin#manylinux-and-auditwheel) and the [Maturin guide](https://maturin.rs/distribution.html). 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ubuntu:20.04 2 | 3 | # Set up an erdos user first. 4 | RUN apt-get -y update && apt-get -y install sudo 5 | ENV uid 1000 6 | ENV gid 1000 7 | 8 | RUN mkdir -p /home/erdos 9 | RUN groupadd erdos -g ${gid} 10 | RUN useradd -r -u ${uid} -g erdos erdos 11 | RUN echo "erdos ALL=(ALL) NOPASSWD: ALL" > /etc/sudoers.d/erdos 12 | RUN chmod 0440 /etc/sudoers.d/erdos 13 | RUN chown ${uid}:${gid} -R /home/erdos 14 | 15 | USER erdos 16 | ENV HOME /home/erdos 17 | ENV SHELL /bin/bash 18 | 19 | SHELL ["/bin/bash", "-c"] 20 | 21 | # Install rust. 22 | RUN sudo apt-get -y install curl clang python3 python3-pip git 23 | RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y 24 | ENV PATH="/home/erdos/.cargo/bin:${PATH}" 25 | 26 | # Get the erdos directory. 27 | RUN mkdir -p /home/erdos/workspace 28 | RUN cd /home/erdos/workspace && git clone https://github.com/erdos-project/erdos.git 29 | WORKDIR /home/erdos/workspace/erdos 30 | RUN cd /home/erdos/workspace/erdos 31 | 32 | # Install erdos. 33 | RUN cargo build --release 34 | # Install the python package 35 | ENV PATH="/home/erdos/.local/bin:${PATH}" 36 | RUN pip3 install -U pip>=21.3 37 | RUN cd python && pip3 install -e . -------------------------------------------------------------------------------- /python/examples/close_streams.py: -------------------------------------------------------------------------------- 1 | """Creates a dummy operator and gracefully shuts it down. 
2 | """ 3 | 4 | import erdos 5 | from erdos.operator import OneInOneOut 6 | 7 | 8 | class NoopOp(OneInOneOut): 9 | def __init__(self): 10 | print("Initializing NoopOp") 11 | 12 | def destroy(self): 13 | print("Destroying NoopOp") 14 | 15 | 16 | def main(): 17 | ingest_stream = erdos.streams.IngestStream() 18 | s = erdos.connect_one_in_one_out( 19 | NoopOp, erdos.operator.OperatorConfig(), ingest_stream 20 | ) 21 | extract_stream = erdos.streams.ExtractStream(s) 22 | 23 | handle = erdos.run_async() 24 | 25 | timestamp = erdos.Timestamp(is_top=True) 26 | send_msg = erdos.WatermarkMessage(timestamp) 27 | print("IngestStream: sending {send_msg}".format(send_msg=send_msg)) 28 | ingest_stream.send(send_msg) 29 | assert ingest_stream.is_closed() 30 | 31 | recv_msg = extract_stream.read() 32 | print("ExtractStream: received {recv_msg}".format(recv_msg=recv_msg)) 33 | assert recv_msg.is_top 34 | assert extract_stream.is_closed() 35 | 36 | handle.shutdown() 37 | 38 | 39 | if __name__ == "__main__": 40 | main() 41 | -------------------------------------------------------------------------------- /python/examples/ingest_extract.py: -------------------------------------------------------------------------------- 1 | """Every second: 2 | 1) Send a number from the python script. 3 | 2) An operator squares the number. 4 | 3) The python script receives the result. 
5 | """ 6 | import time 7 | 8 | import erdos 9 | 10 | 11 | def square_msg(context, msg): 12 | """Squares the data from an ERDOS message.""" 13 | print("SquareOp: received {msg}".format(msg=msg)) 14 | return erdos.Message(context.timestamp, msg * msg) 15 | 16 | 17 | def main(): 18 | ingest_stream = erdos.streams.IngestStream() 19 | square_stream = ingest_stream.map(lambda x: x * x) 20 | 21 | extract_stream = erdos.streams.ExtractStream(square_stream) 22 | 23 | erdos.run_async() 24 | 25 | count = 0 26 | while True: 27 | timestamp = erdos.Timestamp(coordinates=[count]) 28 | send_msg = erdos.Message(timestamp, count) 29 | print("IngestStream: sending {send_msg}".format(send_msg=send_msg)) 30 | ingest_stream.send(send_msg) 31 | recv_msg = extract_stream.read() 32 | print("ExtractStream: received {recv_msg}".format(recv_msg=recv_msg)) 33 | 34 | count += 1 35 | time.sleep(1) 36 | 37 | 38 | if __name__ == "__main__": 39 | main() 40 | -------------------------------------------------------------------------------- /.github/workflows/docs.yaml: -------------------------------------------------------------------------------- 1 | name: Documentation 2 | on: 3 | push: 4 | branches: [master] 5 | pull_request: 6 | 7 | env: 8 | CARGO_TERM_COLOR: always 9 | RUSTDOCFLAGS: -D warnings 10 | rust_toolchain: nightly-2022-02-09 11 | 12 | jobs: 13 | rustdoc: 14 | runs-on: ubuntu-latest 15 | steps: 16 | - uses: actions/checkout@v2 17 | - name: Install supported Rust nightly 18 | uses: actions-rs/toolchain@v1 19 | with: 20 | toolchain: ${{ env.rust_toolchain }} 21 | profile: minimal 22 | override: true 23 | components: rust-docs 24 | - uses: Swatinem/rust-cache@v1 25 | - name: Build Documentation 26 | run: cargo doc --workspace --no-deps --all-features 27 | 28 | pythondoc: 29 | runs-on: ubuntu-latest 30 | steps: 31 | - uses: actions/checkout@v2 32 | - name: Set up Python 33 | uses: actions/setup-python@v2 34 | with: 35 | python-version: 3.x 36 | - name: Install Python dependencies 37 | run: 
python -m pip install -r python/doc/requirements-doc.txt 38 | - name: Build Documentation 39 | run: | 40 | cd python/doc 41 | make html SPHINXOPTS="-W --keep-going -n" 42 | -------------------------------------------------------------------------------- /python/doc/source/messages.rst: -------------------------------------------------------------------------------- 1 | Messages 2 | ======== 3 | 4 | ERDOS applications send data on streams via messages. Messages wrap data and 5 | provide timestamp information used to resolve control loops and track data flow 6 | through the system. 7 | 8 | .. autoclass:: erdos.Message 9 | 10 | 11 | Timestamps 12 | ---------- 13 | 14 | Timestamps consist of an array of coordinates. Timestamp semantics are 15 | user-defined for now; however, we may eventually formalize their use in the 16 | future in order to provide more advanced features in order to scale up stateful 17 | operators. Generally, the 0th coordinate is used to track message's sequence 18 | number and subsequent coordinates track the message's progress in cyclic data 19 | flows. 20 | 21 | .. autoclass:: erdos.Timestamp 22 | 23 | 24 | Watermarks 25 | ---------- 26 | 27 | Watermarks in ERDOS signal completion of computation. More concretely, 28 | sending a watermark with timestamp ``t`` on a stream asserts that all future 29 | messages sent on that stream will have timestamps ``t' > t``. 30 | ERDOS also introduces a *top watermark*, which is a watermark with the 31 | maximum possible timestamp. Sending a top watermark closes the stream as 32 | there is no ``t' > t_top``, so no more messages can be sent. 33 | 34 | .. 
autoclass:: erdos.WatermarkMessage 35 | -------------------------------------------------------------------------------- /erdos/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "erdos" 3 | version = "0.4.0" 4 | authors = ["The ERDOS Team"] 5 | edition = "2018" 6 | license = "Apache-2.0" 7 | readme = "../README.md" 8 | documentation = "https://docs.rs/crate/erdos/" 9 | repository = "https://github.com/erdos-project/erdos" 10 | description = """ 11 | ERDOS is a platform for developing self-driving cars and robotics applications. 12 | """ 13 | keywords = ["data-flow", "robotics", "autonomos", "driving"] 14 | 15 | [dependencies] 16 | abomonation = "0.7.3" 17 | abomonation_derive = "0.5.0" 18 | async-trait = "0.1.18" 19 | bincode = "1.3.1" 20 | bytes = "1.0.0" 21 | byteorder = "1.3.4" 22 | clap = "2.33.0" 23 | futures = "0.3.5" 24 | futures-delay-queue = "0.4.2" 25 | futures-intrusive = "0.4" # Required for futures-delay-queue 26 | futures-util = "0.3.5" 27 | once_cell = "1.10.0" 28 | petgraph = "0.5.0" 29 | rand = "0.3" 30 | rosrust = { version = "0.9", optional = true } 31 | serde = { version = "1.0.115", features = ["derive"] } 32 | tracing = "0.1.29" 33 | tracing-appender = "0.2.0" 34 | tracing-subscriber = "0.3.1" 35 | tokio = { version = "1.8.1", features = ["full"] } 36 | tokio-util = { version = "0.6.7", features = ["codec"] } 37 | tokio-serde-bincode = "0.2.1" 38 | uuid = { version = "0.7", features = ["v4", "v5", "serde"] } 39 | 40 | [features] 41 | default = [] 42 | ros = ["rosrust"] # Add ROS operator support with 'cargo build --features=ros 43 | -------------------------------------------------------------------------------- /python/src/py_stream/py_ingest_stream.rs: -------------------------------------------------------------------------------- 1 | use erdos::dataflow::{stream::IngestStream, Message, Stream}; 2 | use pyo3::{exceptions, prelude::*}; 3 | 4 | use crate::{PyMessage, 
PyStream}; 5 | 6 | /// The internal Python abstraction over an `IngestStream`. 7 | /// 8 | /// This class is exposed on the Python interface as `erdos.streams.IngestStream`. 9 | #[pyclass(extends=PyStream)] 10 | pub struct PyIngestStream { 11 | ingest_stream: IngestStream>, 12 | } 13 | 14 | #[pymethods] 15 | impl PyIngestStream { 16 | #[new] 17 | fn new(name: Option) -> (Self, PyStream) { 18 | let mut ingest_stream = IngestStream::new(); 19 | if let Some(name_str) = name { 20 | ingest_stream.set_name(&name_str); 21 | } 22 | 23 | let id = ingest_stream.id(); 24 | (Self { ingest_stream }, PyStream { id }) 25 | } 26 | 27 | fn is_closed(&self) -> bool { 28 | self.ingest_stream.is_closed() 29 | } 30 | 31 | fn send(&mut self, msg: &PyMessage) -> PyResult<()> { 32 | self.ingest_stream.send(Message::from(msg)).map_err(|e| { 33 | exceptions::PyException::new_err(format!( 34 | "Error sending message on ingest stream {}: {:?}", 35 | self.ingest_stream.id(), 36 | e 37 | )) 38 | }) 39 | } 40 | 41 | fn name(&self) -> String { 42 | self.ingest_stream.name() 43 | } 44 | 45 | fn set_name(&mut self, name: String) { 46 | self.ingest_stream.set_name(&name) 47 | } 48 | 49 | fn id(&self) -> String { 50 | format!("{}", self.ingest_stream.id()) 51 | } 52 | } 53 | -------------------------------------------------------------------------------- /python/examples/loop.py: -------------------------------------------------------------------------------- 1 | """Sends a message in a loop, incrementing the data and timestamp on 2 | each iteration. 
3 | 4 | Dataflow graph: 5 | +--LoopOp--+ 6 | | | 7 | +-----<----+ 8 | """ 9 | 10 | import time 11 | 12 | import erdos 13 | from erdos.context import OneInOneOutContext 14 | from erdos.operator import OneInOneOut 15 | from erdos.streams import ReadStream, WriteStream 16 | 17 | 18 | class LoopOp(OneInOneOut): 19 | def __init__(self): 20 | print("initializing loop op") 21 | 22 | def run(self, read_stream: ReadStream, write_stream: WriteStream): 23 | msg = erdos.Message(erdos.Timestamp(coordinates=[0]), 0) 24 | print("LoopOp: sending {msg}".format(msg=msg)) 25 | write_stream.send(msg) 26 | 27 | def on_data(self, context: OneInOneOutContext, data: int): 28 | print("LoopOp: received {data}".format(data=data)) 29 | time.sleep(1) 30 | # Update data and timestamp. 31 | data += 1 32 | coordinates = list(context.timestamp.coordinates) 33 | coordinates[0] += 1 34 | timestamp = erdos.Timestamp(coordinates=coordinates) 35 | message = erdos.Message(timestamp, data) 36 | print("LoopOp: sending {message}".format(message=message)) 37 | context.write_stream.send(message) 38 | 39 | 40 | def main(): 41 | """Creates and runs the dataflow graph.""" 42 | loop_stream = erdos.streams.LoopStream() 43 | stream = erdos.connect_one_in_one_out( 44 | LoopOp, erdos.operator.OperatorConfig(), loop_stream 45 | ) 46 | loop_stream.connect_loop(stream) 47 | 48 | erdos.run() 49 | 50 | 51 | if __name__ == "__main__": 52 | main() 53 | -------------------------------------------------------------------------------- /python/src/py_stream/py_operator_stream.rs: -------------------------------------------------------------------------------- 1 | use erdos::dataflow::stream::{OperatorStream, Stream, StreamId}; 2 | use pyo3::prelude::*; 3 | 4 | use super::PyStream; 5 | 6 | /// The internal Python abstraction over a [`Stream`]. 7 | /// 8 | /// This class is exposed on the Python interface as `erdos.streams.Stream`. 
9 | #[pyclass(extends=PyStream)] 10 | pub struct PyOperatorStream { 11 | pub stream: OperatorStream>, 12 | } 13 | 14 | #[pymethods] 15 | impl PyOperatorStream { 16 | fn name(&self) -> String { 17 | self.stream.name() 18 | } 19 | 20 | fn set_name(&mut self, name: String) { 21 | self.stream.set_name(&name) 22 | } 23 | 24 | fn id(&self) -> String { 25 | format!("{}", self.stream.id()) 26 | } 27 | } 28 | 29 | // Rust-only methods 30 | impl PyOperatorStream { 31 | /// Produces a [`PyOperatorStream`] and its [`PyStream`] base class 32 | /// from a Rust [`OperatorStream`]. 33 | pub(crate) fn new(py: Python, operator_stream: OperatorStream>) -> PyResult> { 34 | let base_class = PyStream { 35 | id: operator_stream.id(), 36 | }; 37 | let initializer = 38 | PyClassInitializer::from(base_class).add_subclass(Self::from(operator_stream)); 39 | Py::new(py, initializer) 40 | } 41 | } 42 | 43 | impl Stream> for PyOperatorStream { 44 | fn id(&self) -> StreamId { 45 | self.stream.id() 46 | } 47 | } 48 | 49 | impl From>> for PyOperatorStream { 50 | fn from(stream: OperatorStream>) -> Self { 51 | Self { stream } 52 | } 53 | } 54 | 55 | impl From for OperatorStream> { 56 | fn from(py_stream: PyOperatorStream) -> Self { 57 | py_stream.stream 58 | } 59 | } 60 | -------------------------------------------------------------------------------- /python/examples/linq.py: -------------------------------------------------------------------------------- 1 | import time 2 | from typing import Any 3 | 4 | import erdos 5 | from erdos.context import SinkContext 6 | from erdos.operator import OperatorConfig, Sink, Source 7 | from erdos.streams import WriteStream 8 | 9 | 10 | class SendOp(Source): 11 | """A :py:class:`SendOp` is a :py:class:`Source` operator that generates a sequence 12 | of inputs for the dataflow graph.""" 13 | 14 | def __init__(self): 15 | print("Initializing SendOp") 16 | 17 | def run(self, write_stream: WriteStream): 18 | count = 0 19 | while True: 20 | msg = 
erdos.Message(erdos.Timestamp(coordinates=[count]), count) 21 | print(f"SendOp sending {msg}") 22 | write_stream.send(msg) 23 | count += 1 24 | time.sleep(1) 25 | 26 | 27 | class SinkOp(Sink): 28 | """A :py:class:`SinkOp` is a :py:class:`Sink` operator that prints the received 29 | output to the standard output.""" 30 | 31 | def on_data(self, context: SinkContext, data: Any): 32 | print( 33 | f"SinkOp ({context.config.name}): Received data: {data} for " 34 | f"timestamp: {context.timestamp}" 35 | ) 36 | 37 | 38 | def main(): 39 | source_stream = erdos.connect_source(SendOp, OperatorConfig()) 40 | map_stream = source_stream.map(lambda x: x**2) 41 | evens_stream, odds_stream = map_stream.split(lambda x: x % 2 == 0) 42 | flat_map_stream = map_stream.flat_map(lambda x: (f"Number {x}", float(x / 2))) 43 | str_stream, float_stream = flat_map_stream.split_by_type(str, float) 44 | merged_stream = evens_stream.concat(odds_stream, str_stream, float_stream) 45 | erdos.connect_sink(SinkOp, OperatorConfig(name="MergedOutput"), merged_stream) 46 | erdos.run() 47 | 48 | 49 | if __name__ == "__main__": 50 | main() 51 | -------------------------------------------------------------------------------- /erdos/src/dataflow/stream/loop_stream.rs: -------------------------------------------------------------------------------- 1 | use std::marker::PhantomData; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::dataflow::{graph::default_graph, Data}; 6 | 7 | use super::{OperatorStream, Stream, StreamId}; 8 | 9 | /// Enables loops in the dataflow. 
10 | /// 11 | /// # Example 12 | /// ``` 13 | /// # use erdos::dataflow::{stream::LoopStream, operator::{OperatorConfig}, operators::{FlatMapOperator}}; 14 | /// let loop_stream = LoopStream::new(); 15 | /// let output_stream = erdos::connect_one_in_one_out( 16 | /// || FlatMapOperator::new(|x: &usize| { std::iter::once(2 * x) }), 17 | /// || {}, 18 | /// OperatorConfig::new().name("MapOperator"), 19 | /// &loop_stream, 20 | /// ); 21 | /// // Makes sending on output_stream equivalent to sending on loop_stream. 22 | /// loop_stream.connect_loop(&output_stream); 23 | /// ``` 24 | pub struct LoopStream 25 | where 26 | for<'a> D: Data + Deserialize<'a>, 27 | { 28 | id: StreamId, 29 | phantom: PhantomData, 30 | } 31 | 32 | impl LoopStream 33 | where 34 | for<'a> D: Data + Deserialize<'a>, 35 | { 36 | pub fn new() -> Self { 37 | let id = StreamId::new_deterministic(); 38 | let loop_stream = Self { 39 | id, 40 | phantom: PhantomData, 41 | }; 42 | default_graph::add_loop_stream(&loop_stream); 43 | loop_stream 44 | } 45 | 46 | pub fn connect_loop(&self, stream: &OperatorStream) { 47 | default_graph::connect_loop(self, stream); 48 | } 49 | } 50 | 51 | impl Default for LoopStream 52 | where 53 | for<'a> D: Data + Deserialize<'a>, 54 | { 55 | fn default() -> Self { 56 | Self::new() 57 | } 58 | } 59 | 60 | impl Stream for LoopStream 61 | where 62 | for<'a> D: Data + Deserialize<'a>, 63 | { 64 | fn id(&self) -> StreamId { 65 | self.id 66 | } 67 | } 68 | -------------------------------------------------------------------------------- /python/erdos/profile.py: -------------------------------------------------------------------------------- 1 | import time 2 | from types import TracebackType 3 | from typing import Dict, Optional, Type 4 | 5 | from erdos.operator import BaseOperator 6 | 7 | 8 | class Profile: 9 | """Used to log the duration of a snippet of code using a with statement.""" 10 | 11 | def __init__( 12 | self, 13 | event_name: str, 14 | operator: BaseOperator, 15 | 
    def __exit__(
        self,
        exc_type: Optional[Type[BaseException]],
        exc: Optional[BaseException],
        traceback: Optional[TracebackType],
    ) -> None:
        """Logs the profiled span to the operator's trace log on scope exit.

        Raises:
            ValueError: If ``event_data`` is not a dict mapping strings to
                strings. Note the validation happens here, at exit time, not
                when the event data is supplied to ``__init__``.
        """
        for key, value in self.event_data.items():
            if not isinstance(key, str) or not isinstance(value, str):
                raise ValueError(
                    "The event_data must be a dict mapping strings to strings"
                )
        # Start time in us.
        ts = int(self.start_time * 1000 * 1000)
        # Duration in us.
        dur = int((time.time() - self.start_time) * 1000 * 1000)
        # Log the event in the Google Chrome trace event format.
        event = {
            "name": self.event_name,
            "pid": self.operator.config.name,
            "tid": 1,
            "ts": ts,
            "dur": dur,
            "ph": "X",  # "X" = complete event: carries both a start and a duration.
            "args": self.event_data,
        }
        self.operator.add_trace_event(event)
9 | #[pyclass] 10 | pub struct PyExtractStream { 11 | extract_stream: ExtractStream>, 12 | } 13 | 14 | #[pymethods] 15 | impl PyExtractStream { 16 | #[new] 17 | fn new(py_stream: &PyOperatorStream) -> Self { 18 | Self { 19 | extract_stream: ExtractStream::new(&py_stream.stream), 20 | } 21 | } 22 | 23 | fn is_closed(&self) -> bool { 24 | self.extract_stream.is_closed() 25 | } 26 | 27 | fn read(&mut self, py: Python) -> PyResult { 28 | let result = py.allow_threads(|| self.extract_stream.read()); 29 | match result { 30 | Ok(msg) => Ok(PyMessage::from(msg)), 31 | Err(e) => Err(exceptions::PyException::new_err(format!( 32 | "Unable to to read from stream {}: {:?}", 33 | self.extract_stream.id(), 34 | e 35 | ))), 36 | } 37 | } 38 | 39 | fn try_read(&mut self) -> PyResult> { 40 | match self.extract_stream.try_read() { 41 | Ok(msg) => Ok(Some(PyMessage::from(msg))), 42 | Err(TryReadError::Empty) => Ok(None), 43 | Err(e) => Err(exceptions::PyException::new_err(format!( 44 | "Unable to to read from stream {}: {:?}", 45 | self.extract_stream.id(), 46 | e 47 | ))), 48 | } 49 | } 50 | 51 | fn name(&self) -> String { 52 | self.extract_stream.name() 53 | } 54 | 55 | fn id(&self) -> String { 56 | format!("{}", self.extract_stream.id()) 57 | } 58 | } 59 | -------------------------------------------------------------------------------- /python/src/py_stream/py_write_stream.rs: -------------------------------------------------------------------------------- 1 | use erdos::{ 2 | dataflow::stream::{errors::SendError, WriteStreamT}, 3 | dataflow::{Message, WriteStream}, 4 | }; 5 | use pyo3::{create_exception, exceptions, prelude::*}; 6 | 7 | use crate::PyMessage; 8 | 9 | // Define errors that can be raised by a write stream. 
10 | create_exception!(SendError, TimestampError, exceptions::PyException); 11 | create_exception!(SendError, ClosedError, exceptions::PyException); 12 | create_exception!(SendError, IOError, exceptions::PyException); 13 | create_exception!(SendError, SerializationError, exceptions::PyException); 14 | 15 | /// The internal Python abstraction over a `WriteStream`. 16 | /// 17 | /// This class is exposed on the Python interface as `erdos.streams.WriteStream`. 18 | #[pyclass] 19 | pub struct PyWriteStream { 20 | pub write_stream: WriteStream>, 21 | } 22 | 23 | #[pymethods] 24 | impl PyWriteStream { 25 | fn is_closed(&self) -> bool { 26 | self.write_stream.is_closed() 27 | } 28 | 29 | fn name(&self) -> String { 30 | self.write_stream.name() 31 | } 32 | 33 | fn id(&self) -> String { 34 | format!("{}", self.write_stream.id()) 35 | } 36 | 37 | fn send(&mut self, msg: &PyMessage) -> PyResult<()> { 38 | self.write_stream.send(Message::from(msg)).map_err(|e| { 39 | let error_str = format!("Error sending message on {}", self.write_stream.id()); 40 | match e { 41 | SendError::TimestampError => TimestampError::new_err(error_str), 42 | SendError::Closed => ClosedError::new_err(error_str), 43 | SendError::IOError => IOError::new_err(error_str), 44 | SendError::SerializationError => SerializationError::new_err(error_str), 45 | } 46 | }) 47 | } 48 | } 49 | 50 | impl From>> for PyWriteStream { 51 | fn from(write_stream: WriteStream>) -> Self { 52 | Self { write_stream } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /erdos/src/dataflow/time.rs: -------------------------------------------------------------------------------- 1 | use std::{cmp::Ordering, fmt::Debug}; 2 | 3 | use abomonation_derive::Abomonation; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | // Alias to [`IntTimestamp`] in case more timestamp variants are added. 
7 | pub type Timestamp = IntTimestamp; 8 | 9 | /// Information about when an operator released a message. 10 | #[derive(Debug, Clone, Serialize, Deserialize, Abomonation, PartialEq, Eq, Hash)] 11 | pub enum IntTimestamp { 12 | /// The timestamp used to close the streams. It is the greatest timestamp, and thus signifies 13 | /// completion of all data. 14 | Top, 15 | /// The multi-dimension timestamp conveyed by this instance. 16 | Time(Vec), 17 | /// The lowest timestamp that any stream starts with. 18 | Bottom, 19 | } 20 | 21 | impl IntTimestamp { 22 | pub fn is_top(&self) -> bool { 23 | *self == IntTimestamp::Top 24 | } 25 | 26 | pub fn is_bottom(&self) -> bool { 27 | *self == IntTimestamp::Bottom 28 | } 29 | } 30 | 31 | impl Ord for IntTimestamp { 32 | fn cmp(&self, other: &Self) -> Ordering { 33 | match (self, other) { 34 | // Top and Bottom are always equal. 35 | (IntTimestamp::Top, IntTimestamp::Top) => Ordering::Equal, 36 | (IntTimestamp::Bottom, IntTimestamp::Bottom) => Ordering::Equal, 37 | 38 | // Top is bigger than other timestamps. 39 | (IntTimestamp::Top, _) => Ordering::Greater, 40 | (_, IntTimestamp::Top) => Ordering::Less, 41 | 42 | // Bottom is less than other timestamps. 43 | (_, IntTimestamp::Bottom) => Ordering::Greater, 44 | (IntTimestamp::Bottom, _) => Ordering::Less, 45 | 46 | // Time should be compared lexicographically. 47 | (IntTimestamp::Time(a), IntTimestamp::Time(b)) => a.cmp(b), 48 | } 49 | } 50 | } 51 | 52 | impl PartialOrd for IntTimestamp { 53 | fn partial_cmp(&self, other: &Self) -> Option { 54 | Some(self.cmp(other)) 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /python/src/py_message.rs: -------------------------------------------------------------------------------- 1 | use pyo3::{prelude::*, types::PyBytes}; 2 | use std::sync::Arc; 3 | 4 | use erdos::dataflow::Message; 5 | 6 | use crate::PyTimestamp; 7 | 8 | /// The Python version of an ERDOS message. 
9 | /// 10 | /// This class provides the API that is wrapped around by `erdos.Message` in Python. 11 | #[pyclass] 12 | pub(crate) struct PyMessage { 13 | msg: Message>, 14 | } 15 | 16 | #[pymethods] 17 | impl PyMessage { 18 | #[new] 19 | fn new(timestamp: PyTimestamp, data: Option<&PyBytes>) -> PyResult { 20 | let msg = match (timestamp, data) { 21 | (t, Some(d)) => Message::new_message(t.into(), Vec::from(d.as_bytes())), 22 | (t, None) => Message::new_watermark(t.into()), 23 | }; 24 | Ok(Self { msg }) 25 | } 26 | 27 | #[getter(data)] 28 | fn data<'a>(&self, py: Python<'a>) -> Option<&'a PyBytes> { 29 | match &self.msg { 30 | Message::TimestampedData(d) => Some(PyBytes::new(py, &d.data[..])), 31 | _ => None, 32 | } 33 | } 34 | 35 | #[getter(timestamp)] 36 | fn timestamp(&self) -> PyTimestamp { 37 | self.msg.timestamp().clone().into() 38 | } 39 | 40 | fn is_timestamped_data(&self) -> bool { 41 | matches!(&self.msg, Message::TimestampedData(_)) 42 | } 43 | 44 | fn is_watermark(&self) -> bool { 45 | matches!(&self.msg, Message::Watermark(_)) 46 | } 47 | 48 | fn is_top_watermark(&self) -> bool { 49 | self.msg.is_top_watermark() 50 | } 51 | } 52 | 53 | impl From>> for PyMessage { 54 | fn from(msg: Message>) -> Self { 55 | Self { msg } 56 | } 57 | } 58 | 59 | impl From<&PyMessage> for Message> { 60 | fn from(py_message: &PyMessage) -> Self { 61 | py_message.msg.clone() 62 | } 63 | } 64 | 65 | impl From>>> for PyMessage { 66 | fn from(msg: Arc>>) -> Self { 67 | Self { 68 | msg: (*msg).clone(), 69 | } 70 | } 71 | } 72 | -------------------------------------------------------------------------------- /python/examples/simple_pipeline.py: -------------------------------------------------------------------------------- 1 | """Every second, sends the message count to 3 receivers. 2 | One receiver processes messages using a callback, 3 | one uses the blocking read() call, 4 | and one uses the non-blocking try_read() call. 
5 | """ 6 | 7 | import time 8 | from typing import Any 9 | 10 | import erdos 11 | from erdos.context import SinkContext 12 | from erdos.operator import Sink, Source 13 | from erdos.streams import ReadStream, WriteStream 14 | 15 | 16 | class SendOp(Source): 17 | def __init__(self): 18 | print("initializing source op") 19 | 20 | def run(self, write_stream: WriteStream): 21 | count = 0 22 | while True: 23 | msg = erdos.Message(erdos.Timestamp(coordinates=[count]), count) 24 | print("SendOp: sending {msg}".format(msg=msg)) 25 | write_stream.send(msg) 26 | 27 | count += 1 28 | time.sleep(1) 29 | 30 | 31 | class CallbackOp(Sink): 32 | def __init__(self): 33 | print("initializing callback op") 34 | 35 | def on_data(self, context: SinkContext, data: Any): 36 | print("CallbackOp: received {}".format(data)) 37 | 38 | 39 | class PullOp(Sink): 40 | def __init__(self): 41 | print("initializing pull op using read") 42 | 43 | def run(self, read_stream: ReadStream): 44 | while True: 45 | data = read_stream.read() 46 | print("PullOp: received {data}".format(data=data)) 47 | 48 | 49 | class TryPullOp(Sink): 50 | def __init__(self): 51 | print("initializing pull op using try_read") 52 | 53 | def run(self, read_stream: ReadStream): 54 | while True: 55 | data = read_stream.try_read() 56 | print("TryPullOp: received {data}".format(data=data)) 57 | time.sleep(0.5) 58 | 59 | 60 | def main(): 61 | """Creates and runs the dataflow graph.""" 62 | count_stream = erdos.connect_source(SendOp, erdos.operator.OperatorConfig()) 63 | erdos.connect_sink(CallbackOp, erdos.operator.OperatorConfig(), count_stream) 64 | erdos.connect_sink(PullOp, erdos.operator.OperatorConfig(), count_stream) 65 | erdos.connect_sink(TryPullOp, erdos.operator.OperatorConfig(), count_stream) 66 | 67 | erdos.run() 68 | 69 | 70 | if __name__ == "__main__": 71 | main() 72 | -------------------------------------------------------------------------------- /python/erdos/config.py: 
class OperatorConfig:
    """Configuration that the driver hands to an operator.

    Inside an operator the configuration is available as :code:`self.config`.
    The example below shows how a `LoggerOperator` can access the log file
    name passed to it by the driver::

        class LoggerOperator(erdos.Operator):
            def __init__(self, input_stream):
                # Set up a logger.
                _log = self.config.log_file_name
                self.logger = erdos.utils.setup_logging(self.config.name, _log)
    """

    def __init__(
        self,
        name: Optional[str] = None,
        flow_watermarks: bool = True,
        log_file_name: Optional[str] = None,
        csv_log_file_name: Optional[str] = None,
        profile_file_name: Optional[str] = None,
    ):
        # Stored privately; exposed read-only through the properties below.
        self._name = name
        self._flow_watermarks = flow_watermarks
        self._log_file_name = log_file_name
        self._csv_log_file_name = csv_log_file_name
        self._profile_file_name = profile_file_name

    @property
    def name(self) -> Optional[str]:
        """Name of the operator."""
        return self._name

    @property
    def flow_watermarks(self) -> bool:
        """Whether to automatically pass on the low watermark."""
        return self._flow_watermarks

    @property
    def log_file_name(self) -> Optional[str]:
        """File name used for logging."""
        return self._log_file_name

    @property
    def csv_log_file_name(self) -> Optional[str]:
        """File name used for logging to CSV."""
        return self._csv_log_file_name

    @property
    def profile_file_name(self) -> Optional[str]:
        """File name used for profiling an operator's performance."""
        return self._profile_file_name

    def __str__(self) -> str:
        return f"OperatorConfig(name={self.name}, flow_watermarks={self.flow_watermarks})"

    def __repr__(self) -> str:
        return str(self)
/// Entry point: builds and runs a node with a ROS-subscribing source feeding
/// an ERDOS sink.
fn main() {
    // Parse CLI flags into the node configuration.
    let args = erdos::new_app("ERDOS").get_matches();
    let mut node = Node::new(Configuration::from_args(&args));

    // Creates FromRosOperator which subscribes to topic "chatter" and converts ROS messages
    // to ERDOS messages.
    // NOTE(review): the closure's return type annotation appears truncated
    // (FromRosOperator's type parameters are missing) — confirm against upstream.
    let ros_source_config = OperatorConfig::new().name("FromRosOperator");
    let ros_source = erdos::connect_source(
        move || -> FromRosOperator {
            FromRosOperator::new("chatter", ros_to_erdos)
        },
        ros_source_config,
    );

    // Connects SinkOperator to ERDOS pipeline.
    let erdos_sink_from_ros = OperatorConfig::new().name("SinkOperator");
    erdos::connect_sink(SinkOperator::new, || {}, erdos_sink_from_ros, &ros_source);

    // Blocks while the dataflow graph executes.
    node.run();
}
27 | 28 | Args: 29 | name: The name of the logger. 30 | log_file: The file to log the results to. (console if None) 31 | 32 | Returns: 33 | A :py:class:`logging.Logger` instance that can be used to log the 34 | required information. 35 | """ 36 | return _setup_logging(name, "%(message)s", None, log_file) 37 | 38 | 39 | def setup_trace_logging(name: str, log_file: Union[str, None] = None) -> logging.Logger: 40 | """Create a logger that logs the runtime statistics of methods decorated 41 | with the :py:func:`profile_method`. 42 | 43 | Args: 44 | name: The name of the logger. 45 | log_file: The name of the file to log to. (console if None) 46 | 47 | Returns: 48 | A :py:class:`logging.Logger` instance that can be used to log the 49 | required information. 50 | """ 51 | return _setup_logging(name, "%(message)s,", None, log_file) 52 | 53 | 54 | def _setup_logging( 55 | name: str, fmt: str, date_fmt: Optional[str], log_file: Optional[str] = None 56 | ) -> logging.Logger: 57 | if log_file is None: 58 | handler: logging.StreamHandler[Any] = logging.StreamHandler() 59 | else: 60 | handler = logging.FileHandler(log_file) 61 | 62 | handler.setLevel(logging.DEBUG) 63 | formatter = logging.Formatter(fmt=fmt, datefmt=date_fmt) 64 | handler.setFormatter(formatter) 65 | logger = logging.getLogger(name) 66 | logger.addHandler(handler) 67 | logger.propagate = False 68 | return logger 69 | -------------------------------------------------------------------------------- /erdos/src/communication/endpoints.rs: -------------------------------------------------------------------------------- 1 | use std::{fmt::Debug, sync::Arc}; 2 | 3 | use futures::FutureExt; 4 | use tokio::{sync::mpsc, task::unconstrained}; 5 | 6 | use crate::{ 7 | communication::{CommunicationError, InterProcessMessage, Serializable, TryRecvError}, 8 | dataflow::stream::StreamId, 9 | }; 10 | 11 | /// Endpoint to be used to send messages between operators. 
/// Endpoint to be used to send messages between operators.
// NOTE(review): the generic arguments on this enum appear truncated by
// extraction (the senders' item types are missing) — confirm the declarations
// against the upstream source before editing.
#[derive(Clone)]
pub enum SendEndpoint {
    /// Send messages to an operator running in the same process.
    InterThread(mpsc::UnboundedSender),
    /// Send messages to operators running on a different node.
    /// Data is first sent to [`DataSender`](crate::communication::senders::DataSender)
    /// which encodes and sends the message on a TCP stream.
    InterProcess(StreamId, mpsc::UnboundedSender),
}

/// Zero-copy implementation of the endpoint.
/// Because messages are wrapped in an [`Arc`], the payload isn't copied when
/// sent between endpoints within the node.
impl SendEndpoint> {
    /// Forwards an `Arc`-wrapped message; inter-process sends additionally
    /// attach the stream id so the TCP sender thread can route it.
    pub fn send(&mut self, msg: Arc) -> Result<(), CommunicationError> {
        match self {
            Self::InterThread(sender) => sender.send(msg).map_err(CommunicationError::from),
            Self::InterProcess(stream_id, sender) => sender
                .send(InterProcessMessage::new_deserialized(msg, *stream_id))
                .map_err(CommunicationError::from),
        }
    }
}
/// Wrapper used to update pushers in the TCP receiving.
///
/// Stores `mpsc::Sender`s to receivers on which `PusherT` can be sent to inform
/// the receivers that data should be sent to new operators.
pub struct ChannelsToReceivers {
    // We do not use a tokio::mpsc::UnboundedSender because that only provides a blocking API.
    // It does not allow us to just check if the channel has a new message. We need this API in
    // the receivers, which regularly check if there are new pushers available.
    // NOTE(review): this field's element type appears truncated by extraction
    // (compare with `add_sender`'s parameter below) — confirm against upstream.
    senders: Vec)>>,
}

impl ChannelsToReceivers {
    pub fn new() -> Self {
        ChannelsToReceivers {
            senders: Vec::new(),
        }
    }

    /// Adds a `mpsc::Sender` to a new receiver thread.
    pub fn add_sender(&mut self, sender: UnboundedSender<(StreamId, Box)>) {
        self.senders.push(sender);
    }

    /// Updates the receivers about the existence of a new operator.
    ///
    /// It sends a `PusherT` message on all receiving threads.
    pub fn send(&mut self, stream_id: StreamId, pusher: Box) {
        // Each receiver thread gets its own clone of the pusher.
        for sender in self.senders.iter_mut() {
            let msg = (stream_id, pusher.clone());
            sender.send(msg).unwrap();
        }
    }
}
45 | #[derive(Default)] 46 | pub struct ChannelsToSenders { 47 | /// The ith sender corresponds to a TCP connection to the ith node. 48 | senders: HashMap>, 49 | } 50 | 51 | impl ChannelsToSenders { 52 | pub fn new() -> Self { 53 | ChannelsToSenders { 54 | senders: HashMap::new(), 55 | } 56 | } 57 | 58 | /// Adds a `mpsc::UnboundedSender` to a node. 59 | pub fn add_sender( 60 | &mut self, 61 | node_id: NodeId, 62 | sender: tokio::sync::mpsc::UnboundedSender, 63 | ) { 64 | self.senders.insert(node_id, sender); 65 | } 66 | 67 | /// Returns the associated `mpsc::UnboundedSender` for a given node. 68 | pub fn clone_channel( 69 | &self, 70 | node_id: NodeId, 71 | ) -> Option> { 72 | self.senders.get(&node_id).cloned() 73 | } 74 | } 75 | -------------------------------------------------------------------------------- /python/src/py_operators/mod.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use erdos::dataflow::operator::OperatorConfig; 4 | use pyo3::{prelude::*, types::*}; 5 | 6 | // Private submodules 7 | mod py_one_in_one_out; 8 | mod py_one_in_two_out; 9 | mod py_sink; 10 | mod py_source; 11 | mod py_two_in_one_out; 12 | 13 | // Crate-level exports 14 | pub(crate) use py_one_in_one_out::*; 15 | pub(crate) use py_one_in_two_out::*; 16 | pub(crate) use py_sink::*; 17 | pub(crate) use py_source::*; 18 | pub(crate) use py_two_in_one_out::*; 19 | 20 | fn construct_operator( 21 | py_operator_type: Arc, 22 | py_operator_args: Arc, 23 | py_operator_kwargs: Arc, 24 | py_operator_config: Arc, 25 | config: OperatorConfig, 26 | ) -> Arc { 27 | // TODO (Sukrit): The function should return a Result object instead of echoing errors to 28 | // standard output. 
29 | Python::with_gil(|py| -> Arc { 30 | let locals = PyDict::new(py); 31 | if let Some(e) = locals 32 | .set_item("Operator", py_operator_type.clone_ref(py)) 33 | .err() 34 | { 35 | e.print(py) 36 | } 37 | if let Some(e) = locals.set_item("op_id", format!("{}", config.id)).err() { 38 | e.print(py) 39 | } 40 | if let Some(e) = locals 41 | .set_item("args", py_operator_args.clone_ref(py)) 42 | .err() 43 | { 44 | e.print(py) 45 | } 46 | if let Some(e) = locals 47 | .set_item("kwargs", py_operator_kwargs.clone_ref(py)) 48 | .err() 49 | { 50 | e.print(py) 51 | } 52 | if let Some(e) = locals 53 | .set_item("config", py_operator_config.clone_ref(py)) 54 | .err() 55 | { 56 | e.print(py) 57 | } 58 | if let Some(e) = locals.set_item("op_name", config.get_name()).err() { 59 | e.print(py) 60 | } 61 | 62 | // Initialize the operator. 63 | let init_result = py.run( 64 | r#" 65 | import uuid, erdos 66 | 67 | # Create the operator. 68 | operator = Operator.__new__(Operator) 69 | operator._id = uuid.UUID(op_id) 70 | operator._config = config 71 | operator._trace_event_logger = erdos.utils.setup_trace_logging( 72 | "{}-profile".format(op_name), 73 | config.profile_file_name, 74 | ) 75 | operator.__init__(*args, **kwargs) 76 | "#, 77 | None, 78 | Some(locals), 79 | ); 80 | if let Err(e) = init_result { 81 | e.print(py); 82 | } 83 | 84 | // Retrieve the constructed operator. 85 | Arc::new( 86 | py.eval("operator", None, Some(locals)) 87 | .unwrap() 88 | .to_object(py), 89 | ) 90 | }) 91 | } 92 | -------------------------------------------------------------------------------- /python/src/py_timestamp.rs: -------------------------------------------------------------------------------- 1 | use erdos::dataflow::Timestamp; 2 | use pyo3::{basic::CompareOp, exceptions, prelude::*}; 3 | 4 | /// A Python version of ERDOS' Timestamp. 5 | /// 6 | /// This struct breaks down the Timestamp enum into coordinates and booleans to represent Top and 7 | /// Bottom. 
8 | #[pyclass] 9 | #[derive(Clone)] 10 | pub(crate) struct PyTimestamp { 11 | timestamp: Timestamp, 12 | } 13 | 14 | #[pymethods] 15 | impl PyTimestamp { 16 | #[new] 17 | fn new(coordinates: Option>, is_top: bool, is_bottom: bool) -> PyResult { 18 | match (coordinates, is_top, is_bottom) { 19 | (None, true, false) => Ok(Self { 20 | timestamp: Timestamp::Top, 21 | }), 22 | (None, false, true) => Ok(Self { 23 | timestamp: Timestamp::Bottom, 24 | }), 25 | (Some(c), false, false) => Ok(Self { 26 | timestamp: Timestamp::Time(c), 27 | }), 28 | (_, _, _) => Err(exceptions::PyValueError::new_err( 29 | "Timestamp should either have coordinates or be either Top or Bottom.", 30 | )), 31 | } 32 | } 33 | 34 | fn is_top(&self) -> bool { 35 | self.timestamp.is_top() 36 | } 37 | 38 | fn is_bottom(&self) -> bool { 39 | self.timestamp.is_bottom() 40 | } 41 | 42 | fn coordinates(&self) -> Option> { 43 | match &self.timestamp { 44 | Timestamp::Time(c) => Some(c.clone()), 45 | _ => None, 46 | } 47 | } 48 | 49 | fn __str__(&self) -> PyResult { 50 | match &self.timestamp { 51 | Timestamp::Top => Ok(String::from("Timestamp::Top")), 52 | Timestamp::Bottom => Ok(String::from("Timestamp::Bottom")), 53 | Timestamp::Time(c) => Ok(format!("Timestamp::Time({:?})", c)), 54 | } 55 | } 56 | 57 | fn __repr__(&self) -> PyResult { 58 | self.__str__() 59 | } 60 | 61 | fn __richcmp__(&self, other: PyTimestamp, op: CompareOp) -> PyResult { 62 | match op { 63 | CompareOp::Lt => Ok(self.timestamp < other.timestamp), 64 | CompareOp::Le => Ok(self.timestamp <= other.timestamp), 65 | CompareOp::Eq => Ok(self.timestamp == other.timestamp), 66 | CompareOp::Ne => Ok(self.timestamp != other.timestamp), 67 | CompareOp::Gt => Ok(self.timestamp > other.timestamp), 68 | CompareOp::Ge => Ok(self.timestamp >= other.timestamp), 69 | } 70 | } 71 | } 72 | 73 | impl From for PyTimestamp { 74 | fn from(timestamp: Timestamp) -> Self { 75 | Self { timestamp } 76 | } 77 | } 78 | 79 | impl From for Timestamp { 80 | fn 
class SendOp(Source):
    """Sends `frequency` messages per second."""

    def __init__(self, frequency):
        print("Initializing send op with frequency {}".format(frequency))
        self.frequency = frequency

    def run(self, write_stream: WriteStream):
        # Emit a data message followed by its watermark, once per period.
        count = 0
        while True:
            ts = erdos.Timestamp(coordinates=[count])

            data_msg = erdos.Message(ts, count)
            print("{name}: sending {msg}".format(name=self.config.name, msg=data_msg))
            write_stream.send(data_msg)

            wm = erdos.WatermarkMessage(ts)
            print(
                "{name}: sending watermark {watermark}".format(
                    name=self.config.name, watermark=wm
                )
            )
            write_stream.send(wm)

            count += 1
            time.sleep(1 / self.frequency)
/// Errors raised when something went wrong while reading from a `ReadStream`.
#[derive(Debug, PartialEq)]
pub enum ReadError {
    /// Message deserialization failed.
    SerializationError,
    /// The channel or the TCP stream has been closed.
    Disconnected,
    /// Stream is closed and can no longer send messages.
    Closed,
}
26 | Closed, 27 | } 28 | 29 | impl From for TryReadError { 30 | fn from(e: TryRecvError) -> Self { 31 | match e { 32 | TryRecvError::Empty => Self::Empty, 33 | TryRecvError::Disconnected => Self::Disconnected, 34 | TryRecvError::BincodeError(_) => Self::SerializationError, 35 | } 36 | } 37 | } 38 | 39 | /// Error raised when something went wrong while sending on a `WriteStream`. 40 | #[derive(Debug, PartialEq)] 41 | pub enum SendError { 42 | /// Message serialization failed. 43 | SerializationError, 44 | /// There was a network or a `mpsc::channel` error. 45 | IOError, 46 | /// Timestamp or watermark is smaller or equal to the low watermark. 47 | TimestampError, 48 | /// Stream is closed and can no longer send messages. 49 | Closed, 50 | } 51 | 52 | impl From for SendError { 53 | fn from(e: CommunicationError) -> Self { 54 | match e { 55 | CommunicationError::NoCapacity | CommunicationError::Disconnected => SendError::IOError, 56 | CommunicationError::SerializeNotImplemented 57 | | CommunicationError::DeserializeNotImplemented => { 58 | eprintln!("Serialize not implemented"); 59 | SendError::SerializationError 60 | } 61 | CommunicationError::AbomonationError(error) => { 62 | eprintln!("Abomonation error {}", error); 63 | SendError::SerializationError 64 | } 65 | CommunicationError::BincodeError(error) => { 66 | eprintln!("Bincode error {}", error); 67 | SendError::SerializationError 68 | } 69 | CommunicationError::IoError(io_error) => { 70 | eprintln!("Got write stream IOError {}", io_error); 71 | SendError::IOError 72 | } 73 | } 74 | } 75 | } 76 | -------------------------------------------------------------------------------- /erdos/src/dataflow/message.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | use abomonation_derive::Abomonation; 4 | use serde::{Deserialize, Serialize}; 5 | 6 | use crate::dataflow::time::Timestamp; 7 | 8 | /// Trait for valid message data. 
The data must be clonable, sendable between threads and 9 | /// serializable. 10 | // TODO: somehow add the deserialize requirement. 11 | pub trait Data: 'static + Clone + Send + Sync + Debug + Serialize {} 12 | /// Any type that is clonable, sendable, and can be serialized and dereserialized implements `Data`. 13 | impl Data for T where 14 | for<'a> T: 'static + Clone + Send + Sync + Debug + Serialize + Deserialize<'a> 15 | { 16 | } 17 | 18 | /// Operators send messages on streams. A message can be either a `Watermark` or a `TimestampedData`. 19 | #[derive(Clone, Debug, Serialize, Deserialize, Abomonation)] 20 | pub enum Message { 21 | TimestampedData(TimestampedData), 22 | Watermark(Timestamp), 23 | } 24 | 25 | impl Message { 26 | /// Creates a new `TimestampedData` message. 27 | pub fn new_message(timestamp: Timestamp, data: D) -> Message { 28 | Self::TimestampedData(TimestampedData::new(timestamp, data)) 29 | } 30 | 31 | /// Creates a new `Watermark` message. 32 | pub fn new_watermark(timestamp: Timestamp) -> Message { 33 | Self::Watermark(timestamp) 34 | } 35 | 36 | pub fn is_top_watermark(&self) -> bool { 37 | if let Self::Watermark(t) = self { 38 | t.is_top() 39 | } else { 40 | false 41 | } 42 | } 43 | 44 | pub fn data(&self) -> Option<&D> { 45 | match self { 46 | Self::TimestampedData(d) => Some(&d.data), 47 | _ => None, 48 | } 49 | } 50 | 51 | pub fn timestamp(&self) -> &Timestamp { 52 | match self { 53 | Self::TimestampedData(d) => &d.timestamp, 54 | Self::Watermark(t) => t, 55 | } 56 | } 57 | } 58 | 59 | impl PartialEq for Message { 60 | fn eq(&self, other: &Self) -> bool { 61 | match (self, other) { 62 | (Self::TimestampedData(d1), Self::TimestampedData(d2)) => d1 == d2, 63 | (Self::Watermark(w1), Self::Watermark(w2)) => w1 == w2, 64 | _ => false, 65 | } 66 | } 67 | } 68 | 69 | /// Data message which operators send along streams. 
70 | #[derive(Debug, Clone, Serialize, Deserialize, Abomonation)] 71 | pub struct TimestampedData { 72 | /// Timestamp of the message. 73 | pub timestamp: Timestamp, 74 | /// Data is an option in case one wants to send null messages. 75 | pub data: D, 76 | } 77 | 78 | impl TimestampedData { 79 | pub fn new(timestamp: Timestamp, data: D) -> Self { 80 | Self { timestamp, data } 81 | } 82 | } 83 | 84 | impl PartialEq for TimestampedData { 85 | fn eq(&self, other: &Self) -> bool { 86 | self.timestamp == other.timestamp && self.data == other.data 87 | } 88 | } 89 | -------------------------------------------------------------------------------- /erdos/src/communication/pusher.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | any::Any, 3 | fmt::{self, Debug}, 4 | sync::Arc, 5 | }; 6 | 7 | use bytes::BytesMut; 8 | use serde::Deserialize; 9 | 10 | use crate::{ 11 | communication::{ 12 | serializable::{Deserializable, DeserializedMessage, Serializable}, 13 | CommunicationError, SendEndpoint, 14 | }, 15 | dataflow::Data, 16 | }; 17 | 18 | /// Trait used to deserialize a message and send it on a collection of [`SendEndpoint`]s 19 | /// without exposing the message's type to owner of the [`PusherT`] trait object. 20 | pub trait PusherT: Send { 21 | fn as_any(&mut self) -> &mut dyn Any; 22 | /// To be used to clone a boxed pusher. 23 | fn box_clone(&self) -> Box; 24 | /// Creates message from bytes and sends it to endpoints. 25 | fn send_from_bytes(&mut self, buf: BytesMut) -> Result<(), CommunicationError>; 26 | } 27 | 28 | /// Internal structure used to send data on a collection of [`SendEndpoint`]s. 29 | #[derive(Clone)] 30 | pub struct Pusher { 31 | // TODO: We might want to order the endpoints by the priority of their tasks. 32 | endpoints: Vec>, 33 | } 34 | 35 | /// Zero-copy implementation of the pusher. 
36 | impl Pusher> { 37 | pub fn new() -> Self { 38 | Self { 39 | endpoints: Vec::new(), 40 | } 41 | } 42 | 43 | pub fn add_endpoint(&mut self, endpoint: SendEndpoint>) { 44 | self.endpoints.push(endpoint); 45 | } 46 | 47 | pub fn send(&mut self, msg: Arc) -> Result<(), CommunicationError> { 48 | for endpoint in self.endpoints.iter_mut() { 49 | endpoint.send(Arc::clone(&msg))?; 50 | } 51 | Ok(()) 52 | } 53 | } 54 | 55 | impl Clone for Box { 56 | /// Clones a boxed pusher. 57 | fn clone(&self) -> Box { 58 | self.box_clone() 59 | } 60 | } 61 | 62 | /// The [`PusherT`] trait is implemented only for the [`Data`] pushers. 63 | impl PusherT for Pusher> 64 | where 65 | for<'de> D: Data + Deserialize<'de>, 66 | { 67 | fn as_any(&mut self) -> &mut dyn Any { 68 | self 69 | } 70 | 71 | fn box_clone(&self) -> Box { 72 | Box::new((*self).clone()) 73 | } 74 | 75 | fn send_from_bytes(&mut self, mut buf: BytesMut) -> Result<(), CommunicationError> { 76 | if !self.endpoints.is_empty() { 77 | let msg = match Deserializable::decode(&mut buf)? 
{ 78 | DeserializedMessage::::Owned(msg) => msg, 79 | DeserializedMessage::::Ref(msg) => msg.clone(), 80 | }; 81 | let msg_arc = Arc::new(msg); 82 | self.send(msg_arc)?; 83 | } 84 | Ok(()) 85 | } 86 | } 87 | 88 | impl fmt::Debug for Box { 89 | fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 90 | write!(f, "Box {{ }}") 91 | } 92 | } 93 | -------------------------------------------------------------------------------- /erdos/src/communication/control_message_codec.rs: -------------------------------------------------------------------------------- 1 | use byteorder::{ByteOrder, NetworkEndian, WriteBytesExt}; 2 | use bytes::BytesMut; 3 | use std::fmt::Debug; 4 | use tokio_util::codec::{Decoder, Encoder}; 5 | 6 | use crate::communication::{CodecError, ControlMessage}; 7 | 8 | #[derive(Debug)] 9 | pub struct ControlMessageCodec { 10 | msg_size: Option, 11 | } 12 | 13 | impl ControlMessageCodec { 14 | pub fn new() -> ControlMessageCodec { 15 | ControlMessageCodec { msg_size: None } 16 | } 17 | 18 | fn try_read_msg_size(&self, buf: &mut BytesMut) -> Option { 19 | if buf.len() >= 4 { 20 | let msg_size_bytes = buf.split_to(4); 21 | let msg_size = NetworkEndian::read_u32(&msg_size_bytes); 22 | Some(msg_size as usize) 23 | } else { 24 | None 25 | } 26 | } 27 | 28 | fn try_read_message(&mut self, buf: &mut BytesMut) -> Option { 29 | let msg_size = self.msg_size.unwrap(); 30 | if buf.len() >= msg_size { 31 | let msg_bytes = buf.split_to(msg_size); 32 | let msg = bincode::deserialize(&msg_bytes) 33 | .map_err(CodecError::from) 34 | .unwrap(); 35 | self.msg_size = None; 36 | Some(msg) 37 | } else { 38 | None 39 | } 40 | } 41 | } 42 | 43 | impl Decoder for ControlMessageCodec { 44 | type Item = ControlMessage; 45 | type Error = CodecError; 46 | 47 | fn decode(&mut self, buf: &mut BytesMut) -> Result, CodecError> { 48 | if self.msg_size.is_some() { 49 | // We already have a message size. 
50 | Ok(self.try_read_message(buf)) 51 | } else { 52 | // Try to read the message size. 53 | if let Some(msg_size) = self.try_read_msg_size(buf) { 54 | self.msg_size = Some(msg_size); 55 | Ok(self.try_read_message(buf)) 56 | } else { 57 | // We need more bytes before we can read the message size. 58 | Ok(None) 59 | } 60 | } 61 | } 62 | } 63 | 64 | impl Encoder for ControlMessageCodec { 65 | type Error = CodecError; 66 | 67 | fn encode(&mut self, msg: ControlMessage, buf: &mut BytesMut) -> Result<(), CodecError> { 68 | // Get the serialized size of the message header. 69 | let msg_size = bincode::serialized_size(&msg).map_err(CodecError::from)? as u32; 70 | // Write the size of the serialized message. 71 | let mut size_buffer: Vec = Vec::new(); 72 | size_buffer.write_u32::(msg_size)?; 73 | buf.extend(size_buffer); 74 | // Serialize and write the message. 75 | let serialized_msg = bincode::serialize(&msg).map_err(CodecError::from)?; 76 | buf.extend(serialized_msg); 77 | Ok(()) 78 | } 79 | } 80 | 81 | impl Default for ControlMessageCodec { 82 | fn default() -> Self { 83 | Self::new() 84 | } 85 | } 86 | -------------------------------------------------------------------------------- /erdos/src/dataflow/stream/mod.rs: -------------------------------------------------------------------------------- 1 | //! Streams are used to send data between [operators](crate::dataflow::operator). 2 | //! 3 | //! In the driver, connections between operators are created by passing 4 | //! [`Stream`]s as arguments to the [connect functions](crate::dataflow::connect). 5 | //! 6 | //! During execution, operators can broadcast data to all downstream operators 7 | //! connected to a stream by invoking [`WriteStreamT::send`]. 8 | //! Likewise, operators can process data by implementing callbacks 9 | //! in the [operator traits](crate::dataflow::operator), 10 | //! or by calling [`ReadStream::read`] or [`ReadStream::try_read`] in an 11 | //! operator's `run` method. 12 | //! 13 | //! 
The driver can interact with an application by sending messages on an 14 | //! [`IngestStream`] or reading messages from an [`ExtractStream`]. 15 | //! 16 | //! Messages sent on a stream are broadcast to all connected operators, 17 | //! using zero-copy communication for operators on the same node. 18 | //! Messages sent across nodes are serialized using 19 | //! [abomonation](https://github.com/TimelyDataflow/abomonation) if possible, 20 | //! before falling back to [bincode](https://github.com/servo/bincode). 21 | use std::marker::PhantomData; 22 | 23 | use crate::dataflow::{Data, Message}; 24 | 25 | // Private submodules 26 | mod extract_stream; 27 | mod ingest_stream; 28 | mod loop_stream; 29 | mod read_stream; 30 | mod write_stream; 31 | 32 | // Public submodules 33 | pub mod errors; 34 | 35 | // Private imports 36 | use errors::SendError; 37 | 38 | // Public exports 39 | pub use extract_stream::ExtractStream; 40 | pub use ingest_stream::IngestStream; 41 | #[doc(hidden)] 42 | pub use loop_stream::LoopStream; 43 | pub use read_stream::ReadStream; 44 | pub use write_stream::WriteStream; 45 | 46 | use super::graph::default_graph; 47 | 48 | pub type StreamId = crate::Uuid; 49 | 50 | /// Write stream trait which allows specialized implementations of 51 | /// [`send`](WriteStreamT::send) depending on the serialization library used. 52 | pub trait WriteStreamT { 53 | /// Sends a messsage to a channel. 
54 | fn send(&mut self, msg: Message) -> Result<(), SendError>; 55 | } 56 | 57 | pub trait Stream { 58 | fn name(&self) -> String { 59 | default_graph::get_stream_name(&self.id()) 60 | } 61 | fn set_name(&mut self, name: &str) { 62 | default_graph::set_stream_name(&self.id(), name); 63 | } 64 | fn id(&self) -> StreamId; 65 | } 66 | 67 | #[derive(Clone)] 68 | pub struct OperatorStream { 69 | /// The unique ID of the stream (automatically generated by the constructor) 70 | id: StreamId, 71 | phantom: PhantomData, 72 | } 73 | 74 | #[allow(dead_code)] 75 | impl OperatorStream { 76 | /// Creates a new stream. 77 | pub(crate) fn new() -> Self { 78 | let id = StreamId::new_deterministic(); 79 | 80 | Self { 81 | id, 82 | phantom: PhantomData, 83 | } 84 | } 85 | } 86 | 87 | impl Stream for OperatorStream { 88 | fn id(&self) -> StreamId { 89 | self.id 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /examples/erdos_to_ros.rs: -------------------------------------------------------------------------------- 1 | /// Publishes erdos messages of type String to ROS topic "chatter". 2 | /// Pipeline is as follows: 3 | /// ERDOS SourceOperator -> ToRosOperator converts and publishes ROS messages 4 | extern crate erdos; 5 | 6 | use std::{thread, time::Duration}; 7 | 8 | use erdos::dataflow::operator::*; 9 | use erdos::dataflow::operators::ros::*; 10 | use erdos::dataflow::stream::WriteStreamT; 11 | use erdos::dataflow::Message; 12 | use erdos::dataflow::*; 13 | use erdos::node::Node; 14 | use erdos::Configuration; 15 | 16 | struct SourceOperator {} 17 | 18 | impl SourceOperator { 19 | pub fn new() -> Self { 20 | Self {} 21 | } 22 | } 23 | 24 | // This Source operator repeatedly sends String messages. 
25 | impl Source for SourceOperator { 26 | fn run(&mut self, config: &OperatorConfig, write_stream: &mut WriteStream) { 27 | tracing::info!("Running {}", config.get_name()); 28 | for t in 0..10 { 29 | let timestamp = Timestamp::Time(vec![t as u64]); 30 | write_stream 31 | .send(Message::new_message( 32 | timestamp.clone(), 33 | String::from("Hello from ERDOS"), 34 | )) 35 | .unwrap(); 36 | write_stream 37 | .send(Message::new_watermark(timestamp)) 38 | .unwrap(); 39 | thread::sleep(Duration::from_millis(100)); 40 | } 41 | } 42 | 43 | fn destroy(&mut self) { 44 | tracing::info!("Destroying Source Operator"); 45 | } 46 | } 47 | 48 | // Defines a function that converts an ERDOS message containing String data to a vector containing 49 | // a ROS String message. If the message is a Watermark and contains no data, an empty vector 50 | // is returned. 51 | fn erdos_to_ros(input: &Message) -> Vec { 52 | match input.data() { 53 | Some(x) => { 54 | vec![rosrust_msg::std_msgs::String { 55 | data: x.to_string(), 56 | }] 57 | } 58 | None => vec![], 59 | } 60 | } 61 | 62 | fn main() { 63 | let args = erdos::new_app("ERDOS").get_matches(); 64 | let mut node = Node::new(Configuration::from_args(&args)); 65 | 66 | // Creates a Source node on the ERDOS side which contains the messages of interest to publish 67 | // to ROS. 68 | let source_config = OperatorConfig::new().name("SourceOperator"); 69 | let source_stream = erdos::connect_source(SourceOperator::new, source_config); 70 | 71 | // Connects a ToRosOperator as a Sink node in the ERDOS pipeline. 72 | // The operator will convert the messages using conversion function above, and publish the 73 | // messages on the ROS topic "chatter". 
74 | let ros_sink_config = OperatorConfig::new().name("ToRosOperator"); 75 | erdos::connect_sink( 76 | move || -> ToRosOperator { 77 | ToRosOperator::new("chatter", erdos_to_ros) 78 | }, 79 | || {}, 80 | ros_sink_config, 81 | &source_stream, 82 | ); 83 | 84 | node.run(); 85 | } 86 | -------------------------------------------------------------------------------- /erdos/src/communication/errors.rs: -------------------------------------------------------------------------------- 1 | use std::io; 2 | use tokio::sync::mpsc; 3 | 4 | /// Error raised by the communication layer. 5 | #[derive(Debug)] 6 | pub enum CommunicationError { 7 | /// The channel has no capacity left. 8 | NoCapacity, 9 | /// The channel or the TCP stream has been closed. 10 | Disconnected, 11 | /// Type does not support serialization. 12 | SerializeNotImplemented, 13 | /// Type does not support deserialization. 14 | DeserializeNotImplemented, 15 | /// Failed to serialize/deserialize data with Abomonation. 16 | AbomonationError(io::Error), 17 | /// Failed to serialize/deserialize data with Bincode. 18 | BincodeError(bincode::Error), 19 | /// Failed to read/write data from/to the TCP stream. 
20 | IoError(io::Error), 21 | } 22 | 23 | impl From for CommunicationError { 24 | fn from(e: bincode::Error) -> Self { 25 | CommunicationError::BincodeError(e) 26 | } 27 | } 28 | 29 | impl From for CommunicationError { 30 | fn from(e: io::Error) -> Self { 31 | CommunicationError::IoError(e) 32 | } 33 | } 34 | 35 | impl From> for CommunicationError { 36 | fn from(_e: std::sync::mpsc::SendError) -> Self { 37 | CommunicationError::Disconnected 38 | } 39 | } 40 | 41 | impl From> for CommunicationError { 42 | fn from(_e: mpsc::error::SendError) -> Self { 43 | CommunicationError::Disconnected 44 | } 45 | } 46 | 47 | impl From> for CommunicationError { 48 | fn from(e: mpsc::error::TrySendError) -> Self { 49 | match e { 50 | mpsc::error::TrySendError::Closed(_) => CommunicationError::Disconnected, 51 | mpsc::error::TrySendError::Full(_) => CommunicationError::NoCapacity, 52 | } 53 | } 54 | } 55 | 56 | impl From for CommunicationError { 57 | fn from(e: CodecError) -> Self { 58 | match e { 59 | CodecError::IoError(e) => CommunicationError::IoError(e), 60 | CodecError::BincodeError(e) => CommunicationError::BincodeError(e), 61 | } 62 | } 63 | } 64 | 65 | /// Error that is raised by the `MessageCodec` or `ControlMessageCodec` when messages cannot 66 | /// be encoded or decoded. 67 | #[derive(Debug)] 68 | pub enum CodecError { 69 | IoError(io::Error), 70 | /// Bincode serialization/deserialization error. It is raised when the `MessageMetadata` serialization 71 | /// fails. This should not ever happen. 72 | BincodeError(bincode::Error), 73 | } 74 | 75 | impl From for CodecError { 76 | fn from(e: io::Error) -> CodecError { 77 | CodecError::IoError(e) 78 | } 79 | } 80 | 81 | impl From for CodecError { 82 | fn from(e: bincode::Error) -> Self { 83 | CodecError::BincodeError(e) 84 | } 85 | } 86 | 87 | #[derive(Debug)] 88 | pub enum TryRecvError { 89 | /// No data to read. 90 | Empty, 91 | /// The channel or the TCP stream has been closed. 
92 | Disconnected, 93 | /// Failed to serialize/deserialize data. 94 | BincodeError(bincode::Error), 95 | } 96 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/concat.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | 3 | use crate::{ 4 | dataflow::{ 5 | context::TwoInOneOutContext, 6 | operator::TwoInOneOut, 7 | stream::{OperatorStream, WriteStreamT}, 8 | Data, Message, Stream, 9 | }, 10 | OperatorConfig, 11 | }; 12 | 13 | /// Merges the contents of two streams. 14 | /// 15 | /// Data messages are sent on the merged stream in order of arrival. 16 | /// A watermark is sent when the minimum watermark received across both streams advances. 17 | /// In other words, when `min(left_watermark_timestamp, right_watermark_timestamp)` increases, 18 | /// the operator sends a watermark with an equivalent timestamp. 19 | /// 20 | /// ``` 21 | /// # use erdos::dataflow::{stream::{IngestStream, Stream}, operator::OperatorConfig, operators::ConcatOperator}; 22 | /// # let left_stream: IngestStream = IngestStream::new(); 23 | /// # let right_stream: IngestStream = IngestStream::new(); 24 | /// # 25 | /// let merged_stream = erdos::connect_two_in_one_out( 26 | /// ConcatOperator::new, 27 | /// || {}, 28 | /// OperatorConfig::new().name("ConcatOperator"), 29 | /// &left_stream, 30 | /// &right_stream, 31 | /// ); 32 | /// ``` 33 | #[derive(Default)] 34 | pub struct ConcatOperator {} 35 | 36 | impl ConcatOperator { 37 | pub fn new() -> Self { 38 | Self {} 39 | } 40 | } 41 | 42 | impl TwoInOneOut<(), D, D, D> for ConcatOperator 43 | where 44 | for<'a> D: Data + Deserialize<'a>, 45 | { 46 | fn on_left_data(&mut self, ctx: &mut TwoInOneOutContext<(), D>, data: &D) { 47 | let msg = Message::new_message(ctx.timestamp().clone(), data.clone()); 48 | ctx.write_stream().send(msg).unwrap(); 49 | } 50 | 51 | fn on_right_data(&mut self, ctx: &mut 
TwoInOneOutContext<(), D>, data: &D) { 52 | let msg = Message::new_message(ctx.timestamp().clone(), data.clone()); 53 | ctx.write_stream().send(msg).unwrap(); 54 | } 55 | 56 | fn on_watermark(&mut self, _ctx: &mut TwoInOneOutContext<(), D>) {} 57 | } 58 | 59 | /// Extension trait for merging the contents of two streams. 60 | /// 61 | /// Names the [`ConcatOperator`] using the names of the two merged streams. 62 | /// 63 | /// # Example 64 | /// ``` 65 | /// # use erdos::dataflow::{stream::{IngestStream, Stream}, operator::OperatorConfig, operators::Concat}; 66 | /// # let left_stream: IngestStream = IngestStream::new(); 67 | /// # let right_stream: IngestStream = IngestStream::new(); 68 | /// # 69 | /// let merged_stream = left_stream.concat(&right_stream); 70 | /// ``` 71 | pub trait Concat 72 | where 73 | D: Data + for<'a> Deserialize<'a>, 74 | { 75 | fn concat(&self, other: &dyn Stream) -> OperatorStream; 76 | } 77 | 78 | impl Concat for S 79 | where 80 | S: Stream, 81 | D: Data + for<'a> Deserialize<'a>, 82 | { 83 | fn concat(&self, other: &dyn Stream) -> OperatorStream { 84 | let name = format!("ConcatOp_{}_{}", self.name(), other.name()); 85 | crate::connect_two_in_one_out( 86 | ConcatOperator::new, 87 | || {}, 88 | OperatorConfig::new().name(&name), 89 | self, 90 | other, 91 | ) 92 | } 93 | } 94 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/filter.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::dataflow::{ 6 | context::OneInOneOutContext, 7 | message::Message, 8 | operator::{OneInOneOut, OperatorConfig}, 9 | stream::{OperatorStream, Stream, WriteStreamT}, 10 | Data, 11 | }; 12 | 13 | /// Filters an incoming stream of type D, retaining messages in the stream that 14 | /// the provided condition function evaluates to true when applied. 
15 | /// 16 | /// # Example 17 | /// The below example shows how to use a FilterOperator to keep only messages > 10 in an incoming 18 | /// stream of usize messages, and send them. 19 | /// 20 | /// ``` 21 | /// # use erdos::dataflow::{stream::IngestStream, operator::{OperatorConfig}, operators::{FilterOperator}}; 22 | /// # let source_stream = IngestStream::new(); 23 | /// // Add the mapping function as an argument to the operator via the OperatorConfig. 24 | /// let filter_config = OperatorConfig::new().name("FilterOperator"); 25 | /// let filter_stream = erdos::connect_one_in_one_out( 26 | /// || -> FilterOperator { FilterOperator::new(|a: &usize| -> bool { a > &10 }) }, 27 | /// || {}, 28 | /// filter_config, 29 | /// &source_stream, 30 | /// ); 31 | /// ``` 32 | pub struct FilterOperator 33 | where 34 | D: Data + for<'a> Deserialize<'a>, 35 | { 36 | filter_function: Arc bool + Send + Sync>, 37 | } 38 | 39 | impl FilterOperator 40 | where 41 | D: Data + for<'a> Deserialize<'a>, 42 | { 43 | pub fn new(filter_function: F) -> Self 44 | where 45 | F: 'static + Fn(&D) -> bool + Send + Sync, 46 | { 47 | Self { 48 | filter_function: Arc::new(filter_function), 49 | } 50 | } 51 | } 52 | 53 | impl OneInOneOut<(), D, D> for FilterOperator 54 | where 55 | D: Data + for<'a> Deserialize<'a>, 56 | { 57 | fn on_data(&mut self, ctx: &mut OneInOneOutContext<(), D>, data: &D) { 58 | let timestamp = ctx.timestamp().clone(); 59 | if (self.filter_function)(data) { 60 | ctx.write_stream() 61 | .send(Message::new_message(timestamp, data.clone())) 62 | .unwrap(); 63 | tracing::debug!( 64 | "{} @ {:?}: received {:?} and sent it", 65 | ctx.operator_config().get_name(), 66 | ctx.timestamp(), 67 | data, 68 | ); 69 | } 70 | } 71 | 72 | fn on_watermark(&mut self, _ctx: &mut OneInOneOutContext<(), D>) {} 73 | } 74 | 75 | // Extension trait for FilterOperator 76 | pub trait Filter 77 | where 78 | D: Data + for<'a> Deserialize<'a>, 79 | { 80 | fn filter(&self, filter_fn: F) -> OperatorStream 81 
| where 82 | F: 'static + Fn(&D) -> bool + Send + Sync + Clone; 83 | } 84 | 85 | impl Filter for S 86 | where 87 | S: Stream, 88 | D: Data + for<'a> Deserialize<'a>, 89 | { 90 | fn filter(&self, filter_fn: F) -> OperatorStream 91 | where 92 | F: 'static + Fn(&D) -> bool + Send + Sync + Clone, 93 | { 94 | let op_name = format!("FilterOp_{}", self.id()); 95 | 96 | crate::connect_one_in_one_out( 97 | move || -> FilterOperator { FilterOperator::new(filter_fn.clone()) }, 98 | || {}, 99 | OperatorConfig::new().name(&op_name), 100 | self, 101 | ) 102 | } 103 | } 104 | -------------------------------------------------------------------------------- /erdos/src/node/operator_executors/source_executor.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | use std::{future::Future, pin::Pin, sync::Arc}; 3 | use tokio::{ 4 | self, 5 | sync::{broadcast, mpsc}, 6 | }; 7 | 8 | use crate::{ 9 | dataflow::{ 10 | operator::{OperatorConfig, Source}, 11 | stream::WriteStreamT, 12 | Data, Message, Timestamp, WriteStream, 13 | }, 14 | node::{ 15 | lattice::ExecutionLattice, 16 | operator_executors::{OperatorExecutorHelper, OperatorExecutorT}, 17 | worker::{EventNotification, OperatorExecutorNotification, WorkerNotification}, 18 | }, 19 | OperatorId, 20 | }; 21 | 22 | pub struct SourceExecutor 23 | where 24 | O: Source, 25 | T: Data + for<'a> Deserialize<'a>, 26 | { 27 | config: OperatorConfig, 28 | operator: O, 29 | write_stream: WriteStream, 30 | helper: OperatorExecutorHelper, 31 | } 32 | 33 | impl SourceExecutor 34 | where 35 | O: Source, 36 | T: Data + for<'a> Deserialize<'a>, 37 | { 38 | pub fn new( 39 | config: OperatorConfig, 40 | operator_fn: impl Fn() -> O + Send, 41 | write_stream: WriteStream, 42 | ) -> Self { 43 | let operator_id = config.id; 44 | Self { 45 | config, 46 | operator: operator_fn(), 47 | write_stream, 48 | helper: OperatorExecutorHelper::new(operator_id), 49 | } 50 | } 51 | 52 | pub(crate) async fn 
execute( 53 | &mut self, 54 | _channel_from_worker: broadcast::Receiver, 55 | channel_to_worker: mpsc::UnboundedSender, 56 | _channel_to_event_runners: broadcast::Sender, 57 | ) { 58 | self.helper.synchronize().await; 59 | 60 | tracing::debug!( 61 | "Node {}: running operator {}", 62 | self.config.node_id, 63 | self.config.get_name() 64 | ); 65 | 66 | tokio::task::block_in_place(|| self.operator.run(&self.config, &mut self.write_stream)); 67 | tokio::task::block_in_place(|| self.operator.destroy()); 68 | 69 | // Close the stream. 70 | if !self.write_stream.is_closed() { 71 | self.write_stream 72 | .send(Message::new_watermark(Timestamp::Top)) 73 | .unwrap(); 74 | } 75 | 76 | channel_to_worker 77 | .send(WorkerNotification::DestroyedOperator(self.operator_id())) 78 | .unwrap(); 79 | } 80 | } 81 | 82 | impl OperatorExecutorT for SourceExecutor 83 | where 84 | O: Source, 85 | T: Data + for<'a> Deserialize<'a>, 86 | { 87 | fn execute<'a>( 88 | &'a mut self, 89 | channel_from_worker: broadcast::Receiver, 90 | channel_to_worker: mpsc::UnboundedSender, 91 | channel_to_event_runners: broadcast::Sender, 92 | ) -> Pin + 'a + Send>> { 93 | Box::pin(self.execute( 94 | channel_from_worker, 95 | channel_to_worker, 96 | channel_to_event_runners, 97 | )) 98 | } 99 | 100 | fn lattice(&self) -> Arc { 101 | Arc::clone(&self.helper.lattice) 102 | } 103 | 104 | fn operator_id(&self) -> OperatorId { 105 | self.config.id 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /python/src/py_operators/py_source.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use erdos::dataflow::{ 4 | operator::{OperatorConfig, Source}, 5 | WriteStream, 6 | }; 7 | use pyo3::{prelude::*, types::*}; 8 | 9 | use crate::PyWriteStream; 10 | 11 | pub(crate) struct PySource { 12 | py_operator: Arc, 13 | } 14 | 15 | impl PySource { 16 | pub(crate) fn new( 17 | py_operator_type: Arc, 18 | 
py_operator_args: Arc, 19 | py_operator_kwargs: Arc, 20 | py_operator_config: Arc, 21 | config: OperatorConfig, 22 | ) -> Self { 23 | // Instantiate the Operator in Python. 24 | tracing::debug!("Instantiating the operator {:?}", config.name); 25 | 26 | let py_operator = super::construct_operator( 27 | py_operator_type, 28 | py_operator_args, 29 | py_operator_kwargs, 30 | py_operator_config, 31 | config, 32 | ); 33 | 34 | Self { py_operator } 35 | } 36 | } 37 | 38 | impl Source> for PySource { 39 | fn run(&mut self, _config: &OperatorConfig, write_stream: &mut WriteStream>) { 40 | // Create the Python version of the WriteStream. 41 | let write_stream_clone = write_stream.clone(); 42 | let write_stream_id = write_stream.id(); 43 | let write_stream_name = write_stream.name(); 44 | let py_write_stream = PyWriteStream::from(write_stream_clone); 45 | 46 | // Invoke the `run` method. 47 | Python::with_gil(|py| { 48 | let locals = PyDict::new(py); 49 | if let Some(e) = locals 50 | .set_item("py_write_stream", &Py::new(py, py_write_stream).unwrap()) 51 | .err() 52 | { 53 | e.print(py) 54 | } 55 | if let Some(e) = locals 56 | .set_item("write_stream_id", format!("{}", write_stream_id)) 57 | .err() 58 | { 59 | e.print(py) 60 | } 61 | if let Some(e) = locals 62 | .set_item("write_stream_name", write_stream_name.to_string()) 63 | .err() 64 | { 65 | e.print(py) 66 | } 67 | let stream_construction_result = py.run( 68 | r#" 69 | import uuid, erdos 70 | 71 | # Create the WriteStream. 72 | write_stream = erdos.WriteStream(_py_write_stream=py_write_stream) 73 | "#, 74 | None, 75 | Some(locals), 76 | ); 77 | if let Err(e) = stream_construction_result { 78 | e.print(py); 79 | } 80 | 81 | // Retrieve the constructed stream. 82 | let py_write_stream_obj = py 83 | .eval("write_stream", None, Some(locals)) 84 | .unwrap() 85 | .to_object(py); 86 | 87 | // Invoke the `run` method. 
88 | if let Err(e) = self 89 | .py_operator 90 | .call_method1(py, "run", (py_write_stream_obj,)) 91 | { 92 | e.print(py); 93 | } 94 | }); 95 | } 96 | 97 | fn destroy(&mut self) { 98 | Python::with_gil(|py| { 99 | if let Err(e) = self.py_operator.call_method0(py, "destroy") { 100 | e.print(py); 101 | } 102 | }); 103 | } 104 | } 105 | -------------------------------------------------------------------------------- /python/doc/source/streams.rst: -------------------------------------------------------------------------------- 1 | Streams 2 | ======= 3 | 4 | Streams are used to send data in ERDOS applications. 5 | 6 | ERDOS streams are similar to ROS topics, but have a few additional desirable 7 | properties. Streams facilitate one-to-many communication, so only 1 operator 8 | sends messages on a stream. 9 | ERDOS broadcasts messages sent on a stream to all connected operators. 10 | In addition, streams are typed when using the Rust API. 11 | 12 | Streams expose 3 classes of interfaces: 13 | 14 | #. Read-interfaces expose methods to receive and process data. They allow 15 | pulling data by calling ``read()`` and ``try_read()``. 16 | Structures that implement read interfaces include: 17 | 18 | * :py:class:`.ReadStream`: used by operators to read data and register callbacks. 19 | * :py:class:`.ExtractStream`: used by the driver to read data. 20 | 21 | #. Write-interfaces expose the send method to send data on a stream. 22 | Structures that implement write interfaces include: 23 | 24 | * :py:class:`.WriteStream`: used by operators to send data. 25 | * :py:class:`.IngestStream`: used by the driver to send data. 26 | 27 | #. Abstract interfaces used to connect operators and construct a dataflow graph. 28 | Structures that implement the abstract `:py:class:.Stream` interface include: 29 | 30 | * :py:class:`.OperatorStream`: representing a stream on which an operator sends messages. 31 | * :py:class:`.IngestStream`: used to send messages to operators from the driver. 
32 | * :py:class:`.LoopStream`: used to create loops in the dataflow graph. 33 | 34 | 35 | Some applications may want to introduce loops in their dataflow graphs which 36 | is possible using the :py:class:`.LoopStream`. 37 | 38 | 39 | Sending Messages 40 | ---------------- 41 | 42 | Operators use Write Streams to send data. 43 | 44 | .. autoclass:: erdos.WriteStream 45 | :members: send 46 | 47 | 48 | Receiving Messages 49 | ------------------ 50 | 51 | Operators receive data by reading messages from Read Streams. Operators also 52 | receive data by implementing callbacks that are automatically invoked upon 53 | the receipt of a message. 54 | 55 | .. autoclass:: erdos.ReadStream 56 | :members: read, try_read 57 | 58 | 59 | Abstract Streams 60 | ---------------- 61 | 62 | These streams represent edges in the dataflow graph, which ERDOS materializes 63 | using its communication protocols, and the `:py:class:.ReadStream` 64 | and `:py:class:.WriteStream` interfaces. 65 | 66 | .. autoclass:: erdos.Stream 67 | :members: 68 | 69 | .. autoclass:: erdos.OperatorStream 70 | :show-inheritance: 71 | :members: 72 | 73 | 74 | Ingesting and Extracting Data 75 | ----------------------------- 76 | 77 | Some applications have trouble placing all of the data processing logic inside 78 | operators. For these applications, ERDOS provides special stream interfaces to 79 | *ingest* and *extract* data. 80 | 81 | A comprehensive example is available `here `__. 82 | 83 | .. autoclass:: erdos.IngestStream 84 | :show-inheritance: 85 | :members: send 86 | 87 | .. autoclass:: erdos.ExtractStream 88 | :members: read, try_read 89 | 90 | 91 | Loops 92 | ----- 93 | Certain applications require feedback in the dataflow. To support this use 94 | case, ERDOS provides the LoopStream interface to support loops in the 95 | dataflow. 96 | 97 | A comprehensive example is available `here `__. 98 | 99 | .. 
autoclass:: erdos.LoopStream 100 | :show-inheritance: 101 | :members: connect_loop 102 | -------------------------------------------------------------------------------- /examples/linq.rs: -------------------------------------------------------------------------------- 1 | use std::{thread, time::Duration}; 2 | 3 | use erdos::{ 4 | dataflow::{ 5 | context::SinkContext, 6 | operator::{Sink, Source}, 7 | operators::{Filter, Join, Map, Split}, 8 | state::TimeVersionedState, 9 | stream::{WriteStream, WriteStreamT}, 10 | Message, OperatorConfig, Timestamp, 11 | }, 12 | node::Node, 13 | Configuration, 14 | }; 15 | 16 | struct SourceOperator {} 17 | 18 | impl SourceOperator { 19 | pub fn new() -> Self { 20 | Self {} 21 | } 22 | } 23 | 24 | impl Source for SourceOperator { 25 | fn run(&mut self, _operator_config: &OperatorConfig, write_stream: &mut WriteStream) { 26 | tracing::info!("Running Source Operator"); 27 | for t in 0..10 { 28 | let timestamp = Timestamp::Time(vec![t as u64]); 29 | write_stream 30 | .send(Message::new_message(timestamp.clone(), t)) 31 | .unwrap(); 32 | write_stream 33 | .send(Message::new_watermark(timestamp)) 34 | .unwrap(); 35 | thread::sleep(Duration::from_millis(100)); 36 | } 37 | } 38 | 39 | fn destroy(&mut self) { 40 | tracing::info!("Destroying Source Operator"); 41 | } 42 | } 43 | 44 | struct SinkOperator {} 45 | 46 | impl SinkOperator { 47 | pub fn new() -> Self { 48 | Self {} 49 | } 50 | } 51 | 52 | impl Sink, usize> for SinkOperator { 53 | fn on_data(&mut self, ctx: &mut SinkContext>, data: &usize) { 54 | let timestamp = ctx.timestamp().clone(); 55 | tracing::info!( 56 | "{} @ {:?}: Received {}", 57 | ctx.operator_config().get_name(), 58 | timestamp, 59 | data, 60 | ); 61 | 62 | // Increment the message count. 
63 | *ctx.current_state().unwrap() += 1; 64 | } 65 | 66 | fn on_watermark(&mut self, ctx: &mut SinkContext>) { 67 | let timestamp = ctx.timestamp().clone(); 68 | tracing::info!( 69 | "{} @ {:?}: Received {} data messages.", 70 | ctx.operator_config().get_name(), 71 | timestamp, 72 | ctx.current_state().unwrap(), 73 | ); 74 | } 75 | } 76 | 77 | fn main() { 78 | let args = erdos::new_app("ERDOS").get_matches(); 79 | let mut node = Node::new(Configuration::from_args(&args)); 80 | 81 | let source_config = OperatorConfig::new().name("SourceOperator"); 82 | // Streams data 0, 1, 2, ..., 9 with timestamps 0, 1, 2, ..., 9. 83 | let source_stream = erdos::connect_source(SourceOperator::new, source_config); 84 | 85 | // Given x, generates a sequence of messages 0, ..., x for the current timestamp. 86 | let sequence = source_stream.flat_map(|x| (1..=*x)); 87 | // Finds the factors of x using the generated sequence. 88 | let factors = source_stream 89 | .timestamp_join(&sequence) 90 | .filter(|&(x, d)| x % d == 0) 91 | .map(|&(_, d)| d); 92 | 93 | // Split into streams of even factors and odd factors. 94 | let (evens, odds) = factors.split(|x| x % 2 == 0); 95 | 96 | // Print received even messages. 97 | let evens_sink_config = OperatorConfig::new().name("EvensSinkOperator"); 98 | erdos::connect_sink( 99 | SinkOperator::new, 100 | TimeVersionedState::new, 101 | evens_sink_config, 102 | &evens, 103 | ); 104 | 105 | // Print received odd messages. 
106 | let odds_sink_config = OperatorConfig::new().name("OddsSinkOperator"); 107 | erdos::connect_sink( 108 | SinkOperator::new, 109 | TimeVersionedState::new, 110 | odds_sink_config, 111 | &odds, 112 | ); 113 | 114 | node.run(); 115 | } 116 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/split.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::dataflow::{ 6 | context::OneInTwoOutContext, 7 | message::Message, 8 | operator::{OneInTwoOut, OperatorConfig}, 9 | stream::{OperatorStream, Stream, WriteStreamT}, 10 | Data, 11 | }; 12 | 13 | /// Splits an incoming stream of type D1 into two different streams of type D1 using the provided 14 | /// condition function. When evaluated to true, sends messages to left stream, and right stream 15 | /// otherwise. 16 | /// 17 | /// # Example 18 | /// The below example shows how to use a SplitOperator to split an incoming stream of usize 19 | /// messages into two different streams one with messages > 10 (left stream) and one with 20 | /// messages <= 10 (right stream), and send them. 21 | /// 22 | /// ``` 23 | /// # use erdos::dataflow::{stream::IngestStream, operator::{OperatorConfig}, operators::{SplitOperator}}; 24 | /// # let source_stream = IngestStream::new(); 25 | /// // Add the mapping function as an argument to the operator via the OperatorConfig. 
26 | /// let split_config = OperatorConfig::new().name("SplitOperator"); 27 | /// let (left_stream, right_stream) = erdos::connect_one_in_two_out( 28 | /// || -> SplitOperator { SplitOperator::new(|a: &usize| -> bool { a > &10 }) }, 29 | /// || {}, 30 | /// split_config, 31 | /// &source_stream, 32 | /// ); 33 | /// ``` 34 | pub struct SplitOperator 35 | where 36 | D1: Data + for<'a> Deserialize<'a>, 37 | { 38 | split_function: Arc bool + Send + Sync>, 39 | } 40 | 41 | impl SplitOperator 42 | where 43 | D1: Data + for<'a> Deserialize<'a>, 44 | { 45 | pub fn new(split_function: F) -> Self 46 | where 47 | F: 'static + Fn(&D1) -> bool + Send + Sync, 48 | { 49 | Self { 50 | split_function: Arc::new(split_function), 51 | } 52 | } 53 | } 54 | 55 | impl OneInTwoOut<(), D1, D1, D1> for SplitOperator 56 | where 57 | D1: Data + for<'a> Deserialize<'a>, 58 | { 59 | fn on_data(&mut self, ctx: &mut OneInTwoOutContext<(), D1, D1>, data: &D1) { 60 | let timestamp = ctx.timestamp().clone(); 61 | let mut stream_side: &str = "left"; 62 | 63 | let write_stream = if (self.split_function)(data) { 64 | ctx.left_write_stream() 65 | } else { 66 | stream_side = "right"; 67 | ctx.right_write_stream() 68 | }; 69 | 70 | write_stream 71 | .send(Message::new_message(timestamp, data.clone())) 72 | .unwrap(); 73 | tracing::debug!( 74 | "{} @ {:?}: received {:?} and sent to {} stream", 75 | ctx.operator_config().get_name(), 76 | ctx.timestamp(), 77 | data, 78 | stream_side 79 | ); 80 | } 81 | 82 | fn on_watermark(&mut self, _ctx: &mut OneInTwoOutContext<(), D1, D1>) {} 83 | } 84 | 85 | // Extension trait for SplitOperator 86 | pub trait Split 87 | where 88 | D1: Data + for<'a> Deserialize<'a>, 89 | { 90 | fn split(&self, split_fn: F) -> (OperatorStream, OperatorStream) 91 | where 92 | F: 'static + Fn(&D1) -> bool + Send + Sync + Clone; 93 | } 94 | 95 | impl Split for S 96 | where 97 | S: Stream, 98 | D1: Data + for<'a> Deserialize<'a>, 99 | { 100 | fn split(&self, split_fn: F) -> (OperatorStream, 
OperatorStream) 101 | where 102 | F: 'static + Fn(&D1) -> bool + Send + Sync + Clone, 103 | { 104 | let op_name = format!("SplitOp_{}", self.id()); 105 | 106 | crate::connect_one_in_two_out( 107 | move || -> SplitOperator { SplitOperator::new(split_fn.clone()) }, 108 | || {}, 109 | OperatorConfig::new().name(&op_name), 110 | self, 111 | ) 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /python/erdos/message.py: -------------------------------------------------------------------------------- 1 | import pickle 2 | from typing import Generic, Optional, TypeVar 3 | 4 | from erdos.internal import PyMessage 5 | from erdos.timestamp import Timestamp 6 | 7 | T = TypeVar("T") 8 | 9 | 10 | class Message(Generic[T]): 11 | """A :py:class:`Message` allows an operator to send timestamped data to 12 | other operators via a :py:class:`WriteStream` or an 13 | :py:class:`IngestStream`. 14 | 15 | Attributes: 16 | timestamp: The timestamp of the message. 17 | data: The data of the message. 18 | """ 19 | 20 | def __init__(self, timestamp: Timestamp, data: T) -> None: 21 | """Constructs a :py:class:`Message` with the given `data` and 22 | `timestamp`. 23 | 24 | Args: 25 | timestamp: The :py:class:`Timestamp` associated with the data. 26 | data: The payload to be sent on the :py:class:`WriteStream`. 27 | """ 28 | if not isinstance(timestamp, Timestamp): 29 | raise TypeError("timestamp must be of type `erdos.Timestamp`") 30 | self.timestamp = timestamp 31 | self.data = data 32 | self._serialized_data: Optional[bytes] = None 33 | 34 | def _serialize_data(self) -> None: 35 | """Serializes the message's data using pickle. 36 | 37 | Allows an application to front-load cost of serializing data, which 38 | usually occurs when the message is sent, in order to reduce the cost 39 | of later sending the message. 
40 | 41 | If the data is later changed, :py:meth:`Message._serialize_data` must 42 | be called again to reflect changes in the message. 43 | """ 44 | self._serialized_data = pickle.dumps( 45 | self.data, protocol=pickle.HIGHEST_PROTOCOL 46 | ) 47 | 48 | def _to_py_message(self) -> PyMessage: 49 | """Converts the current message to a :py:class:`PyMessage`. 50 | 51 | Returns: 52 | The :py:class:`PyMessage` instance representing `self`. 53 | """ 54 | if self._serialized_data is None: 55 | self._serialize_data() 56 | return PyMessage(self.timestamp._to_py_timestamp(), self._serialized_data) 57 | 58 | def __repr__(self) -> str: 59 | return f"Message({self.timestamp}, {self.data})" 60 | 61 | def __str__(self) -> str: 62 | return f"{{timestamp: {self.timestamp}, data: {self.data}}}" 63 | 64 | def __eq__(self, msg: object) -> bool: 65 | if not isinstance(msg, Message): 66 | raise ValueError(f"Equality with '{type(msg)}' is not implemented.") 67 | return self.timestamp == msg.timestamp and self.data == msg.data 68 | 69 | 70 | class WatermarkMessage(Message[None]): 71 | """A :py:class:`WatermarkMessage` allows an operator to convey the 72 | completion of all outgoing data for a given timestamp on a 73 | :py:class:`WriteStream`. 74 | 75 | Attributes: 76 | timestamp: The timestamp for which this is a watermark. 77 | """ 78 | 79 | def __init__(self, timestamp: Timestamp) -> None: 80 | super(WatermarkMessage, self).__init__(timestamp, None) 81 | 82 | def __str__(self) -> str: 83 | return "{{timestamp: {}, watermark: True}}".format(self.timestamp) 84 | 85 | def _to_py_message(self) -> PyMessage: 86 | """Converts the current message to a :py:class:`PyMessage`. 87 | 88 | Returns: 89 | The :py:class:`PyMessage` instance representing self. 90 | """ 91 | return PyMessage(self.timestamp._to_py_timestamp(), None) 92 | 93 | @property 94 | def is_top(self) -> bool: 95 | """Indicates whether the watermark conveyed by this message 96 | corresponds to the top timestamp. 
97 | 98 | Returns: 99 | `true` if the timestamp is top, `false` otherwise. 100 | """ 101 | return self.timestamp.is_top 102 | -------------------------------------------------------------------------------- /erdos/src/dataflow/state.rs: -------------------------------------------------------------------------------- 1 | //! Structures and traits for states added to streams. 2 | 3 | use crate::dataflow::Timestamp; 4 | use std::collections::HashMap; 5 | 6 | /// The [`State`] trait must be implemented by the state exposed to the operators by ERDOS. 7 | pub trait State: 'static + Send + Sync { 8 | type Item: Default; 9 | 10 | /// The `commit` method commits the final state for a given timestamp. 11 | fn commit(&mut self, timestamp: &Timestamp); 12 | 13 | /// Retrieves the last committed timestamp by this state. 14 | /// This method can be used in conjunction with [`Self::at`] to retrieve the latest committed state. 15 | fn last_committed_timestamp(&self) -> Timestamp; 16 | 17 | /// Retrieve the state at a given timestamp. 18 | /// If the state for that timestamp hasn't been initialized yet, invoke the default method on 19 | /// the [`Self::Item`] type, and return the newly created state for that timestamp. 20 | fn at(&mut self, timestamp: &Timestamp) -> Option<&mut Self::Item>; 21 | } 22 | 23 | /// State implementation for () to be used by operators that are stateless. 24 | impl State for () { 25 | type Item = (); 26 | 27 | fn commit(&mut self, _timestamp: &Timestamp) {} 28 | 29 | fn last_committed_timestamp(&self) -> Timestamp { 30 | Timestamp::Bottom 31 | } 32 | 33 | fn at(&mut self, _timestamp: &Timestamp) -> Option<&mut Self::Item> { 34 | None 35 | } 36 | } 37 | 38 | /// The `TimeVersionedState` provides a default implementation of the `State` for a type S. 39 | /// The structure automatically commits the final state for a timestamp into a HashMap and 40 | /// initializes new states for a timestamp `t` by invoking their default method. 
41 | pub struct TimeVersionedState 42 | where 43 | S: 'static + Default + Send + Sync, 44 | { 45 | state: HashMap, 46 | last_committed_timestamp: Timestamp, 47 | } 48 | 49 | impl TimeVersionedState 50 | where 51 | S: 'static + Default + Send + Sync, 52 | { 53 | pub fn new() -> Self { 54 | Self { 55 | state: HashMap::new(), 56 | last_committed_timestamp: Timestamp::Bottom, 57 | } 58 | } 59 | 60 | /// [Experimental] Evicts all committed state until and including the provided timestamp. 61 | /// 62 | /// Used to bound state size in 63 | /// [`TimestampJoinOperator`](crate::dataflow::operators::TimestampJoinOperator). 64 | pub(crate) fn evict_until(&mut self, timestamp: &Timestamp) { 65 | let timestamp = std::cmp::min(timestamp, &self.last_committed_timestamp); 66 | self.state.retain(|k, _| k > timestamp); 67 | } 68 | } 69 | 70 | impl Default for TimeVersionedState 71 | where 72 | S: 'static + Default + Send + Sync, 73 | { 74 | fn default() -> Self { 75 | Self::new() 76 | } 77 | } 78 | 79 | impl State for TimeVersionedState 80 | where 81 | S: 'static + Default + Send + Sync, 82 | { 83 | type Item = S; 84 | 85 | fn commit(&mut self, timestamp: &Timestamp) { 86 | self.last_committed_timestamp = timestamp.clone(); 87 | } 88 | 89 | fn last_committed_timestamp(&self) -> Timestamp { 90 | self.last_committed_timestamp.clone() 91 | } 92 | 93 | fn at(&mut self, timestamp: &Timestamp) -> Option<&mut Self::Item> { 94 | Some(self.state.entry(timestamp.clone()).or_default()) 95 | } 96 | } 97 | 98 | /// Trait that must be implemented by a state structure that is used in a Sequential operator. 99 | /// This state structure must implement an `append` method that enables message callbacks to add 100 | /// intermediate state to the structure, and a `commit` method that commits the final state for a 101 | /// given timestamp t. 
102 | pub trait AppendableState: 'static + Clone + Send + Sync { 103 | fn append(&self, data: &S); 104 | 105 | fn commit(&self, timestamp: &Timestamp); 106 | 107 | fn last_committed_timestamp(&self) -> Timestamp; 108 | } 109 | -------------------------------------------------------------------------------- /python/examples/watermarks.py: -------------------------------------------------------------------------------- 1 | """Every second, sends the message count to the batch operator. 2 | Sends a watermark every 3 messages which releases the batch. 3 | """ 4 | 5 | import time 6 | from typing import Any 7 | 8 | import erdos 9 | from erdos.context import OneInOneOutContext, SinkContext 10 | from erdos.operator import OneInOneOut, Sink, Source 11 | from erdos.streams import ReadStream, WriteStream 12 | 13 | 14 | class SendOp(Source): 15 | def __init__(self): 16 | print("initializing send op") 17 | 18 | def run(self, write_stream: WriteStream): 19 | count = 0 20 | while True: 21 | timestamp = erdos.Timestamp(coordinates=[count]) 22 | msg = erdos.Message(timestamp, count) 23 | print("SendOp: sending {msg}".format(msg=msg)) 24 | write_stream.send(msg) 25 | 26 | if count % 3 == 2: 27 | print("SendOp: sending watermark") 28 | write_stream.send(erdos.WatermarkMessage(timestamp)) 29 | 30 | count += 1 31 | time.sleep(1) 32 | 33 | 34 | class TopOp(Source): 35 | def __init__(self): 36 | print("initializing top op") 37 | 38 | def run(self, write_stream: WriteStream): 39 | print("TopOp: sending watermark") 40 | top_timestamp = erdos.Timestamp(is_top=True) 41 | write_stream.send(erdos.WatermarkMessage(top_timestamp)) 42 | 43 | 44 | class BatchOp(OneInOneOut): 45 | def __init__(self): 46 | print("initializing batch op") 47 | self.batch = [] 48 | 49 | def on_data(self, context: OneInOneOutContext, data: Any): 50 | print("adding to batch: {data}".format(data=data)) 51 | self.batch.append(data) 52 | 53 | def on_watermark(self, context: OneInOneOutContext): 54 | msg = 
erdos.Message(context.timestamp, self.batch) 55 | print("BatchOp: sending batch {msg}".format(msg=msg)) 56 | context.write_stream.send(msg) 57 | self.batch = [] 58 | 59 | 60 | class CallbackWatermarkListener(Sink): 61 | def __init__(self): 62 | print("initializing callback listener op") 63 | 64 | def on_data(self, context: SinkContext, data: Any): 65 | print("CallbackWatermarkListener: received message {data}".format(data=data)) 66 | 67 | def on_watermark(self, context: SinkContext): 68 | print( 69 | "CallbackWatermarkListener: received watermark at {}".format( 70 | context.timestamp 71 | ) 72 | ) 73 | 74 | 75 | class PullWatermarkListener(Sink): 76 | def __init__(self): 77 | print("initializing pull listener op") 78 | 79 | def run(self, read_stream: ReadStream): 80 | while True: 81 | data = read_stream.read() 82 | if isinstance(data, erdos.WatermarkMessage): 83 | print( 84 | ("PullWatermarkListener:" "received watermark {timestamp}").format( 85 | timestamp=data.timestamp 86 | ) 87 | ) 88 | else: 89 | print( 90 | "PullWatermarkListener: received message {data}".format(data=data) 91 | ) 92 | 93 | 94 | def main(): 95 | """Creates and runs the dataflow graph.""" 96 | count_stream = erdos.connect_source(SendOp, erdos.operator.OperatorConfig()) 97 | top_stream = erdos.connect_source(TopOp, erdos.operator.OperatorConfig()) 98 | batch_stream = erdos.connect_one_in_one_out( 99 | BatchOp, erdos.operator.OperatorConfig(), count_stream 100 | ) 101 | erdos.connect_sink( 102 | CallbackWatermarkListener, erdos.operator.OperatorConfig(), batch_stream 103 | ) 104 | erdos.connect_sink( 105 | CallbackWatermarkListener, erdos.operator.OperatorConfig(), top_stream 106 | ) 107 | erdos.connect_sink( 108 | PullWatermarkListener, erdos.operator.OperatorConfig(), batch_stream 109 | ) 110 | 111 | erdos.run() 112 | 113 | 114 | if __name__ == "__main__": 115 | main() 116 | -------------------------------------------------------------------------------- 
/erdos/src/dataflow/graph/default_graph.rs: -------------------------------------------------------------------------------- 1 | //! A globally accessible dataflow graph. 2 | //! 3 | //! This module is used in the driver when connecting new operators, 4 | //! or setting up [`IngestStream`]s, [`ExtractStream`]s, and [`LoopStream`]s. 5 | //! It is also used to get and set [`Stream`] names. 6 | use std::{ops::DerefMut, sync::Mutex}; 7 | 8 | use once_cell::sync::Lazy; 9 | use serde::Deserialize; 10 | 11 | use crate::{ 12 | dataflow::{ 13 | stream::{ExtractStream, IngestStream, LoopStream, OperatorStream, Stream, StreamId}, 14 | Data, 15 | }, 16 | OperatorConfig, 17 | }; 18 | 19 | use super::{AbstractGraph, OperatorRunner, StreamSetupHook}; 20 | 21 | // TODO: Don't require a mutex over the entire graph, as this can call deadlocks. 22 | static DEFAULT_GRAPH: Lazy> = Lazy::new(|| Mutex::new(AbstractGraph::new())); 23 | 24 | /// Adds an operator to the default graph. 25 | /// 26 | /// The operator is pinned on a given node. 27 | pub(crate) fn add_operator( 28 | config: OperatorConfig, 29 | runner: F, 30 | left_read_stream: Option<&dyn Stream>, 31 | right_read_stream: Option<&dyn Stream>, 32 | left_write_stream: Option<&OperatorStream>, 33 | right_write_stream: Option<&OperatorStream>, 34 | ) where 35 | F: OperatorRunner, 36 | for<'a> T: Data + Deserialize<'a>, 37 | for<'a> U: Data + Deserialize<'a>, 38 | for<'a> V: Data + Deserialize<'a>, 39 | for<'a> W: Data + Deserialize<'a>, 40 | { 41 | DEFAULT_GRAPH.lock().unwrap().add_operator( 42 | config, 43 | runner, 44 | left_read_stream, 45 | right_read_stream, 46 | left_write_stream, 47 | right_write_stream, 48 | ); 49 | } 50 | 51 | /// Adds an [`IngestStream`] to the default graph. 52 | /// 53 | /// The stream can be used by the driver to insert data into the dataflow. 
54 | pub(crate) fn add_ingest_stream( 55 | ingest_stream: &IngestStream, 56 | setup_hook: impl StreamSetupHook, 57 | ) where 58 | for<'a> D: Data + Deserialize<'a>, 59 | { 60 | DEFAULT_GRAPH 61 | .lock() 62 | .unwrap() 63 | .add_ingest_stream(ingest_stream, setup_hook); 64 | } 65 | 66 | /// Adds an [`ExtractStream`] to the default graph. 67 | /// 68 | /// The stream can be used by the driver to read data from the dataflow. 69 | pub(crate) fn add_extract_stream( 70 | extract_stream: &ExtractStream, 71 | setup_hook: impl StreamSetupHook, 72 | ) where 73 | for<'a> D: Data + Deserialize<'a>, 74 | { 75 | DEFAULT_GRAPH 76 | .lock() 77 | .unwrap() 78 | .add_extract_stream(extract_stream, setup_hook); 79 | } 80 | 81 | /// Adds a [`LoopStream`] to the default graph. 82 | /// 83 | /// The stream can be used by the driver to create cycles in the dataflow. 84 | pub(crate) fn add_loop_stream(loop_stream: &LoopStream) 85 | where 86 | for<'a> D: Data + Deserialize<'a>, 87 | { 88 | DEFAULT_GRAPH.lock().unwrap().add_loop_stream(loop_stream); 89 | } 90 | 91 | pub(crate) fn connect_loop(loop_stream: &LoopStream, stream: &OperatorStream) 92 | where 93 | for<'a> D: Data + Deserialize<'a>, 94 | { 95 | DEFAULT_GRAPH 96 | .lock() 97 | .unwrap() 98 | .connect_loop(loop_stream, stream); 99 | } 100 | 101 | pub(crate) fn set_stream_name(stream_id: &StreamId, name: &str) { 102 | DEFAULT_GRAPH 103 | .lock() 104 | .unwrap() 105 | .set_stream_name(stream_id, name.to_string()); 106 | } 107 | 108 | pub(crate) fn get_stream_name(stream_id: &StreamId) -> String { 109 | DEFAULT_GRAPH.lock().unwrap().get_stream_name(stream_id) 110 | } 111 | 112 | pub(crate) fn resolve_stream_id(stream_id: &StreamId) -> Option { 113 | DEFAULT_GRAPH.lock().unwrap().resolve_stream_id(stream_id) 114 | } 115 | 116 | pub(crate) fn clone() -> AbstractGraph { 117 | DEFAULT_GRAPH.lock().unwrap().clone() 118 | } 119 | 120 | /// Updates the graph, and returns previous value 121 | pub(crate) fn set(graph: AbstractGraph) -> 
AbstractGraph { 122 | std::mem::replace(DEFAULT_GRAPH.lock().unwrap().deref_mut(), graph) 123 | } 124 | -------------------------------------------------------------------------------- /erdos/src/communication/serializable.rs: -------------------------------------------------------------------------------- 1 | use abomonation::{decode, encode, measure, Abomonation}; 2 | use bytes::{BufMut, BytesMut}; 3 | use serde::{Deserialize, Serialize}; 4 | use std::{ 5 | fmt::Debug, 6 | io::{Error, ErrorKind}, 7 | }; 8 | 9 | use crate::communication::CommunicationError; 10 | 11 | /// Wrapper around a deserialized message. The wrapper can either own the deserialized 12 | /// message or store a reference to it. 13 | pub enum DeserializedMessage<'a, T> { 14 | Ref(&'a T), 15 | Owned(T), 16 | } 17 | 18 | /// Trait automatically derived for all messages that derive `Serialize`. 19 | pub trait Serializable { 20 | fn encode(&self) -> Result; 21 | fn encode_into(&self, buffer: &mut BytesMut) -> Result<(), CommunicationError>; 22 | fn serialized_size(&self) -> Result; 23 | } 24 | 25 | impl Serializable for D 26 | where 27 | D: Debug + Clone + Send + Serialize, 28 | { 29 | default fn encode(&self) -> Result { 30 | let serialized_msg = bincode::serialize(self).map_err(CommunicationError::from)?; 31 | let serialized_msg: BytesMut = BytesMut::from(&serialized_msg[..]); 32 | Ok(serialized_msg) 33 | } 34 | 35 | default fn encode_into(&self, buffer: &mut BytesMut) -> Result<(), CommunicationError> { 36 | let mut writer = buffer.writer(); 37 | bincode::serialize_into(&mut writer, self).map_err(CommunicationError::from) 38 | } 39 | 40 | default fn serialized_size(&self) -> Result { 41 | bincode::serialized_size(&self) 42 | .map(|x| x as usize) 43 | .map_err(CommunicationError::from) 44 | } 45 | } 46 | 47 | /// Specialized version used when messages derive `Abomonation`. 
48 | impl Serializable for D 49 | where 50 | D: Debug + Clone + Send + Serialize + Abomonation, 51 | { 52 | fn encode(&self) -> Result { 53 | let mut serialized_msg: Vec = Vec::with_capacity(measure(self)); 54 | unsafe { 55 | encode(self, &mut serialized_msg).map_err(CommunicationError::AbomonationError)?; 56 | } 57 | let serialized_msg: BytesMut = BytesMut::from(&serialized_msg[..]); 58 | Ok(serialized_msg) 59 | } 60 | 61 | fn encode_into(&self, buffer: &mut BytesMut) -> Result<(), CommunicationError> { 62 | let mut writer = buffer.writer(); 63 | unsafe { encode(self, &mut writer).map_err(CommunicationError::AbomonationError) } 64 | } 65 | 66 | fn serialized_size(&self) -> Result { 67 | Ok(abomonation::measure(self)) 68 | } 69 | } 70 | 71 | /// Trait automatically derived for all messages that derive `Deserialize`. 72 | pub trait Deserializable<'a>: Sized { 73 | fn decode(buf: &'a mut BytesMut) -> Result, CommunicationError>; 74 | } 75 | 76 | impl<'a, D> Deserializable<'a> for D 77 | where 78 | D: Debug + Clone + Send + Deserialize<'a>, 79 | { 80 | default fn decode( 81 | buf: &'a mut BytesMut, 82 | ) -> Result, CommunicationError> { 83 | let msg: D = bincode::deserialize(buf).map_err(CommunicationError::from)?; 84 | Ok(DeserializedMessage::Owned(msg)) 85 | } 86 | } 87 | 88 | /// Specialized version used when messages derive `Abomonation`. 
89 | impl<'a, D> Deserializable<'a> for D 90 | where 91 | D: Debug + Clone + Send + Deserialize<'a> + Abomonation, 92 | { 93 | fn decode(buf: &'a mut BytesMut) -> Result, CommunicationError> { 94 | let (msg, _) = { 95 | unsafe { 96 | match decode::(buf.as_mut()) { 97 | Some(msg) => msg, 98 | None => { 99 | return Err(CommunicationError::AbomonationError(Error::new( 100 | ErrorKind::Other, 101 | "Deserialization failed", 102 | ))) 103 | } 104 | } 105 | } 106 | }; 107 | Ok(DeserializedMessage::Ref(msg)) 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /python/src/py_stream/py_read_stream.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use erdos::dataflow::stream::{ 4 | errors::{ReadError, TryReadError}, 5 | ReadStream, 6 | }; 7 | use pyo3::{create_exception, exceptions, prelude::*}; 8 | 9 | use crate::PyMessage; 10 | 11 | // Define errors that can be raised by a read stream. 12 | create_exception!(ReadStreamError, SerializationError, exceptions::PyException); 13 | create_exception!(ReadStreamError, Disconnected, exceptions::PyException); 14 | create_exception!(ReadStreamError, Closed, exceptions::PyException); 15 | 16 | /// The internal Python abstraction over a `ReadStream`. 17 | /// 18 | /// This class is exposed on the Python interface as `erdos.streams.ReadStream`. 
19 | #[pyclass] 20 | pub struct PyReadStream { 21 | pub read_stream: Arc>>, 22 | } 23 | 24 | #[pymethods] 25 | impl PyReadStream { 26 | fn is_closed(&self) -> bool { 27 | self.read_stream.is_closed() 28 | } 29 | 30 | fn name(&self) -> String { 31 | self.read_stream.name() 32 | } 33 | 34 | fn id(&self) -> String { 35 | format!("{}", self.read_stream.id()) 36 | } 37 | 38 | /// Returns (timestamp, data) 39 | fn read(&mut self) -> PyResult { 40 | // NOTE: Since the executor of a Python's `run` method holds a reference to the same Arc 41 | // that backs a PyReadStream (in order to drop after its execution), we need to do a 42 | // `get_mut_unchecked` instead of a `get_mut` to bypass the reference counting checks, and 43 | // retrieve the underlying ReadStream. 44 | unsafe { 45 | let read_stream = Arc::get_mut_unchecked(&mut self.read_stream); 46 | match read_stream.read() { 47 | Ok(msg) => Ok(PyMessage::from(msg)), 48 | Err(e) => { 49 | let error_str = format!( 50 | "Error reading from {} (ID: {})", 51 | self.read_stream.name(), 52 | self.read_stream.id() 53 | ); 54 | match e { 55 | ReadError::SerializationError => { 56 | Err(SerializationError::new_err(error_str)) 57 | } 58 | ReadError::Disconnected => Err(Disconnected::new_err(error_str)), 59 | ReadError::Closed => Err(Closed::new_err(error_str)), 60 | } 61 | } 62 | } 63 | } 64 | } 65 | 66 | fn try_read(&mut self) -> PyResult> { 67 | // NOTE: Since the executor of a Python's `run` method holds a reference to the same Arc 68 | // that backs a PyReadStream (in order to drop after its execution), we need to do a 69 | // `get_mut_unchecked` instead of a `get_mut` to bypass the reference counting checks, and 70 | // retrieve the underlying ReadStream. 
71 | unsafe { 72 | let read_stream = Arc::get_mut_unchecked(&mut self.read_stream); 73 | match read_stream.try_read() { 74 | Ok(msg) => Ok(Some(PyMessage::from(msg))), 75 | Err(e) => { 76 | let error_str = format!( 77 | "Error reading from {} (ID: {})", 78 | self.read_stream.name(), 79 | self.read_stream.id() 80 | ); 81 | match e { 82 | TryReadError::SerializationError => { 83 | Err(SerializationError::new_err(error_str)) 84 | } 85 | TryReadError::Disconnected => Err(Disconnected::new_err(error_str)), 86 | TryReadError::Closed => Err(Closed::new_err(error_str)), 87 | TryReadError::Empty => Ok(None), 88 | } 89 | } 90 | } 91 | } 92 | } 93 | } 94 | 95 | impl From>> for PyReadStream { 96 | fn from(read_stream: ReadStream>) -> Self { 97 | Self { 98 | read_stream: Arc::new(read_stream), 99 | } 100 | } 101 | } 102 | 103 | impl From<&Arc>>> for PyReadStream { 104 | fn from(read_stream: &Arc>>) -> Self { 105 | Self { 106 | read_stream: Arc::clone(read_stream), 107 | } 108 | } 109 | } 110 | -------------------------------------------------------------------------------- /erdos/src/dataflow/graph/mod.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | marker::PhantomData, 3 | sync::{Arc, Mutex}, 4 | }; 5 | 6 | use crate::{ 7 | node::operator_executors::OperatorExecutorT, 8 | scheduler::channel_manager::{ChannelManager, StreamEndpoints, StreamEndpointsT}, 9 | OperatorConfig, OperatorId, 10 | }; 11 | 12 | // Private submodules 13 | mod abstract_graph; 14 | mod job_graph; 15 | 16 | // Public submodules 17 | pub(crate) mod default_graph; 18 | 19 | // Crate-wide exports 20 | pub(crate) use abstract_graph::AbstractGraph; 21 | pub(crate) use job_graph::JobGraph; 22 | use serde::Deserialize; 23 | 24 | use super::{stream::StreamId, Data}; 25 | 26 | /// Trait for functions that set up operator execution. 
27 | pub(crate) trait OperatorRunner: 28 | 'static + (Fn(Arc>) -> Box) + Sync + Send 29 | { 30 | fn box_clone(&self) -> Box; 31 | } 32 | 33 | impl< 34 | T: 'static 35 | + (Fn(Arc>) -> Box) 36 | + Sync 37 | + Send 38 | + Clone, 39 | > OperatorRunner for T 40 | { 41 | fn box_clone(&self) -> Box { 42 | Box::new(self.clone()) 43 | } 44 | } 45 | 46 | /// Trait for functions used to set up ingest and extract streams. 47 | pub(crate) trait StreamSetupHook: 48 | 'static + Fn(&AbstractGraph, &mut ChannelManager) + Sync + Send 49 | { 50 | fn box_clone(&self) -> Box; 51 | } 52 | 53 | impl StreamSetupHook 54 | for T 55 | { 56 | fn box_clone(&self) -> Box { 57 | Box::new(self.clone()) 58 | } 59 | } 60 | 61 | /// Specifies the type of job. 62 | #[derive(Clone, Copy)] 63 | pub(crate) enum Job { 64 | /// An operator in the dataflow. 65 | Operator(OperatorId), 66 | /// The driver which may interact with the dataflow. 67 | Driver, 68 | } 69 | 70 | /// A typed representation of a stream used to setup 71 | /// and configure the dataflow graphs. 72 | #[derive(Clone)] 73 | pub(crate) struct AbstractStream 74 | where 75 | for<'a> D: Data + Deserialize<'a>, 76 | { 77 | id: StreamId, 78 | name: String, 79 | phantom: PhantomData, 80 | } 81 | 82 | impl AbstractStream 83 | where 84 | for<'a> D: Data + Deserialize<'a>, 85 | { 86 | fn new(id: StreamId, name: String) -> Self { 87 | Self { 88 | id, 89 | name, 90 | phantom: PhantomData, 91 | } 92 | } 93 | } 94 | 95 | /// A trait implemented over [`AbstractStream`]s used to preserve 96 | /// typing while processing sets of streams. 
97 | pub(crate) trait AbstractStreamT: Send + Sync { 98 | fn id(&self) -> StreamId; 99 | fn name(&self) -> String; 100 | fn set_name(&mut self, name: String); 101 | fn box_clone(&self) -> Box; 102 | fn to_stream_endpoints_t(&self) -> Box; 103 | } 104 | 105 | impl AbstractStreamT for AbstractStream 106 | where 107 | for<'a> D: Data + Deserialize<'a>, 108 | { 109 | fn id(&self) -> StreamId { 110 | self.id 111 | } 112 | 113 | fn name(&self) -> String { 114 | self.name.clone() 115 | } 116 | 117 | fn set_name(&mut self, name: String) { 118 | self.name = name; 119 | } 120 | 121 | fn box_clone(&self) -> Box { 122 | Box::new(self.clone()) 123 | } 124 | 125 | fn to_stream_endpoints_t(&self) -> Box { 126 | Box::new(StreamEndpoints::::new(self.id, self.name())) 127 | } 128 | } 129 | 130 | /// The representation of the operator used to set up and configure the dataflow. 131 | pub(crate) struct AbstractOperator { 132 | pub id: OperatorId, 133 | /// Function that executes the operator. 134 | pub runner: Box, 135 | /// Operator configuration. 136 | pub config: OperatorConfig, 137 | /// Streams on which the operator reads. 138 | pub read_streams: Vec, 139 | /// Streams on which the operator writes. 
140 | pub write_streams: Vec, 141 | } 142 | 143 | impl Clone for AbstractOperator { 144 | fn clone(&self) -> Self { 145 | Self { 146 | id: self.id, 147 | runner: self.runner.box_clone(), 148 | config: self.config.clone(), 149 | read_streams: self.read_streams.clone(), 150 | write_streams: self.write_streams.clone(), 151 | } 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | push: 5 | branches: [master] 6 | pull_request: 7 | 8 | env: 9 | CARGO_TERM_COLOR: always 10 | RUSTFLAGS: -D warnings 11 | rust_toolchain: nightly-2022-02-09 12 | 13 | jobs: 14 | format-rust: 15 | name: "Rust Formatting" 16 | runs-on: ubuntu-latest 17 | steps: 18 | - name: Checkout code 19 | uses: actions/checkout@v2 20 | - name: Install Rust ${{ env.rust_toolchain }} 21 | uses: actions-rs/toolchain@v1 22 | with: 23 | toolchain: ${{ env.rust_toolchain }} 24 | profile: minimal 25 | override: true 26 | components: rustfmt, clippy 27 | - name: Cache Rust package builds 28 | uses: Swatinem/rust-cache@v1 29 | - name: Check Rust formatting 30 | run: cargo fmt -- --check 31 | - name: Check Clippy linting 32 | run: cargo clippy --all --tests --all-features 33 | 34 | build-rust: 35 | name: "Rust Build" 36 | runs-on: ubuntu-latest 37 | needs: format-rust 38 | steps: 39 | - name: Checkout code 40 | uses: actions/checkout@v2 41 | - name: Install Rust ${{ env.rust_toolchain }} 42 | uses: actions-rs/toolchain@v1 43 | with: 44 | toolchain: ${{ env.rust_toolchain }} 45 | profile: minimal 46 | override: true 47 | - name: Cache Rust package builds 48 | uses: Swatinem/rust-cache@v1 49 | - name: Build 50 | run: cargo build --examples --verbose 51 | - name: Run tests 52 | run: cargo test --verbose 53 | 54 | ros-integration: 55 | name: "ROS Integration" 56 | runs-on: ubuntu-latest 57 | container: ros:noetic 58 | needs: format-rust 
59 | steps: 60 | - name: Checkout code 61 | uses: actions/checkout@v2 62 | - name: Install curl 63 | run: apt-get update; apt-get -y install curl 64 | - name: Install Rust ${{ env.rust_toolchain }} 65 | uses: actions-rs/toolchain@v1 66 | with: 67 | toolchain: ${{ env.rust_toolchain }} 68 | profile: minimal 69 | override: true 70 | - name: Cache Rust package builds 71 | uses: Swatinem/rust-cache@v1 72 | - name: Build 73 | shell: bash 74 | run: | 75 | source /opt/ros/$ROS_DISTRO/setup.bash 76 | cargo build --examples --verbose --features=ros 77 | - name: Run tests 78 | shell: bash 79 | run: | 80 | source /opt/ros/$ROS_DISTRO/setup.bash 81 | cargo test --verbose --features=ros 82 | 83 | format-python: 84 | name: "Python Formatting / Type Verification" 85 | runs-on: ubuntu-latest 86 | steps: 87 | - name: Checkout code 88 | uses: actions/checkout@v2 89 | - name: Setup Python 90 | uses: actions/setup-python@v2 91 | with: 92 | python-version: "3.10" 93 | - name: Check Python imports 94 | uses: isort/isort-action@master 95 | with: 96 | configuration: "--check-only --diff --profile black" 97 | - name: Check Black formatting 98 | uses: psf/black@stable 99 | with: 100 | options: "--check --diff --color" 101 | - name: Check Flake8 compatibility 102 | run: | 103 | python -m pip install flake8==3.9.2 flake8-quotes 104 | flake8 --inline-quotes="double" ./python/doc/ 105 | flake8 --inline-quotes="double" ./python/ 106 | - name: Check type correctness. 
107 | run: | 108 | python -m pip install numpy mypy 109 | mypy --strict ./python/erdos/ 110 | 111 | build-python: 112 | name: "Python ${{ matrix.python-version }} Build" 113 | runs-on: ubuntu-latest 114 | strategy: 115 | matrix: 116 | python-version: ["3.7", "3.8", "3.9", "3.10"] 117 | needs: [format-python, format-rust] 118 | steps: 119 | - name: Checkout code 120 | uses: actions/checkout@v2 121 | - name: Setup python 122 | uses: actions/setup-python@v2 123 | with: 124 | python-version: ${{ matrix.python-version }} 125 | - name: Install Rust ${{ env.rust_toolchain }} 126 | uses: actions-rs/toolchain@v1 127 | with: 128 | toolchain: ${{ env.rust_toolchain }} 129 | profile: minimal 130 | override: true 131 | - name: Cache Rust package builds 132 | uses: Swatinem/rust-cache@v1 133 | - name: Build 134 | run: | 135 | python -m pip install maturin 136 | cd python && maturin build 137 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/ros/from_ros_operator.rs: -------------------------------------------------------------------------------- 1 | use crate::dataflow::{ 2 | operator::{OperatorConfig, Source}, 3 | operators::ros::*, 4 | stream::{WriteStream, WriteStreamT}, 5 | Data, Message, 6 | }; 7 | use serde::Deserialize; 8 | use std::sync::{Arc, Mutex}; 9 | 10 | /// Subscribes to a ROS topic and outputs incoming messages to an ERDOS stream using the 11 | /// provided message conversion function. 12 | /// 13 | /// The conversion function transforms a ROS message implementing the [`rosrust::Message`] trait 14 | /// into an ERDOS [`Message`]. 15 | /// 16 | /// See [`rosrust_msg`](https://lib.rs/crates/rosrust_msg), 17 | /// the [ROS `std_msgs` package](http://wiki.ros.org/std_msgs), 18 | /// and the [ROS `common_msgs` package](http://wiki.ros.org/common_msgs) 19 | /// for a variety of supported and commonly-used ROS messages. 
20 | /// 21 | /// # Example 22 | /// The following example shows how to use a [`FromRosOperator`] with a conversion function 23 | /// which takes a 24 | /// [`rosrust_msg::sensor_msgs::Image`](http://docs.ros.org/en/api/sensor_msgs/html/msg/Image.html) 25 | /// and returns an ERDOS message containing [`Vec`] a vector of bytes. 26 | /// 27 | /// ``` 28 | /// # use erdos::{ 29 | /// # dataflow::{Message, operators::ros::FromRosOperator, Timestamp}, 30 | /// # OperatorConfig, 31 | /// # }; 32 | /// # 33 | /// # pub mod rosrust_msg { 34 | /// # pub mod sensor_msgs { 35 | /// # use std::io; 36 | /// # 37 | /// # #[derive(Debug, Clone, PartialEq, Default)] 38 | /// # pub struct Image { 39 | /// # pub data: Vec, 40 | /// # } 41 | /// # 42 | /// # impl rosrust::Message for Image { 43 | /// # fn msg_definition() -> String { String::new() } 44 | /// # fn md5sum() -> String { String::new() } 45 | /// # fn msg_type() -> String { String::new() } 46 | /// # } 47 | /// # 48 | /// # impl rosrust::RosMsg for Image { 49 | /// # fn encode(&self, mut w: W) -> io::Result<()> { Ok(()) } 50 | /// # fn decode(mut r: R) -> io::Result { Ok(Default::default()) } 51 | /// # } 52 | /// # } 53 | /// # }; 54 | /// fn ros_image_to_bytes(input: &rosrust_msg::sensor_msgs::Image) -> Vec>> { 55 | /// vec![Message::new_message(Timestamp::Time(vec![0 as u64]), input.data.clone())] 56 | /// } 57 | /// 58 | /// let ros_source_config = OperatorConfig::new().name("FromRosImage"); 59 | /// let ros_source = erdos::connect_source( 60 | /// move || -> FromRosOperator> { 61 | /// FromRosOperator::new("image_topic", ros_image_to_bytes) 62 | /// }, 63 | /// ros_source_config, 64 | /// ); 65 | /// ``` 66 | 67 | #[derive(Clone)] 68 | pub struct FromRosOperator 69 | where 70 | U: Data + for<'a> Deserialize<'a>, 71 | { 72 | topic: String, 73 | from_ros_msg: Arc Vec> + Send + Sync>, 74 | } 75 | 76 | impl FromRosOperator 77 | where 78 | U: Data + for<'a> Deserialize<'a>, 79 | { 80 | pub fn new(topic: &str, 
from_ros_msg: F) -> Self 81 | where 82 | F: 'static + Fn(&T) -> Vec> + Send + Sync, 83 | { 84 | Self { 85 | topic: topic.to_string(), 86 | from_ros_msg: Arc::new(from_ros_msg), 87 | } 88 | } 89 | } 90 | 91 | impl Source for FromRosOperator 92 | where 93 | U: Data + for<'a> Deserialize<'a>, 94 | { 95 | fn run(&mut self, config: &OperatorConfig, write_stream: &mut WriteStream) { 96 | let from_ros_msg = self.from_ros_msg.clone(); 97 | let config_clone = config.clone(); 98 | let write_stream_clone = Arc::new(Mutex::new(write_stream.clone())); 99 | 100 | let _subscriber_raii = 101 | rosrust::subscribe(self.topic.as_str(), ROS_QUEUE_SIZE, move |ros_msg: T| { 102 | let erdos_msg_vec = (from_ros_msg)(&ros_msg); 103 | 104 | for erdos_msg in erdos_msg_vec.into_iter() { 105 | tracing::trace!( 106 | "{}: Received and Converted {:?}", 107 | config_clone.get_name(), 108 | erdos_msg, 109 | ); 110 | // Sends converted message on ERDOS stream. 111 | write_stream_clone.lock().unwrap().send(erdos_msg).unwrap(); 112 | } 113 | }) 114 | .unwrap(); 115 | 116 | rosrust::spin(); 117 | } 118 | } 119 | -------------------------------------------------------------------------------- /python/erdos/internal/__init__.pyi: -------------------------------------------------------------------------------- 1 | from typing import Any, Callable, Dict, Iterable, List, Optional, Tuple, Type 2 | 3 | from erdos import OperatorConfig 4 | from erdos.operator import BaseOperator 5 | 6 | class PyTimestamp: 7 | def __init__( 8 | self, coordinates: Optional[List[int]], is_top: bool, is_bottom: bool 9 | ) -> None: ... 10 | def is_top(self) -> bool: ... 11 | def is_bottom(self) -> bool: ... 12 | def coordinates(self) -> Optional[List[int]]: ... 13 | def __lt__(self, other: Any) -> bool: ... 14 | def __eq__(self, other: Any) -> bool: ... 15 | def __le__(self, other: Any) -> bool: ... 16 | 17 | class PyMessage: 18 | timestamp: Optional[PyTimestamp] = ... 19 | data: Optional[bytes] = ... 
20 | def __init__(self, timestamp: PyTimestamp, data: Optional[bytes]) -> None: ... 21 | def is_timestamped_data(self) -> bool: ... 22 | def is_watermark(self) -> bool: ... 23 | def is_top_watermark(self) -> bool: ... 24 | 25 | class PyStream: 26 | def name(self) -> str: ... 27 | def set_name(self, name: str) -> None: ... 28 | def id(self) -> str: ... 29 | def _map(self, function: Callable[[bytes], bytes]) -> PyOperatorStream: ... 30 | def _flat_map( 31 | self, function: Callable[[bytes], Iterable[bytes]] 32 | ) -> PyOperatorStream: ... 33 | def _filter(self, function: Callable[[bytes], bool]) -> PyOperatorStream: ... 34 | def _split( 35 | self, function: Callable[[bytes], bool] 36 | ) -> Tuple[PyOperatorStream, PyOperatorStream]: ... 37 | def _timestamp_join( 38 | self, other: PyStream, function: Callable[[bytes, bytes], bytes] 39 | ) -> PyOperatorStream: ... 40 | def _concat(self, other: PyStream) -> PyOperatorStream: ... 41 | 42 | class PyOperatorStream(PyStream): ... 43 | 44 | class PyLoopStream(PyStream): 45 | def __init__(self) -> None: ... 46 | def connect_loop(self, stream: PyOperatorStream) -> None: ... 47 | 48 | class PyReadStream: 49 | def is_closed(self) -> bool: ... 50 | def name(self) -> str: ... 51 | def id(self) -> str: ... 52 | def read(self) -> PyMessage: ... 53 | def try_read(self) -> Optional[PyMessage]: ... 54 | 55 | class PyWriteStream: 56 | def is_closed(self) -> bool: ... 57 | def name(self) -> str: ... 58 | def id(self) -> str: ... 59 | def send(self, message: PyMessage) -> None: ... 60 | 61 | class PyIngestStream(PyStream): 62 | def __init__(self, name: Optional[str]) -> None: ... 63 | def is_closed(self) -> bool: ... 64 | def send(self, message: PyMessage) -> None: ... 65 | 66 | class PyExtractStream: 67 | def __init__(self, py_stream: PyOperatorStream) -> None: ... 68 | def is_closed(self) -> bool: ... 69 | def read(self) -> PyMessage: ... 70 | def try_read(self) -> Optional[PyMessage]: ... 71 | def name(self) -> str: ... 
72 | def id(self) -> str: ... 73 | 74 | def connect_source( 75 | operator: Type[BaseOperator], 76 | config: OperatorConfig, 77 | args: Tuple[Any, ...], 78 | kwargs: Dict[str, Any], 79 | node_id: int, 80 | ) -> PyOperatorStream: ... 81 | def connect_sink( 82 | operator: Type[BaseOperator], 83 | config: OperatorConfig, 84 | read_stream: PyStream, 85 | args: Tuple[Any, ...], 86 | kwargs: Dict[str, Any], 87 | node_id: int, 88 | ) -> None: ... 89 | def connect_one_in_one_out( 90 | operator: Type[BaseOperator], 91 | config: OperatorConfig, 92 | read_stream: PyStream, 93 | args: Tuple[Any, ...], 94 | kwargs: Dict[str, Any], 95 | node_id: int, 96 | ) -> PyOperatorStream: ... 97 | def connect_one_in_two_out( 98 | operator: Type[BaseOperator], 99 | config: OperatorConfig, 100 | read_stream: PyStream, 101 | args: Tuple[Any, ...], 102 | kwargs: Dict[str, Any], 103 | node_id: int, 104 | ) -> Tuple[PyOperatorStream, PyOperatorStream]: ... 105 | def connect_two_in_one_out( 106 | operator: Type[BaseOperator], 107 | config: OperatorConfig, 108 | left_read_stream: PyStream, 109 | right_read_stream: PyStream, 110 | args: Tuple[Any, ...], 111 | kwargs: Dict[str, Any], 112 | node_id: int, 113 | ) -> PyOperatorStream: ... 114 | def run( 115 | node_id: int, 116 | data_addresses: List[str], 117 | control_addresses: List[str], 118 | graph_filename: Optional[str], 119 | ) -> None: ... 120 | def run_async( 121 | node_id: int, 122 | data_addresses: List[str], 123 | control_addresses: List[str], 124 | graph_filename: Optional[str], 125 | ) -> PyNodeHandle: ... 126 | def reset() -> None: ... 127 | 128 | class PyNodeHandle: 129 | def shutdown_node(self) -> None: ... 
130 | -------------------------------------------------------------------------------- /python/src/py_stream/mod.rs: -------------------------------------------------------------------------------- 1 | use erdos::dataflow::{ 2 | operators::{Concat, Filter, Join, Map, Split}, 3 | stream::{Stream, StreamId}, 4 | }; 5 | use pyo3::{prelude::*, types::PyBytes}; 6 | 7 | // Private submodules 8 | mod py_extract_stream; 9 | mod py_ingest_stream; 10 | mod py_loop_stream; 11 | mod py_operator_stream; 12 | mod py_read_stream; 13 | mod py_write_stream; 14 | 15 | // Public exports 16 | pub use py_extract_stream::PyExtractStream; 17 | pub use py_ingest_stream::PyIngestStream; 18 | pub use py_loop_stream::PyLoopStream; 19 | pub use py_operator_stream::PyOperatorStream; 20 | pub use py_read_stream::PyReadStream; 21 | pub use py_write_stream::PyWriteStream; 22 | 23 | /// The internal Python abstraction over a [`Stream`]. 24 | #[pyclass(subclass)] 25 | pub struct PyStream { 26 | pub id: StreamId, 27 | } 28 | 29 | #[pymethods] 30 | impl PyStream { 31 | fn name(&self) -> String { 32 | Stream::name(self) 33 | } 34 | 35 | fn set_name(&mut self, name: &str) { 36 | Stream::set_name(self, name) 37 | } 38 | 39 | fn id(&self) -> String { 40 | format!("{}", self.id) 41 | } 42 | 43 | fn _map(&self, py: Python<'_>, function: PyObject) -> PyResult> { 44 | let map_fn = move |data: &Vec| -> Vec { 45 | Python::with_gil(|py| { 46 | let serialized_data = PyBytes::new(py, &data[..]); 47 | function 48 | .call1(py, (serialized_data,)) 49 | .unwrap() 50 | .extract(py) 51 | .unwrap() 52 | }) 53 | }; 54 | PyOperatorStream::new(py, self.map(map_fn)) 55 | } 56 | 57 | fn _flat_map(&self, py: Python<'_>, function: PyObject) -> PyResult> { 58 | let flat_map_fn = move |data: &Vec| -> Vec> { 59 | Python::with_gil(|py| { 60 | let serialized_data = PyBytes::new(py, &data[..]); 61 | function 62 | .call1(py, (serialized_data,)) 63 | .unwrap() 64 | .extract(py) 65 | .unwrap() 66 | }) 67 | }; 68 | 
PyOperatorStream::new(py, self.flat_map(flat_map_fn)) 69 | } 70 | 71 | fn _filter(&self, py: Python<'_>, function: PyObject) -> PyResult> { 72 | let filter_fn = move |data: &Vec| -> bool { 73 | Python::with_gil(|py| { 74 | let serialized_data = PyBytes::new(py, &data[..]); 75 | function 76 | .call1(py, (serialized_data,)) 77 | .unwrap() 78 | .extract(py) 79 | .unwrap() 80 | }) 81 | }; 82 | PyOperatorStream::new(py, self.filter(filter_fn)) 83 | } 84 | 85 | fn _split( 86 | &self, 87 | py: Python<'_>, 88 | function: PyObject, 89 | ) -> PyResult<(Py, Py)> { 90 | let split_fn = move |data: &Vec| -> bool { 91 | Python::with_gil(|py| { 92 | let serialized_data = PyBytes::new(py, &data[..]); 93 | function 94 | .call1(py, (serialized_data,)) 95 | .unwrap() 96 | .extract(py) 97 | .unwrap() 98 | }) 99 | }; 100 | let (left_stream, right_stream) = self.split(split_fn); 101 | Ok(( 102 | PyOperatorStream::new(py, left_stream).unwrap(), 103 | PyOperatorStream::new(py, right_stream).unwrap(), 104 | )) 105 | } 106 | 107 | fn _timestamp_join( 108 | &self, 109 | py: Python<'_>, 110 | other: &PyStream, 111 | join_function: PyObject, 112 | ) -> PyResult> { 113 | let map_fn = move |data: &(Vec, Vec)| -> Vec { 114 | Python::with_gil(|py| { 115 | let serialized_data_left = PyBytes::new(py, &data.0[..]); 116 | let serialized_data_right = PyBytes::new(py, &data.1[..]); 117 | join_function 118 | .call1(py, (serialized_data_left, serialized_data_right)) 119 | .unwrap() 120 | .extract(py) 121 | .unwrap() 122 | }) 123 | }; 124 | PyOperatorStream::new(py, self.timestamp_join(other).map(map_fn)) 125 | } 126 | 127 | fn _concat(&self, py: Python<'_>, other: &PyStream) -> PyResult> { 128 | PyOperatorStream::new(py, self.concat(other)) 129 | } 130 | } 131 | 132 | impl Stream> for PyStream { 133 | fn id(&self) -> StreamId { 134 | self.id 135 | } 136 | } 137 | -------------------------------------------------------------------------------- /erdos/src/configuration.rs: 
-------------------------------------------------------------------------------- 1 | use std::net::SocketAddr; 2 | 3 | use tracing::Level; 4 | 5 | use crate::node::NodeId; 6 | 7 | /// Stores the configuration parameters of a [`node`](crate::node::Node). 8 | #[derive(Clone)] 9 | pub struct Configuration { 10 | /// The index of the node. 11 | pub index: NodeId, 12 | /// The number of OS threads the node will use. 13 | pub num_threads: usize, 14 | /// Mapping between node indices and data socket addresses. 15 | pub data_addresses: Vec, 16 | /// Mapping between node indices and control socket addresses. 17 | pub control_addresses: Vec, 18 | /// DOT file to export dataflow graph. 19 | pub graph_filename: Option, 20 | /// The logging level of the logger initialized by ERDOS. 21 | /// If `None`, ERDOS will not initialize a logger. 22 | /// Defaults to [`Level::DEBUG`] when compiling in debug mode, 23 | /// [`Level::INFO`] when compiling in release mode. 24 | /// 25 | /// While [`tracing`] provides extensions for connecting additional 26 | /// subscribers, note that these may impact performance. 27 | pub logging_level: Option, 28 | } 29 | 30 | impl Configuration { 31 | /// Creates a new node configuration. 32 | pub fn new( 33 | node_index: NodeId, 34 | data_addresses: Vec, 35 | control_addresses: Vec, 36 | num_threads: usize, 37 | ) -> Self { 38 | let log_level = if cfg!(debug_assertions) { 39 | Some(Level::DEBUG) 40 | } else { 41 | Some(Level::INFO) 42 | }; 43 | Self { 44 | index: node_index, 45 | num_threads, 46 | data_addresses, 47 | control_addresses, 48 | graph_filename: None, 49 | logging_level: log_level, 50 | } 51 | } 52 | 53 | /// Creates a node configuration from command line arguments. 
54 | pub fn from_args(args: &clap::ArgMatches) -> Self { 55 | let num_threads = args 56 | .value_of("threads") 57 | .unwrap() 58 | .parse() 59 | .expect("Unable to parse number of worker threads"); 60 | 61 | let data_addrs = args.value_of("data-addresses").unwrap(); 62 | let mut data_addresses: Vec = Vec::new(); 63 | for addr in data_addrs.split(',') { 64 | data_addresses.push(addr.parse().expect("Unable to parse socket address")); 65 | } 66 | let control_addrs = args.value_of("control-addresses").unwrap(); 67 | let mut control_addresses: Vec = Vec::new(); 68 | for addr in control_addrs.split(',') { 69 | control_addresses.push(addr.parse().expect("Unable to parse socket address")); 70 | } 71 | assert_eq!( 72 | data_addresses.len(), 73 | control_addresses.len(), 74 | "Each node must have 1 data address and 1 control address" 75 | ); 76 | let node_index = args 77 | .value_of("index") 78 | .unwrap() 79 | .parse() 80 | .expect("Unable to parse node index"); 81 | assert!( 82 | node_index < data_addresses.len(), 83 | "Node index is larger than number of available nodes" 84 | ); 85 | let graph_filename_arg = args.value_of("graph-filename").unwrap(); 86 | let graph_filename = if graph_filename_arg.is_empty() { 87 | None 88 | } else { 89 | Some(graph_filename_arg.to_string()) 90 | }; 91 | let log_level = match args.occurrences_of("verbose") { 92 | 0 => None, 93 | 1 => Some(Level::WARN), 94 | 2 => Some(Level::INFO), 95 | 3 => Some(Level::DEBUG), 96 | _ => Some(Level::TRACE), 97 | }; 98 | 99 | Self { 100 | index: node_index, 101 | num_threads, 102 | data_addresses, 103 | control_addresses, 104 | graph_filename, 105 | logging_level: log_level, 106 | } 107 | } 108 | 109 | /// Upon executing, exports the dataflow graph as a 110 | /// [DOT file](https://en.wikipedia.org/wiki/DOT_(graph_description_language)). 
111 | pub fn export_dataflow_graph(mut self, filename: &str) -> Self { 112 | self.graph_filename = Some(filename.to_string()); 113 | self 114 | } 115 | 116 | /// Sets the logging level. 117 | pub fn with_logging_level(mut self, level: Level) -> Self { 118 | self.logging_level = Some(level); 119 | self 120 | } 121 | 122 | /// ERDOS will not initialize a logger if this method is called. 123 | pub fn disable_logger(mut self) -> Self { 124 | self.logging_level = None; 125 | self 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /python/doc/source/operators.rst: -------------------------------------------------------------------------------- 1 | Operators 2 | ========= 3 | 4 | ERDOS operators process received data, and use streams to broadcast :py:class:`erdos.Message` 5 | and :py:class:`erdos.WatermarkMessage` objects to downstream operators. 6 | We provide a standard library of operators for common dataflow patterns. 7 | While the standard operators are general and versatile, some applications may 8 | implement custom operators to better optimize performance and take 9 | fine-grained control over exection. 10 | 11 | Operators are implemented as classes which implement a certain communication pattern. The built-in 12 | operators are subclassed based on the wanted communication pattern. For example, 13 | the `SendOp` from 14 | `python/examples/simple_pipeline.py `_ 15 | implements a :py:class:`erdos.operator.Source` operator because it does not receive any data, 16 | and sends messages on a single output stream. 17 | 18 | * The :py:class:`erdos.operator.Source` operator is used to write data on a single :py:class:`erdos.WriteStream`. 19 | * The :py:class:`erdos.operator.Sink` operator is used to read data from a single :py:class:`erdos.ReadStream`. 
20 | * The :py:class:`erdos.operator.OneInOneOut` operator is used to read data from a single :py:class:`erdos.ReadStream` 21 | and write data on a single :py:class:`erdos.WriteStream`. 22 | * The :py:class:`erdos.operator.TwoInOneOut` operator is used to read data from 2 :py:class:`erdos.ReadStream` 23 | s and write data on a single :py:class:`erdos.WriteStream`. 24 | * The :py:class:`erdos.operator.OneInTwoOut` operator is used to read data from a single :py:class:`erdos.ReadStream` 25 | s and write data on 2 :py:class:`erdos.WriteStream` s. 26 | 27 | Operators can support both push and pull-based models of execution by 28 | implementing methods defined for each operator. By implementing callbacks 29 | such as :py:meth:`erdos.operator.OneInOneOut.on_data()`, operators can process 30 | messages as they arrive. Moreover, operators can implement callbacks over watermarks 31 | (e.g. :py:meth:`erdos.operator.OneInOneOut.on_watermark()`) to ensure ordered 32 | processing over timestamps. ERDOS ensures lock-free, safe, and concurrent processing 33 | via a system-managed ordering of callbacks, which is implemented as a 34 | run queue for the system's multithreaded runtime. 35 | 36 | While ERDOS manages the execution of callbacks, some operators require 37 | more finegrained control. Operators can take manual control over the 38 | thread of execution by implementing the `run()` 39 | (e.g. :py:meth:`erdos.operator.OneInOneOut.run()`) method. 40 | *Callbacks are not invoked while run executes.* 41 | 42 | Operator API 43 | ------------ 44 | 45 | .. autoclass:: erdos.operator.BaseOperator 46 | :members: id, config, add_trace_event, get_runtime 47 | 48 | .. autoclass:: erdos.operator.Source 49 | :show-inheritance: 50 | :members: __new__, run, destroy 51 | 52 | .. autoclass:: erdos.operator.Sink 53 | :show-inheritance: 54 | :members: __new__, run, on_data, on_watermark, destroy 55 | 56 | .. 
autoclass:: erdos.operator.OneInOneOut 57 | :show-inheritance: 58 | :members: __new__, run, on_data, on_watermark, destroy 59 | 60 | .. autoclass:: erdos.operator.TwoInOneOut 61 | :show-inheritance: 62 | :members: __new__, run, on_left_data, on_right_data, on_watermark, destroy 63 | 64 | .. autoclass:: erdos.operator.OneInTwoOut 65 | :show-inheritance: 66 | :members: __new__, run, on_data, on_watermark, destroy 67 | 68 | Operator Config 69 | --------------- 70 | 71 | .. autoclass:: erdos.config.OperatorConfig 72 | :members: name, flow_watermarks, log_file_name, csv_log_file_name, profile_file_name 73 | 74 | Context API 75 | ----------- 76 | 77 | .. autoclass:: erdos.context.SinkContext 78 | :members: 79 | 80 | .. autoclass:: erdos.context.OneInOneOutContext 81 | :members: 82 | 83 | .. autoclass:: erdos.context.TwoInOneOutContext 84 | :members: 85 | 86 | .. autoclass:: erdos.context.OneInTwoOutContext 87 | :members: 88 | 89 | Examples 90 | -------- 91 | 92 | Full example at `python/examples/simple_pipeline.py `_. 93 | 94 | 95 | Periodically Publishing Data 96 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 97 | .. literalinclude:: _literalinclude/python_examples/simple_pipeline.py 98 | :pyobject: SendOp 99 | 100 | 101 | Processing Data via Callbacks 102 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 103 | .. literalinclude:: _literalinclude/python_examples/simple_pipeline.py 104 | :pyobject: CallbackOp 105 | 106 | Processing Data by Pulling Messages 107 | ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ 108 | .. 
literalinclude:: _literalinclude/python_examples/simple_pipeline.py 109 | :pyobject: PullOp 110 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/ros/to_ros_operator.rs: -------------------------------------------------------------------------------- 1 | use crate::dataflow::{context::SinkContext, operator::Sink, operators::ros::*, Data, Message}; 2 | use serde::Deserialize; 3 | use std::sync::Arc; 4 | 5 | /// Takes an input ERDOS stream and publishes to a ROS topic using the provided message conversion 6 | /// function. 7 | /// 8 | /// The conversion function transforms a [`Message`] into a ROS message which implements the 9 | /// [`rosrust::Message`] trait. 10 | /// 11 | /// See [`rosrust_msg`](https://lib.rs/crates/rosrust_msg), 12 | /// the [ROS `std_msgs` package](http://wiki.ros.org/std_msgs), 13 | /// and the [ROS `common_msgs` package](http://wiki.ros.org/common_msgs) 14 | /// for a variety of supported and commonly-used ROS messages. 15 | /// 16 | /// # Example 17 | /// The following example shows how to use a [`ToRosOperator`] with a conversion function which 18 | /// takes a Rust [`i32`] and converts it to a ROS message with 19 | /// [`rosrust_msg::std_msgs::Int32`](http://docs.ros.org/en/api/std_msgs/html/msg/Int32.html) 20 | /// data. 21 | /// 22 | /// Assume that `source_stream` is an ERDOS stream sending the correct messages. 
23 | /// 24 | /// ``` 25 | /// # use erdos::{ 26 | /// # dataflow::{Message, operators::ros::ToRosOperator, stream::IngestStream}, 27 | /// # OperatorConfig 28 | /// # }; 29 | /// # 30 | /// # pub mod rosrust_msg { 31 | /// # pub mod std_msgs { 32 | /// # use std::io; 33 | /// # 34 | /// # #[derive(Debug, Clone, PartialEq, Default)] 35 | /// # pub struct Int32 { 36 | /// # pub data: i32, 37 | /// # } 38 | /// # 39 | /// # impl rosrust::Message for Int32 { 40 | /// # fn msg_definition() -> String { String::new() } 41 | /// # fn md5sum() -> String { String::new() } 42 | /// # fn msg_type() -> String { String::new() } 43 | /// # } 44 | /// # 45 | /// # impl rosrust::RosMsg for Int32 { 46 | /// # fn encode(&self, mut w: W) -> io::Result<()> { Ok(()) } 47 | /// # fn decode(mut r: R) -> io::Result { Ok(Default::default()) } 48 | /// # } 49 | /// # } 50 | /// # }; 51 | /// fn erdos_int_to_ros_int(input: &Message) -> Vec { 52 | /// match input.data() { 53 | /// Some(x) => { 54 | /// vec![rosrust_msg::std_msgs::Int32 { 55 | /// data: *x, 56 | /// }] 57 | /// } 58 | /// None => vec![], 59 | /// } 60 | /// } 61 | /// 62 | /// # let source_stream = IngestStream::new(); 63 | /// let ros_sink_config = OperatorConfig::new().name("ToRosInt32"); 64 | /// erdos::connect_sink( 65 | /// move || -> ToRosOperator { 66 | /// ToRosOperator::new("int_topic", erdos_int_to_ros_int) 67 | /// }, 68 | /// || {}, 69 | /// ros_sink_config, 70 | /// &source_stream, 71 | /// ); 72 | /// ``` 73 | 74 | pub struct ToRosOperator 75 | where 76 | T: Data + for<'a> Deserialize<'a>, 77 | { 78 | publisher: rosrust::Publisher, 79 | to_ros_msg: Arc) -> Vec + Send + Sync>, 80 | } 81 | 82 | impl ToRosOperator 83 | where 84 | T: Data + for<'a> Deserialize<'a>, 85 | { 86 | pub fn new(topic: &str, to_ros_msg: F) -> Self 87 | where 88 | F: 'static + Fn(&Message) -> Vec + Send + Sync, 89 | { 90 | Self { 91 | publisher: rosrust::publish(topic, ROS_QUEUE_SIZE).unwrap(), 92 | to_ros_msg: Arc::new(to_ros_msg), 93 | } 94 
| } 95 | 96 | // Converts ERDOS message using conversion function and publishes all messages in 97 | // returned vector 98 | fn convert_and_publish(&mut self, ctx: &mut SinkContext<()>, erdos_msg: &Message) { 99 | let ros_msg_vec = (self.to_ros_msg)(erdos_msg); 100 | 101 | for ros_msg in ros_msg_vec.into_iter() { 102 | tracing::trace!( 103 | "{} @ {:?}: Sending {:?}", 104 | ctx.operator_config().get_name(), 105 | ctx.timestamp().clone(), 106 | ros_msg, 107 | ); 108 | // Publishes converted message on topic. 109 | self.publisher.send(ros_msg).unwrap(); 110 | } 111 | } 112 | } 113 | 114 | impl Sink<(), T> for ToRosOperator 115 | where 116 | T: Data + for<'a> Deserialize<'a>, 117 | { 118 | fn on_data(&mut self, ctx: &mut SinkContext<()>, data: &T) { 119 | let timestamp = ctx.timestamp().clone(); 120 | self.convert_and_publish(ctx, &Message::new_message(timestamp, data.clone())); 121 | } 122 | 123 | fn on_watermark(&mut self, ctx: &mut SinkContext<()>) { 124 | let timestamp = ctx.timestamp().clone(); 125 | self.convert_and_publish(ctx, &Message::new_watermark(timestamp)); 126 | } 127 | } 128 | -------------------------------------------------------------------------------- /python/doc/source/intro.rst: -------------------------------------------------------------------------------- 1 | What is ERDOS? 2 | ============== 3 | 4 | *ERDOS is a platform for developing self-driving cars and robotics 5 | applications.* 6 | 7 | The system is built using techniques from streaming dataflow systems which is 8 | reflected by the API. 9 | Applications are modeled as directed graphs, in which data flows through 10 | :doc:`streams ` and is processed by :doc:`operators `. 11 | Because applications often resemble a sequence of connected operators, 12 | an ERDOS application may also be referred to as a *pipeline*. 
13 | 14 | 15 | Example 16 | ------- 17 | 18 | The following example demonstrates a toy robotics application which uses 19 | semantic segmentation and the bounding boxes of detected objects to control a 20 | robot. 21 | The example consists of the driver part of the program, which is responsible 22 | for connecting operators via streams. For information on building operators, see 23 | :doc:`operators `. 24 | 25 | .. code-block:: python 26 | 27 | # Create a camera operator which generates a stream of RGB images. 28 | camera_frames = erdos.connect_source(CameraOp, erdos.OperatorConfig()) 29 | # Connect an object detection operator which uses the provided model to 30 | # detect objects and compute bounding boxes. 31 | bounding_boxes = erdos.connect_one_in_one_out( 32 | ObjectDetectorOp, 33 | erdos.OperatorConfig(), 34 | camera_frames, 35 | model="models/ssd_mobilenet_v1_coco") 36 | # Connect semantic segmentation operator to the camera which computes the 37 | # semantic segmentation for each image. 38 | segmentation = erdos.connect_one_in_one_out(SegmentationOp, 39 | erdos.OperatorConfig(), 40 | camera_frames, 41 | model="models/drn_d_22_cityscapes") 42 | # Connect an action operator to propose actions from provided features. 43 | actions = erdos.connect_two_in_one_out(ActionOp, erdos.OperatorConfig(), 44 | bounding_boxes, segmentation) 45 | # Create a robot operator which interfaces with the robot to apply actions. 46 | erdos.connect_sink(RobotOp, erdos.OperatorConfig(), actions) 47 | 48 | # Execute the application. 49 | erdos.run() 50 | 51 | Further examples are available on 52 | `GitHub `_ 53 | 54 | 55 | Driver 56 | ------ 57 | 58 | The driver section of the program connects operators together using streams to 59 | build an ERDOS application which may then be executed. 60 | The driver is typically the main section of the program. 61 | 62 | The driver may also interact with a running ERDOS application. 
63 | Using the :py:class:`.IngestStream`, the driver can send 64 | data to operators on a stream. 65 | The :py:class:`.ExtractStream` allows the driver to read 66 | data sent from an operator. 67 | 68 | 69 | Determinism 70 | ----------- 71 | 72 | ERDOS provides mechanisms to enable the building of deterministic 73 | applications. 74 | For instance, processing sets of messages separated by watermarks using 75 | watermark callbacks can turn ERDOS pipelines into 76 | `Kahn process networks `_. 77 | 78 | 79 | Performance 80 | ----------- 81 | 82 | ERDOS is designed for low latency. Self-driving car pipelines require 83 | end-to-end deadlines on the order of hundreds of milliseconds for safe 84 | driving. Similarly, self-driving cars typically process gigabytes per 85 | second of data on small clusters. Therefore, ERDOS is optimized to 86 | send small amounts of data (gigabytes as opposed to terabytes) 87 | as quickly as possible. 88 | 89 | For performance-sensitive applications, it is recommended to use the Rust API 90 | as Python introduces significant overheads (e.g. serialization and 91 | reduced parallelism from the 92 | `GIL `_). 93 | 94 | View the `codebase on GitHub `_. 95 | 96 | You can export the dataflow graph as a 97 | `DOT file `_ 98 | by setting the ``graph_filename`` argument in :py:func:`erdos.run`. 99 | 100 | 101 | More Information 102 | ---------------- 103 | 104 | To read more about the ideas behind ERDOS, refer to our paper: 105 | `*D3: A Dynamic Deadline-Driven Approach for Building Autonomous Vehicles* `_. 106 | If you find ERDOS useful to your work, please consider citing our paper: 107 | 108 | .. 
code-block:: bibtex 109 | 110 | @inproceedings{gog2022d3, 111 | title={D3: a dynamic deadline-driven approach for building autonomous vehicles}, 112 | author={Gog, Ionel and Kalra, Sukrit and Schafhalter, Peter and Gonzalez, Joseph E and Stoica, Ion}, 113 | booktitle={Proceedings of the Seventeenth European Conference on Computer Systems}, 114 | pages={453--471}, 115 | year={2022} 116 | } 117 | -------------------------------------------------------------------------------- /python/erdos/timestamp.py: -------------------------------------------------------------------------------- 1 | from typing import List, Optional 2 | 3 | from erdos.internal import PyTimestamp 4 | 5 | 6 | class Timestamp: 7 | """An ERDOS timestamp representing the time for which a 8 | :py:class:`Message` or :py:class:`WatermarkMessage` is sent. 9 | """ 10 | 11 | def __init__( 12 | self, 13 | timestamp: Optional["Timestamp"] = None, 14 | coordinates: Optional[List[int]] = None, 15 | is_top: bool = False, 16 | is_bottom: bool = False, 17 | _py_timestamp: Optional[PyTimestamp] = None, 18 | ): 19 | """Constructs a :py:class:`Timestamp`. 20 | 21 | Args: 22 | timestamp: Constructs a :py:class:`Timestamp` from another 23 | :py:class:`Timestamp` 24 | coordinates: Constructs a :py:class:`Timestamp` from a sequence of 25 | integers representing the time. 26 | is_top: Constructs a :py:class:`Timestamp` representing the final 27 | point in time. 28 | is_bottom: Constructs a :py:class:`Timestamp` representing the 29 | initial point in time. 30 | _py_timestamp: Constructs a :py:class:`Timestamp` from an internal 31 | timestamp object. This argument should not be provided by the 32 | user. 33 | """ 34 | if _py_timestamp is not None: 35 | # Initialize from PyTimestamp, if available. 36 | self._py_timestamp = _py_timestamp 37 | elif timestamp is not None: 38 | # If Timestamp is available, copy its contents. 
39 | self._py_timestamp = timestamp._py_timestamp 40 | else: 41 | if is_top and not is_bottom and coordinates is None: 42 | self._py_timestamp = PyTimestamp(coordinates, is_top, is_bottom) 43 | elif is_bottom and not is_top and coordinates is None: 44 | self._py_timestamp = PyTimestamp(coordinates, is_top, is_bottom) 45 | elif coordinates is not None and not is_bottom and not is_top: 46 | self._py_timestamp = PyTimestamp(coordinates, is_top, is_bottom) 47 | else: 48 | raise ValueError( 49 | "Timestamp should either have coordinates" 50 | "or be either Top or Bottom" 51 | ) 52 | 53 | def _to_py_timestamp(self) -> PyTimestamp: 54 | return self._py_timestamp 55 | 56 | def __repr__(self) -> str: 57 | return str(self._py_timestamp) 58 | 59 | def __str__(self) -> str: 60 | return repr(self._py_timestamp) 61 | 62 | def __eq__(self, timestamp: object) -> bool: 63 | if not isinstance(timestamp, Timestamp): 64 | raise ValueError(f"Equality with '{type(timestamp)}' is not implemented.") 65 | return self._py_timestamp == timestamp._py_timestamp 66 | 67 | def __ne__(self, timestamp: object) -> bool: 68 | if not isinstance(timestamp, Timestamp): 69 | raise ValueError(f"Equality with '{type(timestamp)}' is not implemented.") 70 | return self._py_timestamp != timestamp._py_timestamp 71 | 72 | def __lt__(self, timestamp: object) -> bool: 73 | if not isinstance(timestamp, Timestamp): 74 | raise ValueError(f"Comparison with '{type(timestamp)}' is not implemented.") 75 | return self._py_timestamp < timestamp._py_timestamp 76 | 77 | def __le__(self, timestamp: object) -> bool: 78 | if not isinstance(timestamp, Timestamp): 79 | raise ValueError(f"Comparison with '{type(timestamp)}' is not implemented.") 80 | return self._py_timestamp <= timestamp._py_timestamp 81 | 82 | def __gt__(self, timestamp: object) -> bool: 83 | if not isinstance(timestamp, Timestamp): 84 | raise ValueError(f"Comparison with '{type(timestamp)}' is not implemented.") 85 | return self._py_timestamp > 
timestamp._py_timestamp 86 | 87 | def __ge__(self, timestamp: object) -> bool: 88 | if not isinstance(timestamp, Timestamp): 89 | raise ValueError(f"Comparison with '{type(timestamp)}' is not implemented.") 90 | return self._py_timestamp >= timestamp._py_timestamp 91 | 92 | def __hash__(self) -> int: 93 | py_timestamp_coordinates = self._py_timestamp.coordinates() 94 | coordinates = ( 95 | tuple(py_timestamp_coordinates) 96 | if py_timestamp_coordinates is not None 97 | else None 98 | ) 99 | return hash((coordinates, self.is_top, self.is_bottom)) 100 | 101 | @property 102 | def coordinates(self) -> Optional[List[int]]: 103 | """A list of integers representing the time.""" 104 | return self._py_timestamp.coordinates() 105 | 106 | @property 107 | def is_top(self) -> bool: 108 | """Whether the timestamp represents the final point in time.""" 109 | return self._py_timestamp.is_top() 110 | 111 | @property 112 | def is_bottom(self) -> bool: 113 | """Whether the timestamp represents the initial point in time.""" 114 | return self._py_timestamp.is_bottom() 115 | -------------------------------------------------------------------------------- /erdos/src/dataflow/stream/read_stream.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use crate::{ 4 | communication::{RecvEndpoint, TryRecvError}, 5 | dataflow::{Data, Message}, 6 | }; 7 | 8 | use super::{ 9 | errors::{ReadError, TryReadError}, 10 | StreamId, 11 | }; 12 | 13 | /// A [`ReadStream`] allows operators to pull [`Message`]s from a [stream](crate::dataflow::stream). 14 | /// 15 | /// # Example 16 | /// The following example shows an operator that prints out messages received from a [`ReadStream`]. 
17 | /// ``` 18 | /// # use std::marker::PhantomData; 19 | /// # use erdos::dataflow::{operator::{OperatorConfig, Sink}, context::SinkContext, Data, ReadStream}; 20 | /// # 21 | /// struct PrintMessageOperator { 22 | /// phantom: PhantomData, 23 | /// } 24 | /// 25 | /// impl Sink<(), D> for PrintMessageOperator { 26 | /// # fn on_data(&mut self, ctx: &mut SinkContext<()>, data: &D) {} 27 | /// # fn on_watermark(&mut self, ctx: &mut SinkContext<()>) {} 28 | /// # 29 | /// fn run(&mut self, config: &OperatorConfig, read_stream: &mut ReadStream) { 30 | /// while let Ok(message) = read_stream.read() { 31 | /// println!("Recieved message: {:?}", message); 32 | /// } 33 | /// } 34 | /// } 35 | /// ``` 36 | /// 37 | /// The examples in [`ExtractStream`](crate::dataflow::stream::ExtractStream) show how to 38 | /// pull data from a stream in the driver. 39 | pub struct ReadStream { 40 | /// The id of the stream. 41 | id: StreamId, 42 | /// The name of the stream. 43 | name: String, 44 | /// Whether the stream is closed. 45 | is_closed: bool, 46 | /// The endpoint on which the stream receives data. 47 | recv_endpoint: RecvEndpoint>>, 48 | } 49 | 50 | impl ReadStream { 51 | pub(crate) fn new( 52 | id: StreamId, 53 | name: &str, 54 | recv_endpoint: RecvEndpoint>>, 55 | ) -> Self { 56 | Self { 57 | id, 58 | name: name.to_string(), 59 | is_closed: false, 60 | recv_endpoint, 61 | } 62 | } 63 | 64 | /// Returns `true` if a top watermark message was sent or the [`ReadStream`] failed to set up. 65 | pub fn is_closed(&self) -> bool { 66 | self.is_closed 67 | } 68 | 69 | /// Non-blocking read from the [`ReadStream`]. 70 | /// 71 | /// Returns the Message available on the [`ReadStream`], or an [`Empty`](TryReadError::Empty) 72 | /// if no message is available. 
73 | pub fn try_read(&mut self) -> Result, TryReadError> { 74 | if self.is_closed { 75 | return Err(TryReadError::Closed); 76 | } 77 | let result = self 78 | .recv_endpoint 79 | .try_read() 80 | .map(|msg| Message::clone(&msg)) 81 | .map_err(TryReadError::from); 82 | if result 83 | .as_ref() 84 | .map(Message::is_top_watermark) 85 | .unwrap_or(false) 86 | { 87 | self.is_closed = true; 88 | } 89 | result 90 | } 91 | 92 | /// Blocking read from the [`ReadStream`]. 93 | /// 94 | /// Returns the Message available on the [`ReadStream`]. 95 | pub fn read(&mut self) -> Result, ReadError> { 96 | if self.is_closed { 97 | return Err(ReadError::Closed); 98 | } 99 | // Poll for the next message 100 | // TODO: call async_read and use some kind of runtime. 101 | let result = loop { 102 | match self.recv_endpoint.try_read() { 103 | Ok(msg) => { 104 | break Ok(Message::clone(&msg)); 105 | } 106 | Err(TryRecvError::Empty) => (), 107 | Err(TryRecvError::Disconnected) => { 108 | break Err(ReadError::Disconnected); 109 | } 110 | Err(TryRecvError::BincodeError(_)) => { 111 | break Err(ReadError::SerializationError); 112 | } 113 | } 114 | }; 115 | 116 | if result 117 | .as_ref() 118 | .map(Message::is_top_watermark) 119 | .unwrap_or(false) 120 | { 121 | self.is_closed = true; 122 | } 123 | result 124 | } 125 | 126 | pub(crate) async fn async_read(&mut self) -> Result>, ReadError> { 127 | if self.is_closed { 128 | return Err(ReadError::Closed); 129 | } 130 | 131 | // Poll for the next message 132 | match self.recv_endpoint.read().await { 133 | Ok(msg) => Ok(msg), 134 | // TODO: better error handling. 135 | _ => Err(ReadError::Disconnected), 136 | } 137 | 138 | // TODO: Close the stream? 139 | } 140 | 141 | /// Get the ID given to the stream by the constructor. 142 | pub fn id(&self) -> StreamId { 143 | self.id 144 | } 145 | 146 | /// Get the name of the stream. 
147 | pub fn name(&self) -> String { 148 | self.name.clone() 149 | } 150 | } 151 | 152 | unsafe impl Send for ReadStream {} 153 | unsafe impl Sync for ReadStream {} 154 | -------------------------------------------------------------------------------- /python/erdos/context.py: -------------------------------------------------------------------------------- 1 | from typing import Generic, TypeVar 2 | 3 | from erdos.config import OperatorConfig 4 | from erdos.internal import PyTimestamp 5 | from erdos.streams import WriteStream 6 | from erdos.timestamp import Timestamp 7 | 8 | T = TypeVar("T") 9 | U = TypeVar("U") 10 | V = TypeVar("V") 11 | 12 | 13 | class SinkContext: 14 | """A :py:class:`SinkContext` instance enables developers to retrieve 15 | metadata about the current invocation of either a message or a watermark 16 | callback in a :py:class:`.Sink` operator. 17 | 18 | Attributes: 19 | timestamp (:py:class:`.Timestamp`): The timestamp of the current 20 | invocation of the callback. 21 | config (:py:class:`.OperatorConfig`): The operator config generated by 22 | the driver upon connection of the operator to the graph. 23 | """ 24 | 25 | def __init__(self, timestamp: PyTimestamp, config: OperatorConfig): 26 | self.timestamp = Timestamp(_py_timestamp=timestamp) 27 | self.config = config 28 | 29 | def __str__(self) -> str: 30 | return "SinkContext(Timestamp={}, Config={})".format( 31 | self.timestamp, self.config 32 | ) 33 | 34 | 35 | class OneInOneOutContext(Generic[T]): 36 | """A :py:class:`OneInOneOutContext` instance enables developers to retrieve 37 | metadata about the current invocation of either a message or a watermark 38 | callback in a :py:class:`.OneInOneOut` operator. 39 | 40 | Attributes: 41 | timestamp (:py:class:`.Timestamp`): The timestamp of the current 42 | invocation of the callback. 43 | config (:py:class:`.OperatorConfig`): The operator config generated by 44 | the driver upon connection of the operator to the graph. 
45 | write_stream (:py:class:`.WriteStream`): The write stream to send 46 | results to downstream operators. 47 | """ 48 | 49 | def __init__( 50 | self, 51 | timestamp: PyTimestamp, 52 | config: OperatorConfig, 53 | write_stream: WriteStream[T], 54 | ): 55 | self.timestamp = Timestamp(_py_timestamp=timestamp) 56 | self.config = config 57 | self.write_stream = write_stream 58 | 59 | def __str__(self) -> str: 60 | return "OneInOneOutContext(Timestamp={}, Config={}, WriteStream={})".format( 61 | self.timestamp, self.config, self.write_stream.name 62 | ) 63 | 64 | 65 | class OneInTwoOutContext(Generic[T, U]): 66 | """A :py:class:`OneInTwoOutContext` instance enables developers to retrieve 67 | metadata about the current invocation of either a message or a watermark 68 | callback in a :py:class:`.OneInTwoOut` operator. 69 | 70 | Attributes: 71 | timestamp (:py:class:`.Timestamp`): The timestamp of the current 72 | invocation of the callback. 73 | config (:py:class:`.OperatorConfig`): The operator config generated 74 | by the driver upon connection of the operator to the graph. 75 | left_write_stream (:py:class:`.WriteStream`): The first write stream to 76 | send results to downstream operators. 77 | right_write_stream (:py:class:`.WriteStream`): The second write stream 78 | to send results to downstream operators. 
79 | """ 80 | 81 | def __init__( 82 | self, 83 | timestamp: PyTimestamp, 84 | config: OperatorConfig, 85 | left_write_stream: WriteStream[T], 86 | right_write_stream: WriteStream[U], 87 | ): 88 | self.timestamp = Timestamp(_py_timestamp=timestamp) 89 | self.config = config 90 | self.left_write_stream = left_write_stream 91 | self.right_write_stream = right_write_stream 92 | 93 | def __str__(self) -> str: 94 | return "OneInTwoOutContext(Timestamp={}, Config={}, \ 95 | Left WriteStream={}, Right WriteStream={})".format( 96 | self.timestamp, 97 | self.config, 98 | self.left_write_stream.name, 99 | self.right_write_stream.name, 100 | ) 101 | 102 | 103 | class TwoInOneOutContext(Generic[T]): 104 | """A :py:class:`TwoInOneOutContext` instance enables developers to retrieve 105 | metadata about the current invocation of either a message or a watermark 106 | callback in a :py:class:`.TwoInOneOut` operator. 107 | 108 | Attributes: 109 | timestamp (:py:class:`.Timestamp`): The timestamp of the current 110 | invocation of the callback. 111 | config (:py:class:`.OperatorConfig`): The operator config generated by 112 | the driver upon connection of the operator to the graph. 113 | write_stream (:py:class:`.WriteStream`): The write stream to send 114 | results to downstream operators. 
115 | """ 116 | 117 | def __init__( 118 | self, 119 | timestamp: PyTimestamp, 120 | config: OperatorConfig, 121 | write_stream: WriteStream[T], 122 | ): 123 | self.timestamp = Timestamp(_py_timestamp=timestamp) 124 | self.config = config 125 | self.write_stream = write_stream 126 | 127 | def __str__(self) -> str: 128 | return "TwoInOneOutContext(Timestamp={}, Config={}, WriteStream={})".format( 129 | self.timestamp, 130 | self.config, 131 | self.write_stream.name, 132 | ) 133 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/map.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | 3 | use serde::Deserialize; 4 | 5 | use crate::dataflow::{ 6 | context::OneInOneOutContext, 7 | message::Message, 8 | operator::{OneInOneOut, OperatorConfig}, 9 | stream::{OperatorStream, Stream, WriteStreamT}, 10 | Data, 11 | }; 12 | 13 | /// Maps an incoming stream of type D to a stream of type `I::Item` using the provided 14 | /// function. 15 | /// 16 | /// # Example 17 | /// The below example shows how to use a [`FlatMapOperator`] to double an incoming stream of usize 18 | /// messages, and return them. 
19 | /// 20 | /// ``` 21 | /// # use erdos::dataflow::{stream::IngestStream, operator::{OperatorConfig}, operators::{FlatMapOperator}}; 22 | /// # let source_stream = IngestStream::new(); 23 | /// let map_stream = erdos::connect_one_in_one_out( 24 | /// || -> FlatMapOperator { 25 | /// FlatMapOperator::new(|x: &usize| -> Vec { vec![2 * x] }) 26 | /// }, 27 | /// || {}, 28 | /// OperatorConfig::new().name("FlatMapOperator"), 29 | /// &source_stream, 30 | /// ); 31 | /// ``` 32 | pub struct FlatMapOperator 33 | where 34 | D: Data + for<'a> Deserialize<'a>, 35 | I: IntoIterator, 36 | I::Item: Data + for<'a> Deserialize<'a>, 37 | { 38 | flat_map_fn: Arc I + Send + Sync>, 39 | } 40 | 41 | impl FlatMapOperator 42 | where 43 | D: Data + for<'a> Deserialize<'a>, 44 | I: IntoIterator, 45 | I::Item: Data + for<'a> Deserialize<'a>, 46 | { 47 | pub fn new(flat_map_fn: F) -> Self 48 | where 49 | F: 'static + Fn(&D) -> I + Send + Sync, 50 | { 51 | Self { 52 | flat_map_fn: Arc::new(flat_map_fn), 53 | } 54 | } 55 | } 56 | 57 | impl OneInOneOut<(), D, I::Item> for FlatMapOperator 58 | where 59 | D: Data + for<'a> Deserialize<'a>, 60 | I: IntoIterator, 61 | I::Item: Data + for<'a> Deserialize<'a>, 62 | { 63 | fn on_data(&mut self, ctx: &mut OneInOneOutContext<(), I::Item>, data: &D) { 64 | for item in (self.flat_map_fn)(data).into_iter() { 65 | tracing::trace!( 66 | "{} @ {:?}: received {:?} and sending {:?}", 67 | ctx.operator_config().get_name(), 68 | ctx.timestamp(), 69 | data, 70 | item, 71 | ); 72 | 73 | let timestamp = ctx.timestamp().clone(); 74 | let msg = Message::new_message(timestamp, item); 75 | ctx.write_stream().send(msg).unwrap(); 76 | } 77 | } 78 | 79 | fn on_watermark(&mut self, _ctx: &mut OneInOneOutContext<(), I::Item>) {} 80 | } 81 | 82 | /// Extension trait for mapping a stream of type `D1` to a stream of type `D2`. 83 | /// 84 | /// Names the [`FlatMapOperator`] using the name of the incoming stream. 
85 | pub trait Map 86 | where 87 | D1: Data + for<'a> Deserialize<'a>, 88 | D2: Data + for<'a> Deserialize<'a>, 89 | { 90 | /// Applies the provided function to each element, and sends the return value. 91 | /// 92 | /// # Example 93 | /// ``` 94 | /// # use erdos::dataflow::{stream::{IngestStream, Stream}, operator::OperatorConfig, operators::Map}; 95 | /// # let source_stream = IngestStream::new(); 96 | /// let map_stream = source_stream.map(|x: &usize| -> usize { 2 * x }); 97 | /// ``` 98 | fn map(&self, map_fn: F) -> OperatorStream 99 | where 100 | F: 'static + Fn(&D1) -> D2 + Send + Sync + Clone; 101 | 102 | /// Applies the provided function to each element, and sends each returned value. 103 | /// 104 | /// # Example 105 | /// ``` 106 | /// # use erdos::dataflow::{stream::{IngestStream, Stream}, operator::OperatorConfig, operators::Map}; 107 | /// # let source_stream = IngestStream::new(); 108 | /// let map_stream = source_stream.flat_map(|x: &usize| 0..*x ); 109 | /// ``` 110 | fn flat_map(&self, flat_map_fn: F) -> OperatorStream 111 | where 112 | F: 'static + Fn(&D1) -> I + Send + Sync + Clone, 113 | I: 'static + IntoIterator; 114 | } 115 | 116 | impl Map for S 117 | where 118 | S: Stream, 119 | D1: Data + for<'a> Deserialize<'a>, 120 | D2: Data + for<'a> Deserialize<'a>, 121 | { 122 | fn map(&self, map_fn: F) -> OperatorStream 123 | where 124 | F: 'static + Fn(&D1) -> D2 + Send + Sync + Clone, 125 | { 126 | let op_name = format!("MapOp_{}", self.id()); 127 | 128 | crate::connect_one_in_one_out( 129 | move || -> FlatMapOperator { 130 | let map_fn = map_fn.clone(); 131 | FlatMapOperator::new(move |x| std::iter::once(map_fn(x))) 132 | }, 133 | || {}, 134 | OperatorConfig::new().name(&op_name), 135 | self, 136 | ) 137 | } 138 | 139 | fn flat_map(&self, flat_map_fn: F) -> OperatorStream 140 | where 141 | F: 'static + Fn(&D1) -> I + Send + Sync + Clone, 142 | I: 'static + IntoIterator, 143 | { 144 | let op_name = format!("FlatMapOp_{}", self.id()); 145 | 146 | 
crate::connect_one_in_one_out( 147 | move || -> FlatMapOperator { FlatMapOperator::new(flat_map_fn.clone()) }, 148 | || {}, 149 | OperatorConfig::new().name(&op_name), 150 | self, 151 | ) 152 | } 153 | } 154 | -------------------------------------------------------------------------------- /erdos/src/communication/message_codec.rs: -------------------------------------------------------------------------------- 1 | use byteorder::{ByteOrder, NetworkEndian, WriteBytesExt}; 2 | use bytes::{BufMut, BytesMut}; 3 | use std::fmt::Debug; 4 | use tokio_util::codec::{Decoder, Encoder}; 5 | 6 | use crate::communication::{CodecError, InterProcessMessage, MessageMetadata}; 7 | 8 | const HEADER_SIZE: usize = 8; 9 | 10 | #[derive(Debug)] 11 | enum DecodeStatus { 12 | Header, 13 | Metadata { 14 | metadata_size: usize, 15 | data_size: usize, 16 | }, 17 | Data { 18 | data_size: usize, 19 | }, 20 | } 21 | 22 | /// Encodes messages into bytes, and decodes bytes into an [`InterProcessMessage`]. 23 | /// 24 | /// For each message, the codec first writes the size of its message header, 25 | /// then the message header, and finally the content of the message. 26 | #[derive(Debug)] 27 | pub struct MessageCodec { 28 | /// Current part of the message to decode. 29 | status: DecodeStatus, 30 | msg_metadata: Option, 31 | } 32 | 33 | impl MessageCodec { 34 | pub fn new() -> MessageCodec { 35 | MessageCodec { 36 | status: DecodeStatus::Header, 37 | msg_metadata: None, 38 | } 39 | } 40 | } 41 | 42 | impl Decoder for MessageCodec { 43 | type Item = InterProcessMessage; 44 | type Error = CodecError; 45 | 46 | /// Decodes a sequence of bytes into an InterProcessMessage. 47 | /// 48 | /// Reads the header size, then the header, and finally the message. 49 | /// Reserves memory for the entire message to reduce upon reading the header 50 | /// costly memory allocations. 
51 | fn decode(&mut self, buf: &mut BytesMut) -> Result, CodecError> { 52 | match self.status { 53 | // Decode the header and reserve 54 | DecodeStatus::Header => { 55 | if buf.len() >= HEADER_SIZE { 56 | let header = buf.split_to(HEADER_SIZE); 57 | let metadata_size = NetworkEndian::read_u32(&header[0..4]) as usize; 58 | let data_size = NetworkEndian::read_u32(&header[4..8]) as usize; 59 | self.status = DecodeStatus::Metadata { 60 | metadata_size, 61 | data_size, 62 | }; 63 | // Reserve space in the buffer for the rest of the message and the next header. 64 | buf.reserve(metadata_size + data_size + HEADER_SIZE); 65 | self.decode(buf) 66 | } else { 67 | Ok(None) 68 | } 69 | } 70 | // Decode the metadata. 71 | DecodeStatus::Metadata { 72 | metadata_size, 73 | data_size, 74 | } => { 75 | if buf.len() >= metadata_size { 76 | let metadata_bytes = buf.split_to(metadata_size); 77 | let metadata: MessageMetadata = 78 | bincode::deserialize(&metadata_bytes).map_err(CodecError::BincodeError)?; 79 | self.msg_metadata = Some(metadata); 80 | self.status = DecodeStatus::Data { data_size }; 81 | self.decode(buf) 82 | } else { 83 | Ok(None) 84 | } 85 | } 86 | // Decode the data. 87 | DecodeStatus::Data { data_size } => { 88 | if buf.len() >= data_size { 89 | let bytes = buf.split_to(data_size); 90 | let msg = InterProcessMessage::new_serialized( 91 | bytes, 92 | self.msg_metadata.take().unwrap(), 93 | ); 94 | self.status = DecodeStatus::Header; 95 | Ok(Some(msg)) 96 | } else { 97 | Ok(None) 98 | } 99 | } 100 | } 101 | } 102 | } 103 | 104 | impl Encoder for MessageCodec { 105 | type Error = CodecError; 106 | 107 | /// Encodes a InterProcessMessage into a buffer. 108 | /// 109 | /// First writes the header_size, then the header, and finally the 110 | /// serialized message. 111 | fn encode(&mut self, msg: InterProcessMessage, buf: &mut BytesMut) -> Result<(), CodecError> { 112 | // Serialize and write the header. 
113 | let (metadata, data) = match msg { 114 | InterProcessMessage::Deserialized { metadata, data } => (metadata, data), 115 | InterProcessMessage::Serialized { 116 | metadata: _, 117 | bytes: _, 118 | } => unreachable!(), 119 | }; 120 | 121 | // Allocate memory in the buffer for serialized metadata and data 122 | // to reduce memory allocations. 123 | let metadata_size = bincode::serialized_size(&metadata).map_err(CodecError::from)?; 124 | let data_size = data.serialized_size().unwrap(); 125 | buf.reserve(HEADER_SIZE + metadata_size as usize + data_size); 126 | 127 | // Serialize directly into the buffer. 128 | let mut writer = buf.writer(); 129 | writer.write_u32::(metadata_size as u32)?; 130 | writer.write_u32::(data_size as u32)?; 131 | bincode::serialize_into(&mut writer, &metadata).map_err(CodecError::from)?; 132 | data.encode_into(buf).unwrap(); 133 | 134 | Ok(()) 135 | } 136 | } 137 | 138 | impl Default for MessageCodec { 139 | fn default() -> Self { 140 | Self::new() 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /erdos/src/dataflow/operators/join.rs: -------------------------------------------------------------------------------- 1 | use serde::Deserialize; 2 | 3 | use crate::dataflow::{ 4 | context::TwoInOneOutContext, 5 | message::Message, 6 | operator::{OperatorConfig, TwoInOneOut}, 7 | state::TimeVersionedState, 8 | stream::{OperatorStream, Stream, WriteStreamT}, 9 | Data, 10 | }; 11 | 12 | /// Joins messages with matching timestamps from two different streams. 13 | /// 14 | /// The following table provides an example of how the [`TimestampJoinOperator`] processes data 15 | /// from two streams: 16 | /// 17 | /// | Timestamp | Left input | Right input | [`TimestampJoinOperator`] output | 18 | /// |-----------|------------|-------------|--------------------------------------------| 19 | /// | 1 | a
<br> b | 1 <br> 2 | (a, 1) <br> (a, 2) <br> (b, 1) <br>
(b, 2) | 20 | /// | 2 | c | | | 21 | /// | 3 | | 3 | | 22 | /// | 4 | d | 4 | (d, 4) | 23 | /// 24 | /// # Example 25 | /// The following example shows how to use a [`TimestampJoinOperator`] to join two streams. 26 | /// 27 | /// ``` 28 | /// # use erdos::dataflow::{ 29 | /// # stream::IngestStream, 30 | /// # operator::OperatorConfig, 31 | /// # operators::TimestampJoinOperator, 32 | /// # state::TimeVersionedState 33 | /// # }; 34 | /// # 35 | /// # let left_stream: IngestStream = IngestStream::new(); 36 | /// # let right_stream: IngestStream = IngestStream::new(); 37 | /// # 38 | /// // Joins two streams of types String and usize 39 | /// let joined_stream = erdos::connect_two_in_one_out( 40 | /// TimestampJoinOperator::new, 41 | /// TimeVersionedState::new, 42 | /// OperatorConfig::new().name("TimestampJoinOperator"), 43 | /// &left_stream, 44 | /// &right_stream, 45 | /// ); 46 | /// ``` 47 | #[derive(Default)] 48 | pub struct TimestampJoinOperator {} 49 | 50 | impl TimestampJoinOperator { 51 | pub fn new() -> Self { 52 | Self {} 53 | } 54 | } 55 | 56 | impl TwoInOneOut, Vec)>, T, U, (T, U)> for TimestampJoinOperator 57 | where 58 | T: Data + for<'a> Deserialize<'a>, 59 | U: Data + for<'a> Deserialize<'a>, 60 | { 61 | fn on_left_data( 62 | &mut self, 63 | ctx: &mut TwoInOneOutContext, Vec)>, (T, U)>, 64 | data: &T, 65 | ) { 66 | let (left_items, right_items) = ctx.current_state().unwrap(); 67 | left_items.push(data.clone()); 68 | 69 | // Can't iterate through right_msgs and send messages because this results in a compiler 70 | // error due to 2 mutable references to ctx. 
71 | let num_right_items = right_items.len(); 72 | for i in 0..num_right_items { 73 | let right_item = ctx.current_state().unwrap().1[i].clone(); 74 | let msg = Message::new_message(ctx.timestamp().clone(), (data.clone(), right_item)); 75 | ctx.write_stream().send(msg).unwrap(); 76 | } 77 | } 78 | 79 | fn on_right_data( 80 | &mut self, 81 | ctx: &mut TwoInOneOutContext, Vec)>, (T, U)>, 82 | data: &U, 83 | ) { 84 | let (left_items, right_items) = ctx.current_state().unwrap(); 85 | right_items.push(data.clone()); 86 | 87 | // Can't iterate through left_items and send messages because this results in a compiler 88 | // error due to 2 mutable references to ctx. 89 | let num_left_items = left_items.len(); 90 | for i in 0..num_left_items { 91 | let left_item = ctx.current_state().unwrap().0[i].clone(); 92 | let msg = Message::new_message(ctx.timestamp().clone(), (left_item, data.clone())); 93 | ctx.write_stream().send(msg).unwrap(); 94 | } 95 | } 96 | 97 | fn on_watermark( 98 | &mut self, 99 | ctx: &mut TwoInOneOutContext, Vec)>, (T, U)>, 100 | ) { 101 | let timestamp = ctx.timestamp().clone(); 102 | ctx.state_mut().evict_until(×tamp); 103 | } 104 | } 105 | 106 | /// Extension trait for joining pairs of streams. 107 | /// 108 | /// Names the operators using the names of the incoming streams. 109 | pub trait Join 110 | where 111 | T: Data + for<'a> Deserialize<'a>, 112 | U: Data + for<'a> Deserialize<'a>, 113 | { 114 | fn timestamp_join(&self, other: &dyn Stream) -> OperatorStream<(T, U)>; 115 | } 116 | 117 | impl Join for S 118 | where 119 | S: Stream, 120 | T: Data + for<'a> Deserialize<'a>, 121 | U: Data + for<'a> Deserialize<'a>, 122 | { 123 | /// Joins messages with matching timestamps from two different streams using a 124 | /// [`TimestampJoinOperator`]. 
125 | /// 126 | /// # Example 127 | /// 128 | /// ``` 129 | /// # use erdos::dataflow::{stream::IngestStream, operators::Join}; 130 | /// # 131 | /// # let left_stream: IngestStream = IngestStream::new(); 132 | /// # let right_stream: IngestStream = IngestStream::new(); 133 | /// # 134 | /// let joined_stream = left_stream.timestamp_join(&right_stream); 135 | /// ``` 136 | fn timestamp_join(&self, other: &dyn Stream) -> OperatorStream<(T, U)> { 137 | let name = format!("TimestampJoinOp_{}_{}", self.name(), other.name()); 138 | crate::connect_two_in_one_out( 139 | TimestampJoinOperator::new, 140 | TimeVersionedState::new, 141 | OperatorConfig::new().name(&name), 142 | self, 143 | other, 144 | ) 145 | } 146 | } 147 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ERDOS 2 | 3 | ERDOS is a platform for developing self-driving cars and robotics applications. 4 | 5 | [![Crates.io][crates-badge]][crates-url] 6 | [![Build Status](https://github.com/erdos-project/erdos/workflows/CI/badge.svg)](https://github.com/erdos-project/erdos/actions) 7 | [![Documentation Status](https://readthedocs.org/projects/erdos/badge/?version=latest)](https://erdos.readthedocs.io/en/latest/?badge=latest) 8 | [![Documentation](https://docs.rs/erdos/badge.svg)](https://docs.rs/erdos/) 9 | 10 | [crates-badge]: https://img.shields.io/crates/v/erdos.svg 11 | [crates-url]: https://crates.io/crates/erdos 12 | 13 | # Getting started 14 | 15 | # Local installation 16 | 17 | ## System requirements 18 | 19 | ERDOS is known to work on Ubuntu 18.04 and 20.04. 20 | 21 | ## Rust installation 22 | 23 | To develop an ERDOS application in Rust, simply include ERDOS in `Cargo.toml`. 24 | The latest ERDOS release is published on 25 | [Crates.io](https://crates.io/crates/erdos) 26 | and documentation is available on [Docs.rs](https://docs.rs/erdos). 
27 | 28 | If you'd like to contribute to ERDOS, first 29 | [install Rust](https://www.rust-lang.org/tools/install). 30 | Then run the following to clone the repository and build ERDOS: 31 | ```console 32 | git clone https://github.com/erdos-project/erdos.git && cd erdos 33 | cargo build 34 | ``` 35 | 36 | ## Python Installation 37 | 38 | To develop an ERDOS application in Python, simply run 39 | `pip install erdos`. Documentation is available on 40 | [Read the Docs](https://erdos.readthedocs.io/). 41 | 42 | If you'd like to contribute to ERDOS, first 43 | [install Rust](https://www.rust-lang.org/tools/install). 44 | Within a [virtual environment](https://docs.python.org/3/tutorial/venv.html), 45 | run the following to clone the repository and build ERDOS: 46 | ```console 47 | git clone https://github.com/erdos-project/erdos.git && cd erdos/python 48 | pip3 install maturin 49 | maturin develop 50 | ``` 51 | 52 | The Python-Rust bridge interface is developed in the `python` crate, which 53 | also contains user-facing python files under the `python/erdos` directory. 54 | 55 | If you'd like to build ERDOS for release (better performance, but longer 56 | build times), run `maturin develop --release`. 57 | 58 | ## Running an example 59 | 60 | ```console 61 | python3 python/examples/simple_pipeline.py 62 | ``` 63 | 64 | # Writing Applications 65 | 66 | ERDOS provides Python and Rust interfaces for developing applications. 67 | 68 | The Python interface provides easy integration with popular libraries 69 | such as tensorflow, but comes at the cost of performance 70 | (e.g. slower serialization and the [lack of parallelism within a process](https://wiki.python.org/moin/GlobalInterpreterLock)). 71 | 72 | The Rust interface provides more safety guarantees 73 | (e.g. compile-time type checking) and faster performance 74 | (e.g. multithreading and zero-copy message passing). 
75 | High performance, safety critical applications such as 76 | self-driving car pipelines deployed in production should use the 77 | Rust API to take full advantage of ERDOS. 78 | 79 | # ERDOS Design 80 | 81 | ERDOS is a streaming dataflow system designed for self-driving car 82 | pipelines and robotics applications. 83 | 84 | Components of the pipelines are implemented as **operators** which 85 | are connected by **data streams**. The set of operators and streams 86 | forms the **dataflow graph**, the representation of the pipeline that 87 | ERDOS processes. 88 | 89 | Applications define the dataflow graph by connecting operators to streams 90 | in the **driver** section of the program. Operators are typically 91 | implemented elsewhere. 92 | 93 | ERDOS is designed for low latency. Self-driving car pipelines require 94 | end-to-end deadlines on the order of hundreds of milliseconds for safe 95 | driving. Similarly, self-driving cars typically process gigabytes per 96 | second of data on small clusters. Therefore, ERDOS is optimized to 97 | send small amounts of data (gigabytes as opposed to terabytes) 98 | as quickly as possible. 99 | 100 | ERDOS provides determinism through **watermarks**. Low watermarks 101 | are a bound on the age of messages received and operators will ignore 102 | any messages older than the most recent watermark received. By processing 103 | on watermarks, applications can avoid non-determinism from processing 104 | messages out of order. 105 | 106 | To read more about the ideas behind ERDOS, refer to our paper, 107 | [*D3: A Dynamic Deadline-Driven Approach for Building Autonomous Vehicles*](https://dl.acm.org/doi/10.1145/3492321.3519576). 
108 | If you find ERDOS useful to your work, please consider citing our paper: 109 | ```bibtex 110 | @inproceedings{gog2022d3, 111 | title={D3: a dynamic deadline-driven approach for building autonomous vehicles}, 112 | author={Gog, Ionel and Kalra, Sukrit and Schafhalter, Peter and Gonzalez, Joseph E and Stoica, Ion}, 113 | booktitle={Proceedings of the Seventeenth European Conference on Computer Systems}, 114 | pages={453--471}, 115 | year={2022} 116 | } 117 | ``` 118 | 119 | # Pylot 120 | 121 | We are actively developing an AV platform atop ERDOS! For more information, see the [Pylot repository](https://github.com/erdos-project/pylot/). 122 | 123 | # Getting involved 124 | 125 | If you would like to contact us, you can: 126 | * [Community on Slack](https://forms.gle/KXwSrjM6ZqRi2MT18): Join our community 127 | on Slack for discussions about development, questions about usage, and feature 128 | requests. 129 | * [Github Issues](https://github.com/erdos-project/erdos/issues): For reporting 130 | bugs. 131 | 132 | We always welcome contributions to ERDOS. One way to get started is to 133 | pick one of the issues tagged with **good first issue** -- these are usually good issues that help you familiarize yourself with the ERDOS 134 | code base. Please submit contributions using pull requests. 135 | -------------------------------------------------------------------------------- /erdos/src/dataflow/stream/ingest_stream.rs: -------------------------------------------------------------------------------- 1 | use std::{ 2 | sync::{Arc, Mutex}, 3 | thread, 4 | time::Duration, 5 | }; 6 | 7 | use serde::Deserialize; 8 | 9 | use crate::{ 10 | dataflow::{ 11 | graph::{default_graph, AbstractGraph}, 12 | Data, Message, 13 | }, 14 | scheduler::channel_manager::ChannelManager, 15 | }; 16 | 17 | use super::{errors::SendError, Stream, StreamId, WriteStream, WriteStreamT}; 18 | 19 | /// An [`IngestStream`] enables drivers to inject data into a running ERDOS application. 
20 | /// 21 | /// Similar to a [`WriteStream`], an [`IngestStream`] exposes a [`send`](IngestStream::send) 22 | /// function to allow drivers to send data to the operators of the constructed graph. 23 | /// 24 | /// # Example 25 | /// The below example shows how to use an [`IngestStream`] to send data to a 26 | /// [`FlatMapOperator`](crate::dataflow::operators::FlatMapOperator), 27 | /// and retrieve the processed values through an 28 | /// [`ExtractStream`](crate::dataflow::stream::ExtractStream). 29 | /// ```no_run 30 | /// # use erdos::dataflow::{ 31 | /// # stream::{IngestStream, ExtractStream, Stream}, 32 | /// # operators::FlatMapOperator, 33 | /// # OperatorConfig, Message, Timestamp 34 | /// # }; 35 | /// # use erdos::*; 36 | /// # use erdos::node::Node; 37 | /// # 38 | /// let args = erdos::new_app("ERDOS").get_matches(); 39 | /// let mut node = Node::new(Configuration::from_args(&args)); 40 | /// 41 | /// // Create an IngestStream. 42 | /// let mut ingest_stream = IngestStream::new(); 43 | /// 44 | /// // Create an ExtractStream from the ReadStream of the FlatMapOperator. 45 | /// let output_stream = erdos::connect_one_in_one_out( 46 | /// || FlatMapOperator::new(|x: &usize| { std::iter::once(2 * x) }), 47 | /// || {}, 48 | /// OperatorConfig::new().name("MapOperator"), 49 | /// &ingest_stream, 50 | /// ); 51 | /// let mut extract_stream = ExtractStream::new(&output_stream); 52 | /// 53 | /// node.run_async(); 54 | /// 55 | /// // Send data on the IngestStream. 56 | /// for i in 1..10 { 57 | /// ingest_stream.send(Message::new_message(Timestamp::Time(vec![i as u64]), i)).unwrap(); 58 | /// } 59 | /// 60 | /// // Retrieve mapped values using an ExtractStream. 
61 | /// for i in 1..10 { 62 | /// let message = extract_stream.read().unwrap(); 63 | /// assert_eq!(*message.data().unwrap(), 2 * i); 64 | /// } 65 | /// ``` 66 | pub struct IngestStream 67 | where 68 | for<'a> D: Data + Deserialize<'a>, 69 | { 70 | // The unique ID of the stream (automatically generated by the constructor) 71 | id: StreamId, 72 | // Use a std mutex because the driver doesn't run on the tokio runtime. 73 | write_stream_option: Arc>>>, 74 | } 75 | 76 | impl IngestStream 77 | where 78 | for<'a> D: Data + Deserialize<'a>, 79 | { 80 | /// Returns a new instance of the [`IngestStream`]. 81 | pub fn new() -> Self { 82 | tracing::debug!("Initializing an IngestStream"); 83 | let id = StreamId::new_deterministic(); 84 | let ingest_stream = Self { 85 | id, 86 | write_stream_option: Arc::new(Mutex::new(None)), 87 | }; 88 | 89 | // A hook to initialize the ingest stream's connections to downstream operators. 90 | let write_stream_option_copy = Arc::clone(&ingest_stream.write_stream_option); 91 | 92 | let setup_hook = move |graph: &AbstractGraph, channel_manager: &mut ChannelManager| { 93 | match channel_manager.get_send_endpoints(id) { 94 | Ok(send_endpoints) => { 95 | let write_stream = 96 | WriteStream::new(id, &graph.get_stream_name(&id), send_endpoints); 97 | write_stream_option_copy 98 | .lock() 99 | .unwrap() 100 | .replace(write_stream); 101 | } 102 | Err(msg) => panic!("Unable to set up IngestStream {}: {}", id, msg), 103 | } 104 | }; 105 | 106 | default_graph::add_ingest_stream(&ingest_stream, setup_hook); 107 | default_graph::set_stream_name(&id, &format!("ingest_stream_{}", id)); 108 | 109 | ingest_stream 110 | } 111 | 112 | /// Returns `true` if a top watermark message was received or the [`IngestStream`] failed to 113 | /// set up. 
114 | pub fn is_closed(&self) -> bool { 115 | self.write_stream_option 116 | .lock() 117 | .unwrap() 118 | .as_ref() 119 | .map(WriteStream::is_closed) 120 | .unwrap_or(true) 121 | } 122 | 123 | /// Sends data on the stream. 124 | /// 125 | /// # Arguments 126 | /// * `msg` - The message to be sent on the stream. 127 | pub fn send(&mut self, msg: Message) -> Result<(), SendError> { 128 | if !self.is_closed() { 129 | loop { 130 | { 131 | if let Some(write_stream) = self.write_stream_option.lock().unwrap().as_mut() { 132 | let res = write_stream.send(msg); 133 | return res; 134 | } 135 | } 136 | thread::sleep(Duration::from_millis(100)); 137 | } 138 | } else { 139 | tracing::warn!( 140 | "Trying to send messages on a closed IngestStream {} (ID: {})", 141 | default_graph::get_stream_name(&self.id()), 142 | self.id(), 143 | ); 144 | Err(SendError::Closed) 145 | } 146 | } 147 | } 148 | 149 | impl Default for IngestStream 150 | where 151 | for<'a> D: Data + Deserialize<'a>, 152 | { 153 | fn default() -> Self { 154 | Self::new() 155 | } 156 | } 157 | 158 | impl Stream for IngestStream 159 | where 160 | for<'a> D: Data + Deserialize<'a>, 161 | { 162 | fn id(&self) -> StreamId { 163 | self.id 164 | } 165 | } 166 | 167 | impl WriteStreamT for IngestStream 168 | where 169 | for<'a> D: Data + Deserialize<'a>, 170 | { 171 | /// Blocks until write stream is available 172 | fn send(&mut self, msg: Message) -> Result<(), SendError> { 173 | self.send(msg) 174 | } 175 | } 176 | --------------------------------------------------------------------------------