├── .gitignore ├── .travis.yml ├── CHANGELOG.md ├── CONTRIBUTING.md ├── COPYRIGHT ├── Cargo.toml ├── LICENSE ├── README.md ├── bytes ├── .gitignore ├── Cargo.toml └── src │ └── lib.rs ├── communication ├── .gitignore ├── Cargo.toml ├── examples │ └── comm_hello.rs └── src │ ├── allocator │ ├── canary.rs │ ├── counters.rs │ ├── generic.rs │ ├── mod.rs │ ├── process.rs │ ├── thread.rs │ └── zero_copy │ │ ├── allocator.rs │ │ ├── allocator_process.rs │ │ ├── bytes_exchange.rs │ │ ├── bytes_slab.rs │ │ ├── initialize.rs │ │ ├── mod.rs │ │ ├── push_pull.rs │ │ └── tcp.rs │ ├── buzzer.rs │ ├── initialize.rs │ ├── lib.rs │ ├── logging.rs │ ├── message.rs │ └── networking.rs ├── kafkaesque ├── .gitignore ├── Cargo.toml └── src │ ├── bin │ ├── capture_recv.rs │ ├── capture_send.rs │ └── kafka_source.rs │ ├── kafka_source.rs │ └── lib.rs ├── logging ├── .gitignore ├── Cargo.toml └── src │ └── lib.rs ├── mdbook ├── .gitignore └── src │ ├── SUMMARY.md │ ├── chapter_0 │ ├── chapter_0.md │ ├── chapter_0_0.md │ ├── chapter_0_1.md │ ├── chapter_0_2.md │ └── chapter_0_3.md │ ├── chapter_1 │ ├── chapter_1.md │ ├── chapter_1_1.md │ ├── chapter_1_2.md │ └── chapter_1_3.md │ ├── chapter_2 │ ├── chapter_2.md │ ├── chapter_2_1.md │ ├── chapter_2_2.md │ ├── chapter_2_3.md │ ├── chapter_2_4.md │ └── chapter_2_5.md │ ├── chapter_3 │ ├── chapter_3.md │ ├── chapter_3_1.md │ ├── chapter_3_2.md │ ├── chapter_3_3.md │ └── chapter_3_4.md │ ├── chapter_4 │ ├── chapter_4.md │ ├── chapter_4_1.md │ ├── chapter_4_2.md │ ├── chapter_4_3.md │ ├── chapter_4_4.md │ └── chapter_4_5.md │ ├── chapter_5 │ ├── chapter_5.md │ ├── chapter_5_1.md │ └── chapter_5_2.md │ └── introduction.md ├── sort ├── .gitignore ├── Cargo.toml ├── LICENSE ├── README.md ├── benches │ └── benches.rs └── src │ ├── batched_vec.rs │ ├── bin │ └── profile.rs │ ├── lib.rs │ ├── lsb.rs │ ├── lsb_swc.rs │ ├── msb.rs │ ├── msb_swc.rs │ ├── stash.rs │ └── swc_buffer.rs └── timely ├── Cargo.toml ├── examples ├── barrier.rs ├── bfs.rs ├── capture_recv.rs ├── capture_send.rs ├── distinct.rs ├── event_driven.rs ├── exchange.rs ├── flow_controlled.rs ├── hashjoin.rs ├── hello.rs ├── logging-recv.rs ├── logging-send.rs ├── loopdemo.rs ├── openloop.rs ├── pagerank.rs ├── pingpong.rs ├── rc.rs ├── sequence.rs ├── simple.rs ├── threadless.rs ├── unionfind.rs ├── unordered_input.rs └── wordcount.rs ├── src ├── dataflow │ ├── channels │ │ ├── mod.rs │ │ ├── pact.rs │ │ ├── pullers │ │ │ ├── counter.rs │ │ │ └── mod.rs │ │ └── pushers │ │ │ ├── buffer.rs │ │ │ ├── counter.rs │ │ │ ├── exchange.rs │ │ │ ├── mod.rs │ │ │ └── tee.rs │ ├── mod.rs │ ├── operators │ │ ├── aggregation │ │ │ ├── aggregate.rs │ │ │ ├── mod.rs │ │ │ └── state_machine.rs │ │ ├── branch.rs │ │ ├── broadcast.rs │ │ ├── capability.rs │ │ ├── capture │ │ │ ├── capture.rs │ │ │ ├── event.rs │ │ │ ├── extract.rs │ │ │ ├── mod.rs │ │ │ └── replay.rs │ │ ├── concat.rs │ │ ├── count.rs │ │ ├── delay.rs │ │ ├── enterleave.rs │ │ ├── exchange.rs │ │ ├── feedback.rs │ │ ├── filter.rs │ │ ├── flow_controlled.rs │ │ ├── generic │ │ │ ├── binary.rs │ │ │ ├── builder_raw.rs │ │ │ ├── builder_rc.rs │ │ │ ├── builder_ref.rs │ │ │ ├── handles.rs │ │ │ ├── mod.rs │ │ │ ├── notificator.rs │ │ │ ├── operator.rs │ │ │ ├── operator_info.rs │ │ │ └── unary.rs │ │ ├── input.rs │ │ ├── inspect.rs │ │ ├── map.rs │ │ ├── mod.rs │ │ ├── partition.rs │ │ ├── probe.rs │ │ ├── queue.rs │ │ ├── reclock.rs │ │ ├── to_stream.rs │ │ └── unordered_input.rs │ ├── scopes │ │ ├── child.rs │ │ └── mod.rs │ └── stream.rs ├── execute.rs ├── lib.rs 
├── logging.rs ├── order.rs ├── progress │ ├── broadcast.rs │ ├── change_batch.rs │ ├── frontier.rs │ ├── mod.rs │ ├── operate.rs │ ├── reachability.rs │ ├── subgraph.rs │ └── timestamp.rs ├── scheduling │ ├── activate.rs │ └── mod.rs ├── synchronization │ ├── barrier.rs │ ├── mod.rs │ └── sequence.rs └── worker.rs └── tests ├── barrier.rs └── skeptic.rs /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | dist: xenial 2 | language: rust 3 | rust: 4 | - stable 5 | install: 6 | - pip install --user ghp-import 7 | script: 8 | - cargo build 9 | # rustdoc doesn't build dependencies, so it needs to run after `cargo build`, 10 | # but its dependency search gets confused if there are multiple copies of any 11 | # dependency in target/debug/deps, so it needs to run before `cargo test` et al. 12 | # clutter target/debug/deps with multiple copies of things. 13 | - for file in $(find mdbook -name '*.md'); do rustdoc --test $file -L ./target/debug/deps; done 14 | - cargo test 15 | - cargo bench 16 | - cargo doc 17 | after_success: | 18 | [ $TRAVIS_BRANCH = master ] && 19 | [ $TRAVIS_PULL_REQUEST = false ] && 20 | cargo install mdbook && 21 | (cd mdbook; mdbook build) && 22 | ghp-import -n mdbook/book && 23 | git push -fq https://${GH_TOKEN}@github.com/${TRAVIS_REPO_SLUG}.git gh-pages 24 | env: 25 | global: 26 | - secure: CzDrbiXWbvgjqZn1z5aFcttW90tWT4LpEc9bSJ/Wb0i4QvABAaOefw3Griu9MT9vNvbyM1ysk8fzXMHXF1ivhUZFzV4MRr+nrKg4LTg/upDAVAEOjWdrFUJOp17a+PVS3K83zrVxeZMkmQe4ACB3a9hsHilKp635xgizSJWQ6Wo= 27 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | Thank you for your interest in contributing! 2 | 3 | Here is some legal stuff that will make you regret clicking on this link. 4 | 5 | By submitting a pull request for this project, you are agreeing to license your contribution under the terms of the project's LICENSE file at the time of your submission (in case it changes or something). You are also certifying that you are in a position to make this agreement, in that you didn't nick your code from someone else, or some project with conflicting licensing requirements. 6 | 7 | If you would like to put explicit copyright notices somewhere, please leave them in the repository's COPYRIGHT file rather than in each file. 8 | -------------------------------------------------------------------------------- /COPYRIGHT: -------------------------------------------------------------------------------- 1 | Contributions by Andrea Lattuada are Copyright (c) 2016 Andrea Lattuada, ETH Zürich. 2 | Contributions by Moritz Hoffmann are Copyright (c) 2017 Moritz Hoffmann, ETH Zürich. 
3 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = [ 3 | "bytes", 4 | "communication", 5 | "kafkaesque", 6 | "logging", 7 | "sort", 8 | "timely", 9 | ] 10 | 11 | [profile.release] 12 | opt-level = 3 13 | debug = true 14 | rpath = false 15 | lto = true 16 | debug-assertions = false 17 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2014 Frank McSherry 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | 23 | -------------------------------------------------------------------------------- /bytes/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /bytes/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "timely_bytes" 3 | version = "0.10.0" 4 | authors = ["Frank McSherry "] 5 | edition = "2018" 6 | 7 | description = "Disjoint mutable byte slices from a common allocation" 8 | 9 | documentation = "https://docs.rs/timely/" 10 | homepage = "https://github.com/TimelyDataflow/timely-dataflow" 11 | repository = "https://github.com/TimelyDataflow/timely-dataflow.git" 12 | keywords = ["timely", "dataflow", "bytes"] 13 | license = "MIT" -------------------------------------------------------------------------------- /communication/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | Cargo.lock 3 | -------------------------------------------------------------------------------- /communication/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "timely_communication" 3 | version = "0.10.0" 4 | authors = ["Frank McSherry "] 5 | description = "Communication layer for timely dataflow" 6 | edition = "2018" 7 | 8 | # These URLs point to more information about the repository 9 | 10 | documentation = "https://docs.rs/timely/" 11 | homepage = "https://github.com/TimelyDataflow/timely-dataflow" 12 | repository = "https://github.com/TimelyDataflow/timely-dataflow.git" 13 | keywords = ["timely", "dataflow"] 14 | license = "MIT" 15 | 16 | [features] 17 | default = ["getopts"] 18 | 19 | [dependencies] 20 | getopts = { version = "0.2.14", optional = true} 21 | bincode = { version = "1.0", optional = true } 22 | serde_derive = "1.0" 23 | serde = "1.0" 24 | abomonation = "0.7" 25 | abomonation_derive = "0.4" 26 | timely_bytes = { path = "../bytes", version = "0.10" } 27 | timely_logging = { path = "../logging", version = "0.10" } 28 | -------------------------------------------------------------------------------- /communication/examples/comm_hello.rs: -------------------------------------------------------------------------------- 1 | extern crate timely_communication; 2 | 3 | use std::ops::Deref; 4 | use timely_communication::{Message, Allocate}; 5 | 6 | fn main() { 7 | 8 | // extract the configuration from user-supplied arguments, initialize the computation. 9 | let config = timely_communication::Configuration::from_args(std::env::args()).unwrap(); 10 | let guards = timely_communication::initialize(config, |mut allocator| { 11 | 12 | println!("worker {} of {} started", allocator.index(), allocator.peers()); 13 | 14 | // allocates pair of senders list and one receiver. 15 | let (mut senders, mut receiver) = allocator.allocate(0); 16 | 17 | // send typed data along each channel 18 | for i in 0 .. allocator.peers() { 19 | senders[i].send(Message::from_typed(format!("hello, {}", i))); 20 | senders[i].done(); 21 | } 22 | 23 | // no support for termination notification, 24 | // we have to count down ourselves. 
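// In the loop below, `receive()` asks the allocator to surface any buffered messages before we read, and `release()` signals that this batch of reads is complete; see the `Allocate` trait in `src/allocator/mod.rs`.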
25 | let mut received = 0; 26 | while received < allocator.peers() { 27 | 28 | allocator.receive(); 29 | 30 | if let Some(message) = receiver.recv() { 31 | println!("worker {}: received: <{}>", allocator.index(), message.deref()); 32 | received += 1; 33 | } 34 | 35 | allocator.release(); 36 | } 37 | 38 | allocator.index() 39 | }); 40 | 41 | // computation runs until guards are joined or dropped. 42 | if let Ok(guards) = guards { 43 | for guard in guards.join() { 44 | println!("result: {:?}", guard); 45 | } 46 | } 47 | else { println!("error in computation"); } 48 | } 49 | -------------------------------------------------------------------------------- /communication/src/allocator/canary.rs: -------------------------------------------------------------------------------- 1 | //! A helper struct to report when something has been dropped. 2 | 3 | use std::rc::Rc; 4 | use std::cell::RefCell; 5 | 6 | /// An opaque type that reports when it is dropped. 7 | pub struct Canary { 8 | index: usize, 9 | queue: Rc>>, 10 | } 11 | 12 | impl Canary { 13 | /// Allocates a new drop canary. 14 | pub fn new(index: usize, queue: Rc>>) -> Self { 15 | Canary { index, queue } 16 | } 17 | } 18 | 19 | impl Drop for Canary { 20 | fn drop(&mut self) { 21 | self.queue.borrow_mut().push(self.index); 22 | } 23 | } -------------------------------------------------------------------------------- /communication/src/allocator/counters.rs: -------------------------------------------------------------------------------- 1 | //! Push and Pull wrappers to maintain counts of messages in channels. 2 | 3 | use std::rc::Rc; 4 | use std::cell::RefCell; 5 | use std::collections::VecDeque; 6 | 7 | use crate::{Push, Pull}; 8 | use crate::allocator::Event; 9 | 10 | /// The push half of an intra-thread channel. 11 | pub struct Pusher> { 12 | index: usize, 13 | // count: usize, 14 | events: Rc>>, 15 | pusher: P, 16 | phantom: ::std::marker::PhantomData, 17 | } 18 | 19 | impl> Pusher { 20 | /// Wraps a pusher with a message counter. 21 | pub fn new(pusher: P, index: usize, events: Rc>>) -> Self { 22 | Pusher { 23 | index, 24 | // count: 0, 25 | events, 26 | pusher, 27 | phantom: ::std::marker::PhantomData, 28 | } 29 | } 30 | } 31 | 32 | impl> Push for Pusher { 33 | #[inline] 34 | fn push(&mut self, element: &mut Option) { 35 | // if element.is_none() { 36 | // if self.count != 0 { 37 | // self.events 38 | // .borrow_mut() 39 | // .push_back((self.index, Event::Pushed(self.count))); 40 | // self.count = 0; 41 | // } 42 | // } 43 | // else { 44 | // self.count += 1; 45 | // } 46 | // TODO: Version above is less chatty, but can be a bit late in 47 | // moving information along. Better, but needs cooperation. 48 | self.events 49 | .borrow_mut() 50 | .push_back((self.index, Event::Pushed(1))); 51 | 52 | self.pusher.push(element) 53 | } 54 | } 55 | 56 | use std::sync::mpsc::Sender; 57 | 58 | /// The push half of an intra-thread channel. 59 | pub struct ArcPusher> { 60 | index: usize, 61 | // count: usize, 62 | events: Sender<(usize, Event)>, 63 | pusher: P, 64 | phantom: ::std::marker::PhantomData, 65 | } 66 | 67 | impl> ArcPusher { 68 | /// Wraps a pusher with a message counter. 
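/// A hypothetical construction sketch, in which `inner` stands for any `P: Push<T>` and events are reported on a standard `mpsc` channel:
///
/// ```ignore
/// let (events_tx, _events_rx) = std::sync::mpsc::channel();
/// let pusher = ArcPusher::new(inner, /* channel index */ 0, events_tx);
/// ```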
69 | pub fn new(pusher: P, index: usize, events: Sender<(usize, Event)>) -> Self { 70 | ArcPusher { 71 | index, 72 | // count: 0, 73 | events, 74 | pusher, 75 | phantom: ::std::marker::PhantomData, 76 | } 77 | } 78 | } 79 | 80 | impl> Push for ArcPusher { 81 | #[inline] 82 | fn push(&mut self, element: &mut Option) { 83 | // if element.is_none() { 84 | // if self.count != 0 { 85 | // self.events 86 | // .send((self.index, Event::Pushed(self.count))) 87 | // .expect("Failed to send message count"); 88 | // self.count = 0; 89 | // } 90 | // } 91 | // else { 92 | // self.count += 1; 93 | // } 94 | let _ = 95 | self.events 96 | .send((self.index, Event::Pushed(1))); 97 | // TODO : Perhaps this shouldn't be a fatal error (e.g. in shutdown). 98 | // .expect("Failed to send message count"); 99 | 100 | self.pusher.push(element) 101 | } 102 | } 103 | 104 | /// The pull half of an intra-thread channel. 105 | pub struct Puller> { 106 | index: usize, 107 | count: usize, 108 | events: Rc>>, 109 | puller: P, 110 | phantom: ::std::marker::PhantomData, 111 | } 112 | 113 | impl> Puller { 114 | /// Wraps a puller with a message counter. 115 | pub fn new(puller: P, index: usize, events: Rc>>) -> Self { 116 | Puller { 117 | index, 118 | count: 0, 119 | events, 120 | puller, 121 | phantom: ::std::marker::PhantomData, 122 | } 123 | } 124 | } 125 | impl> Pull for Puller { 126 | #[inline] 127 | fn pull(&mut self) -> &mut Option { 128 | let result = self.puller.pull(); 129 | if result.is_none() { 130 | if self.count != 0 { 131 | self.events 132 | .borrow_mut() 133 | .push_back((self.index, Event::Pulled(self.count))); 134 | self.count = 0; 135 | } 136 | } 137 | else { 138 | self.count += 1; 139 | } 140 | 141 | result 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /communication/src/allocator/mod.rs: -------------------------------------------------------------------------------- 1 | //! Types and traits for the allocation of channels. 2 | 3 | use std::rc::Rc; 4 | use std::cell::RefCell; 5 | use std::time::Duration; 6 | use std::collections::VecDeque; 7 | 8 | pub use self::thread::Thread; 9 | pub use self::process::Process; 10 | pub use self::generic::{Generic, GenericBuilder}; 11 | 12 | pub mod thread; 13 | pub mod process; 14 | pub mod generic; 15 | 16 | pub mod canary; 17 | pub mod counters; 18 | 19 | pub mod zero_copy; 20 | 21 | use crate::{Data, Push, Pull, Message}; 22 | 23 | /// A proto-allocator, which implements `Send` and can be completed with `build`. 24 | /// 25 | /// This trait exists because some allocators contain elements that do not implement 26 | /// the `Send` trait, for example `Rc` wrappers for shared state. As such, what we 27 | /// actually need to create to initialize a computation are builders, which we can 28 | /// then move into new threads each of which then construct their actual allocator. 29 | pub trait AllocateBuilder : Send { 30 | /// The type of allocator to be built. 31 | type Allocator: Allocate; 32 | /// Builds allocator, consumes self. 33 | fn build(self) -> Self::Allocator; 34 | } 35 | 36 | /// A type capable of allocating channels. 37 | /// 38 | /// There is some feature creep, in that this contains several convenience methods about the nature 39 | /// of the allocated channels, and maintenance methods to ensure that they move records around. 40 | pub trait Allocate { 41 | /// The index of the worker out of `(0..self.peers())`. 42 | fn index(&self) -> usize; 43 | /// The number of workers in the communication group. 
44 | fn peers(&self) -> usize; 45 | /// Constructs several send endpoints and one receive endpoint. 46 | fn allocate(&mut self, identifier: usize) -> (Vec>>>, Box>>); 47 | /// A shared queue of communication events with channel identifier. 48 | /// 49 | /// It is expected that users of the channel allocator will regularly 50 | /// drain these events in order to drive their computation. If they 51 | /// fail to do so the event queue may become quite large, and turn 52 | /// into a performance problem. 53 | fn events(&self) -> &Rc>>; 54 | 55 | /// Awaits communication events. 56 | /// 57 | /// This method may park the current thread, for at most `duration`, 58 | /// until new events arrive. 59 | /// The method is not guaranteed to wait for any amount of time, but 60 | /// good implementations should use this as a hint to park the thread. 61 | fn await_events(&self, _duration: Option) { } 62 | 63 | /// Ensure that received messages are surfaced in each channel. 64 | /// 65 | /// This method should be called to ensure that received messages are 66 | /// surfaced in each channel, but failing to call the method does not 67 | /// ensure that they are not surfaced. 68 | /// 69 | /// Generally, this method is the indication that the allocator should 70 | /// present messages contained in otherwise scarce resources (for example 71 | /// network buffers), under the premise that someone is about to consume 72 | /// the messages and release the resources. 73 | fn receive(&mut self) { } 74 | 75 | /// Signal the completion of a batch of reads from channels. 76 | /// 77 | /// Conventionally, this method signals to the communication fabric 78 | /// that the worker is taking a break from reading from channels, and 79 | /// the fabric should consider re-acquiring scarce resources. This can 80 | /// lead to the fabric performing defensive copies out of un-consumed 81 | /// buffers, and can be a performance problem if invoked casually. 82 | fn release(&mut self) { } 83 | 84 | /// Constructs a pipeline channel from the worker to itself. 85 | /// 86 | /// By default, this method uses the thread-local channel constructor 87 | /// based on a shared `VecDeque` which updates the event queue. 88 | fn pipeline(&mut self, identifier: usize) -> 89 | (thread::ThreadPusher>, 90 | thread::ThreadPuller>) 91 | { 92 | thread::Thread::new_from(identifier, self.events().clone()) 93 | } 94 | } 95 | 96 | /// A communication channel event. 97 | pub enum Event { 98 | /// A number of messages pushed into the channel. 99 | Pushed(usize), 100 | /// A number of messages pulled from the channel. 101 | Pulled(usize), 102 | } 103 | -------------------------------------------------------------------------------- /communication/src/allocator/thread.rs: -------------------------------------------------------------------------------- 1 | //! Intra-thread communication. 2 | 3 | use std::rc::Rc; 4 | use std::cell::RefCell; 5 | use std::time::Duration; 6 | use std::collections::VecDeque; 7 | 8 | use crate::allocator::{Allocate, AllocateBuilder, Event}; 9 | use crate::allocator::counters::Pusher as CountPusher; 10 | use crate::allocator::counters::Puller as CountPuller; 11 | use crate::{Push, Pull, Message}; 12 | 13 | /// Builder for single-threaded allocator. 14 | pub struct ThreadBuilder; 15 | 16 | impl AllocateBuilder for ThreadBuilder { 17 | type Allocator = Thread; 18 | fn build(self) -> Self::Allocator { Thread::new() } 19 | } 20 | 21 | 22 | /// An allocator for intra-thread communication. 
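/// A minimal usage sketch, mirroring `examples/comm_hello.rs` above but with the single-threaded allocator (illustrative, not a doctest):
///
/// ```ignore
/// let mut allocator = Thread::new();
/// let (mut senders, mut receiver) = allocator.allocate(0);
/// senders[0].send(Message::from_typed(format!("hello")));
/// senders[0].done();
/// allocator.receive();                      // surface pending messages.
/// if let Some(message) = receiver.recv() {  // consume one message.
///     println!("got: {:?}", message.deref());
/// }
/// ```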
23 | pub struct Thread { 24 | /// Shared counts of messages in channels. 25 | events: Rc<RefCell<VecDeque<(usize, Event)>>>, 26 | } 27 | 28 | impl Allocate for Thread { 29 | fn index(&self) -> usize { 0 } 30 | fn peers(&self) -> usize { 1 } 31 | fn allocate<T: crate::Data>(&mut self, identifier: usize) -> (Vec<Box<Push<Message<T>>>>, Box<Pull<Message<T>>>) { 32 | let (pusher, puller) = Thread::new_from(identifier, self.events.clone()); 33 | (vec![Box::new(pusher)], Box::new(puller)) 34 | } 35 | fn events(&self) -> &Rc<RefCell<VecDeque<(usize, Event)>>> { 36 | &self.events 37 | } 38 | fn await_events(&self, duration: Option<Duration>) { 39 | if self.events.borrow().is_empty() { 40 | if let Some(duration) = duration { 41 | std::thread::park_timeout(duration); 42 | } 43 | else { 44 | std::thread::park(); 45 | } 46 | } 47 | } 48 | } 49 | 50 | /// Thread-local counting channel push endpoint. 51 | pub type ThreadPusher<T> = CountPusher<T, Pusher<T>>; 52 | /// Thread-local counting channel pull endpoint. 53 | pub type ThreadPuller<T> = CountPuller<T, Puller<T>>; 54 | 55 | impl Thread { 56 | /// Allocates a new thread-local channel allocator. 57 | pub fn new() -> Self { 58 | Thread { 59 | events: Rc::new(RefCell::new(VecDeque::new())), 60 | } 61 | } 62 | 63 | /// Creates a new thread-local channel from an identifier and shared counts. 64 | pub fn new_from<T: 'static>(identifier: usize, events: Rc<RefCell<VecDeque<(usize, Event)>>>) 65 | -> (ThreadPusher<Message<T>>, ThreadPuller<Message<T>>) 66 | { 67 | let shared = Rc::new(RefCell::new((VecDeque::<Message<T>>::new(), VecDeque::<Message<T>>::new()))); 68 | let pusher = Pusher { target: shared.clone() }; 69 | let pusher = CountPusher::new(pusher, identifier, events.clone()); 70 | let puller = Puller { source: shared, current: None }; 71 | let puller = CountPuller::new(puller, identifier, events.clone()); 72 | (pusher, puller) 73 | } 74 | } 75 | 76 | 77 | /// The push half of an intra-thread channel. 78 | pub struct Pusher<T> { 79 | target: Rc<RefCell<(VecDeque<T>, VecDeque<T>)>>, 80 | } 81 | 82 | impl<T> Push<T> for Pusher<T> { 83 | #[inline] 84 | fn push(&mut self, element: &mut Option<T>) { 85 | let mut borrow = self.target.borrow_mut(); 86 | if let Some(element) = element.take() { 87 | borrow.0.push_back(element); 88 | } 89 | *element = borrow.1.pop_front(); 90 | } 91 | } 92 | 93 | /// The pull half of an intra-thread channel. 94 | pub struct Puller<T> { 95 | current: Option<T>, 96 | source: Rc<RefCell<(VecDeque<T>, VecDeque<T>)>>, 97 | } 98 | 99 | impl<T> Pull<T> for Puller<T> { 100 | #[inline] 101 | fn pull(&mut self) -> &mut Option<T> { 102 | let mut borrow = self.source.borrow_mut(); 103 | // if let Some(element) = self.current.take() { 104 | // // TODO : Arbitrary constant. 105 | // if borrow.1.len() < 16 { 106 | // borrow.1.push_back(element); 107 | // } 108 | // } 109 | self.current = borrow.0.pop_front(); 110 | &mut self.current 111 | } 112 | } 113 | -------------------------------------------------------------------------------- /communication/src/allocator/zero_copy/bytes_slab.rs: -------------------------------------------------------------------------------- 1 | //! A large binary allocation for writing and sharing. 2 | 3 | use bytes::arc::Bytes; 4 | 5 | /// A large binary allocation for writing and sharing. 6 | /// 7 | /// A bytes slab wraps a `Bytes` and maintains a valid (written) length, and supports writing after 8 | /// this valid length, and extracting `Bytes` up to this valid length. Extracted bytes are enqueued 9 | /// and checked for uniqueness in order to recycle them (once all shared references are dropped). 10 | pub struct BytesSlab { 11 | buffer: Bytes, // current working buffer. 12 | in_progress: Vec<Option<Bytes>>, // buffers shared with workers. 13 | stash: Vec<Bytes>, // reclaimed and reusable buffers. 14 | shift: usize, // current buffer allocation size.
15 | valid: usize, // buffer[..valid] are valid bytes. 16 | } 17 | 18 | impl BytesSlab { 19 | /// Allocates a new `BytesSlab` with an initial size determined by a shift. 20 | pub fn new(shift: usize) -> Self { 21 | BytesSlab { 22 | buffer: Bytes::from(vec![0u8; 1 << shift].into_boxed_slice()), 23 | in_progress: Vec::new(), 24 | stash: Vec::new(), 25 | shift, 26 | valid: 0, 27 | } 28 | } 29 | /// The empty region of the slab. 30 | pub fn empty(&mut self) -> &mut [u8] { 31 | &mut self.buffer[self.valid..] 32 | } 33 | /// The valid region of the slab. 34 | pub fn valid(&mut self) -> &mut [u8] { 35 | &mut self.buffer[..self.valid] 36 | } 37 | /// Marks the next `bytes` bytes as valid. 38 | pub fn make_valid(&mut self, bytes: usize) { 39 | self.valid += bytes; 40 | } 41 | /// Extracts the first `bytes` valid bytes. 42 | pub fn extract(&mut self, bytes: usize) -> Bytes { 43 | debug_assert!(bytes <= self.valid); 44 | self.valid -= bytes; 45 | self.buffer.extract_to(bytes) 46 | } 47 | 48 | /// Ensures that `self.empty().len()` is at least `capacity`. 49 | /// 50 | /// This method may retire the current buffer if it does not have enough space, in which case 51 | /// it will copy any remaining contents into a new buffer. If this would not create enough free 52 | /// space, the shift is increased until it is sufficient. 53 | pub fn ensure_capacity(&mut self, capacity: usize) { 54 | 55 | if self.empty().len() < capacity { 56 | 57 | let mut increased_shift = false; 58 | 59 | // Increase allocation if copy would be insufficient. 60 | while self.valid + capacity > (1 << self.shift) { 61 | self.shift += 1; 62 | self.stash.clear(); // clear wrongly sized buffers. 63 | self.in_progress.clear(); // clear wrongly sized buffers. 64 | increased_shift = true; 65 | } 66 | 67 | // Attempt to reclaim shared slices. 68 | if self.stash.is_empty() { 69 | for shared in self.in_progress.iter_mut() { 70 | if let Some(mut bytes) = shared.take() { 71 | if bytes.try_regenerate::>() { 72 | // NOTE: Test should be redundant, but better safe... 73 | if bytes.len() == (1 << self.shift) { 74 | self.stash.push(bytes); 75 | } 76 | } 77 | else { 78 | *shared = Some(bytes); 79 | } 80 | } 81 | } 82 | self.in_progress.retain(|x| x.is_some()); 83 | } 84 | 85 | let new_buffer = self.stash.pop().unwrap_or_else(|| Bytes::from(vec![0; 1 << self.shift].into_boxed_slice())); 86 | let old_buffer = ::std::mem::replace(&mut self.buffer, new_buffer); 87 | 88 | self.buffer[.. self.valid].copy_from_slice(&old_buffer[.. self.valid]); 89 | if !increased_shift { 90 | self.in_progress.push(Some(old_buffer)); 91 | } 92 | } 93 | } 94 | } -------------------------------------------------------------------------------- /communication/src/allocator/zero_copy/initialize.rs: -------------------------------------------------------------------------------- 1 | //! Network initialization. 2 | 3 | use std::sync::Arc; 4 | // use crate::allocator::Process; 5 | use crate::allocator::process::ProcessBuilder; 6 | use crate::networking::create_sockets; 7 | use super::tcp::{send_loop, recv_loop}; 8 | use super::allocator::{TcpBuilder, new_vector}; 9 | 10 | /// Join handles for send and receive threads. 11 | /// 12 | /// On drop, the guard joins with each of the threads to ensure that they complete 13 | /// cleanly and send all necessary data. 
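/// Dropping the guard therefore blocks until every send and receive thread has been joined (see the `Drop` implementation below).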
14 | pub struct CommsGuard { 15 | send_guards: Vec<::std::thread::JoinHandle<()>>, 16 | recv_guards: Vec<::std::thread::JoinHandle<()>>, 17 | } 18 | 19 | impl Drop for CommsGuard { 20 | fn drop(&mut self) { 21 | for handle in self.send_guards.drain(..) { 22 | handle.join().expect("Send thread panic"); 23 | } 24 | // println!("SEND THREADS JOINED"); 25 | for handle in self.recv_guards.drain(..) { 26 | handle.join().expect("Recv thread panic"); 27 | } 28 | // println!("RECV THREADS JOINED"); 29 | } 30 | } 31 | 32 | use crate::logging::{CommunicationSetup, CommunicationEvent}; 33 | use logging_core::Logger; 34 | 35 | /// Initializes network connections 36 | pub fn initialize_networking( 37 | addresses: Vec<String>, 38 | my_index: usize, 39 | threads: usize, 40 | noisy: bool, 41 | log_sender: Box<Fn(CommunicationSetup)->Option<Logger<CommunicationEvent, CommunicationSetup>>+Send+Sync>) 42 | -> ::std::io::Result<(Vec<TcpBuilder<ProcessBuilder>>, CommsGuard)> 43 | { 44 | let log_sender = Arc::new(log_sender); 45 | let processes = addresses.len(); 46 | 47 | // one per process (including local, which would be None) 48 | let mut results: Vec<Option<::std::net::TcpStream>> = 49 | create_sockets(addresses, my_index, noisy)?; 50 | 51 | let process_allocators = crate::allocator::process::Process::new_vector(threads); 52 | let (builders, promises, futures) = new_vector(process_allocators, my_index, processes); 53 | 54 | let mut promises_iter = promises.into_iter(); 55 | let mut futures_iter = futures.into_iter(); 56 | 57 | let mut send_guards = Vec::new(); 58 | let mut recv_guards = Vec::new(); 59 | 60 | // for each process, if a stream exists (i.e. not local) ... 61 | for index in 0..results.len() { 62 | 63 | if let Some(stream) = results[index].take() { 64 | // remote process 65 | 66 | let remote_recv = promises_iter.next().unwrap(); 67 | 68 | { 69 | let log_sender = log_sender.clone(); 70 | let stream = stream.try_clone()?; 71 | let join_guard = 72 | ::std::thread::Builder::new() 73 | .name(format!("send thread {}", index)) 74 | .spawn(move || { 75 | 76 | let logger = log_sender(CommunicationSetup { 77 | process: my_index, 78 | sender: true, 79 | remote: Some(index), 80 | }); 81 | 82 | send_loop(stream, remote_recv, my_index, index, logger); 83 | })?; 84 | 85 | send_guards.push(join_guard); 86 | } 87 | 88 | let remote_send = futures_iter.next().unwrap(); 89 | 90 | { 91 | // let remote_sends = remote_sends.clone(); 92 | let log_sender = log_sender.clone(); 93 | let stream = stream.try_clone()?; 94 | let join_guard = 95 | ::std::thread::Builder::new() 96 | .name(format!("recv thread {}", index)) 97 | .spawn(move || { 98 | let logger = log_sender(CommunicationSetup { 99 | process: my_index, 100 | sender: false, 101 | remote: Some(index), 102 | }); 103 | recv_loop(stream, remote_send, threads * my_index, my_index, index, logger); 104 | })?; 105 | 106 | recv_guards.push(join_guard); 107 | } 108 | 109 | } 110 | } 111 | 112 | Ok((builders, CommsGuard { send_guards, recv_guards })) 113 | } -------------------------------------------------------------------------------- /communication/src/allocator/zero_copy/mod.rs: -------------------------------------------------------------------------------- 1 | //! Allocators based on serialized data which avoid copies. 2 | //! 3 | //! These allocators are based on `Abomonation` serialization, and its ability to deserialize 4 | //! typed Rust data in-place. They surface references to data, often ultimately referencing the 5 | //! raw binary data they initially received.
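//!
//! A rough sketch of the send path through these submodules (names are from the modules below; this is illustrative, not a doctest):
//!
//! ```ignore
//! let mut slab = BytesSlab::new(20);        // working buffer of 1 << 20 bytes.
//! slab.ensure_capacity(needed);             // grow if the empty region is too small.
//! // ... serialize `needed` bytes into `slab.empty()` ...
//! slab.make_valid(needed);                  // mark the bytes as written.
//! let bytes = slab.extract(needed);         // zero-copy shared handle.
//! let message = unsafe { Message::from_bytes(bytes) }; // reinterpret in place.
//! ```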
6 | 7 | pub mod bytes_slab; 8 | pub mod bytes_exchange; 9 | pub mod tcp; 10 | pub mod allocator; 11 | pub mod allocator_process; 12 | pub mod initialize; 13 | pub mod push_pull; -------------------------------------------------------------------------------- /communication/src/allocator/zero_copy/push_pull.rs: -------------------------------------------------------------------------------- 1 | //! Push and Pull implementations wrapping serialized data. 2 | 3 | use std::rc::Rc; 4 | use std::cell::RefCell; 5 | use std::collections::VecDeque; 6 | 7 | use bytes::arc::Bytes; 8 | 9 | use crate::allocator::canary::Canary; 10 | use crate::networking::MessageHeader; 11 | 12 | use crate::{Data, Push, Pull}; 13 | use crate::allocator::Message; 14 | 15 | use super::bytes_exchange::{BytesPush, SendEndpoint}; 16 | 17 | /// An adapter into which one may push elements of type `T`. 18 | /// 19 | /// This pusher has a fixed MessageHeader, and access to a SharedByteBuffer which it uses to 20 | /// acquire buffers for serialization. 21 | pub struct Pusher { 22 | header: MessageHeader, 23 | sender: Rc>>, 24 | phantom: ::std::marker::PhantomData, 25 | } 26 | 27 | impl Pusher { 28 | /// Creates a new `Pusher` from a header and shared byte buffer. 29 | pub fn new(header: MessageHeader, sender: Rc>>) -> Pusher { 30 | Pusher { 31 | header: header, 32 | sender: sender, 33 | phantom: ::std::marker::PhantomData, 34 | } 35 | } 36 | } 37 | 38 | impl Push> for Pusher { 39 | #[inline] 40 | fn push(&mut self, element: &mut Option>) { 41 | if let Some(ref mut element) = *element { 42 | 43 | // determine byte lengths and build header. 44 | let mut header = self.header; 45 | self.header.seqno += 1; 46 | header.length = element.length_in_bytes(); 47 | assert!(header.length > 0); 48 | 49 | // acquire byte buffer and write header, element. 50 | let mut borrow = self.sender.borrow_mut(); 51 | { 52 | let mut bytes = borrow.reserve(header.required_bytes()); 53 | assert!(bytes.len() >= header.required_bytes()); 54 | let writer = &mut bytes; 55 | header.write_to(writer).expect("failed to write header!"); 56 | element.into_bytes(writer); 57 | } 58 | borrow.make_valid(header.required_bytes()); 59 | } 60 | } 61 | } 62 | 63 | /// An adapter from which one can pull elements of type `T`. 64 | /// 65 | /// This type is very simple, and just consumes owned `Vec` allocations. It is 66 | /// not the most efficient thing possible, which would probably instead be something 67 | /// like the `bytes` crate (../bytes/) which provides an exclusive view of a shared 68 | /// allocation. 69 | pub struct Puller { 70 | _canary: Canary, 71 | current: Option>, 72 | receiver: Rc>>, // source of serialized buffers 73 | } 74 | 75 | impl Puller { 76 | /// Creates a new `Puller` instance from a shared queue. 77 | pub fn new(receiver: Rc>>, _canary: Canary) -> Puller { 78 | Puller { 79 | _canary, 80 | current: None, 81 | receiver, 82 | } 83 | } 84 | } 85 | 86 | impl Pull> for Puller { 87 | #[inline] 88 | fn pull(&mut self) -> &mut Option> { 89 | self.current = 90 | self.receiver 91 | .borrow_mut() 92 | .pop_front() 93 | .map(|bytes| unsafe { Message::from_bytes(bytes) }); 94 | 95 | &mut self.current 96 | } 97 | } 98 | 99 | /// An adapter from which one can pull elements of type `T`. 100 | /// 101 | /// This type is very simple, and just consumes owned `Vec` allocations. 
It is 102 | /// not the most efficient thing possible, which would probably instead be something 103 | /// like the `bytes` crate (../bytes/) which provides an exclusive view of a shared 104 | /// allocation. 105 | pub struct PullerInner { 106 | inner: Box>>, // inner pullable (e.g. intra-process typed queue) 107 | _canary: Canary, 108 | current: Option>, 109 | receiver: Rc>>, // source of serialized buffers 110 | } 111 | 112 | impl PullerInner { 113 | /// Creates a new `PullerInner` instance from a shared queue. 114 | pub fn new(inner: Box>>, receiver: Rc>>, _canary: Canary) -> Self { 115 | PullerInner { 116 | inner, 117 | _canary, 118 | current: None, 119 | receiver, 120 | } 121 | } 122 | } 123 | 124 | impl Pull> for PullerInner { 125 | #[inline] 126 | fn pull(&mut self) -> &mut Option> { 127 | 128 | let inner = self.inner.pull(); 129 | if inner.is_some() { 130 | inner 131 | } 132 | else { 133 | self.current = 134 | self.receiver 135 | .borrow_mut() 136 | .pop_front() 137 | .map(|bytes| unsafe { Message::from_bytes(bytes) }); 138 | 139 | &mut self.current 140 | } 141 | } 142 | } -------------------------------------------------------------------------------- /communication/src/buzzer.rs: -------------------------------------------------------------------------------- 1 | //! A type that can unpark specific threads. 2 | 3 | use std::thread::Thread; 4 | 5 | /// Can unpark a specific thread. 6 | #[derive(Clone)] 7 | pub struct Buzzer { 8 | thread: Thread, 9 | } 10 | 11 | impl Buzzer { 12 | /// Creates a new buzzer for the current thread. 13 | pub fn new() -> Self { 14 | Self { 15 | thread: std::thread::current() 16 | } 17 | } 18 | /// Unparks the target thread. 19 | pub fn buzz(&self) { 20 | self.thread.unpark() 21 | } 22 | } -------------------------------------------------------------------------------- /communication/src/logging.rs: -------------------------------------------------------------------------------- 1 | //! Configuration and events for communication logging. 2 | 3 | /// Configuration information about a communication thread. 4 | #[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)] 5 | pub struct CommunicationSetup { 6 | /// True when this is a send thread (or the receive thread). 7 | pub sender: bool, 8 | /// The process id of the thread. 9 | pub process: usize, 10 | /// The remote process id. 11 | pub remote: Option, 12 | } 13 | 14 | /// Various communication events. 15 | #[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)] 16 | pub enum CommunicationEvent { 17 | /// An observed message. 18 | Message(MessageEvent), 19 | /// A state transition. 20 | State(StateEvent), 21 | } 22 | 23 | /// An observed message. 24 | #[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)] 25 | pub struct MessageEvent { 26 | /// true for send event, false for receive event 27 | pub is_send: bool, 28 | /// associated message header. 29 | pub header: crate::networking::MessageHeader, 30 | } 31 | 32 | /// Starting or stopping communication threads. 33 | #[derive(Abomonation, Debug, PartialEq, Eq, Hash, Clone, Copy)] 34 | pub struct StateEvent { 35 | /// Is the thread a send (vs a recv) thread. 36 | pub send: bool, 37 | /// The host process id. 38 | pub process: usize, 39 | /// The remote process id. 40 | pub remote: usize, 41 | /// Is the thread starting or stopping. 
42 | pub start: bool, 43 | } 44 | 45 | impl From for CommunicationEvent { 46 | fn from(v: MessageEvent) -> CommunicationEvent { CommunicationEvent::Message(v) } 47 | } 48 | impl From for CommunicationEvent { 49 | fn from(v: StateEvent) -> CommunicationEvent { CommunicationEvent::State(v) } 50 | } 51 | -------------------------------------------------------------------------------- /kafkaesque/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /kafkaesque/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "kafkaesque" 3 | version = "0.1.0" 4 | authors = ["Frank McSherry "] 5 | edition = "2018" 6 | 7 | [dependencies] 8 | clap="*" 9 | abomonation="0.7" 10 | timely = { path = "../timely" } 11 | 12 | [dependencies.rdkafka] 13 | version = "0.20.0" 14 | -------------------------------------------------------------------------------- /kafkaesque/src/bin/capture_recv.rs: -------------------------------------------------------------------------------- 1 | use timely::dataflow::operators::Inspect; 2 | use timely::dataflow::operators::capture::Replay; 3 | use timely::dataflow::operators::Accumulate; 4 | 5 | use rdkafka::config::ClientConfig; 6 | 7 | use kafkaesque::EventConsumer; 8 | 9 | fn main() { 10 | timely::execute_from_args(std::env::args(), |worker| { 11 | 12 | let topic = std::env::args().nth(1).unwrap(); 13 | let source_peers = std::env::args().nth(2).unwrap().parse::().unwrap(); 14 | let brokers = "localhost:9092"; 15 | 16 | // Create Kafka stuff. 17 | let mut consumer_config = ClientConfig::new(); 18 | consumer_config 19 | .set("produce.offset.report", "true") 20 | .set("auto.offset.reset", "smallest") 21 | .set("group.id", "example") 22 | .set("enable.auto.commit", "false") 23 | .set("enable.partition.eof", "false") 24 | .set("auto.offset.reset", "earliest") 25 | .set("session.timeout.ms", "6000") 26 | .set("bootstrap.servers", &brokers); 27 | 28 | // create replayers from disjoint partition of source worker identifiers. 29 | let replayers = 30 | (0 .. source_peers) 31 | .filter(|i| i % worker.peers() == worker.index()) 32 | .map(|i| { 33 | let topic = format!("{}-{:?}", topic, i); 34 | EventConsumer::<_,u64>::new(consumer_config.clone(), topic) 35 | }) 36 | .collect::>(); 37 | 38 | worker.dataflow::(|scope| { 39 | replayers 40 | .replay_into(scope) 41 | .count() 42 | .inspect(|x| println!("replayed: {:?}", x)) 43 | ; 44 | }) 45 | }).unwrap(); // asserts error-free execution 46 | } 47 | -------------------------------------------------------------------------------- /kafkaesque/src/bin/capture_send.rs: -------------------------------------------------------------------------------- 1 | use timely::dataflow::operators::ToStream; 2 | use timely::dataflow::operators::capture::Capture; 3 | 4 | use rdkafka::config::ClientConfig; 5 | 6 | use kafkaesque::EventProducer; 7 | 8 | fn main() { 9 | timely::execute_from_args(std::env::args(), |worker| { 10 | 11 | // target topic name. 12 | let topic = std::env::args().nth(1).unwrap(); 13 | let count = std::env::args().nth(2).unwrap().parse::().unwrap(); 14 | let brokers = "localhost:9092"; 15 | 16 | // Create Kafka stuff. 
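// Each worker publishes its stream to a per-worker topic named `{topic}-{index}` (see the `format!` call below); `capture_recv.rs` replays those topics, partitioned by worker id.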
17 | let mut producer_config = ClientConfig::new(); 18 | producer_config 19 | .set("produce.offset.report", "true") 20 | .set("bootstrap.servers", brokers); 21 | 22 | let topic = format!("{}-{:?}", topic, worker.index()); 23 | let producer = EventProducer::new(producer_config, topic); 24 | 25 | worker.dataflow::(|scope| 26 | (0 .. count) 27 | .to_stream(scope) 28 | .capture_into(producer) 29 | ); 30 | }).unwrap(); 31 | } 32 | -------------------------------------------------------------------------------- /kafkaesque/src/bin/kafka_source.rs: -------------------------------------------------------------------------------- 1 | use timely::dataflow::operators::Inspect; 2 | 3 | use rdkafka::config::ClientConfig; 4 | use rdkafka::consumer::{Consumer, BaseConsumer, DefaultConsumerContext}; 5 | 6 | fn main() { 7 | 8 | let mut args = ::std::env::args(); 9 | args.next(); 10 | 11 | // Extract Kafka topic. 12 | let topic = args.next().expect("Must specify a Kafka topic"); 13 | let brokers = "localhost:9092"; 14 | 15 | // Create Kafka consumer configuration. 16 | // Feel free to change parameters here. 17 | let mut consumer_config = ClientConfig::new(); 18 | consumer_config 19 | .set("produce.offset.report", "true") 20 | .set("auto.offset.reset", "smallest") 21 | .set("group.id", "example") 22 | .set("enable.auto.commit", "false") 23 | .set("enable.partition.eof", "false") 24 | .set("auto.offset.reset", "earliest") 25 | .set("session.timeout.ms", "6000") 26 | .set("bootstrap.servers", &brokers); 27 | 28 | timely::execute_from_args(args, move |worker| { 29 | 30 | // A dataflow for producing spans. 31 | worker.dataflow::(|scope| { 32 | 33 | // Create a Kafka consumer. 34 | let consumer : BaseConsumer = consumer_config.create().expect("Couldn't create consumer"); 35 | consumer.subscribe(&[&topic]).expect("Failed to subscribe to topic"); 36 | 37 | let strings = 38 | kafkaesque::source(scope, "KafkaStringSource", consumer, |bytes, capability, output| { 39 | 40 | // If the bytes are utf8, convert to string and send. 41 | if let Ok(text) = std::str::from_utf8(bytes) { 42 | output 43 | .session(capability) 44 | .give(text.to_string()); 45 | } 46 | 47 | // We need some rule to advance timestamps ... 48 | let time = *capability.time(); 49 | capability.downgrade(&(time + 1)); 50 | 51 | // Indicate that we are not yet done. 
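// (Presumably, a source that wished to shut down cleanly would instead return `true` here, allowing the operator to drop its capability.)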
52 | false 53 | }); 54 | 55 | strings.inspect(|x| println!("Observed: {:?}", x)); 56 | 57 | }); 58 | 59 | }).expect("Timely computation failed somehow"); 60 | 61 | println!("Hello, world!"); 62 | } 63 | 64 | -------------------------------------------------------------------------------- /kafkaesque/src/lib.rs: -------------------------------------------------------------------------------- 1 | use std::sync::Arc; 2 | use std::sync::atomic::{AtomicIsize, Ordering}; 3 | 4 | use abomonation::Abomonation; 5 | use timely::dataflow::operators::capture::event::{Event, EventPusher, EventIterator}; 6 | 7 | use rdkafka::Message; 8 | use rdkafka::client::ClientContext; 9 | use rdkafka::config::ClientConfig; 10 | use rdkafka::producer::{BaseProducer, BaseRecord, ProducerContext, DeliveryResult}; 11 | use rdkafka::consumer::{Consumer, BaseConsumer, DefaultConsumerContext}; 12 | 13 | use rdkafka::config::FromClientConfigAndContext; 14 | 15 | pub mod kafka_source; 16 | pub use kafka_source::kafka_source as source; 17 | 18 | struct OutstandingCounterContext { 19 | outstanding: Arc, 20 | } 21 | 22 | impl ClientContext for OutstandingCounterContext { } 23 | 24 | impl ProducerContext for OutstandingCounterContext { 25 | type DeliveryOpaque = (); 26 | fn delivery(&self, _report: &DeliveryResult, _: Self::DeliveryOpaque) { 27 | self.outstanding.fetch_sub(1, Ordering::SeqCst); 28 | } 29 | } 30 | 31 | impl OutstandingCounterContext { 32 | pub fn new(counter: &Arc) -> Self { 33 | OutstandingCounterContext { 34 | outstanding: counter.clone() 35 | } 36 | } 37 | } 38 | 39 | /// A wrapper for `W: Write` implementing `EventPusher`. 40 | pub struct EventProducer { 41 | topic: String, 42 | buffer: Vec, 43 | producer: BaseProducer, 44 | counter: Arc, 45 | phant: ::std::marker::PhantomData<(T,D)>, 46 | } 47 | 48 | impl EventProducer { 49 | /// Allocates a new `EventWriter` wrapping a supplied writer. 50 | pub fn new(config: ClientConfig, topic: String) -> Self { 51 | let counter = Arc::new(AtomicIsize::new(0)); 52 | let context = OutstandingCounterContext::new(&counter); 53 | let producer = BaseProducer::::from_config_and_context(&config, context).expect("Couldn't create producer"); 54 | println!("allocating producer for topic {:?}", topic); 55 | EventProducer { 56 | topic: topic, 57 | buffer: vec![], 58 | producer: producer, 59 | counter: counter, 60 | phant: ::std::marker::PhantomData, 61 | } 62 | } 63 | } 64 | 65 | impl EventPusher for EventProducer { 66 | fn push(&mut self, event: Event) { 67 | unsafe { ::abomonation::encode(&event, &mut self.buffer).expect("Encode failure"); } 68 | // println!("sending {:?} bytes", self.buffer.len()); 69 | self.producer.send::<(),[u8]>(BaseRecord::to(self.topic.as_str()).payload(&self.buffer[..])).unwrap(); 70 | self.counter.fetch_add(1, Ordering::SeqCst); 71 | self.producer.poll(std::time::Duration::from_millis(0)); 72 | self.buffer.clear(); 73 | } 74 | } 75 | 76 | impl Drop for EventProducer { 77 | fn drop(&mut self) { 78 | while self.counter.load(Ordering::SeqCst) > 0 { 79 | self.producer.poll(std::time::Duration::from_millis(10)); 80 | } 81 | } 82 | } 83 | 84 | /// A Wrapper for `R: Read` implementing `EventIterator`. 85 | pub struct EventConsumer { 86 | consumer: BaseConsumer, 87 | buffer: Vec, 88 | phant: ::std::marker::PhantomData<(T,D)>, 89 | } 90 | 91 | impl EventConsumer { 92 | /// Allocates a new `EventReader` wrapping a supplied reader. 
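/// A construction sketch mirroring `bin/capture_recv.rs` above, where `consumer_config` is a configured `ClientConfig` and the event stream's timestamps are `u64`:
///
/// ```ignore
/// let consumer = EventConsumer::<_, u64>::new(consumer_config.clone(), topic);
/// ```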
93 | pub fn new(config: ClientConfig, topic: String) -> Self { 94 | println!("allocating consumer for topic {:?}", topic); 95 | let consumer : BaseConsumer = config.create().expect("Couldn't create consumer"); 96 | consumer.subscribe(&[&topic]).expect("Failed to subscribe to topic"); 97 | EventConsumer { 98 | consumer: consumer, 99 | buffer: Vec::new(), 100 | phant: ::std::marker::PhantomData, 101 | } 102 | } 103 | } 104 | 105 | impl EventIterator for EventConsumer { 106 | fn next(&mut self) -> Option<&Event> { 107 | if let Some(result) = self.consumer.poll(std::time::Duration::from_millis(0)) { 108 | match result { 109 | Ok(message) => { 110 | self.buffer.clear(); 111 | self.buffer.extend_from_slice(message.payload().unwrap()); 112 | Some(unsafe { ::abomonation::decode::>(&mut self.buffer[..]).unwrap().0 }) 113 | }, 114 | Err(err) => { 115 | println!("KafkaConsumer error: {:?}", err); 116 | None 117 | }, 118 | } 119 | } 120 | else { None } 121 | } 122 | } 123 | -------------------------------------------------------------------------------- /logging/.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /.vscode 3 | Cargo.lock 4 | -------------------------------------------------------------------------------- /logging/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "timely_logging" 3 | version = "0.10.0" 4 | authors = ["Frank McSherry "] 5 | edition = "2018" 6 | 7 | description = "Common timely logging infrastructure" 8 | 9 | documentation = "https://docs.rs/timely/" 10 | homepage = "https://github.com/TimelyDataflow/timely-dataflow" 11 | repository = "https://github.com/TimelyDataflow/timely-dataflow.git" 12 | keywords = ["timely", "dataflow", "logging"] 13 | license = "MIT" -------------------------------------------------------------------------------- /mdbook/.gitignore: -------------------------------------------------------------------------------- 1 | book 2 | -------------------------------------------------------------------------------- /mdbook/src/SUMMARY.md: -------------------------------------------------------------------------------- 1 | # Summary 2 | 3 | [Timely Dataflow](./introduction.md) 4 | 5 | - [Motivation](./chapter_0/chapter_0.md) 6 | - [A Simplest Example](./chapter_0/chapter_0_0.md) 7 | - [A Simple Example](./chapter_0/chapter_0_1.md) 8 | - [When to use Timely Dataflow](./chapter_0/chapter_0_2.md) 9 | - [When not to use Timely Dataflow](./chapter_0/chapter_0_3.md) 10 | 11 | - [Core Concepts](./chapter_1/chapter_1.md) 12 | - [Dataflow](./chapter_1/chapter_1_1.md) 13 | - [Timestamps](./chapter_1/chapter_1_2.md) 14 | - [Progress](./chapter_1/chapter_1_3.md) 15 | 16 | - [Building Timely Dataflows](./chapter_2/chapter_2.md) 17 | - [Creating Inputs](./chapter_2/chapter_2_1.md) 18 | - [Observing Outputs](./chapter_2/chapter_2_2.md) 19 | - [Adding Operators](./chapter_2/chapter_2_3.md) 20 | - [Creating Operators](./chapter_2/chapter_2_4.md) 21 | - [A Worked Example](./chapter_2/chapter_2_5.md) 22 | 23 | - [Running Timely Dataflows](./chapter_3/chapter_3.md) 24 | - [Providing Input](./chapter_3/chapter_3_1.md) 25 | - [Monitoring Probes](./chapter_3/chapter_3_2.md) 26 | - [Operator Execution](./chapter_3/chapter_3_3.md) 27 | - [Extending Dataflows](./chapter_3/chapter_3_4.md) 28 | 29 | - [Advanced Timely Dataflow](./chapter_4/chapter_4.md) 30 | - [Scopes](./chapter_4/chapter_4_1.md) 31 | - [Iteration](./chapter_4/chapter_4_2.md) 32 | - [Flow 
Control](./chapter_4/chapter_4_3.md) 33 | - [Capture and Replay](./chapter_4/chapter_4_4.md) 34 | - [Custom Datatypes](./chapter_4/chapter_4_5.md) 35 | 36 | - [Internals](./chapter_5/chapter_5.md) 37 | - [Communication](./chapter_5/chapter_5_1.md) 38 | - [Progress Tracking](./chapter_5/chapter_5_2.md) 39 | -------------------------------------------------------------------------------- /mdbook/src/chapter_0/chapter_0.md: -------------------------------------------------------------------------------- 1 | # Motivation 2 | 3 | Let's start with some motivation: what can you do with timely dataflow, and when should you be excited to use it as opposed to other programming frameworks? Is timely dataflow great for everything, or is it only great for a few things? Is it great for anything? We will try and clarify these questions in this section. 4 | -------------------------------------------------------------------------------- /mdbook/src/chapter_0/chapter_0_0.md: -------------------------------------------------------------------------------- 1 | ## A simplest example 2 | 3 | Let's start with what may be the simplest non-trivial timely dataflow program. 4 | 5 | ```rust 6 | extern crate timely; 7 | 8 | use timely::dataflow::operators::{ToStream, Inspect}; 9 | 10 | fn main() { 11 | timely::example(|scope| { 12 | (0..10).to_stream(scope) 13 | .inspect(|x| println!("seen: {:?}", x)); 14 | }); 15 | } 16 | ``` 17 | 18 | This program gives us a bit of a flavor for what a timely dataflow program might look like, including a bit of what Rust looks like, without getting too bogged down in weird stream processing details. Not to worry; we will do that in just a moment! 19 | 20 | If we run the program up above, we see it print out the numbers zero through nine. 21 | 22 | ```ignore 23 | Echidnatron% cargo run --example simple 24 | Finished dev [unoptimized + debuginfo] target(s) in 0.05s 25 | Running `target/debug/examples/simple` 26 | seen: 0 27 | seen: 1 28 | seen: 2 29 | seen: 3 30 | seen: 4 31 | seen: 5 32 | seen: 6 33 | seen: 7 34 | seen: 8 35 | seen: 9 36 | Echidnatron% 37 | ``` 38 | 39 | This isn't very different from a Rust program that would do this much more simply, namely the program 40 | 41 | ```rust 42 | fn main() { 43 | (0..10).for_each(|x| println!("seen: {:?}", x)); 44 | } 45 | ``` 46 | 47 | Why would we want to make our life so complicated? The main reason is that we can make our program *reactive*, so that we can run it without knowing ahead of time the data we will use, and it will respond as we produce new data. -------------------------------------------------------------------------------- /mdbook/src/chapter_0/chapter_0_2.md: -------------------------------------------------------------------------------- 1 | # When to use Timely Dataflow 2 | 3 | Timely dataflow may be a different programming model than you are used to, but if you can adapt your program to it there are several benefits. 4 | 5 | * **Data Parallelism**: The operators in timely dataflow are largely "data-parallel", meaning they can operate on independent parts of the data concurrently. This allows the underlying system to distribute timely dataflow computations across multiple parallel workers. These can be threads on your computer, or even threads across computers in a cluster you have access to. This distribution typically improves the throughput of the system, and lets you scale to larger problems with access to more resources (computation, communication, and memory). 
6 | 7 | * **Streaming Data**: The core data type in timely dataflow is a *stream* of data, an unbounded collection of data not all of which is available right now, but which instead arrives as the computation proceeds. Streams are a helpful generalization of static data sets, which are assumed available at the start of the computation. By expressing your program as a computation on streams, you've explained both how it should respond to static input data sets (feed all the data in at once) and how it should react to new data that might arrive later on. 8 | 9 | * **Expressivity**: Timely dataflow's main addition over traditional stream processors is its ability to express higher-level control constructs, like iteration. This moves stream computations from the limitations of straight line code to the world of *algorithms*. Many of the advantages of timely dataflow computations come from our ability to express a more intelligent algorithm than the alternative systems, which can only express more primitive computations. 10 | 11 | There are many domains where streaming and scalability are important, and I'm not sure I can name them all. If you would like to build a scalable monitoring application for a service you run, timely dataflow can express this. If you would like to work with big data computations processing more data than your computer can load into memory, timely dataflow streams can represent this efficiently. If you would like to build an incremental iterative computation over massive data (e.g. matrices, large graphs, text corpora), timely dataflow has done these things. 12 | 13 | At the same time, dataflow computation is also another way of thinking about your program. Much like Rust causes you to think a bit harder about program structure, timely dataflow helps you tease out some structure to your program that results in a more effective computation. Even when writing something like `grep`, a program that scans lines of text looking for patterns, by stating your program as a dataflow computation its implementation immediately scales out to multiple threads, and even across multiple computers. 14 | 15 | ## Generality 16 | 17 | Is timely dataflow always applicable? The intent of this research project is to remove layers of abstraction fat that prevent you from expressing anything your computer can do efficiently in parallel. 18 | 19 | Under the covers, your computer (the one on which you are reading this text) is a dataflow processor. When your computer *reads memory* it doesn't actually wander off to find the memory, it introduces a read request into your memory controller, an independent component that will eventually return with the associated cache line. Your computer then gets back to work on whatever it was doing, hoping the responses from the controller return in a timely fashion. 20 | 21 | Academically, I treat "my computer can do this, but timely dataflow cannot" as a bug. There are degrees, of course, and timely dataflow isn't on par with the processor's custom hardware designed to handle low level requests efficiently, but *algorithmically*, the goal is that anything you can do efficiently with a computer you should be able to express in timely dataflow.
22 | -------------------------------------------------------------------------------- /mdbook/src/chapter_0/chapter_0_3.md: -------------------------------------------------------------------------------- 1 | # When not to use Timely Dataflow 2 | 3 | There are several reasons not to use timely dataflow, though many of them amount to *friction* in how your problem is expressed, rather than fundamental technical limitations. There are fundamental technical limitations too, of course. 4 | 5 | I've collected a few examples here, but the list may grow with input and feedback. 6 | 7 | --- 8 | 9 | Timely dataflow is a *dataflow* system, and this means that at its core it likes to move data around. This makes life complicated when you would prefer not to move data, and instead move things like pointers and references to data that otherwise stays put. 10 | 11 | For example, sorting a slice of data is a fundamental task and one that parallelizes. But, the task of sorting is traditionally viewed as transforming the data in a supplied slice, rather than sending the data to multiple workers and then announcing that it got sorted. The data really does need to end up in one place, one single pre-existing memory allocation, and timely dataflow is not great at problems that cannot be recast as the movement of data. 12 | 13 | One could re-imagine the sorting process as moving data around, and indeed this is what happens when large clusters need to be brought to bear on such a task, but that doesn't help you at all if what you needed was to sort your single allocation. A library like [Rayon](https://github.com/nikomatsakis/rayon) would almost surely be better suited to the task (see the sketch at the end of this section). 14 | 15 | --- 16 | 17 | Dataflow systems are also fundamentally about breaking apart the execution of your program into independently operating parts. However, many programs are correct only because some things happen *before* or *after* other things. A classic example is [depth-first search](https://en.wikipedia.org/wiki/Depth-first_search) in a graph: although there is lots of work to do on small bits of data, it is crucial that the exploration of nodes reachable along a graph edge complete before the exploration of nodes reachable along the next graph edge. 18 | 19 | Although there is plenty of active research on transforming algorithms from sequential to parallel, if you aren't clear on how to express your program as a dataflow program then timely dataflow may not be a great fit. At the very least, the first step would be "fundamentally re-imagine your program", which can be a fine thing to do, but is perhaps not something you expected to have to do. 20 | 21 | --- 22 | 23 | Timely dataflow is in a bit of a weird space between language library and runtime system. This means that it doesn't quite have the stability guarantees a library might have (when you call `data.sort()` you don't think about "what if it fails?"), nor does it have the surrounding infrastructure of a [DryadLINQ](https://www.microsoft.com/en-us/research/project/dryadlinq/) or [Spark](https://spark.apache.org) style of experience. Part of this burden is simply passed to you, and this may be intolerable depending on your goals for your program.
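For the sorting example above, a minimal sketch of the Rayon alternative might look like the following; this assumes the `rayon` crate as a dependency, which is not part of this repository.

```rust,ignore
extern crate rayon;

use rayon::prelude::*;

fn main() {
    // One pre-existing allocation, sorted in place by a pool of threads;
    // the data never has to leave the original slice.
    let mut values = vec![5, 3, 8, 1, 9, 2];
    values.par_sort();
    assert_eq!(values, vec![1, 2, 3, 5, 8, 9]);
}
```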
24 | -------------------------------------------------------------------------------- /mdbook/src/chapter_1/chapter_1.md: -------------------------------------------------------------------------------- 1 | # Chapter 1: Core Concepts 2 | 3 | Timely dataflow relies on two fundamental concepts: **timestamps** and **dataflow**, which together lead to the concept of **progress**. We will want to break down these concepts because they play a fundamental role in understanding how timely dataflow programs are structured. 4 | 5 | ## Dataflow 6 | 7 | Dataflow programming is fundamentally about describing your program as independent components, each of which operates in response to the availability of input data, as well as describing the connections between these components. 8 | 9 | The most important part of dataflow programming is the *independence* of the components. When you write a dataflow program, you provide the computer with flexibility in how it executes your program. Rather than being handed a specific sequence of instructions to follow, the computer can work on each of the components as it sees fit, perhaps even sharing the work with other computers. 10 | 11 | ## Timestamps 12 | 13 | While we want to enjoy the benefits of dataflow programming, we still need to understand whether and how our computation progresses. In traditional imperative programming we could reason that, because instructions happen in some order, once we reach a certain point all work (of a certain type) must be done. Instead, we will tag the data that move through our dataflow with *timestamps*, indicating (roughly) when they would have happened in a sequential execution. 14 | 15 | Timestamps play at least two roles in timely dataflow: they allow dataflow components to make sense of the otherwise unordered inputs they see ("ah, I received the data in *this* order, but I should behave as if it arrived in *this* order"), and they allow the user (and others) to reason about whether they have seen all of the data with a certain timestamp. 16 | 17 | Timestamps allow us to introduce sequential structure into our program, without requiring actual sequential execution. 18 | 19 | ## Progress 20 | 21 | In a traditional imperative program, if we want to return the maximum of a set of numbers, we just scan all the numbers and return the maximum. We don't have to worry about whether we've considered *all* of the numbers yet, because the program makes sure not to provide an answer until it has consulted each number. 22 | 23 | This simple task is much harder in a dataflow setting, where numbers arrive as input to a component that is tracking the maximum. Before releasing a number as output, the component must know if it has seen everything, as one more value could change its answer. But strictly speaking, nothing we've said so far about dataflow or timestamps provides any information about whether more data might arrive. 24 | 25 | If we combine dataflow program structure with timestamped data in such a way that as data move along the dataflow their timestamps only increase, we are able to reason about the *progress* of our computation. More specifically, at any component in the dataflow, we can reason about which timestamps we may yet see in the future. Timestamps that are no longer possible are considered "passed", and components can react to this information as they see fit.
26 | 27 | Continual information about the progress of a computation is the only basis of coordination in timely dataflow, and is the lightest touch we could think of. 28 | -------------------------------------------------------------------------------- /mdbook/src/chapter_1/chapter_1_2.md: -------------------------------------------------------------------------------- 1 | # Logical Timestamps 2 | 3 | When dataflow programs move data around arbitrarily, it becomes hard to correlate the produced outputs with the supplied inputs. If we supply a stream of bank transactions as input, and the output is a stream of bank balances, how can we know which input transactions are reflected in which output balances? 4 | 5 | The standard approach to this problem is to install *timestamps* on the data. Each record gets a logical timestamp associated with it that indicates *when* it should be thought to happen. This is not necessarily "when" in terms of the date, time, or specific nanosecond the record was emitted; a timestamp could simply be a sequence number identifying a batch of input records. Or, and we will get into the terrifying details later, it could be much more complicated than this. 6 | 7 | Timestamps are what allow us to correlate inputs and outputs. When we introduce records with some logical timestamp, unless our dataflow computation changes the timestamps, we expect to see corresponding outputs with that same timestamp. 8 | 9 | ## An example 10 | 11 | Remember from the dataflow section how, when we removed the coordination from our `examples/hello.rs` program, the output was produced in some horrible order? In fact, each of those records had a timestamp associated with it that would reveal the correct order; we just weren't printing the timestamp because `inspect` doesn't have access to it. 12 | 13 | Let's change the program to print out the timestamp with each record. This shouldn't be very thrilling output, because the timestamp is exactly the same as the number itself, but that didn't have to be the case. We are just going to replace the line 14 | 15 | ```rust,ignore 16 | .inspect(move |x| println!("worker {}:\thello {}", index, x)) 17 | ``` 18 | 19 | with a slightly more complicated operator, `inspect_batch`. 20 | 21 | ```rust,ignore 22 | .inspect_batch(move |t,xs| { 23 | for x in xs.iter() { 24 | println!("worker {}:\thello {} @ {:?}", index, x, t) 25 | } 26 | }) 27 | ``` 28 | 29 | The `inspect_batch` operator gets lower-level access to data in timely dataflow, in particular access to batches of records with the same timestamp. It is intended for diagnosing system-level details, but we can also use it to see what timestamps accompany the data. 30 | 31 | The output we get with two workers is now: 32 | 33 | ```ignore 34 | Echidnatron% cargo run --example hello -- -w2 35 | Finished dev [unoptimized + debuginfo] target(s) in 0.0 secs 36 | Running `target/debug/examples/hello -w2` 37 | worker 1: hello 1 @ (Root, 1) 38 | worker 1: hello 3 @ (Root, 3) 39 | worker 1: hello 5 @ (Root, 5) 40 | worker 0: hello 0 @ (Root, 0) 41 | worker 0: hello 2 @ (Root, 2) 42 | worker 0: hello 4 @ (Root, 4) 43 | worker 0: hello 6 @ (Root, 6) 44 | worker 0: hello 8 @ (Root, 8) 45 | worker 1: hello 7 @ (Root, 7) 46 | worker 1: hello 9 @ (Root, 9) 47 | Echidnatron% 48 | ``` 49 | 50 | The timestamps are the `(Root, i)` things for various values of `i`.
These happen to correspond to the data themselves, but had we provided random input data rather than `i` itself we would still be able to make sense of the output and put it back "in order". 51 | 52 | ## Timestamps for dataflow operators 53 | 54 | Timestamps are not only helpful for dataflow users, but also for the operators themselves. With time we will start to write more interesting dataflow operators, and it may be important for them to understand which records should be thought to come before others. 55 | 56 | Imagine, for example, a dataflow operator whose job is to report the "sum so far", where "so far" should be with respect to the timestamp (as opposed to the arbitrary order in which the operator receives the records). Such an operator can't simply take its input records, add them to a total, and produce the result. The input records may no longer be ordered by timestamp, and the produced summations may not reflect any partial sum of the input. Instead, the operator needs to look at the timestamps on the records, and incorporate the numbers in order of their timestamps. 57 | 58 | Of course, such an operator works great as long as it expects exactly one record for each timestamp. Things get harder for it if it might receive multiple records at each timestamp, or perhaps none. To address this, the underlying system will have to help the operator reason about the progress of its input, up next. 59 | -------------------------------------------------------------------------------- /mdbook/src/chapter_1/chapter_1_3.md: -------------------------------------------------------------------------------- 1 | # Tracking Progress 2 | 3 | Both dataflow and timestamps are valuable in their own right, but when we bring them together we get something even better. We get the ability to reason about the flow of timestamps through our computation, and we recover the ability to inform each dataflow component about how much of its input data it has seen. 4 | 5 | Let's recall that bit of code we commented out from `examples/hello.rs`, which had to do with consulting something named `probe`. 6 | 7 | ```rust 8 | extern crate timely; 9 | 10 | use timely::dataflow::InputHandle; 11 | use timely::dataflow::operators::{Input, Exchange, Inspect, Probe}; 12 | 13 | fn main() { 14 | // initializes and runs a timely dataflow. 15 | timely::execute_from_args(std::env::args(), |worker| { 16 | 17 | let index = worker.index(); 18 | let mut input = InputHandle::new(); 19 | 20 | // create a new input, exchange data, and inspect its output 21 | let probe = worker.dataflow(|scope| 22 | scope.input_from(&mut input) 23 | .exchange(|x| *x) 24 | .inspect(move |x| println!("worker {}:\thello {}", index, x)) 25 | .probe() 26 | ); 27 | 28 | // introduce data and watch! 29 | for round in 0..10 { 30 | if worker.index() == 0 { 31 | input.send(round); 32 | } 33 | input.advance_to(round + 1); 34 | worker.step_while(|| probe.less_than(input.time())); 35 | } 36 | }).unwrap(); 37 | } 38 | ``` 39 | 40 | We'll put the whole program up here, but there are really just two lines that deal with progress tracking: 41 | 42 | ```rust,ignore 43 | input.advance_to(round + 1); 44 | worker.step_while(|| probe.less_than(input.time())); 45 | ``` 46 | 47 | Let's talk about each of them. 48 | 49 | ## Input capabilities 50 | 51 | The `input` structure is how we provide data to a timely dataflow computation, and it has a timestamp associated with it. Initially this timestamp is the default value, usually something like `0` for integers.
Whatever timestamp `input` has, it can introduce data with that timestamp or greater. We can advance this timestamp, via the `advance_to` method, which restricts the timestamps we can use to those greater than or equal to whatever timestamp is supplied as the argument. 52 | 53 | The `advance_to` method is a big deal. This is the moment in the computation where our program reveals to the system, and through the system to all other dataflow workers, that we might soon be able to announce a timestamp as complete. There may still be records in flight bearing that timestamp, but as they are retired the system can finally report that progress has been made. 54 | 55 | ## Output possibilities 56 | 57 | The `probe` structure is how we learn about the possibility of timestamped data at some point in the dataflow graph. We can, at any point, consult a probe with the `less_than` method and ask whether it is still possible that we might see a time less than the argument at that point in the dataflow graph. There is also a `less_equal` method, if you prefer that. 58 | 59 | Putting a probe after the `inspect` operator, which passes through all data it receives as input only after invoking its method, tells us whether we should expect to see the method associated with `inspect` fire again for a given timestamp. If we are told we won't see any more messages with timestamp `t` after the `inspect`, then the `inspect` won't see any either. 60 | 61 | The `less_than` and `less_equal` methods are the only places where we learn about the state of the rest of the system. These methods are non-blocking; they always return immediately with either a "yes, you might see such a timestamp" or a "no, you will not see such a timestamp". 62 | 63 | ## Responding to progress information 64 | 65 | Progress information is relatively passive. We get to observe what happens in the rest of the system, and perhaps change our behavior based on the amount of progress. We do not get to tell the system what to do next; we just get to see what has happened since last we checked. 66 | 67 | This passive approach to coordination allows the system to operate with minimal overhead. Workers exchange both data and progress information. If workers want to wait for further progress before introducing more data, they are welcome to do so, but they can also go and work on a different part of the dataflow graph as well. 68 | 69 | Progress information provides a relatively unopinionated view of coordination. Workers are welcome to impose a more synchronous discipline using progress information, perhaps proceeding in sequence through operators by consulting probes installed after each of them, but they are not required to do so. Synchronization is possible, but it becomes a choice made by the workers themselves, rather than imposed on them by the system. 70 | -------------------------------------------------------------------------------- /mdbook/src/chapter_2/chapter_2.md: -------------------------------------------------------------------------------- 1 | # Building Timely Dataflows 2 | 3 | Let's talk about how to create timely dataflows. 4 | 5 | This section will be a bit of a tour through the dataflow construction process, ignoring for the moment details about the interesting ways in which you can get data in to and out of your dataflow; those will show up in the "Running Timely Dataflows" section.
For now we are going to work with examples that have fixed input data and no interactivity to speak of, focusing on what we can cause to happen to that data. 6 | 7 | Here is a relatively simple example, taken from `timely/examples/simple.rs`, that turns the numbers zero through nine into a stream, and then feeds them through an `inspect` operator printing them to the screen. 8 | 9 | ```rust 10 | extern crate timely; 11 | 12 | use timely::dataflow::operators::{ToStream, Inspect}; 13 | 14 | fn main() { 15 | timely::example(|scope| { 16 | (0..10).to_stream(scope) 17 | .inspect(|x| println!("seen: {:?}", x)); 18 | }); 19 | } 20 | ``` 21 | 22 | We are going to develop this example, showing off both the built-in operators and timely's generic operator construction features. 23 | 24 | --- 25 | 26 | **NOTE**: Timely very much assumes that you are going to build the same dataflow on each worker. You don't literally have to, in that you could build a dataflow from user input, or with a random number generator, things like that. Please don't! It will not be a good use of your time. 27 | -------------------------------------------------------------------------------- /mdbook/src/chapter_2/chapter_2_1.md: -------------------------------------------------------------------------------- 1 | # Creating Inputs 2 | 3 | Let's start with the first thing we'll want for a dataflow computation: a source of data. 4 | 5 | Almost all operators in timely can only be defined from a source of data, with a few exceptions. One of these exceptions is the `to_stream` operator, which is defined for various types and which takes a `scope` as an argument and produces a stream in that scope. Our `InputHandle` type from previous examples has a `to_stream` method, as does any type that can be turned into an iterator (which we used in the preceding example). 6 | 7 | For example, we can create a new dataflow with one interactive input and one static input: 8 | 9 | ```rust 10 | extern crate timely; 11 | 12 | use timely::dataflow::InputHandle; 13 | use timely::dataflow::operators::ToStream; 14 | 15 | fn main() { 16 | // initializes and runs a timely dataflow. 17 | timely::execute_from_args(std::env::args(), |worker| { 18 | 19 | let mut input = InputHandle::<(), String>::new(); 20 | 21 | // define a new dataflow 22 | worker.dataflow(|scope| { 23 | 24 | let stream1 = input.to_stream(scope); 25 | let stream2 = (0 .. 9).to_stream(scope); 26 | 27 | }); 28 | 29 | }).unwrap(); 30 | } 31 | ``` 32 | 33 | There will be more to do to get data into `input`, and we aren't going to worry about that at the moment. But, now you know two of the places you can get data from! 34 | 35 | ## Other sources 36 | 37 | There are other sources of input that are a bit more advanced. Once we learn how to create custom operators, the `source` method will allow us to create a custom operator with zero input streams and one output stream, which looks like a source of data (hence the name). There are also the `Capture` and `Replay` traits that allow us to exfiltrate the contents of a stream from one dataflow (using `capture_into`) and re-load it in another dataflow (using `replay_from`). 38 | -------------------------------------------------------------------------------- /mdbook/src/chapter_2/chapter_2_2.md: -------------------------------------------------------------------------------- 1 | # Observing Outputs 2 | 3 | Having constructed a minimal streaming computation, we might like to take a peek at the output.
There are a few ways to do this, but the simplest by far is the `inspect` operator. 4 | 5 | The `inspect` operator is called with a closure, and it ensures that the closure is run on each record that passes through the operator. This closure can do just about anything, from printing to the screen to writing to a file. 6 | 7 | ```rust 8 | extern crate timely; 9 | 10 | use timely::dataflow::operators::{ToStream, Inspect}; 11 | 12 | fn main() { 13 | timely::execute_from_args(std::env::args(), |worker| { 14 | worker.dataflow::<(),_,_>(|scope| { 15 | (0 .. 9) 16 | .to_stream(scope) 17 | .inspect(|x| println!("hello: {}", x)); 18 | }); 19 | }).unwrap(); 20 | } 21 | ``` 22 | 23 | This simple example turns the sequence zero through eight into a stream and then prints the results to the screen. 24 | 25 | ## Inspecting Batches 26 | 27 | The `inspect` operator has a big sibling, `inspect_batch`, whose closure gets access to whole batches of records at a time, just like the underlying operator. More precisely, `inspect_batch` takes a closure of two parameters: first, the timestamp of a batch, and second a reference to the batch itself. The `inspect_batch` operator can be especially helpful if you want to process the outputs more efficiently. 28 | 29 | ```rust 30 | extern crate timely; 31 | 32 | use timely::dataflow::operators::{ToStream, Inspect}; 33 | 34 | fn main() { 35 | timely::execute_from_args(std::env::args(), |worker| { 36 | worker.dataflow::<(),_,_>(|scope| { 37 | (0 .. 10) 38 | .to_stream(scope) 39 | .inspect_batch(|t, xs| println!("hello: {:?} @ {:?}", xs, t)); 40 | }); 41 | }).unwrap(); 42 | } 43 | ``` 44 | 45 | ## Capturing Streams 46 | 47 | The `Capture` trait provides a mechanism for exfiltrating a stream from a dataflow, into information that can be replayed in other dataflows. The trait is pretty general, and can even capture a stream to a binary writer that can later be read back to reconstruct the stream (see `examples/capture_send.rs` and `examples/capture_recv.rs`). 48 | 49 | The simplest form of capture is the `capture()` method, which turns the stream into a shared queue of "events": the sequence of data arrivals and progress notifications that the operator is exposed to. The `capture` method is used in many of timely's documentation tests, to extract a stream and verify that it is correct. 51 | 52 | Consider the documentation test for the `ToStream` trait: 53 | 
```rust 54 | extern crate timely; 55 | 56 | use timely::dataflow::operators::{ToStream, Capture}; 57 | use timely::dataflow::operators::capture::Extract; 58 | 59 | fn main() { 60 | let (data1, data2) = timely::example(|scope| { 61 | let data1 = (0..3).to_stream(scope).capture(); 62 | let data2 = vec![0,1,2].to_stream(scope).capture(); 63 | (data1, data2) 64 | }); 65 | 66 | assert_eq!(data1.extract(), data2.extract()); 67 | } 68 | ``` 69 | 70 | Here the two `capture` methods each return the receive side of one of Rust's threadsafe channels. The data moving along the channel have type `capture::Event`, which you would need to read about, but which your main thread can drain out of the channel and process as it sees fit. 71 | -------------------------------------------------------------------------------- /mdbook/src/chapter_3/chapter_3.md: -------------------------------------------------------------------------------- 1 | # Running Timely Dataflows 2 | 3 | In this section we will look at driving a timely dataflow computation.
4 | 5 | With a dataflow graph defined, how do we interactively supply data to the computation, and how do we understand what the computation has actually done given that we are not ourselves doing it? These are good questions, and the dataflow execution model is indeed a bit of a departure from how most folks first experience programming. 6 | 7 | The first thing to understand about timely dataflow is that *we are programming the worker threads*. Part of this program is asking the worker to build up a dataflow graph; we did that when we created an `InputHandle` and when we called `dataflow` followed by some `filter` and `map` and `probe` commands. But another part is where we actually start feeding the dataflow graph, advancing the inputs, and letting the worker give each of the operators a chance to run. 8 | 9 | ```rust,ignore 10 | for round in 0..10 { 11 | input.send(round); 12 | input.advance_to(round + 1); 13 | while probe.less_than(input.time()) { 14 | worker.step(); 15 | } 16 | } 17 | ``` 18 | 19 | This is the loop that we've seen in several examples. It looks fairly simple, but this is what actually causes work to happen. We do send data and advance the input, but we also call `worker.step()`, and this is where the actual timely dataflow computation happens. Until you call this, all the data are just building up in queues. 20 | 21 | In this section, we'll look at these moving parts in more detail. 22 | -------------------------------------------------------------------------------- /mdbook/src/chapter_3/chapter_3_1.md: -------------------------------------------------------------------------------- 1 | # Providing Input 2 | 3 | The first thing we often see is `input.send` with some data. This moves the supplied data from the current scope into a queue shared with the input dataflow operator. As this queue starts to fill, perhaps due to you calling `send` a lot, it moves the data along to its intended recipients. This probably means input queues of other operators, but it may mean serialization and network transmission. 4 | 5 | You can call `send` as much as you like, and the `InputHandle` implementation will keep moving the data along. The worst that is going to happen is depositing data in shared queues and perhaps some serialization; the call to `send` will not block, and it should not capture your thread of execution to do any particularly expensive work. 6 | 7 | However, since we are writing the worker code, you may want to take a break from `send` every now and again and let some of the operators run (in just a moment). Part of efficient streaming computation is keeping the data moving, and building up only relatively small buffers before giving the operators a chance to run. 8 | 9 | ## Controlling capabilities 10 | 11 | The second thing we often see is `input.advance_to` with a time. This is an exciting moment where the input announces that, from now on, it will only send data timestamped greater than or equal to its argument. This is big news for the rest of the system, as any operator waiting on the timestamp you previously held can now get to work (or, once all the messages you sent have drained, it can get to work). 12 | 13 | It is a logic error to call `advance_to` with a time that is not greater than or equal to the current time, which you can read out with `input.time`. Timely will check this for you and panic if you screw it up. It is a bit like accessing an array out of bounds: you can check ahead of time if you are about to screw up, but you went and did it anyhow.
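To make this discipline concrete, here is a small sketch (not taken from the repository's examples), assuming an `InputHandle` whose timestamp type is an integer starting at `0`:

```rust,ignore
input.send("hello");           // sent with the current timestamp, initially 0
input.advance_to(1);           // fine: 1 is greater than the current time
input.send("world");           // now sent with timestamp 1
assert_eq!(*input.time(), 1);  // `time` reads out the current timestamp
// input.advance_to(0);        // would panic: 0 is less than the current time
```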
14 | 15 | Finally, you might be interested to call `input.close`. This method consumes the input and thereby prevents you from sending any more data. This information is *very* exciting to the system, which can now tell dataflow operators that they won't be hearing much of anything from you any more. 16 | 17 | **TIP**: It is very important to keep moving your inputs along if you want your dataflow graph to make progress. One of the most common classes of errors is forgetting to advance an `InputHandle`, and then waiting and waiting and waiting for the cumulative count of records (or whatever) to come out the other end. Timely really wants you to participate and be clear about what you will and will not do in the future. 18 | 19 | At the same time, timely's progress tracking does work proportional to the number of timestamps you introduce. If you use a new timestamp for every record, timely will flush its buffers a lot, get very angry with you, and probably fall over. The more you can batch your inputs, sending many records with the same timestamp, the better. 20 | -------------------------------------------------------------------------------- /mdbook/src/chapter_3/chapter_3_2.md: -------------------------------------------------------------------------------- 1 | # Monitoring Probes 2 | 3 | On the flip side of inputs we have probes. Probes aren't *outputs* per se, but rather ways for you to monitor progress. They report on the possible timestamps remaining at certain places in the dataflow graph (wherever you attach them). 4 | 5 | The easiest way to create a `ProbeHandle` is by calling `.probe()` on a stream. This attaches a probe to a point in the dataflow, and when you inspect the probe (in just a moment) you'll learn about what might happen at that point. 6 | 7 | You can also create a `ProbeHandle` directly with `ProbeHandle::new()`. Such a probe handle is not very interesting yet, but you can attach a probe handle by calling `.probe_with(&mut handle)` on a stream. This has the cute benefit that you can attach one probe to multiple parts of the dataflow graph, and it will report on the union of their times. If you would like to watch multiple outputs, you could call `.probe()` multiple times, or attach one common handle to each with multiple calls to `.probe_with()`. Both are reasonable, depending on whether you need to distinguish between the multiple locations. 8 | 9 | A probe handle monitors information that timely provides about the availability of timestamps. You can think of it as holding on to a `Vec