├── cedar
    ├── __init__.py
    ├── utils
    │   ├── __init__.py
    │   └── timer.py
    ├── client
    │   ├── __init__.py
    │   ├── constants.py
    │   └── logger.py
    ├── compose
    │   ├── __init__.py
    │   └── constants.py
    ├── pipes
    │   ├── custom
    │   │   ├── simclrv2_pytorch.py
    │   │   ├── wikitext103.py
    │   │   ├── wikitext103_tf_service.py
    │   │   ├── coco.py
    │   │   ├── simclrv2.py
    │   │   └── commonvoice.py
    │   ├── tf.py
    │   ├── optimize
    │   │   ├── __init__.py
    │   │   ├── registry.py
    │   │   └── noop.py
    │   └── __init__.py
    ├── sources
    │   └── __init__.py
    ├── service
    │   ├── task.py
    │   ├── __init__.py
    │   ├── multiprocess.py
    │   ├── actor.py
    │   └── multithread.py
    └── config.py
├── tests
    ├── __init__.py
    ├── .gitignore
    └── data
    │   ├── test_text.txt
    │   ├── test_text_2.txt
    │   ├── .gitignore
    │   ├── test_tf_string.txt
    │   ├── example_image.jpeg
    │   ├── images
    │       ├── image_1.jpg
    │       ├── image_2.jpg
    │       ├── image_3.jpg
    │       ├── image_4.jpg
    │       ├── image_5.jpg
    │       ├── image_6.jpg
    │       ├── image_7.jpg
    │       ├── image_8.jpg
    │       ├── image_9.jpg
    │       ├── image_10.jpg
    │       └── image_11.jpg
    │   ├── t10k-images-idx3-ubyte.gz
    │   ├── t10k-labels-idx1-ubyte.gz
    │   ├── test_text_3.txt
    │   ├── test_profile_stats.yml
    │   ├── config_tf_string.yml
    │   ├── test_optimizer_stats.yml
    │   ├── config_fuse_reorder_tf.yml
    │   ├── test_full_optimizer_stats.yml
    │   ├── config_ref.yml
    │   ├── config_ref_variant.yml
    │   ├── test_cache_optimizer_stats_expensive_io.yml
    │   ├── test_cache_optimizer_stats.yml
    │   ├── config_ref_prefetch.yml
    │   ├── config_ref_mp.yml
    │   ├── config_fuse_ray.yml
    │   └── insert_config_ref.yml
├── evaluation
    ├── __init__.py
    ├── .gitignore
    ├── pipelines
    │   ├── __init__.py
    │   ├── coco
    │   │   ├── __init__.py
    │   │   ├── download.sh
    │   │   └── configs
    │   │   │   ├── ablation_p.yml
    │   │   │   ├── ablation_pr.yml
    │   │   │   ├── ablation_baseline.yml
    │   │   │   ├── cedar_local_plan.yml
    │   │   │   ├── ablation_tf_p.yml
    │   │   │   ├── ablation_tf_pr.yml
    │   │   │   ├── ablation_tf_pro.yml
    │   │   │   ├── ablation_tf_baseline.yml
    │   │   │   ├── ablation_pro.yml
    │   │   │   ├── cedar_tf_local_plan.yml
    │   │   │   ├── cedar_tf_remote_plan.yml
    │   │   │   └── cedar_remote_plan.yml
    │   ├── simclrv2
    │   │   ├── __init__.py
    │   │   ├── download.py
    │   │   ├── configs
    │   │   │   ├── ablation_tf_p.yaml
    │   │   │   ├── ablation_tf_p_r.yaml
    │   │   │   ├── ablation_tf_p_r_o.yaml
    │   │   │   ├── eval_controller_local.yaml
    │   │   │   ├── ablation_p.yaml
    │   │   │   ├── ablation_p_r.yaml
    │   │   │   ├── ablation_baseline.yaml
    │   │   │   ├── eval_ember_remote.yaml
    │   │   │   ├── eval_controller_remote.yaml
    │   │   │   ├── eval_ember_local.yaml
    │   │   │   ├── eval_ember_remote_tf.yaml
    │   │   │   ├── ablation_tf_baseline.yaml
    │   │   │   ├── ablation_p_r_o.yaml
    │   │   │   └── eval_ember_local_tf.yaml
    │   │   ├── cache_results
    │   │   │   └── configs
    │   │   │   │   ├── no_cache_plan.yml
    │   │   │   │   ├── cache_after_grayscale.yml
    │   │   │   │   ├── new_simclrv2_optimized_plan.yml
    │   │   │   │   ├── cache_after_list.yml
    │   │   │   │   └── cache_after_read.yml
    │   │   ├── torch_dataset.py
    │   │   ├── ray_dataset.py
    │   │   └── tf_dataset.py
    │   ├── wikitext103
    │   │   ├── configs
    │   │   │   ├── ablation_tf_service_p.yaml
    │   │   │   ├── ablation_tf_service_p_r.yaml
    │   │   │   ├── ablation_tf_service_baseline.yaml
    │   │   │   ├── eval_local_tf_service.yaml
    │   │   │   ├── eval_remote_tf_service.yaml
    │   │   │   ├── ablation_tf_p.yaml
    │   │   │   ├── ablation_tf_p_r.yaml
    │   │   │   ├── ablation_tf_baseline.yaml
    │   │   │   ├── ablation_tf_service_p_r_o.yaml
    │   │   │   ├── eval_local_tf.yaml
    │   │   │   ├── eval_remote_tf.yaml
    │   │   │   ├── ablation_tf_p_r_o.yaml
    │   │   │   ├── ablation_p.yaml
    │   │   │   ├── ablation_p_r.yaml
    │   │   │   ├── ablation_baseline.yaml
    │   │   │   ├── eval_remote.yaml
    │   │   │   ├── eval_local.yaml
    │   │   │   └── ablation_p_r_o.yaml
    │   │   ├── cache_results
    │   │   │   └── configs
    │   │   │   │   ├── wikitext_no_caching_plan.yml
    │   │   │   │   ├── wikitext_cache_after_truncate.yml
    │   │   │   │   ├── new_wikitext_optimal_cache_plan.yml
    │   │   │   │   ├── wikitext_cache_after_tensor_conversion.yml
    │   │   │   │   ├── wikitext_cache_after_tokenizer_one_offload.yml
    │   │   │   │   ├── wikitext_cache_after_truncate_one_offload.yml
    │   │   │   │   ├── wikitext_cache_after_tokenizer_two_offloads.yml
    │   │   │   │   └── wikitext_cache_after_truncate_two_offloads.yml
    │   │   ├── download.py
    │   │   ├── tf_dataset.py
    │   │   ├── torch_dataset.py
    │   │   └── tf_service_dataset.py
    │   └── commonvoice
    │   │   ├── configs
    │   │       ├── eval_remote.yaml
    │   │       ├── eval_local.yaml
    │   │       ├── ablation_p.yaml
    │   │       ├── ablation_p_r.yaml
    │   │       ├── ablation_baseline.yaml
    │   │       └── ablation_p_r_o.yaml
    │   │   ├── download.py
    │   │   ├── torch_dataset.py
    │   │   └── tf_dataset.py
    ├── fastflow
    │   └── examples
    │   │   ├── .gitignore
    │   │   ├── default_config.yaml
    │   │   ├── requirements_common.txt
    │   │   ├── config.yaml
    │   │   ├── requirements_cuda10.txt
    │   │   ├── requirements_cuda11.txt
    │   │   ├── run_fastflow.sh
    │   │   ├── test_app.py
    │   │   ├── nlp_hf_app.py
    │   │   ├── nlp_app.py
    │   │   └── simclr_app.py
    ├── plumber
    │   ├── coco
    │   │   ├── .gitignore
    │   │   ├── run_plumber.sh
    │   │   ├── dataset_flags.py
    │   │   └── graph_rewrites.py
    │   └── simclr
    │   │   ├── .gitignore
    │   │   ├── run_plumber.sh
    │   │   ├── dataset_flags.py
    │   │   ├── graph_rewrites.py
    │   │   └── show_bneck.py
    ├── datasets
    │   └── .gitignore
    ├── tf_requirements.txt
    ├── run_tf_service.sh
    ├── run_torch.sh
    ├── run_ray_remote.sh
    ├── run_ray_local.sh
    ├── torch_utils.py
    ├── run_tf.sh
    ├── tf_utils.py
    ├── run_autotuning.sh
    ├── run_tf_service.py
    ├── plots
    │   ├── ablation.csv
    │   ├── aggregate_data.csv
    │   ├── plot_ablation.py
    │   └── plot_scaling.py
    ├── run_caching.sh
    ├── cedar_utils.py
    ├── run_cedar_local.sh
    └── run_cedar_remote.sh
├── setup.py
├── requirements.txt
└── LICENSE


/cedar/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/cedar/utils/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/.gitignore:
--------------------------------------------------------------------------------
1 | *.png


--------------------------------------------------------------------------------
/evaluation/.gitignore:
--------------------------------------------------------------------------------
1 | tf_env/


--------------------------------------------------------------------------------
/evaluation/pipelines/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/tests/data/test_text.txt:
--------------------------------------------------------------------------------
1 | hello
2 | world
3 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | 


--------------------------------------------------------------------------------
/tests/data/test_text_2.txt:
--------------------------------------------------------------------------------
1 | hello
2 | world
3 | this
4 | is
5 | ember
6 | speaking
7 | !
8 | 


--------------------------------------------------------------------------------
/tests/data/.gitignore:
--------------------------------------------------------------------------------
1 | results.json
2 | config_output.yml
3 | insert_config_output.yml
4 | *.png


--------------------------------------------------------------------------------
/evaluation/plumber/coco/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | logdir/
3 | *.pb
4 | *.txt
5 | *.pdf
6 | *.dot
7 | *.ps


--------------------------------------------------------------------------------
/tests/data/test_tf_string.txt:
--------------------------------------------------------------------------------
1 | HELLO
2 | WORLD
3 | HELLO
4 | WORLD
5 | HELLO
6 | WORLD
7 | HELLO
8 | WORLD


--------------------------------------------------------------------------------
/tests/data/example_image.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/example_image.jpeg


--------------------------------------------------------------------------------
/tests/data/images/image_1.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_1.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_2.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_2.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_3.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_3.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_4.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_4.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_5.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_5.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_6.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_6.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_7.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_7.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_8.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_8.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_9.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_9.jpg


--------------------------------------------------------------------------------
/evaluation/plumber/simclr/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 | logdir/
3 | *.csv
4 | *.pb
5 | *.txt
6 | *.pdf
7 | *.dot
8 | *.ps


--------------------------------------------------------------------------------
/tests/data/images/image_10.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_10.jpg


--------------------------------------------------------------------------------
/tests/data/images/image_11.jpg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/images/image_11.jpg


--------------------------------------------------------------------------------
/evaluation/datasets/.gitignore:
--------------------------------------------------------------------------------
1 | # Ignore everything in datasets dir.
2 | # Use scripts to fetch datasets
3 | *
4 | !.gitignore


--------------------------------------------------------------------------------
/tests/data/t10k-images-idx3-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/t10k-images-idx3-ubyte.gz


--------------------------------------------------------------------------------
/tests/data/t10k-labels-idx1-ubyte.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/stanford-mast/cedar/HEAD/tests/data/t10k-labels-idx1-ubyte.gz


--------------------------------------------------------------------------------
/evaluation/plumber/coco/run_plumber.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python tf_dataset.py --profile && python graph_rewrites.py --skip_baseline=False


--------------------------------------------------------------------------------
/evaluation/plumber/simclr/run_plumber.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | python tf_dataset.py --profile && python graph_rewrites.py --skip_baseline=False


--------------------------------------------------------------------------------
/tests/data/test_text_3.txt:
--------------------------------------------------------------------------------
 1 | hello
 2 | world
 3 | this
 4 | is
 5 | ember
 6 | speaking
 7 | !
 8 | 1
 9 | 2
10 | 3
11 | 4
12 | 5
13 | 


--------------------------------------------------------------------------------
/cedar/client/__init__.py:
--------------------------------------------------------------------------------
1 | from cedar.client.dataset import DataSet
2 | 
3 | __all__ = ["DataSet"]
4 | 
5 | assert __all__ == sorted(__all__)
6 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/default_config.yaml:
--------------------------------------------------------------------------------
1 | dispatcher_addr: 0.0.0.0
2 | parallel: 0
3 | num_profile_steps: 100
4 | num_initial_steps: 10
5 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | from setuptools import setup, find_packages
2 | 
3 | 
4 | setup(
5 |     name="cedar",
6 |     version="0.0.1",
7 |     packages=find_packages(),
8 | )
9 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/requirements_common.txt:
--------------------------------------------------------------------------------
1 | # Common
2 | matplotlib==3.5.3
3 | pandas
4 | tensorflow_datasets
5 | tensorflow_io
6 | tensorflow_addons
7 | imgaug
8 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/config.yaml:
--------------------------------------------------------------------------------
1 | dispatcher_addr: 10.138.0.14
2 | dispatcher_port: 5000 # dispatcher port 
3 | num_profile_steps: 100 # number of profiling steps
4 | num_initial_steps: 10 # number of initial steps to skip metric profiling


--------------------------------------------------------------------------------
/cedar/compose/__init__.py:
--------------------------------------------------------------------------------
1 | from cedar.compose.feature import Feature
2 | from cedar.compose.optimizer import OptimizerOptions, PhysicalPlan
3 | 
4 | __all__ = ["Feature", "OptimizerOptions", "PhysicalPlan"]
5 | 
6 | assert __all__ == sorted(__all__)
7 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/requirements_cuda10.txt:
--------------------------------------------------------------------------------
1 | -r requirements_common.txt
2 | 
3 | # Dependencies to install DALI for CUDA 10.2
4 | --extra-index-url https://developer.download.nvidia.com/compute/redist
5 | nvidia-dali-cuda102
6 | nvidia-dali-tf-plugin-cuda102
7 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/requirements_cuda11.txt:
--------------------------------------------------------------------------------
1 | -r requirements_common.txt
2 | 
3 | # Dependencies to install DALI for CUDA 11.0
4 | --extra-index-url https://developer.download.nvidia.com/compute/redist
5 | nvidia-dali-cuda110
6 | nvidia-dali-tf-plugin-cuda110
7 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/run_fastflow.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #cv-tf
4 | python eval_app_runner.py simclr_app.py simclr ff config.yaml
5 | #nlp-tf
6 | python eval_app_runner.py nlp_app.py nlp ff config.yaml
7 | #ssd-tf
8 | python eval_app_runner.py coco_app.py coco ff config.yaml


--------------------------------------------------------------------------------
/evaluation/tf_requirements.txt:
--------------------------------------------------------------------------------
 1 | tensorflow==2.14.0
 2 | tensorflow-addons[tensorflow]
 3 | pyyaml
 4 | ray[default]==2.7.0
 5 | Pympler
 6 | 
 7 | --extra-index-url https://download.pytorch.org/whl/cpu
 8 | torch==2.0.1
 9 | torchvision==0.15.2
10 | torchaudio==2.0.2
11 | torchdata
12 | 


--------------------------------------------------------------------------------
/tests/data/test_profile_stats.yml:
--------------------------------------------------------------------------------
 1 | baseline:
 2 |   input_sizes:
 3 |     0: 100
 4 |     1: 100
 5 |     2: 10
 6 |     3: 0
 7 |   latencies:
 8 |     0: 1
 9 |     1: 1
10 |     2: 1
11 |     3: 1
12 |   output_sizes:
13 |     0: 10
14 |     1: 100
15 |     2: 100
16 |     3: 10
17 |   throughput: 10
18 | 


--------------------------------------------------------------------------------
/tests/data/config_tf_string.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     1: ''
 4 |   pipes:
 5 |     0:
 6 |       name: MapperPipe_string_lower
 7 |       variant: TF
 8 |     1:
 9 |       name: TFLocalLinePipe
10 |       variant: TF
11 |       variant_ctx:
12 |         num_parallel_calls: -1
13 |       fused_pipes:
14 |         - 0


--------------------------------------------------------------------------------
/cedar/pipes/custom/simclrv2_pytorch.py:
--------------------------------------------------------------------------------
 1 | from torchvision.io import ImageReadMode, read_image
 2 | import torch
 3 | 
 4 | IMG_HEIGHT = 244
 5 | IMG_WIDTH = 244
 6 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
 7 | 
 8 | 
 9 | def read_image_pytorch(x):
10 |     return read_image(x, mode=ImageReadMode.RGB)
11 | 
12 | 
13 | def to_float(x):
14 |     return x.to(torch.float32)
15 | 


--------------------------------------------------------------------------------
/cedar/pipes/tf.py:
--------------------------------------------------------------------------------
 1 | from typing import Union, List
 2 | 
 3 | import tensorflow as tf
 4 | 
 5 | 
 6 | class TFTensorDontCare:
 7 |     pass
 8 | 
 9 | 
10 | class TFOutputHint:
11 |     def __init__(
12 |         self,
13 |         shape: Union[List, TFTensorDontCare],
14 |         dtype: Union[tf.dtypes.DType, TFTensorDontCare],
15 |     ):
16 |         self.shape = shape
17 |         self.dtype = dtype
18 | 


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
 1 | torch==2.0.1
 2 | torchvision
 3 | torchaudio
 4 | torchdata
 5 | torchtext
 6 | pytest
 7 | google-cloud-storage
 8 | google-api-python-client
 9 | pytest-mock
10 | transformers
11 | pyyaml
12 | h5py
13 | pandas
14 | responses
15 | ray[default]==2.7.0
16 | Pympler
17 | tensorflow==2.14.0
18 | tensorflow-addons[tensorflow]
19 | tensorflow-text==2.14.0
20 | keras-nlp
21 | librosa
22 | pyarrow==13.0.0
23 | numpy==1.26.0


--------------------------------------------------------------------------------
/tests/data/test_optimizer_stats.yml:
--------------------------------------------------------------------------------
 1 | baseline:
 2 |   input_sizes:
 3 |     0: 32.0
 4 |     1: 32.0
 5 |     2: 32.0
 6 |     3: 32.0
 7 |     4: 0.0
 8 |   latencies:
 9 |     0: 6928.421568627452
10 |     1: 16207.529411764706
11 |     2: 15714.558823529413
12 |     3: 16007.382352941177
13 |     4: 4067.4411764705883
14 |   output_sizes:
15 |     0: 32.0
16 |     1: 32.0
17 |     2: 32.0
18 |     3: 32.0
19 |     4: 32.0
20 |   throughput: 11503.603334229632
21 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/download.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | # Download the COCO val2017 images and the 2017 train/val annotations and
 3 | # move them into ~/cedar/evaluation/datasets/coco.
 4 | 
 5 | # Abort on the first failing command so that a failed download is not
 6 | # followed by unzip/mv/rm running against files that do not exist.
 7 | set -euo pipefail
 8 | 
 9 | DEST=~/cedar/evaluation/datasets/coco
10 | 
11 | # -p creates missing parent directories and tolerates an existing target.
12 | mkdir -p "$DEST"
13 | 
14 | wget http://images.cocodataset.org/zips/val2017.zip
15 | unzip val2017.zip
16 | mv val2017/ "$DEST"
17 | 
18 | wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
19 | unzip annotations_trainval2017.zip
20 | mv annotations/ "$DEST"
21 | 
22 | # Remove the archives only after both have been unpacked and moved.
23 | rm val2017.zip annotations_trainval2017.zip
24 | 


--------------------------------------------------------------------------------
/evaluation/run_tf_service.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash
2 | 
3 | #cv-tf
4 | python eval_tf.py --dataset_file pipelines/simclrv2/tf_dataset.py --service_addr 10.138.0.90:38655 --num_parallel_calls -1
5 | #nlp-tf
6 | python eval_tf.py --dataset_file pipelines/wikitext103/tf_service_dataset.py --service_addr 10.138.0.90:38655 --num_parallel_calls -1 --num_total_samples 200000
7 | #ssd-tf
8 | python eval_tf.py --dataset_file pipelines/coco/tf_service_dataset.py --service_addr 10.138.0.90:38655 --num_parallel_calls -1


--------------------------------------------------------------------------------
/evaluation/run_torch.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #cv-torch
 4 | python eval_torch.py --dataset_file pipelines/simclrv2/torch_dataset.py --num_workers 8
 5 | #nlp-torch
 6 | python eval_torch.py --dataset_file pipelines/wikitext103/torch_dataset.py --num_workers 8 --num_total_samples 100000
 7 | #asr
 8 | python eval_torch.py --dataset_file pipelines/commonvoice/torch_dataset.py --num_workers 8 --num_total_samples 10000
 9 | # ssd
10 | python eval_torch.py --dataset_file pipelines/coco/torch_dataset.py --num_workers 8


--------------------------------------------------------------------------------
/evaluation/plumber/coco/dataset_flags.py:
--------------------------------------------------------------------------------
 1 | from absl import app
 2 | from absl import flags
 3 | 
 4 | FLAGS = flags.FLAGS
 5 | 
 6 | flags.DEFINE_integer(
 7 |     'benchmark_num_elements', default=1000,
 8 |     help=('The number of elements to use for the benchmark'))
 9 | 
10 | flags.DEFINE_integer(
11 |     'dataset_threadpool_size', default=8,
12 |     help=('The size of the private datapool size in dataset.'))
13 | 
14 | flags.DEFINE_bool(
15 |     'map_and_batch_fusion', default=True,
16 |     help=('tf.data options'))
17 | 


--------------------------------------------------------------------------------
/evaluation/plumber/simclr/dataset_flags.py:
--------------------------------------------------------------------------------
 1 | from absl import app
 2 | from absl import flags
 3 | 
 4 | FLAGS = flags.FLAGS
 5 | 
 6 | flags.DEFINE_integer(
 7 |     'benchmark_num_elements', default=1000,
 8 |     help=('The number of elements to use for the benchmark'))
 9 | 
10 | flags.DEFINE_integer(
11 |     'dataset_threadpool_size', default=8,
12 |     help=('The size of the private datapool size in dataset.'))
13 | 
14 | flags.DEFINE_bool(
15 |     'map_and_batch_fusion', default=True,
16 |     help=('tf.data options'))
17 | 


--------------------------------------------------------------------------------
/evaluation/run_ray_remote.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | #cv-torch
 3 | python pipelines/simclrv2/ray_dataset.py
 4 | #cv-tf
 5 | python pipelines/simclrv2/ray_tf_dataset.py
 6 | #nlp-torch
 7 | python pipelines/wikitext103/ray_dataset.py
 8 | #nlp-hf-tf
 9 | python pipelines/wikitext103/ray_tf_dataset.py
10 | #nlp-tf
11 | python pipelines/wikitext103/ray_tf_service_dataset.py
12 | #asr
13 | python pipelines/commonvoice/ray_dataset.py
14 | #ssd
15 | python pipelines/coco/ray_dataset.py
16 | #ssd-tf
17 | python pipelines/coco/ray_tf_dataset.py


--------------------------------------------------------------------------------
/evaluation/run_ray_local.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | #cv-torch
 4 | python pipelines/simclrv2/ray_dataset.py 
 5 | #cv-tf
 6 | python pipelines/simclrv2/ray_tf_dataset.py
 7 | #nlp-torch
 8 | python pipelines/wikitext103/ray_dataset.py
 9 | #nlp-hf-tf
10 | python pipelines/wikitext103/ray_tf_dataset.py
11 | #nlp-tf
12 | python pipelines/wikitext103/ray_tf_service_dataset.py
13 | #asr
14 | python pipelines/commonvoice/ray_dataset.py
15 | # ssd-torch
16 | python pipelines/coco/ray_dataset.py
17 | # ssd-tf
18 | python pipelines/coco/ray_tf_dataset.py


--------------------------------------------------------------------------------
/cedar/sources/__init__.py:
--------------------------------------------------------------------------------
 1 | from cedar.sources.iterable import IterSource
 2 | from cedar.sources.local import LocalFSSource, LocalLineSource
 3 | from cedar.sources.source import Source
 4 | from cedar.sources.tf_sources import TFLocalLineSource
 5 | from cedar.sources.coco import COCOSource, COCOFileSource
 6 | 
 7 | __all__ = [
 8 |     "COCOFileSource",
 9 |     "COCOSource",
10 |     "IterSource",
11 |     "LocalFSSource",
12 |     "LocalLineSource",
13 |     "Source",
14 |     "TFLocalLineSource",
15 | ]
16 | 
17 | assert __all__ == sorted(__all__)
18 | 


--------------------------------------------------------------------------------
/cedar/pipes/optimize/__init__.py:
--------------------------------------------------------------------------------
 1 | from cedar.pipes.optimize.fuse import FusedOptimizerPipe
 2 | from cedar.pipes.optimize.noop import NoopOptimizerPipe
 3 | from cedar.pipes.optimize.io import ObjectDiskCachePipe
 4 | from cedar.pipes.optimize.registry import OptimizerPipeRegistry
 5 | from cedar.pipes.optimize.prefetch import PrefetcherPipe
 6 | 
 7 | __all__ = [
 8 |     "FusedOptimizerPipe",
 9 |     "NoopOptimizerPipe",
10 |     "ObjectDiskCachePipe",
11 |     "OptimizerPipeRegistry",
12 |     "PrefetcherPipe",
13 | ]
14 | 
15 | assert __all__ == sorted(__all__)
16 | 


--------------------------------------------------------------------------------
/evaluation/plumber/coco/graph_rewrites.py:
--------------------------------------------------------------------------------
 1 | from absl import app
 2 | from absl import flags
 3 | 
 4 | import tensorflow as tf
 5 | 
 6 | import pandas as pd
 7 | try:
 8 |     import dataloader
 9 | except ImportError:
10 |     try:
11 |         import resnet_flags
12 |     except ImportError:
13 |         import dataset_flags
14 | from plumber_analysis import graph_rewrites
15 | 
16 | 
17 | FLAGS = flags.FLAGS
18 | graph_rewrites.apply_default_flags()
19 | 
20 | def main(_):
21 |     graph_rewrites.default_main(_)
22 | 
23 | if __name__ == '__main__':
24 |   app.run(main)


--------------------------------------------------------------------------------
/evaluation/plumber/simclr/graph_rewrites.py:
--------------------------------------------------------------------------------
 1 | from absl import app
 2 | from absl import flags
 3 | 
 4 | import tensorflow as tf
 5 | 
 6 | import pandas as pd
 7 | try:
 8 |     import dataloader
 9 | except ImportError:
10 |     try:
11 |         import resnet_flags
12 |     except ImportError:
13 |         import dataset_flags
14 | from plumber_analysis import graph_rewrites
15 | 
16 | 
17 | FLAGS = flags.FLAGS
18 | graph_rewrites.apply_default_flags()
19 | 
20 | def main(_):
21 |     graph_rewrites.default_main(_)
22 | 
23 | if __name__ == '__main__':
24 |   app.run(main)


--------------------------------------------------------------------------------
/evaluation/torch_utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | 
 4 | class TorchEvalSpec:
 5 |     def __init__(
 6 |         self,
 7 |         batch_size: int,
 8 |         num_workers: int,
 9 |         num_epochs: int = 1,
10 |         num_total_samples: Optional[int] = None,
11 |         iteration_time: Optional[float] = None,
12 |     ):
13 |         self.batch_size = batch_size
14 |         self.num_workers = num_workers
15 |         self.num_total_samples = num_total_samples
16 |         self.num_epochs = num_epochs
17 |         self.iteration_time = iteration_time
18 | 


--------------------------------------------------------------------------------
/cedar/utils/timer.py:
--------------------------------------------------------------------------------
 1 | import time
 2 | 
 3 | 
 4 | class Timer:
 5 |     def __init__(self):
 6 |         self._start = None
 7 |         self._end = None
 8 | 
 9 |     def __enter__(self):
10 |         self._start = time.perf_counter()
11 | 
12 |     def __exit__(self, exc_type, exc_val, exc_tb):
13 |         self._end = time.perf_counter()
14 | 
15 |     def reset(self):
16 |         self._start = time.perf_counter()
17 | 
18 |     def delta(self):
19 |         if self._start is None or self._end is None:
20 |             raise RuntimeError()
21 |         return self._end - self._start
22 | 


--------------------------------------------------------------------------------
/cedar/service/task.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | from typing import Any
 3 | 
 4 | 
 5 | class Task(abc.ABC):
 6 |     """
 7 |     A Task represents a discrete unit of processing, meant to be offloaded
 8 |     to an executor.
 9 |     """
10 | 
11 |     @abc.abstractmethod
12 |     def process(self) -> Any:
13 |         """Run the task and return its result; subclasses must override."""
14 |         pass
15 | 
16 | 
17 | class MultiprocessTask(Task):
18 |     """Task that keeps its input on ``input_data``.
19 | 
20 |     ``process`` is not overridden here, so the class remains abstract.
21 |     """
22 | 
23 |     def __init__(self, input_data: Any) -> None:
24 |         self.input_data = input_data
25 | 
26 | 
27 | class MultithreadedTask(Task):
28 |     """Task that keeps its input on ``input_data``.
29 | 
30 |     ``process`` is not overridden here, so the class remains abstract.
31 |     """
32 | 
33 |     def __init__(self, input_data: Any) -> None:
34 |         self.input_data = input_data
35 | 


--------------------------------------------------------------------------------
/cedar/compose/constants.py:
--------------------------------------------------------------------------------
 1 | LOCAL_PARALLELISM_SCALING_FACTOR = 0.8
 2 | OFFLOAD_THRESHOLD_FRAC = 0.05
 3 | FUSED_PIPE_NAME = "FusedPipe"  # presumably the `name` of fused pipes in plan configs -- TODO confirm
 4 | 
 5 | RAY_SUBMIT_BATCH_SIZE = 30
 6 | RAY_AVAILABLE_PARALLELISM = 32
 7 | # Earlier value, kept for reference: RAY_SUBMIT_BATCH_SCALING_FACTOR = 2000
 8 | RAY_SUBMIT_BATCH_SCALING_FACTOR = 2000000
 9 | 
10 | # Threshold at which we forbid local workers due to serialization bottlenecks
11 | LOCAL_PARALLELISM_THRESHOLD = 100000000  # unit not stated here -- TODO confirm
12 | 
13 | SMP_AVAILABLE_PARALLELISM = 8
14 | 
15 | 
16 | # Threshold for samples/s at which local parallelism is forbidden
17 | LOCAL_PARALLELISM_SAMPLES_PER_SEC_THRESHOLD = 100
18 | 


--------------------------------------------------------------------------------
/tests/data/config_fuse_reorder_tf.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     4: '1'
 4 |     1: '2'
 5 |     2: '5'
 6 |     5: ''
 7 |   pipes:
 8 |     0:
 9 |       name: MapperPipe__add_one
10 |       variant: TF
11 |     1:
12 |       name: MapperPipe__cast
13 |       variant: TF
14 |     2:
15 |       name: MapperPipe__fill_tensor
16 |       variant: TF
17 |     3:
18 |       name: MapperPipe__add_one
19 |       variant: TF
20 |     4:
21 |       name: IterSourcePipe
22 |       variant: INPROCESS
23 |     5:
24 |       name: FusedPipe
25 |       variant: TF
26 |       fused_pipes: 
27 |         - 3
28 |         - 0


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_service_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '4'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: ''
 8 |   n_local_workers: 1
 9 |   pipes:
10 |     0:
11 |       name: MapperPipe__embedding
12 |       variant: TF
13 |     1:
14 |       name: MapperPipe__truncate
15 |       variant: TF
16 |     2:
17 |       name: MapperPipe__tokenize
18 |       variant: TF
19 |     3:
20 |       name: TFLocalLinePipe
21 |       variant: TF
22 |     4:
23 |       name: PrefetcherPipe
24 |       variant: INPROCESS
25 |       variant_ctx:
26 |         variant_type: INPROCESS
27 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_service_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '4'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: ''
 8 |   n_local_workers: 1
 9 |   pipes:
10 |     0:
11 |       name: MapperPipe__embedding
12 |       variant: TF
13 |     1:
14 |       name: MapperPipe__truncate
15 |       variant: TF
16 |     2:
17 |       name: MapperPipe__tokenize
18 |       variant: TF
19 |     3:
20 |       name: TFLocalLinePipe
21 |       variant: TF
22 |     4:
23 |       name: PrefetcherPipe
24 |       variant: INPROCESS
25 |       variant_ctx:
26 |         variant_type: INPROCESS
27 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_service_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '4'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: ''
 8 |   n_local_workers: 1
 9 |   pipes:
10 |     0:
11 |       name: MapperPipe__embedding
12 |       variant: TF
13 |     1:
14 |       name: MapperPipe__truncate
15 |       variant: TF
16 |     2:
17 |       name: MapperPipe__tokenize
18 |       variant: TF
19 |     3:
20 |       name: TFLocalLinePipe
21 |       variant: TF
22 |     4:
23 |       name: PrefetcherPipe
24 |       variant: INPROCESS
25 |       variant_ctx:
26 |         variant_type: INPROCESS
27 | 


--------------------------------------------------------------------------------
/cedar/pipes/custom/wikitext103.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | from transformers import GPT2Tokenizer
 3 | 
 4 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
 5 | 
 6 | embedding = tf.Variable(tf.random.uniform([50257, 764], -1.0, 1.0))
 7 | 
 8 | 
 9 | @tf.py_function(Tout=tf.int32)
10 | def _tokenize(x):
11 |     return tokenizer(str(x.numpy()), return_tensors="tf")["input_ids"]
12 | 
13 | 
14 | def _embedding(x):
15 |     return tf.nn.embedding_lookup(embedding, x)
16 | 
17 | 
18 | def _truncate(x):
19 |     dim = tf.shape(x)[1]
20 |     slice_size = tf.minimum(dim, 254)
21 |     x = tf.slice(x, [0, 0], [1, slice_size])
22 |     return x
23 | 


--------------------------------------------------------------------------------
/evaluation/run_tf.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # cv-tf: simclrv2 pipeline
 4 | python eval_tf.py --dataset_file pipelines/simclrv2/tf_dataset.py --num_parallel_calls -1
 5 | # nlp-hf-tf: wikitext103 pipeline (tf_dataset.py), 100000 samples
 6 | python eval_tf.py --dataset_file pipelines/wikitext103/tf_dataset.py --num_parallel_calls -1 --num_total_samples 100000
 7 | # nlp-tf: wikitext103 pipeline (tf_service_dataset.py), 200000 samples
 8 | python eval_tf.py --dataset_file pipelines/wikitext103/tf_service_dataset.py --num_parallel_calls -1 --num_total_samples 200000
 9 | # asr: commonvoice pipeline, 10000 samples
10 | python eval_tf.py --dataset_file pipelines/commonvoice/tf_dataset.py --num_parallel_calls -1 --num_total_samples 10000
11 | # ssd: coco pipeline
12 | python eval_tf.py --dataset_file pipelines/coco/tf_dataset.py --num_parallel_calls -1


--------------------------------------------------------------------------------
/tests/data/test_full_optimizer_stats.yml:
--------------------------------------------------------------------------------
 1 | baseline:
 2 |   input_sizes:
 3 |     0: 1
 4 |     1: 100
 5 |     2: 100
 6 |     3: 100
 7 |     4: 100
 8 |     5: 1
 9 |     6: 1
10 |   latencies:
11 |     0: 1
12 |     1: 1
13 |     2: 1
14 |     3: 1
15 |     4: 1
16 |     5: 1
17 |     6: 1
18 |   output_sizes:
19 |     0: 1
20 |     1: 1
21 |     2: 100
22 |     3: 100
23 |     4: 100
24 |     5: 100
25 |     6: 1
26 |   throughput: 100
27 | offloads:
28 |   RAY:
29 |     1:
30 |       throughput: 101
31 |     2:
32 |       throughput: 1
33 |     3:
34 |       throughput: 1
35 |     4:
36 |       throughput: 101
37 |     5:
38 |       throughput: 150
39 | 


--------------------------------------------------------------------------------
/tests/data/config_ref.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |   pipes:
 8 |     0:
 9 |       name: NoopPipe
10 |     1:
11 |       name: NoopPipe
12 |     2:
13 |       name: NoopPipe
14 |     3:
15 |       name: IterSourcePipe
16 | physical_plan:
17 |   graph:
18 |     0: ''
19 |     1: '0'
20 |     2: '1'
21 |     3: '2'
22 |   pipes:
23 |     0:
24 |       name: NoopPipe
25 |       variant: INPROCESS
26 |     1:
27 |       name: NoopPipe
28 |       variant: INPROCESS
29 |     2:
30 |       name: NoopPipe
31 |       variant: INPROCESS
32 |     3:
33 |       name: IterSourcePipe
34 |       variant: INPROCESS
35 | 


--------------------------------------------------------------------------------
/cedar/client/constants.py:
--------------------------------------------------------------------------------
 1 | from cedar.compose.constants import RAY_SUBMIT_BATCH_SIZE
 2 | 
 3 | RAY_PROFILE_N_ACTORS = 8
 4 | RAY_PROFILE_INFLIGHT = 100
 5 | RAY_PROFILE_PREFETCH = 100
 6 | RAY_PROFILE_SUBMIT_BATCH_SIZE = 10
 7 | CONTROLLER_PERIOD_SEC = 3
 8 | MAX_HISTORY = CONTROLLER_PERIOD_SEC * 10  # ten times the controller period
 9 | THROUGHPUT_LOG_TIME_SEC = 1
10 | SCALE_ATTEMPTS = 3
11 | THROUGHPUT_THRESHOLD = 1.01
12 | EMPTY_BUFFER_THRESHOLD = 500  # set around half of max buffer size
13 | AVAILABLE_RAY_SCALE = 32
14 | SMP_PROFILE_N_PROCS = 8
15 | SMP_TASKSET_MASK = 0xFF  # should match the taskset cpu mask of smp n_procs
16 | SMP_PROFILE_INFLIGHT = 100
17 | SMP_PROFILE_PREFETCH = 100
18 | CONTROLLER_SCALE_DOWN_COUNTER = 10
19 | 


--------------------------------------------------------------------------------
/tests/data/config_ref_variant.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |   pipes:
 8 |     0:
 9 |       name: NoopPipe
10 |     1:
11 |       name: NoopPipe
12 |     2:
13 |       name: NoopPipe
14 |     3:
15 |       name: IterSourcePipe
16 | physical_plan:
17 |   graph:
18 |     0: ''
19 |     1: '0'
20 |     2: '1'
21 |     3: '2'
22 |   pipes:
23 |     0:
24 |       name: NoopPipe
25 |       variant: INPROCESS
26 |     1:
27 |       name: NoopPipe
28 |       variant: SMP
29 |       variant_ctx:
30 |         n_procs: 10
31 |     2:
32 |       name: NoopPipe
33 |       variant: INPROCESS
34 |     3:
35 |       name: IterSourcePipe
36 |       variant: INPROCESS
37 | 


--------------------------------------------------------------------------------
/tests/data/test_cache_optimizer_stats_expensive_io.yml:
--------------------------------------------------------------------------------
 1 | baseline:
 2 |   input_sizes:
 3 |     0: 1
 4 |     1: 100
 5 |     2: 100
 6 |     3: 100
 7 |     4: 100
 8 |     5: 1
 9 |     6: 1
10 |   latencies:
11 |     0: 1
12 |     1: 1
13 |     2: 1
14 |     3: 1
15 |     4: 1
16 |     5: 1
17 |     6: 1
18 |   output_sizes:
19 |     0: 1
20 |     1: 1
21 |     2: 100
22 |     3: 100
23 |     4: 100
24 |     5: 100
25 |     6: 1
26 |   throughput: 100
27 | offloads:
28 |   RAY:
29 |     1:
30 |       throughput: 100
31 |     2:
32 |       throughput: 1
33 |     3:
34 |       throughput: 1
35 |     4:
36 |       throughput: 100
37 |     5:
38 |       throughput: 150
39 | disk_info:
40 |   read_latency: 100
41 |   write_latency: 100
42 | 


--------------------------------------------------------------------------------
/tests/data/test_cache_optimizer_stats.yml:
--------------------------------------------------------------------------------
 1 | baseline:
 2 |   input_sizes:
 3 |     0: 1
 4 |     1: 100
 5 |     2: 100
 6 |     3: 100
 7 |     4: 100
 8 |     5: 1
 9 |     6: 1
10 |   latencies:
11 |     0: 1
12 |     1: 1
13 |     2: 1
14 |     3: 1
15 |     4: 1
16 |     5: 1
17 |     6: 1
18 |   output_sizes:
19 |     0: 1
20 |     1: 1
21 |     2: 100
22 |     3: 100
23 |     4: 100
24 |     5: 100
25 |     6: 1
26 |   throughput: 100
27 | offloads:
28 |   RAY:
29 |     1:
30 |       throughput: 100
31 |     2:
32 |       throughput: 1
33 |     3:
34 |       throughput: 1
35 |     4:
36 |       throughput: 100
37 |     5:
38 |       throughput: 150
39 | disk_info:
40 |   read_latency: 1.6408648662036286e-09
41 |   write_latency: 2.5914232537616046e-09
42 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_local_tf_service.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     3: '4'
 4 |     4: ''
 5 |   n_local_workers: 1
 6 |   pipes:
 7 |     0:
 8 |       name: MapperPipe__embedding
 9 |       variant: INPROCESS
10 |     1:
11 |       name: MapperPipe__truncate
12 |       variant: INPROCESS
13 |     2:
14 |       name: MapperPipe__tokenize
15 |       variant: INPROCESS
16 |     3:
17 |       fused_pipes:
18 |       - 2
19 |       - 1
20 |       - 0
21 |       name: TFLocalLinePipe
22 |       variant: TF
23 |       variant_ctx:
24 |         num_parallel_calls: -1
25 |         variant_type: TF
26 |     4:
27 |       name: PrefetcherPipe
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_remote_tf_service.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     3: '4'
 4 |     4: ''
 5 |   n_local_workers: 1
 6 |   pipes:
 7 |     0:
 8 |       name: MapperPipe__embedding
 9 |       variant: INPROCESS
10 |     1:
11 |       name: MapperPipe__truncate
12 |       variant: INPROCESS
13 |     2:
14 |       name: MapperPipe__tokenize
15 |       variant: INPROCESS
16 |     3:
17 |       fused_pipes:
18 |       - 2
19 |       - 1
20 |       - 0
21 |       name: TFLocalLinePipe
22 |       variant: TF
23 |       variant_ctx:
24 |         num_parallel_calls: -1
25 |         variant_type: TF
26 |     4:
27 |       name: PrefetcherPipe
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 | 


--------------------------------------------------------------------------------
/tests/data/config_ref_prefetch.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |   pipes:
 8 |     0:
 9 |       name: NoopPipe
10 |     1:
11 |       name: NoopPipe
12 |     2:
13 |       name: NoopPipe
14 |     3:
15 |       name: IterSourcePipe
16 | physical_plan:
17 |   graph:
18 |     0: '4'
19 |     1: '0'
20 |     2: '1'
21 |     3: '2'
22 |     4: ''
23 |   pipes:
24 |     0:
25 |       name: NoopPipe
26 |       variant: INPROCESS
27 |     1:
28 |       name: NoopPipe
29 |       variant: INPROCESS
30 |     2:
31 |       name: NoopPipe
32 |       variant: INPROCESS
33 |     3:
34 |       name: IterSourcePipe
35 |       variant: INPROCESS
36 |     4:
37 |       name: PrefetcherPipe 
38 |       variant: INPROCESS
39 | 


--------------------------------------------------------------------------------
/evaluation/plumber/simclr/show_bneck.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | 
 3 | # Stats file to analyze; the path is relative to the working directory.
 4 | filename = "stats.pb"
 5 | # NOTE(review): PlumberPerformanceModel is presumably provided by a
 6 | # Plumber-enabled TensorFlow build rather than stock TensorFlow -- confirm.
 7 | plumber = tf.data.experimental.analysis.PlumberPerformanceModel(filename)
 8 | model = plumber.model()
 9 | 
10 | # Report the node the model's recommendation names as the bottleneck.
11 | recommendation = model.recommendation()
12 | slowest_node = recommendation.bottleneck_node()
13 | print("Slowest node: {}".format(slowest_node.name))
14 | # Gather the time totals and utilization figures for the summary line.
15 | CPU_time_used = model.total_CPU_time()
16 | wallclock_used = model.total_wallclock_time()
17 | cpu_util = model.CPU_Util()
18 | disk_util = model.Disk_Util()
19 | print("Resource utilization: CPU Util {} ({}s CPU time,{}s wallclock time),"
20 |       " Disk Util {}".format(cpu_util,
21 |                         CPU_time_used,
22 |                         wallclock_used,
23 |                         disk_util))


--------------------------------------------------------------------------------
/cedar/service/__init__.py:
--------------------------------------------------------------------------------
 1 | from cedar.service.multiprocess import MultiprocessService
 2 | from cedar.service.multithread import MultithreadedService
 3 | from cedar.service.smp import SMPService, SMPRequest, SMPResponse
 4 | from cedar.service.task import (
 5 |     MultiprocessTask,
 6 |     MultithreadedTask,
 7 |     Task,
 8 | )
 9 | from cedar.service.actor import SMPActor
10 | from cedar.service.ray_service import RayActor, RayService
11 | 
12 | 
13 | __all__ = [
14 |     "MultiprocessService",
15 |     "MultiprocessTask",
16 |     "MultithreadedService",
17 |     "MultithreadedTask",
18 |     "RayActor",
19 |     "RayService",
20 |     "SMPActor",
21 |     "SMPRequest",
22 |     "SMPResponse",
23 |     "SMPService",
24 |     "Task",
25 | ]
26 | 
27 | assert __all__ == sorted(__all__)  # fail at import if the list is unsorted
28 | 


--------------------------------------------------------------------------------
/evaluation/tf_utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | 
 3 | 
 4 | class TFEvalSpec:
 5 |     def __init__(
 6 |         self,
 7 |         batch_size: int,
 8 |         num_parallel_calls: Optional[int],
 9 |         num_epochs: int = 1,
10 |         num_total_samples: Optional[int] = None,
11 |         iteration_time: Optional[float] = None,
12 |         service_addr: Optional[str] = None,
13 |         read_from_remote: bool = False,
14 |     ):
15 |         self.batch_size = batch_size
16 |         self.num_parallel_calls = num_parallel_calls
17 |         self.num_total_samples = num_total_samples
18 |         self.num_epochs = num_epochs
19 |         self.iteration_time = iteration_time
20 |         self.service_addr = service_addr
21 |         self.read_from_remote = read_from_remote
22 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: ''
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: MapperPipe__embedding
13 |       variant: INPROCESS
14 |     1:
15 |       name: MapperPipe__truncate
16 |       variant: INPROCESS
17 |     2:
18 |       name: MapperPipe__tokenize
19 |       variant: INPROCESS
20 |     3:
21 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
22 |       variant: INPROCESS
23 |     4:
24 |       name: LocalLinePipe
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     5:
29 |       name: PrefetcherPipe
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: ''
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: MapperPipe__embedding
13 |       variant: INPROCESS
14 |     1:
15 |       name: MapperPipe__truncate
16 |       variant: INPROCESS
17 |     2:
18 |       name: MapperPipe__tokenize
19 |       variant: INPROCESS
20 |     3:
21 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
22 |       variant: INPROCESS
23 |     4:
24 |       name: LocalLinePipe
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     5:
29 |       name: PrefetcherPipe
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: ''
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: MapperPipe__embedding
13 |       variant: INPROCESS
14 |     1:
15 |       name: MapperPipe__truncate
16 |       variant: INPROCESS
17 |     2:
18 |       name: MapperPipe__tokenize
19 |       variant: INPROCESS
20 |     3:
21 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
22 |       variant: INPROCESS
23 |     4:
24 |       name: LocalLinePipe
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     5:
29 |       name: PrefetcherPipe
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 | 


--------------------------------------------------------------------------------
/tests/data/config_ref_mp.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |   pipes:
 9 |     0:
10 |       name: BatcherPipe(batch_size=3)
11 |     1:
12 |       name: NoopPipe
13 |     2:
14 |       name: NoopPipe
15 |     3:
16 |       name: NoopPipe
17 |     4:
18 |       name: IterSourcePipe
19 | physical_plan:
20 |   graph:
21 |     0: ''
22 |     1: '0'
23 |     2: '1'
24 |     3: '2'
25 |     4: '3'
26 |   pipes:
27 |     0:
28 |       name: BatcherPipe(batch_size=3)
29 |       variant: INPROCESS
30 |     1:
31 |       name: NoopPipe
32 |       variant: INPROCESS
33 |     2:
34 |       name: NoopPipe
35 |       variant: INPROCESS
36 |     3:
37 |       name: NoopPipe
38 |       variant: INPROCESS
39 |     4:
40 |       name: IterSourcePipe
41 |       variant: INPROCESS
42 |   n_local_workers: 3
43 | 


--------------------------------------------------------------------------------
/cedar/pipes/custom/wikitext103_tf_service.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_text as text
 3 | 
 4 | # from https://github.com/cirquit/presto/blob/master/openwebtext_pipeline_modern.py  # noqa: E501
 5 | # vocabulary size 50001, GPT2 originally used 50257
 6 | vocabulary_size = 50001
 7 | bpe_model_path = tf.keras.utils.get_file(
 8 |     "bpe_en_50k.model",
 9 |     "https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs50000.model",
10 | )
11 | bpe_model = open(bpe_model_path, "rb").read()
12 | 
13 | embedding_dimension = 768
14 | bpe_tokernizer = text.SentencepieceTokenizer(
15 |     model=bpe_model, out_type=tf.dtypes.int32
16 | )
17 | 
18 | embedding = tf.Variable(
19 |     tf.random.uniform([vocabulary_size, embedding_dimension], -1.0, 1.0)
20 | )
21 | 
22 | 
23 | def _truncate(x):
24 |     dim = tf.shape(x)[0]
25 |     slice_size = tf.minimum(dim, 254)
26 |     x = tf.slice(x, [0], [slice_size])
27 |     return x
28 | 
29 | 
30 | def _embedding(x):
31 |     return tf.nn.embedding_lookup(embedding, x)
32 | 
33 | 
34 | def _tokenize(x):
35 |     return bpe_tokernizer.tokenize(x)
36 | 


--------------------------------------------------------------------------------
/tests/data/config_fuse_ray.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |   pipes:
 9 |     0:
10 |       name: NoopPipe
11 |     1:
12 |       name: MapperPipe__add_one
13 |     2:
14 |       name: MapperPipe__add_one
15 |     3:
16 |       name: NoopPipe
17 |     4:
18 |       name: IterSourcePipe
19 | physical_plan:
20 |   graph:
21 |     0: '5'
22 |     3: '6'
23 |     4: '3'
24 |     5: ''
25 |     6: '0'
26 |   pipes:
27 |     0:
28 |       name: NoopPipe
29 |       variant: INPROCESS
30 |     1:
31 |       name: MapperPipe__add_one
32 |       variant: INPROCESS
33 |     2:
34 |       name: MapperPipe__add_one
35 |       variant: INPROCESS
36 |     3:
37 |       name: NoopPipe
38 |       variant: INPROCESS
39 |     4:
40 |       name: IterSourcePipe
41 |       variant: INPROCESS
42 |     5:
43 |       name: PrefetcherPipe
44 |       variant: INPROCESS
45 |     6:
46 |       name: FusedPipe
47 |       variant: RAY
48 |       variant_ctx:
49 |         n_actors: 1
50 |       fused_pipes: 
51 |         - 2
52 |         - 1


--------------------------------------------------------------------------------
/cedar/pipes/custom/coco.py:
--------------------------------------------------------------------------------
 1 | import torch
 2 | from PIL import Image
 3 | from torchvision.transforms import v2
 4 | from torchvision.datapoints import BoundingBox, BoundingBoxFormat
 5 | 
 6 | 
 7 | def to_float(x):
 8 |     return x.to(torch.float32)
 9 | 
10 | 
11 | def to_tensor(x):
12 |     x["image"] = v2.ToTensor()(x["image"])
13 |     return x
14 | 
15 | 
16 | def distort(x):
17 |     x["image"] = v2.RandomPhotometricDistort(p=1)(x["image"])
18 |     return x
19 | 
20 | 
21 | def zoom_out(x):
22 |     x["image"], x["boxes"] = v2.RandomZoomOut(fill=[123.0, 117.0, 104.0], p=1)(
23 |         x["image"], x["boxes"]
24 |     )
25 |     return x
26 | 
27 | 
28 | def crop(x):
29 |     x["image"], x["boxes"] = v2.RandomIoUCrop()(x["image"], x["boxes"])
30 |     return x
31 | 
32 | 
33 | def read_image(x):
34 |     img = Image.open(x["image"]).convert("RGB")
35 |     x["image"] = img
36 |     bboxes = BoundingBox(
37 |         x["boxes"],
38 |         format=BoundingBoxFormat.XYXY,
39 |         spatial_size=(img.height, img.width),
40 |     )
41 |     x["boxes"] = bboxes
42 | 
43 |     return x
44 | 


--------------------------------------------------------------------------------
/tests/data/insert_config_ref.yml:
--------------------------------------------------------------------------------
 1 | logical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |   pipes:
 8 |     0:
 9 |       name: NoopPipe
10 |     1:
11 |       name: NoopPipe
12 |     2:
13 |       name: NoopPipe
14 |     3:
15 |       name: IterSourcePipe
16 | physical_plan:
17 |   graph:
18 |     0: ''
19 |     1: '0'
20 |     2: '4'
21 |     3: '2'
22 |     4: '1'
23 |   n_local_workers: 1
24 |   pipes:
25 |     0:
26 |       name: NoopPipe
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     1:
31 |       name: NoopPipe
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     2:
36 |       name: NoopPipe
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     3:
41 |       name: IterSourcePipe
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     4:
46 |       name: NoopOptimizerPipe
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_service_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '4'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: ''
 8 |   n_local_workers: 1
 9 |   pipes:
10 |     0:
11 |       name: MapperPipe__embedding
12 |       variant: TF
13 |       variant_ctx:
14 |         num_parallel_calls: null
15 |         variant_type: TF
16 |     1:
17 |       name: MapperPipe__truncate
18 |       variant: TF
19 |       variant_ctx:
20 |         num_parallel_calls: null
21 |         variant_type: TF
22 |     2:
23 |       name: MapperPipe__tokenize
24 |       variant: TF_RAY
25 |       variant_ctx:
26 |         max_inflight: 48000
27 |         max_prefetch: 48000
28 |         n_actors: 32
29 |         num_parallel_calls: null
30 |         submit_batch_size: 500
31 |         use_threads: true
32 |         variant_type: TF_RAY
33 |     3:
34 |       name: TFLocalLinePipe
35 |       variant: TF
36 |       variant_ctx:
37 |         num_parallel_calls: null
38 |         variant_type: TF
39 |     4:
40 |       name: PrefetcherPipe
41 |       variant: INPROCESS
42 |       variant_ctx:
43 |         variant_type: INPROCESS
44 |   


--------------------------------------------------------------------------------
/cedar/pipes/optimize/registry.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Type, Callable
 3 | 
 4 | 
 5 | logger = logging.getLogger(__name__)
 6 | 
 7 | 
 8 | class OptimizerPipeRegistry:
 9 |     _registered_pipes = {}
10 | 
11 |     @classmethod
12 |     def register_pipe(cls, name: str, pipe_cls: Type):
13 |         if name in cls._registered_pipes:
14 |             logger.warning(
15 |                 f"Optimizer Pipe {name} already registered. Overwriting."
16 |             )
17 |         logger.info(f"Registering Optimizer Pipe {name}.")
18 |         cls._registered_pipes[name] = pipe_cls
19 | 
20 |     @classmethod
21 |     def get_pipe(cls, name: str):
22 |         if name not in cls._registered_pipes:
23 |             raise ValueError(f"Pipe {name} not reigstered.")
24 |         return cls._registered_pipes[name]
25 | 
26 | 
27 | def register_optimizer_pipe(name: str) -> Callable[[Type], Type]:
28 |     """
29 |     Decorator to register an optimizer pipe.
30 |     """
31 | 
32 |     def decorator(optimizer_cls: Type) -> Type:
33 |         OptimizerPipeRegistry.register_pipe(name, optimizer_cls)
34 |         return optimizer_cls
35 | 
36 |     return decorator
37 | 


--------------------------------------------------------------------------------
/evaluation/run_autotuning.sh:
--------------------------------------------------------------------------------
1 | #!/bin/bash 
2 |  python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_controller_dataset.py --master_feature_config pipelines/simclrv2/configs/eval_controller_remote.yaml --use_ray --ray_ip 10.138.0.7 --num_epochs 5 --iteration_time 0.05
3 |  python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_controller_dataset.py --master_feature_config pipelines/simclrv2/configs/eval_controller_remote.yaml --use_ray --ray_ip 10.138.0.50 --num_epochs 5 --iteration_time 0.02
4 |  python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_controller_dataset.py --master_feature_config pipelines/simclrv2/configs/eval_controller_remote.yaml --use_ray --ray_ip 10.138.0.50 --num_epochs 5 --iteration_time 0.01
5 |  python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_controller_dataset.py --master_feature_config pipelines/simclrv2/configs/eval_controller_remote.yaml --use_ray --ray_ip 10.138.0.50 --num_epochs 5 --iteration_time 0.005
6 |  python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_controller_dataset.py --master_feature_config pipelines/simclrv2/configs/eval_controller_remote.yaml --use_ray --ray_ip 10.138.0.50 --num_epochs 5 --iteration_time 0.00333


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | cedar Open Source License.
 2 | 
 3 | Copyright 2024 Board of Trustees of Stanford University
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/cedar/service/multiprocess.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | from concurrent.futures import ProcessPoolExecutor, Future
 4 | 
 5 | from .task import MultiprocessTask
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | 
class MultiprocessService:
    """
    A multiprocess service that executes preprocessing tasks
    using a pool of workers.

    Args:
        num_workers: Number of workers in the pool

    Raises:
        ValueError: If ``num_workers`` is less than 1.
    """

    def __init__(self, num_workers: int):
        if num_workers < 1:
            raise ValueError(
                "Cannot create a MultiprocessService with {} workers".format(
                    num_workers
                )
            )
        self.executor = ProcessPoolExecutor(max_workers=num_workers)
        logger.info(
            f"Started Multiprocess Service with {num_workers} workers."
        )

    def shutdown(self):
        """
        Shut down the underlying executor.

        Safe to call on an instance whose ``__init__`` raised before the
        executor was created (in which case this is a no-op).
        """
        # ``executor`` is missing when __init__ raised ValueError; __del__
        # still runs on such instances, so do not assume the attribute.
        executor = getattr(self, "executor", None)
        if executor is not None:
            executor.shutdown()

    def submit(self, task: "MultiprocessTask") -> Future:
        """
        Submit ``task.process`` to the worker pool.

        Args:
            task: Task whose ``process`` method is executed by a worker

        Returns:
            The future returned by the executor for this submission.
        """
        return self.executor.submit(task.process)

    def __del__(self):
        # Release the pool when the service is garbage collected.
        self.shutdown()
40 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_local_tf.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     4: '6'
 5 |     5: ''
 6 |     6: '0'
 7 |   n_local_workers: 1
 8 |   pipes:
 9 |     0:
10 |       name: MapperPipe__embedding
11 |       variant: TF
12 |       variant_ctx:
13 |         num_parallel_calls: null
14 |         variant_type: TF
15 |     1:
16 |       name: MapperPipe__truncate
17 |       variant: INPROCESS
18 |     2:
19 |       name: MapperPipe__tokenize
20 |       variant: INPROCESS
21 |     3:
22 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
23 |       variant: INPROCESS
24 |     4:
25 |       name: LocalLinePipe
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     5:
30 |       name: PrefetcherPipe
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     6:
35 |       fused_pipes:
36 |       - 3
37 |       - 2
38 |       - 1
39 |       name: FusedPipe
40 |       variant: SMP
41 |       variant_ctx:
42 |         disable_torch_parallelism: true
43 |         max_inflight: 50
44 |         max_prefetch: 50
45 |         n_procs: 8
46 |         use_threads: true
47 |         variant_type: SMP
48 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_remote_tf.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     4: '6'
 5 |     5: ''
 6 |     6: '0'
 7 |   n_local_workers: 8
 8 |   pipes:
 9 |     0:
10 |       name: MapperPipe__embedding
11 |       variant: TF
12 |       variant_ctx:
13 |         num_parallel_calls: null
14 |         variant_type: TF
15 |     1:
16 |       name: MapperPipe__truncate
17 |       variant: INPROCESS
18 |     2:
19 |       name: MapperPipe__tokenize
20 |       variant: INPROCESS
21 |     3:
22 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
23 |       variant: INPROCESS
24 |     4:
25 |       name: LocalLinePipe
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     5:
30 |       name: PrefetcherPipe
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     6:
35 |       fused_pipes:
36 |       - 3
37 |       - 2
38 |       - 1
39 |       name: FusedPipe
40 |       variant: TF_RAY
41 |       variant_ctx:
42 |         max_inflight: 1500
43 |         max_prefetch: 1500
44 |         n_actors: 4
45 |         num_parallel_calls: null
46 |         submit_batch_size: 500
47 |         use_threads: true
48 |         variant_type: TF_RAY
49 | 


--------------------------------------------------------------------------------
/evaluation/run_tf_service.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Launches a tf.data.service worker and dispatcher on this machine.
 3 | """
 4 | 
 5 | import tensorflow as tf
 6 | import time
 7 | import argparse
 8 | 
 9 | DISPATCHER_PORT = 38655
10 | WORKER_PORT = 38656
11 | 
12 | 
def main():
    """
    Parse ``--ip_addr``, start a tf.data service dispatcher and one worker
    on this machine, then keep the process alive indefinitely.
    """
    arg_parser = argparse.ArgumentParser(
        description="Server for tf.data.service"
    )
    arg_parser.add_argument(
        "--ip_addr",
        required=True,
        type=str,
        help="IP Address of local host",
    )
    cli_args = arg_parser.parse_args()

    service = tf.data.experimental.service

    dispatch_server = service.DispatchServer(
        service.DispatcherConfig(port=DISPATCHER_PORT)
    )
    # The target is split on "://" and the part after it is handed to the
    # worker as the dispatcher address.
    dispatcher_addr = dispatch_server.target.split("://")[1]

    print(
        "Started tf.data service at address {}".format(dispatch_server.target)
    )

    worker_config = service.WorkerConfig(
        dispatcher_address=dispatcher_addr,
        worker_address=cli_args.ip_addr + ":" + str(WORKER_PORT),
        port=WORKER_PORT,
    )
    # Bound to a name although never read afterwards.
    worker_server = service.WorkerServer(worker_config)  # noqa:F841

    # Block forever; the process is stopped externally.
    while True:
        time.sleep(1)
40 | 
41 | 
42 | if __name__ == "__main__":
43 |     main()
44 | 


--------------------------------------------------------------------------------
/cedar/pipes/custom/simclrv2.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_addons as tfa
 3 | 
 4 | IMG_HEIGHT = 244
 5 | IMG_WIDTH = 244
 6 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
 7 | 
 8 | 
 9 | def read_image(x):
10 |     img = tf.io.read_file(x)
11 |     return img
12 | 
13 | 
def decode_jpeg(x):
    """
    Decode JPEG data ``x`` into a 3-channel image and insert a new
    axis at position 0.
    """
    decoded = tf.image.decode_jpeg(x, channels=3)
    return tf.expand_dims(decoded, axis=0)
18 | 
19 | 
def convert_to_float(x):
    """Convert image ``x`` to ``tf.float32`` via ``convert_image_dtype``."""
    as_float = tf.image.convert_image_dtype(x, tf.float32)
    return as_float
22 | 
23 | 
def crop_and_resize(x):
    """
    Crop one uniformly random box out of ``x`` and resize the crop to
    ``IMG_HEIGHT`` x ``IMG_WIDTH``.
    """
    # A single box made of four uniform random values.
    random_box = tf.random.uniform(shape=(1, 4))
    output_size = [IMG_HEIGHT, IMG_WIDTH]
    # Box index [0]: the box is applied to the first image in ``x``.
    return tf.image.crop_and_resize(x, random_box, [0], output_size)
27 | 
28 | 
def random_flip(x):
    """Apply ``tf.image.random_flip_left_right`` to ``x``."""
    flipped = tf.image.random_flip_left_right(x)
    return flipped
31 | 
32 | 
def color_jitter(x):
    """
    Randomly perturb brightness and contrast of ``x``; when the last
    dimension is 3, also randomly perturb saturation and hue.
    """
    jittered = tf.image.random_brightness(x, max_delta=0.1)
    jittered = tf.image.random_contrast(jittered, lower=0.9, upper=1.1)

    # Saturation and hue are only applied to 3-channel inputs.
    if jittered.shape[-1] != 3:
        return jittered

    jittered = tf.image.random_saturation(jittered, lower=0.9, upper=1.1)
    return tf.image.random_hue(jittered, max_delta=0.1)
41 | 
42 | 
def gaussian_blur(x):
    """
    Blur ``x`` with ``tfa.image.gaussian_filter2d`` using a square
    filter of side ``GAUSSIAN_BLUR_KERNEL_SIZE``.
    """
    kernel_shape = [GAUSSIAN_BLUR_KERNEL_SIZE] * 2
    return tfa.image.gaussian_filter2d(x, filter_shape=kernel_shape)
48 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_p.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 8
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_to_tensor
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_distort
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_RandomHorizontalFlip(p=1)
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_crop
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: MapperPipe_zoom_out
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: COCOSourcePipe
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: PrefetcherPipe
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_pr.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '5'
 5 |     2: '0'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '1'
10 |     7: ''
11 |   n_local_workers: 8
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_to_tensor
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_distort
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_RandomHorizontalFlip(p=1)
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_crop
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: MapperPipe_zoom_out
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: COCOSourcePipe
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: PrefetcherPipe
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_baseline.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_to_tensor
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_distort
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_RandomHorizontalFlip(p=1)
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_crop
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: MapperPipe_zoom_out
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: COCOSourcePipe
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: PrefetcherPipe
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/cedar_local_plan.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '5'
 5 |     2: '0'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '1'
10 |     7: ''
11 |   n_local_workers: 8
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_to_tensor
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_distort
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_RandomHorizontalFlip(p=1)
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_crop
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: MapperPipe_zoom_out
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: COCOSourcePipe
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: PrefetcherPipe
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/download.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Downloads the dataset for this pipeline
 3 | """
 4 | 
 5 | import pathlib
 6 | import tarfile
 7 | import urllib.request
 8 | 
 9 | DATASET_NAME = "imagenette2"
10 | DATASET_LOC = "datasets/imagenette2"
11 | DATASET_FILE = "imagenette2.tgz"
12 | DATASET_SOURCE = "https://s3.amazonaws.com/fast-ai-imageclas/imagenette2.tgz"
13 | 
14 | 
def download_dataset():
    """
    Download and extract the dataset for this pipeline if it is missing.

    The data directory is ``DATASET_LOC`` resolved against the directory two
    levels above this file's directory. If the extracted ``imagenette2``
    directory already exists there, nothing is done. Otherwise the archive is
    downloaded (unless a complete archive is already present), extracted into
    the data directory, and then deleted.
    """
    data_dir = (
        pathlib.Path(__file__).resolve().parents[2].joinpath(DATASET_LOC)
    )

    extract_dir = data_dir / "imagenette2"
    if extract_dir.exists():
        print("Dataset already downloaded...")
        return

    data_dir.mkdir(parents=True, exist_ok=True)
    dataset_file = data_dir / DATASET_FILE

    # The archive is removed after a successful extraction, so an existing
    # archive without the extracted directory means extraction never
    # finished: reuse the archive instead of returning without a dataset.
    if not dataset_file.is_file():
        print(f"Downloading dataset to {str(dataset_file)}...")
        # Download under a temporary name so an interrupted transfer is
        # never mistaken for a complete archive on the next run.
        partial_file = dataset_file.with_name(dataset_file.name + ".part")
        urllib.request.urlretrieve(DATASET_SOURCE, str(partial_file))
        partial_file.replace(dataset_file)

    print("Extracting dataset...")
    with tarfile.open(dataset_file, "r:gz") as tar:
        tar.extractall(path=data_dir)

    dataset_file.unlink()
44 | 
45 | 
46 | if __name__ == "__main__":
47 |     download_dataset()
48 | 


--------------------------------------------------------------------------------
/evaluation/plots/ablation.csv:
--------------------------------------------------------------------------------
 1 | Pipeline,Setup,Runtime
 2 | CV-torch,Baseline,242.3816667
 3 | CV-torch,plus parallelism,49.378
 4 | CV-torch,plus reorder,28.204
 5 | CV-torch,plus offload,16.98566667
 6 | CV-torch,plus fusion,11.11533333
 7 | CV-tf,Baseline,45.455
 8 | CV-tf,plus parallelism,43.214
 9 | CV-tf,plus reorder,38.269
10 | CV-tf,plus offload,38.2
11 | CV-tf,plus fusion,10.627
12 | SSD-torch,Baseline,960.229
13 | SSD-torch,plus parallelism,191.033
14 | SSD-torch,plus reorder,88.535
15 | SSD-torch,plus offload,64.593
16 | SSD-torch,plus fusion,52.788
17 | SSD-tf,Baseline,235.187
18 | SSD-tf,plus parallelism,234.49
19 | SSD-tf,plus reorder,40.469
20 | SSD-tf,plus offload,40.373
21 | SSD-tf,plus fusion,31.32
22 | NLP-torch,Baseline,53.295
23 | NLP-torch,plus parallelism,54.09
24 | NLP-torch,plus reorder,53.664
25 | NLP-torch,plus offload,25.692
26 | NLP-torch,plus fusion,20.98866667
27 | NLP-hf-tf,Baseline,248.723
28 | NLP-hf-tf,plus parallelism,250.318
29 | NLP-hf-tf,plus reorder,251.236
30 | NLP-hf-tf,plus offload,93.021
31 | NLP-hf-tf,plus fusion,41.52766667
32 | NLP-tf,Baseline,241.764
33 | NLP-tf,plus parallelism,238.533
34 | NLP-tf,plus reorder,239.402
35 | NLP-tf,plus offload,208.491
36 | NLP-tf,plus fusion,35.94933333
37 | ASR,Baseline,878.4
38 | ASR,plus parallelism,389.28
39 | ASR,plus reorder,376.75
40 | ASR,plus offload,225.05
41 | ASR,plus fusion,20.043


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/eval_remote.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     7: ''
 4 |   n_local_workers: 1
 5 |   pipes:
 6 |     0:
 7 |       name: MapperPipe_mel
 8 |       variant: INPROCESS
 9 |     1:
10 |       name: MapperPipe_frequency_mask
11 |       variant: INPROCESS
12 |     2:
13 |       name: MapperPipe_time_mask
14 |       variant: INPROCESS
15 |     3:
16 |       name: MapperPipe__stretch
17 |       variant: INPROCESS
18 |     4:
19 |       name: MapperPipe__spec
20 |       variant: INPROCESS
21 |     5:
22 |       name: MapperPipe__resample
23 |       variant: INPROCESS
24 |     6:
25 |       name: MapperPipe__read
26 |       variant: INPROCESS
27 |     7:
28 |       fused_pipes:
29 |       - 6
30 |       - 5
31 |       - 4
32 |       - 2
33 |       - 1
34 |       - 3
35 |       - 0
36 |       name: LocalFSListerPipe
37 |       variant: RAY_DS
38 |       variant_ctx:
39 |         variant_type: RAY_DS
40 |     8:
41 |       name: PrefetcherPipe
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     9:
46 |       fused_pipes:
47 |       - 6
48 |       - 5
49 |       - 4
50 |       - 2
51 |       - 1
52 |       - 3
53 |       - 0
54 |       name: FusedPipe
55 |       variant: RAY
56 |       variant_ctx:
57 |         max_inflight: 100
58 |         max_prefetch: 100
59 |         n_actors: 4
60 |         submit_batch_size: 3
61 |         use_threads: true
62 |         variant_type: RAY
63 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_no_caching_plan.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '9'
 3 |   1: '0'
 4 |   2: '1'
 5 |   8: '10'
 6 |   9: ''
 7 |   10: '2'
 8 | n_local_workers: 1
 9 | pipes:
10 |   0:
11 |     name: BatcherPipe(batch_size=1)
12 |     variant: INPROCESS
13 |     variant_ctx:
14 |       variant_type: INPROCESS
15 |   1:
16 |     name: MapperPipe_Embedding(50257, 764)
17 |     variant: INPROCESS
18 |     variant_ctx:
19 |       variant_type: INPROCESS
20 |   2:
21 |     name: MapperPipe_ToTensor()
22 |     variant: INPROCESS
23 |     variant_ctx:
24 |       variant_type: INPROCESS
25 |   3:
26 |     name: MapperPipe_AddToken()
27 |   4:
28 |     name: MapperPipe_AddToken()
29 |   5:
30 |     name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
31 |   6:
32 |     name: MapperPipe_Truncate()
33 |   7:
34 |     name: MapperPipe_GPT2BPETokenizer()
35 |   8:
36 |     name: LocalLinePipe
37 |     variant: INPROCESS
38 |     variant_ctx:
39 |       variant_type: INPROCESS
40 |   9:
41 |     name: PrefetcherPipe
42 |     variant: INPROCESS
43 |     variant_ctx:
44 |       variant_type: INPROCESS
45 |   10:
46 |     fused_pipes:
47 |     - 7
48 |     - 6
49 |     - 5
50 |     - 4
51 |     - 3
52 |     name: FusedPipe
53 |     variant: RAY
54 |     variant_ctx:
55 |       max_inflight: 48000
56 |       max_prefetch: 48000
57 |       n_actors: 32
58 |       submit_batch_size: 500
59 |       use_threads: true
60 |       variant_type: RAY
61 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_tf_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: '8'
13 |     10: '9'
14 |     11: ''
15 |   n_local_workers: 8
16 |   pipes:
17 |     0:
18 |       name: BatcherPipe(batch_size=1)
19 |       variant: INPROCESS
20 |       variant_ctx:
21 |         variant_type: INPROCESS
22 |     1:
23 |       name: MapperPipe_per_image_standardization
24 |       variant: INPROCESS
25 |     2:
26 |       name: MapperPipe_gaussian_blur
27 |       variant: INPROCESS
28 |     3:
29 |       name: MapperPipe_rgb_to_grayscale
30 |       variant: INPROCESS
31 |     4:
32 |       name: MapperPipe_color_jitter
33 |       variant: INPROCESS
34 |     5:
35 |       name: MapperPipe_random_flip
36 |       variant: INPROCESS
37 |     6:
38 |       name: MapperPipe_crop_and_resize
39 |       variant: INPROCESS
40 |     7:
41 |       name: MapperPipe_convert_to_float
42 |       variant: INPROCESS
43 |     8:
44 |       name: MapperPipe_decode_jpeg
45 |       variant: INPROCESS
46 |     9:
47 |       name: MapperPipe_read_file
48 |       variant: INPROCESS
49 |     10:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     11:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_tf_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     2: '5'
 6 |     3: '6'
 7 |     4: '7'
 8 |     5: '4'
 9 |     6: '2'
10 |     7: '1'
11 |     8: '3'
12 |     9: '8'
13 |     10: '9'
14 |     11: ''
15 |   n_local_workers: 8
16 |   pipes:
17 |     0:
18 |       name: BatcherPipe(batch_size=1)
19 |       variant: INPROCESS
20 |       variant_ctx:
21 |         variant_type: INPROCESS
22 |     1:
23 |       name: MapperPipe_per_image_standardization
24 |       variant: INPROCESS
25 |     2:
26 |       name: MapperPipe_gaussian_blur
27 |       variant: INPROCESS
28 |     3:
29 |       name: MapperPipe_rgb_to_grayscale
30 |       variant: INPROCESS
31 |     4:
32 |       name: MapperPipe_color_jitter
33 |       variant: INPROCESS
34 |     5:
35 |       name: MapperPipe_random_flip
36 |       variant: INPROCESS
37 |     6:
38 |       name: MapperPipe_crop_and_resize
39 |       variant: INPROCESS
40 |     7:
41 |       name: MapperPipe_convert_to_float
42 |       variant: INPROCESS
43 |     8:
44 |       name: MapperPipe_decode_jpeg
45 |       variant: INPROCESS
46 |     9:
47 |       name: MapperPipe_read_file
48 |       variant: INPROCESS
49 |     10:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     11:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_tf_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     2: '5'
 6 |     3: '6'
 7 |     4: '7'
 8 |     5: '4'
 9 |     6: '2'
10 |     7: '1'
11 |     8: '3'
12 |     9: '8'
13 |     10: '9'
14 |     11: ''
15 |   n_local_workers: 8
16 |   pipes:
17 |     0:
18 |       name: BatcherPipe(batch_size=1)
19 |       variant: INPROCESS
20 |       variant_ctx:
21 |         variant_type: INPROCESS
22 |     1:
23 |       name: MapperPipe_per_image_standardization
24 |       variant: INPROCESS
25 |     2:
26 |       name: MapperPipe_gaussian_blur
27 |       variant: INPROCESS
28 |     3:
29 |       name: MapperPipe_rgb_to_grayscale
30 |       variant: INPROCESS
31 |     4:
32 |       name: MapperPipe_color_jitter
33 |       variant: INPROCESS
34 |     5:
35 |       name: MapperPipe_random_flip
36 |       variant: INPROCESS
37 |     6:
38 |       name: MapperPipe_crop_and_resize
39 |       variant: INPROCESS
40 |     7:
41 |       name: MapperPipe_convert_to_float
42 |       variant: INPROCESS
43 |     8:
44 |       name: MapperPipe_decode_jpeg
45 |       variant: INPROCESS
46 |     9:
47 |       name: MapperPipe_read_file
48 |       variant: INPROCESS
49 |     10:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     11:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_tf_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '5'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: ''
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: MapperPipe__embedding
13 |       variant: TF
14 |       variant_ctx:
15 |         num_parallel_calls: null
16 |         variant_type: TF
17 |     1:
18 |       name: MapperPipe__truncate
19 |       variant: TF
20 |       variant_ctx:
21 |         num_parallel_calls: null
22 |         variant_type: TF
23 |     2:
24 |       name: MapperPipe__tokenize
25 |       variant: TF_RAY
26 |       variant_ctx:
27 |         max_inflight: 3000
28 |         max_prefetch: 3000
29 |         n_actors: 2
30 |         num_parallel_calls: null
31 |         submit_batch_size: 500
32 |         use_threads: true
33 |         variant_type: TF_RAY
34 |     3:
35 |       name: MapperPipe_convert_to_tensor_v2_with_dispatch
36 |       variant: TF_RAY
37 |       variant_ctx:
38 |         max_inflight: 3000
39 |         max_prefetch: 3000
40 |         n_actors: 2
41 |         num_parallel_calls: null
42 |         submit_batch_size: 500
43 |         use_threads: true
44 |         variant_type: TF_RAY
45 |     4:
46 |       name: LocalLinePipe
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     5:
51 |       name: PrefetcherPipe
52 |       variant: INPROCESS
53 |       variant_ctx:
54 |         variant_type: INPROCESS
55 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/eval_local.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     7: ''
 4 |   n_local_workers: 1
 5 |   pipes:
 6 |     0:
 7 |       name: MapperPipe_mel
 8 |       variant: INPROCESS
 9 |       variant_ctx:
10 |         variant_type: INPROCESS
11 |     1:
12 |       name: MapperPipe_frequency_mask
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     2:
17 |       name: MapperPipe_time_mask
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     3:
22 |       name: MapperPipe__stretch
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     4:
27 |       name: MapperPipe__spec
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 |     5:
32 |       name: MapperPipe__resample
33 |       variant: INPROCESS
34 |       variant_ctx:
35 |         variant_type: INPROCESS
36 |     6:
37 |       name: MapperPipe__read
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     7:
42 |       fused_pipes:
43 |       - 6
44 |       - 5
45 |       - 4
46 |       - 2
47 |       - 1
48 |       - 3
49 |       - 0
50 |       name: LocalFSListerPipe
51 |       variant: RAY_DS
52 |       variant_ctx:
53 |         variant_type: RAY_DS
54 |     8:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/ablation_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: ''
12 |   n_local_workers: 8
13 |   pipes:
14 |     0:
15 |       name: MapperPipe_mel
16 |       variant: INPROCESS
17 |       variant_ctx:
18 |         variant_type: INPROCESS
19 |     1:
20 |       name: MapperPipe_frequency_mask
21 |       variant: INPROCESS
22 |       variant_ctx:
23 |         variant_type: INPROCESS
24 |     2:
25 |       name: MapperPipe_time_mask
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     3:
30 |       name: MapperPipe__stretch
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     4:
35 |       name: MapperPipe__spec
36 |       variant: INPROCESS
37 |       variant_ctx:
38 |         variant_type: INPROCESS
39 |     5:
40 |       name: MapperPipe__resample
41 |       variant: INPROCESS
42 |       variant_ctx:
43 |         variant_type: INPROCESS
44 |     6:
45 |       name: MapperPipe__read
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     7:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     8:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/ablation_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     1: '3'
 5 |     2: '1'
 6 |     3: '0'
 7 |     4: '2'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: ''
12 |   n_local_workers: 8
13 |   pipes:
14 |     0:
15 |       name: MapperPipe_mel
16 |       variant: INPROCESS
17 |       variant_ctx:
18 |         variant_type: INPROCESS
19 |     1:
20 |       name: MapperPipe_frequency_mask
21 |       variant: INPROCESS
22 |       variant_ctx:
23 |         variant_type: INPROCESS
24 |     2:
25 |       name: MapperPipe_time_mask
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     3:
30 |       name: MapperPipe__stretch
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     4:
35 |       name: MapperPipe__spec
36 |       variant: INPROCESS
37 |       variant_ctx:
38 |         variant_type: INPROCESS
39 |     5:
40 |       name: MapperPipe__resample
41 |       variant: INPROCESS
42 |       variant_ctx:
43 |         variant_type: INPROCESS
44 |     6:
45 |       name: MapperPipe__read
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     7:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     8:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/ablation_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: ''
12 |   n_local_workers: 1
13 |   pipes:
14 |     0:
15 |       name: MapperPipe_mel
16 |       variant: INPROCESS
17 |       variant_ctx:
18 |         variant_type: INPROCESS
19 |     1:
20 |       name: MapperPipe_frequency_mask
21 |       variant: INPROCESS
22 |       variant_ctx:
23 |         variant_type: INPROCESS
24 |     2:
25 |       name: MapperPipe_time_mask
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     3:
30 |       name: MapperPipe__stretch
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     4:
35 |       name: MapperPipe__spec
36 |       variant: INPROCESS
37 |       variant_ctx:
38 |         variant_type: INPROCESS
39 |     5:
40 |       name: MapperPipe__resample
41 |       variant: INPROCESS
42 |       variant_ctx:
43 |         variant_type: INPROCESS
44 |     6:
45 |       name: MapperPipe__read
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     7:
50 |       name: LocalFSListerPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     8:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 |   


--------------------------------------------------------------------------------
/cedar/client/logger.py:
--------------------------------------------------------------------------------
 1 | import threading
 2 | import queue
 3 | 
 4 | 
 5 | class LoggerThread(threading.Thread):
 6 |     def __init__(self, log_queue: queue.Queue[str], log_file: str) -> None:
 7 |         super().__init__(daemon=True)
 8 |         self.log_queue = log_queue
 9 |         self.log_file = log_file
10 |         self.running = True
11 | 
12 |     def run(self) -> None:
13 |         with open(self.log_file, "a") as f:
14 |             while self.running:
15 |                 try:
16 |                     entry = self.log_queue.get(timeout=0.1)
17 |                     f.write(entry + "\n")
18 |                     f.flush()
19 |                 except queue.Empty:
20 |                     continue
21 | 
22 |     def stop(self) -> None:
23 |         self.running = False
24 | 
25 | 
class DataSetLogger:
    """
    Encapsulates a thread which logs to a file.

    The background thread is started on construction. Call ``close()`` (or
    use the instance as a context manager) to stop it.

    Args:
        log_file (str): File to log data to
    """

    def __init__(self, log_file: str) -> None:
        self.log_file = log_file
        self.log_queue = queue.Queue()
        self.logger = LoggerThread(self.log_queue, self.log_file)
        self.logger.start()

    def log(self, entry: str) -> None:
        """Queue ``entry`` for the background thread to write."""
        self.log_queue.put(entry)

    def close(self) -> None:
        """Stop the background thread and wait for it to exit.

        Safe to call more than once, and on an instance whose thread was
        never created or never started.
        """
        logger = getattr(self, "logger", None)
        if logger is None:
            return
        logger.stop()
        # Joining a thread that was never started raises RuntimeError.
        if logger.is_alive():
            logger.join()

    def __enter__(self) -> "DataSetLogger":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def __del__(self) -> None:
        # __del__ also runs when __init__ failed part-way, in which case
        # ``logger`` may not exist yet.
        logger = getattr(self, "logger", None)
        if logger is not None and logger.is_alive():
            self.close()
50 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_tf_p.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_normalize
15 |       variant: TF
16 |       variant_ctx:
17 |         num_parallel_calls: null
18 |         variant_type: TF
19 |     1:
20 |       name: MapperPipe_distort
21 |       variant: TF
22 |       variant_ctx:
23 |         num_parallel_calls: null
24 |         variant_type: TF
25 |     2:
26 |       name: MapperPipe_random_flip
27 |       variant: TF
28 |       variant_ctx:
29 |         num_parallel_calls: null
30 |         variant_type: TF
31 |     3:
32 |       name: MapperPipe_resize_image
33 |       variant: TF
34 |       variant_ctx:
35 |         num_parallel_calls: null
36 |         variant_type: TF
37 |     4:
38 |       name: MapperPipe_distorted_bounding_box_crop
39 |       variant: TF
40 |       variant_ctx:
41 |         num_parallel_calls: null
42 |         variant_type: TF
43 |     5:
44 |       name: MapperPipe_read_image
45 |       variant: TF
46 |       variant_ctx:
47 |         num_parallel_calls: null
48 |         variant_type: TF
49 |     6:
50 |       name: COCOFileSourcePipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     7:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_tf_pr.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '3'
 5 |     2: '0'
 6 |     3: '2'
 7 |     4: '1'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_normalize
15 |       variant: TF
16 |       variant_ctx:
17 |         num_parallel_calls: null
18 |         variant_type: TF
19 |     1:
20 |       name: MapperPipe_distort
21 |       variant: TF
22 |       variant_ctx:
23 |         num_parallel_calls: null
24 |         variant_type: TF
25 |     2:
26 |       name: MapperPipe_random_flip
27 |       variant: TF
28 |       variant_ctx:
29 |         num_parallel_calls: null
30 |         variant_type: TF
31 |     3:
32 |       name: MapperPipe_resize_image
33 |       variant: TF
34 |       variant_ctx:
35 |         num_parallel_calls: null
36 |         variant_type: TF
37 |     4:
38 |       name: MapperPipe_distorted_bounding_box_crop
39 |       variant: TF
40 |       variant_ctx:
41 |         num_parallel_calls: null
42 |         variant_type: TF
43 |     5:
44 |       name: MapperPipe_read_image
45 |       variant: TF
46 |       variant_ctx:
47 |         num_parallel_calls: null
48 |         variant_type: TF
49 |     6:
50 |       name: COCOFileSourcePipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     7:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_tf_pro.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '3'
 5 |     2: '0'
 6 |     3: '2'
 7 |     4: '1'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_normalize
15 |       variant: TF
16 |       variant_ctx:
17 |         num_parallel_calls: null
18 |         variant_type: TF
19 |     1:
20 |       name: MapperPipe_distort
21 |       variant: TF
22 |       variant_ctx:
23 |         num_parallel_calls: null
24 |         variant_type: TF
25 |     2:
26 |       name: MapperPipe_random_flip
27 |       variant: TF
28 |       variant_ctx:
29 |         num_parallel_calls: null
30 |         variant_type: TF
31 |     3:
32 |       name: MapperPipe_resize_image
33 |       variant: TF
34 |       variant_ctx:
35 |         num_parallel_calls: null
36 |         variant_type: TF
37 |     4:
38 |       name: MapperPipe_distorted_bounding_box_crop
39 |       variant: TF
40 |       variant_ctx:
41 |         num_parallel_calls: null
42 |         variant_type: TF
43 |     5:
44 |       name: MapperPipe_read_image
45 |       variant: TF
46 |       variant_ctx:
47 |         num_parallel_calls: null
48 |         variant_type: TF
49 |     6:
50 |       name: COCOFileSourcePipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     7:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_tf_baseline.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '7'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: ''
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_normalize
15 |       variant: TF
16 |       variant_ctx:
17 |         num_parallel_calls: null
18 |         variant_type: TF
19 |     1:
20 |       name: MapperPipe_distort
21 |       variant: TF
22 |       variant_ctx:
23 |         num_parallel_calls: null
24 |         variant_type: TF
25 |     2:
26 |       name: MapperPipe_random_flip
27 |       variant: TF
28 |       variant_ctx:
29 |         num_parallel_calls: null
30 |         variant_type: TF
31 |     3:
32 |       name: MapperPipe_resize_image
33 |       variant: TF
34 |       variant_ctx:
35 |         num_parallel_calls: null
36 |         variant_type: TF
37 |     4:
38 |       name: MapperPipe_distorted_bounding_box_crop
39 |       variant: TF
40 |       variant_ctx:
41 |         num_parallel_calls: null
42 |         variant_type: TF
43 |     5:
44 |       name: MapperPipe_read_image
45 |       variant: TF
46 |       variant_ctx:
47 |         num_parallel_calls: null
48 |         variant_type: TF
49 |     6:
50 |       name: COCOFileSourcePipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     7:
55 |       name: PrefetcherPipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_truncate.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '10'
 3 |   1: '0'
 4 |   2: '9'
 5 |   8: '11'
 6 |   9: '1'
 7 |   10: ''
 8 |   11: '2'
 9 | n_local_workers: 1
10 | pipes:
11 |   0:
12 |     name: BatcherPipe(batch_size=1)
13 |     variant: INPROCESS
14 |     variant_ctx:
15 |       variant_type: INPROCESS
16 |   1:
17 |     name: MapperPipe_Embedding(50257, 764)
18 |     variant: INPROCESS
19 |     variant_ctx:
20 |       variant_type: INPROCESS
21 |   2:
22 |     name: MapperPipe_ToTensor()
23 |     variant: INPROCESS
24 |     variant_ctx:
25 |       variant_type: INPROCESS
26 |   3:
27 |     name: MapperPipe_AddToken()
28 |   4:
29 |     name: MapperPipe_AddToken()
30 |   5:
31 |     name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
32 |   6:
33 |     name: MapperPipe_Truncate()
34 |   7:
35 |     name: MapperPipe_GPT2BPETokenizer()
36 |   8:
37 |     name: LocalLinePipe
38 |     variant: INPROCESS
39 |     variant_ctx:
40 |       variant_type: INPROCESS
41 |   9:
42 |     name: ObjectDiskCachePipe
43 |     variant: INPROCESS
44 |     variant_ctx:
45 |       variant_type: INPROCESS
46 |   10:
47 |     name: PrefetcherPipe
48 |     variant: INPROCESS
49 |     variant_ctx:
50 |       variant_type: INPROCESS
51 |   11:
52 |     fused_pipes:
53 |     - 7
54 |     - 6
55 |     - 5
56 |     - 4
57 |     - 3
58 |     name: FusedPipe
59 |     variant: RAY
60 |     variant_ctx:
61 |       max_inflight: 48000
62 |       max_prefetch: 48000
63 |       n_actors: 32
64 |       submit_batch_size: 500
65 |       use_threads: true
66 |       variant_type: RAY
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/new_wikitext_optimal_cache_plan.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '10'
 3 |   1: '0'
 4 |   2: '9'
 5 |   8: '11'
 6 |   9: '1'
 7 |   10: ''
 8 |   11: '2'
 9 | n_local_workers: 1
10 | pipes:
11 |   0:
12 |     name: BatcherPipe(batch_size=1)
13 |     variant: INPROCESS
14 |     variant_ctx:
15 |       variant_type: INPROCESS
16 |   1:
17 |     name: MapperPipe_Embedding(50257, 764)
18 |     variant: INPROCESS
19 |     variant_ctx:
20 |       variant_type: INPROCESS
21 |   2:
22 |     name: MapperPipe_ToTensor()
23 |     variant: INPROCESS
24 |     variant_ctx:
25 |       variant_type: INPROCESS
26 |   3:
27 |     name: MapperPipe_AddToken()
28 |   4:
29 |     name: MapperPipe_AddToken()
30 |   5:
31 |     name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
32 |   6:
33 |     name: MapperPipe_Truncate()
34 |   7:
35 |     name: MapperPipe_GPT2BPETokenizer()
36 |   8:
37 |     name: LocalLinePipe
38 |     variant: INPROCESS
39 |     variant_ctx:
40 |       variant_type: INPROCESS
41 |   9:
42 |     name: ObjectDiskCachePipe
43 |     variant: INPROCESS
44 |     variant_ctx:
45 |       variant_type: INPROCESS
46 |   10:
47 |     name: PrefetcherPipe
48 |     variant: INPROCESS
49 |     variant_ctx:
50 |       variant_type: INPROCESS
51 |   11:
52 |     fused_pipes:
53 |     - 7
54 |     - 6
55 |     - 5
56 |     - 4
57 |     - 3
58 |     name: FusedPipe
59 |     variant: RAY
60 |     variant_ctx:
61 |       max_inflight: 48000
62 |       max_prefetch: 48000
63 |       n_actors: 32
64 |       submit_batch_size: 500
65 |       use_threads: true
66 |       variant_type: RAY
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_tensor_conversion.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '10'
 3 |   1: '0'
 4 |   2: '9'
 5 |   8: '11'
 6 |   9: '1'
 7 |   10: ''
 8 |   11: '2'
 9 | n_local_workers: 1
10 | pipes:
11 |   0:
12 |     name: BatcherPipe(batch_size=1)
13 |     variant: INPROCESS
14 |     variant_ctx:
15 |       variant_type: INPROCESS
16 |   1:
17 |     name: MapperPipe_Embedding(50257, 764)
18 |     variant: INPROCESS
19 |     variant_ctx:
20 |       variant_type: INPROCESS
21 |   2:
22 |     name: MapperPipe_ToTensor()
23 |     variant: INPROCESS
24 |     variant_ctx:
25 |       variant_type: INPROCESS
26 |   3:
27 |     name: MapperPipe_AddToken()
28 |   4:
29 |     name: MapperPipe_AddToken()
30 |   5:
31 |     name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
32 |   6:
33 |     name: MapperPipe_Truncate()
34 |   7:
35 |     name: MapperPipe_GPT2BPETokenizer()
36 |   8:
37 |     name: LocalLinePipe
38 |     variant: INPROCESS
39 |     variant_ctx:
40 |       variant_type: INPROCESS
41 |   9:
42 |     name: ObjectDiskCachePipe
43 |     variant: INPROCESS
44 |     variant_ctx:
45 |       variant_type: INPROCESS
46 |   10:
47 |     name: PrefetcherPipe
48 |     variant: INPROCESS
49 |     variant_ctx:
50 |       variant_type: INPROCESS
51 |   11:
52 |     fused_pipes:
53 |     - 7
54 |     - 6
55 |     - 5
56 |     - 4
57 |     - 3
58 |     name: FusedPipe
59 |     variant: RAY
60 |     variant_ctx:
61 |       max_inflight: 48000
62 |       max_prefetch: 48000
63 |       n_actors: 32
64 |       submit_batch_size: 500
65 |       use_threads: true
66 |       variant_type: RAY
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/ablation_pro.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     1: '5'
 5 |     2: '0'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '1'
10 |     7: '6'
11 |     8: ''
12 |   n_local_workers: 8
13 |   pipes:
14 |     0:
15 |       name: MapperPipe_to_tensor
16 |       variant: INPROCESS
17 |       variant_ctx:
18 |         variant_type: INPROCESS
19 |     1:
20 |       name: MapperPipe_distort
21 |       variant: RAY
22 |       variant_ctx:
23 |         max_inflight: 100
24 |         max_prefetch: 100
25 |         n_actors: 4
26 |         submit_batch_size: 1
27 |         use_threads: true
28 |         variant_type: RAY
29 |     2:
30 |       name: MapperPipe_RandomHorizontalFlip(p=1)
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     3:
35 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
36 |       variant: INPROCESS
37 |       variant_ctx:
38 |         variant_type: INPROCESS
39 |     4:
40 |       name: MapperPipe_crop
41 |       variant: INPROCESS
42 |       variant_ctx:
43 |         variant_type: INPROCESS
44 |     5:
45 |       name: MapperPipe_zoom_out
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     6:
50 |       name: MapperPipe_read_image
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     7:
55 |       name: COCOFileSourcePipe
56 |       variant: INPROCESS
57 |       variant_ctx:
58 |         variant_type: INPROCESS
59 |     8:
60 |       name: PrefetcherPipe
61 |       variant: INPROCESS
62 |       variant_ctx:
63 |         variant_type: INPROCESS
64 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/cedar_tf_local_plan.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     6: '8'
 4 |     7: ''
 5 |     8: '7'
 6 |   n_local_workers: 1
 7 |   pipes:
 8 |     0:
 9 |       name: MapperPipe_normalize
10 |       variant: TF
11 |       variant_ctx:
12 |         num_parallel_calls: null
13 |         variant_type: TF
14 |     1:
15 |       name: MapperPipe_distort
16 |       variant: TF
17 |       variant_ctx:
18 |         num_parallel_calls: null
19 |         variant_type: TF
20 |     2:
21 |       name: MapperPipe_random_flip
22 |       variant: TF
23 |       variant_ctx:
24 |         num_parallel_calls: null
25 |         variant_type: TF
26 |     3:
27 |       name: MapperPipe_resize_image
28 |       variant: TF
29 |       variant_ctx:
30 |         num_parallel_calls: null
31 |         variant_type: TF
32 |     4:
33 |       name: MapperPipe_distorted_bounding_box_crop
34 |       variant: TF
35 |       variant_ctx:
36 |         num_parallel_calls: null
37 |         variant_type: TF
38 |     5:
39 |       name: MapperPipe_read_image
40 |       variant: TF
41 |       variant_ctx:
42 |         num_parallel_calls: null
43 |         variant_type: TF
44 |     6:
45 |       name: COCOFileSourcePipe
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     7:
50 |       name: PrefetcherPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     8:
55 |       fused_pipes:
56 |       - 5
57 |       - 4
58 |       - 1
59 |       - 3
60 |       - 2
61 |       - 0
62 |       name: FusedPipe
63 |       variant: TF
64 |       variant_ctx:
65 |         num_parallel_calls: -1
66 |         variant_type: TF
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/cedar_tf_remote_plan.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     6: '8'
 4 |     7: ''
 5 |     8: '7'
 6 |   n_local_workers: 1
 7 |   pipes:
 8 |     0:
 9 |       name: MapperPipe_normalize
10 |       variant: TF
11 |       variant_ctx:
12 |         num_parallel_calls: null
13 |         variant_type: TF
14 |     1:
15 |       name: MapperPipe_distort
16 |       variant: TF
17 |       variant_ctx:
18 |         num_parallel_calls: null
19 |         variant_type: TF
20 |     2:
21 |       name: MapperPipe_random_flip
22 |       variant: TF
23 |       variant_ctx:
24 |         num_parallel_calls: null
25 |         variant_type: TF
26 |     3:
27 |       name: MapperPipe_resize_image
28 |       variant: TF
29 |       variant_ctx:
30 |         num_parallel_calls: null
31 |         variant_type: TF
32 |     4:
33 |       name: MapperPipe_distorted_bounding_box_crop
34 |       variant: TF
35 |       variant_ctx:
36 |         num_parallel_calls: null
37 |         variant_type: TF
38 |     5:
39 |       name: MapperPipe_read_image
40 |       variant: TF
41 |       variant_ctx:
42 |         num_parallel_calls: null
43 |         variant_type: TF
44 |     6:
45 |       name: COCOFileSourcePipe
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     7:
50 |       name: PrefetcherPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     8:
55 |       fused_pipes:
56 |       - 5
57 |       - 4
58 |       - 1
59 |       - 3
60 |       - 2
61 |       - 0
62 |       name: FusedPipe
63 |       variant: TF
64 |       variant_ctx:
65 |         num_parallel_calls: -1
66 |         variant_type: TF
67 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/test_app.py:
--------------------------------------------------------------------------------
 1 | import fastflow as ff
 2 | import tensorflow as tf
 3 | 
 4 | from eval_app_runner import App
 5 | 
 6 | class TestModel(ff.FastFlowModel):
 7 |     def __init__(self):
 8 |         super().__init__()
 9 |     
10 |     def call(self, inputs):
11 |         # do nothing
12 |         return inputs
13 | 
14 |     def __deepcopy__(self):
15 |         return TestModel()
16 | 
class TestApp(App):
    """Evaluation app that pairs ``TestModel`` with a random int32 dataset."""

    def __init__(self, args, config):
        super().__init__(args, config)

        # Random integer tensor of shape [100000, 32], wrapped in a 1-tuple.
        samples = tf.random.uniform([100000, 32], maxval=100, dtype=tf.int32)
        self.ds = tf.data.Dataset.from_tensor_slices((samples,))

    def dummy_loss(self, y_true, y_pred):
        """Return a constant zero loss regardless of the arguments."""
        return tf.constant(0.0)

    def create_model(self):
        """Build a ``TestModel`` compiled with Adam and the dummy loss."""
        net = TestModel()
        net.compile(optimizer="adam", loss=self.dummy_loss)
        return net

    def create_dataset(self, num_parallel):
        """Map each element ``x`` to ``(x + 1, x)``, batch by 32 and prefetch."""

        def _to_pair(x):
            return x + 1, x

        mapped = self.ds.map(
            _to_pair,
            num_parallel_calls=num_parallel,
            name="prep_begin",
        )
        batched = mapped.batch(32)
        return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

    def create_valid_dataset(self, num_parallel):
        """This app has no validation dataset."""
        return None
42 |         
43 |     
def main(config_path="/home/myzhao/FastFlow/eval/test/config.yaml"):
    """Fit ``TestModel`` on a random dataset using a FastFlow config.

    The dataset and the constant-zero loss mirror the ones defined on
    ``TestApp`` in this file; the previous version referenced ``dataloader``
    and ``dummy_loss``, neither of which exists at module scope.

    Args:
        config_path: YAML file handed to ``ff.FastFlowConfig.from_yaml``.
            NOTE(review): the default is a machine-specific path kept only
            for backward compatibility — pass an explicit path elsewhere.
    """
    samples = tf.random.uniform([100000, 32], maxval=100, dtype=tf.int32)
    ds = tf.data.Dataset.from_tensor_slices((samples,))
    ds = ds.map(lambda x: (x + 1, x), name="prep_begin")
    ds = ds.batch(32)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    model = TestModel()
    model.compile(
        optimizer="adam",
        loss=lambda y_true, y_pred: tf.constant(0.0),
    )

    config = ff.FastFlowConfig.from_yaml(config_path)

    model.fit(x=ds, auto_offload_conf=config, epochs=10)


if __name__ == "__main__":
    main()


--------------------------------------------------------------------------------
/evaluation/run_caching.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Note, we provide the exact stats and config files produced by the optimizer on our setup in order to enable reproducibility.
 4 | # To generate new profiling stats and re-run the optimizer, use the --run_profiling and --generate_plan flags in eval_cedar.py.
 5 | # Replace the stats and optimizer-produced config in the following commands
 6 | 
 7 | # cv-torch
 8 | # with caching
 9 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_cache_dataset.py --master_feature_config pipelines/simclrv2/cache_results/configs/new_simclrv2_optimized_plan.yml --use_ray --ray_ip 10.138.0.8
10 | # without caching
11 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_cache_dataset.py --master_feature_config pipelines/simclrv2/cache_results/configs/no_cache_plan.yml --use_ray --ray_ip 10.138.0.8
12 | 
13 | # nlp-torch
14 | # with caching
15 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_cache_dataset.py --use_ray --ray_ip 10.138.0.8 --num_total_samples 100000 --master_feature_config pipelines/wikitext103/cache_results/configs/new_wikitext_optimal_cache_plan.yml
16 | # without caching
17 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_cache_dataset.py --use_ray --ray_ip 10.138.0.8 --num_total_samples 100000 --master_feature_config pipelines/wikitext103/cache_results/configs/wikitext_no_caching_plan.yml
18 | 
19 | # asr
20 | # without caching note that the optimizer generates the optimal plan, which does not cache
21 | python eval_cedar.py --dataset_file pipelines/commonvoice/cedar_cache_dataset.py --master_feature_config pipelines/commonvoice/cache_results/configs/no_caching_eval_remote.yml --num_total_samples 10000 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: ''
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Embedding(50257, 764)
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_ToTensor()
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_AddToken()
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_AddToken()
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     5:
41 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     6:
46 |       name: MapperPipe_Truncate()
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     7:
51 |       name: MapperPipe_GPT2BPETokenizer()
52 |       variant: INPROCESS
53 |       variant_ctx:
54 |         variant_type: INPROCESS
55 |     8:
56 |       name: LocalLinePipe
57 |       variant: INPROCESS
58 |       variant_ctx:
59 |         variant_type: INPROCESS
60 |     9:
61 |       name: PrefetcherPipe
62 |       variant: INPROCESS
63 |       variant_ctx:
64 |         variant_type: INPROCESS
65 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: ''
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Embedding(50257, 764)
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_ToTensor()
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_AddToken()
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_AddToken()
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     5:
41 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     6:
46 |       name: MapperPipe_Truncate()
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     7:
51 |       name: MapperPipe_GPT2BPETokenizer()
52 |       variant: INPROCESS
53 |       variant_ctx:
54 |         variant_type: INPROCESS
55 |     8:
56 |       name: LocalLinePipe
57 |       variant: INPROCESS
58 |       variant_ctx:
59 |         variant_type: INPROCESS
60 |     9:
61 |       name: PrefetcherPipe
62 |       variant: INPROCESS
63 |       variant_ctx:
64 |         variant_type: INPROCESS
65 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: ''
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Embedding(50257, 764)
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_ToTensor()
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_AddToken()
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_AddToken()
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     5:
41 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     6:
46 |       name: MapperPipe_Truncate()
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     7:
51 |       name: MapperPipe_GPT2BPETokenizer()
52 |       variant: INPROCESS
53 |       variant_ctx:
54 |         variant_type: INPROCESS
55 |     8:
56 |       name: LocalLinePipe
57 |       variant: INPROCESS
58 |       variant_ctx:
59 |         variant_type: INPROCESS
60 |     9:
61 |       name: PrefetcherPipe
62 |       variant: INPROCESS
63 |       variant_ctx:
64 |         variant_type: INPROCESS
65 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_remote.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     8: '10'
 7 |     9: ''
 8 |     10: '2'
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: BatcherPipe(batch_size=1)
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     1:
17 |       name: MapperPipe_Embedding(50257, 764)
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     2:
22 |       name: MapperPipe_ToTensor()
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     3:
27 |       name: MapperPipe_AddToken()
28 |       variant: INPROCESS
29 |     4:
30 |       name: MapperPipe_AddToken()
31 |       variant: INPROCESS
32 |     5:
33 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
34 |       variant: INPROCESS
35 |     6:
36 |       name: MapperPipe_Truncate()
37 |       variant: INPROCESS
38 |     7:
39 |       name: MapperPipe_GPT2BPETokenizer()
40 |       variant: INPROCESS
41 |     8:
42 |       name: LocalLinePipe
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     9:
47 |       name: PrefetcherPipe
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     10:
52 |       fused_pipes:
53 |       - 7
54 |       - 6
55 |       - 5
56 |       - 4
57 |       - 3
58 |       name: FusedPipe
59 |       variant: RAY
60 |       variant_ctx:
61 |         max_inflight: 1500
62 |         max_prefetch: 1500
63 |         n_actors: 32
64 |         submit_batch_size: 500
65 |         use_threads: true
66 |         variant_type: RAY
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/eval_local.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     8: '10'
 7 |     9: ''
 8 |     10: '2'
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: BatcherPipe(batch_size=1)
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     1:
17 |       name: MapperPipe_Embedding(50257, 764)
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     2:
22 |       name: MapperPipe_ToTensor()
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     3:
27 |       name: MapperPipe_AddToken()
28 |       variant: INPROCESS
29 |     4:
30 |       name: MapperPipe_AddToken()
31 |       variant: INPROCESS
32 |     5:
33 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
34 |       variant: INPROCESS
35 |     6:
36 |       name: MapperPipe_Truncate()
37 |       variant: INPROCESS
38 |     7:
39 |       name: MapperPipe_GPT2BPETokenizer()
40 |       variant: INPROCESS
41 |     8:
42 |       name: LocalLinePipe
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     9:
47 |       name: PrefetcherPipe
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     10:
52 |       fused_pipes:
53 |       - 7
54 |       - 6
55 |       - 5
56 |       - 4
57 |       - 3
58 |       name: FusedPipe
59 |       variant: SMP
60 |       variant_ctx:
61 |         disable_torch_parallelism: true
62 |         max_inflight: 50
63 |         max_prefetch: 50
64 |         n_procs: 8
65 |         use_threads: true
66 |         variant_type: SMP
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/coco/configs/cedar_remote_plan.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     2: '0'
 5 |     3: '2'
 6 |     4: '3'
 7 |     5: '4'
 8 |     7: '9'
 9 |     8: ''
10 |     9: '5'
11 |   n_local_workers: 8
12 |   pipes:
13 |     0:
14 |       name: MapperPipe_to_tensor
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_distort
20 |       variant: INPROCESS
21 |     2:
22 |       name: MapperPipe_RandomHorizontalFlip(p=1)
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     3:
27 |       name: MapperPipe_SanitizeBoundingBox(min_size=1.0, labels_getter=boxes)
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 |     4:
32 |       name: MapperPipe_crop
33 |       variant: INPROCESS
34 |       variant_ctx:
35 |         variant_type: INPROCESS
36 |     5:
37 |       name: MapperPipe_zoom_out
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     6:
42 |       name: MapperPipe_read_image
43 |       variant: INPROCESS
44 |     7:
45 |       name: COCOFileSourcePipe
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     8:
50 |       name: PrefetcherPipe
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     9:
55 |       fused_pipes:
56 |       - 6
57 |       - 1
58 |       name: FusedPipe
59 |       variant: RAY
60 |       variant_ctx:
61 |         max_inflight: 100
62 |         max_prefetch: 100
63 |         n_actors: 4
64 |         submit_batch_size: 2
65 |         use_threads: true
66 |         variant_type: RAY
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/download.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Download the Common Voice dataset to local filesystem.
 3 | """
 4 | 
 5 | import logging
 6 | import pathlib
 7 | import tarfile
 8 | from google.cloud import storage
 9 | 
# Directory name checked under the data directory: download_dataset skips the
# download when it already exists (presumably the folder the tarball unpacks
# to -- confirm against the archive layout).
DATASET_NAME = "cv-corpus-15.0-delta-2023-09-08"
# Data directory, resolved relative to the directory two levels above the one
# containing this file.
DATASET_LOC = "datasets/commonvoice"
# File name the downloaded tarball is saved under inside the data directory.
DATASET_FILE = "cv-corpus-15.0-delta-2023-09-08-en.tar"
# Google Cloud Storage bucket and blob the tarball is fetched from.
BUCKET_NAME = "ember-data"
SOURCE_BLOB_NAME = "cv-corpus-15.0-delta-2023-09-08-en.tar"


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
18 | 
19 | 
def download_if_not_exists(path: pathlib.Path):
    """
    Fetch the dataset blob from Google Cloud Storage unless ``path`` is
    already a file.

    The blob ``SOURCE_BLOB_NAME`` in bucket ``BUCKET_NAME`` is written to
    ``path``. A one-line status message is printed in either case.

    Args:
        path: Destination file for the downloaded blob.
    """
    destination = str(path)

    # Guard clause: nothing to do when the file is already on disk.
    if path.is_file():
        print("Path already exists: {}".format(destination))
        return

    client = storage.Client()
    remote_blob = client.bucket(BUCKET_NAME).blob(SOURCE_BLOB_NAME)
    remote_blob.download_to_filename(destination)
    print("Downloaded {}".format(destination))
29 | 
30 | 
def download_dataset() -> None:
    """
    Make sure the Common Voice dataset is extracted in the data directory.

    The data directory is ``DATASET_LOC`` joined onto the directory two
    levels above the one containing this file; it is created if missing.
    When ``DATASET_NAME`` does not yet exist inside it, the tarball is
    downloaded (unless already present), extracted next to itself, and then
    deleted.
    """
    logger.info("Downloading Commonvoice Dataset")

    base_dir = pathlib.Path(__file__).resolve().parents[2]
    data_dir = base_dir.joinpath(DATASET_LOC)
    if not data_dir.exists():
        data_dir.mkdir(parents=True, exist_ok=True)

    dataset_file = data_dir / pathlib.Path(DATASET_FILE)
    zip_dir = dataset_file.parent

    # The dataset directory is already in place: skip download and extraction.
    if (zip_dir / DATASET_NAME).exists():
        return

    print(f"Downloading dataset to {str(dataset_file)}...")
    download_if_not_exists(dataset_file)

    with tarfile.open(dataset_file, "r") as archive:
        archive.extractall(path=str(zip_dir))

    # Remove the tarball once its contents have been extracted.
    dataset_file.unlink()
50 | 
51 | 
# Script entry point: download the dataset when this file is run directly.
if __name__ == "__main__":
    download_dataset()
54 | 


--------------------------------------------------------------------------------
/evaluation/plots/aggregate_data.csv:
--------------------------------------------------------------------------------
 1 | Pipeline,System,Throughput
 2 | CV-torch,torch,165.1435348
 3 | CV-torch,ember-local,338.9533219
 4 | CV-torch,ray-local,182.7110468
 5 | CV-torch,ember-remote,851.9118309
 6 | CV-torch,ray-remote,613.7542131
 7 | CV-tf,tf,434.7167386
 8 | CV-tf,ember-local,438.4405241
 9 | CV-tf,ray-local,157.9693704
10 | CV-tf,plumber,338.0819766
11 | CV-tf,ember-remote,965.1411681
12 | CV-tf,ray-remote,548.0062504
13 | CV-tf,tfdata-service,947.6581265
14 | CV-tf,fastflow,772.5337355
15 | NLP-torch,torch,1563.607555
16 | NLP-torch,ember-local,2960.273135
17 | NLP-torch,ray-local,1412.855098
18 | NLP-torch,ember-remote,4764.476066
19 | NLP-torch,ray-remote,1465.342987
20 | NLP-hf-tf,tf,634.4627475
21 | NLP-hf-tf,ember-local,1205.012853
22 | NLP-hf-tf,ray-local,1413.387607
23 | NLP-hf-tf,ember-remote,2408.033199
24 | NLP-hf-tf,ray-remote,798.9287172
25 | NLP-tf,tf,6230.335504
26 | NLP-tf,ember-local,5695.571693
27 | NLP-tf,ray-local,1257.46357
28 | NLP-tf,ember-remote,5563.385506
29 | NLP-tf,ray-remote,2053.085996
30 | NLP-tf,tfdata-service,2045.847441
31 | NLP-tf,fastflow,1610.742536
32 | ASR,torch,24.65157059
33 | ASR,tf,17.22063537
34 | ASR,ember-local,105.7026584
35 | ASR,ray-local,105.724439
36 | ASR,ember-remote,498.9273063
37 | ASR,ray-remote,505.7816422
38 | SSD-torch,ray-local,21.36905199
39 | SSD-torch,ember-local,58.36089788
40 | SSD-torch,torch,19.92272835
41 | SSD-torch,ember-remote,92.27214569
42 | SSD-torch,ray-remote,56.42511784
43 | SSD-tf,ember-local,162.1409238
44 | SSD-tf,tf,72.19816589
45 | SSD-tf,ray-local,15.21952602
46 | SSD-tf,plumber,86.74691139
47 | SSD-tf,ember-remote,158.623046
48 | SSD-tf,tfdata-service,32.12837509
49 | SSD-tf,ray-remote,31.81599492
50 | SSD-tf,fastflow,56.7238234


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/download.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Download the WikiText103 dataset to local filesystem.
 3 | """
 4 | 
 5 | import logging
 6 | import pathlib
 7 | import urllib.request
 8 | import zipfile
 9 | 
# NOTE(review): not referenced elsewhere in this module; download_dataset
# checks for a hard-coded "wikitext-103" directory instead.
DATASET_NAME = "wikitext103"
# Data directory, resolved relative to the directory two levels above the one
# containing this file.
DATASET_LOC = "datasets/wikitext103"
# File name the downloaded zip archive is saved under inside the data directory.
DATASET_FILE = "wikitext-103-v1.zip"
# URL the zip archive is retrieved from.
DATASET_SOURCE = "https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-103-v1.zip"  # noqa: E501


# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
17 | 
18 | 
def download_if_not_exists(url: str, path: pathlib.Path):
    """
    Retrieve ``url`` into ``path`` unless ``path`` is already a file.

    A one-line status message is printed in either case.

    Args:
        url: Location to retrieve the file from.
        path: Destination file on the local filesystem.
    """
    destination = str(path)

    # Guard clause: keep an existing file untouched.
    if path.is_file():
        print("Path already exists: {}".format(destination))
        return

    urllib.request.urlretrieve(url, destination)
    print("Downloaded {}".format(destination))
25 | 
26 | 
def download_dataset() -> None:
    """
    Make sure the WikiText103 dataset is extracted in the data directory.

    The data directory is ``DATASET_LOC`` joined onto the directory two
    levels above the one containing this file; it is created if missing.
    When no ``wikitext-103`` entry exists inside it, the zip archive is
    retrieved from ``DATASET_SOURCE``, extracted next to itself, and then
    deleted.
    """
    logger.info("Downloading Wikitext103 Dataset")

    base_dir = pathlib.Path(__file__).resolve().parents[2]
    data_dir = base_dir.joinpath(DATASET_LOC)
    if not data_dir.exists():
        data_dir.mkdir(parents=True, exist_ok=True)

    dataset_file = data_dir / pathlib.Path(DATASET_FILE)
    zip_dir = dataset_file.parent

    print(zip_dir)
    # The extracted dataset is already in place: nothing more to do.
    if (zip_dir / "wikitext-103").exists():
        return

    print(f"Downloading dataset to {str(dataset_file)}...")
    # NOTE(review): the archive is always retrieved here; the
    # download_if_not_exists helper in this module is not used.
    urllib.request.urlretrieve(DATASET_SOURCE, str(dataset_file))

    logger.info("Extracting Wikitext103 data from zip file.")
    with zipfile.ZipFile(dataset_file, "r") as archive:
        archive.extractall(path=zip_dir)
    logger.info("Done extracting Wikitext103 data from zip file.")

    # Remove the zip archive once its contents have been extracted.
    dataset_file.unlink()
48 | 
49 | 
# Script entry point: download the dataset when this file is run directly.
if __name__ == "__main__":
    download_dataset()
52 | 


--------------------------------------------------------------------------------
/cedar/pipes/optimize/noop.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional
 2 | from .registry import register_optimizer_pipe
 3 | from ..context import InProcessPipeVariantContext
 4 | from ..pipe import (
 5 |     Pipe,
 6 | )
 7 | from ..variant import (
 8 |     InProcessPipeVariant,
 9 |     PipeVariant,
10 | )
11 | 
12 | 
@register_optimizer_pipe("NoopOptimizerPipe")
class NoopOptimizerPipe(Pipe):
    """
    Pass-through pipe: its in-process variant yields every item produced by
    its single input pipe, unchanged.

    Meant to be inserted as an optimization rather than declared directly
    within the feature.

    Primarily intended for testing.

    Args:
        input_pipe: Upstream pipe to forward from. When omitted (or falsy),
            the pipe is constructed with an empty input list.
        is_random: Passed through unchanged to the ``Pipe`` constructor.
    """

    def __init__(
        self, input_pipe: Optional[Pipe] = None, is_random: bool = False
    ):
        # Zero or one upstream pipe, depending on whether one was supplied.
        upstream = [input_pipe] if input_pipe else []
        super().__init__("NoopOptimizerPipe", upstream, is_random=is_random)

    def _to_inprocess(
        self, variant_ctx: InProcessPipeVariantContext
    ) -> InProcessPipeVariant:
        """
        Build the in-process variant on top of the first input pipe's
        variant. ``variant_ctx`` is not used.
        """
        source_variant = self.input_pipes[0].pipe_variant
        return InProcessNoopOptimizerPipeVariant(source_variant)

    def _check_mutation(self) -> None:
        """
        Run the base-class checks, then require exactly one input pipe.

        Raises:
            RuntimeError: If the number of input pipes is not one.
        """
        super()._check_mutation()

        n_inputs = len(self.input_pipes)
        if n_inputs != 1:
            raise RuntimeError("NoopOptimizerPipe only accepts one input.")
46 | 
47 | 
class InProcessNoopOptimizerPipeVariant(InProcessPipeVariant):
    """
    In-process variant of the noop pipe: re-yields the items of its input
    variant without modifying them.
    """

    def __init__(self, input_pipe_variant: Optional[PipeVariant]):
        super().__init__(input_pipe_variant)

    def _iter_impl(self):
        """Yield each item of ``self.input_pipe_variant`` as-is, in order."""
        upstream = self.input_pipe_variant
        for item in upstream:
            yield item
55 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/configs/ablation_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '9'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: ''
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Embedding(50257, 764)
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_ToTensor()
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_AddToken()
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_AddToken()
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     5:
41 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     6:
46 |       name: MapperPipe_Truncate()
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     7:
51 |       name: MapperPipe_GPT2BPETokenizer()
52 |       variant: RAY
53 |       variant_ctx:
54 |         max_inflight: 35040
55 |         max_prefetch: 35040
56 |         n_actors: 32
57 |         submit_batch_size: 365
58 |         use_threads: true
59 |         variant_type: RAY
60 |     8:
61 |       name: LocalLinePipe
62 |       variant: INPROCESS
63 |       variant_ctx:
64 |         variant_type: INPROCESS
65 |     9:
66 |       name: PrefetcherPipe
67 |       variant: INPROCESS
68 |       variant_ctx:
69 |         variant_type: INPROCESS
70 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_controller_local.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan: 
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     7: '1'
 6 |     9: '11'
 7 |     10: ''
 8 |     11: '7'
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: BatcherPipe(batch_size=1)
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     1:
17 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     2:
22 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
23 |       variant: INPROCESS
24 |     3:
25 |       name: MapperPipe_Grayscale(num_output_channels=1)
26 |       variant: INPROCESS
27 |     4:
28 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
29 |         1.1), hue=(-0.1, 0.1))
30 |       variant: INPROCESS
31 |     5:
32 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
33 |       variant: INPROCESS
34 |     6:
35 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
36 |         1.3333), interpolation=bilinear, antialias=warn)
37 |       variant: INPROCESS
38 |     7:
39 |       name: MapperPipe_to_float
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     8:
44 |       name: MapperPipe_read_image_pytorch
45 |       variant: INPROCESS
46 |     9:
47 |       name: LocalFSListerPipe
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     10:
52 |       name: PrefetcherPipe
53 |       variant: INPROCESS
54 |       variant_ctx:
55 |         variant_type: INPROCESS
56 |     11:
57 |       fused_pipes:
58 |       - 8
59 |       - 3
60 |       - 6
61 |       - 2
62 |       - 5
63 |       - 4
64 |       name: FusedPipe
65 |       variant: INPROCESS
66 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/configs/ablation_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '8'
 4 |     1: '3'
 5 |     2: '1'
 6 |     3: '0'
 7 |     4: '2'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: ''
12 |   n_local_workers: 8
13 |   pipes:
14 |     0:
15 |       name: MapperPipe_mel
16 |       variant: RAY
17 |       variant_ctx:
18 |         max_inflight: 100
19 |         max_prefetch: 100
20 |         n_actors: 2
21 |         submit_batch_size: 1
22 |         use_threads: true
23 |         variant_type: RAY
24 |     1:
25 |       name: MapperPipe_frequency_mask
26 |       variant: INPROCESS
27 |       variant_ctx:
28 |         variant_type: INPROCESS
29 |     2:
30 |       name: MapperPipe_time_mask
31 |       variant: INPROCESS
32 |       variant_ctx:
33 |         variant_type: INPROCESS
34 |     3:
35 |       name: MapperPipe__stretch
36 |       variant: RAY
37 |       variant_ctx:
38 |         max_inflight: 100
39 |         max_prefetch: 100
40 |         n_actors: 2
41 |         submit_batch_size: 2
42 |         use_threads: true
43 |         variant_type: RAY
44 |     4:
45 |       name: MapperPipe__spec
46 |       variant: INPROCESS
47 |       variant_ctx:
48 |         variant_type: INPROCESS
49 |     5:
50 |       name: MapperPipe__resample
51 |       variant: INPROCESS
52 |       variant_ctx:
53 |         variant_type: INPROCESS
54 |     6:
55 |       name: MapperPipe__read
56 |       variant: RAY
57 |       variant_ctx:
58 |         max_inflight: 100
59 |         max_prefetch: 100
60 |         n_actors: 2
61 |         submit_batch_size: 4
62 |         use_threads: true
63 |         variant_type: RAY
64 |     7:
65 |       name: LocalFSListerPipe
66 |       variant: INPROCESS
67 |       variant_ctx:
68 |         variant_type: INPROCESS
69 |     8:
70 |       name: PrefetcherPipe
71 |       variant: INPROCESS
72 |       variant_ctx:
73 |         variant_type: INPROCESS
74 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_p.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: '8'
13 |   n_local_workers: 8
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_Grayscale(num_output_channels=1)
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
37 |         1.1), hue=(-0.1, 0.1))
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
48 |         1.3333), interpolation=bilinear, antialias=warn)
49 |       variant: INPROCESS
50 |       variant_ctx:
51 |         variant_type: INPROCESS
52 |     7:
53 |       name: MapperPipe_to_float
54 |       variant: INPROCESS
55 |       variant_ctx:
56 |         variant_type: INPROCESS
57 |     8:
58 |       name: ImageReaderPipe
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     9:
63 |       name: LocalFSListerPipe
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_p_r.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '5'
 6 |     3: '6'
 7 |     4: '7'
 8 |     5: '4'
 9 |     6: '2'
10 |     7: '1'
11 |     8: '3'
12 |     9: '8'
13 |   n_local_workers: 8
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_Grayscale(num_output_channels=1)
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
37 |         1.1), hue=(-0.1, 0.1))
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
48 |         1.3333), interpolation=bilinear, antialias=warn)
49 |       variant: INPROCESS
50 |       variant_ctx:
51 |         variant_type: INPROCESS
52 |     7:
53 |       name: MapperPipe_to_float
54 |       variant: INPROCESS
55 |       variant_ctx:
56 |         variant_type: INPROCESS
57 |     8:
58 |       name: ImageReaderPipe
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     9:
63 |       name: LocalFSListerPipe
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |   


--------------------------------------------------------------------------------
/cedar/service/actor.py:
--------------------------------------------------------------------------------
 1 | import abc
 2 | import logging
 3 | import queue
 4 | import multiprocessing as mp
 5 | import torch
 6 | from typing import Any
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
class SMPActor(mp.Process):
    """
    Worker process that takes samples off a request queue, transforms them
    with ``process``, and puts the results on a response queue.

    Subclasses provide the transformation by overriding ``process``. The
    queues must be attached with ``register`` before the actor is run.

    Args:
        name: Name assigned to this process; also used in log messages.
        disable_torch_parallelism: If True, ``run`` restricts torch to one
            intra-op and one inter-op thread before entering the work loop.
    """

    def __init__(self, name: str, disable_torch_parallelism: bool = True):
        super().__init__()
        # Queues stay unset until ``register`` is called.
        self.req_q = None
        self.resp_q = None
        self.name = name
        # Set by ``stop``; polled by the work loop in ``run``.
        self.shutdown_event = mp.Event()
        self.disable_torch_parallelism = disable_torch_parallelism

    def register(self, req_q: mp.Queue, resp_q: mp.Queue):
        """
        Attach the queues this actor reads requests from and writes
        responses to.

        Args:
            req_q: Queue that incoming samples are taken from.
            resp_q: Queue that processed samples are put on.
        """
        logger.info(f"Registered SMPActor for {self.name}.")
        self.req_q = req_q
        self.resp_q = resp_q

    def run(self):
        """
        Work loop: process samples from the request queue until ``stop``
        has been called.

        Raises:
            AssertionError: If ``register`` was not called beforehand.
        """
        # Need to set this to reduce contention in torch threads...
        if self.disable_torch_parallelism:
            torch.set_num_threads(1)
            torch.set_num_interop_threads(1)
        logger.info(f"Running SMPActor for {self.name}.")
        if self.req_q is None or self.resp_q is None:
            logger.error("SMPActor not registered!")
            raise AssertionError("SMPActor not registered.")

        while not self.shutdown_event.is_set():
            try:
                # Wake up at least once a second so that the shutdown event
                # is re-checked even when no requests arrive.
                sample = self.req_q.get(block=True, timeout=1)
            except queue.Empty:
                continue
            if hasattr(sample, "data"):
                # Sample exposes a ``data`` attribute: transform the payload
                # in place and forward the same sample object.
                sample.data = self.process(sample.data)
            else:
                sample = self.process(sample)
            self.resp_q.put(sample, block=True)

    # NOTE: this class does not use ``abc.ABCMeta``, so the decorator below
    # does not prevent instantiation; it only marks the intended override.
    @abc.abstractmethod
    def process(self, data: Any) -> Any:
        """
        Transform one sample payload; intended to be overridden.

        The return value is what ``run`` forwards on the response queue
        (or stores back into ``sample.data``). This base implementation
        returns None.

        Args:
            data: The payload taken from the request queue.

        Returns:
            The processed payload.
        """
        pass

    def stop(self) -> None:
        """
        Gracefully shuts down this process
        """
        logger.info(f"Stopping SMPActor for {self.name}.")
        self.shutdown_event.set()
56 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: '8'
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_Grayscale(num_output_channels=1)
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
37 |         1.1), hue=(-0.1, 0.1))
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
48 |         1.3333), interpolation=bilinear, antialias=warn)
49 |       variant: INPROCESS
50 |       variant_ctx:
51 |         variant_type: INPROCESS
52 |     7:
53 |       name: MapperPipe_to_float
54 |       variant: INPROCESS
55 |       variant_ctx:
56 |         variant_type: INPROCESS
57 |     8:
58 |       name: ImageReaderPipe
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     9:
63 |       name: LocalFSListerPipe
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/cache_results/configs/no_cache_plan.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '10'
 3 |   1: '0'
 4 |   3: '6'
 5 |   6: '11'
 6 |   7: '1'
 7 |   8: '3'
 8 |   9: '8'
 9 |   10: ''
10 |   11: '7'
11 | n_local_workers: 8
12 | pipes:
13 |   0:
14 |     name: BatcherPipe(batch_size=8)
15 |     variant: INPROCESS
16 |     variant_ctx:
17 |       variant_type: INPROCESS
18 |   1:
19 |     name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
20 |     variant: INPROCESS
21 |     variant_ctx:
22 |       variant_type: INPROCESS
23 |   2:
24 |     name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
25 |   3:
26 |     name: MapperPipe_Grayscale(num_output_channels=1)
27 |     variant: INPROCESS
28 |     variant_ctx:
29 |       variant_type: INPROCESS
30 |   4:
31 |     name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
32 |       1.1), hue=(-0.1, 0.1))
33 |   5:
34 |     name: MapperPipe_RandomHorizontalFlip(p=0.5)
35 |   6:
36 |     name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
37 |       1.3333), interpolation=bilinear, antialias=warn)
38 |     variant: INPROCESS
39 |     variant_ctx:
40 |       variant_type: INPROCESS
41 |   7:
42 |     name: MapperPipe_to_float
43 |     variant: INPROCESS
44 |     variant_ctx:
45 |       variant_type: INPROCESS
46 |   8:
47 |     name: ImageReaderPipe
48 |     variant: INPROCESS
49 |     variant_ctx:
50 |       variant_type: INPROCESS
51 |   9:
52 |     name: LocalFSListerPipe
53 |     variant: INPROCESS
54 |     variant_ctx:
55 |       variant_type: INPROCESS
56 |   10:
57 |     name: PrefetcherPipe
58 |     variant: INPROCESS
59 |     variant_ctx:
60 |       variant_type: INPROCESS
61 |   11:
62 |     fused_pipes:
63 |     - 2
64 |     - 5
65 |     - 4
66 |     name: FusedPipe
67 |     variant: RAY
68 |     variant_ctx:
69 |       max_inflight: 100
70 |       max_prefetch: 100
71 |       n_actors: 4
72 |       submit_batch_size: 16
73 |       use_threads: true
74 |       variant_type: RAY
75 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_tokenizer_one_offload.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '9'
11 |     8: '7'
12 |     9: '6'
13 |     10: ''
14 |   n_local_workers: 1
15 |   pipes:
16 |     0:
17 |       name: BatcherPipe(batch_size=1)
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     1:
22 |       name: MapperPipe_Embedding(50257, 764)
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     2:
27 |       name: MapperPipe_ToTensor()
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 |     3:
32 |       name: MapperPipe_AddToken()
33 |       variant: INPROCESS
34 |       variant_ctx:
35 |         variant_type: INPROCESS
36 |     4:
37 |       name: MapperPipe_AddToken()
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_Truncate()
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     7:
52 |       name: MapperPipe_GPT2BPETokenizer()
53 |       variant: RAY
54 |       variant_ctx:
55 |         max_inflight: 10000
56 |         max_prefetch: 10000
57 |         n_actors: 16
58 |         submit_batch_size: 500
59 |         use_threads: true
60 |         variant_type: RAY
61 |     8:
62 |       name: LocalLinePipe
63 |       variant: INPROCESS
64 |       variant_ctx:
65 |         variant_type: INPROCESS
66 |     9:
67 |       name: ObjectDiskCachePipe
68 |       variant: INPROCESS
69 |       variant_ctx:
70 |         variant_type: INPROCESS
71 |     10:
72 |       name: PrefetcherPipe
73 |       variant: INPROCESS
74 |       variant_ctx:
75 |         variant_type: INPROCESS
76 | 


--------------------------------------------------------------------------------
/cedar/service/multithread.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import threading
 3 | 
 4 | from concurrent.futures import ThreadPoolExecutor, Future
 5 | 
 6 | from .task import MultithreadedTask
 7 | 
 8 | logger = logging.getLogger(__name__)
 9 | 
10 | 
class MultithreadedService:
    """
    A multithread service that executes preprocessing tasks using
    a thread pool.

    Compared to the MultiprocessService, using MultithreadedService
    is lighter weight, as tasks are executed in the same process.
    However, threads are subject to the GIL, so CPU-bound workloads
    may be better executed in the MultiprocessService

    Args:
        num_threads: Number of threads in the pool

    Raises:
        ValueError: If ``num_threads`` is less than 1.
    """

    def __init__(self, num_threads: int):
        if num_threads < 1:
            raise ValueError(
                "Cannot create a mutlithreaded "
                "service with {} threads.".format(num_threads)
            )

        # Lock serializing executor swaps (resize) against submissions
        self._lock = threading.Lock()

        self.executor = ThreadPoolExecutor(max_workers=num_threads)
        self.n_threads = num_threads
        logger.info(f"Started MUltithread Service with {num_threads} threads.")

    def shutdown(self) -> None:
        """
        Shut down the current executor, if one exists.

        This is also invoked from ``__del__``, which runs even when
        ``__init__`` raised before ``self.executor`` was assigned, so a
        missing executor is tolerated instead of raising AttributeError.
        """
        executor = getattr(self, "executor", None)
        if executor is not None:
            executor.shutdown()

    def resize(self, num_threads: int) -> None:
        """
        Replace the thread pool with a new one of ``num_threads`` workers.

        The old pool is shut down with ``wait=True`` and without cancelling
        pending futures before it is replaced.

        Args:
            num_threads: Number of threads in the new pool

        Raises:
            ValueError: Propagated from ThreadPoolExecutor when
                ``num_threads`` is not a valid pool size. In that case
                the existing pool is left untouched and remains usable.
        """
        with self._lock:
            # Build the replacement first: if the requested size is invalid
            # this raises before the working executor has been shut down.
            new_executor = ThreadPoolExecutor(max_workers=num_threads)
            # Read the previous size under the lock so that concurrent
            # resizes report consistent values.
            prev_num_threads = self.n_threads
            self.executor.shutdown(wait=True, cancel_futures=False)
            self.executor = new_executor
            self.n_threads = num_threads
        logger.info(
            f"Resized Multithreaded Pool from {prev_num_threads}"
            f" to {num_threads} threads"
        )

    def submit(self, task: "MultithreadedTask") -> Future:
        """
        Schedule ``task.process`` on the current executor.

        Args:
            task: Task whose ``process`` method is run in the pool

        Returns:
            The Future returned by the executor for this submission.
        """
        # Hold the lock so the executor cannot be swapped out by resize()
        # between looking it up and submitting to it.
        with self._lock:
            future = self.executor.submit(task.process)
        return future

    def __del__(self):
        self.shutdown()
60 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_ember_remote.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan: 
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     7: '1'
 6 |     9: '11'
 7 |     10: ''
 8 |     11: '7'
 9 |   n_local_workers: 8
10 |   pipes:
11 |     0:
12 |       name: BatcherPipe(batch_size=1)
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     1:
17 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     2:
22 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
23 |       variant: INPROCESS
24 |     3:
25 |       name: MapperPipe_Grayscale(num_output_channels=1)
26 |       variant: INPROCESS
27 |     4:
28 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
29 |         1.1), hue=(-0.1, 0.1))
30 |       variant: INPROCESS
31 |     5:
32 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
33 |       variant: INPROCESS
34 |     6:
35 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
36 |         1.3333), interpolation=bilinear, antialias=warn)
37 |       variant: INPROCESS
38 |     7:
39 |       name: MapperPipe_to_float
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     8:
44 |       name: MapperPipe_read_image_pytorch
45 |       variant: INPROCESS
46 |     9:
47 |       name: LocalFSListerPipe
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     10:
52 |       name: PrefetcherPipe
53 |       variant: INPROCESS
54 |       variant_ctx:
55 |         variant_type: INPROCESS
56 |     11:
57 |       fused_pipes:
58 |       - 8
59 |       - 3
60 |       - 6
61 |       - 2
62 |       - 5
63 |       - 4
64 |       name: FusedPipe
65 |       variant: RAY
66 |       variant_ctx:
67 |         max_inflight: 100
68 |         max_prefetch: 100
69 |         n_actors: 4
70 |         submit_batch_size: 33
71 |         use_threads: true
72 |         variant_type: RAY
73 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_controller_remote.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan: 
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     7: '1'
 6 |     9: '11'
 7 |     10: ''
 8 |     11: '7'
 9 |   n_local_workers: 1
10 |   pipes:
11 |     0:
12 |       name: BatcherPipe(batch_size=1)
13 |       variant: INPROCESS
14 |       variant_ctx:
15 |         variant_type: INPROCESS
16 |     1:
17 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     2:
22 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
23 |       variant: INPROCESS
24 |     3:
25 |       name: MapperPipe_Grayscale(num_output_channels=1)
26 |       variant: INPROCESS
27 |     4:
28 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
29 |         1.1), hue=(-0.1, 0.1))
30 |       variant: INPROCESS
31 |     5:
32 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
33 |       variant: INPROCESS
34 |     6:
35 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
36 |         1.3333), interpolation=bilinear, antialias=warn)
37 |       variant: INPROCESS
38 |     7:
39 |       name: MapperPipe_to_float
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     8:
44 |       name: MapperPipe_read_image_pytorch
45 |       variant: INPROCESS
46 |     9:
47 |       name: LocalFSListerPipe
48 |       variant: INPROCESS
49 |       variant_ctx:
50 |         variant_type: INPROCESS
51 |     10:
52 |       name: PrefetcherPipe
53 |       variant: INPROCESS
54 |       variant_ctx:
55 |         variant_type: INPROCESS
56 |     11:
57 |       fused_pipes:
58 |       - 8
59 |       - 3
60 |       - 6
61 |       - 2
62 |       - 5
63 |       - 4
64 |       name: FusedPipe
65 |       variant: RAY
66 |       variant_ctx:
67 |         max_inflight: 1000
68 |         max_prefetch: 1000
69 |         n_actors: 1
70 |         submit_batch_size: 33
71 |         use_threads: true
72 |         variant_type: RAY
73 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_ember_local.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     2: '5'
 6 |     3: '6'
 7 |     4: '7'
 8 |     5: '4'
 9 |     6: '2'
10 |     7: '1'
11 |     8: '3'
12 |     9: '8'
13 |     10: ''
14 |   n_local_workers: 8
15 |   pipes:
16 |     0:
17 |       name: BatcherPipe(batch_size=1)
18 |       variant: INPROCESS
19 |       variant_ctx:
20 |         variant_type: INPROCESS
21 |     1:
22 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
23 |       variant: INPROCESS
24 |       variant_ctx:
25 |         variant_type: INPROCESS
26 |     2:
27 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
28 |       variant: INPROCESS
29 |       variant_ctx:
30 |         variant_type: INPROCESS
31 |     3:
32 |       name: MapperPipe_Grayscale(num_output_channels=1)
33 |       variant: INPROCESS
34 |       variant_ctx:
35 |         variant_type: INPROCESS
36 |     4:
37 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
38 |         1.1), hue=(-0.1, 0.1))
39 |       variant: INPROCESS
40 |       variant_ctx:
41 |         variant_type: INPROCESS
42 |     5:
43 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
44 |       variant: INPROCESS
45 |       variant_ctx:
46 |         variant_type: INPROCESS
47 |     6:
48 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
49 |         1.3333), interpolation=bilinear, antialias=warn)
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 |     7:
54 |       name: MapperPipe_to_float
55 |       variant: INPROCESS
56 |       variant_ctx:
57 |         variant_type: INPROCESS
58 |     8:
59 |       name: ImageReaderPipe
60 |       variant: INPROCESS
61 |       variant_ctx:
62 |         variant_type: INPROCESS
63 |     9:
64 |       name: LocalFSListerPipe
65 |       variant: INPROCESS
66 |       variant_ctx:
67 |         variant_type: INPROCESS
68 |     10:
69 |       name: PrefetcherPipe
70 |       variant: INPROCESS
71 |       variant_ctx:
72 |         variant_type: INPROCESS
73 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/cache_results/configs/cache_after_grayscale.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '11'
 3 |   1: '0'
 4 |   3: '10'
 5 |   6: '12'
 6 |   7: '1'
 7 |   8: '3'
 8 |   9: '8'
 9 |   10: '6'
10 |   11: ''
11 |   12: '7'
12 | n_local_workers: 8
13 | pipes:
14 |   0:
15 |     name: BatcherPipe(batch_size=8)
16 |     variant: INPROCESS
17 |     variant_ctx:
18 |       variant_type: INPROCESS
19 |   1:
20 |     name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
21 |     variant: INPROCESS
22 |     variant_ctx:
23 |       variant_type: INPROCESS
24 |   2:
25 |     name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
26 |   3:
27 |     name: MapperPipe_Grayscale(num_output_channels=1)
28 |     variant: INPROCESS
29 |     variant_ctx:
30 |       variant_type: INPROCESS
31 |   4:
32 |     name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
33 |       1.1), hue=(-0.1, 0.1))
34 |   5:
35 |     name: MapperPipe_RandomHorizontalFlip(p=0.5)
36 |   6:
37 |     name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
38 |       1.3333), interpolation=bilinear, antialias=warn)
39 |     variant: INPROCESS
40 |     variant_ctx:
41 |       variant_type: INPROCESS
42 |   7:
43 |     name: MapperPipe_to_float
44 |     variant: INPROCESS
45 |     variant_ctx:
46 |       variant_type: INPROCESS
47 |   8:
48 |     name: ImageReaderPipe
49 |     variant: INPROCESS
50 |     variant_ctx:
51 |       variant_type: INPROCESS
52 |   9:
53 |     name: LocalFSListerPipe
54 |     variant: INPROCESS
55 |     variant_ctx:
56 |       variant_type: INPROCESS
57 |   10:
58 |     name: ObjectDiskCachePipe
59 |     variant: INPROCESS
60 |     variant_ctx:
61 |       variant_type: INPROCESS
62 |   11:
63 |     name: PrefetcherPipe
64 |     variant: INPROCESS
65 |     variant_ctx:
66 |       variant_type: INPROCESS
67 |   12:
68 |     fused_pipes:
69 |     - 2
70 |     - 5
71 |     - 4
72 |     name: FusedPipe
73 |     variant: RAY
74 |     variant_ctx:
75 |       max_inflight: 100
76 |       max_prefetch: 100
77 |       n_actors: 4
78 |       submit_batch_size: 16
79 |       use_threads: true
80 |       variant_type: RAY
81 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/cache_results/configs/new_simclrv2_optimized_plan.yml:
--------------------------------------------------------------------------------
 1 | graph:
 2 |   0: '11'
 3 |   1: '0'
 4 |   3: '10'
 5 |   6: '12'
 6 |   7: '1'
 7 |   8: '3'
 8 |   9: '8'
 9 |   10: '6'
10 |   11: ''
11 |   12: '7'
12 | n_local_workers: 8
13 | pipes:
14 |   0:
15 |     name: BatcherPipe(batch_size=8)
16 |     variant: INPROCESS
17 |     variant_ctx:
18 |       variant_type: INPROCESS
19 |   1:
20 |     name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
21 |     variant: INPROCESS
22 |     variant_ctx:
23 |       variant_type: INPROCESS
24 |   2:
25 |     name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
26 |   3:
27 |     name: MapperPipe_Grayscale(num_output_channels=1)
28 |     variant: INPROCESS
29 |     variant_ctx:
30 |       variant_type: INPROCESS
31 |   4:
32 |     name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
33 |       1.1), hue=(-0.1, 0.1))
34 |   5:
35 |     name: MapperPipe_RandomHorizontalFlip(p=0.5)
36 |   6:
37 |     name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
38 |       1.3333), interpolation=bilinear, antialias=warn)
39 |     variant: INPROCESS
40 |     variant_ctx:
41 |       variant_type: INPROCESS
42 |   7:
43 |     name: MapperPipe_to_float
44 |     variant: INPROCESS
45 |     variant_ctx:
46 |       variant_type: INPROCESS
47 |   8:
48 |     name: ImageReaderPipe
49 |     variant: INPROCESS
50 |     variant_ctx:
51 |       variant_type: INPROCESS
52 |   9:
53 |     name: LocalFSListerPipe
54 |     variant: INPROCESS
55 |     variant_ctx:
56 |       variant_type: INPROCESS
57 |   10:
58 |     name: ObjectDiskCachePipe
59 |     variant: INPROCESS
60 |     variant_ctx:
61 |       variant_type: INPROCESS
62 |   11:
63 |     name: PrefetcherPipe
64 |     variant: INPROCESS
65 |     variant_ctx:
66 |       variant_type: INPROCESS
67 |   12:
68 |     fused_pipes:
69 |     - 2
70 |     - 5
71 |     - 4
72 |     name: FusedPipe
73 |     variant: RAY
74 |     variant_ctx:
75 |       max_inflight: 192
76 |       max_prefetch: 192
77 |       n_actors: 4
78 |       submit_batch_size: 16
79 |       use_threads: true
80 |       variant_type: RAY
81 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_ember_remote_tf.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     10: '12'
 5 |     11: ''
 6 |     12: '13'
 7 |     13: '0'
 8 |   n_local_workers: 8
 9 |   pipes:
10 |     0:
11 |       name: BatcherPipe(batch_size=1)
12 |       variant: INPROCESS
13 |       variant_ctx:
14 |         variant_type: INPROCESS
15 |     1:
16 |       name: MapperPipe_per_image_standardization
17 |       variant: TF
18 |       variant_ctx:
19 |         num_parallel_calls: null
20 |         variant_type: TF
21 |     2:
22 |       name: MapperPipe_gaussian_blur
23 |       variant: INPROCESS
24 |     3:
25 |       name: MapperPipe_rgb_to_grayscale
26 |       variant: INPROCESS
27 |     4:
28 |       name: MapperPipe_color_jitter
29 |       variant: INPROCESS
30 |     5:
31 |       name: MapperPipe_random_flip
32 |       variant: INPROCESS
33 |     6:
34 |       name: MapperPipe_crop_and_resize
35 |       variant: INPROCESS
36 |     7:
37 |       name: MapperPipe_convert_to_float
38 |       variant: TF
39 |       variant_ctx:
40 |         num_parallel_calls: null
41 |         variant_type: TF
42 |     8:
43 |       name: MapperPipe_decode_jpeg
44 |       variant: INPROCESS
45 |     9:
46 |       name: MapperPipe_read_file
47 |       variant: INPROCESS
48 |     10:
49 |       name: LocalFSListerPipe
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 |     11:
54 |       name: PrefetcherPipe
55 |       variant: INPROCESS
56 |       variant_ctx:
57 |         variant_type: INPROCESS
58 |     12:
59 |       fused_pipes:
60 |       - 9
61 |       - 8
62 |       - 3
63 |       - 6
64 |       - 2
65 |       - 5
66 |       - 4
67 |       name: FusedPipe
68 |       variant: TF_RAY
69 |       variant_ctx:
70 |         max_inflight: 100
71 |         max_prefetch: 100
72 |         n_actors: 4
73 |         num_parallel_calls: null
74 |         submit_batch_size: 33
75 |         use_threads: true
76 |         variant_type: TF_RAY
77 |     13:
78 |       fused_pipes:
79 |       - 7
80 |       - 1
81 |       name: FusedPipe
82 |       variant: TF
83 |       variant_ctx:
84 |         num_parallel_calls: null
85 |         variant_type: TF
86 | 


--------------------------------------------------------------------------------
/evaluation/cedar_utils.py:
--------------------------------------------------------------------------------
 1 | from typing import Optional, Union, Dict
 2 | from cedar.config import RayConfig
 3 | 
 4 | 
 5 | class CedarEvalSpec:
 6 |     def __init__(
 7 |         self,
 8 |         batch_size: int,
 9 |         num_total_samples: Optional[int],
10 |         num_epochs: int,
11 |         config: Optional[Union[str, Dict[str, str]]] = None,
12 |         kwargs: Dict[str, str] = None,
13 |         use_ray: bool = False,
14 |         ray_ip: str = "",
15 |         iteration_time: Optional[float] = None,
16 |         profiled_stats: str = "",
17 |         run_profiling: bool = False,
18 |         disable_optimizer: bool = False,
19 |         disable_controller: bool = False,
20 |         disable_prefetch: bool = False,
21 |         disable_offload: bool = False,
22 |         disable_parallelism: bool = False,
23 |         disable_reorder: bool = False,
24 |         disable_fusion: bool = False,
25 |         disable_caching: bool = False,
26 |         generate_plan: bool = False,
27 |     ):
28 |         self.batch_size = batch_size
29 |         self.num_total_samples = num_total_samples
30 |         self.num_epochs = num_epochs
31 |         self.config = config
32 |         self.kwargs = kwargs
33 |         self.use_ray = use_ray
34 |         self.ray_ip = ray_ip
35 |         self.iteration_time = iteration_time
36 |         self.profiled_stats = profiled_stats
37 |         self.run_profiling = run_profiling
38 |         self.disable_optimizer = disable_optimizer
39 |         self.disable_controller = disable_controller
40 |         self.disable_prefetch = disable_prefetch
41 |         self.disable_offload = disable_offload
42 |         self.disable_parallelism = disable_parallelism
43 |         self.disable_reorder = disable_reorder
44 |         self.disable_fusion = disable_fusion
45 |         self.disable_caching = disable_caching
46 |         self.generate_plan = generate_plan
47 | 
48 |     def to_ray_config(self) -> Optional[RayConfig]:
49 |         """
50 |         Returns a Ray spec for the CedarContext, if specified by
51 |         the profiler spec
52 |         """
53 |         if not self.use_ray:
54 |             return None
55 | 
56 |         return RayConfig(self.ray_ip)
57 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_truncate_one_offload.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     8: '11'
10 |     9: '5'
11 |     10: ''
12 |     11: '9'
13 |   n_local_workers: 1
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Embedding(50257, 764)
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_ToTensor()
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_AddToken()
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_AddToken()
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     5:
41 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
42 |       variant: INPROCESS
43 |       variant_ctx:
44 |         variant_type: INPROCESS
45 |     6:
46 |       name: MapperPipe_Truncate()
47 |       variant: INPROCESS
48 |       variant_ctx:
49 |         variant_type: INPROCESS
50 |     7:
51 |       name: MapperPipe_GPT2BPETokenizer()
52 |       variant: INPROCESS
53 |       variant_ctx:
54 |         variant_type: INPROCESS
55 |     8:
56 |       name: LocalLinePipe
57 |       variant: INPROCESS
58 |       variant_ctx:
59 |         variant_type: INPROCESS
60 |     9:
61 |       name: ObjectDiskCachePipe
62 |       variant: INPROCESS
63 |       variant_ctx:
64 |         variant_type: INPROCESS
65 |     10:
66 |       name: PrefetcherPipe
67 |       variant: INPROCESS
68 |       variant_ctx:
69 |         variant_type: INPROCESS
70 |     11:
71 |       fused_pipes:
72 |       - 7
73 |       - 6
74 |       name: FusedPipe
75 |       variant: RAY
76 |       variant_ctx:
77 |         max_inflight: 10000
78 |         max_prefetch: 10000
79 |         n_actors: 16
80 |         submit_batch_size: 500
81 |         use_threads: true
82 |         variant_type: RAY
83 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_tf_baseline.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     2: '1'
 6 |     3: '2'
 7 |     4: '3'
 8 |     5: '4'
 9 |     6: '5'
10 |     7: '6'
11 |     8: '7'
12 |     9: '8'
13 |     10: '9'
14 |     11: ''
15 |   n_local_workers: 1
16 |   pipes:
17 |     0:
18 |       name: BatcherPipe(batch_size=1)
19 |       variant: INPROCESS
20 |       variant_ctx:
21 |         variant_type: INPROCESS
22 |     1:
23 |       name: MapperPipe_per_image_standardization
24 |       variant: TF
25 |       variant_ctx:
26 |         num_parallel_calls: null
27 |         variant_type: TF
28 |     2:
29 |       name: MapperPipe_gaussian_blur
30 |       variant: TF
31 |       variant_ctx:
32 |         num_parallel_calls: null
33 |         variant_type: TF
34 |     3:
35 |       name: MapperPipe_rgb_to_grayscale
36 |       variant: TF
37 |       variant_ctx:
38 |         num_parallel_calls: null
39 |         variant_type: TF
40 |     4:
41 |       name: MapperPipe_color_jitter
42 |       variant: TF
43 |       variant_ctx:
44 |         num_parallel_calls: null
45 |         variant_type: TF
46 |     5:
47 |       name: MapperPipe_random_flip
48 |       variant: TF
49 |       variant_ctx:
50 |         num_parallel_calls: null
51 |         variant_type: TF
52 |     6:
53 |       name: MapperPipe_crop_and_resize
54 |       variant: TF
55 |       variant_ctx:
56 |         num_parallel_calls: null
57 |         variant_type: TF
58 |     7:
59 |       name: MapperPipe_convert_to_float
60 |       variant: TF
61 |       variant_ctx:
62 |         num_parallel_calls: null
63 |         variant_type: TF
64 |     8:
65 |       name: MapperPipe_decode_jpeg
66 |       variant: TF
67 |       variant_ctx:
68 |         num_parallel_calls: null
69 |         variant_type: TF
70 |     9:
71 |       name: MapperPipe_read_file
72 |       variant: TF
73 |       variant_ctx:
74 |         num_parallel_calls: null
75 |         variant_type: TF
76 |     10:
77 |       name: LocalFSListerPipe
78 |       variant: INPROCESS
79 |       variant_ctx:
80 |         variant_type: INPROCESS
81 |     11:
82 |       name: PrefetcherPipe
83 |       variant: INPROCESS
84 |       variant_ctx:
85 |         variant_type: INPROCESS
86 |   


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/torch_dataset.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import torch
 3 | import torchdata.datapipes as dp
 4 | 
 5 | from torchvision import transforms
 6 | from torchvision.io import read_image, ImageReadMode
 7 | from evaluation.torch_utils import TorchEvalSpec
 8 | from torch.utils.data import DataLoader
 9 | 
10 | DATASET_LOC = "datasets/imagenette2"
11 | IMG_HEIGHT = 244
12 | IMG_WIDTH = 244
13 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
14 | 
15 | 
def to_float(x):
    """Return ``x`` converted to ``torch.float32`` through its ``to`` method."""
    target_dtype = torch.float32
    return x.to(target_dtype)
18 | 
19 | 
def _read_rgb_image(path):
    """Decode the image file at ``path`` with ``ImageReadMode.RGB``."""
    return read_image(path, mode=ImageReadMode.RGB)


def build_datapipe(root, spec: TorchEvalSpec):
    """
    Build the image preprocessing datapipe rooted at ``root``.

    Files are listed recursively under ``root``, passed through a sharding
    filter, read as RGB images, and then mapped through, in order: float
    conversion, random resized crop, random horizontal flip, color jitter,
    grayscale, gaussian blur and normalization.

    Args:
        root: Directory to list files from.
        spec: Evaluation spec. Accepted for interface compatibility; it is
            not read by this function.

    Returns:
        The composed datapipe.
    """
    datapipe = dp.iter.FileLister(root=root, recursive=True)
    # TODO: Evaluate where is a fair place to put this...
    datapipe = datapipe.sharding_filter()
    # Use a named module-level function rather than a lambda: lambdas cannot
    # be pickled by the standard pickler, which matters once the datapipe is
    # handed to DataLoader worker processes.
    datapipe = dp.iter.Mapper(datapipe, _read_rgb_image)
    datapipe = dp.iter.Mapper(datapipe, to_float)
    datapipe = dp.iter.Mapper(
        datapipe, transforms.RandomResizedCrop((IMG_HEIGHT, IMG_WIDTH))
    )
    datapipe = dp.iter.Mapper(datapipe, transforms.RandomHorizontalFlip())
    datapipe = dp.iter.Mapper(
        datapipe, transforms.ColorJitter(0.1, 0.1, 0.1, 0.1)
    )
    datapipe = dp.iter.Mapper(
        datapipe, transforms.Grayscale(num_output_channels=1)
    )
    datapipe = dp.iter.Mapper(
        datapipe, transforms.GaussianBlur(GAUSSIAN_BLUR_KERNEL_SIZE)
    )
    datapipe = dp.iter.Mapper(
        datapipe, transforms.Normalize((0.1307,), (0.3081,))
    )
    return datapipe
45 | 
46 | 
def get_dataset(spec: TorchEvalSpec):
    """
    Create the DataLoader over the training images.

    The training directory is resolved relative to this file: two levels
    above its directory, then ``DATASET_LOC``, then ``imagenette2/train``.

    Args:
        spec: Supplies ``batch_size`` and ``num_workers`` for the
            DataLoader, and is forwarded to ``build_datapipe``.

    Returns:
        A DataLoader wrapping the datapipe built for the training directory.
    """
    base_dir = pathlib.Path(__file__).resolve().parents[2]
    train_dir = base_dir / DATASET_LOC / "imagenette2/train"

    pipe = build_datapipe(str(train_dir), spec)

    return DataLoader(
        pipe,
        batch_size=spec.batch_size,
        num_workers=spec.num_workers,
    )
60 | 
61 | 
if __name__ == "__main__":
    # Manual smoke run: iterate the loader and print each batch and its size.
    loader = get_dataset(TorchEvalSpec(8, 1))
    for batch in loader:
        print(batch)
        print(batch.size())
67 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/ablation_p_r_o.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: ''
 4 |     1: '0'
 5 |     2: '5'
 6 |     3: '6'
 7 |     4: '7'
 8 |     5: '4'
 9 |     6: '2'
10 |     7: '1'
11 |     8: '3'
12 |     9: '8'
13 |   n_local_workers: 8
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=1)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: RAY
28 |       variant_ctx:
29 |         max_inflight: 100
30 |         max_prefetch: 100
31 |         n_actors: 2
32 |         submit_batch_size: 16
33 |         use_threads: true
34 |         variant_type: RAY
35 |     3:
36 |       name: MapperPipe_Grayscale(num_output_channels=1)
37 |       variant: INPROCESS
38 |       variant_ctx:
39 |         variant_type: INPROCESS
40 |     4:
41 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
42 |         1.1), hue=(-0.1, 0.1))
43 |       variant: RAY
44 |       variant_ctx:
45 |         max_inflight: 100
46 |         max_prefetch: 100
47 |         n_actors: 2
48 |         submit_batch_size: 16
49 |         use_threads: true
50 |         variant_type: RAY
51 |     5:
52 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
53 |       variant: INPROCESS
54 |       variant_ctx:
55 |         variant_type: INPROCESS
56 |     6:
57 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
58 |         1.3333), interpolation=bilinear, antialias=warn)
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     7:
63 |       name: MapperPipe_to_float
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |     8:
68 |       name: ImageReaderPipe
69 |       variant: INPROCESS
70 |       variant_ctx:
71 |         variant_type: INPROCESS
72 |     9:
73 |       name: LocalFSListerPipe
74 |       variant: INPROCESS
75 |       variant_ctx:
76 |         variant_type: INPROCESS
77 |   


--------------------------------------------------------------------------------
/evaluation/run_cedar_local.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Note, we provide the exact stats and config files produced by the optimizer on our setup in order to enable reproducibility.
 4 | # To generate new profiling stats and re-run the optimizer, use the --run_profiling and --generate_plan flags in eval_cedar.py
 5 | # Replace the stats and optimizer-produced config in the following commands
 6 | 
 7 | # cv-torch
 8 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_dataset.py --profiled_stats pipelines/simclrv2/stats/cedar_stats.yaml --master_feature_config pipelines/simclrv2/configs/eval_cedar_local.yaml
 9 | 
10 | # cv-tf
11 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_tf_dataset.py --profiled_stats pipelines/simclrv2/stats/cedar_tf_stats.yaml --master_feature_config pipelines/simclrv2/configs/eval_cedar_local_tf.yaml
12 | 
13 | # nlp-torch
14 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_dataset.py --profiled_stats pipelines/wikitext103/stats/cedar.yaml --master_feature_config pipelines/wikitext103/configs/eval_local.yaml  --num_total_samples 100000
15 | 
16 | # nlp-hf-tf
17 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_tf_dataset.py --profiled_stats pipelines/wikitext103/stats/tf.yaml --master_feature_config pipelines/wikitext103/configs/eval_local_tf.yaml --num_total_samples 100000
18 | 
19 | # nlp-tf
20 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_tf_service_dataset.py --profiled_stats pipelines/wikitext103/stats/tf_service.yaml --master_feature_config pipelines/wikitext103/configs/eval_local_tf_service.yaml --num_total_samples 200000
21 | 
22 | # asr
23 | python eval_cedar.py --dataset_file pipelines/commonvoice/cedar_dataset.py --profiled_stats pipelines/commonvoice/stats/cedar.yaml --master_feature_config pipelines/commonvoice/configs/eval_local.yaml --num_total_samples 10000
24 | 
25 | # ssd
26 | python eval_cedar.py --dataset_file pipelines/coco/cedar_dataset.py --profiled_stats pipelines/coco/stats/coco_local_stats.yaml --master_feature_config pipelines/coco/configs/cedar_local_plan.yml
27 | 
28 | python eval_cedar.py --dataset_file pipelines/coco/cedar_tf_dataset.py --profiled_stats pipelines/coco/stats/coco_tf_local_stats.yaml --master_feature_config pipelines/coco/configs/cedar_tf_local_plan.yml


--------------------------------------------------------------------------------
/cedar/pipes/__init__.py:
--------------------------------------------------------------------------------
 1 | from cedar.pipes.batch import BatcherPipe
 2 | from cedar.pipes.common import (
 3 |     DataSample,
 4 |     Partition,
 5 |     MutationError,
 6 |     CedarPipeSpec,
 7 |     cedar_pipe,
 8 | )
 9 | from cedar.pipes.io import (
10 |     FileOpenerPipe,
11 |     LineReaderPipe,
12 |     ImageReaderPipe,
13 |     WebReaderPipe,
14 | )
15 | from cedar.pipes.map import MapperPipe
16 | from cedar.pipes.noop import NoopPipe
17 | from cedar.pipes.context import (
18 |     PipeVariantType,
19 |     PipeVariantContext,
20 |     InProcessPipeVariantContext,
21 |     MultiprocessPipeVariantContext,
22 |     MultithreadedPipeVariantContext,
23 |     RayPipeVariantContext,
24 |     SMPPipeVariantContext,
25 |     PipeVariantContextFactory,
26 |     TFPipeVariantContext,
27 |     TFRayPipeVariantContext,
28 |     RayDSPipeVariantContext,
29 | )
30 | from cedar.pipes.pipe import (
31 |     Pipe,
32 | )
33 | from cedar.pipes.variant import (
34 |     PipeVariant,
35 |     InProcessPipeVariant,
36 |     MultiprocessPipeVariant,
37 |     MultithreadedPipeVariant,
38 |     SMPPipeVariant,
39 |     TFPipeVariant,
40 |     RayDSPipeVariant,
41 | )
42 | from cedar.pipes.ray_variant import RayPipeVariant
43 | from cedar.pipes.tf import TFTensorDontCare, TFOutputHint
44 | 
# Public API of ``cedar.pipes``: every name listed here is imported above and
# re-exported from the package.
__all__ = [
    "BatcherPipe",
    "CedarPipeSpec",
    "DataSample",
    "FileOpenerPipe",
    "ImageReaderPipe",
    "InProcessPipeVariant",
    "InProcessPipeVariantContext",
    "LineReaderPipe",
    "MapperPipe",
    "MultiprocessPipeVariant",
    "MultiprocessPipeVariantContext",
    "MultithreadedPipeVariant",
    "MultithreadedPipeVariantContext",
    "MutationError",
    "NoopPipe",
    "Partition",
    "Pipe",
    "PipeVariant",
    "PipeVariantContext",
    "PipeVariantContextFactory",
    "PipeVariantType",
    "RayDSPipeVariant",
    "RayDSPipeVariantContext",
    "RayPipeVariant",
    "RayPipeVariantContext",
    "SMPPipeVariant",
    "SMPPipeVariantContext",
    "TFOutputHint",
    "TFPipeVariant",
    "TFPipeVariantContext",
    "TFRayPipeVariantContext",
    "TFTensorDontCare",
    "WebReaderPipe",
    "cedar_pipe",
]

# Import-time guard that keeps the export list in sorted order. Under Python's
# default string ordering the capitalised names sort before ``cedar_pipe``.
assert __all__ == sorted(__all__)
83 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_tokenizer_two_offloads.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     2: '1'
 6 |     7: '9'
 7 |     8: '7'
 8 |     9: '11'
 9 |     10: ''
10 |     11: '2'
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: BatcherPipe(batch_size=1)
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_Embedding(50257, 764)
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_ToTensor()
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_AddToken()
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_AddToken()
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: MapperPipe_Truncate()
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: MapperPipe_GPT2BPETokenizer()
50 |       variant: RAY
51 |       variant_ctx:
52 |         max_inflight: 10000
53 |         max_prefetch: 10000
54 |         n_actors: 16
55 |         submit_batch_size: 500
56 |         use_threads: true
57 |         variant_type: RAY
58 |     8:
59 |       name: LocalLinePipe
60 |       variant: INPROCESS
61 |       variant_ctx:
62 |         variant_type: INPROCESS
63 |     9:
64 |       name: ObjectDiskCachePipe
65 |       variant: INPROCESS
66 |       variant_ctx:
67 |         variant_type: INPROCESS
68 |     10:
69 |       name: PrefetcherPipe
70 |       variant: INPROCESS
71 |       variant_ctx:
72 |         variant_type: INPROCESS
73 |     11:
74 |       fused_pipes:
75 |       - 6
76 |       - 5
77 |       - 4
78 |       - 3
79 |       name: FusedPipe
80 |       variant: RAY
81 |       variant_ctx:
82 |         max_inflight: 10000
83 |         max_prefetch: 10000
84 |         n_actors: 16
85 |         submit_batch_size: 500
86 |         use_threads: true
87 |         variant_type: RAY
88 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/tf_dataset.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import pathlib
 3 | 
 4 | from transformers import GPT2Tokenizer
 5 | 
 6 | from evaluation.tf_utils import TFEvalSpec
 7 | 
 8 | DATASET_LOC = "datasets/wikitext103"
 9 | 
10 | 
def _load_text(path):
    """Read the file at ``path`` and return a dataset of its lines.

    The whole file is loaded as one string tensor and split on newline
    characters; each resulting piece becomes one dataset element.
    """
    contents = tf.io.read_file(path)
    lines = tf.strings.split(contents, "\n")
    return tf.data.Dataset.from_tensor_slices(lines)
14 | 
15 | 
# Pretrained "gpt2" tokenizer loaded once at import time; used by _tokenize.
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
# Lookup table of shape [50257, 764] filled with uniform random values in
# [-1, 1); used by _embedding.
embedding = tf.Variable(tf.random.uniform([50257, 764], -1.0, 1.0))
18 | 
19 | 
@tf.py_function(Tout=tf.int32)
def _tokenize(x):
    """Tokenize one line of text with the module-level GPT-2 tokenizer.

    Wrapped in ``tf.py_function`` because the tokenizer is plain Python and
    needs the concrete value of ``x``.

    Args:
        x: String tensor holding one line of text (the elements produced by
            ``TextLineDataset`` in ``build_dataset``).

    Returns:
        The ``input_ids`` tensor returned by the tokenizer.
    """
    # ``x.numpy()`` is a ``bytes`` object. ``str()`` on it yields the repr
    # ("b'...'", with non-ASCII bytes escaped) instead of the text, so decode
    # it explicitly before tokenizing.
    text = x.numpy().decode("utf-8")
    return tokenizer(text, return_tensors="tf")["input_ids"]
23 | 
24 | 
def _truncate(x):
    """Keep at most the first 254 entries along axis 1 of a rank-2 tensor.

    The slice always starts at ``[0, 0]`` and has a first dimension of 1.
    """
    keep = tf.minimum(tf.shape(x)[1], 254)
    return tf.slice(x, [0, 0], [1, keep])
30 | 
31 | 
def _embedding(x):
    """Look up the rows of the module-level ``embedding`` table for ``x``."""
    vectors = tf.nn.embedding_lookup(embedding, x)
    return vectors
34 | 
35 | 
def build_dataset(path, spec):
    """Assemble the tf.data pipeline over the text file at ``path``.

    Stages, in order: read lines, tokenize, truncate, embed, prefetch. When
    ``spec.service_addr`` is truthy the dataset is additionally distributed
    through tf.data service in ``distributed_epoch`` mode.

    Args:
        path: Location of the text file, read line by line.
        spec: Object providing ``num_parallel_calls`` and ``service_addr``.

    Returns:
        The resulting dataset.
    """
    parallelism = spec.num_parallel_calls

    ds = tf.data.TextLineDataset(path)
    ds = ds.map(lambda x: _tokenize(x), num_parallel_calls=parallelism)
    for stage in (_truncate, _embedding):
        ds = ds.map(stage, num_parallel_calls=parallelism)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    if not spec.service_addr:
        return ds

    print("Using tf.data.service with address {}".format(spec.service_addr))
    distribute = tf.data.experimental.service.distribute(
        processing_mode="distributed_epoch", service=spec.service_addr
    )
    return ds.apply(distribute)
59 | 
60 | 
def get_dataset(spec: TFEvalSpec):
    """Build the dataset over the WikiText-103 training file.

    The file is located under ``DATASET_LOC``, resolved against the directory
    two levels above the one containing this module.
    """
    root = pathlib.Path(__file__).resolve().parents[2]
    train_file = root / DATASET_LOC / "wikitext-103/wiki.train.tokens"
    return build_dataset(str(train_file), spec)
73 | 
74 | 
if __name__ == "__main__":
    # Smoke test: print the first eleven elements with their indices.
    last_index = 10
    for idx, element in enumerate(get_dataset(TFEvalSpec(1, 1))):
        print(element)
        print(idx)
        if idx == last_index:
            break
84 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/configs/eval_ember_local_tf.yaml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     10: '12'
 5 |     11: ''
 6 |     12: '0'
 7 |   n_local_workers: 1
 8 |   pipes:
 9 |     0:
10 |       name: BatcherPipe(batch_size=1)
11 |       variant: INPROCESS
12 |       variant_ctx:
13 |         variant_type: INPROCESS
14 |     1:
15 |       name: MapperPipe_per_image_standardization
16 |       variant: TF
17 |       variant_ctx:
18 |         num_parallel_calls: null
19 |         variant_type: TF
20 |     2:
21 |       name: MapperPipe_gaussian_blur
22 |       variant: TF
23 |       variant_ctx:
24 |         num_parallel_calls: null
25 |         variant_type: TF
26 |     3:
27 |       name: MapperPipe_rgb_to_grayscale
28 |       variant: TF
29 |       variant_ctx:
30 |         num_parallel_calls: null
31 |         variant_type: TF
32 |     4:
33 |       name: MapperPipe_color_jitter
34 |       variant: TF
35 |       variant_ctx:
36 |         num_parallel_calls: null
37 |         variant_type: TF
38 |     5:
39 |       name: MapperPipe_random_flip
40 |       variant: TF
41 |       variant_ctx:
42 |         num_parallel_calls: null
43 |         variant_type: TF
44 |     6:
45 |       name: MapperPipe_crop_and_resize
46 |       variant: TF
47 |       variant_ctx:
48 |         num_parallel_calls: null
49 |         variant_type: TF
50 |     7:
51 |       name: MapperPipe_convert_to_float
52 |       variant: TF
53 |       variant_ctx:
54 |         num_parallel_calls: null
55 |         variant_type: TF
56 |     8:
57 |       name: MapperPipe_decode_jpeg
58 |       variant: TF
59 |       variant_ctx:
60 |         num_parallel_calls: null
61 |         variant_type: TF
62 |     9:
63 |       name: MapperPipe_read_file
64 |       variant: TF
65 |       variant_ctx:
66 |         num_parallel_calls: null
67 |         variant_type: TF
68 |     10:
69 |       name: LocalFSListerPipe
70 |       variant: INPROCESS
71 |       variant_ctx:
72 |         variant_type: INPROCESS
73 |     11:
74 |       name: PrefetcherPipe
75 |       variant: INPROCESS
76 |       variant_ctx:
77 |         variant_type: INPROCESS
78 |     12:
79 |       fused_pipes:
80 |       - 9
81 |       - 8
82 |       - 3
83 |       - 6
84 |       - 2
85 |       - 5
86 |       - 4
87 |       - 7
88 |       - 1
89 |       name: FusedPipe
90 |       variant: TF
91 |       variant_ctx:
92 |         num_parallel_calls: -1
93 |         variant_type: TF
94 |   


--------------------------------------------------------------------------------
/evaluation/run_cedar_remote.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | 
 3 | # Note, we provide the exact stats and config files produced by the optimizer on our setup in order to enable reproducibility.
 4 | # To generate new profiling stats and re-run the optimizer, use the --run_profiling and --generate_plan flags in eval_cedar.py
 5 | # Replace the stats and optimizer-produced config in the following commands
 6 | 
 7 | # cv-torch
 8 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_remote_dataset.py --profiled_stats pipelines/simclrv2/stats/cedar_remote.yaml --master_feature_config pipelines/simclrv2/configs/eval_cedar_remote.yaml --use_ray --ray_ip 10.138.0.8
 9 | # cv-tf
10 | python eval_cedar.py --dataset_file pipelines/simclrv2/cedar_tf_dataset.py --profiled_stats pipelines/simclrv2/stats/cedar_tf_stats.yaml --master_feature_config pipelines/simclrv2/configs/eval_cedar_remote_tf.yaml --use_ray --ray_ip 10.138.0.8
11 | # nlp-torch
12 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_dataset.py --profiled_stats pipelines/wikitext103/stats/cedar.yaml --use_ray --ray_ip 10.138.0.8 --num_total_samples 100000 --master_feature_config pipelines/wikitext103/configs/eval_remote.yaml
13 | # nlp-hf-tf
14 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_tf_dataset.py --profiled_stats pipelines/wikitext103/stats/tf.yaml --master_feature_config pipelines/wikitext103/configs/eval_remote_tf.yaml --use_ray --ray_ip 10.138.0.8 --num_total_samples 100000
15 | # nlp-tf
16 | python eval_cedar.py --dataset_file pipelines/wikitext103/cedar_tf_service_dataset.py --profiled_stats pipelines/wikitext103/stats/tf_service.yaml --master_feature_config pipelines/wikitext103/configs/eval_remote_tf_service.yaml --use_ray --ray_ip 10.138.0.8 --num_total_samples 200000
17 | # asr
18 | python eval_cedar.py --dataset_file pipelines/commonvoice/cedar_dataset.py --profiled_stats pipelines/commonvoice/stats/cedar.yaml --master_feature_config pipelines/commonvoice/configs/eval_remote.yaml --num_total_samples 10000
19 | 
20 | python eval_cedar.py --dataset_file pipelines/coco/cedar_remote_dataset.py --master_feature_config pipelines/coco/configs/cedar_remote_plan.yml --use_ray --ray_ip 10.138.0.45
21 | 
22 | python eval_cedar.py --dataset_file pipelines/coco/cedar_tf_dataset.py --master_feature_config pipelines/coco/configs/cedar_tf_remote_plan.yml --profiled_stats pipelines/coco/stats/coco_tf_remote_stats.yaml --use_ray --ray_ip 10.138.0.26


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/torch_dataset.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import torch.nn as nn
 3 | import torchdata.datapipes as dp
 4 | from torch.hub import load_state_dict_from_url
 5 | 
 6 | import torchtext.transforms as T
 7 | from evaluation.torch_utils import TorchEvalSpec
 8 | from torch.utils.data import DataLoader
 9 | 
10 | DATASET_LOC = "datasets/wikitext103"
11 | 
12 | 
def build_datapipe(root, spec: TorchEvalSpec):
    """Create the torchdata pipeline that turns text lines into embeddings.

    Files under ``root`` are listed recursively, opened and read line by
    line. After the sharding filter each line is tokenized, truncated to a
    maximum length of 254, mapped through the vocab, given token 0 at the
    start and token 2 at the end, converted to a tensor and passed through an
    embedding layer.

    Args:
        root: Path handed to ``FileLister``.
        spec: Evaluation spec; not read by this function.

    Returns:
        The final datapipe.
    """
    tokenizer = T.GPT2BPETokenizer(
        "https://download.pytorch.org/models/text/gpt2_bpe_encoder.json",
        "https://download.pytorch.org/models/text/gpt2_bpe_vocab.bpe",
    )
    vocab_state = load_state_dict_from_url(
        "https://download.pytorch.org/models/text/roberta.vocab.pt"
    )
    # Per-sample transforms, applied in this order.
    transforms = [
        tokenizer,
        T.Truncate(max_seq_len=254),
        T.VocabTransform(vocab_state),
        T.AddToken(token=0, begin=True),
        T.AddToken(token=2, begin=False),
        T.ToTensor(),
        nn.Embedding(50257, 764, _freeze=True),
    ]

    datapipe = dp.iter.FileLister(root=root, recursive=True)
    datapipe = dp.iter.FileOpener(datapipe)
    datapipe = dp.iter.LineReader(datapipe, return_path=False)
    datapipe = datapipe.sharding_filter()
    for transform in transforms:
        datapipe = dp.iter.Mapper(datapipe, transform)

    return datapipe
42 | 
43 | 
def get_dataset(spec: TorchEvalSpec):
    """Return a ``DataLoader`` over the WikiText-103 training file.

    The file is located under ``DATASET_LOC``, resolved against the directory
    two levels above the one containing this module. Batch size and worker
    count are taken from ``spec``.
    """
    root = pathlib.Path(__file__).resolve().parents[2]
    train_file = root / DATASET_LOC / "wikitext-103/wiki.train.tokens"
    return DataLoader(
        build_datapipe(str(train_file), spec),
        batch_size=spec.batch_size,
        num_workers=spec.num_workers,
    )
59 | 
60 | 
if __name__ == "__main__":
    # Smoke test: print the first eleven batches, then stop.
    last_index = 10
    for idx, batch in enumerate(get_dataset(TorchEvalSpec(1, 1))):
        print(batch)
        if idx == last_index:
            break
67 | 


--------------------------------------------------------------------------------
/cedar/config.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Config file for cedar
 3 | """
 4 | 
 5 | from typing import Type, TypeVar, Optional
 6 | import ray
 7 | import logging
 8 | 
 9 | logger = logging.getLogger(__name__)
10 | 
11 | 
12 | try:
13 |     import nvidia.dali as dali  # noqa: F401
14 | 
15 |     DALI_AVAILABLE = True
16 | except ImportError:
17 |     DALI_AVAILABLE = False
18 | 
T = TypeVar("T", bound="CedarContext")


class RayConfig:
    """
    Configuration class for Ray

    Attributes:
        ip: Address of an existing Ray cluster to connect to. An empty string
            means a local Ray instance is launched instead.
        n_cpus: CPU count passed to ``ray.init`` for a local instance, or
            ``None`` to call ``ray.init`` without it.
    """

    def __init__(self, ip: str = "", n_cpus: Optional[int] = None):
        self.ip = ip
        self.n_cpus = n_cpus


class CedarContext:
    """
    Context holding necessary state for cedar services.
    """

    def __init__(self, ray_config: Optional[RayConfig] = None):
        self.ray_config = ray_config

    def init_ray(self):
        """
        Initialize the Ray runtime.
        NOTE: If calling this from a child process, ensure that the parent
        process does not call init_ray().

        An already-initialized Ray runtime is reused as is. Otherwise this
        connects to ``ray://<ip>:10001`` when an IP is configured, and
        launches a local instance when it is not.

        Raises:
            RuntimeError: If no Ray config was supplied to the context.
        """
        if self.ray_config is None:
            raise RuntimeError("Ray config not specified.")

        if ray.is_initialized():
            logger.warning("Ray already initialized. Defaulting to it.")
        elif self.ray_config.ip != "":
            # Ray is known to be uninitialized on this branch, so no shutdown
            # is needed before connecting.
            logger.info(f"Connecting to ray cluster at {self.ray_config.ip}")
            ray.init(f"ray://{self.ray_config.ip}:10001")
        else:
            logger.info("Launching to local ray instance")
            if self.ray_config.n_cpus is not None:
                logger.info(
                    "Using {} CPUs for local ray instance".format(
                        self.ray_config.n_cpus
                    )
                )
                ray.init(num_cpus=self.ray_config.n_cpus)
            else:
                ray.init()

    @classmethod
    def from_yaml(cls: Type[T], config_file: str) -> T:
        """Build a context from a YAML file (not implemented yet)."""
        # TODO (myzhao)
        raise NotImplementedError

    def __del__(self):
        # ``ray_config`` is absent when the instance was never fully
        # initialized (``__init__`` did not run); treat that as "no Ray"
        # instead of raising inside the finalizer.
        # NOTE(review): this shuts Ray down even when init_ray() merely
        # reused a runtime that was already initialized elsewhere.
        if getattr(self, "ray_config", None) and ray.is_initialized():
            ray.shutdown()

    def use_ray(self) -> bool:
        """
        Returns if the context should use Ray.
        """
        return self.ray_config is not None
84 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/cache_results/configs/wikitext_cache_after_truncate_two_offloads.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '10'
 4 |     1: '0'
 5 |     2: '1'
 6 |     8: '12'
 7 |     9: '11'
 8 |     10: ''
 9 |     11: '2'
10 |     12: '9'
11 |   n_local_workers: 1
12 |   pipes:
13 |     0:
14 |       name: BatcherPipe(batch_size=1)
15 |       variant: INPROCESS
16 |       variant_ctx:
17 |         variant_type: INPROCESS
18 |     1:
19 |       name: MapperPipe_Embedding(50257, 764)
20 |       variant: INPROCESS
21 |       variant_ctx:
22 |         variant_type: INPROCESS
23 |     2:
24 |       name: MapperPipe_ToTensor()
25 |       variant: INPROCESS
26 |       variant_ctx:
27 |         variant_type: INPROCESS
28 |     3:
29 |       name: MapperPipe_AddToken()
30 |       variant: INPROCESS
31 |       variant_ctx:
32 |         variant_type: INPROCESS
33 |     4:
34 |       name: MapperPipe_AddToken()
35 |       variant: INPROCESS
36 |       variant_ctx:
37 |         variant_type: INPROCESS
38 |     5:
39 |       name: "MapperPipe_VocabTransform(\n  (vocab): Vocab()\n)"
40 |       variant: INPROCESS
41 |       variant_ctx:
42 |         variant_type: INPROCESS
43 |     6:
44 |       name: MapperPipe_Truncate()
45 |       variant: INPROCESS
46 |       variant_ctx:
47 |         variant_type: INPROCESS
48 |     7:
49 |       name: MapperPipe_GPT2BPETokenizer()
50 |       variant: INPROCESS
51 |       variant_ctx:
52 |         variant_type: INPROCESS
53 |     8:
54 |       name: LocalLinePipe
55 |       variant: INPROCESS
56 |       variant_ctx:
57 |         variant_type: INPROCESS
58 |     9:
59 |       name: ObjectDiskCachePipe
60 |       variant: INPROCESS
61 |       variant_ctx:
62 |         variant_type: INPROCESS
63 |     10:
64 |       name: PrefetcherPipe
65 |       variant: INPROCESS
66 |       variant_ctx:
67 |         variant_type: INPROCESS
68 |     11:
69 |       fused_pipes:
70 |       - 5
71 |       - 4
72 |       - 3
73 |       name: FusedPipe
74 |       variant: RAY
75 |       variant_ctx:
76 |         max_inflight: 10000
77 |         max_prefetch: 10000
78 |         n_actors: 16
79 |         submit_batch_size: 500
80 |         use_threads: true
81 |         variant_type: RAY
82 |     12:
83 |       fused_pipes:
84 |       - 7
85 |       - 6
86 |       name: FusedPipe
87 |       variant: RAY
88 |       variant_ctx:
89 |         max_inflight: 10000
90 |         max_prefetch: 10000
91 |         n_actors: 16
92 |         submit_batch_size: 500
93 |         use_threads: true
94 |         variant_type: RAY
95 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/wikitext103/tf_service_dataset.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_text as text
 3 | import pathlib
 4 | 
 5 | from evaluation.tf_utils import TFEvalSpec
 6 | 
 7 | DATASET_LOC = "datasets/wikitext103"
 8 | 
 9 | # from https://github.com/cirquit/presto/blob/master/openwebtext_pipeline_modern.py  # noqa: E501
10 | # vocabulary size 50001, GPT2 originally used 50257
11 | vocabulary_size = 50001
12 | bpe_model_path = tf.keras.utils.get_file(
13 |     "bpe_en_50k.model",
14 |     "https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs50000.model",
15 | )
16 | bpe_model = open(bpe_model_path, "rb").read()
17 | 
18 | embedding_dimension = 768
19 | bpe_tokernizer = text.SentencepieceTokenizer(
20 |     model=bpe_model, out_type=tf.dtypes.int32
21 | )
22 | 
23 | embedding = tf.Variable(
24 |     tf.random.uniform([vocabulary_size, embedding_dimension], -1.0, 1.0)
25 | )
26 | 
27 | 
def _truncate(x):
    """Keep at most the first 254 entries of a rank-1 tensor."""
    keep = tf.minimum(tf.shape(x)[0], 254)
    return tf.slice(x, [0], [keep])
33 | 
34 | 
def _embedding(x):
    """Look up the rows of the module-level ``embedding`` table for ``x``."""
    vectors = tf.nn.embedding_lookup(embedding, x)
    return vectors
37 | 
38 | 
def build_dataset(path, spec):
    """Assemble the tf.data pipeline over the text file at ``path``.

    Stages, in order: read lines, tokenize with the SentencePiece tokenizer,
    truncate, embed, prefetch. When ``spec.service_addr`` is truthy the
    dataset is additionally distributed through tf.data service in
    ``distributed_epoch`` mode.

    Args:
        path: Location of the text file, read line by line.
        spec: Object providing ``num_parallel_calls`` and ``service_addr``.

    Returns:
        The resulting dataset.
    """
    parallelism = spec.num_parallel_calls

    ds = tf.data.TextLineDataset(path)
    for stage in (bpe_tokernizer.tokenize, _truncate, _embedding):
        ds = ds.map(stage, num_parallel_calls=parallelism)
    ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)

    if not spec.service_addr:
        return ds

    print("Using tf.data.service with address {}".format(spec.service_addr))
    distribute = tf.data.experimental.service.distribute(
        processing_mode="distributed_epoch", service=spec.service_addr
    )
    return ds.apply(distribute)
62 | 
63 | 
def get_dataset(spec: TFEvalSpec):
    """Build the dataset over the WikiText-103 training file.

    The file is located under ``DATASET_LOC``, resolved against the directory
    two levels above the one containing this module.
    """
    root = pathlib.Path(__file__).resolve().parents[2]
    train_file = root / DATASET_LOC / "wikitext-103/wiki.train.tokens"
    return build_dataset(str(train_file), spec)
76 | 
77 | 
if __name__ == "__main__":
    # Smoke test: print the first eleven elements with their indices.
    last_index = 10
    for idx, element in enumerate(get_dataset(TFEvalSpec(1, 1))):
        print(element)
        print(idx)
        if idx == last_index:
            break
87 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/cache_results/configs/cache_after_list.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     3: '6'
 6 |     6: '12'
 7 |     7: '1'
 8 |     8: '3'
 9 |     9: '10'
10 |     10: '8'
11 |     11: ''
12 |     12: '7'
13 |   n_local_workers: 8
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=8)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_Grayscale(num_output_channels=1)
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
37 |         1.1), hue=(-0.1, 0.1))
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
48 |         1.3333), interpolation=bilinear, antialias=warn)
49 |       variant: INPROCESS
50 |       variant_ctx:
51 |         variant_type: INPROCESS
52 |     7:
53 |       name: MapperPipe_to_float
54 |       variant: INPROCESS
55 |       variant_ctx:
56 |         variant_type: INPROCESS
57 |     8:
58 |       name: ImageReaderPipe
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     9:
63 |       name: LocalFSListerPipe
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |     10:
68 |       name: ObjectDiskCachePipe
69 |       variant: INPROCESS
70 |       variant_ctx:
71 |         variant_type: INPROCESS
72 |     11:
73 |       name: PrefetcherPipe
74 |       variant: INPROCESS
75 |       variant_ctx:
76 |         variant_type: INPROCESS
77 |     12:
78 |       fused_pipes:
79 |       - 2
80 |       - 5
81 |       - 4
82 |       name: FusedPipe
83 |       variant: RAY
84 |       variant_ctx:
85 |         max_inflight: 100
86 |         max_prefetch: 100
87 |         n_actors: 4
88 |         submit_batch_size: 16
89 |         use_threads: true
90 |         variant_type: RAY
91 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/cache_results/configs/cache_after_read.yml:
--------------------------------------------------------------------------------
 1 | physical_plan:
 2 |   graph:
 3 |     0: '11'
 4 |     1: '0'
 5 |     3: '6'
 6 |     6: '12'
 7 |     7: '1'
 8 |     8: '10'
 9 |     9: '8'
10 |     10: '3'
11 |     11: ''
12 |     12: '7'
13 |   n_local_workers: 8
14 |   pipes:
15 |     0:
16 |       name: BatcherPipe(batch_size=8)
17 |       variant: INPROCESS
18 |       variant_ctx:
19 |         variant_type: INPROCESS
20 |     1:
21 |       name: MapperPipe_Normalize(mean=(0.1307,), std=(0.3081,))
22 |       variant: INPROCESS
23 |       variant_ctx:
24 |         variant_type: INPROCESS
25 |     2:
26 |       name: MapperPipe_GaussianBlur(kernel_size=(11, 11), sigma=(0.1, 2.0))
27 |       variant: INPROCESS
28 |       variant_ctx:
29 |         variant_type: INPROCESS
30 |     3:
31 |       name: MapperPipe_Grayscale(num_output_channels=1)
32 |       variant: INPROCESS
33 |       variant_ctx:
34 |         variant_type: INPROCESS
35 |     4:
36 |       name: MapperPipe_ColorJitter(brightness=(0.9, 1.1), contrast=(0.9, 1.1), saturation=(0.9,
37 |         1.1), hue=(-0.1, 0.1))
38 |       variant: INPROCESS
39 |       variant_ctx:
40 |         variant_type: INPROCESS
41 |     5:
42 |       name: MapperPipe_RandomHorizontalFlip(p=0.5)
43 |       variant: INPROCESS
44 |       variant_ctx:
45 |         variant_type: INPROCESS
46 |     6:
47 |       name: MapperPipe_RandomResizedCrop(size=(244, 244), scale=(0.08, 1.0), ratio=(0.75,
48 |         1.3333), interpolation=bilinear, antialias=warn)
49 |       variant: INPROCESS
50 |       variant_ctx:
51 |         variant_type: INPROCESS
52 |     7:
53 |       name: MapperPipe_to_float
54 |       variant: INPROCESS
55 |       variant_ctx:
56 |         variant_type: INPROCESS
57 |     8:
58 |       name: ImageReaderPipe
59 |       variant: INPROCESS
60 |       variant_ctx:
61 |         variant_type: INPROCESS
62 |     9:
63 |       name: LocalFSListerPipe
64 |       variant: INPROCESS
65 |       variant_ctx:
66 |         variant_type: INPROCESS
67 |     10:
68 |       name: ObjectDiskCachePipe
69 |       variant: INPROCESS
70 |       variant_ctx:
71 |         variant_type: INPROCESS
72 |     11:
73 |       name: PrefetcherPipe
74 |       variant: INPROCESS
75 |       variant_ctx:
76 |         variant_type: INPROCESS
77 |     12:
78 |       fused_pipes:
79 |       - 2
80 |       - 5
81 |       - 4
82 |       name: FusedPipe
83 |       variant: RAY
84 |       variant_ctx:
85 |         max_inflight: 100
86 |         max_prefetch: 100
87 |         n_actors: 4
88 |         submit_batch_size: 16
89 |         use_threads: true
90 |         variant_type: RAY
91 | 


--------------------------------------------------------------------------------
/cedar/pipes/custom/commonvoice.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import random
 3 | import librosa
 4 | 
# Target sample rate for _resample; also passed as ``sr`` in mel.
SAMPLE_FREQ = 8000
# FFT window size passed as ``n_fft`` to the librosa calls in this module.
N_FFT = 400
# Exclusive upper bound on the number of rows zeroed by frequency_mask.
FREQ_MASK_PARAM = 80
# Exclusive upper bound on the number of columns zeroed by time_mask.
TIME_MASK_PARAM = 80
# Number of mel bands requested in mel.
N_MELS = 256
10 | 
11 | 
def time_mask(x):
    """Zero a random band of columns (axis 1) in a copy of ``x``.

    The band width is drawn uniformly from ``[0, TIME_MASK_PARAM)`` and
    truncated to an int; the start column is drawn from
    ``[0, max(0, n_columns - width)]``. ``x`` may be an array or a dict
    holding the array under ``"item"``; a dict input yields a dict output.
    The input array is not modified.
    """
    wrapped = isinstance(x, dict)
    masked = (x["item"] if wrapped else x).copy()
    width = int(np.random.uniform(low=0.0, high=TIME_MASK_PARAM))
    n_columns = masked.shape[1]
    start = random.randint(0, max(0, n_columns - width))
    masked[:, start : start + width] = 0  # noqa: E203
    return {"item": masked} if wrapped else masked
29 | 
30 | 
def frequency_mask(x):
    """Zero a random band of rows (axis 0) in a copy of ``x``.

    The band height is drawn uniformly from ``[0, FREQ_MASK_PARAM)`` and
    truncated to an int; the start row is drawn from
    ``[0, max(0, n_rows - height)]``. ``x`` may be an array or a dict holding
    the array under ``"item"``; a dict input yields a dict output. The input
    array is not modified.
    """
    wrapped = isinstance(x, dict)
    masked = (x["item"] if wrapped else x).copy()
    f = int(np.random.uniform(low=0.0, high=FREQ_MASK_PARAM))
    v = masked.shape[0]
    # Clamp the upper bound the same way time_mask does: random.randint
    # raises ValueError when the drawn band is taller than the input
    # (v - f < 0).
    f0 = random.randint(0, max(0, v - f))
    masked[f0 : f0 + f, :] = 0  # noqa: E203
    return {"item": masked} if wrapped else masked
47 | 
48 | 
def mel(x):
    """Compute a mel spectrogram with ``N_MELS`` bands via librosa.

    ``x`` is passed to librosa as the ``S`` argument. It may be given bare or
    inside a dict under ``"item"``; a dict input yields a dict output.
    """
    wrapped = isinstance(x, dict)
    spectrogram = x["item"] if wrapped else x
    result = librosa.feature.melspectrogram(
        S=spectrogram, sr=SAMPLE_FREQ, n_mels=N_MELS, n_fft=N_FFT
    )
    return {"item": result} if wrapped else result
60 | 
61 | 
def _read(x):
    """Load audio with ``librosa.load``.

    ``x`` is either the value to load or a dict holding it under ``"item"``;
    a dict input yields the loaded result wrapped under ``"item"``.
    """
    if not isinstance(x, dict):
        return librosa.load(x)
    loaded = librosa.load(x["item"])
    return {"item": loaded}
67 | 
68 | 
def _resample(x):
    """Resample audio to ``SAMPLE_FREQ`` with ``librosa.resample``.

    The payload is indexed as ``[0]`` for the samples and ``[1]`` for their
    original sample rate. It may be given bare or inside a dict under
    ``"item"``; a dict input yields a dict output.
    """
    wrapped = isinstance(x, dict)
    payload = x["item"] if wrapped else x
    resampled = librosa.resample(
        y=payload[0], orig_sr=payload[1], target_sr=SAMPLE_FREQ
    )
    return {"item": resampled} if wrapped else resampled
79 | 
80 | 
def _spec(x):
    """Return the squared magnitude of the STFT of ``x`` (``n_fft=N_FFT``).

    Accepts a bare array or a ``{"item": array}`` dict and returns the result
    in the same container.
    """
    wrapped = isinstance(x, dict)
    signal = x["item"] if wrapped else x
    power = np.abs(librosa.stft(signal, n_fft=N_FFT)) ** 2
    return {"item": power} if wrapped else power
86 | 
87 | 
def _stretch(x):
    """Apply ``librosa.effects.time_stretch`` with ``rate=0.8``.

    Accepts a bare array or a ``{"item": array}`` dict and returns the result
    in the same container.
    """
    wrapped = isinstance(x, dict)
    data = x["item"] if wrapped else x
    stretched = librosa.effects.time_stretch(data, rate=0.8, n_fft=N_FFT)
    return {"item": stretched} if wrapped else stretched
97 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/nlp_hf_app.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import fastflow as ff
 3 | from transformers import GPT2Tokenizer
 4 | 
 5 | from eval_app_runner import App
 6 | 
 7 | DATASET_LOC="/home/myzhao/cedar/evaluation/datasets/wikitext103/wikitext-103/wiki.train.tokens"
 8 | 
 9 | def _load_text(path):
10 |     text = tf.io.read_file(path)
11 |     return tf.data.Dataset.from_tensor_slices(tf.strings.split(text, "\n"))
12 | 
13 | 
14 | tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
15 | embedding = tf.Variable(tf.random.uniform([50257, 764], -1.0, 1.0))
16 | 
17 | 
def _tokenize(x):
    """Tokenize one text line with the module-level GPT-2 tokenizer.

    ``x`` is the eager string tensor handed over by ``tf.py_function``.
    Returns the tokenizer's ``"input_ids"`` tensor.
    """
    # A string tensor's .numpy() is a bytes object. str(bytes) would produce
    # the repr ("b'...'") and feed the prefix/escapes to the tokenizer, so
    # decode the bytes to the actual text instead.
    line = x.numpy().decode("utf-8")
    return tokenizer(line, return_tensors="tf")["input_ids"]
20 | 
def tokenize(x):
    """Wrap ``_tokenize`` in ``tf.py_function`` so it can be used in ``Dataset.map``.

    Returns the one-element list produced by ``tf.py_function`` for the
    ``[tf.int32]`` output spec.
    """
    return tf.py_function(_tokenize, [x], [tf.int32])
24 | 
def _truncate(x):
    """Keep at most the first 254 entries along axis 1 of row 0 of ``x``."""
    keep = tf.minimum(tf.shape(x)[1], 254)
    return tf.slice(x, [0, 0], [1, keep])
30 | 
31 | 
def _embedding(x):
    """Look up ``x`` in the module-level embedding table.

    Returns a ``(embedded, 0.0)`` pair; the second element is a constant
    placeholder.
    """
    embedded = tf.nn.embedding_lookup(embedding, x)
    placeholder = tf.constant(0.0)
    return (embedded, placeholder)
34 | 
35 | 
class WikiTextModel(ff.FastFlowModel):
    """Pass-through model whose ``call`` returns its inputs unchanged."""

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        # do nothing
        return inputs

    def __deepcopy__(self, memo=None):
        """Return a fresh ``WikiTextModel`` instead of copying this one.

        ``copy.deepcopy`` invokes ``__deepcopy__(memo)``; without the ``memo``
        parameter that call raises TypeError. The default keeps direct
        zero-argument calls working.
        """
        return WikiTextModel()
47 |     
class WikiTextApp(App):
    """App that feeds tokenized, embedded WikiText lines to a pass-through model."""

    def __init__(self, args, config):
        super().__init__(args, config)

    def dummy_loss(self, y_true, y_pred):
        """Return a constant zero loss regardless of the inputs."""
        return tf.constant(0.0)

    def create_model(self):
        """Build a ``WikiTextModel`` compiled with Adam and the dummy loss."""
        wikitext_model = WikiTextModel()
        wikitext_model.compile(optimizer="adam", loss=self.dummy_loss)
        return wikitext_model

    def create_dataset(self, num_parallel):
        """Build the input pipeline: read lines, tokenize, truncate, embed.

        ``num_parallel`` is forwarded as ``num_parallel_calls`` to every map.
        """
        lines = tf.data.TextLineDataset(DATASET_LOC).take(100000)
        tokens = lines.map(
            tokenize,
            num_parallel_calls=num_parallel,
            name="prep_begin",
        )
        truncated = tokens.map(_truncate, num_parallel_calls=num_parallel)
        embedded = truncated.map(_embedding, num_parallel_calls=num_parallel)
        batched = embedded.batch(1)
        return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

    def create_valid_dataset(self, num_parallel):
        """No validation dataset is provided."""
        return None
78 | 
if __name__ == "__main__":
    # Standalone smoke run: build the pipeline, show one element, then fit.
    wikitext_app = WikiTextApp(None, None)
    dataset = wikitext_app.create_dataset(1)

    # Print only the first element.
    for sample in dataset:
        print(sample)
        break

    keras_model = wikitext_app.create_model()

    # config = ff.FastFlowConfig.from_yaml("/home/myzhao/FastFlow/examples/config.yaml")

    keras_model.fit(dataset, epochs=10)
91 | 
92 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/nlp_app.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import fastflow as ff
 3 | import tensorflow_text as text
 4 | 
 5 | from eval_app_runner import App
 6 | 
 7 | DATASET_LOC="/home/myzhao/cedar/evaluation/datasets/wikitext103/wikitext-103/wiki.train.tokens"
 8 | 
 9 | # from https://github.com/cirquit/presto/blob/master/openwebtext_pipeline_modern.py  # noqa: E501
10 | # vocabulary size 50001, GPT2 originally used 50257
11 | vocabulary_size = 50001
12 | bpe_model_path = tf.keras.utils.get_file(
13 |     "bpe_en_50k.model",
14 |     "https://nlp.h-its.org/bpemb/en/en.wiki.bpe.vs50000.model",
15 | )
16 | bpe_model = open(bpe_model_path, "rb").read()
17 | 
18 | embedding_dimension = 768
19 | bpe_tokernizer = text.SentencepieceTokenizer(
20 |     model=bpe_model, out_type=tf.dtypes.int32
21 | )
22 | 
23 | embedding = tf.Variable(
24 |     tf.random.uniform([vocabulary_size, embedding_dimension], -1.0, 1.0)
25 | )
26 | 
def _truncate(x):
    """Keep at most the first 254 entries of the 1-D tensor ``x``."""
    keep = tf.minimum(tf.shape(x)[0], 254)
    return tf.slice(x, [0], [keep])
32 | 
33 | 
def _embedding(x):
    """Look up ``x`` in the module-level embedding table.

    Returns a ``(embedded, 0.0)`` pair; the second element is a constant
    placeholder.
    """
    embedded = tf.nn.embedding_lookup(embedding, x)
    placeholder = tf.constant(0.0)
    return (embedded, placeholder)
36 | 
class WikiTextModel(ff.FastFlowModel):
    """Pass-through model whose ``call`` returns its inputs unchanged."""

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        # do nothing
        return inputs

    def __deepcopy__(self, memo=None):
        """Return a fresh ``WikiTextModel`` instead of copying this one.

        ``copy.deepcopy`` invokes ``__deepcopy__(memo)``; without the ``memo``
        parameter that call raises TypeError. The default keeps direct
        zero-argument calls working.
        """
        return WikiTextModel()
47 |     
class WikiTextApp(App):
    """App that feeds BPE-tokenized, embedded WikiText lines to a pass-through model."""

    def __init__(self, args, config):
        super().__init__(args, config)

    def dummy_loss(self, y_true, y_pred):
        """Return a constant zero loss regardless of the inputs."""
        return tf.constant(0.0)

    def create_model(self):
        """Build a ``WikiTextModel`` compiled with Adam and the dummy loss."""
        wikitext_model = WikiTextModel()
        wikitext_model.compile(optimizer="adam", loss=self.dummy_loss)
        return wikitext_model

    def create_dataset(self, num_parallel):
        """Build the input pipeline: read lines, tokenize, truncate, embed.

        ``num_parallel`` is forwarded as ``num_parallel_calls`` to every map.
        """
        lines = tf.data.TextLineDataset(DATASET_LOC).take(200000)
        tokens = lines.map(
            bpe_tokernizer.tokenize,
            num_parallel_calls=num_parallel,
            name="prep_begin",
        )
        truncated = tokens.map(_truncate, num_parallel_calls=num_parallel)
        embedded = truncated.map(_embedding, num_parallel_calls=num_parallel)
        batched = embedded.batch(1)
        return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

    def create_valid_dataset(self, num_parallel):
        """No validation dataset is provided."""
        return None
75 | 
if __name__ == "__main__":
    # Standalone run: build the pipeline and fit the pass-through model on it.
    wikitext_app = WikiTextApp(None, None)
    dataset = wikitext_app.create_dataset(1)

    # for x in ds:
    #     print(x)
    #     break
    keras_model = wikitext_app.create_model()

    # config = ff.FastFlowConfig.from_yaml("/home/myzhao/FastFlow/examples/config.yaml")

    keras_model.fit(dataset, epochs=10)
88 | 
89 | 


--------------------------------------------------------------------------------
/evaluation/fastflow/examples/simclr_app.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import fastflow as ff
 3 | import tensorflow_addons as tfa
 4 | 
 5 | from eval_app_runner import App
 6 | 
 7 | DATASET_LOC="/home/myzhao/cedar/evaluation/datasets/imagenette2/imagenette2/train/*/*"
 8 | IMG_HEIGHT = 244
 9 | IMG_WIDTH = 244
10 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
11 | 
class SimCLRModel(ff.FastFlowModel):
    """Pass-through model whose ``call`` returns its inputs unchanged."""

    def __init__(self):
        super().__init__()

    def call(self, inputs):
        # do nothing
        return inputs

    def __deepcopy__(self, memo=None):
        """Return a fresh ``SimCLRModel`` instead of copying this one.

        ``copy.deepcopy`` invokes ``__deepcopy__(memo)``; without the ``memo``
        parameter that call raises TypeError. The default keeps direct
        zero-argument calls working.
        """
        return SimCLRModel()
22 |     
class SimCLRApp(App):
    """App that feeds augmented images to a pass-through model."""

    def __init__(self, args, config):
        super().__init__(args, config)

    def process_path(self, img):
        """Decode encoded JPEG bytes and apply the augmentation chain.

        Steps, in order: decode, convert to float32, random crop-and-resize to
        ``IMG_HEIGHT`` x ``IMG_WIDTH``, random flip, brightness, contrast,
        saturation and hue jitter, grayscale, Gaussian blur, standardization.
        """
        # One random crop box for the single image in the batch.
        crop_boxes = tf.random.uniform(shape=(1, 4))

        decoded = tf.image.decode_jpeg(img, channels=3)
        as_float = tf.image.convert_image_dtype(decoded, tf.float32)
        batch = tf.expand_dims(as_float, axis=0)
        out = tf.image.crop_and_resize(
            batch, crop_boxes, [0], [IMG_HEIGHT, IMG_WIDTH]
        )
        out = tf.image.random_flip_left_right(out)
        out = tf.image.random_brightness(out, max_delta=0.1)
        out = tf.image.random_contrast(out, lower=0.9, upper=1.1)
        out = tf.image.random_saturation(out, lower=0.9, upper=1.1)
        out = tf.image.random_hue(out, max_delta=0.1)
        out = tf.image.rgb_to_grayscale(out)
        kernel = [GAUSSIAN_BLUR_KERNEL_SIZE, GAUSSIAN_BLUR_KERNEL_SIZE]
        out = tfa.image.gaussian_filter2d(out, filter_shape=kernel)
        return tf.image.per_image_standardization(out)

    def dummy_loss(self, y_true, y_pred):
        """Return a constant zero loss regardless of the inputs."""
        return tf.constant(0.0)

    def create_model(self):
        """Build a ``SimCLRModel`` compiled with Adam and the dummy loss."""
        simclr_model = SimCLRModel()
        simclr_model.compile(optimizer="adam", loss=self.dummy_loss)
        return simclr_model

    def create_dataset(self, num_parallel):
        """Build the input pipeline: list files, read bytes, augment, batch.

        ``num_parallel`` is forwarded as ``num_parallel_calls`` to every map.
        Each element is an ``(image, 0.0)`` pair.
        """
        paths = tf.data.Dataset.list_files(DATASET_LOC, shuffle=True)
        raw = paths.map(tf.io.read_file, num_parallel_calls=num_parallel)
        augmented = raw.map(
            lambda x: (self.process_path(x), tf.constant(0.0)),
            num_parallel_calls=num_parallel,
            name="prep_begin",
        )
        batched = augmented.batch(1)
        return batched.prefetch(buffer_size=tf.data.AUTOTUNE)

    def create_valid_dataset(self, num_parallel):
        """No validation dataset is provided."""
        return None
71 | 
if __name__ == "__main__":
    # Standalone run: build the pipeline and fit the pass-through model on it.
    simclr_app = SimCLRApp(None, None)
    dataset = simclr_app.create_dataset(1)

    # for x in ds:
    #     print(x)
    #     break
    keras_model = simclr_app.create_model()

    # config = ff.FastFlowConfig.from_yaml("/home/myzhao/FastFlow/examples/config.yaml")

    keras_model.fit(dataset, epochs=10)
84 | 
85 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/torch_dataset.py:
--------------------------------------------------------------------------------
 1 | import pathlib
 2 | import matplotlib.pyplot as plt
 3 | import torch
 4 | import torchdata.datapipes as dp
 5 | import librosa
 6 | import numpy as np
 7 | import random
 8 | 
 9 | from evaluation.torch_utils import TorchEvalSpec
10 | from torch.utils.data import DataLoader
11 | 
12 | DATASET_LOC = "datasets/commonvoice/cv-corpus-15.0-delta-2023-09-08/en/clips/"
13 | SAMPLE_FREQ = 8000
14 | N_FFT = 400
15 | FREQ_MASK_PARAM = 80
16 | TIME_MASK_PARAM = 80
17 | N_MELS = 256
18 | 
19 | 
def to_float(x):
    """Return ``x`` converted to ``torch.float32``."""
    target_dtype = torch.float32
    return x.to(target_dtype)
22 | 
23 | 
def time_mask(x):
    """Zero a random contiguous run of columns of ``x`` in place and return it.

    The run length is drawn uniformly from [0, TIME_MASK_PARAM) and truncated
    to an int.
    """
    t = int(np.random.uniform(low=0.0, high=TIME_MASK_PARAM))
    tau = x.shape[1]
    # Clamp the start range: random.randint raises ValueError on an empty
    # range, which happens whenever the input has fewer columns than the mask.
    t0 = random.randint(0, max(0, tau - t))
    x[:, t0 : t0 + t] = 0
    return x
31 | 
32 | 
def frequency_mask(x):
    """Zero a random contiguous run of rows of ``x`` in place and return it.

    The run length is drawn uniformly from [0, FREQ_MASK_PARAM) and truncated
    to an int.
    """
    f = int(np.random.uniform(low=0.0, high=FREQ_MASK_PARAM))
    v = x.shape[0]
    # Clamp the start range: random.randint raises ValueError on an empty
    # range, which happens whenever the input has fewer rows than the mask.
    f0 = random.randint(0, max(0, v - f))
    x[f0 : f0 + f, :] = 0
    return x
40 | 
41 | 
def mel(x):
    """Run ``librosa.feature.melspectrogram`` on ``x`` passed as ``S``."""
    mel_kwargs = dict(S=x, sr=SAMPLE_FREQ, n_mels=N_MELS, n_fft=N_FFT)
    return librosa.feature.melspectrogram(**mel_kwargs)
46 | 
47 | 
def _load_audio(path):
    """Load one audio file; returns librosa's ``(samples, sample_rate)`` pair."""
    return librosa.load(path)


def _resample(loaded):
    """Resample a ``(samples, sample_rate)`` pair to ``SAMPLE_FREQ``."""
    return librosa.resample(
        y=loaded[0], orig_sr=loaded[1], target_sr=SAMPLE_FREQ
    )


def _power_spectrogram(samples):
    """Return the squared magnitude of the STFT of ``samples``."""
    return np.abs(librosa.stft(samples, n_fft=N_FFT)) ** 2


def _time_stretch(spectrogram):
    """Apply ``librosa.effects.time_stretch`` with ``rate=0.8``."""
    return librosa.effects.time_stretch(spectrogram, rate=0.8, n_fft=N_FFT)


def build_datapipe(root, spec: TorchEvalSpec):
    """Build the audio preprocessing datapipe over the files under ``root``.

    Stages, in order: list files, shard, load, resample, power spectrogram,
    time stretch, time mask, frequency mask, mel spectrogram. ``spec`` is
    accepted for interface compatibility and is not read here.

    Named module-level functions are used for the map stages instead of
    lambdas so the datapipe can be pickled for DataLoader worker processes.
    """
    datapipe = dp.iter.FileLister(root=root, recursive=True)
    # TODO: Evaluate where is a fair place to put this...
    datapipe = datapipe.sharding_filter()
    for stage in (
        _load_audio,
        _resample,
        _power_spectrogram,
        _time_stretch,
        time_mask,
        frequency_mask,
        mel,
    ):
        datapipe = dp.iter.Mapper(datapipe, stage)
    return datapipe
71 | 
72 | 
def get_dataset(spec: TorchEvalSpec):
    """Return a ``DataLoader`` over the preprocessing datapipe.

    The dataset directory is ``DATASET_LOC`` resolved against the directory two
    levels above this file's directory; ``spec.num_workers`` sets the worker
    count.
    """
    clips_dir = pathlib.Path(__file__).resolve().parents[2] / DATASET_LOC
    return DataLoader(
        build_datapipe(str(clips_dir), spec), num_workers=spec.num_workers
    )
83 | 
84 | 
if __name__ == "__main__":
    # Explicit import: older librosa releases do not expose the display
    # submodule through a bare ``import librosa``.
    import librosa.display

    # Smoke test: render the first sample's mel spectrogram to tmp.png.
    dataset = get_dataset(TorchEvalSpec(8, 1))
    for x in dataset:
        print(x)
        print(x.size())

        fig, ax = plt.subplots()
        # Drop the leading batch dimension before converting to dB.
        D = librosa.power_to_db(x.squeeze(0).numpy(), ref=np.max)
        librosa.display.specshow(
            D, y_axis="mel", x_axis="time", sr=SAMPLE_FREQ, ax=ax
        )
        fig.savefig("tmp.png")
        break
98 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/ray_dataset.py:
--------------------------------------------------------------------------------
  1 | import pathlib
  2 | import torch
  3 | import glob
  4 | from torchvision import transforms
  5 | import PIL
  6 | import ray
  7 | import time
  8 | 
  9 | DATASET_LOC = "datasets/imagenette2"
 10 | IMG_HEIGHT = 244
 11 | IMG_WIDTH = 244
 12 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
 13 | 
 14 | 
 15 | class Timer:
 16 |     def __init__(self):
 17 |         self._start = None
 18 |         self._end = None
 19 | 
 20 |     def __enter__(self):
 21 |         self._start = time.perf_counter()
 22 | 
 23 |     def __exit__(self, exc_type, exc_val, exc_tb):
 24 |         self._end = time.perf_counter()
 25 | 
 26 |     def reset(self):
 27 |         self._start = time.perf_counter()
 28 | 
 29 |     def delta(self):
 30 |         if self._start is None or self._end is None:
 31 |             raise RuntimeError()
 32 |         return self._end - self._start
 33 | 
 34 | 
 35 | def read_img(x):
 36 |     return {"image": PIL.Image.open(x["item"])}
 37 | 
 38 | 
 39 | def transform_img(x):
 40 |     transform = transforms.Compose(
 41 |         [
 42 |             transforms.PILToTensor(),
 43 |             transforms.ConvertImageDtype(torch.float),
 44 |             transforms.RandomResizedCrop((IMG_HEIGHT, IMG_WIDTH)),
 45 |             transforms.ColorJitter(0.1, 0.1, 0.1, 0.1),
 46 |             transforms.Grayscale(num_output_channels=1),
 47 |             transforms.GaussianBlur(GAUSSIAN_BLUR_KERNEL_SIZE),
 48 |             transforms.Normalize((0.1307,), (0.3081,)),
 49 |         ]
 50 |     )
 51 |     return {"image": transform(x["image"])}
 52 | 
 53 | 
 54 | def ret_img(x):
 55 |     return x
 56 | 
 57 | 
 58 | def build_ds(root):
 59 |     # Get list of dirs
 60 |     dir_list = []
 61 |     file_list = []
 62 |     for item in pathlib.Path(root).iterdir():
 63 |         if item.is_dir():
 64 |             dir_list.append(str(item))
 65 |             files = glob.glob(f"{str(item)}/*.JPEG")
 66 |             file_list.extend(files)
 67 |     ds = ray.data.from_items(file_list)
 68 |     ds = ds.map(read_img)
 69 |     ds = ds.map(transform_img)
 70 | 
 71 |     return ds
 72 | 
 73 | 
 74 | def get_dataset():
 75 |     data_dir = (
 76 |         pathlib.Path(__file__).resolve().parents[2].joinpath(DATASET_LOC)
 77 |     )
 78 |     train_filepath = pathlib.Path(data_dir) / pathlib.Path("imagenette2/train")
 79 | 
 80 |     ds = build_ds(str(train_filepath))
 81 |     ray.data.DataContext.get_current().execution_options.locality_with_output = (
 82 |         True
 83 |     )
 84 | 
 85 |     return ds
 86 | 
 87 | 
 88 | if __name__ == "__main__":
 89 |     ds = get_dataset()
 90 |     epoch_times = []
 91 |     for _ in range(3):
 92 |         timer = Timer()
 93 |         with timer:
 94 |             for idx, row in enumerate(ds.iter_rows()):
 95 |                 pass
 96 |         epoch_times.append(timer.delta())
 97 |         print(epoch_times[-1])
 98 | 
 99 |     print("Epoch times: {}".format(epoch_times))
100 | 


--------------------------------------------------------------------------------
/evaluation/plots/plot_ablation.py:
--------------------------------------------------------------------------------
 1 | import pandas as pd
 2 | import matplotlib.pyplot as plt
 3 | import seaborn as sns
 4 | 
# Load the ablation results; the columns read below are "Setup", "Pipeline"
# and "Runtime".
file_path = "~/cedar/evaluation/plots/ablation.csv"
data = pd.read_csv(file_path)

# Map the verbose setup names in the CSV to the short labels used in the plot.
rename_dict = {
    "Baseline": "Baseline",
    "plus parallelism": "+P",
    "plus reorder": "+PR",
    "plus offload": "+PRO",
    "plus fusion": "+PROF",
}
data["Setup"] = data["Setup"].map(rename_dict)

# Convert execution time to throughput. NOTE: the "Runtime" column holds the
# reciprocal (throughput) from this point on, despite its name.
data["Runtime"] = 1 / data["Runtime"]

# Normalize each row's throughput by the "Baseline" throughput of the same
# pipeline (falling back to 1 when a pipeline has no baseline row).
normalization_factors = data[data["Setup"] == "Baseline"].set_index(
    "Pipeline"
)["Runtime"]
data["Normalized Runtime"] = data.apply(
    lambda row: row["Runtime"] / normalization_factors.get(row["Pipeline"], 1),
    axis=1,
)
print(data)

# Create the plot with normalized values
f = plt.figure(figsize=(3.33, 1.8), dpi=600)
# sns.set_style("whitegrid")
ax = sns.barplot(
    x="Pipeline",
    y="Normalized Runtime",
    hue="Setup",
    data=data,
    linewidth=0,
    hue_order=["Baseline", "+P", "+PR", "+PRO", "+PROF"],
)

# Add hatches: one pattern per run of 8 consecutive patches.
# NOTE(review): the hard-coded ranges assume 8 bars per setup (i.e. 8
# pipelines), with patches ordered setup by setup - confirm against the CSV.
for i, bar in enumerate(ax.patches):
    if i in range(0, 8):
        bar.set_hatch("//")
    if i in range(8, 16):
        bar.set_hatch("\\\\")
    if i in range(16, 24):
        bar.set_hatch("--")
    if i in range(24, 32):
        bar.set_hatch("..")
    if i in range(32, 40):
        bar.set_hatch("oo")
# Patches 40-44 are presumably the five legend swatches - TODO confirm; this
# raises IndexError if fewer than 45 patches exist.
ax.patches[40].set_hatch("////")
ax.patches[41].set_hatch("\\\\\\")
ax.patches[42].set_hatch("----")
ax.patches[43].set_hatch("..")
ax.patches[44].set_hatch("oo")

# Unique pipeline labels, used to place the group separators below.
pipeline_labels = data["Pipeline"].unique()  # Get unique pipeline labels

# Adding vertical lines to mark ranges of each x category
for i in range(len(pipeline_labels) - 1):
    ax.axvline(
        x=i + 0.5, color="grey", linestyle="-", linewidth=0.5
    )  # End of group

plt.xticks(rotation=30, ha="right", fontsize=6)
plt.yticks(fontsize=6)
# plt.yticks((0, 0.5, 1), fontsize=6)
# ax.set_ylim((0, 50))
ax.tick_params(axis="both", which="major", pad=0)
# Set y ticks to small font
ax.set_ylabel("Normalized Throughput", fontsize=6)
ax.set_xlabel("")
ax.tick_params(axis="x", direction="out", length=3, color="black")
# ax.set_yscale("log")

# Cap the y axis at 26.
ax.set_ylim((0, 26))

# Write the throughput for the ASR PROF setup above the bar (hard-coded value,
# positioned in axes coordinates).
ax.text(0.93, 1.02, "43.83", fontsize=5, transform=ax.transAxes)


ax.legend(fontsize=5, title_fontsize="6", ncol=5)

# Lay out and save the figure to ablation.png in the working directory.
plt.tight_layout()
# ax.legend(fontsize=6, title_fontsize='6')
f.savefig("ablation.png", bbox_inches="tight")
94 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/commonvoice/tf_dataset.py:
--------------------------------------------------------------------------------
  1 | import tensorflow as tf
  2 | import matplotlib.pyplot as plt
  3 | import pathlib
  4 | import librosa
  5 | import random
  6 | import numpy as np
  7 | 
  8 | from evaluation.tf_utils import TFEvalSpec
  9 | 
 10 | DATASET_LOC = "datasets/commonvoice/cv-corpus-15.0-delta-2023-09-08/en/clips/"
 11 | SAMPLE_FREQ = 8000
 12 | N_FFT = 400
 13 | FREQ_MASK_PARAM = 80
 14 | TIME_MASK_PARAM = 80
 15 | N_MELS = 256
 16 | 
 17 | 
 18 | def time_mask(x):
 19 |     t = np.random.uniform(low=0.0, high=TIME_MASK_PARAM)
 20 |     t = int(t)
 21 |     tau = x.shape[1]
 22 |     t0 = random.randint(0, tau - t)
 23 |     x[:, t0 : t0 + t] = 0
 24 |     return x
 25 | 
 26 | 
 27 | def frequency_mask(x):
 28 |     f = np.random.uniform(low=0.0, high=FREQ_MASK_PARAM)
 29 |     f = int(f)
 30 |     v = x.shape[0]
 31 |     f0 = random.randint(0, v - f)
 32 |     x[f0 : f0 + f, :] = 0
 33 |     return x
 34 | 
 35 | 
 36 | def mel(x):
 37 |     return librosa.feature.melspectrogram(
 38 |         S=x, sr=SAMPLE_FREQ, n_mels=N_MELS, n_fft=N_FFT
 39 |     )
 40 | 
 41 | 
 42 | @tf.py_function(Tout=tf.float32)
 43 | def process_path(path):
 44 |     x, sr = librosa.load(path.numpy())
 45 |     x = librosa.resample(y=x, orig_sr=sr, target_sr=SAMPLE_FREQ)
 46 |     x = np.abs(librosa.stft(x, n_fft=N_FFT)) ** 2
 47 |     x = librosa.effects.time_stretch(x, rate=0.8, n_fft=N_FFT)
 48 |     x = time_mask(x)
 49 |     x = frequency_mask(x)
 50 |     x = mel(x)
 51 |     return x
 52 | 
 53 | 
 54 | def build_dataset(data_dir, spec):
 55 |     ds = tf.data.Dataset.list_files(f"{data_dir}/*", shuffle=False)
 56 |     ds = ds.map(
 57 |         lambda x: process_path(x), num_parallel_calls=spec.num_parallel_calls
 58 |     )
 59 |     ds = ds.prefetch(buffer_size=tf.data.AUTOTUNE)
 60 | 
 61 |     if spec.service_addr:
 62 |         print(
 63 |             "Using tf.data.service with address {}".format(spec.service_addr)
 64 |         )
 65 |         ds = ds.apply(
 66 |             tf.data.experimental.service.distribute(
 67 |                 processing_mode="distributed_epoch", service=spec.service_addr
 68 |             )
 69 |         )
 70 |     return ds
 71 | 
 72 | 
 73 | def get_dataset(spec: TFEvalSpec):
 74 |     data_dir = (
 75 |         pathlib.Path(__file__).resolve().parents[2].joinpath(DATASET_LOC)
 76 |     )
 77 | 
 78 |     # return gen_files(train_filepath)
 79 | 
 80 |     return build_dataset(
 81 |         str(data_dir),
 82 |         spec,
 83 |     )
 84 | 
 85 | 
 86 | if __name__ == "__main__":
 87 |     batch_size = 8
 88 |     num_workers = tf.data.AUTOTUNE
 89 | 
 90 |     tf_dataset = get_dataset(TFEvalSpec(1, 1))
 91 | 
 92 |     for i, x in enumerate(tf_dataset):
 93 |         print(x)
 94 |         print(x.shape)
 95 | 
 96 |         fig, ax = plt.subplots()
 97 |         D = librosa.power_to_db(x.numpy(), ref=np.max)
 98 |         img = librosa.display.specshow(
 99 |             D, y_axis="mel", x_axis="time", sr=SAMPLE_FREQ, ax=ax
100 |         )
101 |         fig.savefig("tmptf.png")
102 |         break
103 | 


--------------------------------------------------------------------------------
/evaluation/pipelines/simclrv2/tf_dataset.py:
--------------------------------------------------------------------------------
 1 | import tensorflow as tf
 2 | import tensorflow_addons as tfa
 3 | import pathlib
 4 | 
 5 | from evaluation.tf_utils import TFEvalSpec
 6 | 
 7 | DATASET_LOC = "datasets/imagenette2"
 8 | IMG_HEIGHT = 244
 9 | IMG_WIDTH = 244
10 | GAUSSIAN_BLUR_KERNEL_SIZE = 11
11 | GCS_PATTERN = "gs://ember-data/imagenette2/train/*/*"
12 | 
13 | 
def process_path(img):
    """Decode encoded JPEG bytes and apply the augmentation chain.

    Steps, in order: decode, convert to float32, random crop-and-resize to
    ``IMG_HEIGHT`` x ``IMG_WIDTH``, random flip, brightness, contrast,
    saturation and hue jitter, grayscale, Gaussian blur, standardization.
    """
    # One random crop box for the single image in the batch.
    crop_boxes = tf.random.uniform(shape=(1, 4))

    decoded = tf.image.decode_jpeg(img, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    batch = tf.expand_dims(as_float, axis=0)
    out = tf.image.crop_and_resize(
        batch, crop_boxes, [0], [IMG_HEIGHT, IMG_WIDTH]
    )
    out = tf.image.random_flip_left_right(out)
    out = tf.image.random_brightness(out, max_delta=0.1)
    out = tf.image.random_contrast(out, lower=0.9, upper=1.1)
    out = tf.image.random_saturation(out, lower=0.9, upper=1.1)
    out = tf.image.random_hue(out, max_delta=0.1)
    out = tf.image.rgb_to_grayscale(out)
    kernel = [GAUSSIAN_BLUR_KERNEL_SIZE, GAUSSIAN_BLUR_KERNEL_SIZE]
    out = tfa.image.gaussian_filter2d(out, filter_shape=kernel)
    return tf.image.per_image_standardization(out)
34 | 
35 | 
def build_dataset(data_dir, spec):
    """Build the tf.data image pipeline.

    File names come from ``GCS_PATTERN`` when ``spec.read_from_remote`` is
    truthy, otherwise from a shuffled listing of ``data_dir/*/*``. Files are
    read, augmented with ``process_path``, batched by ``spec.batch_size`` and
    prefetched. When ``spec.service_addr`` is truthy the dataset is
    additionally distributed through tf.data service.
    """
    if spec.read_from_remote:
        remote_files = tf.io.gfile.glob(GCS_PATTERN)
        ds = tf.data.Dataset.from_tensor_slices(remote_files)
    else:
        local_pattern = str(data_dir / "*/*")
        ds = tf.data.Dataset.list_files(local_pattern, shuffle=True)

    ds = ds.map(tf.io.read_file, num_parallel_calls=spec.num_parallel_calls)
    ds = ds.map(
        lambda x: process_path(x),
        num_parallel_calls=spec.num_parallel_calls,
    )
    ds = ds.batch(spec.batch_size).prefetch(buffer_size=tf.data.AUTOTUNE)

    # Local-only run: nothing more to attach.
    if not spec.service_addr:
        return ds

    print("Using tf.data.service with address {}".format(spec.service_addr))
    distribute = tf.data.experimental.service.distribute(
        processing_mode="distributed_epoch", service=spec.service_addr
    )
    return ds.apply(distribute)
62 | 
63 | 
def get_dataset(spec: TFEvalSpec):
    """Return the dataset built over the ``imagenette2/train`` directory.

    The dataset root is ``DATASET_LOC`` resolved against the directory two
    levels above this file's directory.
    """
    dataset_root = pathlib.Path(__file__).resolve().parents[2] / DATASET_LOC
    train_dir = pathlib.Path(dataset_root) / "imagenette2/train"
    return build_dataset(train_dir, spec)
76 | 
77 | 
if __name__ == "__main__":
    # Smoke test: build the dataset and print its first element and index.
    # (get_dataset resolves the data directory itself, so no path setup is
    # needed here.)
    tf_dataset = get_dataset(TFEvalSpec(1, 1))

    for i, x in enumerate(tf_dataset):
        print(x)
        # print(x.shape)
        print(i)
        break
93 | 


--------------------------------------------------------------------------------
/evaluation/plots/plot_scaling.py:
--------------------------------------------------------------------------------
  1 | import pandas as pd
  2 | import matplotlib.pyplot as plt
  3 | import seaborn as sns
  4 | 
  5 | 
  6 | set_data = {
  7 |     "set_procs": [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16],
  8 |     "observed_tput": [
  9 |         70.76981133,
 10 |         82.05666667,
 11 |         149.46,
 12 |         229.2966667,
 13 |         289.39,
 14 |         351.71,
 15 |         418.7233333,
 16 |         478.2433333,
 17 |         535.5066667,
 18 |         601.7,
 19 |         634.5366667,
 20 |         681.65,
 21 |         729.01,
 22 |         757.992,
 23 |         791.775,
 24 |         820.474,
 25 |         862.17,
 26 |     ],
 27 | }
 28 | 
 29 | target_data = {
 30 |     "target_tput": [
 31 |         40,
 32 |         100,
 33 |         200,
 34 |         400,
 35 |         600,
 36 |     ],
 37 |     "observed_procs": [
 38 |         0,
 39 |         2,
 40 |         3,
 41 |         6,
 42 |         10,
 43 |     ],
 44 | }
 45 | 
f = plt.figure(figsize=(3.33, 1.8), dpi=600)

# Line plot of observed throughput against the configured process count.
ax = sns.lineplot(
    x="set_procs",
    y="observed_tput",
    data=pd.DataFrame(set_data),
    color="blue",
    label="observed throughput",
    linewidth=1,
)
ax.set_ylabel("Throughput (samples/s)", fontsize=6, labelpad=2)
ax.set_xlabel("Distributed Processes", fontsize=6, labelpad=2)
ax.tick_params(axis="x", direction="out", length=2, color="black")
ax.tick_params(axis="y", direction="out", length=2, color="black")
# set tick labels to small
plt.xticks(fontsize=6)
plt.yticks(fontsize=6)

# Add x and y axis grid lines
ax.yaxis.grid(color="lightgray", linestyle="--", linewidth=0.5)
ax.xaxis.grid(color="lightgray", linestyle="--", linewidth=0.5)

# Axis limits; the xmin/xmax and ymin/ymax fractions below depend on these
# spans (17 wide, 950 tall).
ax.set_xlim(-1, 16)
ax.set_ylim(-50, 900)

# Draw horizontal line for target throughput
for i in range(len(target_data["target_tput"])):
    # Make the line go from the left to where the corresponding observed_procs is
    # (xmax is an axes fraction: offset by the -1 lower x limit, over the 17 span).
    ax.axhline(
        target_data["target_tput"][i],
        xmin=0,
        xmax=(target_data["observed_procs"][i] + 1) / 17,
        color="red",
        linewidth=0.4,
        linestyle="-",
        label="target throughput",
    )
    # Draw the vertical line for the observed_procs
    # (ymax is an axes fraction: offset by the -50 lower y limit, over the 950 span).
    ax.axvline(
        target_data["observed_procs"][i],
        ymin=0,
        ymax=(target_data["target_tput"][i] + 50) / 950,
        color="red",
        linewidth=0.4,
        linestyle="-",
    )


# Collect the handles registered so far and relabel the legend.
handles, labels = ax.get_legend_handles_labels()
# Only two labels are supplied although every axhline above registered its own
# handle; presumably just the first two handles end up in the legend - TODO confirm.
ax.legend(
    handles=handles,
    labels=["Observed Throughput", "Target Throughput and Tuned Scale"],
    fontsize=6,
    title_fontsize="6",
)

# Reduce pad between axis and labels
ax.tick_params(axis="both", which="major", pad=2)




# Lay out and save the figure to scaling.png in the working directory.
plt.tight_layout()
# ax.legend(fontsize=6, title_fontsize='6')
f.savefig("scaling.png", bbox_inches="tight")
114 | 


--------------------------------------------------------------------------------