├── .github └── workflows │ └── main.yml ├── .gitignore ├── .gitmodules ├── .nojekyll ├── LICENSE ├── README.md ├── assets ├── clippy.png ├── clippy.psd ├── headline.svg ├── logo.ai ├── logo.png ├── logo.svg └── logo.xcf ├── docker └── Dockerfile ├── docs ├── .nojekyll ├── Makefile ├── _static │ ├── benchmark_results.svg │ ├── benchmarking_results.svg │ ├── benchmarking_results_t.svg │ ├── clippy-transparent-2.png │ ├── clippy-transparent.png │ ├── clippy.png │ ├── dataset_sizes.svg │ ├── headline.svg │ ├── perf_scatterplot.svg │ ├── slipstream.ttf │ └── style.css ├── api │ ├── decoders.rst │ ├── fields.rst │ ├── loader.rst │ ├── transforms.rst │ └── writer.rst ├── api_reference.rst ├── basics.rst ├── benchmarks.rst ├── bottleneck_doctor.rst ├── conf.py ├── examples.rst ├── ffcv_examples │ ├── cifar10.rst │ ├── custom_transforms.rst │ ├── imagenet.rst │ ├── linear_regression.rst │ └── transform_with_inds.rst ├── index.rst ├── make.bat ├── making_dataloaders.rst ├── parameter_tuning.rst ├── performance_guide.rst ├── quickstart.rst ├── working_with_images.rst └── writing_datasets.rst ├── examples ├── cifar │ ├── default_config.yaml │ ├── train_cifar.py │ ├── train_cifar.sh │ └── write_datasets.py └── docs_examples │ ├── custom_transform.py │ ├── linear_regression.py │ └── transform_with_inds.py ├── ffcv-conda.yml ├── ffcv ├── .DS_Store ├── __init__.py ├── benchmarks │ ├── __init__.py │ ├── __main__.py │ ├── benchmark.py │ ├── decorator.py │ └── suites │ │ ├── __init__.py │ │ ├── image_read.py │ │ ├── jpeg_decode.py │ │ └── memory_read.py ├── fields │ ├── __init__.py │ ├── base.py │ ├── basics.py │ ├── bytes.py │ ├── decoders.py │ ├── json.py │ ├── ndarray.py │ └── rgb_image.py ├── libffcv.py ├── loader │ ├── __init__.py │ ├── epoch_iterator.py │ └── loader.py ├── memory_allocator.py ├── memory_managers │ ├── __init__.py │ ├── base.py │ ├── common.py │ ├── os_cache.py │ └── process_cache │ │ ├── __init__.py │ │ ├── context.py │ │ ├── manager.py │ │ ├── page_reader.py 
│ │ └── schedule.py ├── pipeline │ ├── __init__.py │ ├── allocation_query.py │ ├── compiler.py │ ├── graph.py │ ├── operation.py │ ├── pipeline.py │ ├── pipeline_spec.py │ └── state.py ├── reader.py ├── transforms │ ├── __init__.py │ ├── color_jitter.py │ ├── common.py │ ├── cutout.py │ ├── flip.py │ ├── mixup.py │ ├── module.py │ ├── normalize.py │ ├── ops.py │ ├── poisoning.py │ ├── random_resized_crop.py │ ├── replace_label.py │ ├── translate.py │ └── utils │ │ ├── __init__.py │ │ └── fast_crop.py ├── traversal_order │ ├── __init__.py │ ├── base.py │ ├── quasi_random.py │ ├── random.py │ └── sequential.py ├── types.py ├── utils.py └── writer.py ├── install.sh ├── libffcv └── libffcv.cpp ├── mypy.ini ├── setup.py ├── test_data └── pig.png └── tests ├── test_array_field.py ├── test_augmentations.py ├── test_basic_pipeline.py ├── test_cuda_nonblocking.py ├── test_custom_field.py ├── test_image_normalization.py ├── test_image_pipeline.py ├── test_image_read.py ├── test_json_field.py ├── test_loader_filter.py ├── test_memcpy.py ├── test_memory_allocation.py ├── test_memory_leak.py ├── test_memory_reader.py ├── test_partial_batches.py ├── test_partial_pipeline.py ├── test_rrc.py ├── test_traversal_orders.py ├── test_webdataset.py └── test_writer.py /.github/workflows/main.yml: -------------------------------------------------------------------------------- 1 | # This is a basic workflow to help you get started with Actions 2 | 3 | name: Push Docs 4 | 5 | # Controls when the action will run. 
6 | on: 7 | # Triggers the workflow on push events, but only for the main branch 8 | push: 9 | branches: [ main ] 10 | 11 | # Allows you to run this workflow manually from the Actions tab 12 | workflow_dispatch: 13 | 14 | # A workflow run is made up of one or more jobs that can run sequentially or in parallel 15 | jobs: 16 | # This workflow contains a single job called "build" 17 | build: 18 | # The type of runner that the job will run on 19 | runs-on: ubuntu-latest 20 | 21 | # Steps represent a sequence of tasks that will be executed as part of the job 22 | steps: 23 | # Checks-out your repository under $GITHUB_WORKSPACE, so your job can access it 24 | - uses: actions/checkout@v2 25 | 26 | # Runs a set of commands using the runner's shell 27 | - name: Build and upload the docs 28 | run: | 29 | mv docs docs_src 30 | cd docs_src 31 | pip install -U sphinx==6.0.0 karma-sphinx-theme 32 | pip install -U numpy numba tqdm 33 | pip install --upgrade -U pygments 34 | make html 35 | cp -r _build/html ../docs 36 | cp ../.nojekyll ../docs/.nojekyll 37 | echo docs.ffcv.io > ../docs/CNAME 38 | git branch -D ghpages || echo "branch exists" 39 | git checkout -B ghpages 40 | cd .. 
41 | rm -f .gitmodules 42 | rm -rf examples/imagenet-example 43 | git config --global user.email "ailyas@mit.edu" 44 | git config --global user.name "Andrew Ilyas" 45 | git add --force docs 46 | git commit -m generate docs 47 | git push --force -u origin ghpages 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | src 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | .vscode 8 | 9 | # C extensions 10 | *.so 11 | 12 | # Distribution / packaging 13 | .Python 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | share/python-wheels/ 27 | *.egg-info/ 28 | .installed.cfg 29 | *.egg 30 | MANIFEST 31 | 32 | # PyInstaller 33 | # Usually these files are written by a python script from a template 34 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
35 | *.manifest 36 | *.spec 37 | 38 | # Installer logs 39 | pip-log.txt 40 | pip-delete-this-directory.txt 41 | 42 | # Unit test / coverage reports 43 | htmlcov/ 44 | .tox/ 45 | .nox/ 46 | .coverage 47 | .coverage.* 48 | .cache 49 | nosetests.xml 50 | coverage.xml 51 | *.cover 52 | *.py,cover 53 | .hypothesis/ 54 | .pytest_cache/ 55 | cover/ 56 | 57 | # Translations 58 | *.mo 59 | *.pot 60 | 61 | # Django stuff: 62 | *.log 63 | local_settings.py 64 | db.sqlite3 65 | db.sqlite3-journal 66 | 67 | # Flask stuff: 68 | instance/ 69 | .webassets-cache 70 | 71 | # Scrapy stuff: 72 | .scrapy 73 | 74 | # Sphinx documentation 75 | docs/_build/ 76 | 77 | # PyBuilder 78 | .pybuilder/ 79 | target/ 80 | 81 | # Jupyter Notebook 82 | .ipynb_checkpoints 83 | 84 | # IPython 85 | profile_default/ 86 | ipython_config.py 87 | 88 | # pyenv 89 | # For a library or package, you might want to ignore these files since the code is 90 | # intended to run in multiple environments; otherwise, check them in: 91 | # .python-version 92 | 93 | # pipenv 94 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 95 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 96 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 97 | # install all needed dependencies. 98 | #Pipfile.lock 99 | 100 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow 101 | __pypackages__/ 102 | 103 | # Celery stuff 104 | celerybeat-schedule 105 | celerybeat.pid 106 | 107 | # SageMath parsed files 108 | *.sage.py 109 | 110 | # Environments 111 | .env 112 | .venv 113 | env/ 114 | venv/ 115 | ENV/ 116 | env.bak/ 117 | venv.bak/ 118 | 119 | # Spyder project settings 120 | .spyderproject 121 | .spyproject 122 | 123 | # Rope project settings 124 | .ropeproject 125 | 126 | # mkdocs documentation 127 | /site 128 | 129 | # mypy 130 | .mypy_cache/ 131 | .dmypy.json 132 | dmypy.json 133 | 134 | # Pyre type checker 135 | .pyre/ 136 | 137 | # pytype static type analyzer 138 | .pytype/ 139 | 140 | # Cython debug symbols 141 | cython_debug/ 142 | 143 | .idea/ 144 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "examples/imagenet-example"] 2 | path = examples/imagenet-example 3 | url = git@github.com:libffcv/ffcv-imagenet.git 4 | -------------------------------------------------------------------------------- /.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/.nojekyll -------------------------------------------------------------------------------- /assets/clippy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/assets/clippy.png -------------------------------------------------------------------------------- /assets/clippy.psd: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/assets/clippy.psd -------------------------------------------------------------------------------- /assets/logo.ai: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/assets/logo.ai -------------------------------------------------------------------------------- /assets/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/assets/logo.png -------------------------------------------------------------------------------- /assets/logo.xcf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/assets/logo.xcf -------------------------------------------------------------------------------- /docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM pytorch/pytorch:latest 2 | 3 | RUN apt-get update && apt-get install -y --no-install-recommends \ 4 | software-properties-common \ 5 | build-essential \ 6 | curl \ 7 | git \ 8 | ffmpeg 9 | 10 | RUN conda update conda && \ 11 | conda create -n ffcv python=3.9 \ 12 | cupy \ 13 | pkg-config \ 14 | compilers \ 15 | libjpeg-turbo \ 16 | opencv \ 17 | pytorch \ 18 | torchvision \ 19 | cudatoolkit=11.3 \ 20 | numba -c pytorch -c conda-forge 21 | 22 | RUN echo "source activate" >> ~/.bashrc 23 | RUN echo "conda activate ffcv" >> ~/.bashrc 24 | 25 | RUN git clone https://github.com/libffcv/ffcv.git 26 | 27 | RUN conda run -n ffcv pip install ffcv 28 | 29 | # To test: 30 | # 1- build the Dockerfile (e.g. docker build -t ffcv .) 31 | # 2- login to the docker container (e.g. 
docker run -it --gpus all ffcv bash) 32 | # 3- cd ffcv/examples/cifar 33 | # 4- bash train_cifar.sh 34 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/docs/.nojekyll -------------------------------------------------------------------------------- /docs/Makefile: -------------------------------------------------------------------------------- 1 | # Minimal makefile for Sphinx documentation 2 | # 3 | 4 | # You can set these variables from the command line, and also 5 | # from the environment for the first two. 6 | SPHINXOPTS ?= 7 | SPHINXBUILD ?= sphinx-build 8 | SOURCEDIR = . 9 | BUILDDIR = _build 10 | 11 | # Put it first so that "make" without argument is like "make help". 12 | help: 13 | @$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 14 | 15 | .PHONY: help Makefile 16 | 17 | # Catch-all target: route all unknown targets to Sphinx using the new 18 | # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). 
19 | %: Makefile 20 | @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O) 21 | -------------------------------------------------------------------------------- /docs/_static/clippy-transparent-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/docs/_static/clippy-transparent-2.png -------------------------------------------------------------------------------- /docs/_static/clippy-transparent.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/docs/_static/clippy-transparent.png -------------------------------------------------------------------------------- /docs/_static/clippy.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/docs/_static/clippy.png -------------------------------------------------------------------------------- /docs/_static/slipstream.ttf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/libffcv/ffcv/3a12966b3afe3a81733a732e633317d747bfaac7/docs/_static/slipstream.ttf -------------------------------------------------------------------------------- /docs/_static/style.css: -------------------------------------------------------------------------------- 1 | .viewcode-link{ 2 | float: right; 3 | } 4 | 5 | header { 6 | padding: 0; 7 | } 8 | 9 | .site-title{ 10 | font-family: 'Slipstream Forward', sans-serif; 11 | font-size: 4rem !important; 12 | } 13 | 14 | @font-face { 15 | font-family: 'Slipstream Forward'; 16 | src: url('slipstream.ttf') format('truetype'); 17 | } -------------------------------------------------------------------------------- /docs/api/decoders.rst: 
-------------------------------------------------------------------------------- 1 | ffcv.fields.decoders module 2 | =========================== 3 | 4 | .. automodule:: ffcv.fields.decoders 5 | :members: -------------------------------------------------------------------------------- /docs/api/fields.rst: -------------------------------------------------------------------------------- 1 | ffcv.fields module 2 | ====================== 3 | 4 | Fields define the type and the storage method of each of the attributes of a 5 | training sample. See the :ref:`Writing a dataset to FFCV format` and 6 | :ref:`Making an FFCV dataloader` guides for information on usage. 7 | 8 | .. automodule:: ffcv.fields 9 | :members: -------------------------------------------------------------------------------- /docs/api/loader.rst: -------------------------------------------------------------------------------- 1 | ffcv.loader module 2 | ====================== 3 | 4 | .. automodule:: ffcv.loader 5 | :members: -------------------------------------------------------------------------------- /docs/api/transforms.rst: -------------------------------------------------------------------------------- 1 | ffcv.transforms module 2 | ====================== 3 | 4 | .. automodule:: ffcv.transforms 5 | :members: -------------------------------------------------------------------------------- /docs/api/writer.rst: -------------------------------------------------------------------------------- 1 | ffcv.writer module 2 | ====================== 3 | 4 | .. automodule:: ffcv.writer 5 | :members: -------------------------------------------------------------------------------- /docs/api_reference.rst: -------------------------------------------------------------------------------- 1 | API Reference 2 | ============= 3 | 4 | .. 
toctree:: 5 | 6 | api/writer 7 | api/transforms 8 | api/loader 9 | api/fields 10 | api/decoders -------------------------------------------------------------------------------- /docs/basics.rst: -------------------------------------------------------------------------------- 1 | Getting started 2 | =============== 3 | 4 | Working with FFCV just requires two steps: 5 | 6 | 1. :ref:`Converting an existing dataset into FFCV format <Writing a dataset to FFCV format>`, and 7 | 2. :ref:`Making FFCV data loaders <Making an FFCV dataloader>`. 8 | 9 | In this section, we'll go over these steps one by one, and go from scratch to a 10 | complete data loader ready for training. 11 | 12 | .. toctree:: 13 | writing_datasets 14 | making_dataloaders 15 | :maxdepth: 2 -------------------------------------------------------------------------------- /docs/bottleneck_doctor.rst: -------------------------------------------------------------------------------- 1 | The Bottleneck Doctor 2 | ====================== 3 | .. image:: /_static/clippy-transparent-2.png 4 | :width: 100% 5 | 6 | To summarize the scenarios from the :ref:`Tuning Guide`, we provide a map from a 7 | type of *system bottleneck* to the FFCV options that will help get the most 8 | performance out of your system: 9 | 10 | Disk-read bottlenecks 11 | --------------------- 12 | What if your GPUs sit idle from low disk or throughput? 13 | Maybe you're reading from a networked drive, maybe you have too many GPUs; 14 | either way, try: 15 | 16 | - If your dataset fits in memory, use **OS-level page caching** (enabled by 17 | default in FFCV) to ensure that concurrent training executions properly 18 | exploit caching. 19 | - If your dataset does not fit in memory, use **process-level page caching**, 20 | (enabled by setting ``os_cache=False`` when constructing the 21 | :class:`ffcv.loader.Loader`) to avoid caching the entire dataset at once. 
22 | - Especially when using process-level caching, consider using the **quasi-random 23 | data sampler**, enabled using the ``order=OrderOption.QUASI_RANDOM`` argument to 24 | the :class:`~ffcv.loader.Loader` constructor. Quasi-random sampling tries to 25 | imitate random sampling while minimizing the underlying number of disk reads. 26 | (Again, note that ``QUASI_RANDOM`` is not yet supported for distributed training.) 27 | - Another option for computer vision datasets is **storing resized images**: many 28 | datasets have gigantic images that end up being resized and cropped anyway in 29 | the data augmentation pipeline. You can avoid paying the cost of loading these 30 | giant images by writing them to an appropriate side length in the first place 31 | with :class:`ffcv.writer.DatasetWriter` (see the :ref:`Working with Image Data in FFCV` guide). 32 | - Similarly, you can **store images in JPEG format** to save both disk space and 33 | reading time, and lower serialized JPEG quality to decrease storage sizes. 34 | 35 | CPU bottlenecks 36 | --------------- 37 | All CPUs at 100% and you're still not hitting maximal GPU usage? Consider the 38 | following: 39 | 40 | - Use pre-made, **JIT-compiled augmentations** from :mod:`ffcv.transforms`: these 41 | augmentations use pre-allocated pinned memory, and are fused together and 42 | compiled to machine code at runtime, making them a much faster alternative to 43 | standard data augmentation functions. 44 | - **Make your own** JIT-compiled augmentations: If you don't see your desired 45 | augmentation among the pre-implemented ones, implementing your own efficient 46 | augmentation is simple and only requires implementing a single Python 47 | function. See any of the existing augmentations for an example, or read the 48 | `Customization guide <#>`_ (coming soon!) for a tutorial. 
49 | - **Store (some) raw pixel data**: FFCV allows you to smoothly 50 | trade off I/O workload and compute workload (raw pixels require no JPEG decoding) by 51 | randomly storing a specified fraction of the dataset as raw pixel data instead 52 | of JPEG. 53 | 54 | GPU bottlenecks 55 | --------------- 56 | Even if you're not bottlenecked by data loading, FFCV can still help you 57 | accelerate your system: 58 | 59 | - **Asynchronous CPU-GPU data transfer**: we always asynchronously transfer 60 | data, and also include tools for ensuring unblocked GPU execution. 61 | - **Train multiple models on the same GPU**: Fully 62 | asynchronous thread-based data loading means that unlike for other data loading 63 | systems, different training processes using FFCV running on the same GPU won't 64 | block each other. 65 | - **Offload compute to the CPU**: because FFCV offers extremely fast JIT-compiled 66 | data transformations, it's often helpful to move parts of the data pipeline (e.g., 67 | input normalization or image augmentation) to the CPU; FFCV will handle compilation 68 | and parallelization of these functions so that the CPU-induced slowdown isn't too 69 | much, and the freed-up GPU time can be used for more GPU-intensive tasks (e.g., 70 | matrix multiplication). 71 | 72 | .. note:: 73 | 74 | This list is limited to what FFCV offers in data loading; check out 75 | guides like `the PyTorch performance guide 76 | <https://pytorch.org/tutorials/recipes/recipes/tuning_guide.html>`_ for more 77 | model-based ways to speed up training. 78 | -------------------------------------------------------------------------------- /docs/conf.py: -------------------------------------------------------------------------------- 1 | # Configuration file for the Sphinx documentation builder. 2 | # 3 | # This file only contains a selection of the most common options. 
For a full 4 | # list see the documentation: 5 | # https://www.sphinx-doc.org/en/master/usage/configuration.html 6 | 7 | # -- Path setup -------------------------------------------------------------- 8 | 9 | # If extensions (or modules to document with autodoc) are in another directory, 10 | # add these directories to sys.path here. If the directory is relative to the 11 | # documentation root, use os.path.abspath to make it absolute, like shown here. 12 | # 13 | import os 14 | import sys 15 | sys.path.insert(0, os.path.abspath('..')) 16 | 17 | 18 | # -- Project information ----------------------------------------------------- 19 | 20 | project = 'FFCV' 21 | copyright = '2022, ffcv' 22 | author = 'ffcv' 23 | 24 | 25 | # -- General configuration --------------------------------------------------- 26 | 27 | # Add any Sphinx extension module names here, as strings. They can be 28 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom 29 | # ones. 30 | extensions = [ 31 | 'sphinx.ext.autodoc', 32 | 'sphinx.ext.autosummary', 33 | 'sphinx.ext.napoleon', 34 | 'sphinx.ext.viewcode', 35 | 'sphinx.ext.autosectionlabel' 36 | ] 37 | 38 | # Add any paths that contain templates here, relative to this directory. 39 | templates_path = ['_templates'] 40 | 41 | # List of patterns, relative to source directory, that match files and 42 | # directories to ignore when looking for source files. 43 | # This pattern also affects html_static_path and html_extra_path. 44 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] 45 | 46 | 47 | autodoc_mock_imports = ['torch', 'torchvision', 'cv2', 'PIL', 'ffcv.libffcv'] 48 | autodoc_member_order = 'bysource' 49 | 50 | # -- Options for HTML output ------------------------------------------------- 51 | 52 | # The theme to use for HTML and HTML Help pages. See the documentation for 53 | # a list of builtin themes. 
54 | # 55 | html_theme = 'karma_sphinx_theme' 56 | autodoc_default_options = { 57 | 'undoc-members': False, 58 | } 59 | 60 | # Add any paths that contain custom static files (such as style sheets) here, 61 | # relative to this directory. They are copied after the builtin static files, 62 | # so a file named "default.css" will overwrite the builtin "default.css". 63 | html_static_path = ['_static'] 64 | 65 | html_css_files = [ 66 | 'style.css', 67 | ] -------------------------------------------------------------------------------- /docs/examples.rst: -------------------------------------------------------------------------------- 1 | Examples 2 | ========= 3 | 4 | The first two examples use FFCV to train machine learning models 5 | to illustrate its ease of use as well as the significant gains in speed. 6 | The next two examples cover how to use **custom transforms** in data loaders. 7 | 8 | 9 | Finally, we provide a self-contained example repository for training ImageNet 10 | quickly and accurately. 11 | 12 | .. toctree:: 13 | ffcv_examples/cifar10.rst 14 | ffcv_examples/linear_regression.rst 15 | ffcv_examples/custom_transforms.rst 16 | ffcv_examples/transform_with_inds.rst 17 | ffcv_examples/imagenet.rst 18 | :maxdepth: 1 -------------------------------------------------------------------------------- /docs/ffcv_examples/imagenet.rst: -------------------------------------------------------------------------------- 1 | ImageNet Fast Training 2 | ====================== 3 | 4 | As a larger-scale example of how FFCV can be used for accelerated machine 5 | learning, we provide an entirely 6 | `self-contained ImageNet example <https://github.com/libffcv/ffcv-imagenet>`_ 7 | that allows us to train ImageNet models quickly and accurately: 8 | 9 | .. image:: /_static/perf_scatterplot.svg 10 | :width: 100% 11 | 12 | See the README.md file of the linked repository for more details. 
-------------------------------------------------------------------------------- /docs/ffcv_examples/transform_with_inds.rst: -------------------------------------------------------------------------------- 1 | Custom transforms with indices 2 | =============================== 3 | 4 | Another invaluable feature of FFCV transforms is that, by assigning the 5 | ``with_indices`` property of the transformation function (so below, by setting 6 | ``corrupt_fixed.with_indices=True``), we get access to a *third* transform 7 | argument that contains the index of each image in the batch within the dataset. 8 | This feature makes it possible to implement transforms in FFCV that are not 9 | possible in standard PyTorch: for example, we can implement an augmentation that 10 | corrupts the labels of a *fixed* set of images throughout training. 11 | 12 | .. code-block:: python 13 | 14 | class CorruptFixedLabels(Operation): 15 | def generate_code(self) -> Callable: 16 | parallel_range = Compiler.get_iterator() 17 | # dst will be None since we don't ask for an allocation 18 | def corrupt_fixed(labs, _, inds): 19 | for i in parallel_range(labs.shape[0]): 20 | # Because the random seed is tied to the image index, the 21 | # same images will be corrupted every epoch: 22 | np.random.seed(inds[i]) 23 | if np.random.rand() < 0.05: 24 | # They will also be corrupted to a deterministic label: 25 | labs[i] = np.random.randint(low=0, high=10) 26 | return labs 27 | 28 | corrupt_fixed.is_parallel = True 29 | corrupt_fixed.with_indices = True 30 | return corrupt_fixed 31 | 32 | def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: 33 | # No updates to state or extra memory necessary! 34 | return previous_state, None 35 | 36 | We provide the corresponding script to test the above augmentation `here <https://github.com/libffcv/ffcv/blob/main/examples/docs_examples/transform_with_inds.py>`_. 
-------------------------------------------------------------------------------- /docs/make.bat: -------------------------------------------------------------------------------- 1 | @ECHO OFF 2 | 3 | pushd %~dp0 4 | 5 | REM Command file for Sphinx documentation 6 | 7 | if "%SPHINXBUILD%" == "" ( 8 | set SPHINXBUILD=sphinx-build 9 | ) 10 | set SOURCEDIR=. 11 | set BUILDDIR=_build 12 | 13 | if "%1" == "" goto help 14 | 15 | %SPHINXBUILD% >NUL 2>NUL 16 | if errorlevel 9009 ( 17 | echo. 18 | echo.The 'sphinx-build' command was not found. Make sure you have Sphinx 19 | echo.installed, then set the SPHINXBUILD environment variable to point 20 | echo.to the full path of the 'sphinx-build' executable. Alternatively you 21 | echo.may add the Sphinx directory to PATH. 22 | echo. 23 | echo.If you don't have Sphinx installed, grab it from 24 | echo.https://www.sphinx-doc.org/ 25 | exit /b 1 26 | ) 27 | 28 | %SPHINXBUILD% -M %1 %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 29 | goto end 30 | 31 | :help 32 | %SPHINXBUILD% -M help %SOURCEDIR% %BUILDDIR% %SPHINXOPTS% %O% 33 | 34 | :end 35 | popd 36 | -------------------------------------------------------------------------------- /docs/performance_guide.rst: -------------------------------------------------------------------------------- 1 | Performance Guide 2 | ================= 3 | 4 | In this section we'll go into a bit more detail about how to get the most out of 5 | FFCV. 6 | 7 | In :ref:`Working with Image Data in FFCV`, we give more details on working with image data, 8 | including how to fine-tune to your specific resource requirements. 9 | 10 | In the :ref:`Tuning Guide`, we give recommendations on how to optimize your 11 | system for a couple of common use cases. 12 | 13 | In :ref:`The Bottleneck Doctor`, we distill the concepts from the tuning 14 | guide into a general map from the scarcest system resources to FFCV options. 15 | 16 | .. 
toctree:: 17 | working_with_images 18 | parameter_tuning 19 | bottleneck_doctor 20 | :maxdepth: 2 21 | -------------------------------------------------------------------------------- /docs/quickstart.rst: -------------------------------------------------------------------------------- 1 | Quickstart 2 | =========== 3 | 4 | Accelerate *any* learning system with `ffcv`: getting started takes just a few 5 | lines of code! 6 | First, convert your dataset into `ffcv` format (`ffcv` converts both indexed 7 | PyTorch datasets and `WebDatasets `_): 8 | 9 | .. code-block:: python 10 | 11 | from ffcv.writer import DatasetWriter 12 | from ffcv.fields import RGBImageField, IntField 13 | 14 | # Your dataset (`torch.utils.data.Dataset`) of (image, label) pairs 15 | my_dataset = make_my_dataset() 16 | write_path = '/output/path/for/converted/ds.beton' 17 | 18 | # Pass a type for each data field 19 | writer = DatasetWriter(write_path, { 20 | # Tune options to optimize dataset size, throughput at train-time 21 | 'image': RGBImageField( 22 | max_resolution=256 23 | ), 24 | 'label': IntField() 25 | }) 26 | 27 | # Write dataset 28 | writer.from_indexed_dataset(my_dataset) 29 | 30 | Then replace your old loader with the `ffcv` loader at train time (in PyTorch, 31 | no other changes required!): 32 | 33 | .. 
code-block:: python 34 | 35 | from ffcv.loader import Loader, OrderOption 36 | from ffcv.transforms import ToTensor, ToDevice, ToTorchImage, Cutout 37 | from ffcv.fields.decoders import IntDecoder, RandomResizedCropRGBImageDecoder 38 | 39 | # Random resized crop 40 | decoder = RandomResizedCropRGBImageDecoder((224, 224)) 41 | 42 | # Data decoding and augmentation 43 | image_pipeline = [decoder, Cutout(), ToTensor(), ToTorchImage(), ToDevice(0)] 44 | label_pipeline = [IntDecoder(), ToTensor(), ToDevice(0)] 45 | 46 | # Pipeline for each data field 47 | pipelines = { 48 | 'image': image_pipeline, 49 | 'label': label_pipeline 50 | } 51 | 52 | # Replaces PyTorch data loader (`torch.utils.data.DataLoader`) 53 | loader = Loader(write_path, batch_size=bs, num_workers=num_workers, 54 | order=OrderOption.RANDOM, pipelines=pipelines) 55 | 56 | # rest of training / validation proceeds identically 57 | for epoch in range(epochs): 58 | ... 59 | 60 | See :ref:`here <Getting started>` for a more detailed guide to deploying `ffcv` for your dataset. 
61 | -------------------------------------------------------------------------------- /examples/cifar/default_config.yaml: -------------------------------------------------------------------------------- 1 | data: 2 | gpu: 0 3 | num_workers: 8 4 | train_dataset: /tmp/cifar_train.beton 5 | val_dataset: /tmp/cifar_test.beton 6 | training: 7 | batch_size: 512 8 | epochs: 24 9 | lr: 0.5 10 | momentum: 0.9 11 | lr_peak_epoch: 5 12 | momentum: 0.9 13 | weight_decay: 5e-4 14 | label_smoothing: 0.1 15 | lr_tta: true 16 | num_workers: 8 -------------------------------------------------------------------------------- /examples/cifar/train_cifar.sh: -------------------------------------------------------------------------------- 1 | python write_datasets.py --config-file default_config.yaml 2 | 3 | python train_cifar.py --config-file default_config.yaml -------------------------------------------------------------------------------- /examples/cifar/write_datasets.py: -------------------------------------------------------------------------------- 1 | from argparse import ArgumentParser 2 | from typing import List 3 | import time 4 | import numpy as np 5 | from tqdm import tqdm 6 | 7 | import torch as ch 8 | import torchvision 9 | 10 | from fastargs import get_current_config 11 | from fastargs.decorators import param 12 | from fastargs import Param, Section 13 | from fastargs.validation import And, OneOf 14 | 15 | from ffcv.writer import DatasetWriter 16 | from ffcv.fields import IntField, RGBImageField 17 | 18 | Section('data', 'arguments to give the writer').params( 19 | train_dataset=Param(str, 'Where to write the new dataset', required=True), 20 | val_dataset=Param(str, 'Where to write the new dataset', required=True), 21 | ) 22 | 23 | @param('data.train_dataset') 24 | @param('data.val_dataset') 25 | def main(train_dataset, val_dataset): 26 | datasets = { 27 | 'train': torchvision.datasets.CIFAR10('/tmp', train=True, download=True), 28 | 'test': 
class PickACorner(Operation):
    """Custom transform that crops every image to one of two opposite corners.

    Each image in a batch is replaced by either its top-left or its
    bottom-right quadrant (half the height, half the width), chosen
    independently and with equal probability.
    """

    def generate_code(self):
        """Build the per-batch function that performs the crop.

        Returns:
            A function ``pick_a_corner(images, dst)`` that writes the chosen
            quadrant of ``images[i]`` into ``dst[i]`` and returns ``dst``.
            The function is tagged with ``is_parallel = True`` and iterates
            with the range object handed out by ``Compiler.get_iterator()``.
        """
        parallel_range = Compiler.get_iterator()

        def pick_a_corner(images, dst):
            # images is indexed as (batch, height, width, ...); dst is
            # expected to have the halved shape requested in
            # declare_state_and_memory.
            half_h = images.shape[1] // 2
            half_w = images.shape[2] // 2
            # One uniform draw in [0, 1) per image. Comparing against 0.5
            # gives a fair coin; an exact `== 0` test would practically never
            # succeed, so the same corner would be picked every time.
            which_corner = np.random.rand(images.shape[0])
            for i in parallel_range(images.shape[0]):
                if which_corner[i] < 0.5:
                    dst[i] = images[i, :half_h, :half_w]
                else:
                    # Start at `size - half` so the crop is exactly
                    # half_h x half_w even for odd sizes (`-size // 2` floors
                    # towards -inf and would give one extra row/column).
                    dst[i] = images[i, images.shape[1] - half_h:,
                                    images.shape[2] - half_w:]

            return dst

        pick_a_corner.is_parallel = True
        return pick_a_corner

    def declare_state_and_memory(self, previous_state):
        """Declare the output shape and request a matching output buffer.

        Args:
            previous_state: pipeline state of the preceding operation; its
                ``shape`` must unpack as ``(h, w, c)``.

        Returns:
            A ``(new_state, allocation)`` tuple: the previous state with its
            shape replaced by ``(h // 2, w // 2, c)``, and an
            ``AllocationQuery`` for that shape with the unchanged dtype.
        """
        h, w, c = previous_state.shape
        new_shape = (h // 2, w // 2, c)

        new_state = replace(previous_state, shape=new_shape)
        mem_allocation = AllocationQuery(new_shape, previous_state.dtype)
        return (new_state, mem_allocation)
4 | 5 | """ 6 | from tqdm import tqdm 7 | import time 8 | import numpy as np 9 | import pickle as pkl 10 | import torch as ch 11 | from torch.utils.data import TensorDataset, DataLoader 12 | from ffcv.fields import NDArrayField, FloatField 13 | from ffcv.fields.basics import FloatDecoder 14 | from ffcv.fields.decoders import NDArrayDecoder 15 | from ffcv.loader import Loader, OrderOption 16 | from ffcv.writer import DatasetWriter 17 | from ffcv.transforms import ToTensor, ToDevice, Squeeze 18 | import os 19 | 20 | # 1,000,000 inputs each of dimension 10,000 = 40GB of data 21 | N, D = 1000000, 10000 22 | USE_FFCV = True 23 | if not os.path.exists('/tmp/linreg_data.pkl'): 24 | X = np.random.rand(N, D).astype('float32') 25 | # Ground-truth vector 26 | W, b = np.random.rand(D).astype('float32'), np.random.rand() 27 | # Response variables 28 | Y = X @ W + b + np.random.randn(N).astype('float32') 29 | pkl.dump((X, W, b, Y), open('/tmp/linreg_data.pkl', 'wb')) 30 | elif not USE_FFCV: 31 | print('Loading from disk...') 32 | X, W, b, Y = pkl.load(open('/tmp/linreg_data.pkl', 'rb')) 33 | 34 | if USE_FFCV and not os.path.exists('/tmp/linreg_data.beton'): 35 | X, W, b, Y = pkl.load(open('/tmp/linreg_data.pkl', 'rb')) 36 | class LinearRegressionDataset: 37 | def __getitem__(self, idx): 38 | return (X[idx], np.array(Y[idx]).astype('float32')) 39 | 40 | def __len__(self): 41 | return len(X) 42 | 43 | writer = DatasetWriter('/tmp/linreg_data.beton', { 44 | 'covariate': NDArrayField(shape=(D,), dtype=np.dtype('float32')), 45 | 'label': NDArrayField(shape=(1,), dtype=np.dtype('float32')), 46 | }, num_workers=16) 47 | 48 | writer.from_indexed_dataset(LinearRegressionDataset()) 49 | else: 50 | print('FFCV file already written') 51 | 52 | 53 | ### PART 2: actual regression 54 | 55 | if not USE_FFCV: 56 | dataset = TensorDataset(ch.tensor(X), ch.tensor(Y)) 57 | train_loader = DataLoader(dataset, batch_size=2048, num_workers=8, shuffle=True) 58 | else: 59 | train_loader = 
def calculate_stats(loader, N):
    """Compute per-feature mean and standard deviation in one pass.

    Args:
        loader: iterable yielding ``(x_batch, y_batch)`` pairs; only the
            first element of each pair is used.
        N: divisor applied to every batch sum (the total number of examples
            the statistics are normalized by).

    Returns:
        ``(mean, stdev)`` where ``stdev`` is ``sqrt(E[x^2] - E[x]^2)`` taken
        along dimension 0 of the batches.
    """
    first_moment = 0.
    second_moment = 0.
    for features, _ in tqdm(loader):
        # Accumulate sum(x) / N and sum(x^2) / N over all batches.
        first_moment = first_moment + features.sum(0) / N
        second_moment = second_moment + features.pow(2).sum(0) / N
    variance = second_moment - first_moment.pow(2)
    return first_moment, ch.sqrt(variance)
class CorruptFixedLabels(Operation):
    """Label transform that deterministically corrupts a fixed subset of labels.

    The random generator is re-seeded with each sample's index before the
    corruption decision is drawn, so a given sample gets the same outcome
    every time it is processed.
    """

    def generate_code(self) -> Callable:
        """Build the per-batch function that corrupts labels in place.

        Returns:
            A function ``corrupt_fixed(labs, _, inds)`` that may overwrite
            entries of ``labs`` and returns ``labs``. It is tagged with
            ``is_parallel`` and ``with_indices``.
        """
        # dst will be None since we don't ask for an allocation
        parallel_range = Compiler.get_iterator()
        def corrupt_fixed(labs, _, inds):
            # labs: batch of labels, modified in place.
            # _:    unused destination slot (no allocation is requested).
            # inds: per-sample indices, used only as random seeds.
            for i in parallel_range(labs.shape[0]):
                # Because the random seed is tied to the image index, the
                # same images will be corrupted every epoch:
                np.random.seed(inds[i])
                # Corrupt with probability 0.2.
                if np.random.rand() < 0.2:
                    # They will also be corrupted to a deterministic label:
                    # (an integer drawn from [0, 10), i.e. 0..9)
                    labs[i] = np.random.randint(low=0, high=10)
            return labs

        corrupt_fixed.is_parallel = True
        # NOTE(review): presumably this flag is what makes the pipeline pass
        # `inds` as the third argument -- confirm against the pipeline code.
        corrupt_fixed.with_indices = True
        return corrupt_fixed

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        """Return the incoming state unchanged and request no memory."""
        # No updates to state or extra memory necessary!
        return previous_state, None
giflib=5.2.1=h7b6447c_0 23 | - gmp=6.2.1=h2531618_2 24 | - gnutls=3.6.15=he1e5248_0 25 | - gxx_impl_linux-64=8.5.0=hb55b52c_11 26 | - gxx_linux-64=8.5.0=h82b3ca4_1 27 | - intel-openmp=2021.3.0=h06a4308_3350 28 | - jpeg=9d=h7f8727e_0 29 | - kernel-headers_linux-64=2.6.32=he073ed8_14 30 | - lame=3.100=h7b6447c_0 31 | - lcms2=2.12=h3be6417_0 32 | - ld_impl_linux-64=2.36.1=hea4e1c9_2 33 | - libedit=3.1.20210714=h7f8727e_0 34 | - libffi=3.2.1=hf484d3e_1007 35 | - libgcc-devel_linux-64=8.5.0=h82e8279_11 36 | - libgcc-ng=9.3.0=h5101ec6_17 37 | - libgomp=9.3.0=h5101ec6_17 38 | - libiconv=1.15=h63c8f33_5 39 | - libidn2=2.3.2=h7f8727e_0 40 | - libjpeg-turbo=2.1.0=h7f98852_0 41 | - libpng=1.6.37=hbc83047_0 42 | - libsanitizer=8.5.0=h70fd0c9_11 43 | - libstdcxx-devel_linux-64=8.5.0=h82e8279_11 44 | - libstdcxx-ng=9.3.0=hd4cf53a_17 45 | - libtasn1=4.16.0=h27cfd23_0 46 | - libtiff=4.2.0=h85742a9_0 47 | - libunistring=0.9.10=h27cfd23_0 48 | - libuv=1.40.0=h7b6447c_0 49 | - libwebp=1.2.0=h89dd481_0 50 | - libwebp-base=1.2.0=h27cfd23_0 51 | - lz4-c=1.9.3=h295c915_1 52 | - mkl=2021.3.0=h06a4308_520 53 | - mkl-service=2.4.0=py38h7f8727e_0 54 | - mkl_fft=1.3.1=py38hd3c417c_0 55 | - mkl_random=1.2.2=py38h51133e4_0 56 | - ncurses=6.2=he6710b0_1 57 | - nettle=3.7.3=hbbd107a_1 58 | - olefile=0.46=pyhd3eb1b0_0 59 | - openh264=2.1.0=hd408876_0 60 | - openssl=1.1.1k=h7f98852_0 61 | - pip=21.2.4=py38h06a4308_0 62 | - pkg-config=0.29.2=h36c2ea0_1008 63 | - python=3.8.0=h0371630_2 64 | - python_abi=3.8=2_cp38 65 | - pytorch-mutex=1.0=cpu 66 | - readline=7.0=h7b6447c_5 67 | - setuptools=58.0.4=py38h06a4308_0 68 | - six=1.16.0=pyhd3eb1b0_0 69 | - sqlite=3.33.0=h62c20be_0 70 | - sysroot_linux-64=2.12=he073ed8_14 71 | - tk=8.6.11=h1ccaba5_0 72 | - typing_extensions=3.10.0.2=pyh06a4308_0 73 | - wheel=0.37.0=pyhd3eb1b0_1 74 | - xz=5.2.5=h7b6447c_0 75 | - zlib=1.2.11=h7b6447c_3 76 | - zstd=1.4.9=haebb681_0 77 | - pip: 78 | - assertpy==1.1 79 | - astor==0.8.1 80 | - backcall==0.2.0 81 | - 
"""Top-level ``ffcv`` package: re-exports the data loader and dataset writer."""
from .loader import Loader
from .writer import DatasetWriter

__version__ = '1.0.2'

# Every name re-exported above belongs in __all__; DatasetWriter was imported
# here but previously left out, so `from ffcv import *` did not provide it.
__all__ = ['Loader', 'DatasetWriter']
class Benchmark(AbstractContextManager, metaclass=ABCMeta):
    """Abstract base class for a single benchmark configuration.

    A benchmark is a context manager: resources are set up on ``__enter__``
    and released on ``__exit__`` (subclasses must provide ``__exit__``), and
    the measured work is done by :meth:`run`.
    """

    def __init__(self, **kwargs):
        """Accept and ignore arbitrary keyword arguments.

        Subclasses override this to capture the parameters of one run.
        """
        pass

    @abstractmethod
    def run(self):
        """Execute the workload being measured once.

        Raises:
            NotImplementedError: always, unless overridden by a subclass.
        """
        # `NotImplemented` is a comparison sentinel, not an exception, and it
        # is not callable; the exception type is NotImplementedError.
        raise NotImplementedError
# Registry of benchmark suites: class name -> (class, list of kwargs dicts).
ALL_SUITES = {}


class FakeSink(object):
    """File-like object that silently discards everything written to it."""

    def write(self, *args):
        pass

    def writelines(self, *args):
        pass

    def close(self, *args):
        pass

    def flush(self, *args):
        pass


def benchmark(arg_values=None):
    """Class decorator registering a benchmark suite in ``ALL_SUITES``.

    Args:
        arg_values: mapping from constructor argument name to the list of
            values to try. One run is registered for every element of the
            cartesian product of these lists. ``None`` (the default) or an
            empty mapping registers a single run with no arguments.

    Returns:
        A decorator that records ``(cls, runs)`` under ``cls.__name__`` and
        returns the class unchanged.
    """
    arg_values = {} if arg_values is None else arg_values
    names = list(arg_values.keys())
    runs = [dict(zip(names, combo)) for combo in product(*arg_values.values())]

    def wrapper(cls):
        ALL_SUITES[cls.__name__] = (cls, runs)
        # Hand the class back; otherwise the decorated name is bound to None
        # in the defining module.
        return cls

    return wrapper


def _format_throughput(n, median_time):
    """Render a throughput cell: 'X it/sec', 'X sec/it', or 'N/A' without n."""
    if n is None:
        return 'N/A'

    throughput = n / median_time
    unit = 'it/sec'
    if throughput < 1:
        # Slow benchmarks read better as seconds per item.
        unit = 'sec/it'
        throughput = 1 / throughput

    # Keep one decimal place.
    throughput = np.round(throughput * 10) / 10
    return str(throughput) + ' ' + unit


def run_all(runs=3, warm_up=1, pattern='*'):
    """Run every registered suite whose name matches ``pattern``.

    Args:
        runs: number of timed executions per configuration; the median of
            these timings is reported.
        warm_up: number of untimed executions before measuring.
        pattern: glob-style pattern matched against the suite (class) name.

    Returns:
        A ``defaultdict(list)`` mapping suite name to one result dict per
        configuration. Each dict holds the configuration arguments plus
        ``'time'`` (median seconds) and ``'throughput'`` (a formatted string,
        ``'N/A'`` when the configuration has no ``'n'`` argument).
    """
    results = defaultdict(list)

    selected_suites = {
        name: suite
        for name, suite in ALL_SUITES.items()
        if pathlib.PurePath(name).match(pattern)
    }

    it_suite = tqdm(selected_suites.items(), desc='Suite', leave=False)

    for suite_name, (cls, args_list) in it_suite:
        it_suite.set_postfix({'name': suite_name})
        it_args = tqdm(args_list, desc='configuration', leave=False)

        for args in it_args:
            bench = cls(**args)
            with bench:
                for _ in range(warm_up):
                    bench.run()

                timings = []
                for _ in range(runs):
                    start = time()
                    bench.run()
                    timings.append(time() - start)

            median_time = np.median(timings)

            results[suite_name].append({
                **args,
                'time': median_time,
                # Without an 'n' argument no rate can be derived; this used
                # to fail on a `None < 1` comparison.
                'throughput': _format_throughput(args.get('n'), median_time)
            })
        it_args.close()
    it_suite.close()
    return results
@benchmark({
    'n': [3000],
    'length': [3000],
    'mode': [
        'raw',
        'jpg'
        ],
    'num_workers': [
        1,
        8,
        16
    ],
    'batch_size': [
        500
    ],
    'size': [
        (32, 32),  # CIFAR
        (300, 500),  # ImageNet
    ],
    'compile': [
        True,
        # False
    ],
    'random_reads': [
        True,
        # False
    ]
})
class ImageReadBench(Benchmark):
    """Benchmark that times decoding RGB images from a freshly written dataset.

    ``__enter__`` writes a temporary dataset of random images, then builds a
    decode function for it; ``run`` decodes ``n`` samples in batches of
    ``batch_size``.
    """

    def __init__(self, n, length, mode, size, random_reads, compile, num_workers, batch_size):
        """Store the configuration of one run and create the source dataset.

        Args:
            n: number of samples read per ``run`` call.
            length: number of samples in the generated dataset.
            mode: value passed as ``write_mode`` to ``RGBImageField``.
            size: image size; unpacked into the decode buffer shape before
                the trailing channel dimension of 3.
            random_reads: read samples in a random permutation if true,
                otherwise in increasing index order.
            compile: value passed to ``Compiler.set_enabled``.
            num_workers: value passed to ``Compiler.set_num_threads``.
            batch_size: number of indices decoded per call.
        """
        self.n = n
        self.mode = mode
        self.length = length
        self.num_workers = num_workers
        self.batch_size = batch_size
        self.size = size
        self.compile = compile
        self.random_reads = random_reads
        self.dataset = DummyDataset(length, size)

    def __enter__(self):
        """Write the dataset to a temporary file and prepare the decode loop."""
        # The temporary file backs the dataset for the lifetime of the
        # benchmark; it is released in __exit__.
        self.handle = NamedTemporaryFile()
        self.handle.__enter__()
        name = self.handle.name

        writer = DatasetWriter(self.length, name, {
            'index': IntField(),
            'value': RGBImageField(write_mode=self.mode)
        })

        with writer:
            writer.write_pytorch_dataset(self.dataset, num_workers=-1, chunksize=100)

        reader = Reader(name)
        manager = OSCacheManager(reader)

        # Compiler settings are applied before any code is generated/compiled.
        Compiler.set_enabled(self.compile)
        Compiler.set_num_threads(self.num_workers)

        memreader = manager.compile_reader()
        Decoder = RGBImageField().get_decoder_class()
        decoder = Decoder()
        # 'f1' is the metadata column used for the image field
        # (presumably the second declared field, 'value' -- TODO confirm).
        decoder.accept_globals(reader.metadata['f1'], memreader)

        # Entered manually here; the matching __exit__ call is in __exit__.
        context = manager.schedule_epoch(np.arange(self.n))
        context.__enter__()
        self.context = context

        decode = decoder.generate_code()
        decode = Compiler.compile(decode)

        # Destination buffer reused for every decoded batch.
        self.buff = np.zeros((self.batch_size, *self.size, 3), dtype='uint8')

        if self.random_reads:
            self.indices = np.random.choice(self.n, size=self.n, replace=False)
        else:
            self.indices = np.arange(self.n)

        def code(indices, buff, state):
            # Decode the indices batch by batch, accumulating one element of
            # each decoded batch into `result`.
            result = 0
            for i in range(0, len(indices), self.batch_size):
                result += decode(indices[i:i + self.batch_size], buff, reader.metadata['f1'], state)[0, 5, 5]
            return result

        self.code = code

    def run(self):
        """Decode all selected indices once using the prepared loop."""
        self.code(self.indices, self.buff, self.context.state)

    def __exit__(self, *args):
        """Release the temporary file, then leave the epoch context."""
        self.handle.__exit__(*args)
        self.context.__exit__(*args)
        pass
class DummyDataset(Dataset):
    """Synthetic dataset of reproducible random byte arrays.

    Item ``i`` is the pair ``(i, bytes_i)`` where ``bytes_i`` is drawn from
    a generator seeded with ``i``, so repeated reads of the same index give
    the same data.
    """

    def __init__(self, l, size):
        """
        Args:
            l: number of samples in the dataset.
            size: ``size`` argument forwarded to ``np.random.randint`` for
                each sample.
        """
        self.l = l
        self.size = size

    def __len__(self):
        return self.l

    def __getitem__(self, index):
        """Return ``(index, data)`` for a valid index.

        Raises:
            IndexError: if ``index`` is outside ``[0, len(self))``.
        """
        # Valid indices are 0 .. l-1. The previous `index > self.l` check
        # let `index == l` through, and negative indices only failed later
        # inside np.random.seed.
        if not 0 <= index < self.l:
            raise IndexError(index)
        np.random.seed(index)
        return index, np.random.randint(0, 255, size=self.size, dtype='u1')
NamedTemporaryFile() 59 | handle = self.handle.__enter__() 60 | name = handle.name 61 | dataset = DummyDataset(self.num_samples, self.size_bytes) 62 | writer = DatasetWriter(self.num_samples, name, { 63 | 'index': IntField(), 64 | 'value': BytesField() 65 | }) 66 | 67 | with writer: 68 | writer.write_pytorch_dataset(dataset, num_workers=-1, chunksize=100) 69 | 70 | reader = Reader(name) 71 | manager = OSCacheManager(reader) 72 | context = manager.schedule_epoch(np.arange(self.num_samples)) 73 | context.__enter__() 74 | self.context = context 75 | 76 | Compiler.set_enabled(self.compiled) 77 | memcpy_c = Compiler.compile(memcpy) 78 | 79 | read_fn = manager.compile_reader() 80 | 81 | if self.random_reads: 82 | indices = np.random.choice(self.num_samples, self.n, replace=False) 83 | else: 84 | indices = np.arange(self.num_samples)[:self.n] 85 | 86 | addresses = reader.alloc_table['ptr'][indices] 87 | 88 | self.buffer = np.zeros(self.size_bytes, dtype='` for information on how to implement a subclass of Field. 
@property
@abstractmethod
def metadata_type(self) -> np.dtype:
    """NumPy dtype of this field's per-sample metadata entry.

    Raises:
        NotImplementedError: always, unless overridden by a subclass.
    """
    # `NotImplemented` is a comparison sentinel and cannot be raised
    # (doing so produces a TypeError); use the exception type, like the
    # other abstract methods of this class.
    raise NotImplementedError
def encode(self, destination, field, malloc):
    # Write the sample value straight into the first slot of its metadata
    # entry. `malloc` is part of the encode signature but is not called
    # here, so nothing is allocated for this field.
    destination[0] = field
def encode(self, destination, field, malloc):
    """Copy one sample's bytes into newly allocated storage.

    Args:
        destination: metadata record for this sample; its ``'ptr'`` and
            ``'size'`` entries are filled in.
        field: the sample data; ``field.size`` elements are stored.
        malloc: callable taking a size and returning ``(pointer, buffer)``.
    """
    num_bytes = field.size
    address, region = malloc(num_bytes)
    # Fill the allocated buffer with the sample's content.
    region[:] = field
    destination['ptr'] = address
    destination['size'] = num_bytes
def generate_code(self) -> Callable:
    """Build the function that copies a batch of samples into ``destination``.

    The returned ``decoder`` closes over the memory reader attached to this
    operation and over a compiled version of ``memcpy``.
    """
    # ``range`` or numba's ``prange``, depending on the Compiler thread count
    my_range = Compiler.get_iterator()
    mem_read = self.memory_read
    my_memcpy = Compiler.compile(memcpy)

    def decoder(indices, destination, metadata, storage_state):
        # One iteration per slot of the batch: the bytes of sample
        # ``indices[ix]`` end up in ``destination[ix]``.
        for ix in my_range(indices.shape[0]):
            sample_id = indices[ix]
            # The metadata entry of the sample is handed to the reader as-is
            ptr = metadata[sample_id]
            data = mem_read(ptr, storage_state)
            # Raw byte copy, hence the uint8 view of the destination row
            my_memcpy(data, destination[ix].view(np.uint8))
        return destination

    return decoder
class TorchTensorField(NDArrayField):
    """A subclass of :class:`~ffcv.fields.Field` supporting
    multi-dimensional fixed size matrices of any torch type.

    The torch dtype is translated to its numpy equivalent and everything
    else is delegated to :class:`NDArrayField`.
    """
    def __init__(self, dtype:ch.dtype, shape:Tuple[int, ...]):
        self.dtype = dtype
        self.shape = shape
        # An empty tensor is enough to ask torch for the matching numpy dtype
        numpy_dtype = ch.zeros(0, dtype=dtype).numpy().dtype
        super().__init__(numpy_dtype, shape)

    def encode(self, destination, field, malloc):
        """Convert the tensor to a numpy array and encode it like the parent."""
        as_array = field.numpy()
        return super().encode(destination, as_array, malloc)
def imdecode(source: np.ndarray, dst: np.ndarray,
             source_height: int, source_width: int,
             crop_height=None, crop_width=None,
             offset_x=0, offset_y=0, scale_factor_num=1, scale_factor_denom=1,
             enable_crop=False, do_flip=False):
    """Decode an encoded image from ``source`` into ``dst`` via the native library.

    All arguments are forwarded to the C ``imdecode`` entry point: the
    address and element count of ``source``, the source dimensions, the
    address of ``dst``, then the crop size, offsets, scale factor and the
    two flags.

    Parameters
    ----------
    crop_height, crop_width : int, optional
        When left to ``None`` the corresponding source dimension is used.

    Returns
    -------
    Whatever the native ``imdecode`` call returns.
    """
    # Both crop dimensions are declared as c_uint32 in the argtypes, and
    # ctypes refuses to convert ``None`` to an integer, so the documented
    # defaults could never actually be used. Fall back to the full extent.
    # NOTE(review): confirm the native side ignores the crop size when
    # enable_crop is False.
    if crop_height is None:
        crop_height = source_height
    if crop_width is None:
        crop_width = source_width

    return ctypes_imdecode(source.ctypes.data, source.size,
                           source_height, source_width, dst.ctypes.data,
                           crop_height, crop_width, offset_x, offset_y,
                           scale_factor_num, scale_factor_denom,
                           enable_crop, do_flip)
17 | self.my_page = -1 18 | 19 | self.page_data = np.zeros(self.page_size, ' self.page_size: 39 | raise ValueError(f"Tried allocating {size} but" + 40 | f" page size is {self.page_size}") 41 | 42 | if size > self.space_left_in_page: 43 | self.flush_page() 44 | # We book the next available page in the file 45 | with self.next_page_allocated.get_lock(): 46 | self.my_page = self.next_page_allocated.value 47 | self.next_page_allocated.value = self.my_page + 1 48 | 49 | self.page_offset = 0 50 | # This is a new page so we erate the content of the buffer 51 | self.page_data.fill(0) 52 | 53 | # We check if we already allocated space for this sample on 54 | # the page that is now full 55 | region_in_previous_page = False 56 | while self.allocations and self.allocations[-1][0] == self.current_sample_id: 57 | # We have to revert the allocations we did and we are giving 58 | # up on this sample. 59 | self.allocations.pop() 60 | # We found at least memory region from the preivous page 61 | region_in_previous_page = True 62 | 63 | # The writer will restart from this freshly allocated page 64 | if region_in_previous_page: 65 | raise MemoryError("Not enough memory to fit the whole sample") 66 | 67 | previous_offset = self.page_offset 68 | self.page_offset += size 69 | 70 | buffer = self.page_data[previous_offset:self.page_offset] 71 | ptr = self.offset + self.my_page * self.page_size + previous_offset 72 | 73 | # We return the pointer to the location in file and where to write 74 | # the data 75 | self.allocations.append((self.current_sample_id, ptr, size)) 76 | return ptr, buffer 77 | 78 | def flush_page(self): 79 | # If we haven't allocated any page we end there 80 | if self.my_page < 0: 81 | return 82 | 83 | # We shouldn't have allocated a page and have nothing to write on it 84 | assert self.page_offset != 0 85 | # Wait until it's my turn to write 86 | while self.next_page_written.value != self.my_page: 87 | # Essentially a spin lock 88 | # TODO we could replace it with like 
@abstractmethod
def compile_reader(self, address, size) -> Callable:
    """Return the function used to read sample data from storage.

    Concrete memory managers must override this method.

    NOTE(review): the managers visible in this package override it without
    the ``address``/``size`` parameters; confirm which signature is meant.

    Raises
    ------
    NotImplementedError
        Always, when the base implementation is reached.
    """
    # ``NotImplemented`` is the comparison sentinel, not an exception class:
    # calling it fails with a TypeError. Raise the exception the other
    # abstract methods of this class raise.
    raise NotImplementedError()
class OSCacheManager(MemoryManager):
    """Memory manager that serves samples from a memory map of the file.

    A single :class:`OSCacheContext` is created up front and handed out for
    every epoch.
    """

    def __init__(self, reader: 'Reader'):
        super().__init__(reader)
        self.context = OSCacheContext(self)

    def schedule_epoch(self, schedule):
        """Return the shared context; ``schedule`` is not used."""
        return self.context

    @property
    def state_type(self):
        """Numba type of the ``(mmap, ptrs, sizes)`` tuple given to the reader."""
        # The memory-mapped file content. The map is opened with mode='r',
        # so the array type is flagged as read-only.
        t1 = nb.uint8[::1]
        t1.mutable = False
        # Shared by the pointer and the size arrays.
        # NOTE(review): the original code wrote ``t1.multable = False`` (a
        # misspelled attribute with no effect) and then set ``t1.mutable`` a
        # second time right after creating ``t2``. The second statement may
        # have been meant for ``t2``; the effective typing is kept unchanged
        # here, confirm before marking ``t2`` read-only.
        t2 = nb.uint64[::1]
        return nb.types.Tuple([t1, t2, t2])

    def compile_reader(self):
        """Compile the function returning the bytes stored at an address."""
        def read(address, mem_state):
            # mem_state is (mmap, ptrs, sizes): find the allocation that
            # starts at ``address`` to learn how many bytes to return.
            # presumably ptrs is sorted, as searchsorted requires - verify
            size = mem_state[2][np.searchsorted(mem_state[1], address)]
            return mem_state[0][address:address + size]

        return Compiler.compile(read, nb.uint8[::1](nb.uint64, self.state_type))
@property
def state_type(self):
    """Numba type of the tuple exposed by ``ProcessCacheContext.state``.

    The members are, in order: the page buffer, the pointers, the sizes and
    the page-to-slot table. All four are flagged as read-only arrays.
    """
    # The data: one row per slot (2D, contiguous rows)
    t1 = nb.uint8[:, ::1]
    t1.mutable = False

    # The pointers
    t2 = nb.uint64[::1]
    t2.mutable = False

    # Their size
    t3 = nb.uint64[::1]
    t3.mutable = False

    # Page to slot
    t4 = nb.uint32[::1]
    t4.mutable = False

    return nb.types.Tuple([t1, t2, t3, t4])
class PageReader(Thread):
    """Worker thread that loads pages of the dataset file into memory slots.

    Requests arrive on ``queries`` as ``(page_number, slot)`` pairs. The page
    is read into ``memory[slot]`` and its number is then put on ``loaded``.
    Receiving ``None`` stops the thread.
    """

    def __init__(self, fname:str, queries: Queue, loaded: Queue,
                 memory: np.ndarray):
        self.fname: str = fname
        self.queries: Queue = queries
        self.memory: np.ndarray = memory
        # One row of ``memory`` holds exactly one page
        self.page_size = memory.shape[1]
        self.loaded: Queue = loaded
        super().__init__(daemon=True)

    def run(self):
        """Serve read requests until the ``None`` sentinel is received."""
        with open(self.fname, 'rb') as handle:
            fileno = handle.fileno()

            while True:
                query = self.queries.get()
                # No more work
                if query is None:
                    break

                page_number, slot = query
                offset = np.uint64(page_number * self.page_size)
                # NOTE(review): the value returned by ``read`` is not
                # checked, so a short or failed read is still reported as
                # loaded - confirm whether that needs handling.
                read(fileno, self.memory[slot], offset)
                self.loaded.put(page_number)
each page 16 | page_to_slot: Mapping[int, int] 17 | # First iteration a page can be loaded 18 | can_prefetch_at: Mapping[int, int] 19 | # Iteration at which a page *has* to be loaded 20 | entering_at: Mapping[int, int] 21 | # Iteration at which we can discard a page 22 | leaving_at: Mapping[int, int] 23 | 24 | def compute_schedule(pages_in_batch, prefetch_ahead = 3): 25 | # We determine what is the early and latest times we will need a page 26 | page_end = {} 27 | page_start = {} 28 | for b_id, pages in enumerate(pages_in_batch): 29 | for page in pages: 30 | page_end[page] = b_id 31 | if page not in page_start: 32 | page_start[page] = b_id 33 | 34 | # We determine which pages are 35 | # - Can be preloaded 36 | # - Are needed 37 | # - Can be diposed of 38 | # At a given batch 39 | entering_at = defaultdict(set) 40 | can_prefetch_at = defaultdict(set) 41 | leaving_at = defaultdict(set) 42 | for page in page_start.keys(): 43 | prefetch_start = max(0, page_start[page] - prefetch_ahead) 44 | can_prefetch_at[prefetch_start].add(page) 45 | entering_at[page_start[page]].add(page) 46 | leaving_at[page_end[page] + 1].add(page) 47 | 48 | 49 | # We now find how many pages we need to keep in our buffer 50 | # We also determine where which page is going to reside 51 | next_slot = 0 52 | page_to_slot = {} 53 | free_slots = set() 54 | 55 | # For each batch 56 | for b_id in range(len(pages_in_batch)): 57 | # First we free the pages that are leaving 58 | for page in leaving_at[b_id]: 59 | free_slots.add(page_to_slot[page]) 60 | 61 | # We use the prefetch timing here because we want to be able 62 | # To start prefetching ahead of time and not overwrite a slot 63 | # That is currently used 64 | for page in can_prefetch_at[b_id]: 65 | # Then we find a slot for the incoming pages 66 | if free_slots: 67 | # There is a slot available for this page 68 | slot = free_slots.pop() 69 | else: 70 | # We have to allocate a new slot because we ran out 71 | slot = next_slot 72 | next_slot += 1 73 | 
class ScheduleExecutor():
    """Runs a page-loading schedule with a pool of reader threads.

    Used as a context manager: entering starts the workers, leaving asks
    them to stop. ``load_batch`` must be called for batch 0, 1, 2, ... in
    order.
    """

    def __init__(self, fname: str, schedule: Schedule,
                 memory: np.ndarray, num_workers: int=12):
        self.fname = fname
        self.schedule = schedule
        self.memory = memory
        self.num_workers = num_workers
        # Requests sent to the workers / page numbers reported back by them
        self.queries = Queue()
        self.loaded_queue = Queue()
        # Pages known to be loaded so far
        self.loaded = set()
        self.entered = False
        self.next_batch = 0

    def __enter__(self):
        if self.entered:
            raise Exception("You can only enter a ScheduleExecutor once")
        self.entered = True

        # Create the number of threads we were asked to
        workers = []
        for _ in range(self.num_workers):
            worker = PageReader(self.fname, self.queries,
                                self.loaded_queue, self.memory)
            worker.start()
            workers.append(worker)
        self.threads = workers

    def __exit__(self, *_):
        # One sentinel per worker terminates all the child threads
        for _worker in range(self.num_workers):
            self.queries.put(None)

    def load_batch(self, current_batch):
        """Queue the prefetches allowed at this batch and wait for its pages."""
        # We enforce that we read in order otherwise our
        # assumptions are broken
        assert current_batch == self.next_batch

        # Start prefetching everything we are allowed to
        for page in self.schedule.can_prefetch_at[current_batch]:
            self.queries.put((page, self.schedule.page_to_slot[page]))

        # Wait until we have all the pages we need
        for needed in self.schedule.entering_at[current_batch]:
            while needed not in self.loaded:
                self.loaded.add(self.loaded_queue.get())

        self.next_batch = current_batch + 1
-------------------------------------------------------------------------------- 1 | from .pipeline import Pipeline 2 | from .pipeline_spec import PipelineSpec 3 | from .compiler import Compiler 4 | 5 | __all__ = ['Pipeline', 'PipelineSpec', 'Compiler'] -------------------------------------------------------------------------------- /ffcv/pipeline/allocation_query.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, Sequence, Tuple, Union 2 | from dataclasses import dataclass 3 | 4 | import numpy as np 5 | import torch as ch 6 | 7 | 8 | @dataclass(frozen=True) 9 | class AllocationQuery: 10 | shape: Tuple[int, ...] 11 | dtype: Union[np.dtype, ch.dtype] 12 | device: Optional[ch.device] = None 13 | 14 | 15 | Allocation = Union[AllocationQuery, Sequence[AllocationQuery]] 16 | 17 | def allocate_query(memory_allocation: AllocationQuery, batch_size: int, batches_ahead: int): 18 | # We compute the total amount of memory needed for this 19 | # operation 20 | final_shape = [batches_ahead, 21 | batch_size, *memory_allocation.shape] 22 | if isinstance(memory_allocation.dtype, ch.dtype): 23 | result = [] 24 | for _ in range(final_shape[0]): 25 | partial = ch.empty(*final_shape[1:], 26 | dtype=memory_allocation.dtype, 27 | device=memory_allocation.device) 28 | try: 29 | partial = partial.pin_memory() 30 | except: 31 | pass 32 | result.append(partial) 33 | else: 34 | ch_dtype = ch.from_numpy(np.empty(0, dtype=memory_allocation.dtype)).dtype 35 | result = ch.empty(*final_shape, 36 | dtype=ch_dtype) 37 | try: 38 | result = result.pin_memory() 39 | except: 40 | pass 41 | result = result.numpy() 42 | return result -------------------------------------------------------------------------------- /ffcv/pipeline/compiler.py: -------------------------------------------------------------------------------- 1 | import pdb 2 | from numba import njit, set_num_threads, prange, warnings as nwarnings, get_num_threads 3 | from 
class Compiler:
    """Class-level switchboard for JIT compilation and thread settings.

    All state lives on the class itself (``is_enabled``, ``num_threads``)
    and is set through the class methods below.
    """

    @classmethod
    def set_enabled(cls, b):
        """Turn JIT compilation on or off for later ``compile`` calls."""
        cls.is_enabled = b

    @classmethod
    def set_num_threads(cls, n):
        """Set the thread count for numba and torch.

        Values below 1 mean "as many as this process may run on".
        """
        if n < 1:
            n = len(sched_getaffinity(0))
        cls.num_threads = n
        for apply_setting in (set_num_threads, ch.set_num_threads):
            apply_setting(n)

    @classmethod
    def compile(cls, code, signature=None):
        """Return ``code`` compiled with numba, or unchanged when disabled."""
        # Only functions flagged with ``is_parallel`` are compiled in
        # parallel mode, and only when more than one thread is configured.
        parallel = getattr(code, 'is_parallel', False) and cls.num_threads > 1

        if not cls.is_enabled:
            return code

        jit = njit(signature, fastmath=True, nogil=True, error_model='numpy',
                   parallel=parallel)
        return jit(code)

    @classmethod
    def get_iterator(cls):
        """Return ``prange`` when multi-threaded, plain ``range`` otherwise."""
        return prange if cls.num_threads > 1 else range
def __init__(self, operations: Sequence[Operation]):
    """Set up a pipeline from an ordered sequence of operations.

    The operations are split into blocks and their code is generated right
    away.
    """

    # This is the starting state of the pipeline
    self.original_state = State(jit_mode=True,
                                device=ch.device('cpu'),
                                dtype=np.dtype('u1'),
                                shape=None)

    self.operations = operations

    # parse_pipeline reads original_state and operations, so both have to be
    # assigned before this point. The memory allocations it also returns are
    # discarded here.
    self.operation_blocks, _ = self.parse_pipeline()
    self.compiled_ops = self.compile_ops()

    # Nothing is compiled at construction time; this starts out empty
    self.compiled_code = None
def allocate_query(self, memory_allocation: 'AllocationQuery', batch_size: int, batches_ahead: int):
    """Allocate the buffers described by one allocation query.

    Parameters
    ----------
    memory_allocation : AllocationQuery
        Per-sample shape, dtype and (for torch dtypes) device of the buffer.
    batch_size : int
        Number of samples in one batch.
    batches_ahead : int
        Number of batches to allocate memory for.

    Returns
    -------
    list of torch.Tensor or numpy.ndarray
        For a torch dtype, a list of ``batches_ahead`` tensors of shape
        ``(batch_size, *shape)``. Otherwise a single array of shape
        ``(batches_ahead, batch_size, *shape)``. Buffers are pinned when
        torch allows it.
    """
    def pin_if_possible(tensor):
        # Pinning is best-effort, but only the errors torch raises when it
        # cannot pin are absorbed: a bare ``except`` would also swallow
        # KeyboardInterrupt and SystemExit.
        try:
            return tensor.pin_memory()
        except (RuntimeError, AssertionError):
            return tensor

    # We compute the total amount of memory needed for this
    # operation
    final_shape = [batches_ahead,
                   batch_size, *memory_allocation.shape]

    if isinstance(memory_allocation.dtype, ch.dtype):
        return [
            pin_if_possible(ch.empty(*final_shape[1:],
                                     dtype=memory_allocation.dtype,
                                     device=memory_allocation.device))
            for _ in range(final_shape[0])
        ]

    # Round-trip through an empty array to translate the numpy dtype
    ch_dtype = ch.from_numpy(np.empty(0, dtype=memory_allocation.dtype)).dtype
    result = pin_if_possible(ch.empty(*final_shape, dtype=ch_dtype))
    return result.numpy()
def __init__(self, source: Union[str, Operation], decoder: Operation = None,
             transforms:List[Operation] = None ):
    """Describe one pipeline: where it reads from and what it applies.

    ``default_pipeline`` is set when the caller gave nothing but a string
    source, i.e. no decoder and no transforms.
    """
    self.source = source
    self.decoder = decoder
    # A fresh list per instance when no transforms were given
    self.transforms = [] if transforms is None else transforms

    nothing_customised = decoder is None and not self.transforms
    self.default_pipeline = nothing_customised and isinstance(source, str)
@dataclass
class State:
    """Description of the data flowing between two pipeline stages."""
    jit_mode: bool
    device: ch.device
    shape: Tuple[int, ...]
    dtype: np.dtype

    def __post_init__(self):
        """Assess the validity of a pipeline stage."""
        # Both restrictions only apply to JIT stages
        if not self.jit_mode:
            return

        on_cpu = self.device == ch.device('cpu')
        if not on_cpu:
            raise AssertionError("Can't be in JIT mode and on the GPU")

        if isinstance(self.dtype, ch.dtype):
            raise AssertionError("Can't allocate a torch tensor in JIT mode")
def read_field_descriptors(self):
    """Load the field descriptors and resolve a handler for every field.

    The descriptors are read right after the file header. Handlers listed
    in the custom handlers given to the reader replace the default ones,
    and the combined metadata type is derived from the final handlers.

    Raises
    ------
    ValueError
        If a field ends up without a handler.
    """
    descriptors = np.fromfile(self._fname, dtype=FieldDescType,
                              count=self.num_fields,
                              offset=HeaderType.itemsize)
    descriptors.setflags(write=False)
    default_handlers = get_handlers(descriptors)

    self.field_descriptors = descriptors
    self.field_names = [decode_null_terminated_string(raw_name)
                        for raw_name in descriptors['name']]
    self.handlers = dict(zip(self.field_names, default_handlers))

    # Custom handlers are rebuilt from the arguments stored in the file
    overrides = self._custom_handlers
    for name, descriptor in zip(self.field_names, descriptors):
        if name in overrides:
            self.handlers[name] = overrides[name].from_binary(descriptor['arguments'])

    for name, handler in self.handlers.items():
        if handler is None:
            raise ValueError(f"Must specify a custom_field entry "
                             f"for custom field {name}")

    self.metadata_type = get_metadata_type(list(self.handlers.values()))
class RandomBrightness(Operation):
    """Randomly scale image brightness. Operates on raw arrays (not tensors).

    Selected images are overwritten in place; any extra positional argument
    passed to the generated function is ignored.

    Parameters
    ----------
    magnitude : float
        The brightness factor is drawn uniformly from
        [max(0, 1-magnitude), 1+magnitude].
    p : float
        Probability of applying the adjustment to each image.
    """

    def __init__(self, magnitude: float, p=0.5):
        super().__init__()
        self.magnitude = magnitude
        self.p = p

    def generate_code(self):
        prob = self.p
        strength = self.magnitude
        batch_range = Compiler.get_iterator()

        def brightness(images, *_):
            def blend(img1, img2, ratio):
                mixed = ratio * img1 + (1 - ratio) * img2
                return mixed.clip(0, 255).astype(img1.dtype)

            batch_size = images.shape[0]
            # One coin flip and one factor per image, drawn up front.
            selected = np.random.rand(batch_size) < prob
            factors = np.random.uniform(max(0, 1 - strength), 1 + strength, batch_size)
            for idx in batch_range(batch_size):
                if selected[idx]:
                    # Blending with 0 (black) scales every pixel by the factor.
                    images[idx] = blend(images[idx], 0, factors[idx])

            return images

        brightness.is_parallel = True
        return brightness

    def declare_state_and_memory(self, previous_state):
        # Shape and dtype are unchanged; only JIT mode is switched on.
        next_state = replace(previous_state, jit_mode=True)
        memory = AllocationQuery(previous_state.shape, previous_state.dtype)
        return (next_state, memory)
class RandomSaturation(Operation):
    """Randomly adjust image color balance. Operates on raw arrays (not tensors).

    Each selected image is blended with a grayscale copy of itself and
    written back in place; any extra positional argument passed to the
    generated function is ignored.

    Parameters
    ----------
    magnitude : float
        The blending factor is drawn uniformly from
        [max(0, 1-magnitude), 1+magnitude].
    p : float
        Probability of applying the adjustment to each image.
    """

    def __init__(self, magnitude, p=0.5):
        super().__init__()
        self.magnitude = magnitude
        self.p = p

    def generate_code(self):
        prob = self.p
        strength = self.magnitude
        batch_range = Compiler.get_iterator()

        def saturation(images, *_):
            def blend(img1, img2, ratio):
                mixed = ratio * img1 + (1 - ratio) * img2
                return mixed.clip(0, 255).astype(img1.dtype)

            batch_size = images.shape[0]
            selected = np.random.rand(batch_size) < prob
            factors = np.random.uniform(max(0, 1 - strength), 1 + strength, batch_size)
            for idx in batch_range(batch_size):
                if selected[idx]:
                    image = images[idx]
                    red, green, blue = image[:, :, 0], image[:, :, 1], image[:, :, 2]
                    # Weighted channel sum, cast back to the image dtype.
                    gray = (0.2989 * red + 0.587 * green + 0.114 * blue).astype(image.dtype)
                    # Replicate the gray plane across every channel.
                    gray_rgb = np.zeros_like(image)
                    for channel in batch_range(image.shape[-1]):
                        gray_rgb[:, :, channel] = gray
                    images[idx] = blend(image, gray_rgb, factors[idx])

            return images

        saturation.is_parallel = True
        return saturation

    def declare_state_and_memory(self, previous_state):
        # Shape and dtype are unchanged; only JIT mode is switched on.
        next_state = replace(previous_state, jit_mode=True)
        memory = AllocationQuery(previous_state.shape, previous_state.dtype)
        return (next_state, memory)
class Cutout(Operation):
    """Cutout data augmentation (https://arxiv.org/abs/1708.04552).

    A randomly placed square in every image of the batch is overwritten in
    place with a constant color.

    Parameters
    ----------
    crop_size : int
        Side length of the square that is cut out.
    fill : Tuple[int, int, int], optional
        RGB color written into the square, (0, 0, 0) by default. When a
        normalization layer follows, choose the fill so that the square is
        zero after normalization.
    """

    def __init__(self, crop_size: int, fill: Tuple[int, int, int] = (0, 0, 0)):
        super().__init__()
        self.crop_size = crop_size
        self.fill = np.array(fill)

    def generate_code(self) -> Callable:
        side = self.crop_size
        color = self.fill
        batch_range = Compiler.get_iterator()

        def cutout_square(images, *_):
            for idx in batch_range(images.shape[0]):
                # Random origin chosen so the square fits along axes 1 and 2.
                top = np.random.randint(images.shape[1] - side + 1)
                left = np.random.randint(images.shape[2] - side + 1)
                # Overwrite the square in place.
                images[idx, top:top + side, left:left + side] = color

            return images

        cutout_square.is_parallel = True
        return cutout_square

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        # In-place operation: no extra memory is requested.
        next_state = replace(previous_state, jit_mode=True)
        return next_state, None
class ImageMixup(Operation):
    """Mixup for images. Operates on raw arrays (not tensors).

    Image ``ix`` of the batch is blended with image ``ix - 1`` (image 0 is
    paired with the last image) and the result is written to the destination
    buffer.

    Parameters
    ----------
    alpha : float
        Both parameters of the Beta distribution lambda is sampled from.
    same_lambda : bool
        If True a single lambda is used for the whole batch, otherwise one
        lambda is sampled per image.
    """

    def __init__(self, alpha: float, same_lambda: bool):
        super().__init__()
        self.same_lambda = same_lambda
        self.alpha = alpha

    def generate_code(self) -> Callable:
        batch_range = Compiler.get_iterator()
        beta_param = self.alpha
        shared = self.same_lambda

        def mixer(images, dst, indices):
            # Seed from the last entry of `indices`; the label-side mixup
            # seeds the same way before drawing its lambdas.
            np.random.seed(indices[-1])
            batch_size = images.shape[0]
            lam = np.random.beta(beta_param, beta_param) if shared else \
                np.random.beta(beta_param, beta_param, batch_size)
            for ix in batch_range(batch_size):
                weight = lam if shared else lam[ix]
                dst[ix] = weight * images[ix] + (1 - weight) * images[ix - 1]

            return dst

        mixer.is_parallel = True
        mixer.with_indices = True

        return mixer

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        # State is passed through untouched; output goes to a buffer with
        # the same shape and dtype as the input.
        output_buffer = AllocationQuery(shape=previous_state.shape,
                                        dtype=previous_state.dtype)
        return (previous_state, output_buffer)
class MixupToOneHot(Operation):
    """Expand mixed-up label triples into soft one-hot target vectors.

    The input is expected to hold one row per sample with three columns:
    first class index, second class index and the mixing weight lambda
    (the layout written by ``LabelMixup``). The output row gets ``lambda``
    at the first class and ``1 - lambda`` at the second class.

    Parameters
    ----------
    num_classes : int
        Length of each output vector.
    """

    def __init__(self, num_classes: int):
        super().__init__()
        self.num_classes = num_classes

    def generate_code(self) -> Callable:
        def one_hotter(mixedup_labels, dst):
            dst.zero_()
            rows = ch.arange(mixedup_labels.shape[0])
            lam = mixedup_labels[:, 2]
            first_class = mixedup_labels[:, 0].long()
            second_class = mixedup_labels[:, 1].long()

            dst[rows, first_class] = lam
            # Accumulate instead of assigning: when both classes of a sample
            # coincide the entry must end up at lam + (1 - lam) == 1, not be
            # overwritten by 1 - lam. Each row is indexed exactly once, so the
            # indexed add has no duplicate targets. The complement is computed
            # out of place so the input buffer is left unmodified.
            dst[rows, second_class] += 1 - lam
            return dst

        return one_hotter

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        # Should already be converted to tensor
        assert not previous_state.jit_mode
        return (replace(previous_state, shape=(self.num_classes,)), \
                AllocationQuery((self.num_classes,), dtype=previous_state.dtype,
                                device=previous_state.device))
22 | alpha: float 23 | The opacity of the mask. 24 | indices : Sequence[int] 25 | The indices of images that should have the mask applied. 26 | clamp : Tuple[int, int] 27 | Clamps the final pixel values between these two values (default: (0, 255)). 28 | """ 29 | 30 | def __init__(self, mask: np.ndarray, alpha: np.ndarray, 31 | indices, clamp = (0, 255)): 32 | super().__init__() 33 | self.mask = mask 34 | self.indices = np.sort(indices) 35 | self.clamp = clamp 36 | self.alpha = alpha 37 | 38 | def generate_code(self) -> Callable: 39 | 40 | alpha = np.repeat(self.alpha[:, :, None], 3, axis=2) 41 | mask = self.mask.astype('float') * alpha 42 | to_poison = self.indices 43 | clamp = self.clamp 44 | my_range = Compiler.get_iterator() 45 | 46 | def poison(images, temp_array, indices): 47 | for i in my_range(images.shape[0]): 48 | sample_ix = indices[i] 49 | # We check if the index is in the list of indices 50 | # to poison 51 | position = np.searchsorted(to_poison, sample_ix) 52 | if position < len(to_poison) and to_poison[position] == sample_ix: 53 | temp = temp_array[i] 54 | temp[:] = images[i] 55 | temp *= 1 - alpha 56 | temp += mask 57 | np.clip(temp, clamp[0], clamp[1], out=temp) 58 | images[i] = temp 59 | return images 60 | 61 | poison.is_parallel = True 62 | poison.with_indices = True 63 | 64 | return poison 65 | 66 | def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]: 67 | # We do everything in place 68 | return (replace(previous_state, jit_mode=True), \ 69 | AllocationQuery(shape=previous_state.shape, dtype=np.dtype('float32'))) 70 | -------------------------------------------------------------------------------- /ffcv/transforms/random_resized_crop.py: -------------------------------------------------------------------------------- 1 | """ 2 | Random resized crop, similar to torchvision.transforms.RandomResizedCrop 3 | """ 4 | from dataclasses import replace 5 | from .utils import fast_crop 6 | import numpy as np 
class RandomResizedCrop(Operation):
    """Crop a random region with a random aspect ratio and resize it to a
    fixed square size.

    Chances are you do not want this augmentation as a separate step and
    would rather perform the crop inside the decoder, using the
    :class:`~ffcv.fields.rgb_image.ResizedCropRGBImageDecoder` class.

    Parameters
    ----------
    scale : Tuple[float, float]
        Lower and upper bounds for the fraction of the image area covered by
        the crop.
    ratio : Tuple[float, float]
        Lower and upper bounds for the aspect ratio of the crop.
    size : int
        Side length of the output.
    """

    def __init__(self, scale: Tuple[float, float], ratio: Tuple[float, float], size: int):
        super().__init__()
        self.size = size
        self.scale = scale
        self.ratio = ratio

    def generate_code(self) -> Callable:
        # Tuples are converted to arrays before being captured by the
        # generated function; any other type is captured as given.
        scale = np.array(self.scale) if isinstance(self.scale, tuple) else self.scale
        ratio = np.array(self.ratio) if isinstance(self.ratio, tuple) else self.ratio
        batch_range = Compiler.get_iterator()

        def random_resized_crop(images, dst):
            for idx in batch_range(images.shape[0]):
                source = images[idx]
                top, left, crop_h, crop_w = fast_crop.get_random_crop(source.shape[0],
                                                                      source.shape[1],
                                                                      scale,
                                                                      ratio)
                fast_crop.resize_crop(source, top, top + crop_h,
                                      left, left + crop_w, dst[idx])
            return dst

        random_resized_crop.is_parallel = True
        return random_resized_crop

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        # Output is always (size, size, 3), with the input dtype.
        out_shape = (self.size, self.size, 3)
        next_state = replace(previous_state, jit_mode=True, shape=out_shape)
        return next_state, AllocationQuery(out_shape, dtype=previous_state.dtype)
class ReplaceLabel(Operation):
    """Overwrite the label of selected samples with a fixed value.

    Parameters
    ----------
    indices : Sequence[int]
        Sample indices whose label is replaced. Stored sorted so membership
        can be tested by binary search.
    new_label : int
        Label written for the selected samples.
    """

    def __init__(self, indices, new_label: int):
        super().__init__()
        self.new_label = new_label
        self.indices = np.sort(indices)

    def generate_code(self) -> Callable:
        batch_range = Compiler.get_iterator()
        targets = self.indices
        replacement = self.new_label

        def replace_label(labels, temp_array, indices):
            for slot in batch_range(labels.shape[0]):
                sample_ix = indices[slot]
                # Binary search: `targets` contains sample_ix only if the
                # insertion point is in range and holds that exact value.
                where = np.searchsorted(targets, sample_ix)
                if where < len(targets) and targets[where] == sample_ix:
                    labels[slot] = replacement
            return labels

        replace_label.is_parallel = True
        replace_label.with_indices = True

        return replace_label

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        # Labels are edited in place; no extra memory is requested.
        next_state = replace(previous_state, jit_mode=True)
        return (next_state, None)
class RandomTranslate(Operation):
    """Shift each image by a random vertical and horizontal offset of up to
    ``padding`` pixels.

    The batch is copied into the center of a larger scratch buffer filled
    with ``fill``; a randomly positioned window of the original size is then
    copied back over each input image.

    Parameters
    ----------
    padding : int
        Max number of pixels to translate in any direction.
    fill : tuple
        An RGB color ((0, 0, 0) by default) used for the area outside the
        shifted image.
    """

    def __init__(self, padding: int, fill: Tuple[int, int, int] = (0, 0, 0)):
        super().__init__()
        self.fill = np.array(fill)
        self.padding = padding

    def generate_code(self) -> Callable:
        batch_range = Compiler.get_iterator()
        color = self.fill
        pad = self.padding

        def translate(images, dst):
            count, height, width, _ = images.shape
            # Build the padded canvas: background color, images centered.
            dst[:] = color
            dst[:, pad:pad+height, pad:pad+width] = images
            for idx in batch_range(count):
                # Offsets in [0, 2*pad]; `pad` itself means "no shift".
                top = randint(low=0, high=2 * pad + 1)
                left = randint(low=0, high=2 * pad + 1)
                images[idx] = dst[idx, top:top+height, left:left+width]

            return images

        translate.is_parallel = True
        return translate

    def declare_state_and_memory(self, previous_state: State) -> Tuple[State, Optional[AllocationQuery]]:
        height, width, channels = previous_state.shape
        border = 2 * self.padding
        next_state = replace(previous_state, jit_mode=True)
        # Scratch canvas with `padding` extra pixels on every side.
        canvas = AllocationQuery((height + border, width + border, channels),
                                 previous_state.dtype)
        return (next_state, canvas)
@njit(inline='always')
def resize_crop(source, start_row, end_row, start_col, end_col, destination):
    """Resize a rectangular region of ``source`` into ``destination``.

    Thin wrapper around ``ctypes_resize``: it forwards the data pointers of
    both arrays, their first two dimensions and the crop bounds
    (``start_row:end_row``, ``start_col:end_col``). The result is written
    into ``destination``; nothing is returned.
    """
    # The leading 0 fills the first argument of ctypes_resize.
    ctypes_resize(0,
                  source.ctypes.data,
                  source.shape[0], source.shape[1],
                  start_row, end_row, start_col, end_col,
                  destination.ctypes.data,
                  destination.shape[0], destination.shape[1])


@njit(parallel=False, fastmath=True, inline='always')
def get_random_crop(height, width, scale, ratio):
    """Pick a random crop window for an image of ``height`` x ``width``.

    Parameters
    ----------
    height, width
        Size of the source image.
    scale
        Indexable pair: bounds of the crop area as a fraction of the image
        area.
    ratio
        Array of two values: bounds of the crop aspect ratio (``np.log`` is
        applied to it, and ``min``/``max`` are taken over it).

    Returns
    -------
    (i, j, h, w)
        Row offset, column offset, crop height and crop width.
    """
    area = height * width
    log_ratio = np.log(ratio)
    # Up to 10 attempts: area fraction uniform in `scale`, aspect ratio
    # log-uniform in `ratio`; accept the first window that fits the image.
    for _ in range(10):
        target_area = area * np.random.uniform(scale[0], scale[1])
        aspect_ratio = np.exp(np.random.uniform(log_ratio[0], log_ratio[1]))
        w = int(round(np.sqrt(target_area * aspect_ratio)))
        h = int(round(np.sqrt(target_area / aspect_ratio)))
        if 0 < w <= width and 0 < h <= height:
            i = int(np.random.uniform(0, height - h + 1))
            j = int(np.random.uniform(0, width - w + 1))
            return i, j, h, w
    # Fallback: a centered crop, shrunk along one axis only when the image's
    # own aspect ratio lies outside the allowed range.
    in_ratio = float(width) / float(height)
    if in_ratio < min(ratio):
        w = width
        h = int(round(w / min(ratio)))
    elif in_ratio > max(ratio):
        h = height
        w = int(round(h * max(ratio)))
    else:
        w = width
        h = height
    i = (height - h) // 2
    j = (width - w) // 2
    return i, j, h, w


@njit(parallel=False, fastmath=True, inline='always')
def get_center_crop(height, width, ratio):
    """Return a centered square crop window.

    The side is ``int(ratio * min(height, width))``. The result is
    ``(row_offset, col_offset, side, side)``.
    """
    s = min(height, width)
    c = int(ratio * s)
    delta_h = (height - c) // 2
    delta_w = (width - c) // 2

    return delta_h, delta_w, c, c
class TraversalOrder(ABC):
    """Abstract base for strategies that decide the order samples are visited in.

    The constructor copies ``indices``, ``seed`` and ``distributed`` from the
    loader and initializes ``sampler`` to ``None``.

    Parameters
    ----------
    loader : Loader
        Object exposing ``indices``, ``seed`` and ``distributed``.
    """

    def __init__(self, loader: 'Loader'):
        self.loader = loader
        self.indices = self.loader.indices
        self.seed = self.loader.seed
        self.distributed = loader.distributed
        self.sampler = None

    @abstractmethod
    def sample_order(self, epoch: int) -> Sequence[int]:
        """Return the sample indices to visit, in order, for ``epoch``.

        Raises
        ------
        NotImplementedError
            Always, in this base implementation.
        """
        # ``NotImplemented`` is a comparison sentinel, not an exception:
        # calling it raises TypeError. NotImplementedError is the intended
        # signal for an abstract method.
        raise NotImplementedError()
class QuasiRandom(TraversalOrder):
    """Traversal order produced by ``generate_order_inner`` from per-page
    sample lists.

    Raises
    ------
    ValueError
        If the loader's memory manager reports no page-to-samples mapping.
    NotImplementedError
        If the loader is distributed.
    """

    def __init__(self, loader: 'Loader'):
        super().__init__(loader)

        # TODO filter only the samples we care about!!
        self.page_to_samples = loader.memory_manager.page_to_samples

        if not self.page_to_samples:
            raise ValueError(
                "Dataset won't benefit from QuasiRandom order, use regular Random")

        if self.distributed:
            raise NotImplementedError(
                "distributed Not implemented yet for QuasiRandom")

        self.prepare_data_structures()

    def prepare_data_structures(self):
        """Build the dense arrays handed to ``generate_order_inner``.

        ``page_to_samples_array[page, :page_sizes[page]]`` holds the samples
        of ``page`` that are also in ``self.indices``; the remaining entries
        of each row are left uninitialized.
        """
        wanted = set(self.indices)
        widest_page = max(map(len, self.page_to_samples.values()))
        num_pages = max(self.page_to_samples) + np.uint64(1)

        self.page_sizes = np.zeros(num_pages, dtype=np.int64)
        self.page_to_samples_array = np.empty((num_pages, widest_page),
                                              dtype=np.int64)

        for page, samples in self.page_to_samples.items():
            for sample in samples:
                if sample in wanted:
                    slot = self.page_sizes[page]
                    self.page_to_samples_array[page][slot] = sample
                    self.page_sizes[page] = slot + 1

    def sample_order(self, epoch: int) -> Sequence[int]:
        """Return the traversal order for ``epoch`` as an int64 array."""
        order = np.zeros(len(self.indices), dtype=np.int64)
        epoch_seed = self.seed * 912300 + epoch
        buffer_size = 2*self.loader.batch_size
        generate_order_inner(epoch_seed,
                             self.page_to_samples_array,
                             self.page_sizes,
                             order,
                             buffer_size)

        return order
def chunks(lst, n):
    """Yield consecutive slices of ``lst`` of length ``n`` (the last one may
    be shorter)."""
    yield from (lst[start:start + n] for start in range(0, len(lst), n))


def is_power_of_2(n):
    """Return True when ``n`` is a non-zero value with a single bit set."""
    single_bit = (n & (n - 1)) == 0
    return single_bit and n != 0


def align_to_page(ptr, page_size):
    """Round ``ptr`` up to the next multiple of ``page_size``.

    A value that is already a multiple is returned unchanged.
    """
    remainder = ptr % page_size
    if remainder == 0:
        return ptr
    return ptr + page_size - remainder


def decode_null_terminated_string(bytes: np.ndarray):
    """Decode the array's raw bytes as ASCII and return the text that
    precedes the first NUL character."""
    text = bytes.tobytes().decode('ascii')
    head, _, _ = text.partition('\x00')
    return head


@intrinsic
def cast_int_to_byte_ptr(typingctx, src):
    """Numba intrinsic reinterpreting an integer as a pointer to uint8.

    A signature is only provided for integer arguments; for any other
    argument type nothing is returned.
    """
    if not isinstance(src, types.Integer):
        return None

    # Signature: uintp -> uint8*
    pointer_type = types.CPointer(types.uint8)
    sig = pointer_type(types.uintp)

    def codegen(context, builder, signature, args):
        # Emit a single LLVM inttoptr on the only argument.
        (int_value,) = args
        llvm_pointer_type = context.get_value_type(signature.return_type)
        return builder.inttoptr(int_value, llvm_pointer_type)

    return sig, codegen
import Lock 39 | s_print_lock = Lock() 40 | 41 | 42 | def s_print(*a, **b): 43 | """Thread safe print function""" 44 | with s_print_lock: 45 | print(*a, **b) 46 | -------------------------------------------------------------------------------- /install.sh: -------------------------------------------------------------------------------- 1 | conda create -n ffcv python=3.9 cupy pkg-config compilers libjpeg-turbo opencv pytorch torchvision cudatoolkit=11.3 numba -c pytorch -c conda-forge 2 | -------------------------------------------------------------------------------- /libffcv/libffcv.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include 11 | #ifdef _WIN32 12 | typedef unsigned __int32 __uint32_t; 13 | typedef unsigned __int64 __uint64_t; 14 | #define EXPORT __declspec(dllexport) 15 | #else 16 | #define EXPORT 17 | #endif 18 | 19 | extern "C" { 20 | // a key use to point to the tjtransform instance 21 | static pthread_key_t key_tj_transformer; 22 | // a key use to point to the tjdecompressor instance 23 | static pthread_key_t key_tj_decompressor; 24 | static pthread_once_t key_once = PTHREAD_ONCE_INIT; 25 | 26 | // will make the keys to access the tj instances 27 | static void make_keys() 28 | { 29 | pthread_key_create(&key_tj_decompressor, NULL); 30 | pthread_key_create(&key_tj_transformer, NULL); 31 | } 32 | 33 | EXPORT void resize(int64_t cresizer, int64_t source_p, int64_t sx, int64_t sy, 34 | int64_t start_row, int64_t end_row, int64_t start_col, int64_t end_col, 35 | int64_t dest_p, int64_t tx, int64_t ty) { 36 | // TODO use proper arguments type 37 | 38 | cv::Mat source_matrix(sx, sy, CV_8UC3, (uint8_t*) source_p); 39 | cv::Mat dest_matrix(tx, ty, CV_8UC3, (uint8_t*) dest_p); 40 | cv::resize(source_matrix.colRange(start_col, end_col).rowRange(start_row, end_row), 41 | dest_matrix, 
dest_matrix.size(), 0, 0, cv::INTER_AREA); 42 | } 43 | 44 | EXPORT void my_memcpy(void *source, void* dst, uint64_t size) { 45 | memcpy(dst, source, size); 46 | } 47 | 48 | EXPORT void my_fread(int64_t fp, int64_t offset, void *destination, int64_t size) { 49 | fseek((FILE *) fp, offset, SEEK_SET); 50 | fread(destination, 1, size, (FILE *) fp); 51 | } 52 | 53 | EXPORT int imdecode(unsigned char *input_buffer, __uint64_t input_size, 54 | __uint32_t source_height, __uint32_t source_width, 55 | 56 | unsigned char *output_buffer, 57 | __uint32_t crop_height, __uint32_t crop_width, 58 | __uint32_t offset_x, __uint32_t offset_y, 59 | __uint32_t scale_num, __uint32_t scale_denom, 60 | bool enable_crop, 61 | bool hflip) 62 | { 63 | pthread_once(&key_once, make_keys); 64 | 65 | tjhandle tj_transformer; 66 | tjhandle tj_decompressor; 67 | if ((tj_transformer = pthread_getspecific(key_tj_transformer)) == NULL) 68 | { 69 | tj_transformer = tjInitTransform(); 70 | pthread_setspecific(key_tj_transformer, tj_transformer); 71 | } 72 | if ((tj_decompressor = pthread_getspecific(key_tj_decompressor)) == NULL) 73 | { 74 | tj_decompressor = tjInitDecompress(); 75 | pthread_setspecific(key_tj_decompressor, tj_decompressor); 76 | } 77 | 78 | tjtransform xform; 79 | tjscalingfactor scaling; 80 | memset(&xform, 0, sizeof(tjtransform)); 81 | if (hflip) { 82 | xform.op = TJXOP_HFLIP; 83 | } 84 | xform.r.x = offset_x; 85 | xform.r.y = offset_y; 86 | xform.r.h = crop_height; 87 | xform.r.w = crop_width; 88 | xform.options |= TJXOPT_CROP; 89 | scaling.num = scale_num; 90 | scaling.denom = scale_denom; 91 | 92 | unsigned char *dstBuf = NULL; 93 | unsigned long dstSize = 0; 94 | 95 | bool do_transform = enable_crop || hflip; 96 | 97 | if (do_transform) { 98 | tjTransform(tj_transformer, input_buffer, input_size, 1, &dstBuf, 99 | &dstSize, &xform, TJFLAG_FASTDCT); 100 | } else { 101 | dstBuf = input_buffer; 102 | dstSize = input_size; 103 | } 104 | int result = tjDecompress2(tj_decompressor, 
dstBuf, dstSize, output_buffer, 105 | TJSCALED(crop_width, scaling), 0, TJSCALED(crop_height, scaling), 106 | TJPF_RGB, TJFLAG_FASTDCT | TJFLAG_NOREALLOC); 107 | 108 | if (do_transform) { 109 | tjFree(dstBuf); 110 | } 111 | return result; 112 | } 113 | 114 | static PyMethodDef libffcvMethods[] = { 115 | {NULL, NULL, 0, NULL} 116 | }; 117 | 118 | 119 | static struct PyModuleDef libffcvmodule = { 120 | PyModuleDef_HEAD_INIT, 121 | "libffcv", 122 | "This is a dummy python extension, the real code is available through ctypes", 123 | -1, 124 | libffcvMethods 125 | }; 126 | 127 | PyMODINIT_FUNC PyInit__libffcv(void) { 128 | return PyModule_Create(&libffcvmodule); 129 | } 130 | } 131 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | plugins = numpy.typing.mypy_plugin 3 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | from setuptools import find_packages 2 | import subprocess 3 | from difflib import get_close_matches 4 | from glob import glob 5 | import os 6 | import platform 7 | 8 | from distutils.core import setup, Extension 9 | 10 | # read the contents of your README file 11 | from pathlib import Path 12 | this_directory = Path(__file__).parent 13 | long_description = (this_directory / "README.md").read_text() 14 | 15 | 16 | def find_pkg_dirs(package): 17 | close_matches = get_close_matches(package.lower(), 18 | os.environ["PATH"].lower().split(';'), 19 | cutoff=0) 20 | dll_dir = None 21 | for close_match in close_matches: 22 | if (os.path.exists(close_match) 23 | and glob(os.path.join(close_match, '*.dll'))): 24 | dll_dir = close_match 25 | break 26 | if dll_dir is None: 27 | raise Exception( 28 | f"Could not find required package: {package}. 
" 29 | "Add directory containing .dll files to system environment path." 30 | ) 31 | dll_dir_split = dll_dir.replace('\\', '/').split('/') 32 | root = get_close_matches(package.lower(), dll_dir_split, cutoff=0)[0] 33 | root_dir = '/'.join(dll_dir_split[:dll_dir_split.index(root) + 1]) 34 | return os.path.normpath(root_dir), os.path.normpath(dll_dir) 35 | 36 | 37 | def pkgconfig_windows(package, kw): 38 | is_x64 = platform.machine().endswith('64') 39 | root_dir, dll_dir = find_pkg_dirs(package) 40 | include_dir = None 41 | library_dir = None 42 | parent = None 43 | while parent != root_dir: 44 | parent = os.path.dirname(dll_dir if parent is None else parent) 45 | if include_dir is None and os.path.exists(os.path.join(parent, 'include')): 46 | include_dir = os.path.join(parent, 'include') 47 | library_dirs = set() 48 | libraries = glob(os.path.join(parent, '**', 'lib', '**', '*.lib'), 49 | recursive=True) 50 | for library in libraries: 51 | if ((is_x64 and 'x86' in library) 52 | or (not is_x64 and 'x64' in library)): 53 | continue 54 | library_dirs.add(os.path.dirname(library)) 55 | if library_dir is None and library_dirs: 56 | library_dir = sorted(library_dirs)[-1] 57 | if include_dir and library_dir: 58 | libraries = [os.path.splitext(library)[0] 59 | for library in glob(os.path.join(library_dir, '*.lib'))] 60 | break 61 | if not include_dir or not library_dir: 62 | raise Exception(f"Could not find required package: {package}.") 63 | kw.setdefault('include_dirs', []).append(include_dir) 64 | kw.setdefault('library_dirs', []).append(library_dir) 65 | kw.setdefault('libraries', []).extend(libraries) 66 | return kw 67 | 68 | 69 | def pkgconfig(package, kw): 70 | flag_map = {'-I': 'include_dirs', '-L': 'library_dirs', '-l': 'libraries'} 71 | output = subprocess.getoutput( 72 | 'pkg-config --cflags --libs {}'.format(package)) 73 | if 'not found' in output: 74 | raise RuntimeError(f"Could not find required package: {package}.") 75 | for token in output.strip().split(): 
# The single C++ translation unit compiled into the ``ffcv._libffcv`` extension.
sources = ['./libffcv/libffcv.cpp']

extension_kwargs = {
    'sources': sources,
    'include_dirs': []
}
if platform.system() == 'Windows':
    # No pkg-config on Windows: locate every dependency from the DLL
    # directories listed in PATH.
    extension_kwargs = pkgconfig_windows('opencv4', extension_kwargs)
    extension_kwargs = pkgconfig_windows('libturbojpeg', extension_kwargs)

    extension_kwargs = pkgconfig_windows('pthread', extension_kwargs)
else:
    # Prefer the ``opencv4`` pkg-config entry and fall back to ``opencv``.
    try:
        extension_kwargs = pkgconfig('opencv4', extension_kwargs)
    except RuntimeError:
        extension_kwargs = pkgconfig('opencv', extension_kwargs)
    extension_kwargs = pkgconfig('libturbojpeg', extension_kwargs)

    # ``libraries`` only exists if pkg-config emitted at least one ``-l``
    # flag, so do not assume the key is present.
    extension_kwargs.setdefault('libraries', []).append('pthread')


libffcv = Extension('ffcv._libffcv',
                    **extension_kwargs)

setup(name='ffcv',
      version='1.0.1',
      description=' FFCV: Fast Forward Computer Vision ',
      author='MadryLab',
      author_email='ffcv@mit.edu',
      url='https://github.com/libffcv/ffcv',
      # The license file at the repository root is named ``LICENSE``.
      license_files=('LICENSE',),
      packages=find_packages(),
      long_description=long_description,
      long_description_content_type='text/markdown',
      ext_modules=[libffcv],
      install_requires=[
          'terminaltables',
          'pytorch_pfn_extras',
          'fastargs',
          'opencv-python',
          'assertpy',
          'tqdm',
          'psutil',
          'numba',
      ])
import NamedTemporaryFile 3 | from collections import defaultdict 4 | from assertpy.assertpy import assert_that 5 | from multiprocessing import cpu_count 6 | 7 | import torch as ch 8 | from assertpy import assert_that 9 | import numpy as np 10 | from torch.utils.data import Dataset 11 | from ffcv import DatasetWriter 12 | from ffcv.fields import IntField, NDArrayField, TorchTensorField 13 | from ffcv import Loader 14 | 15 | class DummyActivationsDataset(Dataset): 16 | 17 | def __init__(self, n_samples, shape, is_ch=False): 18 | self.n_samples = n_samples 19 | self.shape = shape 20 | self.is_ch = is_ch 21 | 22 | def __len__(self): 23 | return self.n_samples 24 | 25 | def __getitem__(self, index): 26 | if index >= self.n_samples: 27 | raise IndexError() 28 | np.random.seed(index) 29 | to_return = np.random.randn(*self.shape).astype('= self.n_samples: 46 | raise IndexError() 47 | np.random.seed(index) 48 | d1 = np.random.randn(*self.shape).astype('> page_size_l2 107 | sample_to_pages = defaultdict(set) 108 | 109 | for sample_id, page in zip(sample_ids, pages): 110 | sample_to_pages[sample_id].add(page) 111 | assert_that(sample_to_pages[sample_id]).is_length(1) 112 | 113 | for ixes, activations, d2, d3 in loader: 114 | for ix, activation in zip(ixes, activations): 115 | assert_that(np.all(dataset[ix][1] == activation.numpy())).is_true() -------------------------------------------------------------------------------- /tests/test_basic_pipeline.py: -------------------------------------------------------------------------------- 1 | from dataclasses import replace 2 | import torch as ch 3 | from ffcv.pipeline.allocation_query import AllocationQuery 4 | from ffcv.pipeline.compiler import Compiler 5 | import numpy as np 6 | from typing import Callable 7 | from assertpy import assert_that 8 | from torch.utils.data import Dataset 9 | import logging 10 | import os 11 | from assertpy import assert_that 12 | from tempfile import NamedTemporaryFile 13 | from 
def test_basic_simple():
    """Write a small dataset, load it through the Doubler pipeline and check
    the first batch: indices are 0..batch_size-1 and values are 2*sin(index).
    """
    n_samples = 600
    batch_size = 8
    with NamedTemporaryFile() as tmp:
        path = tmp.name
        dataset = DummyDataset(n_samples)
        fields = {
            'index': IntField(),
            'value': FloatField()
        }
        writer = DatasetWriter(path, fields)
        writer.from_indexed_dataset(dataset)

        Compiler.set_enabled(True)

        value_pipeline = [FloatDecoder(), Doubler(), ToTensor()]
        loader = Loader(path, batch_size,
                        num_workers=min(5, cpu_count()),
                        seed=17,
                        pipelines={'value': value_pipeline})

        # Only the first batch is inspected.
        indices, values = next(iter(loader))
        expected_indices = np.arange(batch_size)
        expected_values = 2 * np.sin(expected_indices)

        indices_match = np.allclose(indices.squeeze().numpy(), expected_indices)
        assert_that(indices_match).is_true()
        values_match = np.allclose(expected_values, values.squeeze().numpy())
        assert_that(values_match).is_true()
def test_multiple_epoch_doesnt_recompile():
    """Creating a second iterator on a loader built without ``recompile``
    must leave ``loader.code`` equal to what the first iterator produced.
    """
    n_samples = 60
    batch_size = 8
    with NamedTemporaryFile() as tmp:
        path = tmp.name
        dataset = DummyDataset(n_samples)
        fields = {
            'index': IntField(),
            'value': FloatField()
        }
        writer = DatasetWriter(path, fields)
        writer.from_indexed_dataset(dataset)

        Compiler.set_enabled(True)

        value_pipeline = [FloatDecoder(), Doubler(), ToTensor()]
        loader = Loader(path, batch_size,
                        num_workers=min(5, cpu_count()),
                        seed=17,
                        pipelines={'value': value_pipeline})

        # The same name is rebound on purpose so the first iterator is
        # released exactly when the second one is created.
        epoch_iter = iter(loader)
        first_code = loader.code
        epoch_iter = iter(loader)
        second_code = loader.code
        assert_that(first_code).is_equal_to(second_code)
class DummyArrayDataset(Dataset):
    """Synthetic dataset yielding ``(mask, targets, index)`` triples.

    Samples are generated on demand from a NumPy RNG seeded with the sample
    index, so a given index always produces the same arrays. The arrays have
    length ``SIZE`` (module constant); ``shape`` is stored but not read here.
    """

    def __init__(self, n_samples, shape):
        self.n_samples = n_samples
        self.shape = shape

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        if not index < self.n_samples:
            raise IndexError()

        # Seeding with the index makes every sample reproducible.
        np.random.seed(index)
        # Draw order matters: the mask is sampled before the targets.
        mask = (np.random.rand(SIZE) > 0.5).astype('bool')
        targets = np.random.rand(SIZE).astype('float32')
        return mask, targets, index
39 | X = ch.empty(BATCH, SIZE, device=weight.device) 40 | for X_bool, _, __ in tqdm(loader): 41 | if sync: ch.cuda.synchronize() 42 | X.copy_(X_bool) 43 | total += X @ weight 44 | total += X @ weight 45 | total += X @ weight 46 | 47 | return total.sum(0) 48 | 49 | def run_cuda(weight, sync): 50 | n_samples, shape = (BATCH * WORKERS, (SIZE,)) 51 | with NamedTemporaryFile() as handle: 52 | name = handle.name 53 | dataset = DummyArrayDataset(n_samples, shape) 54 | writer = DatasetWriter(name, { 55 | 'mask': NDArrayField(dtype=np.dtype('bool'), shape=(SIZE,)), 56 | 'targets': NDArrayField(dtype=np.dtype('float32'), shape=(SIZE,)), 57 | 'idx': IntField() 58 | }) 59 | 60 | writer.from_indexed_dataset(dataset) 61 | 62 | loader = Loader( 63 | name, 64 | batch_size=BATCH, 65 | num_workers=WORKERS, 66 | order=OrderOption.QUASI_RANDOM, 67 | indices=np.arange(n_samples), 68 | drop_last=False, 69 | os_cache=True, 70 | pipelines={ 71 | 'mask': [NDArrayDecoder(), ToTensor(), ToDevice(ch.device('cuda:0'), non_blocking=False)], 72 | 'targets': [NDArrayDecoder(), ToTensor(), ToDevice(ch.device('cuda:0'), non_blocking=False)], 73 | 'idx': [IntDecoder(), ToTensor(), Squeeze(), ToDevice(ch.device('cuda:0'), non_blocking=False)] 74 | }) 75 | 76 | return run_experiment_cuda(weight, loader, sync) 77 | 78 | def test_cuda(): 79 | weight = ch.randn(SIZE, SIZE).cuda() 80 | async_1 = run_cuda(weight, False) 81 | sync_1 = run_cuda(weight, True) 82 | sync_2 = run_cuda(weight, True) 83 | print(async_1) 84 | print(sync_1) 85 | print(sync_2) 86 | print(ch.abs(sync_1 - sync_2).max()) 87 | print(ch.abs(sync_1 - async_1).max()) 88 | assert ch.abs(sync_1 - sync_2).max().cpu().item() < float(WORKERS), 'Sync-sync mismatch' 89 | assert ch.abs(async_1 - sync_1).max().cpu().item() < float(WORKERS), 'Async-sync mismatch' 90 | 91 | # test_cuda() -------------------------------------------------------------------------------- /tests/test_custom_field.py: 
class CaptionDataset:
    """Indexed dataset of random captions.

    Every caption is a UUID4 string truncated to a random length drawn with
    ``np.random.randint(50)``; each sample is a one-element tuple.
    """

    def __init__(self, N):
        captions = []
        for _ in range(N):
            identifier = str(uuid4())
            cut = np.random.randint(50)
            captions.append(identifier[:cut])
        self.captions = captions

    def __getitem__(self, idx):
        caption = self.captions[idx]
        return (caption,)

    def __len__(self):
        return len(self.captions)
class DummyDataset(Dataset):
    """Synthetic image dataset: sample ``i`` is ``(i, image)`` where the image
    is a ``(height, width, 3)`` uint8 array filled with ``i % 255``.
    """

    def __init__(self, length, height, width):
        self.length = length
        self.height = height
        self.width = width

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # Valid indices are 0..length-1; ``index == length`` must already be
        # rejected, otherwise sequence-style iteration yields an extra sample.
        if index >= self.length:
            raise IndexError
        dims = (self.height, self.width, 3)
        image_data = np.full(dims, index % 255, dtype='uint8')
        return index, image_data
batch_size=5, num_workers=2, 49 | pipelines={ 50 | 'value': [ 51 | SimpleRGBImageDecoder(), 52 | NormalizeImage(mean, std, np.float16), 53 | View(np.float16), 54 | ToTensor(), 55 | ToTorchImage(), 56 | ] 57 | }) 58 | ix = 0 59 | for res in tqdm(loader): 60 | index, images = res 61 | for image in images: 62 | image = image.numpy() 63 | ref_image = dataset[ix][1] 64 | ref_image = ref_image.transpose(2, 0, 1) 65 | ref_image = ref_image.astype(np.float16) 66 | ref_image -= mean[:, None, None] 67 | ref_image /= std[:, None, None] 68 | assert_that(np.allclose(ref_image, image)).is_true() 69 | ix += 1 70 | 71 | def test_gpu_normalization(): 72 | 73 | dataset = DummyDataset(500, 25, 30) 74 | 75 | with NamedTemporaryFile() as handle: 76 | name = handle.name 77 | fields = { 78 | 'index': IntField(), 79 | 'value': RGBImageField(write_mode='raw', jpeg_quality=95) 80 | } 81 | writer = DatasetWriter(name, fields, num_workers=2) 82 | mean = np.array([0, 1, 2]) 83 | std = np.array([1, 10, 20]) 84 | 85 | writer.from_indexed_dataset(dataset, chunksize=5) 86 | 87 | loader = Loader(name, batch_size=5, num_workers=2, 88 | pipelines={ 89 | 'value': [ 90 | SimpleRGBImageDecoder(), 91 | ToTensor(), 92 | ToDevice(ch.device('cuda:0')), 93 | ToTorchImage(), 94 | NormalizeImage(mean, std, np.float16), 95 | View(ch.float16), 96 | ] 97 | }) 98 | ix = 0 99 | for res in tqdm(loader): 100 | _, images = res 101 | for image in images: 102 | image = image.cpu().numpy() 103 | ref_image = dataset[ix][1] 104 | ref_image = ref_image.transpose(2, 0, 1) 105 | ref_image = ref_image.astype(np.float16) 106 | ref_image -= mean[:, None, None] 107 | ref_image /= std[:, None, None] 108 | assert_that(np.allclose(ref_image, image)).is_true() 109 | ix += 1 110 | -------------------------------------------------------------------------------- /tests/test_image_pipeline.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import torch as ch 3 | from 
class DummyDataset(Dataset):
    """Synthetic image dataset used by the image pipeline tests.

    Sample ``i`` pairs the index with a ``(height, width, 3)`` uint8 image
    filled with ``i % 255``. With ``reversed=True`` the pair is returned as
    ``(image, index)`` instead of ``(index, image)``.
    """

    def __init__(self, length, height, width, reversed=False):
        self.length = length
        self.height = height
        self.width = width
        self.reversed = reversed

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # Valid indices are 0..length-1; ``index == length`` must already be
        # rejected, otherwise sequence-style iteration yields an extra sample.
        if index >= self.length:
            raise IndexError
        dims = (self.height, self.width, 3)
        image_data = np.full(dims, index % 255, dtype='uint8')
        if self.reversed:
            return image_data, index
        return index, image_data
make_and_read_cifar_subset(length): 74 | my_dataset = Subset(CIFAR10(root='/tmp', train=True, download=True), range(length)) 75 | 76 | with NamedTemporaryFile() as handle: 77 | name = handle.name 78 | writer = DatasetWriter(name, { 79 | 'image': RGBImageField(write_mode='smart', 80 | max_resolution=32), 81 | 'label': IntField(), 82 | }, num_workers=2) 83 | 84 | writer.from_indexed_dataset(my_dataset, chunksize=10) 85 | 86 | Compiler.set_enabled(False) 87 | 88 | loader = Loader(name, batch_size=5, num_workers=2) 89 | 90 | for index, images in loader: 91 | pass 92 | 93 | def test_cifar_subset(): 94 | make_and_read_cifar_subset(200) 95 | 96 | def test_simple_raw_image_pipeline(): 97 | create_and_validate(500, 'raw', False) 98 | 99 | def test_simple_raw_image_pipeline_rev(): 100 | create_and_validate(500, 'raw', True) 101 | 102 | def test_simple_jpg_image_pipeline(): 103 | create_and_validate(500, 'jpg', False) 104 | 105 | def test_simple_jpg_image_pipeline_rev(): 106 | create_and_validate(500, 'jpg', True) 107 | -------------------------------------------------------------------------------- /tests/test_image_read.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from tqdm import tqdm 3 | from assertpy import assert_that 4 | from torch.utils.data import Dataset 5 | import logging 6 | from time import time 7 | import os 8 | from assertpy import assert_that 9 | from tempfile import NamedTemporaryFile 10 | 11 | from ffcv.writer import DatasetWriter 12 | from ffcv.reader import Reader 13 | from ffcv.fields import IntField, RGBImageField 14 | from ffcv.pipeline.compiler import Compiler 15 | from ffcv.memory_managers import OSCacheManager 16 | 17 | class DummyDataset(Dataset): 18 | 19 | def __init__(self, length): 20 | self.length = length 21 | 22 | def __len__(self): 23 | return self.length 24 | 25 | def __getitem__(self, index): 26 | if index >= self.length: 27 | raise IndexError 28 | 29 | np.random.seed(37 + 
index) 30 | dims = tuple([128, 128, 3]) 31 | image_data = np.random.randint(low=0, high=255, size=dims, dtype='uint8') 32 | return index, image_data 33 | 34 | 35 | 36 | def create_and_validate(length, mode='raw', compile=False): 37 | 38 | dataset = DummyDataset(length) 39 | 40 | with NamedTemporaryFile() as handle: 41 | name = handle.name 42 | writer = DatasetWriter(name, { 43 | 'index': IntField(), 44 | 'value': RGBImageField(write_mode=mode) 45 | }, num_workers=2) 46 | 47 | writer.from_indexed_dataset(dataset, chunksize=5) 48 | 49 | reader = Reader(name) 50 | manager = OSCacheManager(reader) 51 | context = manager.schedule_epoch(np.array([0, 1])) 52 | 53 | Compiler.set_enabled(compile) 54 | 55 | with context: 56 | Decoder = RGBImageField().get_decoder_class() 57 | decoder = Decoder() 58 | decoder.accept_globals(reader.metadata['f1'], manager.compile_reader()) 59 | 60 | decode = Compiler.compile(decoder.generate_code()) 61 | 62 | assert_that(reader.metadata).is_length(length) 63 | buff = np.zeros((1, 128, 128, 3), dtype='uint8') 64 | 65 | for i in range(length): 66 | result = decode(np.array([i]), buff, reader.metadata['f1'], context.state)[0] 67 | _, ref_image = dataset[i] 68 | assert_that(result.shape).is_equal_to(ref_image.shape) 69 | if mode == 'jpg': 70 | dist = np.abs(ref_image.astype('float') - result.astype('float')) 71 | assert_that(dist.mean()).is_less_than(80) 72 | else: 73 | assert_that(np.all(ref_image == result)).is_true() 74 | 75 | def test_simple_image_dataset_raw(): 76 | create_and_validate(500, 'raw') 77 | 78 | def test_simple_image_dataset_jpg(): 79 | create_and_validate(100, 'jpg') 80 | 81 | def test_simple_image_dataset_raw_compile(): 82 | create_and_validate(500, 'raw', True) 83 | 84 | def test_simple_image_dataset_jpg_compile(): 85 | create_and_validate(100, 'jpg', True) 86 | 87 | -------------------------------------------------------------------------------- /tests/test_json_field.py: 
class DummyDictDataset(Dataset):
    """Synthetic dataset yielding ``(index, dict)`` pairs of random strings.

    The NumPy RNG is seeded with the sample index, so a given index always
    produces the same dictionary.
    """

    def __init__(self, n_samples):
        self.n_samples = n_samples

    def __len__(self):
        return self.n_samples

    def __getitem__(self, index):
        if not index < self.n_samples:
            raise IndexError()

        np.random.seed(index)
        # These two draws are not used, but they advance the RNG and thereby
        # determine which strings are generated below.
        length = np.random.randint(5, 250)
        np.random.randint(0, 256, size=(length,))

        json_content = {}
        for _ in range(3):
            # The value is generated before its key, matching the RNG
            # consumption order of ``d[key()] = value()``.
            value = generate_random_string(50, 250)
            key = generate_random_string(5, 10)
            json_content[key] = value
        return index, json_content
ref_doc = dataset[ix][1] 64 | assert_that(sorted(doc.items())).is_equal_to(sorted(ref_doc.items())) 65 | ix += 1 66 | 67 | 68 | def test_simple_dict(): 69 | run_test(32) 70 | 71 | if __name__ == '__main__': 72 | test_simple_dict() -------------------------------------------------------------------------------- /tests/test_loader_filter.py: -------------------------------------------------------------------------------- 1 | from dataclasses import replace 2 | import torch as ch 3 | from ffcv.pipeline.allocation_query import AllocationQuery 4 | from ffcv.pipeline.compiler import Compiler 5 | import numpy as np 6 | from typing import Callable 7 | from assertpy import assert_that 8 | from torch.utils.data import Dataset 9 | import logging 10 | import os 11 | from assertpy import assert_that 12 | from tempfile import NamedTemporaryFile 13 | from ffcv.pipeline.operation import Operation 14 | from ffcv.transforms.ops import ToTensor 15 | from multiprocessing import cpu_count 16 | 17 | from ffcv.writer import DatasetWriter 18 | from ffcv.reader import Reader 19 | from ffcv.loader import Loader 20 | from ffcv.fields import IntField, FloatField, BytesField 21 | from ffcv.fields.basics import FloatDecoder 22 | from ffcv.pipeline.state import State 23 | 24 | from test_writer import DummyDataset 25 | 26 | numba_logger = logging.getLogger('numba') 27 | numba_logger.setLevel(logging.WARNING) 28 | 29 | 30 | class Doubler(Operation): 31 | 32 | def generate_code(self) -> Callable: 33 | def code(x, dst): 34 | dst[:] = x * 2 35 | return dst 36 | return code 37 | 38 | def declare_state_and_memory(self, previous_state: State): 39 | return (previous_state, AllocationQuery(previous_state.shape, previous_state.dtype, previous_state.device)) 40 | 41 | def test_basic_simple(): 42 | length = 600 43 | batch_size = 8 44 | with NamedTemporaryFile() as handle: 45 | file_name = handle.name 46 | dataset = DummyDataset(length) 47 | writer = DatasetWriter(file_name, { 48 | 'index': IntField(), 49 | 
'value': FloatField() 50 | }) 51 | 52 | writer.from_indexed_dataset(dataset) 53 | 54 | Compiler.set_enabled(True) 55 | 56 | loader = Loader(file_name, batch_size, num_workers=min(5, cpu_count()), seed=17, 57 | pipelines={ 58 | 'value': [FloatDecoder(), Doubler(), ToTensor()], 59 | }) 60 | 61 | def cond(value): 62 | value = value[0] 63 | result = value < 1 and value >= 0.5 64 | return result 65 | 66 | filtered = loader.filter('value', cond) 67 | 68 | assert_that(len(filtered)).is_greater_than(0) 69 | for index, values in filtered: 70 | assert_that(values.shape[0]).is_equal_to(batch_size) 71 | assert_that(((values < 1) & (values >= 0.5)).all()).is_true() -------------------------------------------------------------------------------- /tests/test_memcpy.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | import pytest 4 | from assertpy import assert_that 5 | 6 | from ffcv.libffcv import memcpy 7 | 8 | 9 | MEMCPY_TYPES = [ 10 | np.uint8, 11 | np.int32, 12 | np.int64, 13 | np.float64, 14 | np.float32 15 | ] 16 | 17 | @pytest.mark.parametrize('dtype', MEMCPY_TYPES) 18 | def test_memcpy(dtype): 19 | 20 | data = np.random.uniform(0, 255, size=(100, 99)).astype(dtype) 21 | dst = np.empty((100, 99), dtype=dtype) 22 | 23 | assert_that(np.all(data == dst)).is_false() 24 | memcpy(data, dst) 25 | 26 | assert_that(np.all(data == dst)).is_true() -------------------------------------------------------------------------------- /tests/test_memory_allocation.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from assertpy import assert_that 3 | from torch.utils.data import Dataset 4 | import logging 5 | import os 6 | from assertpy import assert_that 7 | from tempfile import NamedTemporaryFile 8 | 9 | from ffcv.writer import DatasetWriter 10 | from ffcv.reader import Reader 11 | from ffcv.fields import BytesField, IntField 12 | 13 | class DummyDataset(Dataset): 
14 | 15 | def __init__(self, l, size): 16 | self.l = l 17 | self.size = size 18 | 19 | def __len__(self): 20 | return self.l 21 | 22 | def __getitem__(self, index): 23 | if index > self.l: 24 | raise IndexError 25 | np.random.seed(index) 26 | return index, np.random.randint(0, 255, size=self.size, dtype='u1') 27 | 28 | 29 | def create_and_validate(length, size): 30 | 31 | dataset = DummyDataset(length, size) 32 | 33 | with NamedTemporaryFile() as handle: 34 | name = handle.name 35 | writer = DatasetWriter(name, { 36 | 'index': IntField(), 37 | 'value': BytesField() 38 | }, num_workers=2) 39 | 40 | writer.from_indexed_dataset(dataset, chunksize=5) 41 | 42 | reader = Reader(name) 43 | 44 | assert_that(reader.handlers).is_length(2) 45 | assert_that(reader.handlers['index']).is_instance_of(IntField) 46 | assert_that(reader.handlers['value']).is_instance_of(BytesField) 47 | assert_that(reader.alloc_table).is_length(length) 48 | assert_that(reader.metadata).is_length(length) 49 | assert_that((reader.metadata['f0'] == np.arange(length).astype('int')).all()).is_true() 50 | 51 | assert_that(np.all(reader.alloc_table['size'] == size)).is_true() 52 | 53 | def test_simple(): 54 | create_and_validate(600, 76) 55 | 56 | def test_large(): 57 | create_and_validate(600, 1024) 58 | 59 | def test_many(): 60 | create_and_validate(60000, 81) 61 | 62 | 63 | -------------------------------------------------------------------------------- /tests/test_memory_leak.py: -------------------------------------------------------------------------------- 1 | import os 2 | from tempfile import NamedTemporaryFile 3 | from time import sleep, time 4 | import os, psutil 5 | 6 | 7 | import numpy as np 8 | import pytest 9 | from tqdm import tqdm 10 | from assertpy import assert_that 11 | from torch.utils.data import Dataset 12 | 13 | from ffcv.writer import DatasetWriter 14 | from ffcv.reader import Reader 15 | from ffcv.fields import BytesField, IntField 16 | from ffcv.pipeline.compiler import Compiler 
from ffcv import Loader


class DummyDataset(Dataset):
    """Indexable dataset yielding ``(index, random uint8 array of length size)``."""

    def __init__(self, l, size):
        self.l = l
        self.size = size

    def __len__(self):
        return self.l

    def __getitem__(self, index):
        # `>=`, not `>`: valid indices are 0 .. l-1, so index == l must raise.
        if index >= self.l:
            raise IndexError
        np.random.seed(index)
        return index, np.random.randint(0, 255, size=self.size, dtype='u1')


def create_and_run(num_samples, size_bytes):
    """Write a large dataset, iterate it once, and check RSS stays below the dataset size."""
    with NamedTemporaryFile() as handle:
        name = handle.name
        dataset = DummyDataset(num_samples, size_bytes)
        # Uses the same DatasetWriter(fname, fields) / from_indexed_dataset
        # call shape as the other tests in this suite.
        # NOTE(review): the previous code passed num_samples positionally and
        # called write_pytorch_dataset(num_workers=-1); confirm the default
        # worker count is what is wanted here.
        writer = DatasetWriter(name, {
            'index': IntField(),
            'value': BytesField()
        })

        Compiler.set_enabled(True)

        writer.from_indexed_dataset(dataset, chunksize=100)
        total_dataset_size = num_samples * size_bytes
        # Dataset should not be in RAM
        process = psutil.Process(os.getpid())
        assert_that(process.memory_info().rss).is_less_than(total_dataset_size)

        loader = Loader(name, 128, 10)
        for _ in tqdm(loader):
            assert_that(process.memory_info().rss).is_less_than(total_dataset_size)


# bool("0") is True, so the old condition skipped unconditionally. Skip only
# when the variable is unset, empty or "0".
@pytest.mark.skipif(os.environ.get('FFCV_RUN_MEMORY_LEAK_TEST', "0") in ("", "0"),
                    reason="set FFCV_RUN_MEMORY_LEAK_TEST to enable it")
def test_memory_leak_write():
    create_and_run(128100, 500*300*3)
# --------------------------------------------------------------------------------
# /tests/test_memory_reader.py:
# --------------------------------------------------------------------------------

import numpy as np
from tqdm import tqdm
from assertpy import assert_that
from torch.utils.data import Dataset
import logging
from time import time
import os
from assertpy import assert_that
from tempfile import NamedTemporaryFile

from ffcv.writer import DatasetWriter
from ffcv.reader import Reader
from ffcv.fields import BytesField, IntField
from ffcv.pipeline.compiler import Compiler
from ffcv.memory_managers import OSCacheManager

from test_memory_allocation import DummyDataset


def create_and_validate(length, size, do_compile):
    """Write byte samples, then read 500 random allocations back through OSCacheManager."""
    dataset = DummyDataset(length, size)

    with NamedTemporaryFile() as handle:
        name = handle.name
        writer = DatasetWriter(name, {
            'index': IntField(),
            'value': BytesField()
        }, num_workers=2)

        writer.from_indexed_dataset(dataset, chunksize=5)

        reader = Reader(name)
        manager = OSCacheManager(reader)
        context = manager.schedule_epoch(np.array([0, 1]))

        indices = np.random.choice(length, 500)
        addresses = reader.alloc_table['ptr'][indices]
        sample_ids = reader.alloc_table['sample_id'][indices]

        Compiler.set_enabled(do_compile)
        read_fn = manager.compile_reader()

        with context:

            for addr, sample_id in zip(tqdm(addresses), sample_ids):
                read_buffer = read_fn(addr, context.state)
                # DummyDataset seeds numpy with the sample index, so the
                # expected bytes can be regenerated from the sample id.
                np.random.seed(sample_id)
                expected_buff = np.random.randint(0, 255, size=size, dtype='u1')

                assert_that(read_buffer).is_length(len(expected_buff))
                assert_that(np.all(read_buffer == expected_buff)).is_true()

            # We skip the first which is compilation


def test_simple():
    create_and_validate(600, 76, False)


def test_large():
    create_and_validate(600, 1024, False)


def test_many():
    create_and_validate(60000, 81, False)


def test_many_compiled():
    create_and_validate(1000000, 1, True)
# --------------------------------------------------------------------------------
# /tests/test_partial_batches.py:
# --------------------------------------------------------------------------------
from dataclasses import replace
import torch as ch
from ffcv.pipeline.allocation_query import AllocationQuery
from ffcv.pipeline.compiler import Compiler
import numpy as np
from typing import Callable
from assertpy import assert_that
from torch.utils.data import Dataset
import logging
import os
from assertpy import assert_that
from tempfile import NamedTemporaryFile
from ffcv.pipeline.operation import Operation
from ffcv.transforms.ops import ToTensor
from multiprocessing import cpu_count

from ffcv.writer import DatasetWriter
from ffcv.reader import Reader
from ffcv.loader import Loader
from ffcv.fields import IntField, FloatField, BytesField
from ffcv.fields.basics import FloatDecoder
from ffcv.pipeline.state import State

from test_writer import DummyDataset

numba_logger = logging.getLogger('numba')
numba_logger.setLevel(logging.WARNING)


class Doubler(Operation):
    """Pipeline operation that writes ``x * 2`` into the leading rows of its destination."""

    def generate_code(self) -> Callable:
        def code(x, dst):
            # Only the first x.shape[0] rows are written, so a batch shorter
            # than the destination buffer still fits.
            dst[:x.shape[0]] = x * 2
            return dst
        return code

    def declare_state_and_memory(self, previous_state: State):
        return (previous_state, AllocationQuery(previous_state.shape, previous_state.dtype, previous_state.device))


def run_test(bs, exp_length, drop_last=True):
    """Check the loader reports ``exp_length`` batches and yields at most one partial batch.

    With ``drop_last=True`` no partial batch is allowed at all.
    """
    length = 600
    batch_size = bs
    with NamedTemporaryFile() as handle:
        file_name = handle.name
        dataset = DummyDataset(length)
        writer = DatasetWriter(file_name, {
            'index': IntField(),
            'value': FloatField()
        })

        writer.from_indexed_dataset(dataset)

        Compiler.set_enabled(True)

        loader = Loader(file_name, batch_size, num_workers=min(5, cpu_count()), seed=17,
                        drop_last=drop_last,
                        pipelines={
                            'value': [FloatDecoder(), Doubler(), ToTensor()]
                        })

        assert_that(loader).is_length(exp_length)
        # Starts True when drop_last is set, so any partial batch fails at once;
        # otherwise it flips to True after the first partial batch is seen.
        another_partial = drop_last
        for (batch, _) in loader:
            if batch.shape[0] != bs:
                assert_that(another_partial).is_false()
                another_partial = True


def test_partial():
    run_test(7, 85, True)

def test_not_partial():
    run_test(7, 86, False)


def test_not_partial_multiple():
    run_test(60, 10, False)


def test_partial_multiple():
    run_test(60, 10, True)
# --------------------------------------------------------------------------------
# /tests/test_partial_pipeline.py:
# --------------------------------------------------------------------------------
from dataclasses import replace
import torch as ch
from ffcv.pipeline.allocation_query import AllocationQuery
from ffcv.pipeline.compiler import Compiler
import numpy as np
from typing import Callable
from assertpy import assert_that
from torch.utils.data import Dataset
import logging
import os
from assertpy import assert_that
from tempfile import NamedTemporaryFile
from ffcv.pipeline.operation import Operation
from ffcv.transforms.ops import ToTensor
from multiprocessing import cpu_count

from ffcv.writer import DatasetWriter
from ffcv.reader import Reader
from ffcv.loader import Loader
from ffcv.fields import IntField, FloatField, BytesField
from ffcv.fields.basics import FloatDecoder
from ffcv.pipeline.state import State

from test_writer import DummyDataset

numba_logger = logging.getLogger('numba')
numba_logger.setLevel(logging.WARNING)


class Doubler(Operation):
    """Pipeline operation that writes ``x * 2`` into its destination buffer."""

    def generate_code(self) -> Callable:
        def code(x, dst):
            dst[:] = x * 2
            return dst
        return code

    def declare_state_and_memory(self, previous_state: State):
        return (previous_state, AllocationQuery(previous_state.shape, previous_state.dtype, previous_state.device))


def test_basic_simple():
    """Disable the 'index' pipeline and check only the doubled 'value' field is returned."""
    length = 600
    batch_size = 8
    with NamedTemporaryFile() as handle:
        file_name = handle.name
        dataset = DummyDataset(length)
        writer = DatasetWriter(file_name, {
            'index': IntField(),
            'value': FloatField()
        })

        writer.from_indexed_dataset(dataset)

        Compiler.set_enabled(True)

        loader = Loader(file_name, batch_size, num_workers=min(5, cpu_count()), seed=17,
                        pipelines={
                            'value': [FloatDecoder(), Doubler(), ToTensor()],
                            'index': None
                        })

        it = iter(loader)
        result = next(it)
        # We should only have one element in the tuple
        assert_that(result).is_length(1)
        values = result[0]
        assert_that(np.allclose(2 * np.sin(np.arange(batch_size)),
                                values.squeeze().numpy())).is_true()
# --------------------------------------------------------------------------------
# /tests/test_rrc.py:
# --------------------------------------------------------------------------------
from ffcv.transforms.ops import ToTensor
from ffcv.fields.rgb_image import RandomResizedCropRGBImageDecoder, SimpleRGBImageDecoder, CenterCropRGBImageDecoder
import numpy as np
import torch as ch
from torch.utils.data import Dataset
from assertpy import assert_that
from tempfile import NamedTemporaryFile
from torchvision.datasets import CIFAR10
from torch.utils.data import Subset

from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, RGBImageField
from ffcv.loader import Loader
from ffcv.pipeline.compiler import Compiler


class DummyDataset(Dataset):
    """Dataset of constant-colour images with random height/width inside ``size_range``."""

    def __init__(self, length, size_range):
        self.length = length
        self.size_range = size_range

    def __len__(self):
        return self.length

    def __getitem__(self, index):
        # `>=`, not `>`: valid indices are 0 .. length-1.
        if index >= self.length:
            raise IndexError
        dims = (
            np.random.randint(self.size_range[0], self.size_range[1] + 1),
            np.random.randint(self.size_range[0], self.size_range[1] + 1),
            3
        )
        # Every pixel equals index % 255, so a decoded crop can be checked
        # without knowing which region was cropped.
        image_data = ((np.ones(dims) * index) % 255).astype('uint8')
        return index, image_data


def create_and_validate(length, decoder, size, mode='raw', compile=False):
    """Write variable-size images, decode them with ``decoder`` and check shape and content.

    ``size`` is the expected (height, width) of each decoded image. For
    ``mode='raw'`` pixels must match exactly; otherwise they must be within 2
    of the expected value.
    """
    dataset = DummyDataset(length, (300, 500))

    with NamedTemporaryFile() as handle:
        name = handle.name

        fields = {
            'index': IntField(),
            'value': RGBImageField(write_mode=mode)
        }

        writer = DatasetWriter(name, fields, num_workers=2)

        writer.from_indexed_dataset(dataset, chunksize=5)

        Compiler.set_enabled(compile)

        loader = Loader(name, batch_size=5, num_workers=2,
                        pipelines={
                            'value': [decoder, ToTensor()]
                        })

        for index, images in loader:
            for i, image in zip(index, images):
                assert_that(image.shape).is_equal_to((size[0], size[1], 3))
                if mode == 'raw':
                    assert_that(ch.all((image == (i % 255)).reshape(-1))).is_true()
                else:
                    # The old line ended in `.is_true` with no call, so it
                    # asserted nothing. The int() cast keeps the difference
                    # from wrapping if the subtraction would stay in uint8.
                    assert_that(bool(ch.all(ch.abs(image.int() - (i % 255)) < 2))).is_true()


def test_simple_image_decoder_fails_with_variable_images():
    decoder = SimpleRGBImageDecoder()
    # NOTE(review): size is passed as the int 32, so `size[0]` inside
    # create_and_validate would also raise TypeError; confirm the TypeError
    # being asserted really comes from the decoder.
    assert_that(create_and_validate).raises(TypeError).when_called_with(500, decoder, 32, 'raw')


def test_rrc_decoder_raw():
    size = (160, 160)
    decoder = RandomResizedCropRGBImageDecoder(size)
    create_and_validate(500, decoder, size, 'raw')


def test_rrc_decoder_jpg():
    size = (160, 160)
    decoder = RandomResizedCropRGBImageDecoder(size)
    create_and_validate(500, decoder, size, 'jpg')


def test_rrc_decoder_raw_compiled():
    size = (160, 160)
    decoder = RandomResizedCropRGBImageDecoder(size)
    create_and_validate(500, decoder, size, 'raw', True)


def test_rrc_decoder_jpg_compiled():
    size = (160, 160)
    decoder = RandomResizedCropRGBImageDecoder(size)
    create_and_validate(500, decoder, size, 'jpg', True)


def test_cc_decoder_raw_nc():
    size = (160, 160)
    decoder = CenterCropRGBImageDecoder(size, 224/256)
    create_and_validate(500, decoder, size, 'raw')


def test_cc_decoder_jpg_nc():
    size = (160, 160)
    decoder = CenterCropRGBImageDecoder(size, 224/256)
    create_and_validate(500, decoder, size, 'jpg')


def test_cc_decoder_raw_compiled():
    size = (160, 160)
    decoder = CenterCropRGBImageDecoder(size, 224/256)
    create_and_validate(500, decoder, size, 'raw', True)


def test_cc_decoder_jpg_compiled():
    size = (160, 160)
    decoder = CenterCropRGBImageDecoder(size, 224/256)
    create_and_validate(500, decoder, size, 'jpg', True)


if __name__ == '__main__':
    test_rrc_decoder_jpg()
# --------------------------------------------------------------------------------
# /tests/test_traversal_orders.py:
# --------------------------------------------------------------------------------
from collections import defaultdict
from tempfile import TemporaryDirectory
from os import path
from typing import Counter

import pytest
from assertpy import assert_that
import numpy as np
from torch.utils.data import Dataset, distributed
from torch.multiprocessing import spawn, Queue
from torch.distributed import init_process_group

from ffcv.loader.loader import ORDER_TYPE, OrderOption
from ffcv.writer import DatasetWriter
from ffcv.fields import IntField, BytesField
from ffcv import Loader


class DummyDataset(Dataset):
    """Indexable dataset yielding ``(index, bytes of sin(index))``."""

    def __init__(self, l):
        self.l = l

    def __len__(self):
        return self.l

    def __getitem__(self, index):
        # `>=`, not `>`: valid indices are 0 .. l-1.
        if index >= self.l:
            raise IndexError()
        # NOTE(review): the text from here to `if world_size > 1:` was lost
        # when this digest was extracted (everything between '<' and '>' was
        # stripped). The '<u1' dtype, the process_work signature and sync_url
        # are reconstructed from the call site in prep_and_run_test and the
        # names used below; verify against the upstream file.
        return (index, np.sin(np.array([index])).view('<u1'))


def process_work(rank, world_size, fname, order, sync_fname, out_folder, indices):
    """Iterate the loader for three epochs in one process and save the indices seen."""
    sync_url = f'file://{sync_fname}'
    if world_size > 1:
        init_process_group('nccl', sync_url, rank=rank, world_size=world_size)

    loader = Loader(fname, 8, num_workers=2, order=order, drop_last=False,
                    distributed=world_size > 1, indices=indices)

    result = []
    for _ in range(3):
        content = np.concatenate([x[0].numpy().reshape(-1).copy() for x in loader])
        result.append(content)
    result = np.stack(result)

    np.save(path.join(out_folder, f"result-{rank}.npy"), result)


def prep_and_run_test(num_workers, order, with_indices=False):
    """Run ``process_work`` in ``num_workers`` processes and validate the traversal.

    Without ``indices`` every sample must appear once or twice per epoch, and
    consecutive epochs must be identical exactly when ``order`` is SEQUENTIAL.
    With ``indices`` the set of samples seen must equal the requested subset.
    """
    length = 600
    indices = None
    if with_indices:
        indices = np.random.choice(length, length//2, replace=False)

    with TemporaryDirectory() as folder:
        name = path.join(folder, 'dataset.beton')
        sync_file = path.join(folder, 'share')
        dataset = DummyDataset(length)
        writer = DatasetWriter(name, {
            'index': IntField(),
            'value': BytesField()
        })

        writer.from_indexed_dataset(dataset)

        args = (num_workers, name, order, sync_file, folder, indices)
        if num_workers > 1:
            spawn(process_work, nprocs=num_workers, args=args)
        else:
            process_work(*((0, ) + args))

        results = []
        for r in range(num_workers):
            array = np.load(path.join(folder, f"result-{r}.npy"))
            results.append(array)

        # Join the per-process results along the sample axis: one row per epoch.
        results = np.concatenate(results, 1)

        # For each epoch
        for i in range(results.shape[0]):
            if not with_indices:
                if order == OrderOption.SEQUENTIAL and i < results.shape[0] - 1:
                    assert_that((results[i] == results[i + 1]).all()).is_true()
                if order != OrderOption.SEQUENTIAL and i < results.shape[0] - 1:
                    assert_that((results[i] == results[i + 1]).all()).is_false()

                epoch_content = Counter(results[i])
                indices_gotten = np.array(sorted(list(epoch_content.keys())))
                assert_that(np.all(np.arange(length) == indices_gotten)).is_true()
                assert_that(min(epoch_content.values())).is_equal_to(1)
                assert_that(max(epoch_content.values())).is_less_than_or_equal_to(2)
            else:
                assert_that(set(results[i])).is_equal_to(set(indices))


def test_traversal_sequential_1():
    prep_and_run_test(1, OrderOption.SEQUENTIAL)


def test_traversal_sequential_2():
    prep_and_run_test(2, OrderOption.SEQUENTIAL)


def test_traversal_sequential_3():
    prep_and_run_test(3, OrderOption.SEQUENTIAL)

def test_traversal_sequential_4():
    prep_and_run_test(4, OrderOption.SEQUENTIAL)


def test_traversal_random_1():
    prep_and_run_test(1, OrderOption.RANDOM)


def test_traversal_random_2():
    prep_and_run_test(2, OrderOption.RANDOM)


def test_traversal_random_3():
    prep_and_run_test(3, OrderOption.RANDOM)


def test_traversal_random_4():
    prep_and_run_test(4, OrderOption.RANDOM)


def test_traversal_quasirandom_1():
    prep_and_run_test(1, OrderOption.QUASI_RANDOM)


@pytest.mark.skip()
def test_traversal_quasirandom_2():
    prep_and_run_test(2, OrderOption.QUASI_RANDOM)


@pytest.mark.skip()
def test_traversal_quasirandom_3():
    prep_and_run_test(3, OrderOption.QUASI_RANDOM)


@pytest.mark.skip()
def test_traversal_quasirandom_4():
    prep_and_run_test(4, OrderOption.QUASI_RANDOM)


def test_traversal_sequential_distributed_with_indices():
    prep_and_run_test(2, OrderOption.SEQUENTIAL, True)


def test_traversal_random_distributed_with_indices():
    prep_and_run_test(2, OrderOption.RANDOM, True)


@pytest.mark.skip()
def test_traversal_quasi_random_distributed_with_indices():
    prep_and_run_test(2, OrderOption.QUASI_RANDOM, True)
# --------------------------------------------------------------------------------
# /tests/test_webdataset.py:
# --------------------------------------------------------------------------------
from os import path
from glob import glob
import tempfile

import numpy as np
from tempfile import TemporaryDirectory, NamedTemporaryFile
import torch as ch
from torch.utils.data import Dataset
import webdataset as wds

from ffcv import DatasetWriter
from ffcv.reader import Reader
from ffcv.fields import IntField, FloatField
from test_writer import validate_simple_dataset

field_names = [
    'index',
    'value.pyd'
]


class DummyDataset(Dataset):
    """Indexable dataset yielding ``(index, sin(index))``."""

    def __init__(self, l):
        self.l = l

    def __len__(self):
        return self.l

    def __getitem__(self, index):
        if index >= self.l:
            raise IndexError()
        return (index, np.sin(index))


def write_webdataset(folder, dataset, field_names):
    """Write ``dataset`` into ``folder`` as webdataset tar shards of at most 20 samples."""
    pattern = path.join(folder, "dataset-%06d.tar")
    writer = wds.ShardWriter(pattern, maxcount=20)
    with writer as sink:
        for i, sample in enumerate(dataset):
            data = {
                '__key__': f'sample_{i}'
            }

            # Pair each tuple element with its webdataset field name.
            for field_name, value in zip(field_names, sample):
                data[field_name] = value
            sink.write(data)


def pipeline(dataset):
    """Decode a webdataset and project each sample to a tuple ordered like ``field_names``."""
    return (dataset
        .decode()
        .to_tuple(*field_names)
    )


def main():
    """Write a webdataset, convert it with ``from_webdataset`` and validate the result.

    Kept in a function so its working names (dataset, files, writer, ...) are
    not created as module globals.
    """
    N = 1007
    dataset = DummyDataset(N)
    with TemporaryDirectory() as temp_directory:
        with NamedTemporaryFile() as handle:
            fname = handle.name
            write_webdataset(temp_directory, dataset, field_names)
            files = glob(path.join(temp_directory, '*'))
            files = list(sorted(files))

            print(fname)
            writer = DatasetWriter(fname, {
                'index': IntField(),
                'value': FloatField()
            })

            writer.from_webdataset(files, pipeline)

            validate_simple_dataset(fname, N, shuffled=False)


if __name__ == '__main__':
    main()
# --------------------------------------------------------------------------------
# /tests/test_writer.py:
# --------------------------------------------------------------------------------
import numpy as np
from assertpy import assert_that
from numpy.random import shuffle
from torch.utils.data import Dataset
import logging
import os
from assertpy import assert_that
from tempfile import NamedTemporaryFile
from multiprocessing import cpu_count

from ffcv.writer import DatasetWriter
from ffcv.reader import Reader
from ffcv.fields import IntField, FloatField, BytesField

numba_logger = logging.getLogger('numba') 16 | numba_logger.setLevel(logging.WARNING) 17 | 18 | class DummyDataset(Dataset): 19 | 20 | def __init__(self, l): 21 | self.l = l 22 | 23 | def __len__(self): 24 | return self.l 25 | 26 | def __getitem__(self, index): 27 | if index > self.l: 28 | raise IndexError() 29 | return (index, np.sin(index)) 30 | 31 | class DummyDatasetWithData(Dataset): 32 | 33 | def __init__(self, l): 34 | self.l = l 35 | 36 | def __len__(self): 37 | return self.l 38 | 39 | def __getitem__(self, index): 40 | if index > self.l: 41 | raise IndexError() 42 | return (index, np.zeros(2)) 43 | 44 | def validate_simple_dataset(name, length, shuffled=False): 45 | reader = Reader(name) 46 | assert_that(reader.handlers).is_length(2) 47 | assert_that(reader.handlers['index']).is_instance_of(IntField) 48 | assert_that(reader.handlers['value']).is_instance_of(FloatField) 49 | assert_that(reader.alloc_table).is_length(0) 50 | assert_that(reader.metadata).is_length(length) 51 | if shuffled: 52 | assert_that((reader.metadata['f0'] == np.arange(length).astype('int')).all()).is_false() 53 | assert_that(set(reader.metadata['f0'])).is_equal_to(set(np.arange(length).astype('int'))) 54 | else: 55 | assert_that((reader.metadata['f0'] == np.arange(length).astype('int')).all()).is_true() 56 | assert_that((np.sin(reader.metadata['f0']) == reader.metadata['f1']).all()).is_true() 57 | 58 | def test_write_shuffle(): 59 | length = 600 60 | with NamedTemporaryFile() as handle: 61 | name = handle.name 62 | dataset = DummyDataset(length) 63 | 64 | writer = DatasetWriter(name, { 65 | 'index': IntField(), 66 | 'value': FloatField() 67 | }) 68 | 69 | writer.from_indexed_dataset(dataset, shuffle_indices=True) 70 | 71 | validate_simple_dataset(name, length, shuffled=True) 72 | 73 | def test_write_simple(): 74 | length = 600 75 | with NamedTemporaryFile() as handle: 76 | name = handle.name 77 | dataset = DummyDataset(length) 78 | writer = DatasetWriter(name, { 79 | 'index': 
IntField(), 80 | 'value': FloatField() 81 | }) 82 | 83 | writer.from_indexed_dataset(dataset) 84 | 85 | validate_simple_dataset(name, length) 86 | 87 | def test_multiple_workers(): 88 | length = 600 89 | with NamedTemporaryFile() as handle: 90 | name = handle.name 91 | dataset = DummyDataset(length) 92 | writer = DatasetWriter(name, { 93 | 'index': IntField(), 94 | 'value': FloatField() 95 | }, num_workers=min(30, cpu_count())) 96 | 97 | writer.from_indexed_dataset(dataset, chunksize=10000) 98 | 99 | validate_simple_dataset(name, length) 100 | 101 | 102 | def test_super_long(): 103 | length = 600000 104 | with NamedTemporaryFile() as handle: 105 | name = handle.name 106 | dataset = DummyDataset(length) 107 | writer = DatasetWriter(name, { 108 | 'index': IntField(), 109 | 'value': FloatField() 110 | }, num_workers=min(30, cpu_count())) 111 | 112 | writer.from_indexed_dataset(dataset, chunksize=10000) 113 | 114 | validate_simple_dataset(name, length) 115 | 116 | def test_small_chunks_multiple_workers(): 117 | length = 600 118 | with NamedTemporaryFile() as handle: 119 | name = handle.name 120 | dataset = DummyDatasetWithData(length) 121 | writer = DatasetWriter(name, { 122 | 'index': IntField(), 123 | 'value': BytesField() 124 | }, num_workers=min(30, cpu_count())) 125 | 126 | writer.from_indexed_dataset(dataset, chunksize=1) --------------------------------------------------------------------------------