├── .gitignore ├── 00-intro.ipynb ├── 01-dask.delayed.ipynb ├── 02-dask-arrays.ipynb ├── 03-dask-dataframes.ipynb ├── 04-schedulers.ipynb ├── 05-distributed-dataframes-and-efficiency.ipynb ├── 06-distributed-advanced.ipynb ├── 07-machine-learning.ipynb ├── README.md ├── environment.yml ├── prep_data.py ├── requirements.txt ├── scipy_utils.py ├── solutions ├── 00-hello-world.py ├── 01-delayed-control-flow.py ├── 01-delayed-groupby.py ├── 01-delayed-loop.py ├── 02-dask-arrays-blocked-mean.py ├── 02-dask-arrays-make-arrays.py ├── 02-dask-arrays-stacked.py ├── 02-dask-arrays-store.py ├── 02-dask-arrays-weather-difference.py ├── 02-dask-arrays-weather-mean.py ├── 03-dask-dataframe-delay-per-airport.py ├── 03-dask-dataframe-delay-per-day.py ├── 03-dask-dataframe-map-partitions.py ├── 03-dask-dataframe-non-cancelled-per-airport.py ├── 03-dask-dataframe-non-cancelled.py ├── 03-dask-dataframe-rows.py ├── 05-distributed-dataframes-memory-usage.ipynb └── client_submit.py └── static ├── fail-case.gif ├── ml-dimensions-color.png ├── ml-dimensions.png ├── sklearn-parallel-dask.png └── sklearn-parallel.png /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/.gitignore -------------------------------------------------------------------------------- /00-intro.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/00-intro.ipynb -------------------------------------------------------------------------------- /01-dask.delayed.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/01-dask.delayed.ipynb -------------------------------------------------------------------------------- /02-dask-arrays.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/02-dask-arrays.ipynb -------------------------------------------------------------------------------- /03-dask-dataframes.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/03-dask-dataframes.ipynb -------------------------------------------------------------------------------- /04-schedulers.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/04-schedulers.ipynb -------------------------------------------------------------------------------- /05-distributed-dataframes-and-efficiency.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/05-distributed-dataframes-and-efficiency.ipynb -------------------------------------------------------------------------------- /06-distributed-advanced.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/06-distributed-advanced.ipynb -------------------------------------------------------------------------------- /07-machine-learning.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/07-machine-learning.ipynb -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/README.md -------------------------------------------------------------------------------- /environment.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/environment.yml -------------------------------------------------------------------------------- /prep_data.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/prep_data.py -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/requirements.txt -------------------------------------------------------------------------------- /scipy_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/scipy_utils.py -------------------------------------------------------------------------------- /solutions/00-hello-world.py: -------------------------------------------------------------------------------- 1 | print("Hello, world!") -------------------------------------------------------------------------------- /solutions/01-delayed-control-flow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/01-delayed-control-flow.py -------------------------------------------------------------------------------- /solutions/01-delayed-groupby.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/01-delayed-groupby.py -------------------------------------------------------------------------------- /solutions/01-delayed-loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/01-delayed-loop.py -------------------------------------------------------------------------------- /solutions/02-dask-arrays-blocked-mean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/02-dask-arrays-blocked-mean.py -------------------------------------------------------------------------------- /solutions/02-dask-arrays-make-arrays.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/02-dask-arrays-make-arrays.py -------------------------------------------------------------------------------- /solutions/02-dask-arrays-stacked.py: -------------------------------------------------------------------------------- 1 | x = da.stack(arrays, axis=0) 2 | x -------------------------------------------------------------------------------- /solutions/02-dask-arrays-store.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/02-dask-arrays-store.py -------------------------------------------------------------------------------- /solutions/02-dask-arrays-weather-difference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/02-dask-arrays-weather-difference.py -------------------------------------------------------------------------------- /solutions/02-dask-arrays-weather-mean.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/02-dask-arrays-weather-mean.py -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-delay-per-airport.py: -------------------------------------------------------------------------------- 1 | df.groupby("Origin").DepDelay.mean().compute() -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-delay-per-day.py: -------------------------------------------------------------------------------- 1 | df.groupby("DayOfWeek").DepDelay.mean().compute() -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-map-partitions.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/03-dask-dataframe-map-partitions.py -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-non-cancelled-per-airport.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/03-dask-dataframe-non-cancelled-per-airport.py -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-non-cancelled.py: -------------------------------------------------------------------------------- 1 | len(df[~df.Cancelled]) -------------------------------------------------------------------------------- /solutions/03-dask-dataframe-rows.py: -------------------------------------------------------------------------------- 1 | len(df) -------------------------------------------------------------------------------- /solutions/05-distributed-dataframes-memory-usage.ipynb: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/05-distributed-dataframes-memory-usage.ipynb -------------------------------------------------------------------------------- /solutions/client_submit.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/solutions/client_submit.py -------------------------------------------------------------------------------- /static/fail-case.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/static/fail-case.gif -------------------------------------------------------------------------------- /static/ml-dimensions-color.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/static/ml-dimensions-color.png -------------------------------------------------------------------------------- /static/ml-dimensions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/static/ml-dimensions.png -------------------------------------------------------------------------------- /static/sklearn-parallel-dask.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/static/sklearn-parallel-dask.png -------------------------------------------------------------------------------- /static/sklearn-parallel.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/martindurant/dask-tutorial-scipy-2018/HEAD/static/sklearn-parallel.png --------------------------------------------------------------------------------