├── talks ├── pyConLT │ └── img ├── pyData │ ├── img │ │ ├── logo.png │ │ ├── notebook.png │ │ ├── poc_base.jpg │ │ ├── poc_dev1.png │ │ ├── poc_dev2.png │ │ ├── poc_dev3.png │ │ ├── poc_dev4.png │ │ ├── poc_ml1.png │ │ ├── poc_ml2.png │ │ ├── poc_ml3.png │ │ ├── poc_ml4.png │ │ ├── results.png │ │ ├── roc_curve.png │ │ ├── GitHub-logo.png │ │ ├── dvc │ │ │ ├── dvc_cmd.png │ │ │ ├── gen_dvc.png │ │ │ ├── dvc_home_page.png │ │ │ ├── dvc_home_page1.png │ │ │ ├── dvc_home_page2.png │ │ │ ├── dvc_home_page3.png │ │ │ ├── pipeline │ │ │ │ ├── DVC1.png │ │ │ │ ├── DVC2.png │ │ │ │ ├── DVC3.png │ │ │ │ ├── DVC4.png │ │ │ │ ├── DVC5.png │ │ │ │ ├── DVC6.png │ │ │ │ ├── DVC7.png │ │ │ │ ├── DVC8.png │ │ │ │ ├── DVC9.png │ │ │ │ ├── DVC_change0.png │ │ │ │ ├── DVC_change1.png │ │ │ │ ├── DVC_change2.png │ │ │ │ ├── DVC_change3.png │ │ │ │ ├── DVC_change4.png │ │ │ │ ├── DVC_change5.png │ │ │ │ └── DVC_change2bis.png │ │ │ └── script_docstring_extract.png │ │ ├── icons │ │ │ ├── gear.png │ │ │ ├── database.png │ │ │ ├── youtube.png │ │ │ ├── analytics.png │ │ │ ├── parameters.png │ │ │ ├── parameters_blue.png │ │ │ └── parameters_grey.png │ │ ├── poc_worst1.png │ │ ├── poc_worst2.png │ │ ├── poc_worst3.png │ │ ├── crying_unicorn.png │ │ ├── global_schema1.png │ │ ├── global_schema2.png │ │ ├── confusion_matrix.png │ │ ├── mlv_convert │ │ │ ├── cmd.png │ │ │ ├── script1.png │ │ │ ├── script2.png │ │ │ ├── script3.png │ │ │ ├── script4.png │ │ │ ├── cmd_param.png │ │ │ └── nb_docstring.png │ │ ├── nb_convert │ │ │ ├── script.png │ │ │ ├── formated_script.png │ │ │ ├── formated_script_no_effect.png │ │ │ └── formated_script_not_conf.png │ │ ├── nb_convert_script.png │ │ └── nb_docstring_extract.png │ ├── overview.md │ └── draft.md ├── workshop │ └── img │ │ ├── logo.png │ │ ├── GitHub-logo.png │ │ ├── icons │ │ └── youtube.png │ │ └── dvc │ │ ├── pipeline │ │ ├── DVC1.png │ │ ├── DVC2.png │ │ ├── DVC3.png │ │ ├── DVC4.png │ │ ├── DVC5.png │ │ ├── DVC9.png │ │ ├── DVC_change0.png │ │ ├── DVC_change1.png │ │ ├── DVC_change2.png │ │ ├── DVC_change3.png │ │ ├── DVC_change4.png │ │ ├── DVC_change5.png │ │ └── DVC_change2bis.png │ │ ├── dvc_home_page1.png │ │ ├── dvc_home_page2.png │ │ └── dvc_home_page3.png └── reveal.js │ ├── lib │ ├── font │ │ └── external_fonts │ │ │ ├── Capsuula.woff │ │ │ ├── Capsuula.woff2 │ │ │ ├── WhiteRabbit.woff │ │ │ ├── WhiteRabbit.woff2 │ │ │ └── stylesheet.css │ ├── js │ │ ├── html5shiv.js │ │ └── classList.js │ └── css │ │ └── zenburn.css │ ├── plugin │ ├── multiplex │ │ ├── client.js │ │ ├── package.json │ │ ├── master.js │ │ └── index.js │ ├── markdown │ │ ├── example.md │ │ └── example.html │ ├── external │ │ ├── bower.json │ │ ├── LICENSE │ │ ├── external │ │ │ └── external.js │ │ └── README.md │ ├── math │ │ └── math.js │ ├── notes-server │ │ ├── index.js │ │ └── client.js │ └── print-pdf │ │ └── print-pdf.js │ ├── bower.json │ ├── CONTRIBUTING.md │ ├── css │ ├── theme │ │ ├── source │ │ │ ├── serif.scss │ │ │ ├── simple.scss │ │ │ └── moon.scss │ │ ├── template │ │ │ ├── settings.scss │ │ │ └── mixins.scss │ │ └── README.md │ └── print │ │ └── pdf.css │ ├── LICENSE │ ├── package.json │ ├── index.html │ └── Gruntfile.js ├── resources ├── setup_project │ ├── project │ │ ├── classifier │ │ │ ├── __init__.py │ │ │ ├── split.py │ │ │ ├── pre_process.py │ │ │ ├── helper.py │ │ │ └── extract.py │ │ ├── requirements.txt │ │ ├── .gitignore │ │ ├── setup.py │ │ ├── Makefile │ │ └── notebooks │ │ │ ├── evaluate_model.ipynb │ │ │ ├── extract_data.ipynb │ │ │ ├── 
preprocess_data.ipynb │ │ │ ├── train_data_model.ipynb │ │ │ └── split_dataset.ipynb │ ├── data │ │ └── input │ │ │ └── conf.json │ ├── docker │ │ ├── run.sh │ │ └── Dockerfile │ └── solution │ │ ├── configurables │ │ ├── evaluate_model.ipynb │ │ ├── extract_data.ipynb │ │ ├── preprocess_data.ipynb │ │ ├── train_data_model.ipynb │ │ └── split_dataset.ipynb │ │ └── mlvtools │ │ ├── evaluate_model.ipynb │ │ ├── extract_data.ipynb │ │ ├── preprocess_data.ipynb │ │ ├── train_data_model.ipynb │ │ └── split_dataset.ipynb ├── dvc_playground │ ├── user │ │ ├── resources │ │ │ ├── inputs │ │ │ │ ├── parameters.json │ │ │ │ ├── part2.input │ │ │ │ └── part1.input │ │ │ └── steps │ │ │ │ ├── concat_files.py │ │ │ │ └── decrypt.py │ │ ├── dvc_init_repo.sh │ │ ├── Dockerfile │ │ └── private_key │ ├── remote_git │ │ ├── pub_key │ │ └── Dockerfile │ ├── remote_dvc │ │ ├── pub_key │ │ └── Dockerfile │ └── docker-compose.yml ├── dummy │ ├── step4_convert_octals.ipynb │ ├── step1_sanitize_data.ipynb │ ├── dummy_pipeline_feed_2.txt │ ├── dummy_pipeline_feed.txt │ ├── dummy_pipeline_feed_3.txt │ ├── step3_convert_binaries.ipynb │ └── step2_split_data.ipynb ├── 04_Evaluate_model.ipynb ├── 03_Classify_text.ipynb ├── 03_bis_Classify_text.ipynb ├── 02_Tokenize_text.ipynb └── 05_Tune_hyperparameters_with_crossvalidation.ipynb ├── .github └── CODEOWNERS ├── .gitignore ├── requirements.txt ├── tutorial ├── img │ └── setup_project_pipeline.png ├── use_case4.md ├── setup.md └── dvc_overview.md ├── setup.py ├── download_data.py ├── setup.cfg ├── requirements.yml ├── modify_input_data.py └── LICENSE /talks/pyConLT/img: -------------------------------------------------------------------------------- 1 | ../pyData/img -------------------------------------------------------------------------------- /resources/setup_project/project/classifier/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @SdgJlbl @hsmett @alexdashkov @elemoine -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.egg-info 2 | **/.ipynb_checkpoints/* 3 | __pycache__/ 4 | data/* 5 | 6 | -------------------------------------------------------------------------------- /resources/setup_project/data/input/conf.json: -------------------------------------------------------------------------------- 1 | { 2 | "epoch": 20, 3 | "learning_rate": 0.7 4 | } -------------------------------------------------------------------------------- /resources/setup_project/project/requirements.txt: -------------------------------------------------------------------------------- 1 | fasttext 2 | jupyter 3 | dvc 4 | ml-versioning-tools 5 | nltk -------------------------------------------------------------------------------- /talks/pyData/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/logo.png -------------------------------------------------------------------------------- /talks/workshop/img/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/logo.png 
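The `conf.json` committed under `resources/setup_project/data/input/` (shown above) carries the training hyperparameters for the tutorial pipeline. A minimal sketch of how a pipeline step might read it — the variable names here are illustrative, not part of the repository:

```python
# Illustrative sketch only: load the tutorial's training hyperparameters.
import json

with open('resources/setup_project/data/input/conf.json') as fd:
    conf = json.load(fd)

epoch = conf['epoch']                  # 20 in the committed file
learning_rate = conf['learning_rate']  # 0.7 in the committed file
```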
-------------------------------------------------------------------------------- /talks/pyData/img/notebook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/notebook.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_base.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_base.jpg -------------------------------------------------------------------------------- /talks/pyData/img/poc_dev1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_dev1.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_dev2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_dev2.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_dev3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_dev3.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_dev4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_dev4.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_ml1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_ml1.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_ml2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_ml2.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_ml3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_ml3.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_ml4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_ml4.png -------------------------------------------------------------------------------- /talks/pyData/img/results.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/results.png -------------------------------------------------------------------------------- /talks/pyData/img/roc_curve.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/roc_curve.png 
-------------------------------------------------------------------------------- /talks/pyData/img/GitHub-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/GitHub-logo.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/dvc_cmd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/dvc_cmd.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/gen_dvc.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/gen_dvc.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/gear.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/gear.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_worst1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_worst1.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_worst2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_worst2.png -------------------------------------------------------------------------------- /talks/pyData/img/poc_worst3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/poc_worst3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | scikit-learn 2 | dvc 3 | mlflow 4 | jupyter 5 | pandas 6 | numpy 7 | nltk 8 | pyfasttext 9 | mlvtools 10 | -------------------------------------------------------------------------------- /resources/setup_project/project/.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | *.egg-info 3 | __pycache__/ 4 | **.ipynb_checkpoints 5 | *.pytest_cache 6 | *.idea -------------------------------------------------------------------------------- /talks/pyData/img/crying_unicorn.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/crying_unicorn.png -------------------------------------------------------------------------------- /talks/pyData/img/global_schema1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/global_schema1.png -------------------------------------------------------------------------------- /talks/pyData/img/global_schema2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/global_schema2.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/database.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/database.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/youtube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/youtube.png -------------------------------------------------------------------------------- /talks/workshop/img/GitHub-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/GitHub-logo.png -------------------------------------------------------------------------------- /talks/pyData/img/confusion_matrix.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/confusion_matrix.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/analytics.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/analytics.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/parameters.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/parameters.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/cmd.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/cmd.png -------------------------------------------------------------------------------- /talks/workshop/img/icons/youtube.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/icons/youtube.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/dvc_home_page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/dvc_home_page.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/dvc_home_page1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/dvc_home_page1.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/dvc_home_page2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/dvc_home_page2.png -------------------------------------------------------------------------------- 
/talks/pyData/img/dvc/dvc_home_page3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/dvc_home_page3.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC1.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC2.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC3.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC4.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC5.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC6.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC7.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC8.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC9.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/script1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/script1.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/script2.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/script2.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/script3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/script3.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/script4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/script4.png -------------------------------------------------------------------------------- /talks/pyData/img/nb_convert/script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_convert/script.png -------------------------------------------------------------------------------- /talks/pyData/img/nb_convert_script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_convert_script.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC1.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC2.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC3.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC4.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC5.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC9.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC9.png -------------------------------------------------------------------------------- /tutorial/img/setup_project_pipeline.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/tutorial/img/setup_project_pipeline.png 
-------------------------------------------------------------------------------- /resources/dvc_playground/user/resources/inputs/parameters.json: -------------------------------------------------------------------------------- 1 | { 2 | "row_shift": 47, 3 | "col_even_shift": 17, 4 | "col_odd_shift": 65 5 | } -------------------------------------------------------------------------------- /talks/pyData/img/icons/parameters_blue.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/parameters_blue.png -------------------------------------------------------------------------------- /talks/pyData/img/icons/parameters_grey.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/icons/parameters_grey.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/cmd_param.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/cmd_param.png -------------------------------------------------------------------------------- /talks/pyData/img/nb_docstring_extract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_docstring_extract.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/dvc_home_page1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/dvc_home_page1.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/dvc_home_page2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/dvc_home_page2.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/dvc_home_page3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/dvc_home_page3.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change0.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change1.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change2.png 
-------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change3.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change4.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change5.png -------------------------------------------------------------------------------- /talks/pyData/img/mlv_convert/nb_docstring.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/mlv_convert/nb_docstring.png -------------------------------------------------------------------------------- /talks/pyData/img/nb_convert/formated_script.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_convert/formated_script.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change0.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change1.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change2.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change3.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change4.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change5.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change5.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/pipeline/DVC_change2bis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/pipeline/DVC_change2bis.png -------------------------------------------------------------------------------- /talks/pyData/img/dvc/script_docstring_extract.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/dvc/script_docstring_extract.png -------------------------------------------------------------------------------- /talks/workshop/img/dvc/pipeline/DVC_change2bis.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/workshop/img/dvc/pipeline/DVC_change2bis.png -------------------------------------------------------------------------------- /talks/reveal.js/lib/font/external_fonts/Capsuula.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/reveal.js/lib/font/external_fonts/Capsuula.woff -------------------------------------------------------------------------------- /talks/reveal.js/lib/font/external_fonts/Capsuula.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/reveal.js/lib/font/external_fonts/Capsuula.woff2 -------------------------------------------------------------------------------- /talks/pyData/img/nb_convert/formated_script_no_effect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_convert/formated_script_no_effect.png -------------------------------------------------------------------------------- /talks/pyData/img/nb_convert/formated_script_not_conf.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/pyData/img/nb_convert/formated_script_not_conf.png -------------------------------------------------------------------------------- /talks/reveal.js/lib/font/external_fonts/WhiteRabbit.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/reveal.js/lib/font/external_fonts/WhiteRabbit.woff -------------------------------------------------------------------------------- /talks/reveal.js/lib/font/external_fonts/WhiteRabbit.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/peopledoc/mlvtools-tutorial/HEAD/talks/reveal.js/lib/font/external_fonts/WhiteRabbit.woff2 -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Python packaging.""" 4 | 5 | from setuptools import setup 6 | 7 | if __name__ == '__main__': 8 | setup() 9 | 
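The root `setup.py` above deliberately calls `setup()` with no arguments: all packaging metadata lives in `setup.cfg`, dumped later in this listing. A hedged sketch of inspecting that metadata programmatically — `read_configuration` is the setuptools helper of this era and may be deprecated in newer releases:

```python
# Sketch: a bare setup() defers to the [metadata] section of setup.cfg.
from setuptools.config import read_configuration

conf = read_configuration('setup.cfg')
print(conf['metadata']['name'])  # 'mlv-tools-tutorial' per the committed setup.cfg
```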
-------------------------------------------------------------------------------- /resources/setup_project/docker/run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | IMG_NAME="setup_project_tuto" 3 | 4 | docker build -t $IMG_NAME $(dirname $0) 5 | 6 | docker run -v $(git rev-parse --show-toplevel):/tuto -it $IMG_NAME bash -------------------------------------------------------------------------------- /resources/setup_project/project/setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | """Python packaging.""" 4 | 5 | from setuptools import setup 6 | 7 | if __name__ == '__main__': 8 | setup(name='tuto_project') 9 | -------------------------------------------------------------------------------- /talks/reveal.js/lib/js/html5shiv.js: -------------------------------------------------------------------------------- 1 | document.createElement('header'); 2 | document.createElement('nav'); 3 | document.createElement('section'); 4 | document.createElement('article'); 5 | document.createElement('aside'); 6 | document.createElement('footer'); 7 | document.createElement('hgroup'); -------------------------------------------------------------------------------- /resources/dvc_playground/user/dvc_init_repo.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | pushd ~ 4 | git config --global user.name $(whoami) 5 | git config --global user.email $(whoami)@example.com 6 | 7 | git clone git@git_srv:/srv/git/test_dvc_remote.git 8 | popd 9 | 10 | 11 | dvc remote add dvc_remote ssh://dvc_user@dvc_srv:/data/dvc/remote 12 | dvc config core.remote dvc_remote 13 | tail -f /dev/null 14 | -------------------------------------------------------------------------------- /resources/dvc_playground/remote_git/pub_key: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2LtCP20Y3Dxy1I7lVB98PqoUuFS7ggeC3pHC32r8e8Vfwhj73TVSHPvdUQDNLWXt+NuZpdSFrpWoT6l1YMHyRZArC1QiM0t+4ptt8Fr1baupnXHO3I74gp89+XEMdDjqSR9WZ0MIX1KRV956samUuEdHmEMTw22HaUQBCrk1b2P9J7e5AchNXJWMWITq9Rorzg58Pquj3ejENKlotAAyVzTAwrnfUuTlmTC96GXarJp4Pkx4LlWJv4J18XfrBRfGUD5F23IBMiII9fQtxrQZ3ntb3TALGCFmxs8udT0eXefsej10iAvrllP5Qg70fNKHpD31qn570AirbT+4FEq91 2 | -------------------------------------------------------------------------------- /resources/setup_project/project/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: help setup 2 | 3 | #: help - Display callable targets. 4 | help: 5 | @echo "Reference card for usual actions in development environment." 6 | @echo "Here are available targets:" 7 | @egrep -o "^#: (.+)" [Mm]akefile | sed 's/#: /* /' 8 | 9 | 10 | #: setup - Install dependencies. 11 | setup: 12 | pip install cython 13 | pip install -e . 
-r ./requirements.txt -------------------------------------------------------------------------------- /resources/dvc_playground/remote_dvc/pub_key: -------------------------------------------------------------------------------- 1 | ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC2LtCP20Y3Dxy1I7lVB98PqoUuFS7ggeC3pHC32r8e8Vfwhj73TVSHPvdUQDNLWXt+NuZpdSFrpWoT6l1YMHyRZArC1QiM0t+4ptt8Fr1baupnXHO3I74gp89+XEMdDjqSR9WZ0MIX1KRV956samUuEdHmEMTw22HaUQBCrk1b2P9J7e5AchNXJWMWITq9Rorzg58Pquj3ejENKlotAAyVzTAwrnfUuTlmTC96GXarJp4Pkx4LlWJv4J18XfrBRfGUD5F23IBMiII9fQtxrQZ3ntb3TALGCFmxs8udT0eXefsej10iAvrllP5Qg70fNKHpD31qn570AirbT+4FEq91 sbracaloni@poney 2 | -------------------------------------------------------------------------------- /download_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | from os.path import dirname, join 3 | from tempfile import mkdtemp 4 | 5 | from sklearn.datasets.twenty_newsgroups import download_20newsgroups 6 | 7 | cache_path = join(dirname(__file__), 'poc', 'data', '20news-bydate_py3.pkz') 8 | 9 | tmp = mkdtemp() 10 | # Temporary directory is removed by download_20newsgroups 11 | buffer = download_20newsgroups(target_dir=tmp, cache_path=cache_path) 12 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/multiplex/client.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | var multiplex = Reveal.getConfig().multiplex; 3 | var socketId = multiplex.id; 4 | var socket = io.connect(multiplex.url); 5 | 6 | socket.on(multiplex.id, function(data) { 7 | // ignore data from sockets that aren't ours 8 | if (data.socketId !== socketId) { return; } 9 | if( window.location.host === 'localhost:1947' ) return; 10 | 11 | Reveal.setState(data.state); 12 | }); 13 | }()); 14 | -------------------------------------------------------------------------------- /resources/setup_project/docker/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | 3 | RUN apt-get update && apt-get install -y tree \ 4 | nano \ 5 | vim \ 6 | virtualenv \ 7 | python3-dev 8 | 9 | RUN git config --global user.name tuto_user 10 | RUN git config --global user.email tuto_user@example.com 11 | 12 | WORKDIR /tuto -------------------------------------------------------------------------------- /talks/reveal.js/lib/font/external_fonts/stylesheet.css: -------------------------------------------------------------------------------- 1 | @font-face { 2 | font-family: 'White Rabbit'; 3 | src: url('WhiteRabbit.woff2') format('woff2'), 4 | url('WhiteRabbit.woff') format('woff'); 5 | font-weight: normal; 6 | font-style: normal; 7 | } 8 | 9 | @font-face { 10 | font-family: 'Capsuula'; 11 | src: url('Capsuula.woff2') format('woff2'), 12 | url('Capsuula.woff') format('woff'); 13 | font-weight: normal; 14 | font-style: normal; 15 | } 16 | 17 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/multiplex/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reveal-js-multiplex", 3 | "version": "1.0.0", 4 | "description": "reveal.js multiplex server", 5 | "homepage": "http://revealjs.com", 6 | "scripts": { 7 | "start": "node index.js" 8 | }, 9 | "engines": { 10 | "node": "~4.1.1" 11 | }, 12 | "dependencies": { 13 | "express": "~4.13.3", 14 | "grunt-cli": "~0.1.13", 15 | "mustache": "~2.2.1", 16 | 
"socket.io": "~1.3.7" 17 | }, 18 | "license": "MIT" 19 | } 20 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/markdown/example.md: -------------------------------------------------------------------------------- 1 | # Markdown Demo 2 | 3 | 4 | 5 | ## External 1.1 6 | 7 | Content 1.1 8 | 9 | Note: This will only appear in the speaker notes window. 10 | 11 | 12 | ## External 1.2 13 | 14 | Content 1.2 15 | 16 | 17 | 18 | ## External 2 19 | 20 | Content 2.1 21 | 22 | 23 | 24 | ## External 3.1 25 | 26 | Content 3.1 27 | 28 | 29 | ## External 3.2 30 | 31 | Content 3.2 32 | 33 | 34 | ## External 3.3 35 | 36 | ![External Image](https://s3.amazonaws.com/static.slid.es/logo/v2/slides-symbol-512x512.png) 37 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | name=mlv-tools-tutorial 3 | version=0.1 4 | license_file = LICENSE 5 | description = A POC to link them all (DVC, MLflow, MLV-tools). 6 | long_description = file: README.md 7 | long_description_content_type = text/markdown 8 | author = PeopleDoc 9 | author_email = sarah.diot-girard@people-doc.com 10 | url = http://github.com/peopledoc/mlv-tools-tutorial 11 | keywords = peopledoc, machine learning, versioning, automate, MLV-tools, DVC, MLflow 12 | classifiers = 13 | Programming Language :: Python :: 3 14 | python_requires = >=3.6 15 | -------------------------------------------------------------------------------- /talks/pyData/overview.md: -------------------------------------------------------------------------------- 1 | - About-US (DUO) 2 | - Intro => DS point of view / SE poview (DUO) 3 | - Poc vs Prod ... vs DS vs SE 4 | - Jupyter Notebooks 5 | - Opposition 6 | - Joke 7 | - NB convert 8 | - MLV tools 9 | - Benefit of MLVtools 10 | 11 | - 2 Months Later (repro) 12 | - PBL 13 | - Git 14 | - Git Lfs 15 | - Pipeline = data x code ... 16 | - 17 | - DVC 18 | - what is it? 
19 | - how it works 20 | - example 21 | - benefits vs. annoyances 22 | - MLV-tools gen_dvc 23 | - MLV-tools ipynb_to_dvc 24 | 25 | - REX (lessons learned) -------------------------------------------------------------------------------- /resources/dvc_playground/remote_git/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stretch 2 | RUN apt-get update && \ 3 | apt-get -y install openssh-server git vim && \ 4 | mkdir -p /var/run/sshd 5 | 6 | 7 | RUN useradd -d /home/git -m -s /bin/bash git 8 | 9 | 10 | RUN mkdir /home/git/.ssh && chmod 700 /home/git/.ssh 11 | COPY pub_key /home/git/.ssh/authorized_keys 12 | RUN chmod 600 /home/git/.ssh/authorized_keys 13 | RUN chown git: -R /home/git/.ssh 14 | RUN mkdir -p /srv/git/test_dvc_remote.git 15 | RUN cd /srv/git/test_dvc_remote.git/ && git init --bare 16 | RUN chown git: /srv/git/ -R 17 | 18 | 19 | CMD ["/usr/sbin/sshd", "-D"] -------------------------------------------------------------------------------- /talks/reveal.js/bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reveal.js", 3 | "version": "3.6.0", 4 | "main": [ 5 | "js/reveal.js", 6 | "css/reveal.css" 7 | ], 8 | "homepage": "http://revealjs.com", 9 | "license": "MIT", 10 | "description": "The HTML Presentation Framework", 11 | "authors": [ 12 | "Hakim El Hattab " 13 | ], 14 | "dependencies": { 15 | "headjs": "~1.0.3" 16 | }, 17 | "repository": { 18 | "type": "git", 19 | "url": "git://github.com/hakimel/reveal.js.git" 20 | }, 21 | "ignore": [ 22 | "**/.*", 23 | "node_modules", 24 | "bower_components", 25 | "test" 26 | ] 27 | } -------------------------------------------------------------------------------- /resources/dvc_playground/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3' 2 | services: 3 | remote_dvc_repo: 4 | build: 5 | context: ./remote_dvc 6 | ports: 7 | - "22:22" 8 | 9 | remote_git_repo: 10 | build: 11 | context: ./remote_git 12 | 13 | user1: 14 | build: 15 | context: ./user 16 | args: 17 | USER_NAME: "songoku" 18 | command: tail -f /dev/null 19 | links: 20 | - remote_dvc_repo:dvc_srv 21 | - remote_git_repo:git_srv 22 | user2: 23 | build: 24 | context: ./user 25 | args: 26 | USER_NAME: "bulma" 27 | command: tail -f /dev/null 28 | links: 29 | - remote_dvc_repo:dvc_srv 30 | - remote_git_repo:git_srv 31 | -------------------------------------------------------------------------------- /requirements.yml: -------------------------------------------------------------------------------- 1 | channels: 2 | - defaults 3 | dependencies: 4 | - numpy-base=1.15.1 5 | - cython=0.28 6 | - ipython=6.1.0 7 | - ipython_genutils=0.2 8 | - jupyter=1.0.0 9 | - nbconvert=5.2 10 | - nbformat=4.4 11 | - notebook=5.2 12 | - numpy=1.13 13 | - pandas=0.20 14 | - pandocfilters=1.4 15 | - pip=9.0.1 16 | - prompt_toolkit=1.0.15 17 | - ptyprocess=0.5.2=py36_intel_0 18 | - pydaal=2018.0.1.20171012=np113py36_intel_0 19 | - pygments=2.2.0=py36_intel_1 20 | - python=3.6.3 21 | - scikit-learn=0.19.0 22 | - scipy=0.19.1 23 | - pip: 24 | - dvc==0.19.7 25 | - ml-versioning-tools 26 | - mlflow==0.7.0 27 | - nltk==3.3 28 | - nose==1.3.7 29 | - pyfasttext==0.4.5 30 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/external/bower.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "external-js", 3 | "authors": [ 4 | "Cal Evans ", 5 | "Matthew Setter "
6 | ], 7 | "description": "External file importer for reveal.js", 8 | "version": "1.0.1", 9 | "main": "external/external.js", 10 | "keywords": [ 11 | "reveal.js", 12 | "external.js" 13 | ], 14 | "license": "MIT", 15 | "homepage": "https://github.com/settermjd/external", 16 | "repository": { 17 | "type": "git", 18 | "url": "git://github.com/calevans/external.git" 19 | }, 20 | "ignore": [ 21 | "**/.*", 22 | "node_modules", 23 | "bower_components", 24 | "test", 25 | "tests" 26 | ] 27 | } 28 | -------------------------------------------------------------------------------- /resources/setup_project/project/classifier/split.py: -------------------------------------------------------------------------------- 1 | import random 2 | from typing import List, Tuple 3 | 4 | 5 | def split_dataset(fasttext_data_set: List[str], test_percent: float) -> Tuple[List[str], List[str]]: 6 | """ 7 | Shuffle and split the input data set into a train and a test set 8 | according to the test_percent. 9 | :param fasttext_data_set: data set on fast text format 10 | :param test_percent: percent of test data (ex: 0.10) 11 | :return: test fasttext data set, train fasttext data set 12 | """ 13 | random.shuffle(fasttext_data_set) 14 | split_idx = round(test_percent * len(fasttext_data_set)) 15 | return fasttext_data_set[0: split_idx], fasttext_data_set[split_idx:] 16 | -------------------------------------------------------------------------------- /resources/dvc_playground/user/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.6 2 | ARG USER_NAME 3 | RUN apt-get update && \ 4 | apt-get -y install openssh-client vim git tree && \ 5 | mkdir -p /var/run/sshd 6 | 7 | COPY private_key /tmp/ 8 | 9 | RUN useradd -d /home/$USER_NAME -m -s /bin/bash $USER_NAME 10 | 11 | RUN mkdir /home/$USER_NAME/.ssh 12 | COPY private_key /home/$USER_NAME/.ssh/id_rsa 13 | RUN chown $USER_NAME:$USER_NAME -R /home/$USER_NAME/ 14 | RUN chmod 600 /home/$USER_NAME/.ssh/id_rsa 15 | 16 | USER $USER_NAME 17 | ENV PATH=$PATH:/home/$USER_NAME/.local/bin/ 18 | RUN pip install --user dvc paramiko 19 | 20 | RUN git config --global user.name $(whoami) 21 | RUN git config --global user.email $(whoami)@example.com 22 | COPY resources /resources 23 | 24 | EXPOSE 22 -------------------------------------------------------------------------------- /resources/setup_project/project/classifier/pre_process.py: -------------------------------------------------------------------------------- 1 | from typing import Tuple, List 2 | 3 | from nltk import wordpunct_tokenize 4 | 5 | 6 | def tokenize_and_clean_text(text: str) -> str: 7 | return ' '.join([token.lower() for token in wordpunct_tokenize(text) 8 | if token.isalpha() and token.lower()]) 9 | 10 | 11 | def clean_formatting(text: List[str]) -> str: 12 | return tokenize_and_clean_text(' '.join(text)) 13 | 14 | 15 | def preprocess_data(extracted_data: List[Tuple[str, str]]) -> List[str]: 16 | """ 17 | Transform data to get compliant with fasttext expected 18 | format: __label__[label] [text] 19 | """ 20 | return [f'__label__{data[0]} {clean_formatting(data[1])}' for data in extracted_data] 21 | -------------------------------------------------------------------------------- /resources/dvc_playground/remote_dvc/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM debian:stretch 2 | RUN apt-get update && \ 3 | apt-get -y install openssh-server vim && \ 4 | mkdir -p /var/run/sshd 5 | 6 | RUN groupadd ssh_user 7 | RUN 
useradd -g ssh_user -d /upload -s /bin/bash poney -p azerty 8 | RUN mkdir -p /data/dvc/remote 9 | RUN chown -R root:ssh_user /data/dvc 10 | RUN chown -R poney:ssh_user /data/dvc/remote 11 | RUN chmod ug+w -R /data/dvc/remote 12 | 13 | COPY pub_key /tmp 14 | RUN useradd -g ssh_user -m -d /home/dvc_user -s /bin/bash dvc_user && \ 15 | mkdir -p /home/dvc_user/.ssh/ && \ 16 | cat /tmp/pub_key > /home/dvc_user/.ssh/authorized_keys && \ 17 | chown dvc_user:ssh_user -R /home/dvc_user && \ 18 | chmod 644 /home/dvc_user/.ssh/authorized_keys 19 | 20 | 21 | CMD ["/usr/sbin/sshd", "-D"] -------------------------------------------------------------------------------- /resources/setup_project/project/classifier/helper.py: -------------------------------------------------------------------------------- 1 | import json 2 | from os import makedirs 3 | from os.path import dirname 4 | from typing import List 5 | 6 | 7 | def write_json(json_file: str, data: dict): 8 | """ 9 | Create parent directories if they do not exist. 10 | Write the JSON file. 11 | """ 12 | makedirs(dirname(json_file), exist_ok=True) 13 | with open(json_file, 'w') as fd: 14 | json.dump(data, fd) 15 | 16 | 17 | def write_lines_file(file_path: str, data_list: List[str]): 18 | """ 19 | Create parent directories if they do not exist. 20 | Write the file line by line. 21 | """ 22 | makedirs(dirname(file_path), exist_ok=True) 23 | with open(file_path, 'w') as fd: 24 | fd.writelines(['{}{}'.format(line, '' if line.endswith('\n') else '\n') for line in data_list]) 25 | -------------------------------------------------------------------------------- /resources/dvc_playground/user/resources/steps/concat_files.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import glob 3 | import logging 4 | from argparse import ArgumentParser 5 | from os.path import isdir 6 | 7 | if __name__ == '__main__': 8 | parser = ArgumentParser(description='Concat files from a directory') 9 | parser.add_argument('-i', '--input-dir', required=True, help='Contains files to concat') 10 | parser.add_argument('-o', '--output-file', required=True, help='Result file') 11 | 12 | args = parser.parse_args() 13 | 14 | if not isdir(args.input_dir): 15 | logging.error(f'Not a directory: {args.input_dir}') 16 | else: 17 | with open(args.output_file, 'w') as fd_write: 18 | for file in sorted(glob.glob(f'{args.input_dir}/*.input')): 19 | with open(file, 'r') as fd_read: 20 | fd_write.write(fd_read.read()) 21 | -------------------------------------------------------------------------------- /resources/setup_project/project/classifier/extract.py: -------------------------------------------------------------------------------- 1 | import json 2 | import logging 3 | from typing import List, Tuple 4 | 5 | 6 | def get_json(json_file_path: str) -> dict: 7 | """ 8 | Load JSON content from a given path 9 | """ 10 | try: 11 | with open(json_file_path, 'r') as fd: 12 | return json.load(fd) 13 | except json.JSONDecodeError: 14 | logging.exception(f'Invalid JSON format for pipeline input: {json_file_path}') 15 | except IOError: 16 | logging.exception(f'Cannot open pipeline input: {json_file_path}') 17 | 18 | 19 | def extract_data_from_inputs(json_input_file: str) -> List[Tuple[int, str]]: 20 | """ 21 | Read the input file, then extract pipeline data as a list of tuples 22 | """ 23 | json_content = get_json(json_input_file) 24 | 25 | extracted_data = [(review['ratingOverall'], review['segments']) for review in json_content] 26 | 27 | return extracted_data 28 |
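Taken together, the `classifier` helpers above form a small pipeline: extract raw reviews, normalise them into the fastText format, then split them into train and test sets. A minimal sketch of chaining them outside DVC — the file paths are hypothetical, and the real tutorial drives each step through generated DVC commands:

```python
# Hedged sketch: wire the classifier steps together end to end.
from classifier.extract import extract_data_from_inputs
from classifier.helper import write_lines_file
from classifier.pre_process import preprocess_data
from classifier.split import split_dataset

raw = extract_data_from_inputs('./data/input/reviews.json')  # [(rating, segments), ...]
fasttext_lines = preprocess_data(raw)                        # ['__label__4 clean text', ...]
test_set, train_set = split_dataset(fasttext_lines, test_percent=0.10)

write_lines_file('./data/train_set.txt', train_set)
write_lines_file('./data/test_set.txt', test_set)
```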
-------------------------------------------------------------------------------- /modify_input_data.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import codecs 3 | import pickle 4 | from os.path import dirname, join 5 | 6 | from sklearn.utils import shuffle 7 | 8 | cache_path = join(dirname(__file__), 'poc', 'data', '20news-bydate_py3.pkz') 9 | 10 | 11 | def shuffle_data(subset: str, cache): 12 | cache[subset].data, cache[subset].target, cache[subset].filenames = shuffle(cache[subset].data, 13 | cache[subset].target, 14 | cache[subset].filenames) 15 | 16 | 17 | with open(cache_path, 'rb') as f: 18 | compressed_content = f.read() 19 | uncompressed_content = codecs.decode(compressed_content, 'zlib_codec') 20 | cache = pickle.loads(uncompressed_content) 21 | 22 | shuffle_data('train', cache) 23 | shuffle_data('test', cache) 24 | 25 | compressed_content = codecs.encode(pickle.dumps(cache), 'zlib_codec') 26 | with open(cache_path, 'wb') as f: 27 | f.write(compressed_content) 28 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/multiplex/master.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | 3 | // Don't emit events from inside of notes windows 4 | if ( window.location.search.match( /receiver/gi ) ) { return; } 5 | 6 | var multiplex = Reveal.getConfig().multiplex; 7 | 8 | var socket = io.connect( multiplex.url ); 9 | 10 | function post() { 11 | 12 | var messageData = { 13 | state: Reveal.getState(), 14 | secret: multiplex.secret, 15 | socketId: multiplex.id 16 | }; 17 | 18 | socket.emit( 'multiplex-statechanged', messageData ); 19 | 20 | }; 21 | 22 | // post once the page is loaded, so the client follows also on "open URL". 23 | window.addEventListener( 'load', post ); 24 | 25 | // Monitor events that trigger a change in state 26 | Reveal.addEventListener( 'slidechanged', post ); 27 | Reveal.addEventListener( 'fragmentshown', post ); 28 | Reveal.addEventListener( 'fragmenthidden', post ); 29 | Reveal.addEventListener( 'overviewhidden', post ); 30 | Reveal.addEventListener( 'overviewshown', post ); 31 | Reveal.addEventListener( 'paused', post ); 32 | Reveal.addEventListener( 'resumed', post ); 33 | 34 | }()); 35 | -------------------------------------------------------------------------------- /talks/reveal.js/CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | ## Contributing 2 | 3 | Please keep the [issue tracker](http://github.com/hakimel/reveal.js/issues) limited to **bug reports**, **feature requests** and **pull requests**. 4 | 5 | 6 | ### Personal Support 7 | If you have personal support or setup questions the best place to ask those are [StackOverflow](http://stackoverflow.com/questions/tagged/reveal.js). 8 | 9 | 10 | ### Bug Reports 11 | When reporting a bug make sure to include information about which browser and operating system you are on as well as the necessary steps to reproduce the issue. If possible please include a link to a sample presentation where the bug can be tested. 12 | 13 | 14 | ### Pull Requests 15 | - Should follow the coding style of the file you work in, most importantly: 16 | - Tabs to indent 17 | - Single-quoted strings 18 | - Should be made towards the **dev branch** 19 | - Should be submitted from a feature/topic branch (not your master) 20 | 21 | 22 | ### Plugins 23 | Please do not submit plugins as pull requests. 
They should be maintained in their own separate repository. More information here: https://github.com/hakimel/reveal.js/wiki/Plugin-Guidelines 24 | -------------------------------------------------------------------------------- /talks/reveal.js/css/theme/source/serif.scss: -------------------------------------------------------------------------------- 1 | /** 2 | * A simple theme for reveal.js presentations, similar 3 | * to the default theme. The accent color is brown. 4 | * 5 | * This theme is Copyright (C) 2012-2013 Owen Versteeg, http://owenversteeg.com - it is MIT licensed. 6 | */ 7 | 8 | 9 | // Default mixins and settings ----------------- 10 | @import "../template/mixins"; 11 | @import "../template/settings"; 12 | // --------------------------------------------- 13 | 14 | 15 | 16 | // Override theme settings (see ../template/settings.scss) 17 | $mainFont: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; 18 | $mainColor: #000; 19 | $headingFont: 'Palatino Linotype', 'Book Antiqua', Palatino, FreeSerif, serif; 20 | $headingColor: #383D3D; 21 | $headingTextShadow: none; 22 | $headingTextTransform: none; 23 | $backgroundColor: #F0F1EB; 24 | $linkColor: #51483D; 25 | $linkColorHover: lighten( $linkColor, 20% ); 26 | $selectionBackgroundColor: #26351C; 27 | 28 | .reveal a { 29 | line-height: 1.3em; 30 | } 31 | 32 | 33 | // Theme template ------------------------------ 34 | @import "../template/theme"; 35 | // --------------------------------------------- 36 | -------------------------------------------------------------------------------- /talks/reveal.js/LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2017 Hakim El Hattab, http://hakim.se, and reveal.js contributors 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy 4 | of this software and associated documentation files (the "Software"), to deal 5 | in the Software without restriction, including without limitation the rights 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 7 | copies of the Software, and to permit persons to whom the Software is 8 | furnished to do so, subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in 11 | all copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 19 | THE SOFTWARE. 
-------------------------------------------------------------------------------- /talks/reveal.js/plugin/external/LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2015 Cal Evans 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /talks/reveal.js/css/theme/template/settings.scss: -------------------------------------------------------------------------------- 1 | // Base settings for all themes that can optionally be 2 | // overridden by the super-theme 3 | 4 | // Background of the presentation 5 | $backgroundColor: #2b2b2b; 6 | 7 | // Primary/body text 8 | $mainFont: 'Lato', sans-serif; 9 | $mainFontSize: 40px; 10 | $mainColor: #eee; 11 | 12 | // Vertical spacing between blocks of text 13 | $blockMargin: 20px; 14 | 15 | // Headings 16 | $headingMargin: 0 0 $blockMargin 0; 17 | $headingFont: 'League Gothic', Impact, sans-serif; 18 | $headingColor: #eee; 19 | $headingLineHeight: 1.2; 20 | $headingLetterSpacing: normal; 21 | $headingTextTransform: uppercase; 22 | $headingTextShadow: none; 23 | $headingFontWeight: normal; 24 | $heading1TextShadow: $headingTextShadow; 25 | 26 | $heading1Size: 3.77em; 27 | $heading2Size: 2.11em; 28 | $heading3Size: 1.55em; 29 | $heading4Size: 1.00em; 30 | 31 | // Links and actions 32 | $linkColor: #13DAEC; 33 | $linkColorHover: lighten( $linkColor, 20% ); 34 | 35 | // Text selection 36 | $selectionBackgroundColor: #FF5E99; 37 | $selectionColor: #fff; 38 | 39 | // Generates the presentation background, can be overridden 40 | // to return a background image or gradient 41 | @mixin bodyBackground() { 42 | background: $backgroundColor; 43 | } -------------------------------------------------------------------------------- /talks/reveal.js/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "reveal.js", 3 | "version": "3.6.0", 4 | "description": "The HTML Presentation Framework", 5 | "homepage": "http://revealjs.com", 6 | "subdomain": "revealjs", 7 | "main": "js/reveal.js", 8 | "scripts": { 9 | "test": "grunt test", 10 | "start": "grunt serve", 11 | "build": "grunt" 12 | }, 13 | "author": { 14 | "name": "Hakim El Hattab", 15 | "email": "hakim.elhattab@gmail.com", 16 | "web": "http://hakim.se" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git://github.com/hakimel/reveal.js.git" 21 | 
}, 22 | "engines": { 23 | "node": ">=4.0.0" 24 | }, 25 | "devDependencies": { 26 | "express": "^4.15.2", 27 | "grunt": "^1.0.1", 28 | "grunt-autoprefixer": "^3.0.4", 29 | "grunt-cli": "^1.2.0", 30 | "grunt-contrib-connect": "^1.0.2", 31 | "grunt-contrib-cssmin": "^2.1.0", 32 | "grunt-contrib-jshint": "^1.1.0", 33 | "grunt-contrib-qunit": "~1.2.0", 34 | "grunt-contrib-uglify": "^2.3.0", 35 | "grunt-contrib-watch": "^1.0.0", 36 | "grunt-sass": "^2.0.0", 37 | "grunt-retire": "^1.0.7", 38 | "grunt-zip": "~0.17.1", 39 | "mustache": "^2.3.0", 40 | "socket.io": "^1.7.3" 41 | }, 42 | "license": "MIT" 43 | } 44 | -------------------------------------------------------------------------------- /talks/reveal.js/lib/css/zenburn.css: -------------------------------------------------------------------------------- 1 | /* 2 | 3 | Zenburn style from voldmar.ru (c) Vladimir Epifanov 4 | based on dark.css by Ivan Sagalaev 5 | 6 | */ 7 | 8 | .hljs { 9 | display: block; 10 | overflow-x: auto; 11 | padding: 0.5em; 12 | background: #3f3f3f; 13 | color: #dcdcdc; 14 | } 15 | 16 | .hljs-keyword, 17 | .hljs-selector-tag, 18 | .hljs-tag { 19 | color: #e3ceab; 20 | } 21 | 22 | .hljs-template-tag { 23 | color: #dcdcdc; 24 | } 25 | 26 | .hljs-number { 27 | color: #8cd0d3; 28 | } 29 | 30 | .hljs-variable, 31 | .hljs-template-variable, 32 | .hljs-attribute { 33 | color: #efdcbc; 34 | } 35 | 36 | .hljs-literal { 37 | color: #efefaf; 38 | } 39 | 40 | .hljs-subst { 41 | color: #8f8f8f; 42 | } 43 | 44 | .hljs-title, 45 | .hljs-name, 46 | .hljs-selector-id, 47 | .hljs-selector-class, 48 | .hljs-section, 49 | .hljs-type { 50 | color: #efef8f; 51 | } 52 | 53 | .hljs-symbol, 54 | .hljs-bullet, 55 | .hljs-link { 56 | color: #dca3a3; 57 | } 58 | 59 | .hljs-deletion, 60 | .hljs-string, 61 | .hljs-built_in, 62 | .hljs-builtin-name { 63 | color: #cc9393; 64 | } 65 | 66 | .hljs-addition, 67 | .hljs-comment, 68 | .hljs-quote, 69 | .hljs-meta { 70 | color: #7f9f7f; 71 | } 72 | 73 | 74 | .hljs-emphasis { 75 | font-style: italic; 76 | } 77 | 78 | .hljs-strong { 79 | font-weight: bold; 80 | } 81 | -------------------------------------------------------------------------------- /talks/reveal.js/css/theme/source/simple.scss: -------------------------------------------------------------------------------- 1 | /** 2 | * A simple theme for reveal.js presentations, similar 3 | * to the default theme. The accent color is darkblue. 4 | * 5 | * This theme is Copyright (C) 2012 Owen Versteeg, https://github.com/StereotypicalApps. It is MIT licensed. 
6 | * reveal.js is Copyright (C) 2011-2012 Hakim El Hattab, http://hakim.se 7 | */ 8 | 9 | 10 | // Default mixins and settings ----------------- 11 | @import "../template/mixins"; 12 | @import "../template/settings"; 13 | // --------------------------------------------- 14 | 15 | 16 | 17 | // Include theme-specific fonts 18 | @import url(https://fonts.googleapis.com/css?family=News+Cycle:400,700); 19 | @import url(https://fonts.googleapis.com/css?family=Lato:400,700,400italic,700italic); 20 | 21 | 22 | // Override theme settings (see ../template/settings.scss) 23 | $mainFont: 'Lato', sans-serif; 24 | $mainColor: #000; 25 | $headingFont: 'News Cycle', Impact, sans-serif; 26 | $headingColor: #000; 27 | $headingTextShadow: none; 28 | $headingTextTransform: none; 29 | $backgroundColor: #fff; 30 | $linkColor: #00008B; 31 | $linkColorHover: lighten( $linkColor, 20% ); 32 | $selectionBackgroundColor: rgba(0, 0, 0, 0.99); 33 | 34 | section.has-dark-background { 35 | &, h1, h2, h3, h4, h5, h6 { 36 | color: #fff; 37 | } 38 | } 39 | 40 | 41 | // Theme template ------------------------------ 42 | @import "../template/theme"; 43 | // --------------------------------------------- -------------------------------------------------------------------------------- /talks/reveal.js/css/theme/source/moon.scss: -------------------------------------------------------------------------------- 1 | /** 2 | * Solarized Dark theme for reveal.js. 3 | * Author: Achim Staebler 4 | */ 5 | 6 | 7 | // Default mixins and settings ----------------- 8 | @import "../template/mixins"; 9 | @import "../template/settings"; 10 | // --------------------------------------------- 11 | 12 | 13 | 14 | // Include theme-specific fonts 15 | @import url(../../lib/font/league-gothic/league-gothic.css); 16 | @import url(https://fonts.googleapis.com/css?family=Lato:400,700,400italic,700italic); 17 | 18 | /** 19 | * Solarized colors by Ethan Schoonover 20 | */ 21 | html * { 22 | color-profile: sRGB; 23 | rendering-intent: auto; 24 | } 25 | 26 | // Solarized colors 27 | $base03: #002b36; 28 | $base02: #073642; 29 | $base01: #586e75; 30 | $base00: #657b83; 31 | $base0: #839496; 32 | $base1: #93a1a1; 33 | $base2: #eee8d5; 34 | $base3: #fdf6e3; 35 | $yellow: #b58900; 36 | $orange: #cb4b16; 37 | $red: #dc322f; 38 | $magenta: #d33682; 39 | $violet: #6c71c4; 40 | $blue: #268bd2; 41 | $cyan: #2aa198; 42 | $green: #859900; 43 | 44 | // Override theme settings (see ../template/settings.scss) 45 | $mainColor: $base1; 46 | $headingColor: $base2; 47 | $headingTextShadow: none; 48 | $backgroundColor: $base03; 49 | $linkColor: $blue; 50 | $linkColorHover: lighten( $linkColor, 20% ); 51 | $selectionBackgroundColor: $magenta; 52 | 53 | 54 | 55 | // Theme template ------------------------------ 56 | @import "../template/theme"; 57 | // --------------------------------------------- 58 | -------------------------------------------------------------------------------- /talks/reveal.js/lib/js/classList.js: -------------------------------------------------------------------------------- 1 | /*! @source http://purl.eligrey.com/github/classList.js/blob/master/classList.js*/ 2 | if(typeof document!=="undefined"&&!("classList" in document.createElement("a"))){(function(j){var a="classList",f="prototype",m=(j.HTMLElement||j.Element)[f],b=Object,k=String[f].trim||function(){return this.replace(/^\s+|\s+$/g,"")},c=Array[f].indexOf||function(q){var p=0,o=this.length;for(;pbody{font-family: sans-serif;}

</style><h2>reveal.js multiplex server.</h2><a href="/token">
Generate token'); 38 | res.end(); 39 | }); 40 | stream.on('readable', function() { 41 | stream.pipe(res); 42 | }); 43 | }); 44 | 45 | app.get("/token", function(req,res) { 46 | var ts = new Date().getTime(); 47 | var rand = Math.floor(Math.random()*9999999); 48 | var secret = ts.toString() + rand.toString(); 49 | res.send({secret: secret, socketId: createHash(secret)}); 50 | }); 51 | 52 | var createHash = function(secret) { 53 | var cipher = crypto.createCipher('blowfish', secret); 54 | return(cipher.final('hex')); 55 | }; 56 | 57 | // Actually listen 58 | server.listen( opts.port || null ); 59 | 60 | var brown = '\033[33m', 61 | green = '\033[32m', 62 | reset = '\033[0m'; 63 | 64 | console.log( brown + "reveal.js:" + reset + " Multiplex running on port " + green + opts.port + reset ); -------------------------------------------------------------------------------- /talks/reveal.js/plugin/notes-server/index.js: -------------------------------------------------------------------------------- 1 | var http = require('http'); 2 | var express = require('express'); 3 | var fs = require('fs'); 4 | var io = require('socket.io'); 5 | var Mustache = require('mustache'); 6 | 7 | var app = express(); 8 | var staticDir = express.static; 9 | var server = http.createServer(app); 10 | 11 | io = io(server); 12 | 13 | var opts = { 14 | port : 1947, 15 | baseDir : __dirname + '/../../' 16 | }; 17 | 18 | io.on( 'connection', function( socket ) { 19 | 20 | socket.on( 'new-subscriber', function( data ) { 21 | socket.broadcast.emit( 'new-subscriber', data ); 22 | }); 23 | 24 | socket.on( 'statechanged', function( data ) { 25 | delete data.state.overview; 26 | socket.broadcast.emit( 'statechanged', data ); 27 | }); 28 | 29 | socket.on( 'statechanged-speaker', function( data ) { 30 | delete data.state.overview; 31 | socket.broadcast.emit( 'statechanged-speaker', data ); 32 | }); 33 | 34 | }); 35 | 36 | [ 'css', 'js', 'images', 'plugin', 'lib' ].forEach( function( dir ) { 37 | app.use( '/' + dir, staticDir( opts.baseDir + dir ) ); 38 | }); 39 | 40 | app.get('/', function( req, res ) { 41 | 42 | res.writeHead( 200, { 'Content-Type': 'text/html' } ); 43 | fs.createReadStream( opts.baseDir + '/index.html' ).pipe( res ); 44 | 45 | }); 46 | 47 | app.get( '/notes/:socketId', function( req, res ) { 48 | 49 | fs.readFile( opts.baseDir + 'plugin/notes-server/notes.html', function( err, data ) { 50 | res.send( Mustache.to_html( data.toString(), { 51 | socketId : req.params.socketId 52 | })); 53 | }); 54 | 55 | }); 56 | 57 | // Actually listen 58 | server.listen( opts.port || null ); 59 | 60 | var brown = '\033[33m', 61 | green = '\033[32m', 62 | reset = '\033[0m'; 63 | 64 | var slidesLocation = 'http://localhost' + ( opts.port ? ( ':' + opts.port ) : '' ); 65 | 66 | console.log( brown + 'reveal.js - Speaker Notes' + reset ); 67 | console.log( '1. Open the slides at ' + green + slidesLocation + reset ); 68 | console.log( '2. Click on the link in your JS console to go to the notes page' ); 69 | console.log( '3. 
Advance through your slides and your notes will advance automatically' ); 70 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/notes-server/client.js: -------------------------------------------------------------------------------- 1 | (function() { 2 | 3 | // don't emit events from inside the previews themselves 4 | if( window.location.search.match( /receiver/gi ) ) { return; } 5 | 6 | var socket = io.connect( window.location.origin ), 7 | socketId = Math.random().toString().slice( 2 ); 8 | 9 | console.log( 'View slide notes at ' + window.location.origin + '/notes/' + socketId ); 10 | 11 | window.open( window.location.origin + '/notes/' + socketId, 'notes-' + socketId ); 12 | 13 | /** 14 | * Posts the current slide data to the notes window 15 | */ 16 | function post() { 17 | 18 | var slideElement = Reveal.getCurrentSlide(), 19 | notesElement = slideElement.querySelector( 'aside.notes' ); 20 | 21 | var messageData = { 22 | notes: '', 23 | markdown: false, 24 | socketId: socketId, 25 | state: Reveal.getState() 26 | }; 27 | 28 | // Look for notes defined in a slide attribute 29 | if( slideElement.hasAttribute( 'data-notes' ) ) { 30 | messageData.notes = slideElement.getAttribute( 'data-notes' ); 31 | } 32 | 33 | // Look for notes defined in an aside element 34 | if( notesElement ) { 35 | messageData.notes = notesElement.innerHTML; 36 | messageData.markdown = typeof notesElement.getAttribute( 'data-markdown' ) === 'string'; 37 | } 38 | 39 | socket.emit( 'statechanged', messageData ); 40 | 41 | } 42 | 43 | // When a new notes window connects, post our current state 44 | socket.on( 'new-subscriber', function( data ) { 45 | post(); 46 | } ); 47 | 48 | // When the state changes from inside of the speaker view 49 | socket.on( 'statechanged-speaker', function( data ) { 50 | Reveal.setState( data.state ); 51 | } ); 52 | 53 | // Monitor events that trigger a change in state 54 | Reveal.addEventListener( 'slidechanged', post ); 55 | Reveal.addEventListener( 'fragmentshown', post ); 56 | Reveal.addEventListener( 'fragmenthidden', post ); 57 | Reveal.addEventListener( 'overviewhidden', post ); 58 | Reveal.addEventListener( 'overviewshown', post ); 59 | Reveal.addEventListener( 'paused', post ); 60 | Reveal.addEventListener( 'resumed', post ); 61 | 62 | // Post the initial state 63 | post(); 64 | 65 | }()); 66 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/print-pdf/print-pdf.js: -------------------------------------------------------------------------------- 1 | /** 2 | * phantomjs script for printing presentations to PDF. 
3 | * 4 | * Example: 5 | * phantomjs print-pdf.js "http://revealjs.com?print-pdf" reveal-demo.pdf 6 | * 7 | * @author Manuel Bieh (https://github.com/manuelbieh) 8 | * @author Hakim El Hattab (https://github.com/hakimel) 9 | * @author Manuel Riezebosch (https://github.com/riezebosch) 10 | */ 11 | 12 | // html2pdf.js 13 | var system = require( 'system' ); 14 | 15 | var probePage = new WebPage(); 16 | var printPage = new WebPage(); 17 | 18 | var inputFile = system.args[1] || 'index.html?print-pdf'; 19 | var outputFile = system.args[2] || 'slides.pdf'; 20 | 21 | if( outputFile.match( /\.pdf$/gi ) === null ) { 22 | outputFile += '.pdf'; 23 | } 24 | 25 | console.log( 'Export PDF: Reading reveal.js config [1/4]' ); 26 | 27 | probePage.open( inputFile, function( status ) { 28 | 29 | console.log( 'Export PDF: Preparing print layout [2/4]' ); 30 | 31 | var config = probePage.evaluate( function() { 32 | return Reveal.getConfig(); 33 | } ); 34 | 35 | if( config ) { 36 | 37 | printPage.paperSize = { 38 | width: Math.floor( config.width * ( 1 + config.margin ) ), 39 | height: Math.floor( config.height * ( 1 + config.margin ) ), 40 | border: 0 41 | }; 42 | 43 | printPage.open( inputFile, function( status ) { 44 | console.log( 'Export PDF: Preparing pdf [3/4]') 45 | printPage.evaluate(function() { 46 | Reveal.isReady() ? window.callPhantom() : Reveal.addEventListener( 'pdf-ready', window.callPhantom ); 47 | }); 48 | } ); 49 | 50 | printPage.onCallback = function(data) { 51 | // For some reason we need to "jump the queue" for syntax highlighting to work. 52 | // See: http://stackoverflow.com/a/3580132/129269 53 | setTimeout(function() { 54 | console.log( 'Export PDF: Writing file [4/4]' ); 55 | printPage.render( outputFile ); 56 | console.log( 'Export PDF: Finished successfully!' ); 57 | phantom.exit(); 58 | }, 0); 59 | }; 60 | } 61 | else { 62 | 63 | console.log( 'Export PDF: Unable to read reveal.js config. Make sure the input address points to a reveal.js page.' ); 64 | phantom.exit(1); 65 | 66 | } 67 | } ); 68 | 69 | 70 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/external/external/external.js: -------------------------------------------------------------------------------- 1 | /* 2 | * external.js 3 | * Cal Evans 4 | * (c) Evans Internet Construction Company, Inc. 5 | * Released under the MIT license 6 | * Load external files into a reveal.js presentation. 7 | * 8 | * This is a reveal.js plugin to load external html files. It replaces the 9 | * content of any element with a data-external="file.ext" with the contents 10 | * of file.ext. 11 | * 12 | * This started life as markdown.js. Thank you to whomever wrote it. 13 | * Small mods by JJ Merelo, github.com/JJ 14 | */ 15 | 16 | (function(){ 17 | loadExternal(); 18 | 19 | function loadExternal() { 20 | 21 | var sections = document.querySelectorAll( '[data-external]'); 22 | 23 | for( var i = 0, len = sections.length; i < len; i++ ) { 24 | 25 | var this_section = sections[i]; 26 | 27 | if( this_section.getAttribute( 'data-external' ).length ) { 28 | 29 | var xhr = new XMLHttpRequest(), 30 | url = this_section.getAttribute( 'data-external' ); 31 | 32 | // see https://developer.mozilla.org/en-US/docs/Web/API/element.getAttribute#Notes 33 | xhr.onreadystatechange = function() { 34 | if( xhr.readyState === 4 ) { 35 | // file protocol yields status code 0 (useful for local debug, mobile applications etc.) 
36 | if ( ( xhr.status >= 200 && xhr.status < 300 ) || xhr.status === 0 ) { 37 | 38 | this_section.innerHTML = xhr.responseText; 39 | 40 | 41 | } 42 | else { 43 | 44 | this_section.innerHTML = '
' + 45 | 'ERROR: The attempt to fetch ' + url + ' failed with HTTP status ' + xhr.status + '. ' + 46 | 'Check your browser\'s JavaScript console for more details. ' + 47 | 'Remember that you need to serve the presentation HTML from a HTTP server.' + 48 | '
'; 49 | 50 | } 51 | } 52 | }; 53 | 54 | xhr.open( 'GET', url, false ); 55 | 56 | try { 57 | xhr.send(); 58 | } 59 | catch ( e ) { 60 | alert( 'Failed to get the file ' + url + '. Make sure that the presentation and the file are served by a HTTP server and the file can be found there. ' + e ); 61 | } 62 | 63 | } 64 | 65 | } 66 | 67 | return; 68 | } 69 | 70 | })(); 71 | -------------------------------------------------------------------------------- /resources/setup_project/solution/configurables/evaluate_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "model_path = '../data/model/classifier.bin'\n", 10 | "dataset_path = '../data/intermediate/test_dataset.txt'\n", 11 | "metrics_path = '../data/result/metrics_test.txt'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "with open(dataset_path, 'r') as fd:\n", 21 | " test_data_lines = fd.readlines()\n", 22 | "test_data_lines" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import fasttext as ft\n", 32 | "\n", 33 | "model = ft.load_model(model_path)\n", 34 | "result = model.test(dataset_path)" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "metrics = [\n", 44 | " f'Precision@1: {result.precision}',\n", 45 | " f'Recall@1: {result.recall}',\n", 46 | " f'Nb review: {result.nexamples}'\n", 47 | "]" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "for line in metrics:\n", 57 | " print(line)" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "from classifier.helper import write_lines_file\n", 67 | "\n", 68 | "write_lines_file(metrics_path, metrics)" 69 | ] 70 | } 71 | ], 72 | "metadata": { 73 | "kernelspec": { 74 | "display_name": "Python 3", 75 | "language": "python", 76 | "name": "python3" 77 | }, 78 | "language_info": { 79 | "codemirror_mode": { 80 | "name": "ipython", 81 | "version": 3 82 | }, 83 | "file_extension": ".py", 84 | "mimetype": "text/x-python", 85 | "name": "python", 86 | "nbconvert_exporter": "python", 87 | "pygments_lexer": "ipython3", 88 | "version": "3.6.5" 89 | } 90 | }, 91 | "nbformat": 4, 92 | "nbformat_minor": 2 93 | } 94 | -------------------------------------------------------------------------------- /resources/setup_project/solution/configurables/extract_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "reviews_path = '../data/input/trip_advisor.json'\n", 10 | "extracted_data_path = '../data/intermediate/extracted_data.json'" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import json\n", 20 | "with open(reviews_path) as fd:\n", 21 | " data = json.load(fd)\n", 22 | "data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | 
"len(data)" 32 | ] 33 | }, 34 | { 35 | "cell_type": "code", 36 | "execution_count": null, 37 | "metadata": {}, 38 | "outputs": [], 39 | "source": [ 40 | "from classifier.extract import extract_data_from_inputs" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "extracted_data = extract_data_from_inputs(reviews_path)" 50 | ] 51 | }, 52 | { 53 | "cell_type": "code", 54 | "execution_count": null, 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [ 58 | "extracted_data" 59 | ] 60 | }, 61 | { 62 | "cell_type": "code", 63 | "execution_count": null, 64 | "metadata": {}, 65 | "outputs": [], 66 | "source": [ 67 | "len(extracted_data)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "from classifier.helper import write_json\n", 77 | "write_json(extracted_data_path, extracted_data)" 78 | ] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.6.7" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 2 102 | } 103 | -------------------------------------------------------------------------------- /resources/setup_project/project/notebooks/evaluate_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "model_path = '../data/model/classifier.bin'\n", 10 | "dataset_path = '../data/intermediate/test_dataset.txt'" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "with open(dataset_path, 'r') as fd:\n", 20 | " test_data_lines = fd.readlines()\n", 21 | "test_data_lines" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import fasttext as ft\n", 31 | "\n", 32 | "model = ft.load_model(model_path)\n", 33 | "result = model.test(dataset_path)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "metrics = [\n", 43 | " f'Precision@1: {result.precision}',\n", 44 | " f'Recall@1: {result.recall}',\n", 45 | " f'Nb review: {result.nexamples}'\n", 46 | "]" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "for line in metrics:\n", 56 | " print(line)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from classifier.helper import write_lines_file\n", 66 | "\n", 67 | "write_lines_file('../data/result/metrics.txt', metrics)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": null, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [] 76 | } 77 | ], 78 | "metadata": { 79 | "kernelspec": { 80 | "display_name": "Python 3", 81 | "language": "python", 82 | "name": "python3" 83 | }, 84 | "language_info": { 85 | 
"codemirror_mode": { 86 | "name": "ipython", 87 | "version": 3 88 | }, 89 | "file_extension": ".py", 90 | "mimetype": "text/x-python", 91 | "name": "python", 92 | "nbconvert_exporter": "python", 93 | "pygments_lexer": "ipython3", 94 | "version": "3.6.5" 95 | } 96 | }, 97 | "nbformat": 4, 98 | "nbformat_minor": 2 99 | } 100 | -------------------------------------------------------------------------------- /resources/setup_project/project/notebooks/extract_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "reviews_path = '../data/input/trip_advisor.json'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import json\n", 19 | "with open(reviews_path) as fd:\n", 20 | " data = json.load(fd)\n", 21 | "data" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "len(data)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from classifier.extract import extract_data_from_inputs" 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": null, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "extracted_data = extract_data_from_inputs(reviews_path)" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [ 57 | "extracted_data" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": null, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "len(extracted_data)" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from classifier.helper import write_json\n", 76 | "write_json('../data/intermediate/extracted_data.json', extracted_data)" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [] 85 | } 86 | ], 87 | "metadata": { 88 | "kernelspec": { 89 | "display_name": "Python 3", 90 | "language": "python", 91 | "name": "python3" 92 | }, 93 | "language_info": { 94 | "codemirror_mode": { 95 | "name": "ipython", 96 | "version": 3 97 | }, 98 | "file_extension": ".py", 99 | "mimetype": "text/x-python", 100 | "name": "python", 101 | "nbconvert_exporter": "python", 102 | "pygments_lexer": "ipython3", 103 | "version": "3.6.5" 104 | } 105 | }, 106 | "nbformat": 4, 107 | "nbformat_minor": 2 108 | } 109 | -------------------------------------------------------------------------------- /resources/dummy/step4_convert_octals.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dummy pipeline - step 4: convert octal Ascii code to character" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This step convert an Ascii octal value to the corresponding character." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str octal_data: path to octal data input file\n", 26 | ":param str char_from_octal: path to converted data from octal output file\n", 27 | ":dvc-in octal_data: ./dummy/data/octal_data.txt\n", 28 | ":dvc-out char_from_octal : ./dummy/data/data_conv_from_octal.txt\n", 29 | "\"\"\"\n", 30 | "# Value of parameters for this Jupyter Notebook only\n", 31 | "# the notebook is in ./dummy/pipeline/notebooks\n", 32 | "octal_data = '../../data/octal_data.txt'\n", 33 | "char_from_octal = '../../data/data_conv_from_octal.txt'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "with open(octal_data, 'r') as fd:\n", 43 | " data = fd.read()" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "characters = [f\"{d.split('=')[0]}={chr(int(d.split('=')[1], 8))}\" for d in data.split()]" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "with open(char_from_octal, 'w') as fd:\n", 62 | " fd.write(' '.join(characters))" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "# No effect\n", 72 | "print(characters)" 73 | ] 74 | } 75 | ], 76 | "metadata": { 77 | "kernelspec": { 78 | "display_name": "Python 3", 79 | "language": "python", 80 | "name": "python3" 81 | }, 82 | "language_info": { 83 | "codemirror_mode": { 84 | "name": "ipython", 85 | "version": 3 86 | }, 87 | "file_extension": ".py", 88 | "mimetype": "text/x-python", 89 | "name": "python", 90 | "nbconvert_exporter": "python", 91 | "pygments_lexer": "ipython3", 92 | "version": "3.6.5" 93 | } 94 | }, 95 | "nbformat": 4, 96 | "nbformat_minor": 2 97 | } 98 | -------------------------------------------------------------------------------- /resources/setup_project/solution/configurables/preprocess_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "extracted_data_path = '../data/intermediate/extracted_data.json'\n", 10 | "preprocessed_data_path = '../data/intermediate/preprocessed_data.json'" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "import json\n", 20 | "with open(extracted_data_path) as fd:\n", 21 | " extracted_data = json.load(fd)\n", 22 | "extracted_data" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "from collections import Counter\n", 32 | "nb_review_by_labels = Counter([d[0] for d in extracted_data])\n", 33 | "\n", 34 | "nb_review_by_labels.most_common()" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "from classifier.pre_process import preprocess_data\n", 44 | "\n", 45 | "preprocessed_data = preprocess_data(extracted_data)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | 
"source": [ 54 | "preprocessed_data" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "from collections import Counter\n", 64 | "nb_review_by_labels = Counter([d.split()[0] for d in preprocessed_data])\n", 65 | "\n", 66 | "nb_review_by_labels.most_common()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "from classifier.helper import write_json\n", 76 | "write_json(preprocessed_data_path, preprocessed_data)" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.6.7" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 2 101 | } 102 | -------------------------------------------------------------------------------- /resources/setup_project/project/notebooks/preprocess_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "extracted_data_path = '../data/intermediate/extracted_data.json'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import json\n", 19 | "with open(extracted_data_path) as fd:\n", 20 | " extracted_data = json.load(fd)\n", 21 | "extracted_data" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "from collections import Counter\n", 31 | "nb_review_by_labels = Counter([d[0] for d in extracted_data])\n", 32 | "\n", 33 | "nb_review_by_labels.most_common()" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from classifier.pre_process import preprocess_data\n", 43 | "\n", 44 | "preprocessed_data = preprocess_data(extracted_data)" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": null, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "preprocessed_data" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from collections import Counter\n", 63 | "nb_review_by_labels = Counter([d.split()[0] for d in preprocessed_data])\n", 64 | "\n", 65 | "nb_review_by_labels.most_common()" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "from classifier.helper import write_json\n", 75 | "write_json('../data/intermediate/preprocessed_data.json', preprocessed_data)\n" 76 | ] 77 | }, 78 | { 79 | "cell_type": "code", 80 | "execution_count": null, 81 | "metadata": {}, 82 | "outputs": [], 83 | "source": [] 84 | } 85 | ], 86 | "metadata": { 87 | "kernelspec": { 88 | "display_name": "Python 3", 89 | "language": "python", 90 | "name": "python3" 91 | }, 92 | "language_info": { 93 | "codemirror_mode": { 94 | "name": "ipython", 95 | "version": 3 96 | }, 97 | "file_extension": ".py", 98 | 
"mimetype": "text/x-python", 99 | "name": "python", 100 | "nbconvert_exporter": "python", 101 | "pygments_lexer": "ipython3", 102 | "version": "3.6.5" 103 | } 104 | }, 105 | "nbformat": 4, 106 | "nbformat_minor": 2 107 | } 108 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/external/README.md: -------------------------------------------------------------------------------- 1 | # External.js 2 | By: Cal Evans 3 | 4 | (c) 2015 [Evans Internet Construction Company, Inc.](http://eicc.com) 5 | 6 | License: MIT 7 | 8 | ## IMPORTANT NOTE ## 9 | This project serves a very specific purpose and as such I don't usually take PRs or respond to requests for new features. You are welcome to fok it and make it your own. 10 | 11 | You can also check out [this version](https://github.com/janschoepke/reveal_external) whcich does seem to be mantained and the author seems to be open to PRs and responding to issues. 12 | 13 | ## Readme.md ## 14 | This is a plugin for Reveal.js. It allows you to specifiy external files to be loaded into a presentation. I developed it for [Zend](http://zend.com) Training. It allows a course, which may be hundreds of slides, to be broken into modules and managed individually. This allows for a course Subject Matter Expert to be working on one module, while the designer is working on another. 15 | 16 | # Using external.js 17 | Using the plugin is easy. First, register it in your Reveal.initalize block. 18 | 19 | { src: 'plugin/external/external.js', condition: function() { return !!document.querySelector( '[data-external]' ); } }, 20 | 21 | Then simply add an element into your presentation with a data-external attribute. 22 | 23 |
24 | 25 | In my example, I load in all sections, so my main presentation looks like this. 26 | 27 |     [main presentation markup lost in extraction: one <section data-external="..."></section> element per module, inside the usual reveal.js slides container] 36 | 37 | A sample of one of the files would look like this: 38 | 39 |     <section> 40 |         <h1>This is a slide</h1> 41 |         <ul> 42 |             <li>Point 1</li> 43 |             <li>Point 2</li> 44 |             <li>Point 3</li> 45 |         </ul> 46 |     </section> 47 | 48 | 49 | 50 |     <section> 51 | 52 | 53 |         <h1>This is a second slide</h1> 54 |         <p>Just to show that you can load multiple slides at a time, this is a second slide.</p> 55 |     </section>
56 | 57 | This makes each include file its own sub-module that can be navigated 58 | by the up and down cursor keys as well as the space bar, but modules can be switched by using 59 | left and right. 60 | 61 | You can of course do it differently. You can also still do subsections for slides within a separate file. Anything that can normally be done in reveal.js can be done inside an externally loaded file. 62 | 63 | # Version 64 | - 1.0.0 Initial Release 65 | 66 | # Maintainer 67 | [Cal Evans](https://blog.calevans.com) 68 | -------------------------------------------------------------------------------- /resources/setup_project/project/notebooks/train_data_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "train_dataset_path = '../data/intermediate/train_dataset.txt'\n", 10 | "conf_path = '../data/input/conf.json'" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": null, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "with open(train_dataset_path, 'r') as fd:\n", 20 | " train_data_lines = fd.readlines()\n", 21 | "train_data_lines" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import json\n", 31 | "with open(conf_path, 'r') as fd:\n", 32 | " conf = json.load(fd)" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "import fasttext as ft\n", 42 | "from tempfile import TemporaryDirectory\n", 43 | "import shutil\n", 44 | "from os import remove, makedirs\n", 45 | "from os.path import join, exists, dirname\n", 46 | "def train(fasttext_data_path: str, fasttext_model_path: str, epochs: int, learning_rate: float):\n", 47 | " with TemporaryDirectory() as tmp_dir:\n", 48 | " # Fasttext automatically adds .bin at the end of the output model file name, so\n", 49 | " # we use a temporary file to keep control of the output file path\n", 50 | " model_tmp_path = join(tmp_dir, 'model')\n", 51 | " ft.supervised(fasttext_data_path, model_tmp_path, lr=learning_rate, epoch=epochs, silent=0)\n", 52 | " if exists(fasttext_model_path):\n", 53 | " remove(fasttext_model_path)\n", 54 | " makedirs(dirname(fasttext_model_path), exist_ok=True)\n", 55 | " shutil.copy(f'{model_tmp_path}.bin', fasttext_model_path)\n", 56 | "\n" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "train(train_dataset_path, '../data/model/classifier.bin', epochs=conf['epoch'],\n", 66 | " learning_rate=conf['learning_rate'])" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [] 75 | } 76 | ], 77 | "metadata": { 78 | "kernelspec": { 79 | "display_name": "Python 3", 80 | "language": "python", 81 | "name": "python3" 82 | }, 83 | "language_info": { 84 | "codemirror_mode": { 85 | "name": "ipython", 86 | "version": 3 87 | }, 88 | "file_extension": ".py", 89 | "mimetype": "text/x-python", 90 | "name": "python", 91 | "nbconvert_exporter": "python", 92 | "pygments_lexer": "ipython3", 93 | "version": "3.6.5" 94 | } 95 | }, 96 | "nbformat": 4, 97 | "nbformat_minor": 2 98 | } 99 | --------------------------------------------------------------------------------
/resources/dummy/step1_sanitize_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dummy pipeline - step 1: sanitize data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This step extracts a text from an input file then remove not supported characters." 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str input_data: path to input file\n", 26 | ":param str sanitized_data: path to the output file\n", 27 | ":dvc-in input_data: ./dummy/data/dummy_pipeline_feed.txt\n", 28 | ":dvc-out sanitized_data : ./dummy/data/sanitized_data.txt\n", 29 | "\"\"\"\n", 30 | "# Value of parameters for this Jupyter Notebook only\n", 31 | "# the notebook is in ./dummy/pipeline/notebooks\n", 32 | "input_data = '../../data/dummy_pipeline_feed.txt'\n", 33 | "sanitized_data = '../../data/sanitized_data.txt'" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "with open(input_data, 'r') as fd:\n", 43 | " data = fd.read().strip('\\n')" 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": null, 49 | "metadata": {}, 50 | "outputs": [], 51 | "source": [ 52 | "import re\n", 53 | "data = re.sub('[^0-9=\\s]', '', data)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "with open(sanitized_data, 'w') as fd:\n", 63 | " fd.write(data)" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "# No effect \n", 73 | "\"\"\"\n", 74 | "Cells with \"# No effect\" comment will be ignore for the Python 3 script generation.\n", 75 | "They are used to see intermediate results only in notebooks\n", 76 | "\"\"\"\n", 77 | "# check result\n", 78 | "print(data)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [] 87 | } 88 | ], 89 | "metadata": { 90 | "kernelspec": { 91 | "display_name": "Python 3", 92 | "language": "python", 93 | "name": "python3" 94 | }, 95 | "language_info": { 96 | "codemirror_mode": { 97 | "name": "ipython", 98 | "version": 3 99 | }, 100 | "file_extension": ".py", 101 | "mimetype": "text/x-python", 102 | "name": "python", 103 | "nbconvert_exporter": "python", 104 | "pygments_lexer": "ipython3", 105 | "version": "3.6.5" 106 | } 107 | }, 108 | "nbformat": 4, 109 | "nbformat_minor": 2 110 | } 111 | -------------------------------------------------------------------------------- /resources/dummy/dummy_pipeline_feed_2.txt: -------------------------------------------------------------------------------- 1 | 14=1A#62- L1G1_8=110A0011 #o111t=11001#01 re74-=GA1100G%1t*+10( Rm#@(2t%F9=G)*154R @ 77=01000_0GG0 i*ps-64=Rt1A10-01)01 (um4@(@4=11t0*FG0101 dot1(2=)*011F+01t)1)1(1@(A +l138@=*111t0-011 orA95=11R%G1(0_0#1A1F sF*i41=0-t%t41_ tt )0=)01+Gt00*)10AR0%(1 am5tR#7=011@1010(_R R#e-t32=t1#A6AF0 ,t6F9G=t1(1R)0_F10F1R1A@* F48R-=11G1010_0 cGo1)07=110%A1+1GFR%1+1 n1-@28%RR=_A110)1@1@11* t(RsF-ec1At14=11)0-1)1%00* @tet35=01*G10++010#1#) u1=0R@1*-110FF100 )rF# 37=)t_%)15A1 @%ad99=(1(@1)t0R01+11 ip7=A011t00%%00G#1 i11(7G=(11+A*011F11 scti1-*3t5=11011%11t 
%ng1%36*#=11011#11 GG( %el)8)=A0111*001t1 (i9G0)=t010#0000-A tR-)(F._* 11_5*-=+110010t1 tN9=0111100t1) un21=F01A@1%000@)1_1 )c93=#t11101*00 t%nu+1)8=0%+#1#A101#-#R@11*G1*( Rnt_F++c 124=11)R00AF101F lac10)F%6@=11-00011 u%62=(11%t+0*0@)G001 Gs,1A09=R)0R1R01+1G1A1%_(- (@ lo1+F01=111_0100 bo#)rA*8*_)(2=AR#1R#F1(G1#(0F0%_1-##0 ti1#03=1110101 s38tt=15)6* 1F2G1=_1GAA10*11)0*0* t)nRF()on@_ 2-*4=141 orAci65*=_01(0_0000 ( qui3(6%=R15+4 As(, 5@5=1100%1R1-R1 @heF-nd1R@1(3=11100F0%t(+0 (reR*%r8t@3=1101%00-1 *it d_2)3=04t0 Rtt)i)g#n*+-45=110001%%%1 i-%119)+=010(1A11-R1 s(*F%sit_8+0R@#+@=110%)1-1R@10 Am nF_87+=110G111*1R)F e5t8@=0A(100%00R0 que%._5A@%1=11)00t101%R I)nt_)54@=11000+01 e7+G9=1-101111- gAerR(2)F7=1A50F ) n@o46=A1110(A__0G10) n91=110#@100G0 )n+F#iA126=111R001A1 F-_-Gs-A+%*)l22A=14_F*5 *- -125R=111-0-010#F +non 5=001000@00GtG#_% _risu@8@-9(=11(t10011 _s)_#R +Gu()-l_85*=%@R)F#11F10101 R-tr+(i6*3=+1G)11t0010A cie5(3=1_F11G+0011#) sG (#@d%i%A4=*163@ gnGi+s10_0t=11A01GF001 %s+_)Rim.56=%1100101 73=@*0+@100000_ S+#e6@*0*=1@1*+F)0010(t1(_ d@ l@11@G0=1RG110000 o19(=0-*t11_@F0-0R1F0@RG0G bF1(08=1101_101(G o132G=110G0111 r@tiG7+#0#=1A1-0-10R0@)__1 Fs 129=(-1101110 +satG*pi4%7(=1_1F00101 e-n *ut(25(=04+0RR %t* n16*=1R#60 i97=01-01111% s4(1=G0F41- i)t %96=01+110R10 veFn1AR37+*=%1G@101%A%1_t00 eGna@ti%-33=1%*(5)1 sR 81@=1G110100 au102F*-=110100(0 R#c)tA*A%tor92=1+11))0FR1FR+R00 t. AR94=1R110%%)0%0_F-0 Vets@1G-0t@F=040 G#@tA+i+bFul6=01Gt1+00101 um# 1+20=1_1G0@1101 tpo*+122=G010#A110#)*#1_ rtt7F8*t=1F1A000G1F1 t10t4*(=110001#0@R itAo#RGr40=#A040 )G aRli+qF71*=11%0111+F0 u@@aFm+ 31=04R0 eRn_Aim30-=G01100101-F 13=0#4G0 se1)33G*=+01_t011t+t%01 m-pt72-=11G0A0111%_ er 13)4-)*=111010G0 @t*20=01110101 e_mp-u(8(6=11#)1#0100 sF. E3R=151 *tiaAFmt2%6#t=167 98+=010G)A111-G1) vi#t(ae a105G=++010F1#@Ft11@0% Auct_tor eG+6#7@+)-=_-F11t01111 RAros1-_27=11_01(*#0F0F1 . #5F2-%_=%1RA11001(1%*@ FPrae*11=01110100% s(#e@7+5+@=F110@1111A ntA A3_9#=14t(G@5 #)ac -alF-iAq28=--01101111 -uett a(-G68=@#t1#10tR11_R(11t u#+guA%te*, %_qu8(4=110_00+1GF0_ its# a+GcGc+um131=_1_F1011*10 #(san )+130=AA-R11_#(+0##1001( @__n*ib+h.49F=_0100000 @@RU-#t2(=-04A#(A0 F Fdign_@i--ssA##34=-16@0(A im +augRue (116-+=%*110010G0)+ vesti*t-G+_bulu88+R=%111001+@0* m t+()_@emp15=)14%5 oRr f_erAm%Gen1(23-=RA@*1%11011-)0% tu*m.AG F)M(50=110_1_10@1% aR-RecGe112#A=11##F01111G Anas+* %pAort6(1#A_(#*=01000_@%00G@ GtitR+or# nu43=1#0+%100R11 ll5*9=1110*1)11_ AFa n_o_n##G -Ghe_)A%ndre7_6(+=1-R1t#_10%0*10R ri42=0-1@0#0*00_0) t e*#_-)letif-Ge+n%dG.*A+@ RF1t7=162t C)u*-rab6(6*(t@t=11+011*00FA i(t(ur auc@tor% p)h+a_ret%ra FcRo(nsecteFtAu+r. 
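The dummy_pipeline_feed files above are deliberately noisy: each token is a 'position=ASCII code' pair with junk characters mixed in, and the dummy pipeline steps clean and decode them. A small sketch of the decoding logic on a made-up token (the token itself is hypothetical; the regex and conversion mirror step1_sanitize_data.ipynb and the step3/step4 conversion notebooks):

    import re

    # A token in the style of the feed files: 'position=code' plus noise.
    raw = 'ab26=1110100#'

    # Step 1 (sanitize): keep only digits, '=' and whitespace.
    clean = re.sub(r'[^0-9=\s]', '', raw)   # -> '26=1110100'

    # Steps 3/4 (convert): the value is an ASCII code, binary here
    # (step 4 applies the same idea with base 8 for octal values).
    index, code = clean.split('=')
    print(index, chr(int(code, 2)))         # -> 26 t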
-------------------------------------------------------------------------------- /resources/setup_project/solution/mlvtools/evaluate_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\"\"\"\n", 10 | ":param str model_path: Path to the model input file\n", 11 | ":param str dataset_path: Path to the dataset input file\n", 12 | ":param str metrics_path: Path to the result metrics output file\n", 13 | "\n", 14 | ":dvc-in model_path: ./data/model/classifier.bin\n", 15 | ":dvc-in dataset_path: ./data/intermediate/test_dataset.txt\n", 16 | ":dvc-out metrics_path: ./data/result/metrics_test.txt\n", 17 | "\"\"\"\n", 18 | "# The following code in this cell will not be added to the generated Python script\n", 19 | "# These are values for notebook use only\n", 20 | "model_path = '../data/model/classifier.bin'\n", 21 | "dataset_path = '../data/intermediate/test_dataset.txt'\n", 22 | "metrics_path = '../data/result/metrics_test.txt'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# No effect\n", 32 | "with open(dataset_path, 'r') as fd:\n", 33 | " test_data_lines = fd.readlines()\n", 34 | "test_data_lines" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import fasttext as ft\n", 44 | "\n", 45 | "model = ft.load_model(model_path)\n", 46 | "result = model.test(dataset_path)" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "metrics = [\n", 56 | " f'Precision@1: {result.precision}',\n", 57 | " f'Recall@1: {result.recall}',\n", 58 | " f'Nb review: {result.nexamples}'\n", 59 | "]" 60 | ] 61 | }, 62 | { 63 | "cell_type": "code", 64 | "execution_count": null, 65 | "metadata": {}, 66 | "outputs": [], 67 | "source": [ 68 | "# No effect\n", 69 | "for line in metrics:\n", 70 | " print(line)" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "from classifier.helper import write_lines_file\n", 80 | "\n", 81 | "write_lines_file(metrics_path, metrics)" 82 | ] 83 | } 84 | ], 85 | "metadata": { 86 | "kernelspec": { 87 | "display_name": "Python 3", 88 | "language": "python", 89 | "name": "python3" 90 | }, 91 | "language_info": { 92 | "codemirror_mode": { 93 | "name": "ipython", 94 | "version": 3 95 | }, 96 | "file_extension": ".py", 97 | "mimetype": "text/x-python", 98 | "name": "python", 99 | "nbconvert_exporter": "python", 100 | "pygments_lexer": "ipython3", 101 | "version": "3.6.7" 102 | } 103 | }, 104 | "nbformat": 4, 105 | "nbformat_minor": 2 106 | } 107 | -------------------------------------------------------------------------------- /talks/pyData/draft.md: -------------------------------------------------------------------------------- 1 | Focus the talk on the opposition between the two worlds, especially at the beginning; as a result, they come together at the end. 2 | 3 | Overview: 4 | 5 | - Presentation 6 | - Sarah: [short bio] + hook on the technologies => I use Jupyter notebooks 7 | and I need to... be able to reproduce easily, keep some flexibility, ... 8 | 9 | - Stephanie: [short bio] + hook on Automation, Delivery, Tests 10 | I need...
something that is easy to launch, that can be packaged, that is reproducible 11 | in any environment 12 | 13 | 14 | - Why => Our story: basically, a catchy title for porting the 15 | PoC (several executable Jupyter notebooks on one machine) to production (or at least the "industrialization" step of the project), 16 | mixing the dev and data-scientist worlds 17 | 18 | - the long and winding road from PoC to production ... at the crossroads of the two worlds 19 | 20 | - POC vs PROD ... vs Data scientist vs Software Developer 21 | 22 | 23 | - The POC: 24 | 25 | - set of notebooks, some data, name-based versioning, specific server/user 26 | [Show a repo overview] 27 | 28 | - Step 1: express our needs 29 | 30 | - Automation/Scripting (first step) 31 | 32 | ML side: keep using Jupyter notebooks 33 | Dev side: be able to easily run the tool and version a standardized format under git, 34 | tests, CI 35 | 36 | - Reproducibility/Pipelining/Versioning 37 | 38 | => Going further in the automation process 39 | => No loss, be more confident 40 | => Easily perform experiments 41 | => Handle data sharing 42 | 43 | 44 | ML side: be able to experiment, avoid reproducing time-consuming steps, keep tracking data, 45 | share with the team, organization (no more inconsistent references to name-versioned notebooks, execution order 46 | and dependencies) 47 | 48 | 49 | Dev side: be able to reproduce any configuration (data + hyperparameters + code) on any server, 50 | keep tracking the state-of-the-art pipeline for further delivery, be able to handle client specificities 51 | 52 | 53 | [Diagram representing the needs] 54 | 55 | - Step 2: Organization start: we need Python scripts from Jupyter notebooks 56 | 57 | - Existing solutions: nbconvert 58 | 59 | - Issues: not parametrized, and no handling of no-effect cells 60 | 61 | - MLV-tools: ipynb_to_python 62 | 63 | 64 | - Step 3: We need to handle data versioning and pipelining 65 | 66 | - Existing solution: git lfs => data ok, pipelining nok 67 | - Existing solution: dvc => data ok, pipelining ok BUT...
[not easy to use and based on bash commands 68 | but good news: we already have scripts] 69 | - example DVC 70 | - show why it's painful 71 | 72 | - MLV-tools: from Jupyter notebook to a pipeline step 73 | 74 | 75 | 76 | - REX (lessons learned) 77 | 78 | => flexibility for experimentation (Commerzbank) 79 | => data loss 80 | 81 | 82 | 83 | 84 | 85 | 86 | -------------------------------------------------------------------------------- /resources/setup_project/solution/mlvtools/extract_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\"\"\"\n", 10 | ":param str reviews_path: Path to the reviews JSON input file\n", 11 | ":param str extracted_data_path: Path to the extracted data output file\n", 12 | " \n", 13 | ":dvc-in reviews_path: ./data/input/trip_advisor.json\n", 14 | ":dvc-out extracted_data_path: ./data/intermediate/extracted_data.json\n", 15 | "\"\"\"\n", 16 | "# The following code in this cell will not be added to the generated Python script\n", 17 | "# These values are only for notebook purposes\n", 18 | "reviews_path = '../data/input/trip_advisor.json'\n", 19 | "extracted_data_path = '../data/intermediate/extracted_data.json'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "# No effect\n", 29 | "import json\n", 30 | "with open(reviews_path) as fd:\n", 31 | " data = json.load(fd)\n", 32 | "data" 33 | ] 34 | }, 35 | { 36 | "cell_type": "code", 37 | "execution_count": null, 38 | "metadata": {}, 39 | "outputs": [], 40 | "source": [ 41 | "# No effect\n", 42 | "len(data)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "from classifier.extract import extract_data_from_inputs" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "extracted_data = extract_data_from_inputs(reviews_path)" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "# No effect\n", 70 | "extracted_data" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "# No effect\n", 80 | "len(extracted_data)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "from classifier.helper import write_json\n", 90 | "write_json(extracted_data_path, extracted_data)" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.6.7" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /resources/setup_project/solution/configurables/train_data_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | 
"cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "train_dataset_path = '../data/intermediate/train_dataset.txt'\n", 10 | "conf_path = '../data/input/conf.json'\n", 11 | "model_path = '../data/model/classifier.bin'" 12 | ] 13 | }, 14 | { 15 | "cell_type": "code", 16 | "execution_count": null, 17 | "metadata": {}, 18 | "outputs": [], 19 | "source": [ 20 | "with open(train_dataset_path, 'r') as fd:\n", 21 | " train_data_lines = fd.readlines()\n", 22 | "train_data_lines" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "import json\n", 32 | "with open(conf_path, 'r') as fd:\n", 33 | " conf = json.load(fd)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "conf" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "import fasttext as ft\n", 52 | "from tempfile import TemporaryDirectory\n", 53 | "import shutil\n", 54 | "from os import remove, makedirs\n", 55 | "from os.path import join, exists, dirname\n", 56 | "def train(fasttext_data_path: str, fasttext_model_path: str, epochs: int, learning_rate: float):\n", 57 | " with TemporaryDirectory() as tmp_dir:\n", 58 | " # Fasttext automatically add .bin at the end of the output model file name so\n", 59 | " # we use a temporary file to keep control on output file path\n", 60 | " model_tmp_path = join(tmp_dir, 'model')\n", 61 | " ft.supervised(fasttext_data_path, model_tmp_path, lr=learning_rate, epoch=epochs, silent=0)\n", 62 | " if exists(fasttext_model_path):\n", 63 | " remove(fasttext_model_path)\n", 64 | " makedirs(dirname(fasttext_model_path), exist_ok=True)\n", 65 | " shutil.copy(f'{model_tmp_path}.bin', fasttext_model_path)\n", 66 | "\n" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": null, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "train(train_dataset_path, model_path, \n", 76 | " epochs=conf['epoch'], learning_rate=conf['learning_rate'])" 77 | ] 78 | } 79 | ], 80 | "metadata": { 81 | "kernelspec": { 82 | "display_name": "Python 3", 83 | "language": "python", 84 | "name": "python3" 85 | }, 86 | "language_info": { 87 | "codemirror_mode": { 88 | "name": "ipython", 89 | "version": 3 90 | }, 91 | "file_extension": ".py", 92 | "mimetype": "text/x-python", 93 | "name": "python", 94 | "nbconvert_exporter": "python", 95 | "pygments_lexer": "ipython3", 96 | "version": "3.6.7" 97 | } 98 | }, 99 | "nbformat": 4, 100 | "nbformat_minor": 2 101 | } 102 | -------------------------------------------------------------------------------- /resources/dummy/dummy_pipeline_feed.txt: -------------------------------------------------------------------------------- 1 | L1tA_26=1110100% o32=171 r1G0_5-+=1#100111 e)F@)139=1%100011 )(m 58G=0R0100_0**00 i1A54=G1R)1t0_1100 p(s76#=10F10011 %Rum8FF5((=1RF1100@A11 8A1=11+R10+100 d1F0R4=110_111)(t0 o84=1A_100101G lo3=0R0100+%00@A0 @r A64(=01+1+10G-t#-0@0t0 s11A4F=1%11*0t1%-0FR0 iG53=156 -t 2G7=00F100000 a16=1*45 m12=#1t64 e1A#66=0A*10)AR#1+*101# t,46=@0F*111-0%@111F Rc#93=1100101 on2%6=163t s*et2@8=1GG+44 ct112F=-11_0111)A1+ et33+=040@t ur-1@08R=110_G1*11G1 128=1110011 aAd80)(=110010)1 Ri)p24=011)#-010@0)0 +is-1t7=0110_1110 _cA19=040- inA71=00%*@100FG*0(0RR0) -g 97=-1%1-@00+10G(1 e@(li60=157# _t. 
125=1110@1R_0A0 N1_6tF1G=1(101111A unc159%_=(1t1100R%%+G@1A1 _ _n+u()R1*41=A(1101101 nct 91_=@0100A000 la21=011%(001#1%0_ c(-us6=01100@0)0)GF1 ,+1%7@1t=F1_1-t10R0t1-1 @ %l(4*8=G0R(11A#101(AA11 o4)=162 boRr70+=t)R1_45 ti8=1@50 s10)1_=110111#1R #n*o79+=1)1A100*1@0A n o9=011001@*0F1t G+r160-=11-0G1G00)1 @ci 31=0(110F@)1101 qu94G=0+1#00000- i@s-t,13_4@@=1110100R FG+he10=G0t1-t100A100F n69=@01100)0)11 d1%*=157G re106=01000_00 rit5)1=143 d50=04@G0 tFi@gA156=%t1110110 nAi49=*1G4F5 ssi43=00100+0@00 m_ 142=A+*0101111 ne*1%58=1F1)10%0(1A0 @#queA14A8=1100101 .30=1R+--55 + IR95=1)#1*0000R1A_- nt1A13+G=1()%101+1)-10(tR e(g)e92#=A11@10111 r) 15=040 +t%non1A6*2=11011%GtAAA@10 tn1-5)1=11000G%11@ i#s-18=14t4#+% (l34GG#=01G11000(0 42=0)5%F4 no@1+1(8=111t010@)1 n 122@=1110011 r87=11(00001 i129At=011#1*010 s((23=+F011(1010(0 u-117=11t00010 #s 170=#11%01100* uRl1(@36-*=1110@_101 t#r55=tG011_10#100 ic#i*111=1+1000t11 e)FA1)R#16=11010F01 F*s54@)))=G00100000 Gd37=1@45( +Aig1+3F_@2=11-0-0A11@1 n-56=162( R+-iss+8%9=110G010((1 im.tA1t49G=G110010F0 5=1(45F Se121=1%110t010 d77=#11_001R0+1 + tlo13(3=11t0t1001 b(65=011+#*10t01A0 orFt164=11A011_1@0 i2+5)=1GF51_ #)s123=AR0-100)%F__)%000 sa57=#)*0(1+11(1)0%_01A p59=t-01#110100_ i_en #82#=0_R1000_%00 u%t15%5=01#01101 nis145(=1101)11#1 i#%68=165 90=*011101*0 -#ve-(nA%62=011t10010 ena1+35=1tG1Rt01%00#0 tis 1#37=1_)100R010 auGc#7(4=041 tor16*3*=1(101001 . V14#4=@1100)10(1 +e1)10G=0F100000 sAtiA102(=110+%1011 b1#0-0=t110_1*%_+1R11 ttulAu8t8t=1100111 m po3F%*8=(154 rtti45=157 G*At169%=-_1)101111 or131%=R010)11#G11 ali73=01110G100 qu+aRmRF7=143 e61#=040 -ni86=1110011 m s98=0100000%* GemFpe*1t#4G3@*=11_10000 #_FrF29)_=0A111010_1 153=11011G0R1GG ttem%p_-147=1101t100 u_12R4(=1101*GF000 s(.F#107_=110A0110 ( Et)ia@67+=t0110+0100 m v146t=F111%00)R00+ @FitF%a0=0*10*110_01 FAe_ 13=01(10100G0 a1G38=+0101110 uc*109=++-111--R0010# A+torA#11=0010*00GtF*00 eros120F=11-0_1111 @. @)Pr(a7#-F8=+11_A00011 esFtFen140=11(01R111 t#A Ra168@-=110_(11G11( c ali*%5#2*=01@1000#01 @que4F0(=01-10@_+R1%110 F%t Ftau1#19=1110100 )gu(e@,4#4=1R*%%56 _#+ 157G=1100101 qu-i@16-5=@110011R1 s a+t9)6*=11100_G10 ccu*msa*41R=01R10)AG01R01 n Rnib130=0+10+_)1111* h2((%0=01101111 . 39=t15%1 U-t 75=)t0100000 diRgni1*03=%1101001* ss7F2#AG+=_1-51 +imGG aRug3%+5=F)%15R1 ue ve1(4%=01)1001R0@1% Gst@8_R-_3=1101101 ibu7AR4=0-41- -lum- *t%emG*##+pt*oG2*-2F=00F1G00000_ r *ferFm2(=165 @)e66=0R+1101+@)G%1_F_+11 nAtu@Fm. 127=11100A00 Maec1)52=0R1011_11 e%nas@ pRRor4)A7=04FR(0# ttito-r nu*+*@lla99=1R1011)00t% nFon# 150=1101+%@11*__1 h#%etndr*Ge-r1%AF67=1F11010F#*0 iA36(t=16-+0 t e*63=1_45 leifen*d.*R Cur(##abiturR#1*15=11100(-1-t0t Fat-u)ctFor phareRtr%a c(o-n-@-sectGRetur. 
-------------------------------------------------------------------------------- /resources/dummy/dummy_pipeline_feed_3.txt: -------------------------------------------------------------------------------- 1 | Lo@26=_*#143#+( r)eFm2+7=0110%A*0101R (%%i#A%p94#FA=11+0F1t0A0)AA1 sum14_4=%A0F%+*10GFtFA11%0R1 do1GA10=1(_10_0+@1+A11) l57=_11_1+00R1-GA0 o98=11A_0#)@1_1*1*1 r100=1#1+10G0+1(1 A Rts%i+R%13=*01A110F0(1+1 t a1*1=0%@1_1-0*0-10@1 me1)+12=1t11010G0 t,_# R75+=G1-#10F0(@101 Ac51=G04@1 ons97=#1At*A110@100R- ect81=1)1(@01001@ e3-6=0_%5t-5 %tAur3=*15@1* (* a#-121=%1#+11)00**00 FG@Atdti9R2t=F_111+A*0-(100 p+127(tG=11001@0FA0 iscA109=01-01111 %i28=#+04@0 )n1#)02=1%G10100tF#0 g_ -e%_53=01G0@0FR(F000-A li1t13=1F1010-0(_0 t.t 9)1=1t10%1R110 Nu**%80=t)1F1@A01R0@11# Gn74-A=11*10)010% cGR (0#=A@1%(11A )n(_un7*(9=_G11+*01#111 (c68(=(0111010G la1@37_=1R11-0@0-11 c52A=+0%010+0(001 _#Rus5F9=t*1#1F*10100 ,3(_2@(=-)G04(0*+) l-+oG47=-F01-10*1(1F11 *b30F=%0_010_@00F00R tor1R40=@1%1R0tG1_1R10 *t46=134 _i11F1=F#1101001 s 8G@)5*=-*%+110()0#)%1)1#0 n%138=R110_10*01 Ron41R%)%+=1F54 A*63=)1(110011 or125=%1101100tt c#i14=17t1 tR+ +qu14)5=1-*@11010*0 @i3#_5(=14@2t )s, 3#9=0R11%G%*_@10%000%G R+hen*17R@=157 dreR66G=1100%111 rG12)_GA3=110@111AG1 itF1--15=G110001G%F0 ) GG(d+i(+133=0101+F1%01F)( g119=11#0#1t101A nR#A%iRsR6+G4=11t#@%1+(0011F *Rs(%A#it@m83=+1%1%00111 %_R3+#3=R+A0GF1G110-(01F1 (A+neG10t3=111F0(100R qu4_+0F=1#45 e.8A8=-010%+F@*0R000A I@89-@=-11#-000At1)1 Rnt7=R#011011-00 ege10-4_tt=111A)*G0G1@t00 r( n#71=11+00101 o+%n 4=%163% F*_nis+6+0=01)00_0%00 A-R_l# 1-22=110010)1t n-o1-24=1t1F10(FF000 #n%% A1%31=1t1011t0A*G1 r-is1(--2=01100G0F#01G u(87_=R1#(1G10+F0+10 (s 126=_1100A1(G01 _ultA38=0110+1001 rGi-ci2G4+=0G1)1@_0tR0100R esA134R=t1*1#1t01)10R) 10=00)100000 @digR7-8=11-01@1__1(1) R)nis90=1F%t101)1*1R1)% s@A@R*im+.5G2@=%0t41-@RAt )#@ Sed6(2=(110A01-01G)- lo10FA*A7=Gt0-11101+t0 borGt76))R=0#10+000#0 R)iA@s29=01R-100A_00t1A*+* s4t(9=F04*0 ap_1-3t6_=t#1-11G0*010 %_ien %F8(*2=1A10*1(1#t(10) @Aut 61+=_@t%1t10#1101 n4A#(3=0@(1#101)1t10(# iR)s14AF9=-1110+011 itF12-R##t0@G=0101@1_1+1 % Aven@117F=**-11t0001#*1 )Aen+t101=0#10-0000#-G a(@%t+i1FG35+=1100101 F#s a__58=110_01*%F01-- uc*to-#77t=%1101100 r1-14-=R111010G1# F.70F)=(111+0F111@F #V1)42=1G1+01tR110 es))t*116*R=*0(@RA1t0#t11%1#0 i-Rbu++l130+=)+t()01*0R1)1GG11 Fu#m#96=-+11%)10101( p73=1_1000R01 ort%#Ft1+A4+@F7)A=11%A0111*R1 Gito21=F0_111@0000t% r- (a6t9=0100-_0t00 liq19=(A(-162 Aua*@#m@ 31=0F1*100(00A1+F e(n%56=11R*00011 imG+ t+s+tte13-G9=G1101G)1R11 tm42)=0#t110_10A01 pGer G4)F8=0010F1111@ t*e+mp128=11011R1@G1 %us.%1(18=110%11R)1%@1+@( FEt5)=00#R1)0)0000 iam@65-=_F1t10_@0001A v%G_+*i+72-=t_010_0000- )ta54=@)10100+11 *)eR6R7=110%01AG01 * 25@=01)1_@+*10101# a1=1A_(64 uctor @10A-8=01@-)0111*1 e(r9@9=11100*)-%(*10 #o_s_1%-41F=11R%0(1@001 ._+ PFR@55#G=1100101 raG6=1G41 es_2F**0=0R11@0G)01F01 Rent%2_2=0#11-10RF*)01*R0t-_F F 23(=0110(@11)11 *ac aF%%@li3F4=-01)F1_10F101_ q#ueA%t# 1GA_At8=@001+G@*00000 aF_ugue45(@++)=040RR , quisR a9*=157F% t_A(#G%#c-cu@+ms_(%a)n1Rt5=040 _ nib_h.) U132=11*G0110R0- t dGigniF44F=1G45 ss)Fi#m 14@(8R=#G1*1tt@0110(Gt0 a2=001A0000_*t0 RRAugue# Fv(te)10A6=1110_011+ sAtiG(129=11+0%0G011 bRulu_m temttp8=-163 or *93=111+0#-01_0*)+# #%f(ermeGtn1R0#A*A-5=#1R(1-@100R0_0 _t+_Fum-. 
*-MRG1(6RF)=011+t(#10100G aece(%@nG-a+As tApo(rt5F0t)=+0RF(4F1 ti#to@-r nAul_l)84*=01000A)%0-0 a Rno14*3#=1100111 n% (F-heGnGd@(reF(ri#*#t1)G46)=@1A10__1*G@111GAA e*leGifend.37=160 @ Cutr8t6+=-1+101(111-_ aAbitur ta_#)uct%o(r -p_h@aretRra c*9)5=@)G1)10#_001t0_# Fonse%t)_ct%(etu-r. -------------------------------------------------------------------------------- /resources/setup_project/solution/configurables/split_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "preprocessed_data_path = '../data/intermediate/preprocessed_data.json'\n", 10 | "train_dataset_path = '../data/intermediate/train_dataset.txt'\n", 11 | "test_dataset_path = '../data/intermediate/test_dataset.txt'\n", 12 | "test_percent = 0.15" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "import json\n", 22 | "with open(preprocessed_data_path, 'r') as fd:\n", 23 | " preprocessed_data = json.load(fd)\n", 24 | "preprocessed_data" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "len(preprocessed_data)" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "from classifier.split import split_dataset\n", 43 | "\n", 44 | "\n", 45 | "test_dataset, train_dataset = split_dataset(preprocessed_data, test_percent)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "len(test_dataset), len(train_dataset)" 55 | ] 56 | }, 57 | { 58 | "cell_type": "code", 59 | "execution_count": null, 60 | "metadata": {}, 61 | "outputs": [], 62 | "source": [ 63 | "test_dataset" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": null, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "from collections import Counter\n", 73 | "test_review_by_labels = Counter([d.split()[0] for d in test_dataset])\n", 74 | "train_review_by_labels = Counter([d.split()[0] for d in train_dataset])\n", 75 | "\n", 76 | "test_review_by_labels.most_common()" 77 | ] 78 | }, 79 | { 80 | "cell_type": "code", 81 | "execution_count": null, 82 | "metadata": {}, 83 | "outputs": [], 84 | "source": [ 85 | "train_review_by_labels.most_common()" 86 | ] 87 | }, 88 | { 89 | "cell_type": "code", 90 | "execution_count": null, 91 | "metadata": {}, 92 | "outputs": [], 93 | "source": [ 94 | "from classifier.helper import write_lines_file\n", 95 | "\n", 96 | "write_lines_file(train_dataset_path, train_dataset)\n", 97 | "write_lines_file(test_dataset_path, test_dataset)\n" 98 | ] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3", 104 | "language": "python", 105 | "name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.6.7" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- 
/resources/setup_project/project/notebooks/split_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "preprocessed_data_path = '../data/intermediate/preprocessed_data.json'" 10 | ] 11 | }, 12 | { 13 | "cell_type": "code", 14 | "execution_count": null, 15 | "metadata": {}, 16 | "outputs": [], 17 | "source": [ 18 | "import json\n", 19 | "with open(preprocessed_data_path, 'r') as fd:\n", 20 | " preprocessed_data = json.load(fd)\n", 21 | "preprocessed_data" 22 | ] 23 | }, 24 | { 25 | "cell_type": "code", 26 | "execution_count": null, 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "len(preprocessed_data)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "from classifier.split import split_dataset\n", 40 | "\n", 41 | "\n", 42 | "test_dataset, train_dataset = split_dataset(preprocessed_data, test_percent=0.15)" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "len(test_dataset), len(train_dataset)" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "test_dataset" 61 | ] 62 | }, 63 | { 64 | "cell_type": "code", 65 | "execution_count": null, 66 | "metadata": {}, 67 | "outputs": [], 68 | "source": [ 69 | "from collections import Counter\n", 70 | "test_review_by_labels = Counter([d.split()[0] for d in test_dataset])\n", 71 | "train_review_by_labels = Counter([d.split()[0] for d in train_dataset])\n", 72 | "\n", 73 | "test_review_by_labels.most_common()" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "train_review_by_labels.most_common()" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "from classifier.helper import write_lines_file\n", 92 | "\n", 93 | "write_lines_file('../data/intermediate/train_dataset.txt', train_dataset)\n", 94 | "write_lines_file('../data/intermediate/test_dataset.txt', test_dataset)\n" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [] 103 | } 104 | ], 105 | "metadata": { 106 | "kernelspec": { 107 | "display_name": "Python 3", 108 | "language": "python", 109 | "name": "python3" 110 | }, 111 | "language_info": { 112 | "codemirror_mode": { 113 | "name": "ipython", 114 | "version": 3 115 | }, 116 | "file_extension": ".py", 117 | "mimetype": "text/x-python", 118 | "name": "python", 119 | "nbconvert_exporter": "python", 120 | "pygments_lexer": "ipython3", 121 | "version": "3.6.5" 122 | } 123 | }, 124 | "nbformat": 4, 125 | "nbformat_minor": 2 126 | } 127 | -------------------------------------------------------------------------------- /resources/04_Evaluate_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Evaluate the model\n", 8 | "Next, we want to evaluate how well the model is doing, on train and test data. 
" 9 | ] 10 | }, 11 | { 12 | "cell_type": "code", 13 | "execution_count": null, 14 | "metadata": {}, 15 | "outputs": [], 16 | "source": [ 17 | "# Parameters\n", 18 | "\"\"\"\n", 19 | ":param str model_file: Path to model file\n", 20 | ":param str data_file: Path to data files\n", 21 | ":param str result_file: Path to file for storing evaluation metrics\n", 22 | ":dvc-in data_file: ./poc/data/data_train_tokenized.csv \n", 23 | ":dvc-in model_file: ./poc/data/fasttext_model.bin \n", 24 | ":dvc-out result_file: ./poc/data/metrics.txt\n", 25 | "\"\"\"\n", 26 | "# Value of parameters for this Jupyter Notebook only\n", 27 | "# the notebook is in ./poc/pipeline/notebooks\n", 28 | "model_file = '../../data/fasttext_model.bin'\n", 29 | "data_file = '../../data/data_train_tokenized.csv'\n", 30 | "result_file = '../../data/metrics.txt'" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "import pandas as pd\n", 40 | "import numpy as np\n", 41 | "from pyfasttext import FastText\n", 42 | "import json" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": null, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "df = pd.read_csv(data_file)\n", 52 | "df['data'] = df['data'].apply(lambda s: ' '.join(json.loads(s.replace(\"'\", '\"'))))" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "model = FastText()\n", 62 | "model.load_model(model_file)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "predicted = pd.DataFrame(model.predict([sentence + '\\n' for sentence in df['data']]), columns=['targetnames'])" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "accuracy = ((predicted != df[['targetnames']]).sum() / len(df)).iloc[0]" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "with open(result_file, 'w') as file_desc:\n", 90 | " file_desc.write(f'accuracy {accuracy}\\n')" 91 | ] 92 | }, 93 | { 94 | "cell_type": "code", 95 | "execution_count": null, 96 | "metadata": {}, 97 | "outputs": [], 98 | "source": [] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3", 104 | "language": "python", 105 | "name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.6.5" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /resources/setup_project/solution/mlvtools/preprocess_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\"\"\"\n", 10 | ":param str extracted_data_path: Path extracted data input file\n", 11 | ":param str preprocessed_data_path: Path to the preprocessed data output file\n", 12 | "\n", 13 | ":dvc-in extracted_data_path: 
./data/intermediate/extracted_data.json\n", 14 | ":dvc-out preprocessed_data_path: ./data/intermediate/preprocessed_data.json\n", 15 | "\"\"\"\n", 16 | "# The following code in this cell will not be added to the generated Python script\n", 17 | "# These values are only for notebook purposes\n", 18 | "extracted_data_path = '../data/intermediate/extracted_data.json'\n", 19 | "preprocessed_data_path = '../data/intermediate/preprocessed_data.json'" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": null, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "import json\n", 29 | "with open(extracted_data_path) as fd:\n", 30 | " extracted_data = json.load(fd)" 31 | ] 32 | }, 33 | { 34 | "cell_type": "code", 35 | "execution_count": null, 36 | "metadata": {}, 37 | "outputs": [], 38 | "source": [ 39 | "# No effect\n", 40 | "extracted_data" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "# No effect\n", 50 | "from collections import Counter\n", 51 | "nb_review_by_labels = Counter([d[0] for d in extracted_data])\n", 52 | "\n", 53 | "nb_review_by_labels.most_common()" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "from classifier.pre_process import preprocess_data\n", 63 | "\n", 64 | "preprocessed_data = preprocess_data(extracted_data)" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "# No effect\n", 74 | "preprocessed_data" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "# No effect\n", 84 | "from collections import Counter\n", 85 | "nb_review_by_labels = Counter([d.split()[0] for d in preprocessed_data])\n", 86 | "\n", 87 | "nb_review_by_labels.most_common()" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": null, 93 | "metadata": {}, 94 | "outputs": [], 95 | "source": [ 96 | "from classifier.helper import write_json\n", 97 | "write_json(preprocessed_data_path, preprocessed_data)" 98 | ] 99 | } 100 | ], 101 | "metadata": { 102 | "kernelspec": { 103 | "display_name": "Python 3", 104 | "language": "python", 105 | "name": "python3" 106 | }, 107 | "language_info": { 108 | "codemirror_mode": { 109 | "name": "ipython", 110 | "version": 3 111 | }, 112 | "file_extension": ".py", 113 | "mimetype": "text/x-python", 114 | "name": "python", 115 | "nbconvert_exporter": "python", 116 | "pygments_lexer": "ipython3", 117 | "version": "3.6.7" 118 | } 119 | }, 120 | "nbformat": 4, 121 | "nbformat_minor": 2 122 | } 123 | -------------------------------------------------------------------------------- /resources/dummy/step3_convert_binaries.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dummy pipeline - step 3: convert binary ASCII code to character" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This step converts an ASCII binary value to the corresponding character." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str binary_data: path to binary data input file\n", 26 | ":param str char_from_bin: path to converted data from binary output file\n", 27 | ":dvc-cmd: dvc run -f $MLV_DVC_META_FILENAME -d ./dummy/data/binary_data.txt \n", 28 | " -o ./dummy/data/data_conv_from_bin.txt\n", 29 | " $MLV_PY_CMD_PATH --binary-data ./dummy/data/binary_data.txt\n", 30 | " --char-from-bin ./dummy/data/data_conv_from_bin.txt\n", 31 | "\"\"\"\n", 32 | "# Value of parameters for this Jupyter Notebook only\n", 33 | "# the notebook is in ./dummy/pipeline/notebooks\n", 34 | "binary_data = '../../data/binary_data.txt'\n", 35 | "char_from_bin = '../../data/data_conv_from_bin.txt'" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "metadata": {}, 41 | "source": [ 42 | "> In this case we use **dvc-cmd** instead of **dvc-in** and **dvc-out**, but only to show that it is possible if needed. However, it is not recommended, due to its verbosity and the risk of errors\n", 43 | "\n", 44 | "With **dvc-in** and **dvc-out**:\n", 45 | "\n", 46 | " \"\"\" \n", 47 | " :param str binary_data: path to binary data input file\n", 48 | " :param str char_from_bin: path to converted data from binary output file\n", 49 | " :dvc-in binary_data: ./dummy/data/binary_data.txt\n", 50 | " :dvc-out char_from_bin: ./dummy/data/data_conv_from_bin.txt\n", 51 | " \"\"\"" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": {}, 58 | "outputs": [], 59 | "source": [ 60 | "with open(binary_data, 'r') as fd:\n", 61 | " data = fd.read()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "characters = [f\"{d.split('=')[0]}={chr(int(d.split('=')[1], 2))}\" for d in data.split()]" 71 | ] 72 | }, 73 | { 74 | "cell_type": "code", 75 | "execution_count": null, 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [ 79 | "with open(char_from_bin, 'w') as fd:\n", 80 | " fd.write(' '.join(characters))" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": null, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "# No effect\n", 90 | "print(characters)" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | "mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.6.5" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /resources/dummy/step2_split_data.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Dummy pipeline - step 2: split data" 8 | ] 9 | }, 10 | { 11 | "cell_type": "markdown", 12 | "metadata": {}, 13 | "source": [ 14 | "This step splits the data input file into two files: one with octal values, the other with binary values." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str sanitized_data: path to input sanitized data\n", 26 | ":param str octal_data: path to octal data output file\n", 27 | ":param str binary_data: path to binary data output file\n", 28 | ":param int size_bin_data: number of bits in a binary value\n", 29 | ":dvc-in sanitized_data: ./dummy/data/sanitized_data.txt\n", 30 | ":dvc-out octal_data: ./dummy/data/octal_data.txt\n", 31 | ":dvc-out binary_data: ./dummy/data/binary_data.txt\n", 32 | ":dvc-extra: --size-bin-data 8\n", 33 | "\"\"\"\n", 34 | "# Value of parameters for this Jupyter Notebook only\n", 35 | "# the notebook is in ./dummy/pipeline/notebooks\n", 36 | "sanitized_data = '../../data/sanitized_data.txt'\n", 37 | "octal_data = '../../data/octal_data.txt'\n", 38 | "binary_data = '../../data/binary_data.txt'\n", 39 | "size_bin_data = 8" 40 | ] 41 | }, 42 | { 43 | "cell_type": "markdown", 44 | "metadata": {}, 45 | "source": [ 46 | "> In this case we use **dvc-extra** to provide a parameter which is neither an input nor an output (--size-bin-data). " 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "with open(sanitized_data, 'r') as fd:\n", 56 | " data = fd.read()" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "binaries = [d for d in data.split() if len(d.split('=')[1]) >= size_bin_data]" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": null, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "octals = [d for d in data.split() if len(d.split('=')[1]) == 3]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "with open(octal_data, 'w') as fd:\n", 84 | " fd.write(' '.join(octals))" 85 | ] 86 | }, 87 | { 88 | "cell_type": "code", 89 | "execution_count": null, 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "with open(binary_data, 'w') as fd:\n", 94 | " fd.write(' '.join(binaries))" 95 | ] 96 | }, 97 | { 98 | "cell_type": "code", 99 | "execution_count": null, 100 | "metadata": {}, 101 | "outputs": [], 102 | "source": [ 103 | "# No effect\n", 104 | "print(binaries)\n", 105 | "print(octals)" 106 | ] 107 | } 108 | ], 109 | "metadata": { 110 | "kernelspec": { 111 | "display_name": "Python 3", 112 | "language": "python", 113 | "name": "python3" 114 | }, 115 | "language_info": { 116 | "codemirror_mode": { 117 | "name": "ipython", 118 | "version": 3 119 | }, 120 | "file_extension": ".py", 121 | "mimetype": "text/x-python", 122 | "name": "python", 123 | "nbconvert_exporter": "python", 124 | "pygments_lexer": "ipython3", 125 | "version": "3.6.5" 126 | } 127 | }, 128 | "nbformat": 4, 129 | "nbformat_minor": 2 130 | } 131 | -------------------------------------------------------------------------------- /resources/03_Classify_text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Classify text\n", 8 | "We are going to train a classifier on the tokenized text input, using the [FastText library](https://fasttext.cc/). 
\n", 9 | "\n", 10 | "In addition to the input data file, we give the command a few hyperparameter values, and we store the binary file representing the learned model as output.\n", 11 | "\n", 12 | "We only learn for a few epochs, to see how the versioning tools work. \n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": null, 18 | "metadata": {}, 19 | "outputs": [], 20 | "source": [ 21 | "# Parameters\n", 22 | "\"\"\"\n", 23 | ":param str input_csv_file: Path to input file\n", 24 | ":param str out_model_path: Path to model files\n", 25 | ":param float learning_rate: Learning rate\n", 26 | ":param int epochs: Number of epochs\n", 27 | "\n", 28 | ":dvc-in input_csv_file: ./poc/data/data_train_tokenized.csv\n", 29 | ":dvc-out out_model_path: ./poc/data/fasttext_model.bin\n", 30 | ":dvc-out: ./poc/data/fasttext_model.vec\n", 31 | ":dvc-extra: --learning-rate 0.7 --epochs 20\n", 32 | "\"\"\"\n", 33 | "# Value of parameters for this Jupyter Notebook only\n", 34 | "# the notebook is in ./poc/pipeline/notebooks\n", 35 | "input_csv_file = \"../../data/data_train_tokenized.csv\"\n", 36 | "out_model_path = '../../data/fasttext_model'\n", 37 | "learning_rate = .7\n", 38 | "epochs = 20" 39 | ] 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": {}, 45 | "outputs": [], 46 | "source": [ 47 | "import pandas as pd\n", 48 | "import numpy as np\n", 49 | "from collections import Counter\n", 50 | "from pyfasttext import FastText\n", 51 | "import tempfile\n", 52 | "import os" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "df = pd.read_csv(input_csv_file)" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "import json\n", 71 | "df['data'] = df['data'].apply(lambda s: json.loads(s.replace(\"'\", '\"')))" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": {}, 78 | "outputs": [], 79 | "source": [ 80 | "with tempfile.TemporaryDirectory() as tmp_dir:\n", 81 | " tmp_path = os.path.join(tmp_dir, 'unigrams')\n", 82 | " with open(tmp_path, 'w') as f:\n", 83 | " for text, _, lab in df.itertuples(index=False, name=None):\n", 84 | " f.write('__label__{} {}\\n'.format(lab, ' '.join(text)))\n", 85 | " \n", 86 | " model = FastText()\n", 87 | " # FastText automatically adds .bin at the end of the output model file name\n", 88 | " out_model_path = out_model_path.replace('.bin', '')\n", 89 | " model.supervised(input=tmp_path, output=out_model_path, epoch=epochs, lr=learning_rate)" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.6.5" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 2 114 | } 115 | -------------------------------------------------------------------------------- /resources/setup_project/solution/mlvtools/train_data_model.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": 
[ 9 | "\"\"\"\n", 10 | ":param str train_dataset_path: Path to the train data input file\n", 11 | ":param str conf_path: Path to the hyperparameters configuration input file\n", 12 | ":param str model_path: Path to the model output file\n", 13 | "\n", 14 | ":dvc-in train_dataset_path: ./data/intermediate/train_dataset.txt\n", 15 | ":dvc-in conf_path: ./data/input/conf.json\n", 16 | ":dvc-out model_path: ./data/model/classifier.bin\n", 17 | "\"\"\"\n", 18 | "# The following code in this cell will not be added to the generated Python script\n", 19 | "# These values are only for notebook purposes\n", 20 | "train_dataset_path = '../data/intermediate/train_dataset.txt'\n", 21 | "conf_path = '../data/input/conf.json'\n", 22 | "model_path = '../data/model/classifier.bin'" 23 | ] 24 | }, 25 | { 26 | "cell_type": "code", 27 | "execution_count": null, 28 | "metadata": {}, 29 | "outputs": [], 30 | "source": [ 31 | "# No effect\n", 32 | "with open(train_dataset_path, 'r') as fd:\n", 33 | " train_data_lines = fd.readlines()\n", 34 | "train_data_lines" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": null, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "import json\n", 44 | "with open(conf_path, 'r') as fd:\n", 45 | " conf = json.load(fd)" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "# No effect\n", 55 | "conf" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "import fasttext as ft\n", 65 | "from tempfile import TemporaryDirectory\n", 66 | "import shutil\n", 67 | "from os import remove, makedirs\n", 68 | "from os.path import join, exists, dirname\n", 69 | "def train(fasttext_data_path: str, fasttext_model_path: str, epochs: int, learning_rate: float):\n", 70 | " with TemporaryDirectory() as tmp_dir:\n", 71 | " # FastText automatically adds .bin at the end of the output model file name so\n", 72 | " # we use a temporary file to keep control of the output file path\n", 73 | " model_tmp_path = join(tmp_dir, 'model')\n", 74 | " ft.supervised(fasttext_data_path, model_tmp_path, lr=learning_rate, epoch=epochs, silent=0)\n", 75 | " if exists(fasttext_model_path):\n", 76 | " remove(fasttext_model_path)\n", 77 | " makedirs(dirname(fasttext_model_path), exist_ok=True)\n", 78 | " shutil.copy(f'{model_tmp_path}.bin', fasttext_model_path)\n", 79 | "\n" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": null, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "train(train_dataset_path, model_path, \n", 89 | " epochs=conf['epoch'], learning_rate=conf['learning_rate'])" 90 | ] 91 | } 92 | ], 93 | "metadata": { 94 | "kernelspec": { 95 | "display_name": "Python 3", 96 | "language": "python", 97 | "name": "python3" 98 | }, 99 | "language_info": { 100 | "codemirror_mode": { 101 | "name": "ipython", 102 | "version": 3 103 | }, 104 | "file_extension": ".py", 105 | "mimetype": "text/x-python", 106 | "name": "python", 107 | "nbconvert_exporter": "python", 108 | "pygments_lexer": "ipython3", 109 | "version": "3.6.7" 110 | } 111 | }, 112 | "nbformat": 4, 113 | "nbformat_minor": 2 114 | } 115 | -------------------------------------------------------------------------------- /talks/reveal.js/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | reveal.js 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | 16 | 23 | 24 | 25 |
26 |
27 |
28 |

From ML experiments to production: versioning and reproducibility with MLV-tools

29 |

Stéphanie Bracaloni and Sarah Diot-Girard

30 | 33 |
34 |
35 |

About us

36 |
    37 |
  • Sarah Diot-Girard :
  • 38 |
  • Stéphanie Bracaloni :
  • 39 |
40 | 48 |
49 |
50 |

Why are we here

51 |

POC vs PROD

52 |

... vs Data scientist

53 |
54 |
55 |

The POC

56 | 57 | 62 |
63 |
64 |

The POC

65 | 66 | 70 |
71 |
72 |

The POC

73 | 74 | 77 |
78 |
79 |

The POC

80 | 81 | 84 |
85 |
86 |
87 | 88 | 89 | 90 | 91 | 104 | 105 | 106 | -------------------------------------------------------------------------------- /resources/03_bis_Classify_text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Classify text with trigrams\n", 8 | "We are going to train a classifier on the tokenized text input, using the [FastText library](https://fasttext.cc/). \n", 9 | "\n", 10 | "In addition to the input data file, we give the command a few hyperparameter values, and we store the binary file representing the learned model as output.\n", 11 | "\n", 12 | "We only learn for a few epochs, to see how the versioning tools work. \n", 13 | "\n", 14 | "We feed the neural network trigrams to see how the accuracy improves over using unigrams (single words).\n" 15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str input_csv_file: Path to input file\n", 26 | ":param str out_model_path: Path to model files\n", 27 | ":param float learning_rate: Learning rate\n", 28 | ":param int epochs: Number of epochs\n", 29 | "\n", 30 | ":dvc-in input_csv_file: [REPLACE_CSV_INPUT]\n", 31 | ":dvc-out out_model_path: [REPLACE_MODEL_OUT_BIN_PATH]\n", 32 | ":dvc-out: [REPLACE_MODEL_OUT_VEC_PATH]\n", 33 | ":dvc-extra: --learning-rate 0.7 --epochs 20\n", 34 | "\"\"\"\n", 35 | "# Value of parameters for this Jupyter Notebook only\n", 36 | "# the notebook is in ./poc/pipeline/notebooks\n", 37 | "input_csv_file = \"../../data/data_train_tokenized.csv\"\n", 38 | "out_model_path = '../../data/fasttext_model_bis'\n", 39 | "learning_rate = .7\n", 40 | "epochs = 20" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": null, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "import pandas as pd\n", 50 | "import numpy as np\n", 51 | "from pyfasttext import FastText\n", 52 | "import tempfile\n", 53 | "import os" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": null, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "df = pd.read_csv(input_csv_file)" 63 | ] 64 | }, 65 | { 66 | "cell_type": "code", 67 | "execution_count": null, 68 | "metadata": {}, 69 | "outputs": [], 70 | "source": [ 71 | "import json\n", 72 | "df['data'] = df['data'].apply(lambda s: json.loads(s.replace(\"'\", '\"')))" 73 | ] 74 | }, 75 | { 76 | "cell_type": "code", 77 | "execution_count": null, 78 | "metadata": {}, 79 | "outputs": [], 80 | "source": [ 81 | "with tempfile.TemporaryDirectory() as tmp_dir:\n", 82 | " tmp_path = os.path.join(tmp_dir, 'trigrams')\n", 83 | " with open(tmp_path, 'w') as f:\n", 84 | " for text, _, lab in df.itertuples(index=False, name=None):\n", 85 | " f.write('__label__{} {}\\n'.format(lab, ' '.join(text)))\n", 86 | " \n", 87 | " model = FastText()\n", 88 | " # FastText automatically adds .bin at the end of the output model file name\n", 89 | " out_model_path = out_model_path.replace('.bin', '')\n", 90 | " model.supervised(input=tmp_path, output=out_model_path, epoch=epochs, lr=learning_rate, wordNgrams=3)" 91 | ] 92 | } 93 | ], 94 | "metadata": { 95 | "kernelspec": { 96 | "display_name": "Python 3", 97 | "language": "python", 98 | "name": "python3" 99 | }, 100 | "language_info": { 101 | "codemirror_mode": { 102 | "name": "ipython", 103 | "version": 3 104 | }, 105 | "file_extension": ".py", 106 | 
"mimetype": "text/x-python", 107 | "name": "python", 108 | "nbconvert_exporter": "python", 109 | "pygments_lexer": "ipython3", 110 | "version": "3.6.5" 111 | } 112 | }, 113 | "nbformat": 4, 114 | "nbformat_minor": 2 115 | } 116 | -------------------------------------------------------------------------------- /tutorial/use_case4.md: -------------------------------------------------------------------------------- 1 | # Use Case 4: Combine Metrics 2 | 3 | One typical use-case in Machine Learning is that of hyper-parameters optimization. We want to train a classifier with various choices 4 | of hyperparameters, using cross-validation to get an accurate estimate of generalisation metrics (accuracy on validation set, but 5 | also possibly F1-scores or other metrics depending on your data). 6 | 7 | Each of those runs will thus generate a set of metrics, and we want to have a unified view on all results to make a decision on 8 | the best set of hyperparameters. We use **MLFlow tracking API** to record and expose results. 9 | 10 | **Requirements**: 11 | 12 | - setup the environment ([tutorial setup](./setup.md)) 13 | - build the pipeline from [Use Case 1: Build and Reproduce a Pipeline](./use_case1.md) 14 | 15 | > Note: it is possible to quickly build the pipeline from Use Case 1 running `make setup` if setup is not done 16 | then `make pipeline1`. Be careful, the pipeline files and DVC meta files will not be committed. 17 | 18 | We want to reuse the split step from the **Use Case 1** pipeline, and then run cross validation to tune hyperparameters. 19 | 20 | `20news-bydate_py3.pkz` = Split => `data_train.csv` = Classif with Cross Validation => ./poc/data/cross_valid_metrics 21 | 22 | ## 1. Create a Cross Validation Step 23 | 24 | 25 | This pipeline step is based on the `05_Tune_hyperparameters_with_crossvalidation.ipynb` **Jupyter Notebook**. 26 | 27 | We use scikit-learn to build a simple pipeline with two hyperparameters: the number of words in the vocabulary for 28 | the bag-of-words encoding, and the regularization parameter for the Logistic Regression classifier. 29 | 30 | For tutorial purpose, we try out a very limited number of values (a more realistic scenario would probably involve 31 | a grid search). In order for the step to execute quite quickly, we only use one repetition of 3-fold cross-validation. 32 | Once again, in real life, you'll probably want to use 10 repetitions of 5-fold or 10-fold cross-validation. 33 | 34 | In this notebook, the output is just the folder containing all metrics results, but you might also want to store 35 | the model trained with the best hyperparameters. That's a nice exercice for you to try ! 36 | 37 | 38 | 39 | ||| 40 | | :--- | :--- | 41 | | **Step Input**: | `./poc/data/data_train.csv` | 42 | ||| 43 | | **Step Outputs**: | `./poc/data/cross_valid_metrics` | 44 | ||| 45 | |**Generated files**:| `./poc/pipeline/steps/mlvtools_05_tune_hyperparameters_with_crossvalidation.py`| 46 | | | `./poc/commands/dvc/mlvtools_05_tune_hyperparameters_with_crossvalidation_dvc`| 47 | 48 | 1. Copy the `05_Tune_hyperparameters_with_crossvalidation.ipynb` from the resources directory to the poc project: 49 | 50 | cp ./resources/05_Tune_hyperparameters_with_crossvalidation.ipynb ./poc/pipeline/notebooks/ 51 | 52 | 2. 
2. Continue with the usual process 53 | 54 | 55 | # Git versioning 56 | git add ./poc/pipeline/notebooks/05_Tune_hyperparameters_with_crossvalidation.ipynb 57 | git commit -m 'Tutorial: use case 4 step 1 - Add notebook' 58 | 59 | # Convert to Python 3 script 60 | ipynb_to_python -w . -n ./poc/pipeline/notebooks/05_Tune_hyperparameters_with_crossvalidation.ipynb 61 | 62 | # Generate command 63 | gen_dvc -w . -i ./poc/pipeline/steps/mlvtools_05_tune_hyperparameters_with_crossvalidation.py 64 | 65 | # Run 66 | ./poc/commands/dvc/mlvtools_05_tune_hyperparameters_with_crossvalidation_dvc 67 | 68 | # Version the result 69 | git add *.dvc && git add ./poc/pipeline ./poc/commands/ ./poc/data/ 70 | git commit -m 'Tutorial use case 4 step 1: cross validation' 71 | 72 | 73 | 3. Analyse results 74 | 75 | All metrics are logged in **MLflow tracking**, so it is possible to visualize them. 76 | 77 | Run: `mlflow ui --file-store ./poc/data/cross_valid_metrics/` 78 | 79 | Go to: [http://127.0.0.1:5000](http://127.0.0.1:5000) 80 | 81 | 82 | You reached the end of this tutorial. 83 | 84 | Or [go back to README](../README.md) 85 | -------------------------------------------------------------------------------- /tutorial/setup.md: -------------------------------------------------------------------------------- 1 | # Tutorial Setup 2 | 3 | This is the setup section for the realistic tutorial. 4 | 5 | ## 1. Create Project Structure 6 | 7 | All resource files needed in this tutorial are provided in `ml-poc-version/resources`. 8 | The structure of the project will be created as the tutorial progresses. 9 | 10 | If you have not already done so, clone the repository on the tutorial branch. 11 | 12 | git clone -b tutorial https://github.com/peopledoc/mlv-tools-tutorial 13 | cd ml-poc-version 14 | 15 | Create your working branch: 16 | 17 | git checkout -b working 18 | 19 | 20 | Create the project base structure. 21 | 22 | make init-struct 23 | 24 | The following structure must be created: 25 | 26 | ├── poc 27 | │   ├── pipeline 28 | │   │   ├── __init__.py 29 | │   │   ├── notebooks # contains Jupyter notebooks (one per pipeline step) 30 | │   │   └── steps # contains generated configurable Python 3 scripts 31 | │   ├── data # contains pipeline data 32 | │   └── commands 33 | │   └── dvc # contains DVC commands wrapped in bash scripts 34 | ... 35 | ├── resources # contains Jupyter notebooks needed in this tutorial 36 | │   ├── 01_Extract_dataset.ipynb 37 | │   ├── 02_Tokenize_text.ipynb 38 | │   ├── 03_bis_Classify_text.ipynb 39 | │   ├── 03_Classify_text.ipynb 40 | │   └── 04_Evaluate_model.ipynb 41 | ... 42 | 43 | > It is not mandatory to follow this structure, it is just an example for this tutorial. 44 | 45 | ## 2. Prepare Environment 46 | 47 | Create a virtual environment using **conda** or **virtualenv**, then activate it. 48 | Then set up the project. 49 | 50 | make develop 51 | 52 | ## 3. Initialize DVC Project 53 | **DVC** works on top of **git** repositories. Run **DVC** initialization in a **git** 54 | repository directory to create **DVC meta files**. 55 | 56 | dvc init 57 | 58 | The directory `.dvc` should be created in the project root directory. 59 | 60 | Add it under git versioning: 61 | 62 | git commit -m 'Tutorial setup: dvc init' ./.dvc/ 63 | 64 | ## 4. Create MLV-tools Project Configuration 65 | 66 | Using **MLV-tools**, it can be repetitive to provide output path parameters for each `ipynb_to_python` 67 | and `gen_dvc` command.
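To make the repetition concrete, here is a hedged illustration of what the explicit form involves. The output flag names in this sketch are assumptions for illustration only (check `ipynb_to_python --help` and `gen_dvc --help` for the real interface), but the shape of the problem is accurate:

    # Hypothetical explicit-output form -- flag names are assumptions, not the documented CLI
    ipynb_to_python -n ./poc/pipeline/notebooks/02_Tokenize_text.ipynb \
        -o ./poc/pipeline/steps/mlvtools_02_tokenize_text.py
    gen_dvc -i ./poc/pipeline/steps/mlvtools_02_tokenize_text.py \
        -o ./poc/commands/dvc/mlvtools_02_tokenize_text_dvc
    # ... and the same path boilerplate again for every other notebook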
68 | 69 | It is possible to provide a configuration to declare the project structure and 70 | let **MLV-tools** generate output paths. 71 | (For more information see the [documentation](https://github.com/mlflow/mlflow)) 72 | 73 | make mlvtools-conf 74 | 75 | The configuration file `./.mlvtools` should be created. 76 | 77 | Add it under git versioning: 78 | 79 | git add .mlvtools && git commit -m 'Tutorial setup: MLV-tools conf' 80 | 81 | ## 5. Add Git Hooks and Filters 82 | 83 | ### 5.1 Automatise Jupyter Notebook Cleanup 84 | 85 | It is usually not useful to version the outputs embedded in **Jupyter notebooks**. Sometimes it is even forbidden, 86 | for example if you work on production data. To avoid mistakes, use a git pre-commit hook or a git filter to clean up 87 | **Jupyter notebook** outputs. Several tools can do that, 88 | see for example [nbstripout](https://github.com/kynan/nbstripout). 89 | 90 | pip install --upgrade nbstripout 91 | nbstripout --install 92 | 93 | With the **nbstripout** git filter, **Jupyter notebook** outputs are cleaned on check-in, on every branch. That means 94 | that when you commit a change, you keep the outputs in your local notebook and can continue working, 95 | but those outputs are not sent to the remote server when you push. 96 | Notebook outputs are also excluded from the git diff. 97 | 98 | ## 6. Get Tutorial Data 99 | 100 | This tutorial is based on data from [20_newsgroup](http://scikit-learn.org/stable/datasets/). 101 | Run the following command to download them. 102 | 103 | make download-data 104 | 105 | Data are stored in `./poc/data/20news-bydate_py3.pkz`. 106 | 107 | 108 | You reached the end of the setup part, see [Use Case 1: Build and Reproduce a Pipeline](./use_case1.md) 109 | 110 | Or [go back to README](../README.md) 111 | -------------------------------------------------------------------------------- /resources/02_Tokenize_text.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tokenize text\n", 8 | "The next step in the pipeline is to tokenize the text input, as is usual in Natural Language Processing. In order to do that, we use the wordpunct tokenizer provided by NLTK. \n", 9 | "\n", 10 | "We also remove English stopwords (frequent words which add no semantic meaning, such as \"and\", \"is\", \"the\"...). \n", 11 | "\n", 12 | "Each token is also converted to lower-case and non-alphabetic tokens are removed. \n", 13 | "\n", 14 | "In this very simple tutorial example, we do not apply any lemmatization technique." 
15 | ] 16 | }, 17 | { 18 | "cell_type": "code", 19 | "execution_count": null, 20 | "metadata": {}, 21 | "outputs": [], 22 | "source": [ 23 | "# Parameters\n", 24 | "\"\"\"\n", 25 | ":param str input_csv_file: Path to input file\n", 26 | ":param str output_csv_file: Path to output file\n", 27 | ":dvc-in input_csv_file: ./poc/data/data_train.csv\n", 28 | ":dvc-out output_csv_file: ./poc/data/data_train_tokenized.csv\n", 29 | "\"\"\"\n", 30 | "# Value of parameters for this Jupyter Notebook only\n", 31 | "# the notebook is in ./poc/pipeline/notebooks\n", 32 | "input_csv_file = \"../../data/data_train.csv\"\n", 33 | "output_csv_file = input_csv_file.replace('.csv', '_tokenized.csv')" 34 | ] 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": {}, 40 | "outputs": [], 41 | "source": [ 42 | "import pandas as pd\n", 43 | "import numpy as np\n", 44 | "from nltk.tokenize import wordpunct_tokenize\n", 45 | "from nltk.corpus import stopwords" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": null, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "df = pd.read_csv(input_csv_file)\n", 55 | "df.head()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": null, 61 | "metadata": {}, 62 | "outputs": [], 63 | "source": [ 64 | "stopswords_english = set(stopwords.words('english'))" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": null, 70 | "metadata": {}, 71 | "outputs": [], 72 | "source": [ 73 | "def tokenize_and_clean_text(s):\n", 74 | " return [token.lower() for token in wordpunct_tokenize(s) if token.isalpha() and token.lower() not in stopswords_english]" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": null, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "df = df.dropna()" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": null, 89 | "metadata": {}, 90 | "outputs": [], 91 | "source": [ 92 | "df['data'] = df['data'].apply(tokenize_and_clean_text)" 93 | ] 94 | }, 95 | { 96 | "cell_type": "code", 97 | "execution_count": null, 98 | "metadata": {}, 99 | "outputs": [], 100 | "source": [ 101 | "# No effect\n", 102 | "df.head()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "df.to_csv(output_csv_file, index=False)" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": null, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [] 120 | } 121 | ], 122 | "metadata": { 123 | "kernelspec": { 124 | "display_name": "Python 3", 125 | "language": "python", 126 | "name": "python3" 127 | }, 128 | "language_info": { 129 | "codemirror_mode": { 130 | "name": "ipython", 131 | "version": 3 132 | }, 133 | "file_extension": ".py", 134 | "mimetype": "text/x-python", 135 | "name": "python", 136 | "nbconvert_exporter": "python", 137 | "pygments_lexer": "ipython3", 138 | "version": "3.6.5" 139 | } 140 | }, 141 | "nbformat": 4, 142 | "nbformat_minor": 2 143 | } 144 | -------------------------------------------------------------------------------- /talks/reveal.js/css/print/pdf.css: -------------------------------------------------------------------------------- 1 | /** 2 | * This stylesheet is used to print reveal.js 3 | * presentations to PDF. 
4 | * 5 | * https://github.com/hakimel/reveal.js#pdf-export 6 | */ 7 | 8 | * { 9 | -webkit-print-color-adjust: exact; 10 | } 11 | 12 | body { 13 | margin: 0 auto !important; 14 | border: 0; 15 | padding: 0; 16 | float: none !important; 17 | overflow: visible; 18 | } 19 | 20 | html { 21 | width: 100%; 22 | height: 100%; 23 | overflow: visible; 24 | } 25 | 26 | /* Remove any elements not needed in print. */ 27 | .nestedarrow, 28 | .reveal .controls, 29 | .reveal .progress, 30 | .reveal .playback, 31 | .reveal.overview, 32 | .fork-reveal, 33 | .share-reveal, 34 | .state-background { 35 | display: none !important; 36 | } 37 | 38 | h1, h2, h3, h4, h5, h6 { 39 | text-shadow: 0 0 0 #000 !important; 40 | } 41 | 42 | .reveal pre code { 43 | overflow: hidden !important; 44 | font-family: Courier, 'Courier New', monospace !important; 45 | } 46 | 47 | ul, ol, div, p { 48 | visibility: visible; 49 | position: static; 50 | width: auto; 51 | height: auto; 52 | display: block; 53 | overflow: visible; 54 | margin: auto; 55 | } 56 | .reveal { 57 | width: auto !important; 58 | height: auto !important; 59 | overflow: hidden !important; 60 | } 61 | .reveal .slides { 62 | position: static; 63 | width: 100% !important; 64 | height: auto !important; 65 | zoom: 1 !important; 66 | 67 | left: auto; 68 | top: auto; 69 | margin: 0 !important; 70 | padding: 0 !important; 71 | 72 | overflow: visible; 73 | display: block; 74 | 75 | -webkit-perspective: none; 76 | -moz-perspective: none; 77 | -ms-perspective: none; 78 | perspective: none; 79 | 80 | -webkit-perspective-origin: 50% 50%; /* there isn't a none/auto value but 50-50 is the default */ 81 | -moz-perspective-origin: 50% 50%; 82 | -ms-perspective-origin: 50% 50%; 83 | perspective-origin: 50% 50%; 84 | } 85 | 86 | .reveal .slides .pdf-page { 87 | position: relative; 88 | overflow: hidden; 89 | z-index: 1; 90 | 91 | page-break-after: always; 92 | } 93 | 94 | .reveal .slides section { 95 | visibility: visible !important; 96 | display: block !important; 97 | position: absolute !important; 98 | 99 | margin: 0 !important; 100 | padding: 0 !important; 101 | box-sizing: border-box !important; 102 | min-height: 1px; 103 | 104 | opacity: 1 !important; 105 | 106 | -webkit-transform-style: flat !important; 107 | -moz-transform-style: flat !important; 108 | -ms-transform-style: flat !important; 109 | transform-style: flat !important; 110 | 111 | -webkit-transform: none !important; 112 | -moz-transform: none !important; 113 | -ms-transform: none !important; 114 | transform: none !important; 115 | } 116 | 117 | .reveal section.stack { 118 | position: relative !important; 119 | margin: 0 !important; 120 | padding: 0 !important; 121 | page-break-after: avoid !important; 122 | height: auto !important; 123 | min-height: auto !important; 124 | } 125 | 126 | .reveal img { 127 | box-shadow: none; 128 | } 129 | 130 | .reveal .roll { 131 | overflow: visible; 132 | line-height: 1em; 133 | } 134 | 135 | /* Slide backgrounds are placed inside of their slide when exporting to PDF */ 136 | .reveal .slide-background { 137 | display: block !important; 138 | position: absolute; 139 | top: 0; 140 | left: 0; 141 | width: 100%; 142 | height: 100%; 143 | z-index: auto !important; 144 | } 145 | 146 | /* Display slide speaker notes when 'showNotes' is enabled */ 147 | .reveal.show-notes { 148 | max-width: none; 149 | max-height: none; 150 | } 151 | .reveal .speaker-notes-pdf { 152 | display: block; 153 | width: 100%; 154 | height: auto; 155 | max-height: none; 156 | top: auto; 157 | right: auto; 158 | 
bottom: auto; 159 | left: auto; 160 | z-index: 100; 161 | } 162 | 163 | /* Layout option which makes notes appear on a separate page */ 164 | .reveal .speaker-notes-pdf[data-layout="separate-page"] { 165 | position: relative; 166 | color: inherit; 167 | background-color: transparent; 168 | padding: 20px; 169 | page-break-after: always; 170 | border: 0; 171 | } 172 | 173 | /* Display slide numbers when 'slideNumber' is enabled */ 174 | .reveal .slide-number-pdf { 175 | display: block; 176 | position: absolute; 177 | font-size: 14px; 178 | } 179 | -------------------------------------------------------------------------------- /resources/setup_project/solution/mlvtools/split_dataset.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": null, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "\"\"\"\n", 10 | ":param str preprocessed_data_path: Path to preprocessed data input file\n", 11 | ":param str train_dataset_path: Path to the train data output file\n", 12 | ":param str test_dataset_path: Path to the test data output file\n", 13 | ":param float test_percent: Percentage of test data (example: 0.15)\n", 14 | " \n", 15 | ":dvc-in preprocessed_data_path: ./data/intermediate/preprocessed_data.json\n", 16 | ":dvc-out train_dataset_path: ./data/intermediate/train_dataset.txt\n", 17 | ":dvc-out test_dataset_path: ./data/intermediate/test_dataset.txt\n", 18 | ":dvc-extra: --test-percent 0.15\n", 19 | "\"\"\"\n", 20 | "# The following code in this cell will not be added to the generated Python script\n", 21 | "# These values are only used when running the notebook\n", 22 | "preprocessed_data_path = '../data/intermediate/preprocessed_data.json'\n", 23 | "train_dataset_path = '../data/intermediate/train_dataset.txt'\n", 24 | "test_dataset_path = '../data/intermediate/test_dataset.txt'\n", 25 | "test_percent = 0.15" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": {}, 32 | "outputs": [], 33 | "source": [ 34 | "import json\n", 35 | "with open(preprocessed_data_path, 'r') as fd:\n", 36 | "    preprocessed_data = json.load(fd)" 37 | ] 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [ 45 | "# No effect\n", 46 | "preprocessed_data" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": null, 52 | "metadata": {}, 53 | "outputs": [], 54 | "source": [ 55 | "# No effect\n", 56 | "len(preprocessed_data)" 57 | ] 58 | }, 59 | { 60 | "cell_type": "code", 61 | "execution_count": null, 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "from classifier.split import split_dataset\n", 66 | "\n", 67 | "\n", 68 | "test_dataset, train_dataset = split_dataset(preprocessed_data, test_percent)" 69 | ] 70 | }, 71 | { 72 | "cell_type": "code", 73 | "execution_count": null, 74 | "metadata": {}, 75 | "outputs": [], 76 | "source": [ 77 | "# No effect\n", 78 | "len(test_dataset), len(train_dataset)" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": null, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# No effect\n", 88 | "test_dataset" 89 | ] 90 | }, 91 | { 92 | "cell_type": "code", 93 | "execution_count": null, 94 | "metadata": {}, 95 | "outputs": [], 96 | "source": [ 97 | "# No effect\n", 98 | "from collections import Counter\n", 99 | "test_review_by_labels = Counter([d.split()[0] for d in test_dataset])\n", 100 | "train_review_by_labels = 
Counter([d.split()[0] for d in train_dataset])\n", 101 | "\n", 102 | "test_review_by_labels.most_common()" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "# No effect\n", 112 | "train_review_by_labels.most_common()" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": null, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "from classifier.helper import write_lines_file\n", 122 | "\n", 123 | "write_lines_file(train_dataset_path, train_dataset)\n", 124 | "write_lines_file(test_dataset_path, test_dataset)\n" 125 | ] 126 | } 127 | ], 128 | "metadata": { 129 | "kernelspec": { 130 | "display_name": "Python 3", 131 | "language": "python", 132 | "name": "python3" 133 | }, 134 | "language_info": { 135 | "codemirror_mode": { 136 | "name": "ipython", 137 | "version": 3 138 | }, 139 | "file_extension": ".py", 140 | "mimetype": "text/x-python", 141 | "name": "python", 142 | "nbconvert_exporter": "python", 143 | "pygments_lexer": "ipython3", 144 | "version": "3.6.7" 145 | } 146 | }, 147 | "nbformat": 4, 148 | "nbformat_minor": 2 149 | } 150 | -------------------------------------------------------------------------------- /resources/dvc_playground/user/resources/inputs/part2.input: -------------------------------------------------------------------------------- 1 | §¨[[D¨rr![€![§$!dr£"?§!°XX!D./&@@%/,###(((%@%((((((((((((((((((((,[¨DD??.#@%%&@@@@#{*@,[£€$dX.@#,£(#?€"?r*@/£[{}Xd[D,%***.,*.*,***,*.{€.,*&&.!°¨£}XX€r"$"{"r}[¨[ 2 | (((((((((((#%@&#(&@@@@@@@@@@@@/(#,°,@(X?"[}£"!€%@("[§€§£$¨,#.¨[{¨$r§",(**,.,,,******.¨[.,*&@#!?r}dr°$dD?¨XDX$r€§?D}€!£r$D€?°X![XD}§!d.,/(%@@%/#&&#####,/(((#@&#( 3 | X£[$€€![}?"r.,,/(%&@@@&%(*,,(########,/((/(#&@@%#%%##%%%%&&@@@@@@&@@@@@&&&#%%###/*(@%r§X€€[€§X{D*@(X$d"€r$}?¨"°£{Dr!?°****.,*.***,**,[{,**&@@&}r§°X$rd[!Dd£r§{[? 4 | #%%##(,..,&%/@,.€rD§!D,..,,,,,,,,,.r"d?r}{{DX}}dX"%/£d{!D§£r{§£d¨¨?[X{$//**.,,,**,,**.?,*,&@@@dD}°£"X""£{d¨[€€¨$£§¨[?€?,#@@@&%#//***/((############(,*(((,.%@&%# 5 | !D?$$rrr£$.(@@@&%%##########(,*###,*((*/(,*@/.***,.?(&.rD€.&@@@%*}¨D€§D°r{"D{{X¨D€?D"?X§°°€°[?¨€¨°"XDd"¨"XX?§XX{°{r{¨d[{*&(*,*,,**.**.°,*,&@@@{}!€D$"X¨$![°!dD}r 6 | $/@&*§[°£{*@@@/€?{§¨¨}X$${"{¨°XXD§"?€!{!?{€$£![rD{¨€€X$$d?D°"d??$?!r?}"?X&@@&**,**,,*,},*,&%@@[£{§[°§!{d§"D{[[£d[[DDr$D€[!""{,%@@@&(,*///*,.*#((((((*.,(/*@/}{}d 7 | €XrXddXdd}£°°"?,/&@@@@&##((((//(#%%,§.(#/&%{€${#&&,{X$}¨[#@@&,d{¨dD€!"X$D!}"[d§"$rDd£?§€€?d¨?!d[§£{°["d€€£D°€{{rXX§XX§€.*@@@%*****,.**.,**&%@@d€X£D{"Dr}![£§r{?" 
8 | .££"}/@@@@@*X¨°°"€?$§r[$!§°¨ddX!$§£r°£°}${r£[€€[£}$X?D{°°$[£d{{[dDd¨#&(@&#@&,%#**/*,,*****#/&@[§!{"!€!°€¨{!r?D$ddX"¨"!DX!{§X§°$}Xr}.*(%&@@@&%##@&.[?,%@##@.d,&@* 9 | ?!}££dd?d?X"§{"£[€?€"drd{?!.*(@@@((#@&.*#%%@#.€X¨$D°*@@@@/?¨"[X°??r€DD"°¨D£€°r°[¨}"}?r¨{!r£€£dD!}{X$°€?€!°r°r}[d€§"r&@%/%@@**./&#/(*,***,(#/&@€d}£X![[}}d?X$D¨"D 10 | ¨€,@@@@@@/X¨[€rr}X{°?{"XX¨D°£€}X}X?"X{D[§¨D!€??{¨"dX"[[}d€{°"dD$}!?,@%/@&,./(/..*#(*%/**,%#/&@{{{€€€d£X?}{€°"€°r"°°!¨°D{X€?["$r€€D"¨"€§$d"£d.#@&/,%@@@@@&/"£XX$$ 11 | D"€XXdXr°?§[}D!?[!D§[?§${!§,&&@@@@@#/,}$Dr?Dd¨X°r#@@#@@@@%[£}£d""}°![?!$?X?°¨£""°€$?¨§?[§€{X}?d!££§°€}?§[}!?[d!{§"D/@/@#,}¨((/.".((*#&/*/&#/&@Dr¨d¨$°£?{D$?§dX![ 12 | @@/$.@@@@@*[""§€¨X"£d[{["?$d€$!¨¨DD"¨€!r§r°!}!D?£$?!$d£§€[!"£€¨°X?€%@@#*((##(#(..(*(&%@//@#(&&?r°£{}"d}¨£¨£$"{$°r"§¨{}}€}r°D"!D£r{{{£"D?¨.(@&&@@@*}?!"§°[D$}r".& 13 | r£d[§?{}XrDD}¨r£rX{X?°X.,#@@(*.[£¨}°[{€°!°{rd#@%/!D€.@@@@@@.{rrr$¨dD!°[°D"€r}?X}[°£"D!"€X[¨£"!?XDD}?¨{d![r§X€{{}D§.@@&£d,(((((/@&#*[/@&@@&(#@#D?¨¨€$X{{["§£}§}{€ 14 | $!dd.&@@@@&dD§}r€°§¨}X"D}§""[§?D?°d{r§d{r°§[£$rdrrD°€D[£rD£!{{"¨!d*@@/"?X/((((.X,(%*",@@@&/#@/D?}!rdr¨?{£"X!¨§d}{d{XdX€!D€{[DdDX$D{§*%@&#,"dD§}D§d}}!°[§€?}(@#}D 15 | D!}?!£{dX[}€¨X¨¨r.(@@(.£}§£§d°€¨£"!¨!"§¨§¨(%*Dd$D£[}[&@@@@&.°£!?€!¨$$££¨r}?¨Xd?§d€X"£$"°{r$![?Dd£[d!d[$X$€r°rr$dDX.@@,£!§,(((/.£.(%@/,#@@@%%@(!¨§€rD{}[¨¨§£"$$$! 16 | r£§[?(@@&&%!D{D€¨[!Dd{{?r{r{¨["d°$Xd£d}¨¨X"!X°DD[°"?${€[d!}!€D§rd€$&@(D!§!/(((.$.(*.&%//&@@#((}D§?°![£{{dD"¨§€X[¨°!{°[}r!¨[}°./&@%*§D[€}}[$r?r°rD°"}?XD{$Dd€€{D} 17 | £¨£D€??!}{°*&@/€§}€°}°DX{$}°$°dr€?£Xrrrd"d°§€}?[$$$€?,@@&&&}}€}dDX§r£"D!{}§drrdD§°£}$°[$?"{$!DDr¨[$°d?°°r?°£[XdX}$.&@@@*.X,((/.¨.(*?.(%#&@@#/(X""$}}{¨X?€¨{?"![d 18 | }dX¨"°#@@@&.$°DdD?°"£°r!d{{["XX}X¨?$}[{§}X€}"d[£XD!d£!dd¨°!D"[$[$X.&((@@@&@#((.d.(*§.(#@@&%%##£!€$[r{${d¨¨D€€X§?X§°{}€"X.#@#.}°!X"¨{??{d¨Dr§¨[}$}$!$"£D€!°DD[§{" 19 | [§°d}£*&@%.°D?[{°X§°dX$[X€{°§°$£°£[{?D§!°£?rD"dr£?"X°{¨%@@@.Xd£°D}"?[!{£X§°![X£$[°}[°"r¨D!d}$°D§€§£r}£°°!{{XDX¨}$r,@/X,%@@@%(/.¨.(/!,(,#@@/#((§$¨d°"!dDr{}}dD{£€ 20 | }{D°¨D€D.##.{§r[€"§€"d€§?!!d§}¨"D?Xdd§}XX{X!$°?§"€€$§{"£r€°€€£!DD{,@/"°![.#@#(.!.((.,(.!.&%,((X?!£$§dd${}€!¨$!§[¨°d£(@#.!€¨§?D{?§$dd£?!£}¨?¨§d£°$X?"X?¨°?°£¨{£$¨ 21 | X,#@#{!€![.#&*D°€"€¨rd§°§€X""°D}€Dr$§{X¨?"$¨"!}r£€"D{¨"[r°}€[D°°¨${!?$°dr!"€[£D¨$r[?$"°}["§X§!![§?XX}D°"¨{"r£"£{Xr,@/§[X€{?/@&%€}/(.,(."d.&&#({§€£d{[}"["d¨}}"!{ 22 | $d}§"!§€r!¨[£}"¨![§DD$€}€${°§d$}!"}d[r€!£?D€¨$£X{{§dd$¨{!d}d$"X§}X/@,?£?.$d"(@&*[*(*,(.?£r,&#(r[}{£€{XD$£${${!?*#@*£¨}}d[,@@@,°??"[!d§$°}[$[D?r{§XDd£¨€¨[$€$Xr$° 23 | &.Dr€£!}¨,@@&?d$?$§[}{"€X££{d{°!X"X}§§¨¨DDrX¨{}€¨{£r§D!X¨£§°!$[X¨£?D§[€¨X?€[€$!£"§?€°£?r§Dr§§}D£[{?¨r$$"!d£}§£!"XD.,/(//*¨"[.&@%*,(/,(../@@@#([€!D}!??"[€¨$¨X€/@ 24 | }d£€D{"{¨¨$°Dd£D?r"{d{°$d}$"!X¨£X°[dXX?!d°$d{}${$![!$?}¨XDd.*(%@@%*€XXd$?¨}D°*@@%.((*#&@@*(@%(d££"§$?}dDX£!}?,@/.!¨${$rdr(@@,r§}}¨°!°°{[X[r[$§!¨¨?D{rd§"§}§€X"£" 25 | XXD{§d}£°&@%°§$€$D$r£r{€X$€$}D[r}$§§°dr!["$¨r§[""§d,(&@&&%,X!d{"€¨"r$}!°D?"{r!£rr$"£r€}[$?£{"¨["Dr}£,(%@@&#*,.°£¨"}[§§"!§°}"D¨&@#!/(#&@,¨.@&((€"[X!{dD[d§!}r{/@, 26 | &@%*.}}!{!¨r}r€X¨[$€$DD!X[Xd£€X$§!r"°$[?€¨$",/#&&&%(*.rX£d¨€{$€X¨$}{rd"£¨¨!€D?%@%*(#@&.}.@@%*(r"!¨"°r?X!"§}{[/@,X{?d"¨r.(@@(¨$"!°£XD[}¨°!€r!§"D°X}§°¨¨r[{?{}"./% 27 | .¨§X"Xd/&@@*drX[{?dX[[{?r?"!£€?°D}°€€?[?}€,(&@%*.D[€$"d$§"rd{[d$$¨¨!?{£d[§£"£¨rX}X§€{.*(##%(*,.D"£§[{D!£!€d°£X£"X£°!}§!!{r$D$D%@%#&@&(.,#@@#*(¨¨d?D$}§°¨!?[{D,@( 28 | ?}°rD}§rr}§£§"?£$DX"{!DXX?d.*(%&@@@&#/,..£!{}¨°!rr"D$?[?D}£r¨Dr"dd}{?¨[XD?D$d,@@@&@%,[¨&@*@/*(?$DDD!£"{€$£"X!£*@&[[}€{D!..*.}[r$§DdD"¨££r$!°£$!!"r}$?,#@@&/.!{}d 29 | 
-------------------------------------------------------------------------------- /talks/reveal.js/Gruntfile.js: -------------------------------------------------------------------------------- 1 | /* global module:false */ 2 | module.exports = function(grunt) { 3 | var port = grunt.option('port') || 8000; 4 | var root = grunt.option('root') || '.'; 5 | 6 | if (!Array.isArray(root)) root = [root]; 7 | 8 | // Project configuration 9 | grunt.initConfig({ 10 | pkg: grunt.file.readJSON('package.json'), 11 | meta: { 12 | banner: 13 | '/*!\n' + 14 | ' * reveal.js <%= pkg.version %> (<%= grunt.template.today("yyyy-mm-dd, HH:MM") %>)\n' + 15 | ' * http://revealjs.com\n' + 16 | ' * MIT licensed\n' + 17 | ' *\n' + 18 | ' * Copyright (C) 2017 Hakim El Hattab, http://hakim.se\n' + 19 | ' */' 20 | }, 21 | 22 | qunit: { 23 | files: [ 'test/*.html' ] 24 | }, 25 | 26 | uglify: { 27 | options: { 28 | banner: '<%= meta.banner %>\n', 29 | screwIE8: false 30 | }, 31 | build: { 32 | src: 'js/reveal.js', 33 | dest: 'js/reveal.min.js' 34 | } 35 | }, 36 | 37 | sass: { 38 | core: { 39 | src: 'css/reveal.scss', 40 | dest: 'css/reveal.css' 41 | }, 42 | themes: { 43 | expand: true, 44 | cwd: 'css/theme/source', 45 | src: ['*.sass', '*.scss'], 46 | dest: 'css/theme', 47 | ext: '.css' 48 | } 49 | }, 50 | 51 | autoprefixer: { 52 | core: { 53 | src: 'css/reveal.css' 54 | } 55 | }, 56 | 57 | cssmin: { 58 | options: { 59 | compatibility: 'ie9' 60 | }, 61 | compress: { 62 | src: 'css/reveal.css', 63 | dest: 'css/reveal.min.css' 64 | } 65 | }, 66 | 67 | jshint: { 68 | options: { 69 | curly: false, 70 | eqeqeq: true, 71 | immed: true, 72 | esnext: true, 73 | latedef: 'nofunc', 74 | newcap: true, 75 | noarg: true, 76 | sub: true, 77 | undef: true, 78 | eqnull: true, 79 | browser: true, 80 | expr: true, 81 | globals: { 82 | head: false, 83 | module: false, 84 | console: false, 85 | unescape: false, 86 | define: false, 87 | exports: false 88 | } 89 | }, 90 | files: [ 'Gruntfile.js', 'js/reveal.js' ] 91 | }, 92 | 93 | connect: { 94 | server: { 95 | options: { 96 | port: port, 97 | base: root, 98 | livereload: true, 99 | open: true, 100 | useAvailablePort: true 101 | } 102 | } 103 | }, 104 | 105 | zip: { 106 | bundle: { 107 | src: [ 108 | 'index.html', 109 | 'css/**', 110 | 'js/**', 111 | 'lib/**', 112 | 'images/**', 113 | 'plugin/**', 114 | '**.md' 115 | ], 116 | dest: 'reveal-js-presentation.zip' 117 | } 118 | }, 119 | 120 | watch: { 121 | js: { 122 | files: [ 'Gruntfile.js', 'js/reveal.js' ], 123 | tasks: 'js' 124 | }, 125 | theme: { 126 | files: [ 127 | 'css/theme/source/*.sass', 128 | 'css/theme/source/*.scss', 129 | 'css/theme/template/*.sass', 130 | 'css/theme/template/*.scss' 131 | ], 132 | tasks: 'css-themes' 133 | }, 134 | css: { 135 | files: [ 'css/reveal.scss' ], 136 | tasks: 'css-core' 137 | }, 138 | html: { 139 | files: root.map(path => path + '/*.html') 140 | }, 141 | markdown: { 142 | files: root.map(path => path + '/*.md') 143 | }, 144 | options: { 145 | livereload: true 146 | } 147 | }, 148 | 149 | retire: { 150 | js: [ 'js/reveal.js', 'lib/js/*.js', 'plugin/**/*.js' ], 151 | node: [ '.' 
] 152 | } 153 | 154 | }); 155 | 156 | // Dependencies 157 | grunt.loadNpmTasks( 'grunt-contrib-connect' ); 158 | grunt.loadNpmTasks( 'grunt-contrib-cssmin' ); 159 | grunt.loadNpmTasks( 'grunt-contrib-jshint' ); 160 | grunt.loadNpmTasks( 'grunt-contrib-qunit' ); 161 | grunt.loadNpmTasks( 'grunt-contrib-uglify' ); 162 | grunt.loadNpmTasks( 'grunt-contrib-watch' ); 163 | grunt.loadNpmTasks( 'grunt-autoprefixer' ); 164 | grunt.loadNpmTasks( 'grunt-retire' ); 165 | grunt.loadNpmTasks( 'grunt-sass' ); 166 | grunt.loadNpmTasks( 'grunt-zip' ); 167 | 168 | // Default task 169 | grunt.registerTask( 'default', [ 'css', 'js' ] ); 170 | 171 | // JS task 172 | grunt.registerTask( 'js', [ 'jshint', 'uglify', 'qunit' ] ); 173 | 174 | // Theme CSS 175 | grunt.registerTask( 'css-themes', [ 'sass:themes' ] ); 176 | 177 | // Core framework CSS 178 | grunt.registerTask( 'css-core', [ 'sass:core', 'autoprefixer', 'cssmin' ] ); 179 | 180 | // All CSS 181 | grunt.registerTask( 'css', [ 'sass', 'autoprefixer', 'cssmin' ] ); 182 | 183 | // Package presentation to archive 184 | grunt.registerTask( 'package', [ 'default', 'zip' ] ); 185 | 186 | // Serve presentation locally 187 | grunt.registerTask( 'serve', [ 'connect', 'watch' ] ); 188 | 189 | // Run tests 190 | grunt.registerTask( 'test', [ 'jshint', 'qunit' ] ); 191 | 192 | }; 193 | -------------------------------------------------------------------------------- /resources/dvc_playground/user/resources/inputs/part1.input: -------------------------------------------------------------------------------- 1 | %&.¨§D£}X{!?$€X{?°r}?{ddD¨"°X}£{[/&@%/.r{{€§![£$[d°rr!d!rd?[}€¨?£$?./%&@@%#/,.r{,%@%,°!"[£D£€$Xr}$°{?[€[§?"§€}"£¨[{{[{{°D}d€.&@@@&/(,}(@(,@,*/§¨[r§§$[?r¨£?}r!°* 2 | r¨§§€?Xdr§.,/#&@@@%/,€r{¨}[DD?!°¨£r,&@%,r°§"¨X"?D¨€€"!?[{£€£D¨D[r[$!€!X°D£€"/@@@#{r!}}&#.*@.*/£$X€€}°![rdX€$[°!°}/@/[X°£}"°°X[}§X£}€r§€{""!$.(@@%*[$D¨{"D}X!D}X§ 3 | [}/@/£$§€[r§¨¨}r°$€}€$¨X"}*&@#.X£°rd!rX"X{d?¨$°$§§"[D./%&@@%(*.D{"X¨XDDr{"X{"DX$¨X€$.#@@@(.$£§€X$£¨¨£d£$°[}¨€$d°r§"rD€"[§!XX*@@*!¨r[d.@/€*@.*(¨?°!${d?°?£!¨§!r$° 4 | #&@&%/.!r$X"r"£¨$[€°!€D°[$°X}¨X§"Dd}d}¨*#@@(.?¨"°[£r!D![X°$d¨[{X€"}X}XX?§?r/@(?!€!$$X,@/[*@..(¨X°${°}€!$£§§§dr!X°D$.&&.rr°r[?r["rDX!X£,%&&/r€"X°£§¨€$€[}$"dd}.,( 5 | °?"¨°/@(.[§X[{¨{rD$*&@#.$°§?"r§??[${dD"*(%@@&#*.[§[!$r?§$r"[?{d"°r"[!rrdr"?°¨[?D¨°{§¨§r§§£(@@/¨r}¨D{°?drr!"€d!€°dD°€d?!}d}"..}[?$?d?!.&(.*@.?($!X§X[XXd"[r{°$[§$ 6 | }X€?r°£€"£§¨€D}[°r?"D?°D£!$X?r[¨D{€£Dd£€r![%&.r¨}!¨{}r{$r££"?$}}}d§d$?dr"?D{r}{Xr"€X€"*@%/@.{/?€[dD{X[D!{{}€€°°d[r!D}£.#@%.{[§d,/@&/X}$¨{rDDX§{§D§,#&@%*,{$r£[!d 7 | $XdD§D[€.%@*€/&@%,?$€D€°€¨?$[X*#&@&(.${$rdD{!X}DX¨d$$"¨r€§Dd§$"{°°rrr$}d¨£$DX€d{$§??D?$"X°,@($!$!}?¨£}X$§"X{d}{{r!}}°D["r£?"§?X§"}[d§¨€(@@@.€*?r"¨}£°¨[[d°X$¨°$d 8 | ¨°{¨{X{Dd°?d§¨§X!r¨"£D€r§¨"D!rdd?D¨Xr§[¨§,@&."{°?$!°X}D€dd$§!X}[!{Dd"€${§£dX[X["!§"$§{?€.&@.D.}§d§°}{XX€Xr$€°[}°"Xr¨}D¨r¨!.%@@.€}}d€$"d.,(&@&#*.?§["°DDd$X!!dD°¨ 9 | °§£${XD!°§£€.%#@@@@@@@%(*.§""[Dr£¨!¨D§[¨D}£D}}r¨"$€D$["X£?Dr{°£?£?$D¨?$£{€X[$$Dr§rX¨£"dD.&%°$¨¨d[?£€£r¨°X??!§¨r!r{{"X"}§"$!§["[£[!X[€X€°°,@&.{°}°r£¨[d$XX}X[}$£} 10 | }}€€{dd€?!£¨£XXrrd§}$X¨}€°§€!![?Dd°{"¨[*%@,X!§€{{$°!€d€{!XX§{!$£r}D$"D}D[€§€"[}d!Dd?$X€°{£&@@,£§d$$€¨r£!}?X€}{}X}"D£D!§X°¨$"r£?¨{[$€}r[?{drd¨?r"dX?€€§${£[§"{d§X 11 | ![}drdX""£!!!{d"§r{¨$?§{{[XX£°{¨d!X{°"$¨§¨[?!{"[}£§Dd[X$$X}$°¨!X£¨"¨¨dD!§€}D°}$r°D["d€[%@(£!!$}!{"{d$§$[?"?€"$?[$"X[°{$D!DX"°{d£?§?¨?§[?"$(&&&}!£€$§£{?$d°XD$r$£ 12 | DrDrd£€¨!!$[$"[d!D¨!"[}*(@@@@@@@@#(,.°"?rd°X[!{X""d£?....%&.?Dr!X€¨!DX$£°[D$[$£{[?dDD!{"°d?d£"$°!£X$°?[?$[°}€£§[£"}£¨D?r§°§€{{X{€?Dr§°$D}!}°£¨"X}}¨D$°£d§¨§}?"?$ 13 | 
€}£°D"r€[¨°!!X$°¨!d?"d}d[°D$§$r"¨§{£"!¨§}€?XX°"§?§D?"¨d¨{{r£§}r§°££.*(&@@@@@@@@@@@@@@%*!§X"}$[£[$r[,%@@@@%@#°€£"$[?££!¨d{X!°r°°°?}r[°°X€¨£d"€{$!"§[{"€§?¨d?€rr°! 14 | D€€?£§"[d?§{r§r".#@@&%(*,/(#####(##%&@@@@#.Xr?¨§r,&&,{£[X(&@*€¨}{r"Xr€}§€§$D"r{D"£¨£dXr€?{"D$$€¨?$d?°°d}€°!€§}!{€r}[¨€[§£X{£}§€"€?"°€§{"¨£D§?.*#####(,$}XD!dD§€D 15 | °§d?€$rr"¨""Xd[°"§"$£$""r(&%/#########(....£?!?"?¨r°$€d!$°!r€°(@@###//#######*,....,#(#@@%@&,$}./@/£$£XD°¨[#&.[£X£[X[}Xd?§XX[r}§}X"?"dr§"DDX£}r§€"§!d{[§?[!!€}}r 16 | "§[€?r§D€?§"*@&###%##(((((/***/((((((((((&&(@%*@&,X§[$?rD§§.&(dr¨!¨!{X[£§"€$$X?!$¨£!!°d§£X€{°[¨r[r"!!§?"°X"$r°rr¨°$?!Xd§$}!{$¨¨{§D¨{€d!D"*@%&@@&%####((((((((,$? 17 | r°![§"{}°"[[${dD?[?{£D[§?,&&##%&@@&#((((((((((,}r{{{?{rXd€.#@&####((//(##((((((((/**,/(/(#@&*&@&/}dr?d{}d}r"(&.?d{d£D?}¨r{£${d"$§X$rrd£°[¨d$r{?[d°D¨¨€£?XD}€!dD" 18 | ,.€€§?"°.(@@###(((((((((((/,.r€[{..,,**////#@%@@#"°dX§£"€$D{.&(€D€r§[}D€$£¨§"$Xrd¨!dd£¨¨!D$D°$§°}°§$$$¨£¨}¨!°r}rDr!?{D€[r$£¨Xr$§{§?£?d?§§¨*@%#((((#&@@%#((((///* 19 | $d!rD}${dr$X}!}$¨[}}d¨}d?{.#@%((((((((#&@@&(/******,./&@@@%#(((#((((((*..[{X.,*****/#(***,.,,*/.£°£D¨{€{¨¨!}"(&.?°[D€D"€}{!¨Dr$§}d€D{!{$"D¨}§§¨X°r£!d"?d?°d"£$€D 20 | /****/,¨X,(((((((((/.D"?.,********/##(###(,&%&%.,d£D¨?{X§§"X[.@/X€}}$?$DD[!{£¨r?DX[D§}X$DD°$$¨{€[d"°{£[¨rr{§§°}XX¨£"{§X£§$¨?}°{§£r£{$££X°r$.#@&(((((((//**/#&@&( 21 | D[¨Xd!dd[£€§{"D"$§°X¨{D£{£[}.#@%#((((/*******/#&@@&%%#(*/((//(////,¨?€,**********,(######/.&%&@@&,€{£€§r£§?$€£%%,"!¨$XD{£¨D£[D"¨?§¨D!?[}{[[£D{€"°r$[dXd[§"r£X§"[ 22 | %%%@@@@@%(.//*(*,}§r,********,,**(#####(((.&%&@#,!$D},*¨°¨°}!€*@@@@@@@@@@@@%#(.°[¨}¨?d!X{°!XX}$}["§["{!¨[¨€!¨D!"}$§D"Xd[}[}??{??£!€¨[?€{!X?r§,#@@#(/**********/% 23 | ["DX§}§X¨{X$°dX{€r£{D[!€"€D£}*((#@%*******/(#%%%%%%%%%&@@@@%(/.¨€£,****,,**.,**/(#(#######(@/%@#.£DdD/@.D¨¨£D$"&@(%**,,**,..*#@@@&/.[§DD?£$Dr"D"£°r?"rXr[€€¨€[?! 
24 | %%%%%%#((#&@@@%/****,.**,.**../#(/###,§*(#%&.%@%,¨°}r.&/.$¨{$!€/&#?,*,,****.D,*,*@&(#,¨D"DrX€X{"¨}¨{$¨§€°[[¨d¨?¨}${[£X{D¨¨°¨DX££r€§d"$°D[X$"r./(#@@@@(**/#%%%%%% 25 | d£d}[[{}?€d£{r!!"DXrdd!§°d!$X!.*/&@@@@(#%%%%%%%%%%%%#((((((#&@@@@##(/*****,£X*#((###,",##*@%°#@@(rX!¨d%%,£!£}€?"/@,,*****,*,,**,..#@@@/Drd"€r}}°€Xr"§d?X"$?"??€r 26 | #(((((((((((((#%&@@@&#(((,[£.(####(d§/##&@@@,.&@@.?rd{/@#!£§"DDX[%%.***,*,***.*,.D,/&@@&(§D£}€dXd}¨°£[dD!Dd€£€Xd""r§[€§$°$r€??§X¨??€rr"{?d{€D°d"¨(@*&@&%%%%%%%%% 27 | X!°XD$¨?!}d}!$?¨€$€°°"X}°{$X€rD€!*@%%@@&%%%%#(((((((((((((((((((((#%@@@@@#€,######&%###@@@@@%X,&@(r}D{"%@,£°$[!£€.@(,**.,,,**.**,€,*.*@@@(}D¨{?£§}D{}°d£{}"?}dX{ 28 | ((((((((((((((((((((((#%&@@@###(,*#*/#@@@.?.&({.(&.d{?",%%["}$"Xd{*@***,.*,**.**,!,*.[.&@(,?}[§?[££§€{£rD{{£°[!$£?{}£{}"X?[[}§"{{°°DX€§X[X§££!D"!,@&%@@@@%#((((( 29 | £[d!D°[§°{€¨°€{}[$£°£r§D°"$X€$"![,@&@@%&&@#(((((((((((((((((((((((((((((/£(%##/,/(.,##@%*D¨D/@.[.@(€Dr""*@,§X§X!X{X%%,*,.,***.,**.,*.}r,#@#.°}[X?}¨d$¨§r[D{£d°}D 30 | ((((((((((((((((((((/,X?¨¨[(#/(/,€(@@@%d!dD}{%%"€*@,"{}r¨#&[{"£{"[?.@#**,.*,*,,**,.*."r€.%@*d?°d"!X¨d€D§{"{r°X?§€§£Xr}{ddX{£$r}¨d°{X$°€§€}"}[}rD.(@&##%#(%@#(((( 31 | -------------------------------------------------------------------------------- /tutorial/dvc_overview.md: -------------------------------------------------------------------------------- 1 | Data Version Control 2 | ==================== 3 | 4 | 5 | Overview 6 | --------- 7 | - Each run is tracked and is reproducible 8 | - Each run can be part of a pipeline 9 | - A complete pipeline is reproducible according to a chosen version 10 | (i.e. a chosen commit) 11 | - The cache mechanism allows reproducing sub-pipelines (only the parts with outdated dependencies) 12 | - Several kinds of storage can be configured to handle data files (AWS S3, Azure, 13 | Google Cloud Storage, SSH, HDFS) 14 | 15 | 16 | - Need to be rigorous: the inputs and outputs of each run must be explicitly 17 | specified to be handled as dependencies 18 | - Commands can't be run through a Jupyter Notebook 19 | 20 | 21 | 22 | How it works 23 | ------------ 24 | 25 | **DVC** depends on **Git**. You need a Git repository, and you manage your 26 | *code* versioning yourself. 27 | You should think of **DVC** as a git extension. 28 | 29 | 1. As usual, create a git repository and version your files 30 | 2. Activate DVC (`dvc init`) 31 | 3. Add data files and manage their versioning with DVC (`dvc add [my_file]`). 32 | At this step DVC puts data files in its cache and creates meta files to 33 | identify them. 34 | (see section **Add data file**) 35 |
4. Commit the meta files using Git to save a version of the pipeline 36 | 37 | 38 | 39 | Small tutorial 40 | --------------- 41 | 42 | ### Install DVC 43 | 44 |     pip install dvc 45 | 46 | ### Setup a git environment 47 | 48 |     mkdir test_dvc 49 |     cd test_dvc 50 | 51 |     git init 52 |     # Create a Python script which reads an input file and writes its content in upper case 53 |     mkdir code 54 |     echo '#!/usr/bin/env python' > code/python_script.py 55 |     echo -e "with open('./data/input_file.txt', 'r') as fd, open('./results/output_file.txt', 'w') \ 56 |     as wfd:\n    wfd.write(fd.read().upper())" >> code/python_script.py 57 |     chmod +x ./code/python_script.py 58 | 59 |     # Commit your script 60 |     git add ./code/python_script.py 61 |     git commit -m 'Initialize env' 62 |
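The two `echo` commands above generate `code/python_script.py`. Unescaped, the resulting script is equivalent to this (shown here with standard indentation for readability):

    #!/usr/bin/env python
    # Read the input file and write its content in upper case.
    with open('./data/input_file.txt', 'r') as fd, open('./results/output_file.txt', 'w') as wfd:
        wfd.write(fd.read().upper())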
63 | ### Setup DVC environment 64 | 65 |     # In ./test_dvc (top level directory) 66 |     dvc init 67 |     git commit -m 'Initialize dvc' 68 | 69 | ### Add a data file 70 | 71 |     # Create a data file for the example 72 |     mkdir data 73 |     echo "This is a text" > data/input_file.txt 74 | 75 |     dvc add data/input_file.txt 76 | 77 | Here you can check that the meta file is created (`git status data`), that the real file 78 | is ignored by git (`cat ./data/.gitignore`) and that a cache entry is created (`ls -la .dvc/cache/`) 79 | 80 |     # Commit meta files in git 81 |     git add . 82 |     git commit -m "Add input data file" 83 | 84 | ### Run a step 85 | 86 |     dvc run -d [input file] -o [output file] [cmd] 87 | 88 |     mkdir results 89 |     dvc run -d ./data/input_file.txt -o ./results/output_file.txt ./code/python_script.py 90 | 91 | Check that the output file *./results/output_file.txt* and the meta file *./output_file.txt.dvc* are generated 92 | 93 | 94 | ### Run a pipeline 95 | A pipeline is composed of several steps, so we need to create at least one more step here. 96 | 97 |     # Run another step and create a pipeline 98 |     MY_CMD="cat ./results/output_file.txt | wc -c > ./results/nb_letters.txt" 99 |     dvc run -d ./results/output_file.txt -o ./results/nb_letters.txt -f MyPipeline.dvc $MY_CMD 100 | 101 | See the result 102 | 103 |     cat ./results/nb_letters.txt 104 | 105 | At this step the file *./MyPipeline.dvc* represents the pipeline for the current version of code and data 106 | 107 |     # Reproduce the pipeline 108 |     dvc repro MyPipeline.dvc 109 | 110 | Nothing happens because nothing has changed; try `dvc repro MyPipeline.dvc -v` 111 | 112 |     # Force the pipeline run 113 |     dvc repro MyPipeline.dvc -v -f 114 | 115 |     git add . 116 |     git commit -m 'pipeline creation' 117 | 118 | ### Modify the input and re-run 119 | 120 |     echo "new input" >> data/input_file.txt 121 | 122 |     dvc repro MyPipeline.dvc -v 123 | 124 |     cat ./results/nb_letters.txt 125 | 126 |     git commit -am 'New pipeline version' 127 | 128 | 129 | ### See pipeline steps 130 | 131 |     dvc pipeline show MyPipeline.dvc 132 | 133 | Need to be rigorous 134 | ------------------- 135 | 136 | - the inputs and outputs of each run must be explicitly 137 | specified to be handled as dependencies 138 | - when you modify a data file you need to re-run the associated step to be able 139 | to version it (or reproduce the whole pipeline using the cache mechanism) 140 | 141 | Various 142 | ------- 143 | 144 | See the [Data Version Control documentation](https://github.com/iterative/dvc) 145 | 146 | See the [Data Version Control tutorial](https://blog.dataversioncontrol.com/data-version-control-tutorial-9146715eda46) 147 | -------------------------------------------------------------------------------- /resources/dvc_playground/user/resources/steps/decrypt.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | import json 3 | import logging 4 | from argparse import ArgumentParser 5 | from random import randint 6 | from typing import List 7 | 8 | noise_chars = ['!', '?', '§', 'X', 'r', 'd', 'D', '¨', '$', '£', '"', "€", '°', '}', '{', '['] 9 | 10 | 11 | def add_noise(content_lines: List[str]): 12 |     replaced_content = [] 13 |     max_line_length = len(max(content_lines, key=len)) 14 |     for line in content_lines: 15 |         line = line.replace('\n', '') 16 |         if len(line) < max_line_length: 17 |             line = line + (max_line_length - len(line)) * ' ' 18 |         replaced_content.append(''.join([noise_chars[randint(0, len(noise_chars) - 1)] if char == ' ' else char 19 |                                          for char in line])) 20 |     return [f'{line}\n' for line in replaced_content] 21 | 22 | 23 | def remove_noise(content_lines: List[str]): 24 |     return [''.join(' ' if char in noise_chars else char for char in line) for line in content_lines] 25 | 26 | 27 | def shift_rows(content_lines: List[str], shift: int): 28 |     nb_lines = len(content_lines) 29 |     return [content_lines[(i + shift) % nb_lines] for i in range(0, nb_lines)] 30 | 31 | 32 | def unshift_rows(content_lines: List[str], shift: int): 33 |     nb_lines = len(content_lines) 34 |     return [content_lines[(i - shift) % nb_lines] for i in range(0, nb_lines)] 35 | 36 | 37 | def shift_cols(content_lines: List[str], even_shift: int, odd_shift: int): 38 |     shifted_content = [] 39 |     for idx, line in enumerate(content_lines): 40 |         line = line.replace('\n', '') 41 |         new_line = '' 42 |         if idx % 2 == 0: 43 |             for char_id in range(0, len(line)): 44 |                 new_line += line[(char_id + even_shift) % len(line)] 45 |         else: 46 |             for char_id in range(0, len(line)): 47 |                 new_line += line[(char_id + odd_shift) % len(line)] 48 |         shifted_content.append(new_line) 49 | 50 |     return [f'{line}\n' for line in shifted_content] 51 | 52 | 53 | def unshift_cols(content_lines: List[str], even_shift: int, odd_shift: int): 54 |     unshifted_content = [] 55 |     for idx, line in enumerate(content_lines): 56 |         line = line.replace('\n', '') 57 |         new_line = '' 58 |         if idx % 2 == 0: 59 |             for char_id in range(0, len(line)): 60 |                 new_line += line[(char_id - even_shift) % len(line)] 61 |         else: 62 |             for char_id in range(0, len(line)): 63 |                 new_line += line[(char_id - odd_shift) % len(line)] 64 |         unshifted_content.append(new_line) 65 | 66 |     return [f'{line}\n' for line in unshifted_content] 67 | 68 | 69 |
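# Each unshift_* helper above is the exact inverse of the corresponding
# shift_* helper. A quick round-trip check with illustrative values (these
# values are not part of the exercise):
#
#     lines = ['ab\n', 'cd\n', 'ef\n']
#     assert unshift_rows(shift_rows(lines, shift=1), shift=1) == lines
#     assert unshift_cols(shift_cols(lines, even_shift=1, odd_shift=2),
#                         even_shift=1, odd_shift=2) == lines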
def encrypt(input_file: str, output_file: str, row_shift: int, col_even_shift: int, col_odd_shift: int): 70 |     with open(input_file, 'r') as fd: 71 |         content_lines = fd.readlines() 72 | 73 |     noisy_content = add_noise(content_lines) 74 |     row_shifted = shift_rows(noisy_content, shift=row_shift) 75 |     col_shifted = shift_cols(row_shifted, even_shift=col_even_shift, odd_shift=col_odd_shift) 76 | 77 |     with open(output_file, 'w') as fd: 78 |         fd.writelines(col_shifted) 79 | 80 | 81 | def decrypt(encrypted_file: str, output_file: str, row_shift: int, col_even_shift: int, col_odd_shift: int): 82 |     with open(encrypted_file, 'r') as fd: 83 |         content_lines = fd.readlines() 84 | 85 |     col_unshifted = unshift_cols(content_lines, even_shift=col_even_shift, odd_shift=col_odd_shift) 86 |     row_unshifted = unshift_rows(col_unshifted, shift=row_shift) 87 |     decrypted_content = remove_noise(row_unshifted) 88 | 89 |     with open(output_file, 'w') as fd: 90 |         fd.writelines(decrypted_content) 91 | 92 | 93 | if __name__ == '__main__': 94 |     parser = ArgumentParser(description='Decrypt file!') 95 |     parser.add_argument('-i', '--input-file', required=True) 96 |     parser.add_argument('-o', '--output-file', required=True) 97 |     parser.add_argument('-p', '--param-file', required=True) 98 |     parser.add_argument('-e', '--encrypt', action='store_true') 99 | 100 |     args = parser.parse_args() 101 |     try: 102 |         with open(args.param_file, 'r') as fd: 103 |             params = json.load(fd) 104 | 105 |         row_shift = params['row_shift'] 106 |         col_even_shift = params['col_even_shift'] 107 |         col_odd_shift = params['col_odd_shift'] 108 | 109 |         if args.encrypt: 110 |             encrypt(args.input_file, args.output_file, row_shift, col_even_shift, col_odd_shift) 111 |         else: 112 |             decrypt(args.input_file, args.output_file, row_shift, col_even_shift, col_odd_shift) 113 |     except KeyError as e: 114 |         logging.error(f'Parameter error: {e}') 115 |     except json.JSONDecodeError as e: 116 |         logging.error(f'Parameter file wrongly formatted: {e}') 117 |     except IOError as e: 118 |         logging.error(f'IOError: {e}') 119 | 120 | -------------------------------------------------------------------------------- /talks/reveal.js/plugin/markdown/example.html: -------------------------------------------------------------------------------- [HTML markup stripped during extraction; only the page title "reveal.js - Markdown Demo" is recoverable]
-------------------------------------------------------------------------------- /resources/05_Tune_hyperparameters_with_crossvalidation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": {}, 6 | "source": [ 7 | "# Tune hyperparameters using cross-validation\n", 8 | "\n", 9 | "In this notebook, we will tune the hyperparameters of a simple text classification pipeline. \n", 10 | "\n", 11 | "Starting from the raw text data, we will encode it using bag of words (*hyperparameter 1*: number of words in the vocabulary), and then train a Logistic Regression classifier (*hyperparameter 2*: regularization parameter). We will evaluate performance using (repeated) cross-validation.\n", 12 | "\n", 13 | "Metrics from each run will be stored with the **MLflow tracking API**. That's the output we want to version with **DVC**." 14 | ] 15 | }, 16 | { 17 | "cell_type": "code", 18 | "execution_count": null, 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "# Parameters\n", 23 | "\"\"\"\n", 24 | ":param str input_csv_file: Path to input file\n", 25 | ":param List[float] C_list: List of inverse regularisation coefficient values\n", 26 | ":param List[int] max_features_list: List of maximum numbers of features\n", 27 | ":param str mlflow_output: MLflow metrics directory\n", 28 | ":dvc-in input_csv_file: ./poc/data/data_train.csv\n", 29 | ":dvc-out mlflow_output: ./poc/data/cross_valid_metrics\n", 30 | ":dvc-extra: --C-list .1 1.0 --max-features-list 100 500 1000\n", 31 | "\"\"\"\n", 32 | "# Values of the parameters for this Jupyter Notebook only\n", 33 | "# the notebook is in ./poc/pipeline/notebooks\n", 34 | "input_csv_file = \"../../data/data_train.csv\"\n", 35 | "C_list = [.1, 1.0]\n", 36 | "max_features_list = [100, 500, 1000]\n", 37 | "mlflow_output = '../../data/cross_valid_metrics'" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": null, 43 | "metadata": {}, 44 | "outputs": [], 45 | "source": [ 46 | "import pandas as pd\n", 47 | "from sklearn.feature_extraction.text import CountVectorizer\n", 48 | "from sklearn.linear_model import LogisticRegression\n", 49 | "from sklearn.pipeline import Pipeline\n", 50 | "from sklearn.model_selection import cross_validate, RepeatedStratifiedKFold\n", 51 | "import mlflow\n", 52 | "from itertools import product" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "metadata": {}, 59 | "outputs": [], 60 | "source": [ 61 | "df = pd.read_csv(input_csv_file).dropna()" 62 | ] 63 | }, 64 | { 65 | "cell_type": "code", 66 | "execution_count": null, 67 | "metadata": {}, 68 | "outputs": [], 69 | "source": [ 70 | "def log_results(d):\n", 71 | "    for metrics, values in d.items():\n", 72 | "        mlflow.log_metric(metrics + '_avg', values.mean())\n", 73 | "        mlflow.log_metric(metrics + '_std', values.std())" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": null, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "mlflow.set_tracking_uri(mlflow_output)" 83 | ] 84 | }, 85 | { 86 | "cell_type": "code", 87 | "execution_count": null, 88 | "metadata": {}, 89 | "outputs": [], 90 | "source": [ 91 | "for C, max_features in product(C_list, max_features_list):\n", 92 | "    with mlflow.start_run():\n", 93 | "        mlflow.log_param('C', C)\n", 94 | "        mlflow.log_param('max_features', max_features)\n", 95 | "        classifier = LogisticRegression(C=C,\n", 96 | "
solver='lbfgs',\n", 97 | " multi_class='multinomial')\n", 98 | " vectorizer = CountVectorizer(max_features=max_features,\n", 99 | " stop_words='english')\n", 100 | " pipeline = Pipeline([('vectorizer', vectorizer),\n", 101 | " (classifier.__repr__().split('(')[0], classifier)])\n", 102 | " d = cross_validate(pipeline,\n", 103 | " X=df['data'],\n", 104 | " y=df['target'],\n", 105 | " scoring=['accuracy', 'precision_macro', 'f1_micro', 'f1_macro'],\n", 106 | " cv=RepeatedStratifiedKFold(n_splits=3, n_repeats=1, random_state=0))\n", 107 | " log_results(d)\n" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": null, 113 | "metadata": {}, 114 | "outputs": [], 115 | "source": [] 116 | } 117 | ], 118 | "metadata": { 119 | "kernelspec": { 120 | "display_name": "Python [conda env:poc_ml_versioning]", 121 | "language": "python", 122 | "name": "conda-env-poc_ml_versioning-py" 123 | }, 124 | "language_info": { 125 | "codemirror_mode": { 126 | "name": "ipython", 127 | "version": 3 128 | }, 129 | "file_extension": ".py", 130 | "mimetype": "text/x-python", 131 | "name": "python", 132 | "nbconvert_exporter": "python", 133 | "pygments_lexer": "ipython3", 134 | "version": "3.7.0" 135 | } 136 | }, 137 | "nbformat": 4, 138 | "nbformat_minor": 2 139 | } 140 | --------------------------------------------------------------------------------