├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs.material ├── about.md ├── artifacts.md ├── css │ ├── fonts.css │ ├── jupyter-notebook.css │ ├── material.css │ ├── mkdocstrings.css │ ├── style.css │ └── tables_style.css ├── datasets │ ├── curation_and_metadata.md │ ├── datasets.csv │ ├── datasets.md │ ├── datasets_splits.md │ ├── guides │ │ ├── index.md │ │ ├── tutorial_load_datasets.ipynb │ │ └── tutorial_load_datasets.md │ ├── import.md │ ├── index.md │ ├── install.md │ ├── install │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ ├── metadata.md │ ├── metadata.md.DEPRECATED │ ├── samples_count │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ ├── schemas │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ └── tutorial_load_parquet.ipynb ├── figs │ ├── aim_log1.png │ ├── aim_log2.png │ ├── aim_log3.png │ ├── aim_run1.png │ ├── aim_run2.png │ ├── aim_run3.png │ ├── dataset_properties_mirage19.png │ ├── dataset_properties_mirage22.png │ ├── dataset_properties_ucdavis-icdm19.png │ └── dataset_properties_utmobilenet21.png ├── index.md ├── index.md.DEPRECATED ├── install.md ├── modeling │ ├── aim_repos │ │ ├── aim_webui.md │ │ ├── aimrepo_subcmd.md │ │ └── index.md │ ├── aim_repositories_content.md │ ├── campaigns.md │ ├── exploring_artifacts.md │ ├── figs │ │ ├── aim_home-page.png │ │ ├── aim_log1.png │ │ ├── aim_log2.png │ │ ├── aim_log3.png │ │ ├── aim_run1.png │ │ ├── aim_run2.png │ │ └── aim_run3.png │ ├── index.md │ ├── overview.md │ └── runs.md ├── overrides │ ├── arrow-right-solid.svg │ ├── github-mark │ │ └── github-mark.svg │ ├── home.html │ ├── home.js │ ├── main.html │ ├── main.html.DEPRECATED │ ├── tcbench.svg │ └── tcbench_logo.svg ├── papers │ ├── imc23 │ │ ├── artifacts.md │ │ ├── campaigns.md │ │ ├── index.md │ │ ├── ml_artifacts.md │ │ ├── notebooks.md │ │ ├── notebooks │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.md │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files │ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png │ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb │ │ │ ├── figure11_dropout_impact_supervised_setting.md │ │ │ ├── figure11_dropout_impact_supervised_setting_files │ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png │ │ │ ├── figure1_flowpic_example.ipynb │ │ │ ├── figure1_flowpic_example.md │ │ │ ├── figure1_flowpic_example_files │ │ │ │ └── figure1_flowpic_example_8_0.png │ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ │ │ ├── figure3_confusion_matrix_supervised_setting.md │ │ │ ├── figure3_confusion_matrix_supervised_setting_files │ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.md │ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files │ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png │ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ │ │ ├── figure5_ucdavis_augmentations_comparison.md │ │ │ ├── figure5_ucdavis_augmentations_comparison_files │ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.md 
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files │ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.md │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files │ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.md │ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files │ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png │ │ │ ├── miscellaneous_stats.ipynb │ │ │ ├── miscellaneous_stats.md │ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb │ │ │ ├── table10_ucdavis-icdm19_tukey.md │ │ │ ├── table2_datasets_properties.ipynb │ │ │ ├── table2_datasets_properties.md │ │ │ ├── table3_xgboost_baseline.ipynb │ │ │ ├── table3_xgboost_baseline.md │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.md │ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ │ │ ├── table5_simclr_dropout_and_projectionlayer.md │ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb │ │ │ ├── table6_simclr_other_augmentation_pairs.md │ │ │ ├── table7_larger_trainset.ipynb │ │ │ ├── table7_larger_trainset.md │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.md │ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human.md │ │ └── pytest.md │ └── index.md ├── quick_tour.md └── tcbench │ ├── api │ ├── overview.md │ ├── tcbench_cli_clickutils.md │ ├── tcbench_cli_command_aimrepo.md │ ├── tcbench_cli_command_campaign.md │ ├── tcbench_cli_command_datasets.md │ ├── tcbench_cli_command_singlerun.md │ ├── tcbench_cli_richutils.md │ ├── tcbench_libtcdatasets.md │ ├── tcbench_libtcdatasets_datasets_utils.md │ ├── tcbench_libtcdatasets_mirage19_json_to_parquet.md │ ├── tcbench_libtcdatasets_mirage22_json_to_parquet.md │ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md │ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md │ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md │ ├── tcbench_modeling_aimutils.md │ ├── tcbench_modeling_augmentation.md │ ├── tcbench_modeling_backbone.md │ ├── tcbench_modeling_dataprep.md │ ├── tcbench_modeling_losses.md │ ├── tcbench_modeling_methods.md │ ├── tcbench_modeling_run_augmentations_at_loading.md │ ├── tcbench_modeling_run_augmentations_at_loading_xgboost.md │ ├── tcbench_modeling_run_campaign_augmentations_at_loading.md │ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md │ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md │ ├── tcbench_modeling_run_contrastive_learning_and_finetune.md │ └── tcbench_modeling_utils.md │ ├── cli_intro.md │ ├── index.md │ ├── install.md │ ├── internals.md │ └── overview.md ├── docs ├── .DS_Store ├── 404.html ├── about │ └── index.html ├── arrow-right-solid.svg ├── artifacts │ └── index.html ├── assets │ ├── _mkdocstrings.css │ ├── images │ │ └── favicon.png │ ├── javascripts │ │ ├── bundle.83f73b43.min.js │ │ ├── 
bundle.83f73b43.min.js.map │ │ ├── glightbox.min.js │ │ ├── lunr │ │ │ ├── min │ │ │ │ ├── lunr.ar.min.js │ │ │ │ ├── lunr.da.min.js │ │ │ │ ├── lunr.de.min.js │ │ │ │ ├── lunr.du.min.js │ │ │ │ ├── lunr.el.min.js │ │ │ │ ├── lunr.es.min.js │ │ │ │ ├── lunr.fi.min.js │ │ │ │ ├── lunr.fr.min.js │ │ │ │ ├── lunr.he.min.js │ │ │ │ ├── lunr.hi.min.js │ │ │ │ ├── lunr.hu.min.js │ │ │ │ ├── lunr.hy.min.js │ │ │ │ ├── lunr.it.min.js │ │ │ │ ├── lunr.ja.min.js │ │ │ │ ├── lunr.jp.min.js │ │ │ │ ├── lunr.kn.min.js │ │ │ │ ├── lunr.ko.min.js │ │ │ │ ├── lunr.multi.min.js │ │ │ │ ├── lunr.nl.min.js │ │ │ │ ├── lunr.no.min.js │ │ │ │ ├── lunr.pt.min.js │ │ │ │ ├── lunr.ro.min.js │ │ │ │ ├── lunr.ru.min.js │ │ │ │ ├── lunr.sa.min.js │ │ │ │ ├── lunr.stemmer.support.min.js │ │ │ │ ├── lunr.sv.min.js │ │ │ │ ├── lunr.ta.min.js │ │ │ │ ├── lunr.te.min.js │ │ │ │ ├── lunr.th.min.js │ │ │ │ ├── lunr.tr.min.js │ │ │ │ ├── lunr.vi.min.js │ │ │ │ └── lunr.zh.min.js │ │ │ ├── tinyseg.js │ │ │ └── wordcut.js │ │ └── workers │ │ │ ├── search.6ce7567c.min.js │ │ │ └── search.6ce7567c.min.js.map │ └── stylesheets │ │ ├── glightbox.min.css │ │ ├── main.0253249f.min.css │ │ ├── main.0253249f.min.css.map │ │ ├── palette.06af60db.min.css │ │ └── palette.06af60db.min.css.map ├── css │ ├── fonts.css │ ├── jupyter-notebook.css │ ├── material.css │ ├── mkdocstrings.css │ ├── style.css │ └── tables_style.css ├── datasets │ ├── curation_and_metadata │ │ └── index.html │ ├── datasets.csv │ ├── datasets │ │ └── index.html │ ├── datasets_splits │ │ └── index.html │ ├── guides │ │ ├── index.html │ │ ├── tutorial_load_datasets.ipynb │ │ └── tutorial_load_datasets │ │ │ └── index.html │ ├── import │ │ └── index.html │ ├── index.html │ ├── install │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ ├── metadata.md.DEPRECATED │ ├── metadata │ │ └── index.html │ ├── samples_count │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ ├── schemas │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ └── tutorial_load_parquet.ipynb ├── figs │ ├── aim_log1.png │ ├── aim_log2.png │ ├── aim_log3.png │ ├── aim_run1.png │ ├── aim_run2.png │ ├── aim_run3.png │ ├── dataset_properties_mirage19.png │ ├── dataset_properties_mirage22.png │ ├── dataset_properties_ucdavis-icdm19.png │ └── dataset_properties_utmobilenet21.png ├── github-mark │ ├── github-mark-white.png │ ├── github-mark-white.svg │ ├── github-mark.png │ └── github-mark.svg ├── home.js ├── index.html ├── index.md.DEPRECATED ├── install │ └── index.html ├── main.html.DEPRECATED ├── modeling │ ├── aim_repos │ │ ├── aim_webui │ │ │ └── index.html │ │ ├── aimrepo_subcmd │ │ │ └── index.html │ │ └── index.html │ ├── aim_repositories_content │ │ └── index.html │ ├── campaigns │ │ └── index.html │ ├── exploring_artifacts │ │ └── index.html │ ├── figs │ │ ├── aim_home-page.png │ │ ├── aim_log1.png │ │ ├── aim_log2.png │ │ ├── aim_log3.png │ │ ├── aim_run1.png │ │ ├── aim_run2.png │ │ └── aim_run3.png │ ├── index.html │ ├── overview │ │ └── index.html │ └── runs │ │ └── index.html ├── objects.inv ├── overrides │ ├── arrow-right-solid.svg │ ├── github-mark │ │ ├── github-mark-white.png │ │ ├── 
github-mark-white.svg │ │ ├── github-mark.png │ │ └── github-mark.svg │ ├── home.html │ ├── home.js │ ├── main.html │ ├── main.html.DEPRECATED │ ├── tcbench.svg │ └── tcbench_logo.svg ├── papers │ ├── imc23 │ │ ├── artifacts │ │ │ └── index.html │ │ ├── campaigns │ │ │ └── index.html │ │ ├── index.html │ │ ├── ml_artifacts │ │ │ └── index.html │ │ ├── notebooks │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human │ │ │ │ └── index.html │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files │ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png │ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb │ │ │ ├── figure11_dropout_impact_supervised_setting │ │ │ │ └── index.html │ │ │ ├── figure11_dropout_impact_supervised_setting_files │ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png │ │ │ ├── figure1_flowpic_example.ipynb │ │ │ ├── figure1_flowpic_example │ │ │ │ └── index.html │ │ │ ├── figure1_flowpic_example_files │ │ │ │ └── figure1_flowpic_example_8_0.png │ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ │ │ ├── figure3_confusion_matrix_supervised_setting │ │ │ │ └── index.html │ │ │ ├── figure3_confusion_matrix_supervised_setting_files │ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ │ │ ├── figure4_ucdavis_per_class_average_flowpic │ │ │ │ └── index.html │ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files │ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png │ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ │ │ ├── figure5_ucdavis_augmentations_comparison │ │ │ │ └── index.html │ │ │ ├── figure5_ucdavis_augmentations_comparison_files │ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance │ │ │ │ └── index.html │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files │ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank │ │ │ │ └── index.html │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files │ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ │ │ ├── figure8_ucdavis_kde_on_pkts_size │ │ │ │ └── index.html │ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files │ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png │ │ │ ├── index.html │ │ │ ├── miscellaneous_stats.ipynb │ │ │ ├── miscellaneous_stats │ │ │ │ └── index.html │ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb │ │ │ ├── table10_ucdavis-icdm19_tukey │ │ │ │ └── index.html │ │ │ ├── table2_datasets_properties.ipynb │ │ │ ├── table2_datasets_properties │ │ │ │ └── index.html │ │ │ ├── table3_xgboost_baseline.ipynb │ │ │ ├── table3_xgboost_baseline │ │ │ │ └── index.html │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions │ │ │ │ └── index.html │ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ │ │ ├── table5_simclr_dropout_and_projectionlayer │ │ │ │ └── index.html │ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb │ │ │ ├── 
table6_simclr_other_augmentation_pairs │ │ │ │ └── index.html │ │ │ ├── table7_larger_trainset.ipynb │ │ │ ├── table7_larger_trainset │ │ │ │ └── index.html │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ │ │ ├── table8_augmentation-at-loading_on_other_datasets │ │ │ │ └── index.html │ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human │ │ │ │ └── index.html │ │ └── pytest │ │ │ └── index.html │ └── index.html ├── quick_tour │ └── index.html ├── search │ └── search_index.json ├── sitemap.xml ├── sitemap.xml.gz ├── tcbench.svg ├── tcbench │ ├── api │ │ ├── overview │ │ │ └── index.html │ │ ├── tcbench_cli_clickutils │ │ │ └── index.html │ │ ├── tcbench_cli_command_aimrepo │ │ │ └── index.html │ │ ├── tcbench_cli_command_campaign │ │ │ └── index.html │ │ ├── tcbench_cli_command_datasets │ │ │ └── index.html │ │ ├── tcbench_cli_command_singlerun │ │ │ └── index.html │ │ ├── tcbench_cli_richutils │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_datasets_utils │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_mirage19_json_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_mirage22_json_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet │ │ │ └── index.html │ │ ├── tcbench_modeling_aimutils │ │ │ └── index.html │ │ ├── tcbench_modeling_augmentation │ │ │ └── index.html │ │ ├── tcbench_modeling_backbone │ │ │ └── index.html │ │ ├── tcbench_modeling_dataprep │ │ │ └── index.html │ │ ├── tcbench_modeling_losses │ │ │ └── index.html │ │ ├── tcbench_modeling_methods │ │ │ └── index.html │ │ ├── tcbench_modeling_run_augmentations_at_loading │ │ │ └── index.html │ │ ├── tcbench_modeling_run_augmentations_at_loading_xgboost │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune │ │ │ └── index.html │ │ ├── tcbench_modeling_run_contrastive_learning_and_finetune │ │ │ └── index.html │ │ └── tcbench_modeling_utils │ │ │ └── index.html │ ├── cli_intro │ │ └── index.html │ ├── index.html │ ├── install │ │ └── index.html │ ├── internals │ │ └── index.html │ └── overview │ │ └── index.html └── tcbench_logo.svg ├── mkdocs.yml ├── notebooks ├── imc23 │ ├── LICENSE │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ ├── figure11_dropout_impact_supervised_setting.ipynb │ ├── figure1_flowpic_example.ipynb │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ ├── figure3_ucdavis_augmentations_comparison.ipynb │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ ├── miscellaneous_stats.ipynb │ ├── 
table10_ucdavis-icdm19_tukey.ipynb │ ├── table2_datasets_properties.ipynb │ ├── table3_xgboost_baseline.ipynb │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ ├── table6_simclr_other_augmentation_pairs.ipynb │ ├── table7_larger_trainset.ipynb │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ └── table9_icdm_finetuning_per_class_metrics_on_human.ipynb └── tutorials │ └── tutorial_load_parquet.ipynb ├── pyproject.toml ├── src └── tcbench │ ├── FIGSHARE_RESOURCES.yml │ ├── __init__.py │ ├── cli │ ├── __init__.py │ ├── clickutils.py │ ├── command_aimrepo.py │ ├── command_campaign.py │ ├── command_datasets.py │ ├── command_fetchartifacts.py │ ├── command_singlerun.py │ ├── main.py │ ├── rich.theme │ └── richutils.py │ ├── libtcdatasets │ ├── __init__.py │ ├── datasets_utils.py │ ├── mirage19_generate_splits.py │ ├── mirage19_json_to_parquet.py │ ├── mirage22_generate_splits.py │ ├── mirage22_json_to_parquet.py │ ├── resources │ │ ├── DATASETS.yml │ │ ├── DATASETS_FILES_MD5.yml │ │ ├── mirage19.yml │ │ ├── mirage22.yml │ │ ├── ucdavis-icdm19.yml │ │ └── utmobilenet21.yml │ ├── ucdavis_icdm19_csv_to_parquet.py │ ├── ucdavis_icdm19_generate_splits.py │ ├── utmobilenet21_csv_to_parquet.py │ └── utmobilenet21_generate_splits.py │ └── modeling │ ├── __init__.py │ ├── aimutils.py │ ├── augmentation.py │ ├── backbone.py │ ├── dataprep.py │ ├── losses.py │ ├── methods.py │ ├── run_augmentations_at_loading.py │ ├── run_augmentations_at_loading_xgboost.py │ ├── run_campaign_augmentations_at_loading.py │ ├── run_campaign_augmentations_at_loading_xgboost.py │ ├── run_campaign_contrastive_learning_and_finetune.py │ ├── run_contrastive_learning_and_finetune.py │ └── utils.py └── tests ├── conftest.py ├── test_augmentations_at_loading.py ├── test_augmentations_at_loading_xgboost.py ├── test_cli_command_campaign.py ├── test_cli_command_singlerun.py ├── test_contrastive_learning_and_finetune.py ├── test_libtcdatasets_datasets_utils.py ├── test_modeling_backbone.py ├── test_modeling_dataprep.py └── test_modeling_methods.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.swp 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/

######
# EXTRAS
__ATTIC__
__TMP__

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 tcbenchstack

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include src/tcbench/libtcdatasets/resources/*yml
include src/tcbench/cli/rich.theme
include src/tcbench/FIGSHARE_RESOURCES.yml
global-exclude *.swp
global-exclude *.bck

--------------------------------------------------------------------------------
/docs.material/about.md:
--------------------------------------------------------------------------------
# The tcbench framework

tcbench is an ML/DL framework specifically for __Traffic Classification (TC)__,
created as a research project by the AI4NET team of the Huawei Technologies
research center in Paris, France.

!!! info "What is Traffic Classification?"

    Nodes within a computer network operate by exchanging
    information, namely *packets*, according to
    standardized protocols (e.g., HTTP for the web). To understand
    network health, this information flow needs to be constantly
    monitored so that one can react accordingly. For instance, one
    might want to prioritize certain traffic (e.g., video meetings)
    or block it (e.g., social media in a working environment).

    Traffic classification is the act of labeling an exchange of packets
    based on the Internet application that generated it.


The academic literature is rife with methods and proposals for TC.
Yet code artifacts are scarce, and public datasets
do not offer common conventions of use.

We designed tcbench with the following goals in mind:

| Goal | State of the art | tcbench |
|:-----|:-----------------|:--------|
|__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds. | An (opinionated) curation of datasets to create easy-to-use parquet files with associated train/val/test folds.|
|__:octicons-file-code-24: Code__ | The TC literature has no reference code base for ML/DL modeling. | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy-to-use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)|
|__:material-monitor-dashboard: Model tracking__ | Most ML frameworks require integration with cloud environments and subscription services. | tcbench uses [aimstack](https://aimstack.io/) to save training metrics on local servers; these can later be explored via its web UI or aggregated into report summaries using tcbench |

## Features and roadmap

tcbench is still under development, but (as suggested by its name) it ultimately aims
to be a reference framework for benchmarking multiple ML/DL solutions
related to TC.

At the current stage, tcbench offers

* Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`.
  You can use these datasets and their curated versions independently of tcbench.
  Check out the [dataset install](/tcbench/datasets/install) process and the [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets).

* Good support for the flowpic input representation, and minimal support
  for the 1d time series input representation (based on network packet properties).

* Data augmentation functionality for the flowpic input representation.

* Modeling via XGBoost, vanilla DL supervision, and contrastive learning (via SimCLR or SupCon).

Most of the functionalities described above relate to our __:material-file-document-outline: [IMC23 paper](/tcbench/papers/imc23/)__.
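For a quick first taste of the CLI, a minimal session looks like the following (a sketch using only subcommands documented on this site; `mirage22` is picked here because it supports automatic download):

```
tcbench datasets install --name mirage22
tcbench datasets samples-count --name mirage22
tcbench datasets schema --name mirage22 --type unfiltered
```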
More exciting features, including more datasets and algorithms, will come in the next months.

Stay tuned :wink:!

--------------------------------------------------------------------------------
/docs.material/artifacts.md:
--------------------------------------------------------------------------------
The submission is associated with three types of artifacts

* __:spider_web: Website__: This website serves as the primary source
of documentation. It collects
    * Documentation about [datasets :simple-artifacthub:](../datasets/install).
    * Documentation about our modeling framework called :material-link-off:[`tcbench`]().
    * Guides on how to [run experiments :fontawesome-solid-flask:](/tcbench/modeling/campaigns/) via `tcbench`.

* __:octicons-file-code-24: Code__: This includes
    * All source code related to :material-link-off:[`tcbench` :material-language-python:]().
    * A collection of [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference)
    used for the tables and figures of the submission.

* __:octicons-stack-24: Data__: This includes
    * The [dataset install, curation and split generation :material-rhombus-split-outline:](../datasets/install) used in our modeling.
    * All [models and logs :material-file-multiple-outline:](/tcbench/modeling/exploring_artifacts/) generated through our modeling campaigns.

## :simple-figshare: Figshare material

A key objective of our submission is to make all artifacts
available to the research community.
For instance, all code will be pushed to a :material-github: github repository,
this website will be published on github pages or a similar solution,
and data artifacts will be hosted on a public cloud storage solution.

Yet, due to the double-blind policy, we temporarily uploaded our artifacts to a
:simple-figshare: [figshare repository](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).

More specifically, on figshare you find the following tarballs.

* `website_documentation.tgz`: Well...if you are reading this page
you already know the tarball contains this website :octicons-smiley-24:.

* `code_artifacts_paper132.tgz`: All code developed. See
    * [Quick tour](../quick_tour) for `tcbench`.
    * [Tables and figures](../paper_tables_and_figures/reference/) for the jupyter notebooks.

* `curated_datasets.tgz`: The preprocessed version of the datasets.
Please see the datasets pages on this website.

* `ml_artifacts.tgz`: All output data generated via modeling campaigns.
For a fine-grained view, those can be explored via the [AIM web UI](/tcbench/modeling/exploring_artifacts/#aim-web-ui),
while results are generated via [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference/).

## :material-package-variant: Unpack artifacts

In the figshare folder we also provide an `unpack_scripts.tgz`
tarball containing the following scripts

```
unpack_all.sh
_unpack_code_artifacts_paper132.sh
_unpack_curated_datasets.sh
_unpack_ml_artifacts.sh
```

These are simple bash scripts that simplify the
extraction and installation of all the material.

Use the following process

1. First of all, prepare a python virtual environment, for example via :simple-anaconda: conda
```
conda create -n tcbench python=3.10 pip
conda activate tcbench
```

2. Download all figshare tarballs into the same folder and run
```
tar -xzvf unpack_scripts.tgz
bash ./unpack_all.sh
```

--------------------------------------------------------------------------------
/docs.material/css/fonts.css:
--------------------------------------------------------------------------------
.md-typeset code,
.md-typeset kbd,
.md-typeset pre {
    font-feature-settings: "kern", "liga";
    font-variant-ligatures: normal;
}

:root{
    --md-text-font:"Roboto";
    --md-code-font:""
}

--------------------------------------------------------------------------------
/docs.material/css/jupyter-notebook.css:
--------------------------------------------------------------------------------
.jp-RenderedHTMLCommon p {
    margin: 0pt;
}

.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {
    display: none;
}

.jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt {
    display: none;
}

.jupyter-wrapper .jp-OutputArea-output pre {
    border-left: solid 5px #e0e0e0;
    padding-left: 5pt;
}

--------------------------------------------------------------------------------
/docs.material/css/material.css:
--------------------------------------------------------------------------------
/* More space at the bottom of the page. */
.md-main__inner {
    margin-bottom: 1.5rem;
}

--------------------------------------------------------------------------------
/docs.material/css/mkdocstrings.css:
--------------------------------------------------------------------------------
/* Indentation. */
div.doc-contents:not(.first) {
    padding-left: 25px;
    border-left: 4px solid rgb(230, 230, 230);
    margin-bottom: 80px;
}

/* Avoid breaking parameters name, etc. in table cells. */
td code {
    word-break: normal !important;
}

--------------------------------------------------------------------------------
/docs.material/css/style.css:
--------------------------------------------------------------------------------
/* Mark external links as such (also in nav) */
a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after {
    /* https://primer.style/octicons/link-external-16 */
    background-image: url('data:image/svg+xml,');
    height: 0.8em;
    width: 0.8em;
    margin-left: 0.2em;
    content: ' ';
    display: inline-block;
}

/* More space at the bottom of the page */
.md-main__inner {
    margin-bottom: 1.5rem;
}

--------------------------------------------------------------------------------
/docs.material/css/tables_style.css:
--------------------------------------------------------------------------------
th, td {
    border: 1px solid var(--md-typeset-table-color);
    border-spacing: 0;
    border-bottom: none;
    border-left: none;
    border-top: none;
}

th {
    background:var(--md-primary-fg-color);
    color:white;
}

.md-typeset table:not([class]) th {
    font-weight: 200;
}

.md-typeset__table {
    line-height: 1;
}

.md-typeset__table table:not([class]) {
    font-size: .74rem;
    border-right: none;
}

.md-typeset__table table:not([class]) td,
.md-typeset__table table:not([class]) th {
    padding: 9px;
}

/* light mode alternating table bg colors */
.md-typeset__table tr:nth-child(2n) {
    background-color: #f8f8f8;
}

/* dark mode alternating table bg colors */
[data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) {
    background-color: hsla(var(--md-hue),25%,25%,1)
}

--------------------------------------------------------------------------------
/docs.material/datasets/datasets.csv:
--------------------------------------------------------------------------------
Name,Classes, PDF, Data, Code, Auto-download
ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12:
mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark:
mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark:
utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12:
--------------------------------------------------------------------------------
/docs.material/datasets/datasets_splits.md:
--------------------------------------------------------------------------------
The splits described here are specific to our submission,
and they aim to replicate the earlier IMC22 paper.


### ucdavis-icdm19

Differently from the other datasets described here,
`ucdavis-icdm19` does NOT require any filtering/adaptation
after transforming the original CSVs into a monolithic parquet.

The testing partitions are also predefined ("human" and "script").

We do, however, need to define splits of 100 samples per class
for modeling. To do so we perform a random shuffle of
the data and generate 5 non-overlapping groups of 100 samples.

```
python datasets/generate_splits.py --config config.yml
```

???+ note "output"
    ```
    loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
    saving: datasets/ucdavis-icdm19/train_split_0.parquet
    saving: datasets/ucdavis-icdm19/train_split_1.parquet
    saving: datasets/ucdavis-icdm19/train_split_2.parquet
    saving: datasets/ucdavis-icdm19/train_split_3.parquet
    saving: datasets/ucdavis-icdm19/train_split_4.parquet
    loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
    saving: datasets/ucdavis-icdm19/test_split_human.parquet
    saving: datasets/ucdavis-icdm19/test_split_script.parquet
    ```
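The core of this procedure is small enough to sketch inline. The following is only an illustration, not the actual `generate_splits.py` (whose details live in the repo); the `app` column name and the 100-samples-per-class budget follow the documentation on this site, and in the real pipeline only the pretraining partition feeds these folds (the human/script partitions are kept for testing, as noted above):

```
import pandas as pd

def make_splits(df, n_splits=5, per_class=100, seed=1):
    # shuffle once, then carve out non-overlapping chunks per class
    shuffled = df.sample(frac=1, random_state=seed).reset_index(drop=True)
    # rank rows within each class after the shuffle
    rank = shuffled.groupby("app").cumcount()
    return [
        shuffled[(rank >= i * per_class) & (rank < (i + 1) * per_class)]
        for i in range(n_splits)
    ]

df = pd.read_parquet("datasets/ucdavis-icdm19/ucdavis-icdm19.parquet")
for i, split in enumerate(make_splits(df)):
    split.to_parquet(f"datasets/ucdavis-icdm19/train_split_{i}.parquet")
```

Because every split is a disjoint slice of the same shuffled frame, the 5 groups are guaranteed not to overlap.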
--------------------------------------------------------------------------------
/docs.material/datasets/guides/index.md:
--------------------------------------------------------------------------------
---
icon: material/book-outline
---

# Guides

[:simple-jupyter: Datasets loading](/tcbench/datasets/guides/tutorial_load_datasets): A jupyter notebook
showing the APIs used to load the parquet files composing a dataset.

--------------------------------------------------------------------------------
/docs.material/datasets/import.md:
--------------------------------------------------------------------------------
---
icon: material/cloud-download-outline
title: Import
---

# Import curated datasets

The `datasets` command also offers the option
to import a pre-computed curation of the datasets.

This is useful

* To avoid redundant computation.
Some of the preprocessing requires ingenuity and
a multiprocessing/multicore architecture.

* To further strengthen replicability (although
the curation process of tcbench is deterministic).

The [datasets summary table](/tcbench/datasets/#table-datasets-properties) indicates that
not all datasets have their curated data already available.
This is because some datasets (namely MIRAGE) have
tighter licensing. For those datasets,
please refer to the related installation page.

## The `import` subcommand

For datasets whose license allows redistributing
modified versions, the curated data is stored
in a public [:simple-figshare: figshare collection](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).

You can manually fetch the datasets from the collection or
automate their installation with the `datasets import` subcommand.

```
tcbench datasets import --name ucdavis-icdm19
```

!!! info "Output"
    ```
    Downloading... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 554.2 MB / 554.2 MB eta 0:00:00
    opening: /tmp/tmpb586lqhh/42438621

    Files installed
    Datasets
    └── ucdavis-icdm19
        └── 📁 preprocessed/
            ├── ucdavis-icdm19.parquet
            ├── LICENSE
            └── 📁 imc23/
                ├── test_split_human.parquet
                ├── test_split_script.parquet
                ├── train_split_0.parquet
                ├── train_split_1.parquet
                ├── train_split_2.parquet
                ├── train_split_3.parquet
                └── train_split_4.parquet
    ```


Notice that `installed` is not set. Indeed,
the prepared curated datasets do NOT repack
the original datasets, just the preprocessed ones
(see the [meta-data](/tcbench/datasets/metadata/#samples-count-reports) page).

You can also import the curated data by downloading the individual
archives from figshare and using the `--archive` option

```
tcbench datasets import \
    --name ucdavis-icdm19 \
    --archive <archive-path>
```

!!! warning ":simple-figshare: Figshare versioning"

    Figshare updates the version of a published entry for any modification
    to any of the elements related to the entry (including changes to
    the description).

    tcbench is configured to automatically fetch the latest version of
    the curated datasets. But if you download them manually, make
    sure to download the latest versions.
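When fetching archives by hand, it can also be worth verifying their integrity against the curated data MD5 reported by `tcbench datasets info` (shown below). A minimal sketch in plain Python, where the archive filename is a placeholder:

```
import hashlib
from pathlib import Path

def md5sum(path, chunk_size=1 << 20):
    # compute the MD5 of a file without loading it fully in memory
    digest = hashlib.md5()
    with Path(path).open("rb") as fin:
        for chunk in iter(lambda: fin.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# value as reported by `tcbench datasets info --name ucdavis-icdm19`
EXPECTED = "9828cce0c3a092ff19ed77f9e07f317c"
assert md5sum("curated_datasets_ucdavis-icdm19.tgz") == EXPECTED
```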
info "Output" 102 | ``` 103 | Datasets 104 | └── ucdavis-icdm19 105 | └── 🚩 classes: 5 106 | 🔗 paper_url: https://arxiv.org/pdf/1812.09761.pdf 107 | 🔗 website: https://github.com/shrezaei/Semi-supervised-Learning-QUIC- 108 | 🔗 data: https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE 109 | 🔗 curated data: https://figshare.com/ndownloader/files/42437043 110 | ➕ curated data MD5: 9828cce0c3a092ff19ed77f9e07f317c 111 | 📁 installed: None 112 | 📁 preprocessed: None 113 | 📁 data splits: None 114 | ``` 115 | -------------------------------------------------------------------------------- /docs.material/datasets/index.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | TCBench supports the following *public* traffic classification datasets 4 | 5 | ##### Table : Datasets properties 6 | | Name | Applications | Links | License | Our curation | 7 | |:----:|:------------:|:-----:|:-------:|:------------:| 8 | |[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)|5|[:fontawesome-regular-file-pdf:](https://arxiv.org/pdf/1812.09761.pdf)[:material-package-down:](https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE)[:material-github:](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-)| [:material-creative-commons:](https://creativecommons.org/licenses/by/4.0/) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_ucdavisicdm19_tgz/23538141/1) | 9 | |[`mirage19`](/tcbench/datasets/install/mirage19/)|20|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-2019.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - | 10 | |[`mirage22`](/tcbench/datasets/install/mirage22/)|9|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - | 11 | |[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)|17|[:fontawesome-regular-file-pdf:](https://ieeexplore.ieee.org/abstract/document/9490678/)[:material-package-down:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)[:material-github:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)| [:simple-gnu: GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_utmobilenet21_tgz/23648703/1) | 12 | 13 | At a glance, these datasets 14 | 15 | * Are collections of either *CSV or JSON* files. 16 | 17 | * Are reporting individual *packet level information or per-flow time series* and metrics. 18 | 19 | * May have been organized in subfolders, namely *partitions*, 20 | to reflect the related measurement campaign (see `ucdavis-icdm19`, `utmobilenet21`). 21 | 22 | * May have file names carrying semantic. 23 | 24 | * May require preprocessing to remove "background" noise, i.e., 25 | traffic unrelated to a target application (see `mirage19` and `mirage22`). 26 | 27 | * Do not have reference train/validation/test splits. 
In other words, these datasets need to be *curated*
before they can be used.

!!! tip "Important"

    The integration of these datasets in tcbench does not break
    the original licensing of the data, nor does it break their ownership.
    Rather, the integration aims at easing access to these datasets.
    We thus encourage researchers and practitioners interested in
    using these datasets to cite the original publications
    (see links in the table above).

## Terminology

When describing datasets and related processing we
use the following conventions (a code sketch follows the list):

* A __partition__ is a set of samples
pre-defined by the authors of the dataset.
For instance, a partition can relate to a
specific set of samples to use for training/test
(see [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)).

* A __split__ is a set of indexes of samples
that need to be used for train/validation/test.

* An __unfiltered__ dataset corresponds to a
monolithic parquet file containing the
original raw data of a dataset (no filtering
is applied).

* A __curated__ dataset is generated by
processing the unfiltered parquet
to clean noise, remove small flows, etc.;
each dataset has slightly different
curation rules.
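To make the distinction concrete, here is how the different flavors can be touched programmatically once `ucdavis-icdm19` has been imported (a minimal sketch with pandas; the relative paths mirror the `datasets import` output shown on the import page and may differ on your setup):

```
import pandas as pd

# unfiltered: the monolithic parquet with the original raw data
df_all = pd.read_parquet("ucdavis-icdm19/preprocessed/ucdavis-icdm19.parquet")

# a split: one predefined train fold used for modeling
df_train0 = pd.read_parquet("ucdavis-icdm19/preprocessed/imc23/train_split_0.parquet")

# per-class sample count of the split (100 samples per class, see samples-count)
print(df_train0["app"].value_counts())
```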
--------------------------------------------------------------------------------
/docs.material/datasets/install/index.md:
--------------------------------------------------------------------------------
---
icon: material/arrow-down-bold-box
---

# Datasets installation

Dataset installation is triggered with the `datasets install` subcommand

```
tcbench datasets install --help
```

!!! info "Output"
    ```
    Usage: tcbench datasets install [OPTIONS]

    Install a dataset.

    ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────╮
    │ *  --name          -n  [ucdavis-icdm19|utmobilenet21|mirage19|mirage22]  Dataset to install. [required]│
    │    --input-folder  -i  PATH                                              Folder where to find          │
    │                                                                          pre-downloaded tarballs.      │
    │    --help                                                                Show this message and exit.   │
    ╰────────────────────────────────────────────────────────────────────────────────────────────────────────╯
    ```

The raw data of the datasets is hosted either on websites
or in cloud environments. Automatic download from
those locations is available only for some of the datasets.

| Name | Auto download |
|:----:|:-------------:|
|[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)| :octicons-x-24: |
|[`mirage19`](/tcbench/datasets/install/mirage19/)| :material-check: |
|[`mirage22`](/tcbench/datasets/install/mirage22/)| :material-check: |
|[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)| :octicons-x-24: |

If auto download is not possible, to install the dataset
you need to manually fetch the related archives, place them
in a folder, e.g., `/download`, and provide the `--input-folder`
option when triggering the installation.

When installing a dataset, `tcbench` also
shows two types of reports as formatted tables.

* __Samples count__: These tables collect
the number of samples (i.e., flows)
available.

* __Stats__: The curation process
can filter out flows (e.g., based
on a minimum number of packets)
or remove classes without a minimum
number of flows. As such, when
installing, `tcbench` shows
general stats (mean, std, percentiles)
about the number of packets
per flow across classes.

Please check the specific install page of each dataset for more details.


## Datasets deletion

The dataset files are installed within the
python environment where tcbench is installed.

You can delete a dataset using the following command

```
tcbench datasets delete --name <dataset-name>
```

--------------------------------------------------------------------------------
/docs.material/datasets/samples_count/index.md:
--------------------------------------------------------------------------------
---
icon: octicons/number-24
---

# Samples count report

An important dataset property to keep an eye
on when aiming for modeling is the number of
samples available for each class in a dataset.

You can easily recover this using the `datasets samples-count` subcommand.

For instance,
the following command computes the samples count for the *unfiltered*
version of the [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19) dataset.

```
tcbench datasets samples-count --name ucdavis-icdm19
```

!!! note "Output"
    ```
    unfiltered
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ partition                   ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ pretraining                 │ google-doc    │    1221 │
    │                             │ google-drive  │    1634 │
    │                             │ google-music  │     592 │
    │                             │ google-search │    1915 │
    │                             │ youtube       │    1077 │
    │                             │ __total__     │    6439 │
    ├─────────────────────────────┼───────────────┼─────────┤
    │ retraining-human-triggered  │ google-doc    │      15 │
    │                             │ google-drive  │      18 │
    │                             │ google-music  │      15 │
    │                             │ google-search │      15 │
    │                             │ youtube       │      20 │
    │                             │ __total__     │      83 │
    ├─────────────────────────────┼───────────────┼─────────┤
    │ retraining-script-triggered │ google-doc    │      30 │
    │                             │ google-drive  │      30 │
    │                             │ google-music  │      30 │
    │                             │ google-search │      30 │
    │                             │ youtube       │      30 │
    │                             │ __total__     │     150 │
    └─────────────────────────────┴───────────────┴─────────┘
    ```

To obtain the breakdown of the first train split instead

```
tcbench datasets samples-count --name ucdavis-icdm19 --split 0
```

!!! note "Output"
    ```
    filtered, split: 0
    ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ google-doc    │     100 │
    │ google-drive  │     100 │
    │ google-music  │     100 │
    │ google-search │     100 │
    │ youtube       │     100 │
    ├───────────────┼─────────┤
    │ __total__     │     500 │
    └───────────────┴─────────┘
    ```

...or the `human` test split

```
tcbench datasets samples-count --name ucdavis-icdm19 --split human
```

!!! note "Output"
    ```
    filtered, split: human
    ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ youtube       │      20 │
    │ google-drive  │      18 │
    │ google-doc    │      15 │
    │ google-music  │      15 │
    │ google-search │      15 │
    ├───────────────┼─────────┤
    │ __total__     │      83 │
    └───────────────┴─────────┘
    ```
note "Output" 79 | ``` 80 | filtered, split: human 81 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 82 | ┃ app ┃ samples ┃ 83 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 84 | │ youtube │ 20 │ 85 | │ google-drive │ 18 │ 86 | │ google-doc │ 15 │ 87 | │ google-music │ 15 │ 88 | │ google-search │ 15 │ 89 | ├───────────────┼─────────┤ 90 | │ __total__ │ 83 │ 91 | └───────────────┴─────────┘ 92 | ``` 93 | -------------------------------------------------------------------------------- /docs.material/datasets/samples_count/ucdavis-icdm19.md: -------------------------------------------------------------------------------- 1 | # `ucdavis-icdm19` 2 | 3 | Below we report the samples count for each version of the dataset. 4 | 5 | !!! tip "Semantic of the splits" 6 | 7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23). 8 | 9 | ### unfiltered 10 | 11 | The unfitered version contains all data before curation. 12 | 13 | ``` 14 | tcbench datasets samples-count --name ucdavis-icdm19 15 | ``` 16 | 17 | !!! note "Output" 18 | ``` 19 | unfiltered 20 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 21 | ┃ partition ┃ app ┃ samples ┃ 22 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 23 | │ pretraining │ google-doc │ 1221 │ 24 | │ │ google-drive │ 1634 │ 25 | │ │ google-music │ 592 │ 26 | │ │ google-search │ 1915 │ 27 | │ │ youtube │ 1077 │ 28 | │ │ __total__ │ 6439 │ 29 | ├─────────────────────────────┼───────────────┼─────────┤ 30 | │ retraining-human-triggered │ google-doc │ 15 │ 31 | │ │ google-drive │ 18 │ 32 | │ │ google-music │ 15 │ 33 | │ │ google-search │ 15 │ 34 | │ │ youtube │ 20 │ 35 | │ │ __total__ │ 83 │ 36 | ├─────────────────────────────┼───────────────┼─────────┤ 37 | │ retraining-script-triggered │ google-doc │ 30 │ 38 | │ │ google-drive │ 30 │ 39 | │ │ google-music │ 30 │ 40 | │ │ google-search │ 30 │ 41 | │ │ youtube │ 30 │ 42 | │ │ __total__ │ 150 │ 43 | └─────────────────────────────┴───────────────┴─────────┘ 44 | ``` 45 | 46 | 47 | ### First training split 48 | 49 | ``` 50 | tcbench datasets samples-count --name ucdavis-icdm19 --split 0 51 | ``` 52 | 53 | !!! note "Output" 54 | ``` 55 | filtered, split: 0 56 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 57 | ┃ app ┃ samples ┃ 58 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 59 | │ google-doc │ 100 │ 60 | │ google-drive │ 100 │ 61 | │ google-music │ 100 │ 62 | │ google-search │ 100 │ 63 | │ youtube │ 100 │ 64 | ├───────────────┼─────────┤ 65 | │ __total__ │ 500 │ 66 | └───────────────┴─────────┘ 67 | ``` 68 | 69 | ### `human` test split 70 | 71 | This is equivalent to the `human` partition of the unfiltered dataset. 72 | 73 | ``` 74 | tcbench datasets samples-count --name ucdavis-icdm19 --split human 75 | ``` 76 | 77 | !!! note "Output" 78 | ``` 79 | filtered, split: human 80 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 81 | ┃ app ┃ samples ┃ 82 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 83 | │ youtube │ 20 │ 84 | │ google-drive │ 18 │ 85 | │ google-doc │ 15 │ 86 | │ google-music │ 15 │ 87 | │ google-search │ 15 │ 88 | ├───────────────┼─────────┤ 89 | │ __total__ │ 83 │ 90 | └───────────────┴─────────┘ 91 | ``` 92 | 93 | ### `script` test split 94 | 95 | This is equivalent to the `script` partition of the unfiltered dataset. 96 | 97 | ``` 98 | tcbench datasets samples-count --name ucdavis-icdm19 --split script 99 | ``` 100 | 101 | !!! 
note "Output" 102 | ``` 103 | filtered, split: script 104 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 105 | ┃ app ┃ samples ┃ 106 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 107 | │ google-doc │ 30 │ 108 | │ google-drive │ 30 │ 109 | │ google-music │ 30 │ 110 | │ google-search │ 30 │ 111 | │ youtube │ 30 │ 112 | ├───────────────┼─────────┤ 113 | │ __total__ │ 150 │ 114 | └───────────────┴─────────┘ 115 | ``` 116 | -------------------------------------------------------------------------------- /docs.material/datasets/samples_count/utmobilenet21.md: -------------------------------------------------------------------------------- 1 | # `utmobilenet21` 2 | 3 | Below we report the samples count for each version of the dataset. 4 | 5 | !!! tip "Semantic of the splits" 6 | 7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23). 8 | 9 | ### unfiltered 10 | 11 | The unfitered version contains all data before curation. 12 | 13 | ``` 14 | tcbench datasets samples-count --name utmobilenet21 15 | ``` 16 | 17 | !!! note "Output" 18 | ``` 19 | unfiltered 20 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━┓ 21 | ┃ app ┃ samples ┃ 22 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━┩ 23 | │ youtube │ 5591 │ 24 | │ reddit │ 4370 │ 25 | │ google-maps │ 4347 │ 26 | │ spotify │ 2550 │ 27 | │ netflix │ 2237 │ 28 | │ pinterest │ 2165 │ 29 | │ hulu │ 1839 │ 30 | │ instagram │ 1778 │ 31 | │ dropbox │ 1752 │ 32 | │ facebook │ 1654 │ 33 | │ twitter │ 1494 │ 34 | │ gmail │ 1133 │ 35 | │ pandora │ 949 │ 36 | │ messenger │ 837 │ 37 | │ google-drive │ 803 │ 38 | │ hangout │ 720 │ 39 | │ skype │ 159 │ 40 | ├──────────────┼─────────┤ 41 | │ __total__ │ 34378 │ 42 | └──────────────┴─────────┘ 43 | ``` 44 | 45 | 46 | ### First training split 47 | 48 | ``` 49 | tcbench datasets samples-count --name utmobilenet21 --split 0 50 | ``` 51 | 52 | !!! note "Output" 53 | ``` 54 | min_pkts: 10, split: 0 55 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ 56 | ┃ app ┃ train_samples ┃ val_samples ┃ test_samples ┃ all_samples ┃ 57 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ 58 | │ youtube │ 2021 │ 225 │ 250 │ 2496 │ 59 | │ google-maps │ 1456 │ 162 │ 180 │ 1798 │ 60 | │ hulu │ 947 │ 105 │ 117 │ 1169 │ 61 | │ reddit │ 661 │ 73 │ 82 │ 816 │ 62 | │ spotify │ 538 │ 60 │ 66 │ 664 │ 63 | │ netflix │ 391 │ 44 │ 48 │ 483 │ 64 | │ pinterest │ 353 │ 39 │ 44 │ 436 │ 65 | │ twitter │ 296 │ 33 │ 36 │ 365 │ 66 | │ instagram │ 222 │ 25 │ 27 │ 274 │ 67 | │ hangout │ 206 │ 23 │ 25 │ 254 │ 68 | │ dropbox │ 193 │ 21 │ 24 │ 238 │ 69 | │ pandora │ 162 │ 18 │ 20 │ 200 │ 70 | │ facebook │ 111 │ 12 │ 14 │ 137 │ 71 | │ google-drive │ 105 │ 12 │ 13 │ 130 │ 72 | ├──────────────┼───────────────┼─────────────┼──────────────┼─────────────┤ 73 | │ __total__ │ 7662 │ 852 │ 946 │ 9460 │ 74 | └──────────────┴───────────────┴─────────────┴──────────────┴─────────────┘ 75 | ``` 76 | -------------------------------------------------------------------------------- /docs.material/datasets/schemas/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: material/table 3 | title: Schemas 4 | --- 5 | 6 | # Datasets schemas 7 | 8 | Despite the [curation](/tcbench/datasets/curation_and_metadata/), datasets can have intrinsically 9 | different schemas. 10 | 11 | You can investigate those on the command line via 12 | the `datasets schema` sub-command. 
13 | 14 | ``` 15 | tcbench datasets schema --help 16 | 17 | Usage: tcbench datasets schema [OPTIONS] 18 | 19 | Show datasets schemas 20 | 21 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 22 | │ --name -n [ucdavis-icdm19|utmobilenet21|mirage19|mirage22] Dataset to install │ 23 | │ --type -t [unfiltered|filtered|splits] Schema type (unfiltered: original raw data; filtered: │ 24 | │ curated data; splits: train/val/test splits) │ 25 | │ --help Show this message and exit. │ 26 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 27 | ``` 28 | 29 | Besides the dataset name `--name`, selecting 30 | a schema is simplified via the single parameter `--type`, 31 | which maps to the parquet files as follows: 32 | 33 | * `"unfiltered"` corresponds to the monolithic files 34 | before any filtering (i.e., the files under `/preprocessed`) 35 | 36 | * `"filtered"` corresponds to the filtered 37 | version of the monolithic files (i.e., the files 38 | having `minpkts` in the filename). 39 | 40 | * `"splits"` corresponds to the split files 41 | (i.e., the files having `xyz_split.parquet` 42 | in the filename). 43 | 44 | 45 | -------------------------------------------------------------------------------- /docs.material/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log1.png -------------------------------------------------------------------------------- /docs.material/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log2.png -------------------------------------------------------------------------------- /docs.material/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log3.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run1.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run2.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run3.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_mirage19.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage19.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_mirage22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage22.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_ucdavis-icdm19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_ucdavis-icdm19.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_utmobilenet21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_utmobilenet21.png -------------------------------------------------------------------------------- /docs.material/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TCBench 3 | template: home.html 4 | --- 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs.material/index.md.DEPRECATED: -------------------------------------------------------------------------------- 1 | This website documents code and data artifacts related to the IMC23 submission #132 titled 2 | 3 | !!! quote "" 4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__ 5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations” 6 | from IMC’22* 7 | 8 | Our submission investigates the role of data 9 | augmentation by using both supervised 10 | and contrastive learning techniques 11 | across [4 datasets](datasets/install). 12 | 13 | It replicates and reproduces the following paper 14 | from the IMC22 program 15 | 16 | 17 | ``` 18 | @inproceedings{10.1145/3517745.3561436, 19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval}, 20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations}, 21 | year = {2022}, 22 | isbn = {9781450392594}, 23 | publisher = {Association for Computing Machinery}, 24 | address = {New York, NY, USA}, 25 | url = {https://doi.org/10.1145/3517745.3561436}, 26 | doi = {10.1145/3517745.3561436}, 27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference}, 28 | pages = {647–654}, 29 | numpages = {8}, 30 | location = {Nice, France}, 31 | series = {IMC '22} 32 | } 33 | ``` 34 | 35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`, 36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of 37 | frequency histograms extracted from consecutive time windows of the flow -- 38 | applied on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 
39 | 40 | In the first part of the submission we investigate how augmentations 41 | affect classification performance -- the study considers 3 image transformations (*rotation, 42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*) 43 | applied to packet timestamps -- when used either in a fully supervised setting or via 44 | contrastive learning. 45 | 46 | !!! info "Key takeaways from reproducibility" 47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 48 | Specifically, we uncover a data shift present in the dataset itself which justifies our results; 49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`. 50 | 51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently 52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`, 53 | we do not find statistically significant differences across the different augmentations. 54 | 55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet 56 | relying on more samples is beneficial to boost performance. 57 | 58 | Then, in the second part of the submission we replicate the 59 | analysis testing the same 6 augmentations across 3 other datasets. 60 | 61 | !!! info "Key takeaways from replicability" 62 | Using multiple datasets allows us to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e., 63 | the *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) 64 | is superior to the alternative transformations presented in the paper. 65 | 66 | 67 | ## Website conventions 68 | 69 | * :material-file-document-outline:`imc22-paper` is used to reference the replicated/reproduced paper. 70 | 71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available. 72 | 73 | * :material-link-off: suggests a link is expected to be added but is not yet available. 74 | -------------------------------------------------------------------------------- /docs.material/install.md: -------------------------------------------------------------------------------- 1 | # Install and config 2 | 3 | ## Download code and artifacts 4 | 5 | If you see this documentation it means 6 | you downloaded the file from figshare so you already have the code 7 | in your hands :) 8 | 9 | !!! note 10 | It is our intent to push all the code into a proper repository 11 | 12 | 13 | ## Configure a python environment :material-language-python: 14 | 15 | We first create a `conda` environment to install 16 | all required dependencies 17 | 18 | ``` 19 | conda create -n replicating-imc22-flowpic python=3.10 pip 20 | conda activate replicating-imc22-flowpic 21 | python -m pip install -r ./requirements.txt 22 | ``` 23 | 24 | The code artifacts are also a python package 25 | that can be installed inside the environment. 26 | From inside `/replicate_imc22_flowpic` run 27 | 28 | ``` 29 | python -m pip install .
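# optional sanity check -- an assumption on our part: the artifacts install
# the `tcbench` python package (the docs' notebooks use `import tcbench`),
# so a clean import suggests the environment is ready
python -c "import tcbench"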
30 | ``` 31 | -------------------------------------------------------------------------------- /docs.material/modeling/aim_repos/aim_webui.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: AIM Web UI 3 | icon: material/monitor-dashboard 4 | --- 5 | 6 | # AIM Web UI 7 | 8 | The AIM web interface is quite intuitive and 9 | the official documentation already provides 10 | a [general purpose tutorial](https://aimstack.readthedocs.io/en/latest/ui/overview.html). 11 | 12 | In this mini guide we limit ourselves to showcasing a basic set 13 | of operations to navigate the ML artifacts using 14 | some artifacts from our [IMC23](/tcbench/papers/imc23) paper. 15 | 16 | To replicate the following, make sure you [installed 17 | the needed artifacts](/tcbench/papers/imc23/artifacts/#downloading-artifacts). 18 | 19 | ``` 20 | aim up --repo notebooks/imc23/campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/ 21 | ``` 22 | 23 | !!! info "Output" 24 | ``` 25 | Running Aim UI on repo `` 26 | Open http://127.0.0.1:43800 27 | Press Ctrl+C to exit 28 | ``` 29 | 30 | Run `aim up --help` for more options (e.g., specifying a different port or hostname). 31 | 32 | When visiting the URL reported in the output 33 | you land on the home page of the AIM repository. 34 | 35 | This collects a variety of aggregate metrics 36 | and tracks activity over time. 37 | Hence, in our scenario 38 | the home page of the ML artifacts is mostly empty 39 | because all campaigns were generated at a specific moment in time. 40 | 41 | [![aim-home-page]][aim-home-page] 42 | 43 | [aim-home-page]: ../../figs/aim_home-page.png 44 | 45 | The left sidebar allows switching the view. 46 | In particular, "Runs" shows a tabular 47 | view of the runs collected in the repository. 48 | 49 | [![aim-run1]][aim-run1] 50 | 51 | [aim-run1]: ../../figs/aim_run1.png 52 | 53 | From the view you can see the hash of each run 54 | and scrolling horizontally you can glance 55 | over the metadata stored for each run. 56 | 57 | [![aim-run2]][aim-run2] 58 | 59 | [aim-run2]: ../../figs/aim_run2.png 60 | 61 | The search bar at the top of the page 62 | allows filtering runs. 63 | It accepts python expressions bound 64 | to a `run` entry point. 65 | 66 | For instance, in the following example we filter 67 | one specific run based on hyperparameters. 68 | 69 | [![aim-run3]][aim-run3] 70 | 71 | [aim-run3]: ../../figs/aim_run3.png 72 | 73 | 74 | !!! tip "Using the search box" 75 | 76 | The search box accepts python expressions and `run.hparams` 77 | is a dictionary of key-value pairs related to the different runs. 78 | 79 | As in the example, you can use the traditional python 80 | syntax of `dict[] == ` to filter, but the search 81 | box also supports a dot-notated syntax `hparams. == ` 82 | which has autocomplete. 83 | 84 | In the example, the search is based on equality but any other 85 | python operation is allowed. 86 | 87 | When clicking the hash of a run (e.g., the one we filtered) 88 | we switch to a per-run view which 89 | further details the collected metadata of the selected run. 90 | 91 | [![aim-log1]][aim-log1] 92 | 93 | [aim-log1]: ../../figs/aim_log1.png 94 | 95 | For instance, when scrolling at 96 | the bottom of the per-run page 97 | we can see that AIM details 98 | 99 | * The specific git commit used when executing the run. 100 | 101 | * The specific python packages and related versions 102 | available in the environment when executing the run.
103 | 104 | Both are automatically tracked by AIM with 105 | no extra code required (besides activating 106 | their collection when creating the run). 107 | 108 | [![aim-log2]][aim-log2] 109 | 110 | [aim-log2]: ../../figs/aim_log2.png 111 | 112 | The per-run view offers a variety of information 113 | organized in multiple tabs. 114 | 115 | For instance, the tab "Logs" 116 | details the console output. 117 | 118 | [![aim-log3]][aim-log3] 119 | 120 | [aim-log3]: ../../figs/aim_log3.png 121 | 122 | -------------------------------------------------------------------------------- /docs.material/modeling/aim_repos/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Explore AIM repos 3 | icon: simple/awsorganizations 4 | --- 5 | 6 | An AIM repository is merely a folder 7 | where AIM stores a [rocksdb database](https://rocksdb.org/docs/getting-started.html) 8 | (see [AIM reference doc](https://aimstack.readthedocs.io/en/v3.17.5/understanding/data_storage.html) for more info). 9 | 10 | AIM has great functionality for tracking metrics 11 | but has very little support for 12 | tracking general artifacts beyond 13 | console output, nor native support for storing trained model 14 | files. 15 | 16 | Hence tcbench complements AIM by collecting 17 | run artifacts into run-specific folders. 18 | 19 | Specifically, a tcbench repository has the following structure 20 | 21 | ``` 22 | 23 | ├── .aim 24 | ├── artifacts 25 | │   ├── 001baa39ed8d4b8bb9966e94 26 | │   ├── 025830cb840b4f3f8f0a1625 27 | │   ├── 050bae064b5246f88e821a29 28 | ... 29 | └── campaign_summary 30 | └── 31 | ``` 32 | 33 | * Each subfolder of `/artifacts` collects 34 | the artifacts of a specific run and 35 | is named with the hash of the run itself. 36 | 37 | * The `/campaign_summary` subfolder collects 38 | reports generated by the [`aimrepo report` subcommand](/tcbench/modeling/aim_repos/aimrepo_subcmd/). 39 | 40 | Investigating the content of one run artifact folder 41 | 42 | ``` 43 | ls -1 /artifacts/001baa39ed8d4b8bb9966e94 44 | ``` 45 | 46 | !!! note "Output" 47 | 48 | ``` 49 | log.txt 50 | params.yml 51 | test-human_class_rep.csv 52 | test-human_conf_mtx.csv 53 | test-script_class_rep.csv 54 | test-script_conf_mtx.csv 55 | test-train-val-leftover_class_rep.csv 56 | test-train-val-leftover_conf_mtx.csv 57 | train_class_rep.csv 58 | train_conf_mtx.csv 59 | val_class_rep.csv 60 | val_conf_mtx.csv 61 | best_model_weights_split_2.pt 62 | ``` 63 | 64 | For each run tcbench creates the following artifacts: 65 | 66 | * `params.yml` is a YAML file collecting 67 | parameters used when triggering a run, i.e., both 68 | the arguments explicitly defined on the command line, 69 | as well as the ones with default values. 70 | 71 | * `log.txt` collects the console output generated by the run. 72 | 73 | * `_class_rep.csv` contains a [classification report](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). The filename is bound to the context (i.e., train, val, test) 74 | used to generate it. 75 | 76 | * `_conf_mtx.csv` contains a [confusion matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html). The filename is bound to the context (i.e., train, val, test) 77 | used to generate it. 78 | 79 | * `best_model_weights_split_.pt` stores the weights of the best 80 | trained pytorch model (for a deep learning model).
The filename is bound to the specific 81 | split index configured when triggering the run. 82 | 83 | * `xgb_model_split_.json` stores an XGBoost model (when training 84 | via xgboost). The filename is bound to the specific 85 | split index configured when triggering the run. 86 | 87 | -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_home-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_home-page.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log1.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log2.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log3.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run1.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run2.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run3.png -------------------------------------------------------------------------------- /docs.material/modeling/overview.md: -------------------------------------------------------------------------------- 1 | # Modeling overview 2 | 3 | When training ML/DL models, 4 | finding the right combination of data 5 | preprocessing/splitting, algorithms and 6 | hyper-parameters can be challenging. 7 | Even more so when the modeling process 8 | aims to be [repeatable/replicable/reproducible](https://www.acm.org/publications/policies/artifact-review-badging). 9 | 10 | To ease this process it is key to 11 | 12 | * Collect __telemetry and metadata__. 13 | This includes both the parameters used to create models 14 | as well as lower level metrics such as the evolution of the 15 | training loss over time.
16 | 17 | * Generate __artifacts__ such as 18 | reports about the overall performance 19 | (e.g., confusion matrices). 20 | 21 | ## AIM stack tracking 22 | 23 | `tcbench` integrates 24 | with [AIM stack](https://aimstack.io/), an 25 | open-source and self-hosted model 26 | tracking framework enabling logging of metrics 27 | related to model training. Such telemetry 28 | can later be explored via a [web interface](https://aimstack.readthedocs.io/en/latest/ui/overview.html) 29 | or [programmatically extracted](https://aimstack.readthedocs.io/en/latest/using/query_runs.html) via the AIM SDK. 30 | 31 | !!! info "__Why not use more popular frameworks?__" 32 | 33 | There are [many solutions for model tracking](https://neptune.ai/blog/best-ml-experiment-tracking-tools). 34 | While frameworks such as __Weights & Biases__ or __Neptune.ai__ 35 | are extremely rich with features, unfortunately they typically 36 | are cloud-based solutions and not necessarily open-sourced. 37 | 38 | Alternative frameworks such as __Tensorboard__ and __MLFlow__ 39 | have only primitive functionalities compared to AIM stack. 40 | 41 | AIM stack sits in the middle of this spectrum: 42 | it is self-hosted (i.e., no need to push data to the cloud) 43 | and provides nice data exploration features. 44 | 45 | ## Runs and campaigns 46 | 47 | AIM collects modeling metadata into __repositories__ 48 | which are fully controlled by end-users: 49 | 50 | * Repositories are not tied to specific projects. 51 | In other words, the end-user can store 52 | in a repository models completely unrelated to each other. 53 | 54 | * There is no limit on the number of repositories 55 | that can be created. 56 | 57 | `tcbench` tracks in an AIM repository two types of tasks, 58 | namely *runs* and *campaigns*: 59 | 60 | * A __run__ corresponds to the training of an 61 | individual ML/DL model and is the "minimal experiment object" used by AIM, 62 | i.e., any tracked metadata needs to be 63 | associated with an AIM run. 64 | 65 | * A __campaign__ corresponds to a 66 | collection of runs. 67 | 68 | AIM assigns a unique hash code to a run, 69 | but a run object can be further enriched with 70 | extra metadata using the AIM SDK or web UI. 71 | 72 | A run can be enriched with both individual values 73 | (e.g., best validation loss observed or the final accuracy score) 74 | as well as series (e.g., loss value for each epoch). 75 | Moreover, values can have a *context* to further 76 | specify semantics (e.g., define if a registered metric 77 | relates to training, validation or test). 78 | 79 | While *run* is a term borrowed from AIM terminology, 80 | `tcbench` introduces *campaign* to 81 | group runs which are semantically related 82 | and need to be summarized together (e.g., results 83 | collected across different train/val/test splits). 84 | 85 | It follows that: 86 | 87 | * Runs are the fundamental building block for collecting 88 | modeling results. But they are also the fundamental 89 | unit when developing/debugging modeling tasks. 90 | 91 | * Campaigns bind multiple runs together. Hence, 92 | they are meant to be stored in separate AIM repositories 93 | (although this is NOT a strict requirement for `tcbench`).
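As a minimal sketch of such programmatic access (an assumption-laden example, not part of the tcbench API: it assumes the `aim` package is installed, that the path points to a repository folder containing an `.aim` database -- e.g., one of the campaign repositories shipped with the IMC23 artifacts -- and that runs track an `hparams` dictionary as exposed by the web UI):

```python
from aim import Repo

# open an AIM repository folder (illustrative path; adapt to your setup)
repo = Repo("notebooks/imc23/campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout")

for run in repo.iter_runs():
    # each run is identified by a unique hash; get() reads tracked metadata
    # ("hparams" mirrors what the web UI exposes as `run.hparams`)
    print(run.hash, run.get("hparams", {}))
```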
94 | -------------------------------------------------------------------------------- /docs.material/overrides/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs.material/overrides/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs.material/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs.material/overrides/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.md: -------------------------------------------------------------------------------- 1 | # Figure 10(b): Classwise evaluation on human. 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb) 4 | 5 | 6 | ```python 7 | import pathlib 8 | 9 | import matplotlib as mpl 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import seaborn as sns 14 | import statsmodels.stats.api as sms 15 | 16 | %matplotlib inline 17 | %config InlineBackend.figure_format='retina' 18 | ``` 19 | 20 | 21 | ```python 22 | def compute_confidence_intervals(array, alpha=0.05): 23 | array = np.array(array) 24 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha) 25 | mean = array.mean() 26 | ci = high - mean 27 | return ci 28 | ``` 29 | 30 | 31 | ```python 32 | path = pathlib.Path( 33 | "./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/IncrementalSampling_Retraining(human-triggered)_20/" 34 | ) 35 | 36 | class_reps = list(path.glob("*class_rep.csv")) 37 | 38 | per_cls = np.stack( 39 | [ 40 | pd.read_csv(file)[:5][["Accuracy", "precision", "recall", "f1-score"]].values 41 | for file in class_reps 42 | ], 43 | axis=0, 44 | ) 45 | 46 | 47 | means = np.mean(per_cls, axis=0) 48 | 49 | cis = np.zeros([per_cls.shape[1], per_cls.shape[2]]) 50 | for i in range(per_cls.shape[1]): 51 | for j in range(per_cls.shape[2]): 52 | cis[i, j] = compute_confidence_intervals(per_cls[:, i, j]) 53 | ``` 54 | 55 | 56 | ```python 57 | X = ["G. Drive", "Youtube", "G. Doc", "G. Search", "G. 
Music"] 58 | X_axis = np.arange(len(X)) 59 | 60 | plt.rcParams.update({'font.size': 16}) 61 | 62 | fig, ax = plt.subplots(figsize=(7, 6.5)) 63 | ax.bar( 64 | X_axis - 0.3, 65 | means[:, 0], 66 | 0.2, 67 | label="Accuracy", 68 | yerr=cis[:, 0], 69 | ecolor="black", 70 | alpha=0.5, 71 | capsize=10, 72 | ) 73 | ax.bar( 74 | X_axis - 0.1, 75 | means[:, 1], 76 | 0.2, 77 | label="Precision", 78 | yerr=cis[:, 1], 79 | ecolor="black", 80 | alpha=0.5, 81 | capsize=10, 82 | ) 83 | ax.bar( 84 | X_axis + 0.1, 85 | means[:, 2], 86 | 0.2, 87 | label="Recall", 88 | yerr=cis[:, 2], 89 | ecolor="black", 90 | alpha=0.5, 91 | capsize=10, 92 | ) 93 | ax.bar( 94 | X_axis + 0.3, 95 | means[:, 3], 96 | 0.2, 97 | label="F1", 98 | yerr=cis[:, 3], 99 | ecolor="black", 100 | alpha=0.5, 101 | capsize=10, 102 | ) 103 | 104 | 105 | plt.xticks(X_axis, X) 106 | ax.set_xlabel("Class") 107 | ax.set_ylabel("Value") 108 | ax.set_ylim([0, 1]) 109 | plt.legend() 110 | ax.legend(bbox_to_anchor=(1, 1.02)) 111 | plt.grid(axis="y") 112 | 113 | plt.savefig("icdm19_fig3b_replicate_human_ci.png", dpi=300, bbox_inches="tight") 114 | ``` 115 | 116 | 117 | 118 | ![png](figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png) 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure1_flowpic_example.md: -------------------------------------------------------------------------------- 1 | # Figure 1 : Example of a packet time series transformed into a flowpic representation for a randomly selected flow 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure1_flowpic_example.ipynb) 4 | 5 | 6 | ```python 7 | import numpy as np 8 | import tcbench as tcb 9 | from matplotlib.colors import LogNorm, Normalize 10 | from tcbench import dataprep 11 | ``` 12 | 13 | 14 | ```python 15 | import matplotlib as mpl 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns 18 | 19 | %matplotlib inline 20 | %config InlineBackend.figure_format='retina' 21 | ``` 22 | 23 | 24 | ```python 25 | import tcbench 26 | ``` 27 | 28 | 29 | ```python 30 | # load unfiltered dataset 31 | FLOWPIC_BLOCK_DURATION = 15 32 | ``` 33 | 34 | 35 | ```python 36 | df = tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19) 37 | ``` 38 | 39 | 40 | ```python 41 | df_sample = df.sample(n=1, random_state=12345) 42 | ser = 
df_sample.iloc[0] 43 | ``` 44 | 45 | 46 | ```python 47 | fig, axes = plt.subplots( 48 | nrows=1, ncols=5, figsize=(15, 3), gridspec_kw=dict(width_ratios=[1, 1, 1, 1, 1.1]) 49 | ) 50 | 51 | direction = np.where(ser["pkts_dir"] == 0, -1, 1) 52 | y = ser["pkts_size"] * direction 53 | x = ser["timetofirst"] 54 | 55 | ax = axes[0] 56 | ax.stem( 57 | np.where(y > 0, x, 0), 58 | np.where(y > 0, y, 0), 59 | markerfmt="", 60 | basefmt="lightgray", 61 | label="outgoing", 62 | linefmt="green", 63 | ) 64 | ax.stem( 65 | np.where(y < 0, x, 0), 66 | np.where(y < 0, y, 0), 67 | markerfmt="", 68 | basefmt="lightgray", 69 | label="incoming", 70 | linefmt="lightgreen", 71 | ) 72 | ax.legend() 73 | ax.set_ylabel("packet size [B]") 74 | ax.set_xlabel("time [s]") 75 | 76 | rect = mpl.patches.Rectangle( 77 | (0, -1500), 15, 3000, linewidth=1, edgecolor="r", facecolor="none" 78 | ) 79 | ax.add_patch(rect) 80 | ax.annotate("first\n15s", (5, 1000)) 81 | 82 | for idx, flowpic_dim in enumerate((32, 64, 256, 512), start=1): 83 | # create a single sample dataset 84 | dset = dataprep.FlowpicDataset( 85 | data=df_sample, 86 | timetofirst_colname="timetofirst", 87 | pkts_size_colname="pkts_size", 88 | pkts_dir_colname="pkts_dir", 89 | target_colname="app", 90 | flowpic_dim=flowpic_dim, 91 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION, 92 | ) 93 | 94 | # fetch the flowpic representation 95 | flowpic, label = dset[0] 96 | 97 | # flattening the representation 98 | # to remove zero values (used for finding 99 | # min values) 100 | flowpic = flowpic.numpy().squeeze() 101 | flattened = flowpic.flatten() 102 | flattened = flattened[flattened > 0] 103 | 104 | ax = axes[idx] 105 | 106 | sns.heatmap( 107 | ax=ax, 108 | data=np.where(flowpic == 0, np.nan, flowpic), 109 | vmin=flattened.min(), 110 | vmax=flattened.max(), 111 | cbar=idx == 4, 112 | cbar_kws=dict(fraction=0.046, pad=0.01, aspect=20, label="Normalized packets count"), 113 | cmap=plt.get_cmap("viridis_r"), 114 | square=True, 115 | norm=LogNorm(flattened.min(), flattened.max()), 116 | ) 117 | for _, spine in ax.spines.items(): 118 | spine.set_visible(True) 119 | spine.set_linewidth(1) 120 | ax.yaxis.set_ticks([], None) 121 | ax.xaxis.set_ticks([], None) 122 | ax.set_ylabel(f"packets size (bins of {1500 // flowpic_dim}B)") 123 | ax.set_xlabel(f"time (bins of {FLOWPIC_BLOCK_DURATION / flowpic_dim * 1000:.1f}ms)") 124 | ax.set_title(f"{flowpic_dim}x{flowpic_dim}") 125 | 126 | plt.savefig("flowpic_example.png", dpi=300, bbox_inches="tight") 127 | ``` 128 | 129 | 130 | 131 | ![png](figure1_flowpic_example_files/figure1_flowpic_example_8_0.png) 132 | 133 | 134 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.md: -------------------------------------------------------------------------------- 1 | # Figure 3 : Average confusion matrixes for the 32x32 resolution across all experiments in Table 4 2 | 3 | [:simple-jupyter: 
:material-download:](/tcbench/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.ipynb) 4 | 5 | 6 | ```python 7 | import pathlib 8 | 9 | import matplotlib as mpl 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import seaborn as sns 14 | from sklearn.preprocessing import normalize 15 | 16 | %matplotlib inline 17 | %config InlineBackend.figure_format='retina' 18 | ``` 19 | 20 | 21 | ```python 22 | folder_artifacts = pathlib.Path( 23 | "./campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/artifacts/" 24 | ) 25 | ``` 26 | 27 | 28 | ```python 29 | filelists = [ 30 | list(folder_artifacts.glob("*/test-human_conf_mtx.csv")), 31 | list(folder_artifacts.glob("*/test-script_conf_mtx.csv")), 32 | ] 33 | 34 | titles = ["human", "script"] 35 | 36 | CLASSES = { 37 | "google-doc": "G. Doc", 38 | "google-drive": "G. Drive", 39 | "google-music": "G. Music", 40 | "google-search": "G. Search", 41 | "youtube": "YouTube", 42 | } 43 | ``` 44 | 45 | 46 | ```python 47 | plt.rcParams.update({"font.size": 14}) 48 | 49 | fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5)) 50 | # cbar_ax = fig.add_axes([0.93, 0.2, 0.02, 0.6]) # (left, bottom, width, height) 51 | for i in range(2): 52 | cm_mean = np.mean( 53 | np.stack( 54 | [pd.read_csv(file)[list(CLASSES.keys())].values for file in filelists[i]] 55 | ), 56 | axis=0, 57 | ) 58 | 59 | normed_cm_mean = normalize(cm_mean, axis=1, norm="l1") 60 | 61 | ax = axes[i] 62 | 63 | sns.heatmap( 64 | data=normed_cm_mean, 65 | ax=ax, 66 | square=True, 67 | cmap="viridis", 68 | annot=True, 69 | annot_kws={"fontsize": 11}, 70 | fmt=".2f", 71 | vmin=0, 72 | vmax=1, 73 | cbar_kws=dict(fraction=0.046, pad=0.03, aspect=20), 74 | ) 75 | 76 | ax.set_xticklabels(list(CLASSES.values()), rotation=45, ha="right") 77 | ax.set_yticklabels(list(CLASSES.values()), rotation=0) 78 | 79 | ax.set_title(titles[i]) 80 | 81 | ax.set_ylabel("Ground Truth") 82 | ax.set_xlabel("Prediction") 83 | 84 | plt.tight_layout() 85 | plt.savefig("ucdavis_dataset_confusion_matrix.png", bbox_inches="tight", dpi=150) 86 | ``` 87 | 88 | 89 | 90 | ![png](figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png) 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.md: -------------------------------------------------------------------------------- 1 | # Figure 8: Investigating root cause of G1 discrepancies: Kernel density estimation of the per-class packet size distributions. 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.ipynb) 4 | 5 | 6 | ```python 7 | import itertools 8 | 9 | import numpy as np 10 | import pandas as pd 11 | ``` 12 | 13 | 14 | ```python 15 | import matplotlib as mpl 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns 18 | from matplotlib.colors import LogNorm, Normalize 19 | 20 | %matplotlib inline 21 | %config InlineBackend.figure_format='retina' 22 | ``` 23 | 24 | 25 | ```python 26 | import tcbench as tcb 27 | from tcbench import dataprep 28 | ``` 29 | 30 | 31 | ```python 32 | FLOWPIC_DIM = 32 33 | FLOWPIC_BLOCK_DURATION = 15 34 | ``` 35 | 36 | 37 | ```python 38 | # load unfiltered dataset 39 | dset = dataprep.FlowpicDataset( 40 | data=tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19), 41 | timetofirst_colname="timetofirst", 42 | pkts_size_colname="pkts_size", 43 | pkts_dir_colname="pkts_dir", 44 | target_colname="app", 45 | flowpic_dim=FLOWPIC_DIM, 46 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION, 47 | ) 48 | ``` 49 | 50 | 51 | ```python 52 | REPLACE = { 53 | "google-doc": "G. Doc", 54 | "google-drive": "G. Drive", 55 | "google-music": "G. Music", 56 | "google-search": "G. 
Search", 57 | "youtube": "YouTube", 58 | "retraining-human-triggered": "Human", 59 | "retraining-script-triggered": "Script", 60 | } 61 | 62 | dset.df = dset.df.assign( 63 | app = dset.df["app"].replace(REPLACE), 64 | partition = dset.df["partition"].replace(REPLACE) 65 | ) 66 | ``` 67 | 68 | 69 | ```python 70 | TARGETS_LABEL = sorted(dset.df["app"].unique()) 71 | PARTITIONS_NAME = sorted(dset.df["partition"].unique()) 72 | ``` 73 | 74 | 75 | ```python 76 | all_pkts_size = dict() 77 | 78 | for partition_name in PARTITIONS_NAME: 79 | all_pkts_size[partition_name] = dict() 80 | 81 | for app in TARGETS_LABEL: 82 | df_tmp = dset.df[ 83 | (dset.df["partition"] == partition_name) & (dset.df["app"] == app) 84 | ] 85 | 86 | l = [] 87 | for idx in df_tmp.index: 88 | ser = df_tmp.loc[idx] 89 | indexes = np.where(ser["timetofirst"] < FLOWPIC_BLOCK_DURATION)[0] 90 | pkts_size = ser["pkts_size"][indexes] 91 | l.append(pkts_size) 92 | all_pkts_size[partition_name][app] = np.concatenate(l) 93 | ``` 94 | 95 | 96 | ```python 97 | # WARNING: computing the KDE will take a few minutes 98 | 99 | fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(15, 5)) 100 | 101 | line_props = { 102 | "pretraining": dict(linestyle="-"), 103 | "Script": dict( 104 | linestyle=(0, (1, 1)) 105 | ), 106 | "Human": dict(linestyle=(0, (1, 1))), 107 | } 108 | 109 | for ax, app in zip(axes, TARGETS_LABEL): 110 | for partition_name in [ 111 | "pretraining", 112 | "Script", 113 | "Human", 114 | ]: 115 | props = line_props[partition_name] 116 | sns.kdeplot( 117 | ax=ax, 118 | data=all_pkts_size[partition_name][app], 119 | linewidth=2, 120 | label=partition_name, 121 | **props, 122 | fill=True, 123 | alpha=0.1 124 | ) 125 | ax.legend(bbox_to_anchor=(0.5, 1.5), loc="upper center") 126 | ax.set_title(app, fontsize=10) 127 | ax.set_xlim((-500, 1800)) 128 | ax.set_xlabel("packet size") 129 | ax.set_ylabel("kde") 130 | 131 | plt.tight_layout() 132 | plt.savefig("ucdavid-icdm19_kde_pkts_size.png", dpi=300, bbox_inches='tight') 133 | ``` 134 | 135 | 136 | 137 | ![png](figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png) 138 | 139 | 140 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 10: Performance comparison across augmentations for different flowpic sizes. 
24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.ipynb) 26 | 27 | 28 | ```python 29 | import pathlib 30 | 31 | import numpy as np 32 | import pandas as pd 33 | from scipy.stats import tukey_hsd 34 | ``` 35 | 36 | ```python 37 | folder = pathlib.Path( 38 | "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout" 39 | ) 40 | df = pd.concat( 41 | ( 42 | pd.read_parquet(folder / "runsinfo_flowpic_dim_1500.parquet"), 43 | pd.read_parquet(folder / "runsinfo_flowpic_dim_64.parquet"), 44 | pd.read_parquet(folder / "runsinfo_flowpic_dim_32.parquet"), 45 | ) 46 | ) 47 | ``` 48 | 49 | ```python 50 | # df = pd.read_parquet('campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/merged_runsinfo.parquet') 51 | ``` 52 | 53 | ```python 54 | df_script = df[df["test_split_name"] == "test-script"] 55 | 56 | acc_32 = df_script[df_script["flowpic_dim"] == 32]["acc"].values.tolist() 57 | acc_64 = df_script[df_script["flowpic_dim"] == 64]["acc"].values.tolist() 58 | acc_1500 = df_script[df_script["flowpic_dim"] == 1500]["acc"].values.tolist() 59 | ``` 60 | 61 | ```python 62 | res = tukey_hsd(acc_32, acc_64, acc_1500) 63 | ``` 64 | 65 | ```python 66 | df = pd.DataFrame( 67 | np.array([res.pvalue[0, 1], res.pvalue[0, 2], res.pvalue[1, 2]]).reshape(-1, 1), 68 | columns=["pvalue"], 69 | index=pd.MultiIndex.from_arrays( 70 | [("32x32", "32x32", "64x64"), ("64x64", "1500x1500", "1500x1500")] 71 | ), 72 | ) 73 | df = df.assign(is_different=df["pvalue"] < 0.05) 74 | ``` 75 | 76 | ```python 77 | df 78 | ``` 79 | 80 | 81 | 82 |
|       |           | pvalue       | is_different |
|-------|-----------|--------------|--------------|
| 32x32 | 64x64     | 5.772842e-01 | False        |
| 32x32 | 1500x1500 | 1.936038e-06 | True         |
| 64x64 | 1500x1500 | 1.044272e-08 | True         |
115 | 116 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table3_xgboost_baseline.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 3: (G0) Baseline ML performance without augmentation in a supervised setting. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table3_xgboost_baseline.ipynb) 26 | 27 | 28 | ```python 29 | import pandas as pd 30 | ``` 31 | 32 | ```python 33 | df = pd.read_csv( 34 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-flowpic/campaign_summary/noaugmentation-flowpic/summary_flowpic_dim_32.csv", 35 | header=[0, 1], 36 | index_col=[0, 1], 37 | ) 38 | ``` 39 | 40 | ```python 41 | # reformatting 42 | df_tmp = df["acc"][["mean", "ci95"]].round(2) 43 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2) 44 | ``` 45 | 46 | 47 | 48 |
|             | mean  | ci95 |
|-------------|-------|------|
| test-script | 96.80 | 0.37 |
| test-human  | 73.65 | 2.14 |
73 | 74 | 75 | 76 | ```python 77 | df = pd.read_csv( 78 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-timeseries/campaign_summary/noaugmentation-timeseries/summary_max_n_pkts_10.csv", 79 | header=[0, 1], 80 | index_col=[0, 1], 81 | ) 82 | ``` 83 | 84 | ```python 85 | # reformatting 86 | df_tmp = df["acc"][["mean", "ci95"]].round(2) 87 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2) 88 | ``` 89 | 90 | 91 | 92 |
|             | mean  | ci95 |
|-------------|-------|------|
| test-script | 94.53 | 0.56 |
| test-human  | 66.91 | 1.40 |
117 | 118 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 5: Impact of dropout and SimCLR projection layer dimension on fine-tuning. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.ipynb) 26 | 27 | 28 | ```python 29 | import itertools 30 | 31 | import pandas as pd 32 | ``` 33 | 34 | ```python 35 | df = pd.read_csv( 36 | "campaigns/ucdavis-icdm19/simclr-dropout-and-projection/campaign_summary/simclr-dropout-and-projection/summary_flowpic_dim_32.csv", 37 | header=[0, 1], 38 | index_col=[0, 1, 2], 39 | ) 40 | 41 | df = df["acc"][["mean", "ci95"]] 42 | df = df.T 43 | df.columns.set_names("test_split_name", level=0, inplace=True) 44 | df.columns.set_names("projection_layer_dim", level=1, inplace=True) 45 | df.columns.set_names("with_dropout", level=2, inplace=True) 46 | df = df.reorder_levels( 47 | ["test_split_name", "with_dropout", "projection_layer_dim"], axis=1 48 | ) 49 | 50 | df = df[list(itertools.product(["test-script", "test-human"], [True, False], [30, 84]))] 51 | df = df.round(2) 52 | 53 | df.to_csv("table5_simclr_dropout_and_projectionlayer.csv") 54 | df 55 | ``` 56 | 57 | 58 | 59 |
| test_split_name | with_dropout | projection_layer_dim | mean  | ci95 |
|-----------------|--------------|----------------------|-------|------|
| test-script     | True         | 30                   | 91.81 | 0.38 |
| test-script     | True         | 84                   | 92.02 | 0.36 |
| test-script     | False        | 30                   | 92.18 | 0.31 |
| test-script     | False        | 84                   | 92.54 | 0.33 |
| test-human      | True         | 30                   | 72.12 | 1.37 |
| test-human      | True         | 84                   | 73.31 | 1.04 |
| test-human      | False        | 30                   | 74.69 | 1.13 |
| test-human      | False        | 84                   | 74.35 | 1.38 |
114 | 115 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 6: Comparing the fine-tuning performance when using different pairs of augmentation for pretraining. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.ipynb) 26 | 27 | 28 | ```python 29 | import itertools 30 | 31 | import pandas as pd 32 | ``` 33 | 34 | ```python 35 | RENAME = { 36 | "colorjitter": "Color jitter", 37 | "timeshift": "Time shift", 38 | "changertt": "Change RTT", 39 | "rotate": "Rotate", 40 | "packetloss": "Packet loss", 41 | } 42 | ``` 43 | 44 | ```python 45 | df = pd.read_csv( 46 | "./campaigns/ucdavis-icdm19/simclr-other-augmentation-pairs/campaign_summary/simclr-other-augmentation-pairs/summary_flowpic_dim_32.csv", 47 | header=[0, 1], 48 | index_col=[0, 1], 49 | ) 50 | 51 | df = df["acc"][["mean", "ci95"]].round(2) 52 | df = df.reset_index() 53 | df = df.assign( 54 | aug1=df["level_1"].apply(eval).str[0], 55 | aug2=df["level_1"].apply(eval).str[1], 56 | ) 57 | df = df.drop("level_1", axis=1) 58 | df = df.rename({"level_0": "test_split_name"}, axis=1) 59 | df = df.replace(RENAME) 60 | df = df.pivot(index="test_split_name", columns=["aug1", "aug2"]) 61 | df.columns.set_names(["stat", "aug1", "aug2"], inplace=True) 62 | df = df.reorder_levels(["aug1", "aug2", "stat"], axis=1) 63 | df.columns.set_names(["", "", ""], inplace=True) 64 | df.index.name = None 65 | 66 | df = df[ 67 | list(itertools.product(["Change RTT"], ["Time shift"], ["mean", "ci95"])) 68 | + list( 69 | itertools.product(["Packet loss"], ["Color jitter", "Rotate"], ["mean", "ci95"]) 70 | ) 71 | + list( 72 | itertools.product(["Change RTT"], ["Color jitter", "Rotate"], ["mean", "ci95"]) 73 | ) 74 | + list(itertools.product(["Color jitter"], ["Rotate"], ["mean", "ci95"])) 75 | ] 76 | df = df.loc[["test-script", "test-human"]] 77 | 78 | df.to_csv("table5_simclr_other_augmentation_pairs.csv") 79 | df 80 | ``` 81 | 82 | 83 | 84 |
| aug1         | aug2         | mean (test-script) | ci95 (test-script) | mean (test-human) | ci95 (test-human) |
|--------------|--------------|--------------------|--------------------|-------------------|-------------------|
| Change RTT   | Time shift   | 92.18              | 0.31               | 74.69             | 1.13              |
| Packet loss  | Color jitter | 90.17              | 0.41               | 73.67             | 1.24              |
| Packet loss  | Rotate       | 91.94              | 0.3                | 71.22             | 1.2               |
| Change RTT   | Color jitter | 91.72              | 0.36               | 75.56             | 1.23              |
| Change RTT   | Rotate       | 92.38              | 0.32               | 74.33             | 1.26              |
| Color jitter | Rotate       | 91.79              | 0.34               | 71.64             | 1.23              |
155 | 156 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 9: Macro-average Accuracy with different retraining dataset and different sampling methods 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.ipynb) 26 | 27 | 28 | ```python 29 | import pathlib 30 | 31 | import matplotlib as mpl 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | import pandas as pd 35 | import seaborn as sns 36 | import statsmodels.stats.api as sms 37 | 38 | %matplotlib inline 39 | %config InlineBackend.figure_format='retina' 40 | ``` 41 | 42 | ```python 43 | def compute_confidence_intervals(array, alpha=0.05): 44 | array = np.array(array) 45 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha) 46 | mean = array.mean() 47 | ci = high - mean 48 | return ci 49 | ``` 50 | 51 | ```python 52 | path = pathlib.Path("./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/") 53 | 54 | class_repss = list(path.glob("*10/")) 55 | ``` 56 | 57 | ```python 58 | data = dict() 59 | 60 | for path in class_repss: 61 | if "script" in str(path): 62 | class_reps = list(path.glob("*class_rep.csv")) 63 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps] 64 | 65 | augmentation_name = path.name.split("_")[0].replace("Sampling", "") 66 | data[augmentation_name] = ( 67 | np.mean(accs) * 100, 68 | compute_confidence_intervals(accs), 69 | ) 70 | 71 | df_script = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2) 72 | df_script.columns = pd.MultiIndex.from_arrays([["script", "script"], df_script.columns]) 73 | # df_script 74 | ``` 75 | 76 | ```python 77 | data = dict() 78 | for path in class_repss: 79 | if "human" in str(path): 80 | class_reps = list(path.glob("*class_rep.csv")) 81 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps] 82 | 83 | augmentation_name = path.name.split("_")[0].replace("Sampling", "") 84 | data[augmentation_name] = ( 85 | np.mean(accs) * 100, 86 | compute_confidence_intervals(accs), 87 | ) 88 | 89 | df_human = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2) 90 | df_human.columns = pd.MultiIndex.from_arrays([["human", "human"], df_human.columns]) 91 | ``` 92 | 93 | ```python 94 | df_tmp = pd.concat((df_script, df_human), axis=1).T 95 | display(df_tmp) 96 | df_tmp.to_csv("icdm_finetuning_per_class_metrics_on_human.csv") 97 | ``` 98 | 99 |
|        |      | FixedStep | Random | Incremental |
|--------|------|-----------|--------|-------------|
| script | mean | 87.11     | 94.63  | 96.22       |
| script | ci95 | 0.09      | 0.02   | 0.01        |
| human  | mean | 82.60     | 87.29  | 92.56       |
| human  | ci95 | 0.03      | 0.04   | 0.03        |
142 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/pytest.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: pytest 3 | icon: simple/pytest 4 | --- 5 | 6 | # ML unit testing 7 | 8 | Multiple tests are available to verify different functionalities 9 | of both tcbench and the modeling campaigns created. 10 | 11 | Tests are not bundled with the pypi installation. Rather, you need 12 | to follow the procedure described in the [artifact page](/tcbench/papers/imc23/artifacts/) 13 | to fetch the source code and install all artifacts and datasets. 14 | 15 | Tests are coded via [`pytest` :simple-pytest:](https://docs.pytest.org/en/7.4.x/) 16 | and are available under the `/tests` folder. 17 | 18 | !!! warning "Tests trigger model training" 19 | 20 | Most of the tests verify that the models trained for 21 | the campaigns described in the paper are indeed reproducible, i.e., 22 | they provide the exact same models obtained for the paper. 23 | 24 | To do so, the pytest resources fetched from figshare 25 | contain a subset of reference models, so the tests 26 | trigger the modeling for those scenarios and check 27 | that what is trained matches what was created for the paper. 28 | 29 | So be aware that running these tests might take a while 30 | depending on your local environment. 31 | 32 | 33 | To trigger all tests run 34 | 35 | ``` 36 | pytest tests 37 | ``` 38 | 39 | !!! note "Output" 40 | ``` 41 | ============================ test session starts ====================================== 42 | platform linux -- Python 3.10.13, pytest-7.4.2, pluggy-1.3.0 43 | rootdir: /tmp/tcbench-pip/tcbench 44 | plugins: anyio-3.7.1, helpers-namespace-2021.12.29 45 | collected 101 items 46 | 47 | tests/test_augmentations_at_loading.py ........... [ 10%] 48 | tests/test_augmentations_at_loading_xgboost.py . [ 11%] 49 | tests/test_cli_command_campaign.py .... [ 15%] 50 | tests/test_cli_command_singlerun.py ............ [ 27%] 51 | tests/test_contrastive_learning_and_finetune.py .. [ 29%] 52 | tests/test_libtcdatasets_datasets_utils.py ................. [ 46%] 53 | tests/test_modeling_backbone.py ................ [ 62%] 54 | tests/test_modeling_dataprep.py .................................. [ 96%] 55 | tests/test_modeling_methods.py .... [100%] 56 | ============================== 101 passed, 8 warnings in 6523.55s (1:48:43) ========================= 57 | ``` 58 | -------------------------------------------------------------------------------- /docs.material/papers/index.md: -------------------------------------------------------------------------------- 1 | # Research articles featuring tcbench 2 | 3 | [__Replication: Contrastive Learning and Data Augmentation in Traffic Classification__](/tcbench/papers/imc23) 4 |
5 | *A. Finamore, C. Wang, J. Krolikowski, J. M. Navarro, F. Cheng, D. Rossi*,
ACM Internet Measurement Conference (IMC), 2023 7 |
[:material-hexagon-outline: __Artifacts__](/tcbench/papers/imc23/artifacts) [:fontawesome-regular-file-pdf: __PDF__](https://arxiv.org/pdf/2309.09733) 8 | 9 | === "Bibtex" 10 | ``` 11 | @misc{finamore2023contrastive, 12 | title={ 13 | Contrastive Learning and Data Augmentation 14 | in Traffic Classification Using a 15 | Flowpic Input Representation 16 | }, 17 | author={ 18 | Alessandro Finamore and 19 | Chao Wang and 20 | Jonatan Krolikowski 21 | and Jose M. Navarro 22 | and Fuxing Chen and 23 | Dario Rossi 24 | }, 25 | year={2023}, 26 | eprint={2309.09733}, 27 | archivePrefix={arXiv}, 28 | primaryClass={cs.LG} 29 | } 30 | ``` 31 | 32 | === "Abstract" 33 | Over the last years we witnessed a renewed interest towards 34 | Traffic Classification (TC) captivated by the rise of Deep 35 | Learning (DL). Yet, the vast majority of TC literature lacks 36 | code artifacts, performance assessments across datasets and 37 | reference comparisons against Machine Learning (ML) methods. 38 | Among those works, a recent study from IMC'22 [17] is 39 | worthy of attention since it adopts recent DL methodologies 40 | (namely, few-shot learning, self-supervision via contrastive 41 | learning and data augmentation) appealing for networking as 42 | they enable to learn from a few samples and transfer across 43 | datasets. The main result of [17] on the UCDAVIS19, ISCX-VPN 44 | and ISCX-Tor datasets is that, with such DL methodologies, 45 | 100 input samples are enough to achieve very high accuracy 46 | using an input representation called "flowpic" (i.e., a per-flow 47 | 2d histogram of the packet size evolution over time). 48 | In this paper (i) we reproduce [17] on the same datasets 49 | and (ii) we replicate its most salient aspect (the importance 50 | of data augmentation) on three additional public datasets, 51 | MIRAGE-19, MIRAGE-22 and UTMOBILENET21. While we confirm 52 | most of the original results, we also found a 20% accuracy 53 | drop on some of the investigated scenarios due to 54 | a data shift of the original dataset that we uncovered. 55 | Additionally, our study validates that the data augmentation 56 | strategies studied in [17] perform well on other datasets too. 57 | In the spirit of reproducibility and replicability we make all 58 | artifacts (code and data) available at [10].
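To make the "flowpic" input representation mentioned in the abstract concrete, the sketch below builds one with plain numpy. It is only an illustration under assumed parameters (a 32x32 grid, a 15 s observation window, a 1500-byte maximum packet size, and a made-up packet series); it is not the tcbench implementation:

```python
import numpy as np

# Made-up packet series for one flow: arrival times (seconds) and sizes (bytes).
timestamps = np.array([0.00, 0.02, 0.05, 0.30, 0.31, 0.90, 1.20, 1.45])
pkt_sizes = np.array([74, 1500, 1500, 60, 1500, 74, 1500, 1500])

def flowpic(timestamps, pkt_sizes, dim=32, max_time=15.0, max_size=1500):
    """Build a dim x dim 2d histogram of packet size vs. arrival time."""
    size_bins = np.linspace(0.0, max_size, dim + 1)
    time_bins = np.linspace(0.0, max_time, dim + 1)
    # Each cell counts the packets of a given size range observed in a
    # given time window of the flow.
    img, _, _ = np.histogram2d(pkt_sizes, timestamps, bins=(size_bins, time_bins))
    return img

print(flowpic(timestamps, pkt_sizes).shape)  # (32, 32)
```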
59 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/tcbench/api/overview.md -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_clickutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.clickutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_aimrepo.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_aimrepo 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_campaign.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_campaign 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_datasets.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_datasets 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_singlerun.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_singlerun 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_richutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.richutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets.md: -------------------------------------------------------------------------------- 1 | ## Generating train/val/test splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_datasets_utils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.datasets_utils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_mirage19_json_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage19_json_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_mirage22_json_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage22_json_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage19_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md: 
-------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage22_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.utmobilenet21_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_csv_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.utmobilenet21_csv_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_aimutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.aimutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_augmentation.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.augmentation 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_backbone.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.backbone 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_dataprep.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.dataprep 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_losses.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.losses 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_methods.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.methods 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_augmentations_at_loading 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading_xgboost.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_augmentations_at_loading_xgboost 2 | 
-------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading_xgboost 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_contrastive_learning_and_finetune 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_contrastive_learning_and_finetune.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_contrastive_learning_and_finetune 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_utils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.utils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/cli_intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: octicons/terminal-16 3 | title: CLI Intro 4 | --- 5 | 6 | # CLI Introduction 7 | 8 | tcbench can be used as an SDK and 9 | from the command line. 10 | 11 | When installing tcbench you also install 12 | a `tcbench` command-line script 13 | created via [:material-cursor-default: click](https://click.palletsprojects.com/en/8.1.x/) 14 | and [:material-language-python: rich](https://github.com/Textualize/rich). 15 | 16 | For instance 17 | ``` 18 | tcbench --help 19 | ``` 20 | 21 | !!! info "Output" 22 | ```bash 23 | Usage: tcbench [OPTIONS] COMMAND [ARGS]... 24 | 25 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────╮ 26 | │ --version Show tcbench version and exit. │ 27 | │ --help Show this message and exit. │ 28 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯ 29 | ╭─ Commands ───────────────────────────────────────────────────────────────────────────────╮ 30 | │ aimrepo Investigate AIM repository content. │ 31 | │ campaign Triggers a modeling campaign. │ 32 | │ datasets Install/Remove traffic classification datasets. │ 33 | │ run Triggers a modeling run. │ 34 | │ tree show the command tree of your CLI. │ 35 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯ 36 | ``` 37 | 38 | The commands are organized in a nested structure which 39 | you can visualize using 40 | 41 | ``` 42 | tcbench tree 43 | ``` 44 | 45 | !!! info "Output" 46 | ```bash 47 | main 48 | ├── aimrepo - Investigate AIM repository content. 49 | │ ├── ls - List a subset of properties of each run. 50 | │ ├── merge - Coalesce different AIM repos into a single new repo. 51 | │ ├── properties - List properties across all runs.
52 | │ └── report - Summarize runs performance metrics. 53 | ├── campaign - Triggers a modeling campaign. 54 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set. 55 | │ └── contralearn-and-finetune - Modeling by pre-training via contrastive learning and then finetune the final classifier from the pre-trained model. 56 | ├── datasets - Install/Remove traffic classification datasets. 57 | │ ├── delete - Delete a dataset. 58 | │ ├── import - Import datasets. 59 | │ ├── info - Show the meta-data related to supported datasets. 60 | │ ├── install - Install a dataset. 61 | │ ├── lsparquet - Tree view of the datasets parquet files. 62 | │ ├── samples-count - Show report on number of samples per class. 63 | │ └── schema - Show datasets schemas 64 | ├── run - Triggers a modeling run. 65 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set. 66 | │ └── contralearn-and-finetune - Modeling by pre-training via contrastive learning and then finetune the final classifier from the pre-trained model. 67 | └── tree - show the command tree of your CLI 68 | ``` 69 | -------------------------------------------------------------------------------- /docs.material/tcbench/index.md: -------------------------------------------------------------------------------- 1 | # The tcbench framework 2 | 3 | tcbench is an ML/DL framework specific to __Traffic Classification (TC)__, 4 | created as a research project by the AI4NET team of the Huawei Technologies 5 | research center in Paris, France. 6 | 7 | !!! info "What is Traffic Classification?" 8 | 9 | Nodes within a computer network operate by exchanging 10 | information, namely *packets*, whose exchange is regulated according 11 | to standardized protocols (e.g., HTTP for the web). To understand 12 | network health, this information flow needs to be constantly monitored 13 | so that one can react accordingly. For instance, one 14 | might want to prioritize certain traffic (e.g., a video meeting) 15 | or block it (e.g., social media in a working environment). 16 | 17 | Traffic classification is the act of labeling an exchange of packets 18 | based on the Internet application which generated it. 19 | 20 | 21 | The academic literature is rife with methods and proposals for TC. 22 | Yet, code artifacts are scarce, and public datasets 23 | do not offer common conventions of use. 24 | 25 | We designed tcbench with the following goals in mind: 26 | 27 | | Goal | State of the art | tcbench | 28 | |:-----|:-----------------|:--------| 29 | |__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds.
| An (opinionated) curation of datasets to create easy-to-use parquet files with associated train/val/test folds.| 30 | |__:octicons-file-code-24: Code__ | TC literature has no reference code base for ML/DL modeling | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy-to-use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)| 31 | |__:material-monitor-dashboard: Model tracking__ | Most ML frameworks require integration with cloud environments and subscription services | tcbench uses [aimstack](https://aimstack.io/) to save training metrics on local servers, which can later be explored via its web UI or aggregated into report summaries using tcbench | 32 | 33 | ## Features and roadmap 34 | 35 | tcbench is still under development, but (as suggested by its name) ultimately aims 36 | to be a reference framework for benchmarking multiple ML/DL solutions 37 | related to TC. 38 | 39 | At the current stage, tcbench offers 40 | 41 | * Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`. 42 | You can use these datasets and their curated versions independently of tcbench. 43 | Check out the [dataset install](/tcbench/datasets/install/) process and [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets/). 44 | 45 | * Good support for the flowpic input representation and minimal support 46 | for a 1d time series input representation (based on network packet properties). 47 | 48 | * Data augmentation functionality for the flowpic input representation. 49 | 50 | * Modeling via XGBoost, vanilla DL supervision and contrastive learning (via SimCLR or SupCon). 51 | 52 | Most of the functionalities described above relate to our __:material-file-document-outline: [IMC23 paper](/papers/imc23/)__. 53 | 54 | More exciting features, including more datasets and algorithms, will come in the next months. 55 | 56 | Stay tuned :wink:! 57 | 58 | -------------------------------------------------------------------------------- /docs.material/tcbench/install.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: octicons/package-16 3 | --- 4 | 5 | # Install 6 | 7 | First prepare a python virtual environment, for example via :simple-anaconda: conda 8 | ``` 9 | conda create -n tcbench python=3.10 pip 10 | conda activate tcbench 11 | ``` 12 | 13 | tcbench is [available on pypi](https://pypi.org/project/tcbench/) so you can install it via pip 14 | ``` 15 | python -m pip install tcbench 16 | ``` 17 | 18 | All dependencies are automatically pulled. 19 | 20 | Verify the installation was successful by running 21 | ``` 22 | tcbench --version 23 | ``` 24 | 25 | !!! note "Output" 26 | ``` 27 | version: 0.0.21 28 | ``` 29 | 30 | # Developer 31 | 32 | For developing your own projects or contributing 33 | to tcbench, fork/clone the [official repository](https://github.com/tcbenchstack/tcbench) 34 | and install the developer version. 35 | 36 | ``` 37 | python -m pip install .[dev] 38 | ``` 39 | 40 | The only difference with respect to the base version 41 | is the installation of extra dependencies.
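Once installed, a natural next step is exploring the `datasets` subcommands. The snippet below uses the subcommand names listed by `tcbench tree`; the `--name` option is indicative of how a dataset is selected, so double-check the exact flags with `--help`:

```
tcbench datasets info
tcbench datasets install --name ucdavis-icdm19
```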
42 | -------------------------------------------------------------------------------- /docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/.DS_Store -------------------------------------------------------------------------------- /docs/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/assets/_mkdocstrings.css: -------------------------------------------------------------------------------- 1 | 2 | /* Avoid breaking parameter names, etc. in table cells. */ 3 | .doc-contents td code { 4 | word-break: normal !important; 5 | } 6 | 7 | /* No line break before first paragraph of descriptions. */ 8 | .doc-md-description, 9 | .doc-md-description>p:first-child { 10 | display: inline; 11 | } 12 | 13 | /* Max width for docstring sections tables. */ 14 | .doc .md-typeset__table, 15 | .doc .md-typeset__table table { 16 | display: table !important; 17 | width: 100%; 18 | } 19 | 20 | .doc .md-typeset__table tr { 21 | display: table-row; 22 | } 23 | 24 | /* Defaults in Spacy table style. */ 25 | .doc-param-default { 26 | float: right; 27 | } 28 | 29 | /* Keep headings consistent. */ 30 | h1.doc-heading, 31 | h2.doc-heading, 32 | h3.doc-heading, 33 | h4.doc-heading, 34 | h5.doc-heading, 35 | h6.doc-heading { 36 | font-weight: 400; 37 | line-height: 1.5; 38 | color: inherit; 39 | text-transform: none; 40 | } 41 | 42 | h1.doc-heading { 43 | font-size: 1.6rem; 44 | } 45 | 46 | h2.doc-heading { 47 | font-size: 1.2rem; 48 | } 49 | 50 | h3.doc-heading { 51 | font-size: 1.15rem; 52 | } 53 | 54 | h4.doc-heading { 55 | font-size: 1.10rem; 56 | } 57 | 58 | h5.doc-heading { 59 | font-size: 1.05rem; 60 | } 61 | 62 | h6.doc-heading { 63 | font-size: 1rem; 64 | } -------------------------------------------------------------------------------- /docs/assets/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/assets/images/favicon.png -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.hi.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.hi=function(){this.pipeline.reset(),this.pipeline.add(e.hi.trimmer,e.hi.stopWordFilter,e.hi.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.hi.stemmer))},e.hi.wordCharacters="ऀ-ःऄ-एऐ-टठ-यर-िी-ॏॐ-य़ॠ-९॰-ॿa-zA-Za-zA-Z0-90-9",e.hi.trimmer=e.trimmerSupport.generateTrimmer(e.hi.wordCharacters),e.Pipeline.registerFunction(e.hi.trimmer,"trimmer-hi"),e.hi.stopWordFilter=e.generateStopWordFilter("अत अपना अपनी अपने अभी अंदर आदि आप इत्यादि इन इनका इन्हीं इन्हें इन्हों इस इसका इसकी इसके इसमें इसी इसे उन उनका उनकी उनके उनको उन्हीं उन्हें उन्हों उस उसके उसी उसे एक एवं एस ऐसे और कई कर करता करते करना करने करें कहते कहा का काफ़ी कि कितना किन्हें किन्हों किया किर किस किसी किसे की कुछ कुल के को कोई कौन कौनसा गया घर जब जहाँ जा जितना जिन जिन्हें जिन्हों जिस जिसे जीधर जैसा जैसे जो तक तब तरह तिन तिन्हें तिन्हों तिस तिसे तो था थी थे दबारा दिया दुसरा दूसरे दो द्वारा न नके नहीं ना निहायत नीचे ने पर पहले पूरा पे फिर बनी बही बहुत बाद बाला बिलकुल भी भीतर मगर मानो मे में यदि यह यहाँ यही या यिह ये रखें रहा रहे ऱ्वासा लिए लिये लेकिन व वग़ैरह वर्ग वह वहाँ वहीं वाले वुह वे वो सकता सकते सबसे सभी साथ साबुत साभ सारा से सो संग ही हुआ हुई हुए है हैं हो होता होती होते होना होने".split(" ")),e.hi.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var r=e.wordcut;r.init(),e.hi.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(r){return isLunr2?new e.Token(r.toLowerCase()):r.toLowerCase()});var t=i.toString().toLowerCase().replace(/^\s+/,"");return r.cut(t).split("|")},e.Pipeline.registerFunction(e.hi.stemmer,"stemmer-hi"),e.Pipeline.registerFunction(e.hi.stopWordFilter,"stopWordFilter-hi")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.hy.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.hy=function(){this.pipeline.reset(),this.pipeline.add(e.hy.trimmer,e.hy.stopWordFilter)},e.hy.wordCharacters="[A-Za-z԰-֏ff-ﭏ]",e.hy.trimmer=e.trimmerSupport.generateTrimmer(e.hy.wordCharacters),e.Pipeline.registerFunction(e.hy.trimmer,"trimmer-hy"),e.hy.stopWordFilter=e.generateStopWordFilter("դու և եք էիր էիք հետո նաև նրանք որը վրա է որ պիտի են այս մեջ ն իր ու ի այդ որոնք այն կամ էր մի ես համար այլ իսկ էին ենք հետ ին թ էինք մենք նրա նա դուք եմ էի ըստ որպես ում".split(" ")),e.Pipeline.registerFunction(e.hy.stopWordFilter,"stopWordFilter-hy"),e.hy.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}(),e.Pipeline.registerFunction(e.hy.stemmer,"stemmer-hy")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.ja.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.ja=function(){this.pipeline.reset(),this.pipeline.add(e.ja.trimmer,e.ja.stopWordFilter,e.ja.stemmer),r?this.tokenizer=e.ja.tokenizer:(e.tokenizer&&(e.tokenizer=e.ja.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.ja.tokenizer))};var t=new e.TinySegmenter;e.ja.tokenizer=function(i){var n,o,s,p,a,u,m,l,c,f;if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t.toLowerCase()):t.toLowerCase()});for(o=i.toString().toLowerCase().replace(/^\s+/,""),n=o.length-1;n>=0;n--)if(/\S/.test(o.charAt(n))){o=o.substring(0,n+1);break}for(a=[],s=o.length,c=0,l=0;c<=s;c++)if(u=o.charAt(c),m=c-l,u.match(/\s/)||c==s){if(m>0)for(p=t.segment(o.slice(l,c)).filter(function(e){return!!e}),f=l,n=0;n=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor++,!0}return!1},in_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e<=s&&e>=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor--,!0}return!1},out_grouping:function(t,i,s){if(this.cursors||e>3]&1<<(7&e)))return this.cursor++,!0}return!1},out_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e>s||e>3]&1<<(7&e)))return this.cursor--,!0}return!1},eq_s:function(t,i){if(this.limit-this.cursor>1),f=0,l=o0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n+_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n+_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},find_among_b:function(t,i){for(var s=0,e=i,n=this.cursor,u=this.limit_backward,o=0,h=0,c=!1;;){for(var a=s+(e-s>>1),f=0,l=o=0;m--){if(n-l==u){f=-1;break}if(f=r.charCodeAt(n-1-l)-_.s[m])break;l++}if(f<0?(e=a,h=l):(s=a,o=l),e-s<=1){if(s>0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n-_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n-_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},replace_s:function(t,i,s){var e=s.length-(i-t),n=r.substring(0,t),u=r.substring(i);return 
r=n+s+u,this.limit+=e,this.cursor>=i?this.cursor+=e:this.cursor>t&&(this.cursor=t),e},slice_check:function(){if(this.bra<0||this.bra>this.ket||this.ket>this.limit||this.limit>r.length)throw"faulty slice operation"},slice_from:function(r){this.slice_check(),this.replace_s(this.bra,this.ket,r)},slice_del:function(){this.slice_from("")},insert:function(r,t,i){var s=this.replace_s(r,t,i);r<=this.bra&&(this.bra+=s),r<=this.ket&&(this.ket+=s)},slice_to:function(){return this.slice_check(),r.substring(this.bra,this.ket)},eq_v_b:function(r){return this.eq_s_b(r.length,r)}}}},r.trimmerSupport={generateTrimmer:function(r){var t=new RegExp("^[^"+r+"]+"),i=new RegExp("[^"+r+"]+$");return function(r){return"function"==typeof r.update?r.update(function(r){return r.replace(t,"").replace(i,"")}):r.replace(t,"").replace(i,"")}}}}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.ta.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ta=function(){this.pipeline.reset(),this.pipeline.add(e.ta.trimmer,e.ta.stopWordFilter,e.ta.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ta.stemmer))},e.ta.wordCharacters="஀-உஊ-ஏஐ-ஙச-ட஠-னப-யர-ஹ஺-ிீ-௉ொ-௏ௐ-௙௚-௟௠-௩௪-௯௰-௹௺-௿a-zA-Za-zA-Z0-90-9",e.ta.trimmer=e.trimmerSupport.generateTrimmer(e.ta.wordCharacters),e.Pipeline.registerFunction(e.ta.trimmer,"trimmer-ta"),e.ta.stopWordFilter=e.generateStopWordFilter("அங்கு அங்கே அது அதை அந்த அவர் அவர்கள் அவள் அவன் அவை ஆக ஆகவே ஆகையால் ஆதலால் ஆதலினால் ஆனாலும் ஆனால் இங்கு இங்கே இது இதை இந்த இப்படி இவர் இவர்கள் இவள் இவன் இவை இவ்வளவு உனக்கு உனது உன் உன்னால் எங்கு எங்கே எது எதை எந்த எப்படி எவர் எவர்கள் எவள் எவன் எவை எவ்வளவு எனக்கு எனது எனவே என் என்ன என்னால் ஏது ஏன் தனது தன்னால் தானே தான் நாங்கள் நாம் நான் நீ நீங்கள்".split(" ")),e.ta.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.ta.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.ta.stemmer,"stemmer-ta"),e.Pipeline.registerFunction(e.ta.stopWordFilter,"stopWordFilter-ta")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.te.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.te=function(){this.pipeline.reset(),this.pipeline.add(e.te.trimmer,e.te.stopWordFilter,e.te.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.te.stemmer))},e.te.wordCharacters="ఀ-ఄఅ-ఔక-హా-ౌౕ-ౖౘ-ౚౠ-ౡౢ-ౣ౦-౯౸-౿఼ఽ్ౝ౷౤౥",e.te.trimmer=e.trimmerSupport.generateTrimmer(e.te.wordCharacters),e.Pipeline.registerFunction(e.te.trimmer,"trimmer-te"),e.te.stopWordFilter=e.generateStopWordFilter("అందరూ అందుబాటులో అడగండి అడగడం అడ్డంగా అనుగుణంగా అనుమతించు అనుమతిస్తుంది అయితే ఇప్పటికే ఉన్నారు ఎక్కడైనా ఎప్పుడు ఎవరైనా ఎవరో ఏ ఏదైనా ఏమైనప్పటికి ఒక ఒకరు కనిపిస్తాయి కాదు కూడా గా గురించి చుట్టూ చేయగలిగింది తగిన తర్వాత దాదాపు దూరంగా నిజంగా పై ప్రకారం ప్రక్కన మధ్య మరియు మరొక మళ్ళీ మాత్రమే మెచ్చుకో వద్ద వెంట వేరుగా వ్యతిరేకంగా సంబంధం".split(" ")),e.te.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.te.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.te.stemmer,"stemmer-te"),e.Pipeline.registerFunction(e.te.stopWordFilter,"stopWordFilter-te")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.th.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.th=function(){this.pipeline.reset(),this.pipeline.add(e.th.trimmer),r?this.tokenizer=e.th.tokenizer:(e.tokenizer&&(e.tokenizer=e.th.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.th.tokenizer))},e.th.wordCharacters="[฀-๿]",e.th.trimmer=e.trimmerSupport.generateTrimmer(e.th.wordCharacters),e.Pipeline.registerFunction(e.th.trimmer,"trimmer-th");var t=e.wordcut;t.init(),e.th.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t):t});var n=i.toString().replace(/^\s+/,"");return t.cut(n).split("|")}}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.vi.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.vi=function(){this.pipeline.reset(),this.pipeline.add(e.vi.stopWordFilter,e.vi.trimmer)},e.vi.wordCharacters="[A-Za-ẓ̀͐́͑̉̃̓ÂâÊêÔôĂ-ăĐ-đƠ-ơƯ-ư]",e.vi.trimmer=e.trimmerSupport.generateTrimmer(e.vi.wordCharacters),e.Pipeline.registerFunction(e.vi.trimmer,"trimmer-vi"),e.vi.stopWordFilter=e.generateStopWordFilter("là cái nhưng mà".split(" "))}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.zh.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r(require("@node-rs/jieba")):r()(e.lunr)}(this,function(e){return function(r,t){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var i="2"==r.version[0];r.zh=function(){this.pipeline.reset(),this.pipeline.add(r.zh.trimmer,r.zh.stopWordFilter,r.zh.stemmer),i?this.tokenizer=r.zh.tokenizer:(r.tokenizer&&(r.tokenizer=r.zh.tokenizer),this.tokenizerFn&&(this.tokenizerFn=r.zh.tokenizer))},r.zh.tokenizer=function(n){if(!arguments.length||null==n||void 0==n)return[];if(Array.isArray(n))return n.map(function(e){return i?new r.Token(e.toLowerCase()):e.toLowerCase()});t&&e.load(t);var o=n.toString().trim().toLowerCase(),s=[];e.cut(o,!0).forEach(function(e){s=s.concat(e.split(" "))}),s=s.filter(function(e){return!!e});var u=0;return s.map(function(e,t){if(i){var n=o.indexOf(e,u),s={};return s.position=[n,e.length],s.index=t,u=n,new r.Token(e,s)}return e})},r.zh.wordCharacters="\\w一-龥",r.zh.trimmer=r.trimmerSupport.generateTrimmer(r.zh.wordCharacters),r.Pipeline.registerFunction(r.zh.trimmer,"trimmer-zh"),r.zh.stemmer=function(){return function(e){return e}}(),r.Pipeline.registerFunction(r.zh.stemmer,"stemmer-zh"),r.zh.stopWordFilter=r.generateStopWordFilter("的 一 不 在 人 有 是 为 為 以 于 於 上 他 而 后 後 之 来 來 及 了 因 下 可 到 由 这 這 与 與 也 此 但 并 並 个 個 其 已 无 無 小 我 们 們 起 最 再 今 去 好 只 又 或 很 亦 某 把 那 你 乃 它 吧 被 比 别 趁 当 當 从 從 得 打 凡 儿 兒 尔 爾 该 該 各 给 給 跟 和 何 还 還 即 几 幾 既 看 据 據 距 靠 啦 另 么 麽 每 嘛 拿 哪 您 凭 憑 且 却 卻 让 讓 仍 啥 如 若 使 谁 誰 虽 雖 随 隨 同 所 她 哇 嗡 往 些 向 沿 哟 喲 用 咱 则 則 怎 曾 至 致 着 著 诸 諸 自".split(" ")),r.Pipeline.registerFunction(r.zh.stopWordFilter,"stopWordFilter-zh")}}); -------------------------------------------------------------------------------- /docs/assets/stylesheets/palette.06af60db.min.css.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["src/templates/assets/stylesheets/palette/_scheme.scss","../../../../src/templates/assets/stylesheets/palette.scss","src/templates/assets/stylesheets/palette/_accent.scss","src/templates/assets/stylesheets/palette/_primary.scss","src/templates/assets/stylesheets/utilities/_break.scss"],"names":[],"mappings":"AA2BA,cAGE,6BAME,sDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CACA,mDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CAGA,mDAAA,CACA,gDAAA,CAGA,0BAAA,CACA,mCAAA,CAGA,iCAAA,CACA,kCAAA,CACA,mCAAA,CACA,mCAAA,CACA,kCAAA,CACA,iCAAA,CACA,+CAAA,CACA,6DAAA,CACA,gEAAA,CACA,4DAAA,CACA,4DAAA,CACA,6DAAA,CAGA,6CAAA,CAGA,+CAAA,CAGA,uDAAA,CACA,6DAAA,CACA,2DAAA,CAGA,iCAAA,CAGA,yDAAA,CACA,iEAAA,CAGA,mDAAA,CACA,mDAAA,CAGA,qDAAA,CACA,uDAAA,CAGA,8DAAA,CAKA,8DAAA,CAKA,0DAAA,CAvEA,iBCeF,CD6DE,kHAEE,YC3DJ,CDkFE,yDACE,4BChFJ,CD+EE,2DACE,4BC7EJ,CD4EE,gEACE,4BC1EJ,CDyEE,2DACE,4BCvEJ,CDsEE,yDACE,4BCpEJ,CDmEE,0DACE,4BCjEJ,CDgEE,gEACE,4BC9DJ,CD6DE,0DACE,4BC3DJ,CD0DE,2OACE,4BC/CJ,CDsDA,+FAGE,iCCpDF,CACF,CC/CE,2BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD2CN,CCrDE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDkDN,CC5DE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDyDN,CCnEE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDgEN,CC1EE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDuEN,CCjFE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD8EN,CCxFE,kCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDqFN,CC/FE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD4FN,CCtGE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDmGN,CC7GE,6BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD0GN,CCpHE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDiHN,CC3HE,4BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCD2HN,CClIE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDkIN,CCzIE,6BACE,yBAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDyIN,CChJE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDgJN,CCvJE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDoJN,CEzJE,4BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsJN,CEjKE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8JN,CEzKE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsKN,CEjLE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8KN,CEzLE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsLN,CEjME,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8LN,CEzME,mCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsMN,CEjNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8MN,CEzNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsNN,CEjOE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8NN,CEzOE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsON,CEjPE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiPN,CEzPE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyPN,CEjQE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiQN,CEzQE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyQN,CEjRE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8QN,CEzRE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsRN,CEjSE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BF0RN,CE1SE,kCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BFmSN,CEpRE,sEACE,4BFuRJ,CExRE,+DACE,4BF2RJ,CE5RE,iEACE,4BF+RJ,CEhSE,gEACE,4BFmSJ,CEpSE,iEACE,4BFuSJ,CE9RA,8BACE,mDAAA,CACA,4DAAA,CACA,0DAAA,CACA,oDAAA,CACA,2DAAA,CAGA,4BF+RF,CE5RE,yCACE,+BF8RJ,CE3RI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCF+RN,CG3MI,mCD1EA,+CACE,8CFwRJ,CErRI,qDACE,8CFuRN,CElRE,iEACE,mCFoRJ,CACF,CGtNI,sCDvDA,uCACE,oCFgRJ,CACF,CEvQA,8BACE,kDAAA,CACA,4DAAA,CACA,wDAAA,CACA,oDAAA,CACA,6DAAA,CAGA,4BFwQF,CErQE,yCACE,+BFuQJ,CEpQI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCFwQN,CEjQE,yCACE,6CFmQJ,CG5NI,0CDhCA,8CACE,gDF
+PJ,CACF,CGjOI,0CDvBA,iFACE,6CF2PJ,CACF,CGzPI,sCDKA,uCACE,6CFuPJ,CACF","file":"palette.css"} -------------------------------------------------------------------------------- /docs/css/fonts.css: -------------------------------------------------------------------------------- 1 | .md-typeset code, 2 | .md-typeset kbd, 3 | .md-typeset pre { 4 | font-feature-settings: "kern", "liga"; 5 | font-variant-ligatures: normal; 6 | } 7 | 8 | :root{ 9 | --md-text-font:"Roboto"; 10 | --md-code-font:"" 11 | } 12 | -------------------------------------------------------------------------------- /docs/css/jupyter-notebook.css: -------------------------------------------------------------------------------- 1 | .jp-RenderedHTMLCommon p { 2 | margin: 0pt; 3 | } 4 | 5 | .jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt { 6 | display: none; 7 | } 8 | 9 | .jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt { 10 | display: none; 11 | } 12 | 13 | .jupyter-wrapper .jp-OutputArea-output pre { 14 | border-left: solid 5px #e0e0e0; 15 | padding-left: 5pt; 16 | } 17 | -------------------------------------------------------------------------------- /docs/css/material.css: -------------------------------------------------------------------------------- 1 | /* More space at the bottom of the page. */ 2 | .md-main__inner { 3 | margin-bottom: 1.5rem; 4 | } 5 | -------------------------------------------------------------------------------- /docs/css/mkdocstrings.css: -------------------------------------------------------------------------------- 1 | /* Indentation. */ 2 | div.doc-contents:not(.first) { 3 | padding-left: 25px; 4 | border-left: 4px solid rgba(230, 230, 230); 5 | margin-bottom: 80px; 6 | } 7 | 8 | /* Avoid breaking parameters name, etc. in table cells. 
*/ 9 | td code { 10 | word-break: normal !important; 11 | } 12 | -------------------------------------------------------------------------------- /docs/css/style.css: -------------------------------------------------------------------------------- 1 | /* Mark external links as such (also in nav) */ 2 | a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after { 3 | /* https://primer.style/octicons/link-external-16 */ 4 | background-image: url('data:image/svg+xml,'); 5 | height: 0.8em; 6 | width: 0.8em; 7 | margin-left: 0.2em; 8 | content: ' '; 9 | display: inline-block; 10 | } 11 | 12 | /* More space at the bottom of the page */ 13 | .md-main__inner { 14 | margin-bottom: 1.5rem; 15 | } 16 | -------------------------------------------------------------------------------- /docs/css/tables_style.css: -------------------------------------------------------------------------------- 1 | th, td { 2 | border: 1px solid var(--md-typeset-table-color); 3 | border-spacing: 0; 4 | border-bottom: none; 5 | border-left: none; 6 | border-top: none; 7 | } 8 | 9 | th { 10 | background:var(--md-primary-fg-color); 11 | color:white; 12 | } 13 | 14 | .md-typeset table:not([class]) th { 15 | font-weight: 200; 16 | } 17 | 18 | .md-typeset__table { 19 | line-height: 1; 20 | } 21 | 22 | .md-typeset__table table:not([class]) { 23 | font-size: .74rem; 24 | border-right: none; 25 | } 26 | 27 | .md-typeset__table table:not([class]) td, 28 | .md-typeset__table table:not([class]) th { 29 | padding: 9px; 30 | } 31 | 32 | /* light mode alternating table bg colors */ 33 | .md-typeset__table tr:nth-child(2n) { 34 | background-color: #f8f8f8; 35 | } 36 | 37 | /* dark mode alternating table bg colors */ 38 | [data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) { 39 | background-color: hsla(var(--md-hue),25%,25%,1) 40 | } 41 | -------------------------------------------------------------------------------- /docs/datasets/datasets.csv: -------------------------------------------------------------------------------- 1 | Name,Classes, PDF, Data, Code, Auto-download 2 | ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12: 3 | mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark: 4 | mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark: 5 | utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12: 6 | -------------------------------------------------------------------------------- /docs/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log1.png -------------------------------------------------------------------------------- /docs/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log2.png 
-------------------------------------------------------------------------------- /docs/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log3.png -------------------------------------------------------------------------------- /docs/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run1.png -------------------------------------------------------------------------------- /docs/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run2.png -------------------------------------------------------------------------------- /docs/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run3.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_mirage19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage19.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_mirage22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage22.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_ucdavis-icdm19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_ucdavis-icdm19.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_utmobilenet21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_utmobilenet21.png -------------------------------------------------------------------------------- /docs/github-mark/github-mark-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark-white.png -------------------------------------------------------------------------------- /docs/github-mark/github-mark-white.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/github-mark/github-mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark.png 
-------------------------------------------------------------------------------- /docs/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/index.md.DEPRECATED: -------------------------------------------------------------------------------- 1 | This website documents code and data artifacts related to the IMC23 submission #132 titled 2 | 3 | !!! quote "" 4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__ 5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations” 6 | from IMC’22* 7 | 8 | Our submission investigates the role of data 9 | augmentation by using both supervised 10 | and contrastive learning techniques 11 | across [4 datasets](datasets/install). 12 | 13 | It replicates and reproduces the following paper 14 | from the IMC22 program 15 | 16 | 17 | ``` 18 | @inproceedings{10.1145/3517745.3561436, 19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval}, 20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations}, 21 | year = {2022}, 22 | isbn = {9781450392594}, 23 | publisher = {Association for Computing Machinery}, 24 | address = {New York, NY, USA}, 25 | url = {https://doi.org/10.1145/3517745.3561436}, 26 | doi = {10.1145/3517745.3561436}, 27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference}, 28 | pages = {647–654}, 29 | numpages = {8}, 30 | location = {Nice, France}, 31 | series = {IMC '22} 32 | } 33 | ``` 34 | 35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`, 36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of 37 | frequency histograms extracted from consecutive time windows of the flow -- 38 | applied to the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset. 39 | 40 | In the first part of the submission we investigate how augmentations 41 | affect classification performance -- the study considers 3 image transformations (*rotation, 42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*) 43 | applied to packet timestamps -- when used either in a fully supervised setting or via 44 | contrastive learning. 45 | 46 | !!! info "Key takeaways from reproducibility" 47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 48 | Specifically, we uncover a data shift present in the dataset itself which explains our results; 49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`. 50 | 51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently 52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`, 53 | we do not find statistically significant differences across the different augmentations. 54 | 55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet 56 | relying on more samples is beneficial to boost performance. 57 | 58 | Then, in the second part of the submission we replicate the 59 | analysis testing the same 6 augmentations across 3 other datasets. 60 | 61 | !!!
info "Key takeaways from replicability" 62 | Using multiple datasets allow to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e., 63 | *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) 64 | is superior to the alternative transformations presented in the paper. 65 | 66 | 67 | ## Website conventions 68 | 69 | * :material-file-document-outline:`imc22-paper` is used to the reference the replicated/reproduced paper. 70 | 71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available. 72 | 73 | * :material-link-off: suggests a link is expected to be added but is not yet available. 74 | -------------------------------------------------------------------------------- /docs/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/modeling/figs/aim_home-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_home-page.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log1.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log2.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log3.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run1.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run2.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run3.png -------------------------------------------------------------------------------- /docs/objects.inv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/objects.inv -------------------------------------------------------------------------------- /docs/overrides/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark-white.png -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark-white.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark.png -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/overrides/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png -------------------------------------------------------------------------------- 
/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/sitemap.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/sitemap.xml.gz -------------------------------------------------------------------------------- /notebooks/imc23/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 tcbenchstack team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml 2 | 3 | [build-system] 4 | requires = ["setuptools>=61.0.0", "wheel"] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "tcbench" 9 | version = "0.0.22" 10 | description = "An ML/DL framework for Traffic Classification" 11 | readme = "README.md" 12 | authors = [{ name = "Alessandro Finamore", email = "alessandro.finamore@huawei.com" }] 13 | license = { file = "LICENSE" } 14 | classifiers = [ 15 | "License :: OSI Approved :: MIT License", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3", 18 | ] 19 | keywords = ["machine learning", "deep learning", "traffic classification", "time series"] 20 | dependencies = [ 21 | "aim == 3.17.4", 22 | "autorank", 23 | "click", 24 | "dask <= 2023.5.1", 25 | "distributed", 26 | "numpy", 27 | "pandas==2.0.2", 28 | "pyarrow==12.0.0", 29 | "pyyaml", 30 | "requests", 31 | "rich", 32 | "rich-click", 33 | "scikit-learn", 34 | "statsmodels", 35 | "torch==2.0.1", 36 | "torchsummary", 37 | "torchvision==0.15.2", 38 | "xgboost==1.7.5" 39 | ] 40 | requires-python = ">=3.9" 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "black", 45 | "bumpver", 46 | "click-plugins", 47 | "isort", 48 | "jupyterlab", 49 | "matplotlib", 50 | "mkdocs", 51 | "mkdocs-autorefs", 52 | "mkdocs-glightbox", 53 | "mkdocs-jupyter", 54 | "mkdocs-material", 55 | "mkdocs-material-extensions", 56 | "mkdocs-table-reader-plugin", 57 | "mkdocstrings", 58 | "mkdocstrings-python", 59 | "pip-tools", 60 | "pytest", 61 | "pytest-helpers-namespace", 62 | "seaborn" 63 | ] 64 | 65 | [project.urls] 66 | Homepage = "https://tcbenchstack.github.io/tcbench/" 67 | 68 | [project.scripts] 69 | tcbench = "tcbench.cli.main:main" 70 | 71 | [tool.bumpver] 72 | current_version = "0.0.22" 73 | version_pattern = "MAJOR.MINOR.PATCH" 74 | commit_message = "bump version {old_version} -> {new_version}" 75 | tag_message = "{new_version}" 76 | tag_scope = "default" 77 | pre_commit_hook = "" 78 | post_commit_hook = "" 79 | commit = true 80 | tag = true 81 | push = false 82 | 83 | [tool.bumpver.file_patterns] 84 | "pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"'] 85 | "src/tcbench/__init__.py" = ["{version}"] 86 | -------------------------------------------------------------------------------- /src/tcbench/FIGSHARE_RESOURCES.yml: -------------------------------------------------------------------------------- 1 | imc23: 2 | notebooks: 3 | url: "https://figshare.com/ndownloader/files/42550111" 4 | md5: "224764907e634fcab3ae1e20bc58bbbf" 5 | dst_folder: "./" 6 | 7 | pytest_resources: 8 | url: "https://figshare.com/ndownloader/files/42538741" 9 | md5: "3a2482ad6359ba48be8728221e42f727" 10 | dst_folder: "./tests" 11 | 12 | ml_artifacts: 13 | url: "https://figshare.com/ndownloader/files/42538675" 14 | md5: "a4b53b2d0b95995c5f14bbf2f8489c7c" 15 | dst_folder: "notebooks/imc23" 16 | -------------------------------------------------------------------------------- /src/tcbench/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | __version__ = "0.0.22" 4 | 5 | DEFAULT_AIM_REPO = pathlib.Path("./aim-repo") 6 | DEFAULT_ARTIFACTS_FOLDER = pathlib.Path("./aim-repo/artifacts") 7 | 8 | DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS = ( 9 | "noaug", 10 | "rotate", 11 | "horizontalflip", 12 | 
"colorjitter", 13 | "packetloss", 14 | "changertt", 15 | "timeshift", 16 | ) 17 | DEFAULT_CAMPAIGN_AUGATLOAD_SEEDS = (12345, 42, 666) 18 | DEFAULT_CAMPAIGN_AUGATLOAD_FLOWPICDIMS = (32, 64, 1500) 19 | DEFAULT_CAMPAIGN_AUGATLOAD_PKTSERIESLEN = (10, 30) 20 | 21 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_FLOWPICDIMS = (32, 64, 1500) 22 | DEFAULT_CAMPAING_CONTRALEARNANDFINETUNE_SEEDS_CONTRALEARN = (12345, 1, 2, 3, 4) 23 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_SEEDS_FINETUNE = (12345, 1, 2, 3, 4) 24 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_AUGMENTATIONS = "changertt,timeshift" 25 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_VALID_AUGMENTATIONS = tuple([ 26 | aug_name 27 | for aug_name in DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS 28 | if aug_name != "noaug" 29 | ]) 30 | 31 | from tcbench.libtcdatasets.datasets_utils import ( 32 | get_datasets_root_folder, 33 | get_dataset_folder, 34 | DATASETS, 35 | load_parquet, 36 | ) 37 | 38 | from tcbench.modeling import ( 39 | MODELING_DATASET_TYPE, 40 | MODELING_INPUT_REPR_TYPE, 41 | MODELING_METHOD_TYPE, 42 | ) 43 | -------------------------------------------------------------------------------- /src/tcbench/cli/__init__.py: -------------------------------------------------------------------------------- 1 | def get_rich_console(): 2 | from rich.console import Console 3 | from rich.theme import Theme 4 | import sys 5 | import pathlib 6 | 7 | curr_module = sys.modules[__name__] 8 | folder_module = pathlib.Path(curr_module.__file__).parent 9 | return Console(theme=Theme.read(folder_module / "rich.theme")) 10 | 11 | 12 | console = get_rich_console() 13 | -------------------------------------------------------------------------------- /src/tcbench/cli/clickutils.py: -------------------------------------------------------------------------------- 1 | import rich_click as click 2 | 3 | from typing import List, Dict, Any 4 | 5 | from tcbench import DATASETS 6 | from tcbench.modeling import MODELING_METHOD_TYPE, MODELING_INPUT_REPR_TYPE 7 | 8 | 9 | def _create_choice(enumeration): 10 | return click.Choice(list(map(lambda x: x.value, enumeration)), case_sensitive=False) 11 | 12 | 13 | def _create_choice_callback(enumeration): 14 | return lambda c, p, v: enumeration.from_str(v) 15 | 16 | 17 | CLICK_TYPE_DATASET_NAME = _create_choice(DATASETS) 18 | CLICK_CALLBACK_DATASET_NAME = _create_choice_callback(DATASETS) 19 | 20 | CLICK_TYPE_METHOD_NAME = _create_choice(MODELING_METHOD_TYPE) 21 | CLICK_CALLBACK_METHOD_NAME = _create_choice_callback(MODELING_METHOD_TYPE) 22 | 23 | CLICK_TYPE_INPUT_REPR = _create_choice(MODELING_INPUT_REPR_TYPE) 24 | CLICK_CALLBACK_INPUT_REPR = _create_choice_callback(MODELING_INPUT_REPR_TYPE) 25 | 26 | CLICK_CALLBACK_TOINT = lambda c, p, v: int(v) 27 | 28 | 29 | def compose_help_string_from_list(items:List[str]) -> str: 30 | """Compose a string from a list""" 31 | return "\[" + f'{"|".join(items)}' + "]." 
32 | 33 | 34 | def convert_params_dict_to_list(params:Dict[str,Any], skip_params:List[str]=None) -> List[str]: 35 | """Convert a dictionary of (name, value) parameter pairs into a list of "--name value" strings""" 36 | if skip_params is None: 37 | skip_params = set() 38 | 39 | l = [] 40 | for par_name, par_value in params.items(): 41 | if par_name in skip_params or par_value == False or par_value is None: 42 | continue 43 | par_name = par_name.replace("_", "-") 44 | if par_value == True: 45 | l.append(f"--{par_name}") 46 | else: 47 | l.append(f"--{par_name} {str(par_value)}") 48 | 49 | return l 50 | 51 | 52 | def help_append_choices(help_string:str, values:List[str]) -> str: 53 | """Append to a help string a styled version of a list of values""" 54 | text = "|".join([f"[bold]{text}[/bold]" for text in values]) 55 | return f"{help_string} [yellow]Choices: [{text}][/yellow]" 56 | -------------------------------------------------------------------------------- /src/tcbench/cli/command_fetchartifacts.py: -------------------------------------------------------------------------------- 1 | import rich_click as click 2 | 3 | import pathlib 4 | import shutil 5 | import tempfile 6 | 7 | from tcbench.cli import clickutils 8 | from tcbench.cli import console 9 | 10 | click.rich_click.SHOW_ARGUMENTS = True 11 | click.rich_click.USE_RICH_MARKUP = True 12 | 13 | FIGSHARE_RESOURCES_FNAME = "FIGSHARE_RESOURCES.yml" 14 | 15 | def _copy_file(src, dst): 16 | keyword = "installing" 17 | if pathlib.Path(dst).exists(): 18 | keyword = "overwriting" 19 | print(f"{keyword}: {dst}") 20 | shutil.copy2(src, dst) 21 | 22 | @click.command("fetch-artifacts") 23 | @click.pass_context 24 | def fetchartifacts(ctx): 25 | """Download from figshare and install all required artifacts.""" 26 | from tcbench.libtcdatasets import datasets_utils 27 | import requests 28 | 29 | check_exists = [ 30 | pathlib.Path("./src/tcbench"), 31 | pathlib.Path("./tests"), 32 | pathlib.Path("./notebooks/tutorials"), 33 | pathlib.Path("./pyproject.toml"), 34 | ] 35 | if any(not folder.exists() for folder in check_exists): 36 | raise RuntimeError("Run the command from within the cloned github repository") 37 | 38 | fname = datasets_utils._get_module_folder().parent / FIGSHARE_RESOURCES_FNAME 39 | data = datasets_utils.load_yaml(fname) 40 | for primary_key in data: 41 | for secondary_key in data[primary_key]: 42 | print(f"fetching: {primary_key} / {secondary_key}") 43 | 44 | params = data[primary_key][secondary_key] 45 | 46 | url = params["url"] 47 | dst_folder = params["dst_folder"] 48 | with tempfile.TemporaryDirectory() as tmpfolder: 49 | tmpfolder = pathlib.Path(tmpfolder) 50 | try: 51 | path = datasets_utils.download_url(url, tmpfolder) 52 | except requests.exceptions.SSLError: 53 | path = datasets_utils.download_url(url, tmpfolder, verify=False) 54 | 55 | untar_folder = tmpfolder / "__untar__" 56 | datasets_utils.untar(path, untar_folder) 57 | path.unlink() 58 | shutil.copytree(untar_folder, dst_folder, copy_function=_copy_file, dirs_exist_ok=True) 59 | -------------------------------------------------------------------------------- /src/tcbench/cli/main.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import iter_entry_points 2 | 3 | import rich_click as click 4 | 5 | import tcbench 6 | from tcbench import cli 7 | from click_plugins import with_plugins 8 | 9 | 10 | @with_plugins(iter_entry_points('click_command_tree')) 11 | @click.group(invoke_without_command=True) 12 | @click.pass_context 13 | 
@click.option( 14 | "--version", "show_version", is_flag=True, help="Show tcbench version and exit." 15 | ) 16 | def main(ctx, show_version): 17 | if show_version: 18 | import sys 19 | cli.console.print(f"version: {tcbench.__version__}") 20 | sys.exit() 21 | 22 | 23 | from tcbench.cli.command_datasets import datasets 24 | from tcbench.cli.command_singlerun import singlerun 25 | from tcbench.cli.command_campaign import campaign 26 | from tcbench.cli.command_aimrepo import aimrepo 27 | from tcbench.cli.command_fetchartifacts import fetchartifacts 28 | 29 | main.add_command(datasets) 30 | main.add_command(singlerun) 31 | main.add_command(campaign) 32 | main.add_command(aimrepo) 33 | main.add_command(fetchartifacts) 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /src/tcbench/cli/rich.theme: -------------------------------------------------------------------------------- 1 | [styles] 2 | progress.description = none 3 | progress.filesize = none 4 | progress.filesize.total = none 5 | progress.download = none 6 | progress.elapsed = none 7 | progress.percentage = none 8 | progress.remaining = none 9 | progress.data.speed = none 10 | progress.spinner = none 11 | repr.ellipsis = none 12 | repr.indent = none 13 | repr.error = none 14 | repr.str = none 15 | repr.brace = none 16 | repr.comma = none 17 | repr.ipv4 = none 18 | repr.ipv6 = none 19 | repr.eui48 = none 20 | repr.eui64 = none 21 | repr.tag_start = none 22 | repr.tag_name = none 23 | repr.tag_contents = none 24 | repr.tag_end = none 25 | repr.attrib_name = none 26 | repr.attrib_equal = none 27 | repr.attrib_value = none 28 | repr.number = none 29 | repr.number_complex = none 30 | repr.bool_true = none 31 | repr.bool_false = none 32 | repr.none = none 33 | repr.url = none 34 | repr.uuid = none 35 | repr.call = none 36 | repr.path = none 37 | repr.filename = none 38 | rule.line = none 39 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import datasets_utils 2 | # 3 | # from . import ucdavis_icdm19_csv_to_parquet 4 | # from . import ucdavis_icdm19_generate_splits 5 | # 6 | # from . import utmobilenet21_csv_to_parquet 7 | # from . import utmobilenet21_generate_splits 8 | # 9 | # from . import mirage19_json_to_parquet 10 | # from . import mirage19_generate_splits 11 | # 12 | # from . import mirage22_json_to_parquet 13 | # from . 
import mirage22_generate_splits 14 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/mirage22_json_to_parquet.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import argparse 5 | import pathlib 6 | import tempfile 7 | 8 | from tcbench.libtcdatasets import mirage19_json_to_parquet 9 | 10 | 11 | def postprocess(df: pd.DataFrame) -> pd.DataFrame: 12 | """Process the loaded MIRAGE JSON by 13 | (1) adding a background class; 14 | (2) adding an "app" column with label information, and encoding it as pandas category 15 | """ 16 | df = df.assign( 17 | app=np.where( 18 | df["android_name"] == df["flow_metadata_bf_label"], 19 | df["android_name"], 20 | "background", 21 | ) 22 | ) 23 | df = df.assign( 24 | app=np.where( 25 | df["flow_metadata_bf_activity"] == "Unknown", "background", df["app"] 26 | ) 27 | ) 28 | df = df.assign( 29 | app=df["app"].astype("category"), 30 | packets=df["packet_data_l4_payload_bytes"].apply(len), 31 | ) 32 | return df 33 | 34 | 35 | def main(args: argparse.Namespace) -> None: 36 | if (args.input_folder / "MIRAGE-COVID-CCMA-2022").exists(): 37 | args.input_folder = args.input_folder / "MIRAGE-COVID-CCMA-2022" / "Raw_JSON" 38 | 39 | df = mirage19_json_to_parquet.main( 40 | args.input_folder, save_as=None, workers=args.num_workers 41 | ) 42 | df = postprocess(df) 43 | 44 | fname = args.output_folder / "mirage22.parquet" 45 | if not fname.parent.exists(): 46 | fname.parent.mkdir(parents=True) 47 | print(f"saving: {fname}") 48 | df.to_parquet(fname) 49 | 50 | 51 | def cli_parser(): 52 | return mirage19_json_to_parquet.cli_parser() 53 | 54 | if __name__ == "__main__": 55 | args = cli_parser().parse_args() 56 | main(args) 57 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/DATASETS.yml: -------------------------------------------------------------------------------- 1 | ucdavis-icdm19: 2 | num_classes: 5 3 | paper: "https://arxiv.org/pdf/1812.09761.pdf" 4 | website: "https://github.com/shrezaei/Semi-supervised-Learning-QUIC-" 5 | data: "https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE" 6 | data_curated: "https://figshare.com/ndownloader/files/42438621" 7 | data_curated_md5: "36294e70968fe0a30a054e626cb87afe" 8 | 9 | mirage19: 10 | num_classes: 20 11 | paper: "http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf" 12 | website: "https://traffic.comics.unina.it/mirage/mirage-2019.html" 13 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz" 14 | # data_curated: "" 15 | # data_curated_md5: "" 16 | 17 | mirage22: 18 | num_classes: 9 19 | paper: "http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf" 20 | website: "https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html" 21 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip" 22 | # data_curated: "" 23 | # data_curated_md5: "" 24 | 25 | utmobilenet21: 26 | num_classes: 17 27 | paper: "https://ieeexplore.ieee.org/abstract/document/9490678/" 28 | website: "https://github.com/YuqiangHeng/UTMobileNetTraffic2021" 29 | data: "https://utexas.app.box.com/s/okrimcsz1mn9ec4j667kbb00d9gt16ii" 30 | data_curated: "https://figshare.com/ndownloader/files/42438624" 31 | data_curated_md5: "789b01c4f7dedfbb781b89e6f2dcbb1a" 32 | 
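The DATASETS.yml registry above backs the dataset helpers re-exported from the package root (see src/tcbench/__init__.py earlier in this dump). A minimal, hedged usage sketch follows; the load_parquet() call shape is an assumption inferred from those re-exports and from how the tests reference tcbench.DATASETS, so treat it as illustrative rather than the documented API:

import tcbench

# enum members such as tcbench.DATASETS.UCDAVISICDM19 mirror the YAML keys above;
# this assumes the curated dataset has already been installed
df = tcbench.load_parquet(tcbench.DATASETS.UCDAVISICDM19)  # assumed signature
print(df["app"].value_counts())  # "app" is the label column per the schema files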
-------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/DATASETS_FILES_MD5.yml: -------------------------------------------------------------------------------- 1 | ucdavis-icdm19: 2 | ucdavis-icdm19.parquet: "f4333724f03a0ccaa7d87ba878148f34" 3 | imc23: 4 | test_split_human.parquet: "5a6f27a51d6dde6bb3b59d6757c00c1f" 5 | test_split_script.parquet: "93a49d51513f7b1dec0dc7ccf6f139b5" 6 | train_split_0.parquet: "98bc4a849c2f2e3abf259be26eed2f06" 7 | train_split_1.parquet: "2f7b849325c1f4d710b761d2d48a84f2" 8 | train_split_2.parquet: "1d74dc9dc389a72a0f6b29e2be3b72e3" 9 | train_split_3.parquet: "323bb4504d23d25b25ef31b8b76205f5" 10 | train_split_4.parquet: "5a0b00ed58e365551f9ef12956caa0d0" 11 | 12 | mirage19: 13 | mirage19.parquet: "aa0c4cbffc6f5dffba6718a7ab43f451" 14 | imc23: 15 | mirage19_filtered_minpkts10_splits.parquet: "12c83fb39eb61924aa411ca2d663eb94" 16 | mirage19_filtered_minpkts10.parquet: "75851ec3312751a8a3dca79a4c24e2fb" 17 | 18 | mirage22: 19 | mirage22.parquet: "4b8f5bfa528989ee857934f7611b052e" 20 | imc23: 21 | mirage22_filtered_minpkts10.parquet: "e117cbe37eba5c1235e4df787cf3b2d6" 22 | mirage22_filtered_minpkts10_splits.parquet: "a445db52fe1ec342fed7eb1d765c9825" 23 | mirage22_filtered_minpkts1000.parquet: "6312e82a0526071ab269a92d5eb745c6" 24 | mirage22_filtered_minpkts1000_splits.parquet: "21396f8a9d5033cf049407c4dc573195" 25 | 26 | #utmobilenet21: 27 | # utmobilenet21.parquet: "863e35d558c7ef9f4f5d0e552a57f3cb" 28 | # imc23: 29 | # utmobilenet21_filtered_minpkts10.parquet: "102e125e3236a1e8211bfd5e8272afdb" 30 | # utmobilenet21_filtered_minpkts10_splits.parquet: "3ea1378753f1b4e1f2773bd750e56d1b" -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/ucdavis-icdm19.yml: -------------------------------------------------------------------------------- 1 | __all__: 2 | row_id: 3 | dtype: int 4 | description: "Unique row id" 5 | app: 6 | dtype: category 7 | description: "Label of the flow" 8 | flow_id: 9 | dtype: str 10 | description: "Original filename" 11 | partition: 12 | dtype: str 13 | description: "Partition related to the flow" 14 | num_pkts: 15 | dtype: int 16 | description: "Number of packets in the flow" 17 | duration: 18 | dtype: float 19 | description: "Duration of the flow" 20 | bytes: 21 | dtype: int 22 | description: "Number of bytes of the flow" 23 | unixtime: 24 | dtype: str 25 | description: "Absolute time of each packet" 26 | timetofirst: 27 | dtype: np.array 28 | description: "Delta between a packet and the first packet of the flow" 29 | pkts_size: 30 | dtype: np.array 31 | description: "Packet size time series" 32 | pkts_dir: 33 | dtype: np.array 34 | description: "Packet direction time series" 35 | pkts_iat: 36 | dtype: np.array 37 | description: "Packet inter-arrival time series" 38 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/utmobilenet21.yml: -------------------------------------------------------------------------------- 1 | __unfiltered__: 2 | row_id: 3 | dtype: int 4 | description: "Unique flow id" 5 | src_ip: 6 | dtype: str 7 | description: "Source ip of the flow" 8 | src_port: 9 | dtype: int 10 | description: "Source port of the flow" 11 | dst_ip: 12 | dtype: str 13 | description: "Destination ip of the flow" 14 | dst_port: 15 | dtype: int 16 | description: "Destination port of the flow" 17 | ip_proto: 18 | dtype: int 19 | description: "Protocol 
of the flow (TCP or UDP)" 20 | first: 21 | dtype: float 22 | description: "Timestamp of the first packet" 23 | last: 24 | dtype: float 25 | description: "Timestamp of the last packet" 26 | duration: 27 | dtype: float 28 | description: "Duration of the flow" 29 | packets: 30 | dtype: int 31 | description: "Number of packets in the flow" 32 | bytes: 33 | dtype: int 34 | description: "Number of bytes in the flow" 35 | partition: 36 | dtype: str 37 | description: "From which folder the flow was originally stored" 38 | location: 39 | dtype: str 40 | description: "Label originally provided by the dataset (see the related paper for details)" 41 | fname: 42 | dtype: str 43 | description: "Original filename where the packets of the flow come from" 44 | app: 45 | dtype: category 46 | description: "Final label of the flow, encoded as pandas category" 47 | pkts_size: 48 | dtype: np.array 49 | description: "Packet size time series" 50 | pkts_dir: 51 | dtype: np.array 52 | description: "Packet diretion time series" 53 | timetofirst: 54 | dtype: np.array 55 | description: "Delta between the each packet timestamp the first packet of the flow" 56 | 57 | __filtered__: 58 | row_id: 59 | dtype: int 60 | description: "Unique flow id" 61 | src_ip: 62 | dtype: str 63 | description: "Source ip of the flow" 64 | src_port: 65 | dtype: int 66 | description: "Source port of the flow" 67 | dst_ip: 68 | dtype: str 69 | description: "Destination ip of the flow" 70 | dst_port: 71 | dtype: int 72 | description: "Destination port of the flow" 73 | ip_proto: 74 | dtype: int 75 | description: "Protocol of the flow (TCP or UDP)" 76 | first: 77 | dtype: float 78 | description: "Timestamp of the first packet" 79 | last: 80 | dtype: float 81 | description: "Timestamp of the last packet" 82 | duration: 83 | dtype: float 84 | description: "Duration of the flow" 85 | packets: 86 | dtype: int 87 | description: "Number of packets in the flow" 88 | bytes: 89 | dtype: int 90 | description: "Number of bytes in the flow" 91 | partition: 92 | dtype: str 93 | description: "From which folder the flow was originally stored" 94 | location: 95 | dtype: str 96 | description: "Label originally provided by the dataset (see the related paper for details)" 97 | fname: 98 | dtype: str 99 | description: "Original filename where the packets of the flow come from" 100 | app: 101 | dtype: category 102 | description: "Final label of the flow, encoded as pandas category" 103 | pkts_size: 104 | dtype: np.array 105 | description: "Packet size time series" 106 | pkts_dir: 107 | dtype: np.array 108 | description: "Packet diretion time series" 109 | timetofirst: 110 | dtype: np.array 111 | description: "Delta between the each packet timestamp the first packet of the flow" 112 | 113 | __splits__: 114 | train_indexes: 115 | dtype: np.array 116 | description: "row_id of training samples" 117 | val_indexes: 118 | dtype: np.array 119 | description: "row_id of validation samples" 120 | test_indexes: 121 | dtype: np.array 122 | description: "row_id of test samples" 123 | split_index: 124 | dtype: int 125 | description: "Split id" 126 | -------------------------------------------------------------------------------- /src/tcbench/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MODELING_DATASET_TYPE(Enum): 5 | """An enumeration to specify which type of dataset to load""" 6 | 7 | TRAIN_VAL = "train_val_datasets" 8 | TEST = "test_dataset" 9 | TRAIN_VAL_LEFTOVER = 
"train_val_leftover_dataset" 10 | FINETUNING = "for_finetuning_dataset" 11 | 12 | 13 | class MODELING_INPUT_REPR_TYPE(Enum): 14 | FLOWPIC = "flowpic" 15 | PKTSERIES = "pktseries" 16 | 17 | @classmethod 18 | def from_str(cls, text): 19 | for member in cls.__members__.values(): 20 | if member.value == text: 21 | return member 22 | return None 23 | 24 | def __str__(self): 25 | return self.value 26 | 27 | 28 | class MODELING_METHOD_TYPE(Enum): 29 | MONOLITHIC = "monolithic" 30 | XGBOOST = "xgboost" 31 | SIMCLR = "simclr" 32 | 33 | @classmethod 34 | def from_str(cls, text): 35 | for member in cls.__members__.values(): 36 | if member.value == text: 37 | return member 38 | return None 39 | 40 | def __str__(self): 41 | return self.value 42 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pathlib 3 | import hashlib 4 | 5 | from tcbench.modeling import utils 6 | 7 | 8 | def pytest_configure(): 9 | pytest.DIR_RESOURCES = (pathlib.Path(__file__).parent / "resources").resolve() 10 | 11 | 12 | @pytest.helpers.register 13 | def verify_deeplearning_model(fname, reference_fname, epsilon=None): 14 | """Verifying trained model weights""" 15 | import torch 16 | 17 | net = torch.load(fname) 18 | ref_net = torch.load(reference_fname) 19 | 20 | assert len(net) == len(ref_net) 21 | assert sorted(net.keys()) == sorted(ref_net.keys()) 22 | 23 | for name in net.keys(): 24 | weights = net[name] 25 | ref_weights = ref_net[name] 26 | if epsilon is None: 27 | assert (weights.flatten() == ref_weights.flatten()).all() 28 | else: 29 | assert ((weights.flatten() - ref_weights.flatten()).abs() < epsilon).all() 30 | 31 | 32 | def _get_md5(fname): 33 | data = pathlib.Path(fname).read_bytes() 34 | md5 = hashlib.md5(data) 35 | return md5.hexdigest() 36 | 37 | 38 | @pytest.helpers.register 39 | def verify_md5_model(fname, reference_fname): 40 | assert _get_md5(fname) == _get_md5(reference_fname) 41 | 42 | 43 | @pytest.helpers.register 44 | def verify_reports( 45 | folder, reference_folder, with_train=True, with_val=True, with_test=True 46 | ): 47 | """Verify classification report and confusion matrixes""" 48 | import pandas as pd 49 | 50 | # note: by using folder / test*.csv automatically 51 | # skips leftover if not found 52 | 53 | def _add_file(folder, fname, fname_list): 54 | if not (folder / fname).exists(): 55 | raise RuntimeError(f"missing {fname}") 56 | fname_list.append(fname) 57 | 58 | fnames = [] 59 | if with_train: 60 | _add_file(folder, "train_class_rep.csv", fnames) 61 | _add_file(folder, "train_conf_mtx.csv", fnames) 62 | if with_val: 63 | _add_file(folder, "val_class_rep.csv", fnames) 64 | _add_file(folder, "val_conf_mtx.csv", fnames) 65 | if with_test: 66 | tmp = list(folder.glob("test*.csv")) 67 | assert len(tmp) != 0 68 | fnames.extend([item.name for item in tmp]) 69 | 70 | if len(fnames) == 0: 71 | raise RuntimeError("empty list of files to verify") 72 | 73 | for fname in fnames: 74 | df = pd.read_csv(folder / fname) 75 | ref_df = pd.read_csv(reference_folder / fname) 76 | assert (df == ref_df).all().all() 77 | 78 | 79 | @pytest.helpers.register 80 | def match_run_hashes(folder, reference_folder, params_to_match=['seed', 'split_index', 'flowpic_dim', 'aug_name']): 81 | 82 | ref_catalog = { 83 | path.name: utils.load_yaml(path / 'params.yml') 84 | for path in reference_folder.iterdir() 85 | } 86 | 87 | pairs = [] 88 | for path in folder.iterdir(): 89 
| curr_params = utils.load_yaml(path / 'params.yml') 90 | curr_hash = path.name 91 | 92 | curr_pair = [curr_hash, None] 93 | for ref_hash, ref_params in ref_catalog.items(): 94 | tmp1 = {} 95 | tmp2 = {} 96 | for param_name in params_to_match: 97 | tmp1[param_name] = str(curr_params[param_name]) 98 | tmp2[param_name] = str(ref_params[param_name]) 99 | 100 | if tmp1 == tmp2: 101 | curr_pair[-1] = ref_hash 102 | del(ref_catalog[ref_hash]) 103 | break 104 | 105 | pairs.append(curr_pair) 106 | 107 | return pairs 108 | -------------------------------------------------------------------------------- /tests/test_augmentations_at_loading_xgboost.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import pytest 4 | import pathlib 5 | 6 | import tcbench 7 | from tcbench.modeling import ( 8 | utils, 9 | run_augmentations_at_loading_xgboost, 10 | MODELING_DATASET_TYPE, 11 | ) 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "params, expected_artifacts_folder", 16 | [ 17 | ( 18 | [ 19 | f"--dataset {str(tcbench.DATASETS.UCDAVISICDM19)}", 20 | "--flow-representation pktseries", 21 | "--max-n-pkts 10", 22 | "--split-index 0", 23 | "--seed 12345", 24 | ], 25 | pytest.DIR_RESOURCES 26 | / pathlib.Path( 27 | "_reference_aim_run/ucdavis-icdm19/xgboost/noaugmentation-timeseries/5fa59c129a3e4aa6bb9b7640" 28 | ), 29 | ), 30 | ], 31 | ) 32 | def test_main(tmp_path, params, expected_artifacts_folder): 33 | params.append(f"--artifacts-folder {tmp_path}/artifacts") 34 | params.append(f"--aim-repo {tmp_path}") 35 | 36 | parser = run_augmentations_at_loading_xgboost.cli_parser() 37 | args = parser.parse_args((" ".join(params)).split()) 38 | 39 | state = run_augmentations_at_loading_xgboost.main(args) 40 | 41 | # the output folder is based on the aim run hash 42 | artifacts_folder = next((tmp_path / 'artifacts').iterdir()) 43 | 44 | # verifying model files 45 | fname = f"xgb_model_split_{args.split_index}.json" 46 | # pytest.helpers.verify_md5_model( 47 | # artifacts_folder / fname, expected_artifacts_folder / fname 48 | # ) 49 | 50 | pytest.helpers.verify_reports(artifacts_folder, expected_artifacts_folder) 51 | -------------------------------------------------------------------------------- /tests/test_contrastive_learning_and_finetune.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import pytest 4 | import torch 5 | import pathlib 6 | 7 | import tcbench 8 | from tcbench.modeling import utils, run_contrastive_learning_and_finetune 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "params, expected_artifacts_folder", 13 | [ 14 | ( 15 | dict( 16 | dataset_name=tcbench.DATASETS.UCDAVISICDM19, 17 | learning_rate=0.001, 18 | batch_size=32, 19 | flowpic_dim=32, 20 | split_idx=0, 21 | seed=12345, 22 | loss_temperature=0.07, 23 | with_dropout=False, 24 | projection_layer_dim=30, 25 | ), 26 | pytest.DIR_RESOURCES 27 | / pathlib.Path( 28 | "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d" 29 | ), 30 | ), 31 | ], 32 | ) 33 | def test_pretrain(tmp_path, params, expected_artifacts_folder): 34 | params["artifacts_folder"] = tmp_path 35 | utils.seed_everything(params.get("seed", 12345)) 36 | 37 | state = run_contrastive_learning_and_finetune.pretrain(**params) 38 | 39 | # verifying trained model weights 40 | fname = f'best_model_weights_pretrain_split_{params["split_idx"]}.pt' 41 | pytest.helpers.verify_deeplearning_model( 42 | tmp_path / fname, 
expected_artifacts_folder / fname 43 | ) 44 | 45 | 46 | 47 | @pytest.mark.parametrize( 48 | "params, expected_artifacts_folder", 49 | [ 50 | ( 51 | [ 52 | "--dataset ucdavis-icdm19", 53 | "--contrastive-learning-seed 12345", 54 | "--finetune-seed 12345", 55 | "--batch-size 32", 56 | "--flowpic-dim 32", 57 | "--split-index 0", 58 | "--suppress-dropout", 59 | "--projection-layer-dim 30", 60 | ], 61 | pytest.DIR_RESOURCES 62 | / pathlib.Path( 63 | "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d" 64 | ), 65 | ), 66 | ], 67 | ) 68 | def test_main(tmp_path, params, expected_artifacts_folder): 69 | params.append(f"--artifacts-folder {tmp_path}/artifacts") 70 | 71 | parser = run_contrastive_learning_and_finetune.cli_parser() 72 | args = parser.parse_args(" ".join(params).split()) 73 | args.method = "simclr" 74 | args.augmentations = args.augmentations.split(",") 75 | 76 | run_contrastive_learning_and_finetune.main(args) 77 | 78 | # artifacts are stored into a doubly nested folder 79 | # as / 80 | artifacts_folder = next((tmp_path / 'artifacts').iterdir()) 81 | 82 | fname_models = sorted(path.name for path in artifacts_folder.glob("*.pt")) 83 | expected_fname_models = sorted( 84 | path.name for path in expected_artifacts_folder.glob("*.pt") 85 | ) 86 | assert fname_models == expected_fname_models 87 | 88 | for fname in fname_models: 89 | pytest.helpers.verify_deeplearning_model( 90 | artifacts_folder / fname, expected_artifacts_folder / fname 91 | ) 92 | 93 | # verifying reports 94 | # note: by using tmp_path / test*.csv automatically 95 | # skips leftover if suppressed with the command line option 96 | pytest.helpers.verify_reports( 97 | artifacts_folder, 98 | expected_artifacts_folder, 99 | with_train=False, 100 | with_val=False, 101 | with_test=True, 102 | ) 103 | -------------------------------------------------------------------------------- /tests/test_modeling_backbone.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pathlib 3 | 4 | from tcbench.modeling import backbone 5 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini 6 | from tcbench.modeling.methods import ContrastiveLearningTrainer 7 | 8 | @pytest.mark.parametrize( 9 | "net1, net2, expected", 10 | [ 11 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), True), 12 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False), 13 | ( 14 | LeNet5FlowpicIMC22_Mini(), 15 | ContrastiveLearningTrainer.prepare_net_for_train(LeNet5FlowpicIMC22_Mini()), 16 | False, 17 | ), 18 | ( 19 | LeNet5FlowpicIMC22_Mini(), 20 | ContrastiveLearningTrainer.init_train(LeNet5FlowpicIMC22_Mini(), None)[0], 21 | False, 22 | ), 23 | ], 24 | ) 25 | def test_have_same_layers_and_types(net1, net2, expected): 26 | assert backbone.have_same_layers_and_types(net1, net2) == expected 27 | 28 | 29 | @pytest.mark.parametrize( 30 | "num_classes1, num_classes2", 31 | [ 32 | (5, 5), 33 | (None, 5), 34 | (5, None), 35 | (None, None), 36 | ], 37 | ) 38 | def test_have_same_layers_and_types_after_reloading_from_file( 39 | tmp_path, num_classes1, num_classes2 40 | ): 41 | net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1) 42 | net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1) 43 | net1.save_weights(tmp_path / "weights.pt") 44 | 45 | net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2) 46 | net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt") 47 | assert 
backbone.have_same_layers_and_types(net1, net2) 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "net1, net2, expected", 52 | [ 53 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), False), 54 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False), 55 | ], 56 | ) 57 | def test_are_equal(net1, net2, expected): 58 | assert backbone.are_equal(net1, net2) == expected 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "num_classes1, num_classes2", 63 | [ 64 | (5, 5), 65 | (None, 5), 66 | (5, None), 67 | (None, None), 68 | ], 69 | ) 70 | def test_are_equal_after_reloading_from_file(tmp_path, num_classes1, num_classes2): 71 | net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1) 72 | net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1) 73 | net1.save_weights(tmp_path / "weights.pt") 74 | 75 | net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2) 76 | net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt") 77 | assert backbone.are_equal(net1, net2) 78 | 79 | 80 | @pytest.mark.parametrize( 81 | "net", 82 | [ 83 | LeNet5FlowpicIMC22_Mini(), 84 | LeNet5FlowpicIMC22_Mini(num_classes=5), 85 | ], 86 | ) 87 | def test_clone_net(net): 88 | new_net = backbone.clone_net(net) 89 | assert backbone.are_equal(net, new_net) 90 | assert id(net) != id(new_net) 91 | -------------------------------------------------------------------------------- /tests/test_modeling_methods.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torch 4 | 5 | from tcbench.modeling import backbone, methods 6 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "net, optimizer_class", 11 | [ 12 | (LeNet5FlowpicIMC22_Mini(), None), 13 | (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam), 14 | ], 15 | ) 16 | def test_simclr_init_pretrain(net, optimizer_class): 17 | net = LeNet5FlowpicIMC22_Mini() 18 | 19 | optimizer = None 20 | if optimizer_class: 21 | optimizer = optimizer_class(net.parameters(), lr=0.001) 22 | 23 | new_net1, optimizer1 = methods.ContrastiveLearningTrainer.init_train(net, optimizer) 24 | new_net2, optimizer2 = methods.SimCLRTrainer.init_pretrain(net, optimizer) 25 | # the two networks need to have the same architecture, 26 | # but the weights will not be the same overall because 27 | # new layers are added 28 | 29 | assert backbone.have_same_layers_and_types(new_net1, new_net2) 30 | assert id(new_net1) != id(new_net2) 31 | 32 | # compare first convolutional layer 33 | assert (list(new_net1.parameters())[0] == list(new_net2.parameters())[0]).all() 34 | 35 | # compare last linear layer weights (bias is 0) 36 | assert (list(new_net1.parameters())[-2] != list(new_net2.parameters())[-2]).any() 37 | 38 | if optimizer: 39 | assert id(optimizer1) != id(optimizer2) 40 | assert id(optimizer) != id(optimizer1) 41 | assert id(optimizer) != id(optimizer2) 42 | params1 = optimizer1.param_groups[0]["params"] 43 | params2 = optimizer2.param_groups[0]["params"] 44 | assert len(params1) == len(params2) 45 | assert (params1[0] == params2[0]).all() 46 | 47 | 48 | @pytest.mark.parametrize( 49 | "net, optimizer_class", 50 | [ 51 | (LeNet5FlowpicIMC22_Mini(), None), 52 | (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam), 53 | ], 54 | ) 55 | def test_simclr_init_finetune(net, optimizer_class): 56 | net = LeNet5FlowpicIMC22_Mini() 57 | 58 | optimizer = None 59 | if optimizer_class: 60 | optimizer = optimizer_class(net.parameters(), lr=0.001) 61 | 62 | new_net, new_optimizer = 
methods.SimCLRTrainer.init_finetune( 63 | net, optimizer=optimizer, num_classes=5 64 | ) 65 | assert not new_net.is_equal_to(net) 66 | assert new_net.classifier is not None 67 | if optimizer: 68 | assert len(new_optimizer.param_groups[0]["params"]) == 2 69 | for p1, p2 in zip( 70 | new_net.classifier.parameters(), new_optimizer.param_groups[0]["params"] 71 | ): 72 | assert (p1 == p2).all() 73 | --------------------------------------------------------------------------------
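A closing usage note (not a file from the repository): the modules above are standard pytest suites, so with the dev extras from pyproject.toml installed they would typically be run from the repository root with:

python -m pytest tests/

The pytest.helpers.register decorators used throughout tests/conftest.py are provided by the pytest-helpers-namespace plugin listed in those dev dependencies, and the reference artifacts the helpers compare against are installed under ./tests by the fetch-artifacts CLI command shown earlier.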