├── .gitignore
├── LICENSE
├── MANIFEST.in
├── README.md
├── docs.material
├── about.md
├── artifacts.md
├── css
│ ├── fonts.css
│ ├── jupyter-notebook.css
│ ├── material.css
│ ├── mkdocstrings.css
│ ├── style.css
│ └── tables_style.css
├── datasets
│ ├── curation_and_metadata.md
│ ├── datasets.csv
│ ├── datasets.md
│ ├── datasets_splits.md
│ ├── guides
│ │ ├── index.md
│ │ ├── tutorial_load_datasets.ipynb
│ │ └── tutorial_load_datasets.md
│ ├── import.md
│ ├── index.md
│ ├── install.md
│ ├── install
│ │ ├── index.md
│ │ ├── mirage19.md
│ │ ├── mirage22.md
│ │ ├── ucdavis-icdm19.md
│ │ └── utmobilenet21.md
│ ├── metadata.md
│ ├── metadata.md.DEPRECATED
│ ├── samples_count
│ │ ├── index.md
│ │ ├── mirage19.md
│ │ ├── mirage22.md
│ │ ├── ucdavis-icdm19.md
│ │ └── utmobilenet21.md
│ ├── schemas
│ │ ├── index.md
│ │ ├── mirage19.md
│ │ ├── mirage22.md
│ │ ├── ucdavis-icdm19.md
│ │ └── utmobilenet21.md
│ └── tutorial_load_parquet.ipynb
├── figs
│ ├── aim_log1.png
│ ├── aim_log2.png
│ ├── aim_log3.png
│ ├── aim_run1.png
│ ├── aim_run2.png
│ ├── aim_run3.png
│ ├── dataset_properties_mirage19.png
│ ├── dataset_properties_mirage22.png
│ ├── dataset_properties_ucdavis-icdm19.png
│ └── dataset_properties_utmobilenet21.png
├── index.md
├── index.md.DEPRECATED
├── install.md
├── modeling
│ ├── aim_repos
│ │ ├── aim_webui.md
│ │ ├── aimrepo_subcmd.md
│ │ └── index.md
│ ├── aim_repositories_content.md
│ ├── campaigns.md
│ ├── exploring_artifacts.md
│ ├── figs
│ │ ├── aim_home-page.png
│ │ ├── aim_log1.png
│ │ ├── aim_log2.png
│ │ ├── aim_log3.png
│ │ ├── aim_run1.png
│ │ ├── aim_run2.png
│ │ └── aim_run3.png
│ ├── index.md
│ ├── overview.md
│ └── runs.md
├── overrides
│ ├── arrow-right-solid.svg
│ ├── github-mark
│ │ └── github-mark.svg
│ ├── home.html
│ ├── home.js
│ ├── main.html
│ ├── main.html.DEPRECATED
│ ├── tcbench.svg
│ └── tcbench_logo.svg
├── papers
│ ├── imc23
│ │ ├── artifacts.md
│ │ ├── campaigns.md
│ │ ├── index.md
│ │ ├── ml_artifacts.md
│ │ ├── notebooks.md
│ │ ├── notebooks
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.md
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files
│ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png
│ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb
│ │ │ ├── figure11_dropout_impact_supervised_setting.md
│ │ │ ├── figure11_dropout_impact_supervised_setting_files
│ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png
│ │ │ ├── figure1_flowpic_example.ipynb
│ │ │ ├── figure1_flowpic_example.md
│ │ │ ├── figure1_flowpic_example_files
│ │ │ │ └── figure1_flowpic_example_8_0.png
│ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb
│ │ │ ├── figure3_confusion_matrix_supervised_setting.md
│ │ │ ├── figure3_confusion_matrix_supervised_setting_files
│ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic.md
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files
│ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png
│ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb
│ │ │ ├── figure5_ucdavis_augmentations_comparison.md
│ │ │ ├── figure5_ucdavis_augmentations_comparison_files
│ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.md
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files
│ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.md
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files
│ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size.md
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files
│ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png
│ │ │ ├── miscellaneous_stats.ipynb
│ │ │ ├── miscellaneous_stats.md
│ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb
│ │ │ ├── table10_ucdavis-icdm19_tukey.md
│ │ │ ├── table2_datasets_properties.ipynb
│ │ │ ├── table2_datasets_properties.md
│ │ │ ├── table3_xgboost_baseline.ipynb
│ │ │ ├── table3_xgboost_baseline.md
│ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb
│ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.md
│ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb
│ │ │ ├── table5_simclr_dropout_and_projectionlayer.md
│ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb
│ │ │ ├── table6_simclr_other_augmentation_pairs.md
│ │ │ ├── table7_larger_trainset.ipynb
│ │ │ ├── table7_larger_trainset.md
│ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb
│ │ │ ├── table8_augmentation-at-loading_on_other_datasets.md
│ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb
│ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human.md
│ │ └── pytest.md
│ └── index.md
├── quick_tour.md
└── tcbench
│ ├── api
│ ├── overview.md
│ ├── tcbench_cli_clickutils.md
│ ├── tcbench_cli_command_aimrepo.md
│ ├── tcbench_cli_command_campaign.md
│ ├── tcbench_cli_command_datasets.md
│ ├── tcbench_cli_command_singlerun.md
│ ├── tcbench_cli_richutils.md
│ ├── tcbench_libtcdatasets.md
│ ├── tcbench_libtcdatasets_datasets_utils.md
│ ├── tcbench_libtcdatasets_mirage19_json_to_parquet.md
│ ├── tcbench_libtcdatasets_mirage22_json_to_parquet.md
│ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md
│ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md
│ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md
│ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md
│ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md
│ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md
│ ├── tcbench_modeling_aimutils.md
│ ├── tcbench_modeling_augmentation.md
│ ├── tcbench_modeling_backbone.md
│ ├── tcbench_modeling_dataprep.md
│ ├── tcbench_modeling_losses.md
│ ├── tcbench_modeling_methods.md
│ ├── tcbench_modeling_run_augmentations_at_loading.md
│ ├── tcbench_modeling_run_augmentations_at_loading_xgboost.md
│ ├── tcbench_modeling_run_campaign_augmentations_at_loading.md
│ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md
│ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md
│ ├── tcbench_modeling_run_contrastive_learning_and_finetune.md
│ └── tcbench_modeling_utils.md
│ ├── cli_intro.md
│ ├── index.md
│ ├── install.md
│ ├── internals.md
│ └── overview.md
├── docs
├── .DS_Store
├── 404.html
├── about
│ └── index.html
├── arrow-right-solid.svg
├── artifacts
│ └── index.html
├── assets
│ ├── _mkdocstrings.css
│ ├── images
│ │ └── favicon.png
│ ├── javascripts
│ │ ├── bundle.83f73b43.min.js
│ │ ├── bundle.83f73b43.min.js.map
│ │ ├── glightbox.min.js
│ │ ├── lunr
│ │ │ ├── min
│ │ │ │ ├── lunr.ar.min.js
│ │ │ │ ├── lunr.da.min.js
│ │ │ │ ├── lunr.de.min.js
│ │ │ │ ├── lunr.du.min.js
│ │ │ │ ├── lunr.el.min.js
│ │ │ │ ├── lunr.es.min.js
│ │ │ │ ├── lunr.fi.min.js
│ │ │ │ ├── lunr.fr.min.js
│ │ │ │ ├── lunr.he.min.js
│ │ │ │ ├── lunr.hi.min.js
│ │ │ │ ├── lunr.hu.min.js
│ │ │ │ ├── lunr.hy.min.js
│ │ │ │ ├── lunr.it.min.js
│ │ │ │ ├── lunr.ja.min.js
│ │ │ │ ├── lunr.jp.min.js
│ │ │ │ ├── lunr.kn.min.js
│ │ │ │ ├── lunr.ko.min.js
│ │ │ │ ├── lunr.multi.min.js
│ │ │ │ ├── lunr.nl.min.js
│ │ │ │ ├── lunr.no.min.js
│ │ │ │ ├── lunr.pt.min.js
│ │ │ │ ├── lunr.ro.min.js
│ │ │ │ ├── lunr.ru.min.js
│ │ │ │ ├── lunr.sa.min.js
│ │ │ │ ├── lunr.stemmer.support.min.js
│ │ │ │ ├── lunr.sv.min.js
│ │ │ │ ├── lunr.ta.min.js
│ │ │ │ ├── lunr.te.min.js
│ │ │ │ ├── lunr.th.min.js
│ │ │ │ ├── lunr.tr.min.js
│ │ │ │ ├── lunr.vi.min.js
│ │ │ │ └── lunr.zh.min.js
│ │ │ ├── tinyseg.js
│ │ │ └── wordcut.js
│ │ └── workers
│ │ │ ├── search.6ce7567c.min.js
│ │ │ └── search.6ce7567c.min.js.map
│ └── stylesheets
│ │ ├── glightbox.min.css
│ │ ├── main.0253249f.min.css
│ │ ├── main.0253249f.min.css.map
│ │ ├── palette.06af60db.min.css
│ │ └── palette.06af60db.min.css.map
├── css
│ ├── fonts.css
│ ├── jupyter-notebook.css
│ ├── material.css
│ ├── mkdocstrings.css
│ ├── style.css
│ └── tables_style.css
├── datasets
│ ├── curation_and_metadata
│ │ └── index.html
│ ├── datasets.csv
│ ├── datasets
│ │ └── index.html
│ ├── datasets_splits
│ │ └── index.html
│ ├── guides
│ │ ├── index.html
│ │ ├── tutorial_load_datasets.ipynb
│ │ └── tutorial_load_datasets
│ │ │ └── index.html
│ ├── import
│ │ └── index.html
│ ├── index.html
│ ├── install
│ │ ├── index.html
│ │ ├── mirage19
│ │ │ └── index.html
│ │ ├── mirage22
│ │ │ └── index.html
│ │ ├── ucdavis-icdm19
│ │ │ └── index.html
│ │ └── utmobilenet21
│ │ │ └── index.html
│ ├── metadata.md.DEPRECATED
│ ├── metadata
│ │ └── index.html
│ ├── samples_count
│ │ ├── index.html
│ │ ├── mirage19
│ │ │ └── index.html
│ │ ├── mirage22
│ │ │ └── index.html
│ │ ├── ucdavis-icdm19
│ │ │ └── index.html
│ │ └── utmobilenet21
│ │ │ └── index.html
│ ├── schemas
│ │ ├── index.html
│ │ ├── mirage19
│ │ │ └── index.html
│ │ ├── mirage22
│ │ │ └── index.html
│ │ ├── ucdavis-icdm19
│ │ │ └── index.html
│ │ └── utmobilenet21
│ │ │ └── index.html
│ └── tutorial_load_parquet.ipynb
├── figs
│ ├── aim_log1.png
│ ├── aim_log2.png
│ ├── aim_log3.png
│ ├── aim_run1.png
│ ├── aim_run2.png
│ ├── aim_run3.png
│ ├── dataset_properties_mirage19.png
│ ├── dataset_properties_mirage22.png
│ ├── dataset_properties_ucdavis-icdm19.png
│ └── dataset_properties_utmobilenet21.png
├── github-mark
│ ├── github-mark-white.png
│ ├── github-mark-white.svg
│ ├── github-mark.png
│ └── github-mark.svg
├── home.js
├── index.html
├── index.md.DEPRECATED
├── install
│ └── index.html
├── main.html.DEPRECATED
├── modeling
│ ├── aim_repos
│ │ ├── aim_webui
│ │ │ └── index.html
│ │ ├── aimrepo_subcmd
│ │ │ └── index.html
│ │ └── index.html
│ ├── aim_repositories_content
│ │ └── index.html
│ ├── campaigns
│ │ └── index.html
│ ├── exploring_artifacts
│ │ └── index.html
│ ├── figs
│ │ ├── aim_home-page.png
│ │ ├── aim_log1.png
│ │ ├── aim_log2.png
│ │ ├── aim_log3.png
│ │ ├── aim_run1.png
│ │ ├── aim_run2.png
│ │ └── aim_run3.png
│ ├── index.html
│ ├── overview
│ │ └── index.html
│ └── runs
│ │ └── index.html
├── objects.inv
├── overrides
│ ├── arrow-right-solid.svg
│ ├── github-mark
│ │ ├── github-mark-white.png
│ │ ├── github-mark-white.svg
│ │ ├── github-mark.png
│ │ └── github-mark.svg
│ ├── home.html
│ ├── home.js
│ ├── main.html
│ ├── main.html.DEPRECATED
│ ├── tcbench.svg
│ └── tcbench_logo.svg
├── papers
│ ├── imc23
│ │ ├── artifacts
│ │ │ └── index.html
│ │ ├── campaigns
│ │ │ └── index.html
│ │ ├── index.html
│ │ ├── ml_artifacts
│ │ │ └── index.html
│ │ ├── notebooks
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human
│ │ │ │ └── index.html
│ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files
│ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png
│ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb
│ │ │ ├── figure11_dropout_impact_supervised_setting
│ │ │ │ └── index.html
│ │ │ ├── figure11_dropout_impact_supervised_setting_files
│ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png
│ │ │ ├── figure1_flowpic_example.ipynb
│ │ │ ├── figure1_flowpic_example
│ │ │ │ └── index.html
│ │ │ ├── figure1_flowpic_example_files
│ │ │ │ └── figure1_flowpic_example_8_0.png
│ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb
│ │ │ ├── figure3_confusion_matrix_supervised_setting
│ │ │ │ └── index.html
│ │ │ ├── figure3_confusion_matrix_supervised_setting_files
│ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic
│ │ │ │ └── index.html
│ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files
│ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png
│ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb
│ │ │ ├── figure5_ucdavis_augmentations_comparison
│ │ │ │ └── index.html
│ │ │ ├── figure5_ucdavis_augmentations_comparison_files
│ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance
│ │ │ │ └── index.html
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files
│ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank
│ │ │ │ └── index.html
│ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files
│ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size
│ │ │ │ └── index.html
│ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files
│ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png
│ │ │ ├── index.html
│ │ │ ├── miscellaneous_stats.ipynb
│ │ │ ├── miscellaneous_stats
│ │ │ │ └── index.html
│ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb
│ │ │ ├── table10_ucdavis-icdm19_tukey
│ │ │ │ └── index.html
│ │ │ ├── table2_datasets_properties.ipynb
│ │ │ ├── table2_datasets_properties
│ │ │ │ └── index.html
│ │ │ ├── table3_xgboost_baseline.ipynb
│ │ │ ├── table3_xgboost_baseline
│ │ │ │ └── index.html
│ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb
│ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions
│ │ │ │ └── index.html
│ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb
│ │ │ ├── table5_simclr_dropout_and_projectionlayer
│ │ │ │ └── index.html
│ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb
│ │ │ ├── table6_simclr_other_augmentation_pairs
│ │ │ │ └── index.html
│ │ │ ├── table7_larger_trainset.ipynb
│ │ │ ├── table7_larger_trainset
│ │ │ │ └── index.html
│ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb
│ │ │ ├── table8_augmentation-at-loading_on_other_datasets
│ │ │ │ └── index.html
│ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb
│ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human
│ │ │ │ └── index.html
│ │ └── pytest
│ │ │ └── index.html
│ └── index.html
├── quick_tour
│ └── index.html
├── search
│ └── search_index.json
├── sitemap.xml
├── sitemap.xml.gz
├── tcbench.svg
├── tcbench
│ ├── api
│ │ ├── overview
│ │ │ └── index.html
│ │ ├── tcbench_cli_clickutils
│ │ │ └── index.html
│ │ ├── tcbench_cli_command_aimrepo
│ │ │ └── index.html
│ │ ├── tcbench_cli_command_campaign
│ │ │ └── index.html
│ │ ├── tcbench_cli_command_datasets
│ │ │ └── index.html
│ │ ├── tcbench_cli_command_singlerun
│ │ │ └── index.html
│ │ ├── tcbench_cli_richutils
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_datasets_utils
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_mirage19_json_to_parquet
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_mirage22_json_to_parquet
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet
│ │ │ └── index.html
│ │ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet
│ │ │ └── index.html
│ │ ├── tcbench_modeling_aimutils
│ │ │ └── index.html
│ │ ├── tcbench_modeling_augmentation
│ │ │ └── index.html
│ │ ├── tcbench_modeling_backbone
│ │ │ └── index.html
│ │ ├── tcbench_modeling_dataprep
│ │ │ └── index.html
│ │ ├── tcbench_modeling_losses
│ │ │ └── index.html
│ │ ├── tcbench_modeling_methods
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_augmentations_at_loading
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_augmentations_at_loading_xgboost
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune
│ │ │ └── index.html
│ │ ├── tcbench_modeling_run_contrastive_learning_and_finetune
│ │ │ └── index.html
│ │ └── tcbench_modeling_utils
│ │ │ └── index.html
│ ├── cli_intro
│ │ └── index.html
│ ├── index.html
│ ├── install
│ │ └── index.html
│ ├── internals
│ │ └── index.html
│ └── overview
│ │ └── index.html
└── tcbench_logo.svg
├── mkdocs.yml
├── notebooks
├── imc23
│ ├── LICENSE
│ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb
│ ├── figure11_dropout_impact_supervised_setting.ipynb
│ ├── figure1_flowpic_example.ipynb
│ ├── figure3_confusion_matrix_supervised_setting.ipynb
│ ├── figure3_ucdavis_augmentations_comparison.ipynb
│ ├── figure4_ucdavis_per_class_average_flowpic.ipynb
│ ├── figure5_ucdavis_augmentations_comparison.ipynb
│ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb
│ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb
│ ├── figure8_ucdavis_kde_on_pkts_size.ipynb
│ ├── miscellaneous_stats.ipynb
│ ├── table10_ucdavis-icdm19_tukey.ipynb
│ ├── table2_datasets_properties.ipynb
│ ├── table3_xgboost_baseline.ipynb
│ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb
│ ├── table5_simclr_dropout_and_projectionlayer.ipynb
│ ├── table6_simclr_other_augmentation_pairs.ipynb
│ ├── table7_larger_trainset.ipynb
│ ├── table8_augmentation-at-loading_on_other_datasets.ipynb
│ └── table9_icdm_finetuning_per_class_metrics_on_human.ipynb
└── tutorials
│ └── tutorial_load_parquet.ipynb
├── pyproject.toml
├── src
└── tcbench
│ ├── FIGSHARE_RESOURCES.yml
│ ├── __init__.py
│ ├── cli
│ ├── __init__.py
│ ├── clickutils.py
│ ├── command_aimrepo.py
│ ├── command_campaign.py
│ ├── command_datasets.py
│ ├── command_fetchartifacts.py
│ ├── command_singlerun.py
│ ├── main.py
│ ├── rich.theme
│ └── richutils.py
│ ├── libtcdatasets
│ ├── __init__.py
│ ├── datasets_utils.py
│ ├── mirage19_generate_splits.py
│ ├── mirage19_json_to_parquet.py
│ ├── mirage22_generate_splits.py
│ ├── mirage22_json_to_parquet.py
│ ├── resources
│ │ ├── DATASETS.yml
│ │ ├── DATASETS_FILES_MD5.yml
│ │ ├── mirage19.yml
│ │ ├── mirage22.yml
│ │ ├── ucdavis-icdm19.yml
│ │ └── utmobilenet21.yml
│ ├── ucdavis_icdm19_csv_to_parquet.py
│ ├── ucdavis_icdm19_generate_splits.py
│ ├── utmobilenet21_csv_to_parquet.py
│ └── utmobilenet21_generate_splits.py
│ └── modeling
│ ├── __init__.py
│ ├── aimutils.py
│ ├── augmentation.py
│ ├── backbone.py
│ ├── dataprep.py
│ ├── losses.py
│ ├── methods.py
│ ├── run_augmentations_at_loading.py
│ ├── run_augmentations_at_loading_xgboost.py
│ ├── run_campaign_augmentations_at_loading.py
│ ├── run_campaign_augmentations_at_loading_xgboost.py
│ ├── run_campaign_contrastive_learning_and_finetune.py
│ ├── run_contrastive_learning_and_finetune.py
│ └── utils.py
└── tests
├── conftest.py
├── test_augmentations_at_loading.py
├── test_augmentations_at_loading_xgboost.py
├── test_cli_command_campaign.py
├── test_cli_command_singlerun.py
├── test_contrastive_learning_and_finetune.py
├── test_libtcdatasets_datasets_utils.py
├── test_modeling_backbone.py
├── test_modeling_dataprep.py
└── test_modeling_methods.py
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | *.swp
6 |
7 | # C extensions
8 | *.so
9 |
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 |
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 |
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 |
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 |
55 | # Translations
56 | *.mo
57 | *.pot
58 |
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 |
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 |
69 | # Scrapy stuff:
70 | .scrapy
71 |
72 | # Sphinx documentation
73 | docs/_build/
74 |
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 |
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 |
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 |
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 |
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 |
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 |
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 |
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 |
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 |
120 | # SageMath parsed files
121 | *.sage.py
122 |
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 |
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 |
136 | # Rope project settings
137 | .ropeproject
138 |
139 | # mkdocs documentation
140 | /site
141 |
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 |
147 | # Pyre type checker
148 | .pyre/
149 |
150 | # pytype static type analyzer
151 | .pytype/
152 |
153 | # Cython debug symbols
154 | cython_debug/
155 |
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 |
163 | ######
164 | # EXTRAS
165 | __ATTIC__
166 | __TMP__
167 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 tcbenchstack
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include requirements.txt
2 | include src/tcbench/libtcdatasets/resources/*yml
3 | include src/tcbench/cli/rich.theme
4 | include src/tcbench/FIGSHARE_RESOURCES.yml
5 | global-exclude *.swp
6 | global-exclude *.bck
7 |
--------------------------------------------------------------------------------
/docs.material/about.md:
--------------------------------------------------------------------------------
1 | # The tcbench framework
2 |
3 | tcbench is a ML/DL framework specific for __Traffic Classification (TC)__
4 | created as a research project by the AI4NET team of the Huawei Technologies
5 | research center in Paris, France.
6 |
7 | !!! info "What is Traffic Classification?"
8 |
9 | Nodes within a computer network operate by exchanging
10 | information, namely *packets*, which is regulated according
11 | to standardized protocols (e.g., HTTP for the web). To understand
12 | network health it is therefore necessary to constantly monitor
13 | this information flow and react accordingly. For instance, one
14 | might want to prioritize certain traffic (e.g., video meeting)
15 | or block it (e.g., social media in working environment).
16 |
17 | Traffic classification is the act of labeling an exchange of packets
18 | based on the Internet application which generated it.
19 |
20 |
21 | The academic literature is rife with methods and proposals for TC.
22 | Yet, code artifacts are scarce, and public datasets
23 | do not offer common conventions of use.
24 |
25 | We designed tcbench with the following goals in mind:
26 |
27 | | Goal | State of the art | tcbench |
28 | |:-----|:-----------------|:--------|
29 | |__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds. | An (opinionated) curation of datasets to create easy to use parquet files with associated train/val/test fold.|
30 | |__:octicons-file-code-24: Code__ | TC literature has no reference code base for ML/DL modeling | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy to use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)|
31 | |__:material-monitor-dashboard: Model tracking__ | Most of ML framework requires integration with cloud environments and subscription services | tcbench uses [aimstack](https://aimstack.io/) to save on local servers metrics during training which can be later explored via its web UI or aggregated in report summaries using tcbench |
32 |
33 | ## Features and roadmap
34 |
35 | tcbench is still under development, but (as suggested by its name) ultimately aims
36 | to be a reference framework for benchmarking multiple ML/DL solutions
37 | related to TC.
38 |
39 | At the current stage, tcbench offers
40 |
41 | * Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`.
42 | You can use these datasets and their curated version independently from tcbench.
43 | Check out the [dataset install](/tcbench/datasets/install) process and [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets).
44 |
45 | * Good support for flowpic input representation and minimal support
46 | for 1d time series (based on network packets properties) input representation.
47 |
48 | * Data augmentation functionality for flowpic input representation.
49 |
50 | * Modeling via XGBoost, vanilla DL supervision and contrastive learning (via SimCLR or SupCon).
51 |
52 | Most of the above functionalities described relate to our __:material-file-document-outline: [IMC23 paper](/tcbench/papers/imc23/)__.
53 |
54 | More exciting features, including more datasets and algorithms, will come in the next months.
55 |
56 | Stay tuned :wink:!
57 |
58 |
--------------------------------------------------------------------------------
/docs.material/artifacts.md:
--------------------------------------------------------------------------------
1 | The submission is associated with three types of artifacts
2 |
3 | * __:spider_web: Website__: This website serves as a primary source
4 | of documentation. It collects
5 | * Documentation about [datasets :simple-artifacthub:](../datasets/install).
6 | * Documentation about our modeling framework called :material-link-off:[`tcbench`]().
7 | * Guides on how to [run experiments :fontawesome-solid-flask:](/tcbench/modeling/campaigns/) via `tcbench`.
8 |
9 | * __:octicons-file-code-24: Code__: This includes
10 | * All source code related to :material-link-off:[`tcbench` :material-language-python:]().
11 | * A collection of [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference)
12 | used for the tables and figures of the submission.
13 |
14 | * __:octicons-stack-24: Data__: This includes
15 | * The [datasets install, curation and split generation :material-rhombus-split-outline:](../datasets/install) used in our modeling
16 | * All [models and logs :material-file-multiple-outline:](/tcbench/modeling/exploring_artifacts/) generated through our modeling campaigns.
17 |
18 | ## :simple-figshare: Figshare material
19 |
20 | A key objective of our submission is to make all artifacts available
21 | to the research community.
22 | For instance, all code will be pushed to a :material-github: github repository,
23 | this website will be published on github pages or similar solutions,
24 | and data artifacts will be on a public cloud storage solution.
25 |
26 | Yet, due to double-blind policy, we temporarily uploaded our artifacts to a
27 | :simple-figshare: [figshare repository](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).
28 |
29 | More specifically, on figshare you find the following tarball.
30 |
31 | * `website_documentation.tgz`: Well...if you are reading this page
32 | you already know the tarball contains this website :octicons-smiley-24:.
33 |
34 | * `code_artifacts_paper132.tgz`: All code developed. See
35 | * [Quick tour](../quick_tour) for `tcbench`.
36 | * [Table and figures](../paper_tables_and_figures/reference/) for jupyter notebooks.
37 |
38 | * `curated_datasets.tgz`: The preprocessed version of the datasets.
39 | Please see the datasets pages in this website.
40 |
41 | * `ml_artifacts.tgz`: All output data generated via modeling campaigns.
42 | For fine grained view, those can be explored via [AIM web UI](/tcbench/modeling/exploring_artifacts/#aim-web-ui)
43 | while results are generated via [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference/).
44 |
45 | ## :material-package-variant: Unpack artifacts
46 |
47 | In the figshare folder we also provide a `unpack_scripts.tgz`
48 | tarball containing the following scripts
49 |
50 | ```
51 | unpack_all.sh
52 | _unpack_code_artifacts_paper132.sh
53 | _unpack_curated_datasets.sh
54 | _unpack_ml_artifacts.sh
55 | ```
56 |
57 | These are simple bash scripts to simplify the
58 | extraction and installation of all material.
59 |
60 | Use the following process
61 |
62 | 1. First of all, prepare a python virtual environment, for example via :simple-anaconda: conda
63 | ```
64 | conda create -n tcbench python=3.10 pip
65 | conda activate tcbench
66 | ```
67 |
68 | 2. Download all figshare tarballs in the same folder and run
69 | ```
70 | tar -xzvf unpack_scripts.tgz
71 | bash ./unpack_all.sh
72 | ```
73 |
--------------------------------------------------------------------------------
/docs.material/css/fonts.css:
--------------------------------------------------------------------------------
1 | .md-typeset code,
2 | .md-typeset kbd,
3 | .md-typeset pre {
4 | font-feature-settings: "kern", "liga";
5 | font-variant-ligatures: normal;
6 | }
7 |
8 | :root{
9 | --md-text-font:"Roboto";
10 | --md-code-font:""
11 | }
12 |
--------------------------------------------------------------------------------
/docs.material/css/jupyter-notebook.css:
--------------------------------------------------------------------------------
1 | .jp-RenderedHTMLCommon p {
2 | margin: 0pt;
3 | }
4 |
5 | .jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {
6 | display: none;
7 | }
8 |
9 | .jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt {
10 | display: none;
11 | }
12 |
13 | .jupyter-wrapper .jp-OutputArea-output pre {
14 | border-left: solid 5px #e0e0e0;
15 | padding-left: 5pt;
16 | }
17 |
--------------------------------------------------------------------------------
/docs.material/css/material.css:
--------------------------------------------------------------------------------
1 | /* More space at the bottom of the page. */
2 | .md-main__inner {
3 | margin-bottom: 1.5rem;
4 | }
5 |
--------------------------------------------------------------------------------
/docs.material/css/mkdocstrings.css:
--------------------------------------------------------------------------------
1 | /* Indentation. */
2 | div.doc-contents:not(.first) {
3 | padding-left: 25px;
4 | border-left: 4px solid rgba(230, 230, 230);
5 | margin-bottom: 80px;
6 | }
7 |
8 | /* Avoid breaking parameters name, etc. in table cells. */
9 | td code {
10 | word-break: normal !important;
11 | }
12 |
--------------------------------------------------------------------------------
/docs.material/css/style.css:
--------------------------------------------------------------------------------
1 | /* Mark external links as such (also in nav) */
2 | a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after {
3 | /* https://primer.style/octicons/link-external-16 */
4 | background-image: url('data:image/svg+xml,');
5 | height: 0.8em;
6 | width: 0.8em;
7 | margin-left: 0.2em;
8 | content: ' ';
9 | display: inline-block;
10 | }
11 |
12 | /* More space at the bottom of the page */
13 | .md-main__inner {
14 | margin-bottom: 1.5rem;
15 | }
16 |
--------------------------------------------------------------------------------
/docs.material/css/tables_style.css:
--------------------------------------------------------------------------------
1 | th, td {
2 | border: 1px solid var(--md-typeset-table-color);
3 | border-spacing: 0;
4 | border-bottom: none;
5 | border-left: none;
6 | border-top: none;
7 | }
8 |
9 | th {
10 | background:var(--md-primary-fg-color);
11 | color:white;
12 | }
13 |
14 | .md-typeset table:not([class]) th {
15 | font-weight: 200;
16 | }
17 |
18 | .md-typeset__table {
19 | line-height: 1;
20 | }
21 |
22 | .md-typeset__table table:not([class]) {
23 | font-size: .74rem;
24 | border-right: none;
25 | }
26 |
27 | .md-typeset__table table:not([class]) td,
28 | .md-typeset__table table:not([class]) th {
29 | padding: 9px;
30 | }
31 |
32 | /* light mode alternating table bg colors */
33 | .md-typeset__table tr:nth-child(2n) {
34 | background-color: #f8f8f8;
35 | }
36 |
37 | /* dark mode alternating table bg colors */
38 | [data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) {
39 | background-color: hsla(var(--md-hue),25%,25%,1)
40 | }
41 |
--------------------------------------------------------------------------------
/docs.material/datasets/datasets.csv:
--------------------------------------------------------------------------------
1 | Name,Classes, PDF, Data, Code, Auto-download
2 | ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12:
3 | mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark:
4 | mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark:
5 | utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12:
6 |
--------------------------------------------------------------------------------
/docs.material/datasets/datasets_splits.md:
--------------------------------------------------------------------------------
1 | The splits described here are specific to our submission
2 | and aim to replicate the previous IMC22 paper.
3 |
4 |
5 | ### ucdavis-icdm19
6 |
7 | Differently from the other datasets described here,
8 | `ucdavis-icdm19` does NOT require any filtering/adaptation
9 | after transforming the original CSV into a monolithic parquet.
10 |
11 | The testing partitions are also predefined ("human" and "script").
12 |
13 | We need however to define splits of 100 samples per class
14 | for modeling. To do so we perform a random shuffle of
15 | the data and generate 5 non overlapping groups of 100 samples.
16 |
17 | ```
18 | python datasets/generate_splits.py --config config.yml
19 | ```
20 |
21 | ???+ note "output"
22 | ```
23 | loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
24 | saving: datasets/ucdavis-icdm19/train_split_0.parquet
25 | saving: datasets/ucdavis-icdm19/train_split_1.parquet
26 | saving: datasets/ucdavis-icdm19/train_split_2.parquet
27 | saving: datasets/ucdavis-icdm19/train_split_3.parquet
28 | saving: datasets/ucdavis-icdm19/train_split_4.parquet
29 | loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
30 | saving: datasets/ucdavis-icdm19/test_split_human.parquet
31 | saving: datasets/ucdavis-icdm19/test_split_script.parquet
32 | ```
33 |
34 |
--------------------------------------------------------------------------------
/docs.material/datasets/guides/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: material/book-outline
3 | ---
4 |
5 | # Guides
6 |
7 | [:simple-jupyter: Datasets loading](/tcbench/datasets/guides/tutorial_load_datasets): A jupyter notebook
8 | showing the APIs used for loading the parquet files composing a dataset.
9 |
--------------------------------------------------------------------------------
/docs.material/datasets/import.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: material/cloud-download-outline
3 | title: Import
4 | ---
5 |
6 | # Import curated datasets
7 |
8 | The `datasets` command offers also the option
9 | to import a pre-computed curation of datasets.
10 |
11 | This is
12 |
13 | * To avoid unnecessary computation.
14 | Some of the preprocessing requires ingenuity and
15 | a multiprocessing/multicore architecture.
16 |
17 | * To further strengthen replicability (although
18 | the curation process of tcbench is deterministic).
19 |
20 | The [datasets summary table](/tcbench/datasets/#table-datasets-properties) indicates that
21 | not all datasets have the curated data already available.
22 | This is because some datasets (namely MIRAGE) have
23 | tighter licensing. For these datasets
24 | please refer to the related installation page.
25 |
26 | ## The `import` subcommand
27 |
28 | For datasets whose licensing allows redistributing
29 | modified versions, the curated data is stored
30 | in a public [:simple-figshare: figshare collection](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).
31 |
32 | You can manually fetch the datasets from the collection or
33 | automate their installation with the `datasets import` subcommand.
34 |
35 | ```
36 | tcbench datasets import --name ucdavis-icdm19
37 | ```
38 |
39 | !!! info Output
40 | ```
41 | Downloading... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 554.2 MB / 554.2 MB eta 0:00:00
42 | opening: /tmp/tmpb586lqhh/42438621
43 |
44 | Files installed
45 | Datasets
46 | └── ucdavis-icdm19
47 | └── 📁 preprocessed/
48 | ├── ucdavis-icdm19.parquet
49 | ├── LICENSE
50 | └── 📁 imc23/
51 | ├── test_split_human.parquet
52 | ├── test_split_script.parquet
53 | ├── train_split_0.parquet
54 | ├── train_split_1.parquet
55 | ├── train_split_2.parquet
56 | ├── train_split_3.parquet
57 | └── train_split_4.parquet
58 | ```
59 |
60 |
61 | Notice that `installed` is not set. Indeed
62 | the prepared curated datasets do NOT repack
63 | the original datasets, just the preprocessed ones
64 | (see the [meta-data](/tcbench/datasets/metadata/#samples-count-reports) page).
65 |
66 | You can also import the curated data by downloading the individual
67 | archives from figshare and use the `--archive` option
68 |
69 | ```
70 | tcbench datasets import \
71 | --name ucdavis-icdm19 \
72 | --archive
73 | ```
74 |
75 | !!! warning ":simple-figshare: Figshare versioning"
76 |
77 | Figshare updates the version of a published entry for any modification
78 | to any of the elements related to the entry (including changes to
79 | description).
80 |
81 | tcbench is configured to automatically fetch the latest version of
82 | the curated datasets. But if you download them manually make
83 | sure to download the latest versions
84 |
85 |
86 | ## The `delete` subcommand
87 |
88 | You can use the `delete` subcommand to remove installed/imported datasets.
89 |
90 | For instance, continuing the example above
91 |
92 | ```
93 | tcbench datasets delete --name ucdavis-icdm19
94 | ```
95 |
96 | ...now `info` shows that all data for `ucdavis-icdm19` has been removed
97 |
98 | ```
99 | tcbench datasets info --name ucdavis-icdm19
100 | ```
101 | !!! info "Output"
102 | ```
103 | Datasets
104 | └── ucdavis-icdm19
105 | └── 🚩 classes: 5
106 | 🔗 paper_url: https://arxiv.org/pdf/1812.09761.pdf
107 | 🔗 website: https://github.com/shrezaei/Semi-supervised-Learning-QUIC-
108 | 🔗 data: https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE
109 | 🔗 curated data: https://figshare.com/ndownloader/files/42437043
110 | ➕ curated data MD5: 9828cce0c3a092ff19ed77f9e07f317c
111 | 📁 installed: None
112 | 📁 preprocessed: None
113 | 📁 data splits: None
114 | ```
115 |
--------------------------------------------------------------------------------
/docs.material/datasets/index.md:
--------------------------------------------------------------------------------
1 | # Datasets
2 |
3 | TCBench supports the following *public* traffic classification datasets
4 |
5 | ##### Table : Datasets properties
6 | | Name | Applications | Links | License | Our curation |
7 | |:----:|:------------:|:-----:|:-------:|:------------:|
8 | |[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)|5|[:fontawesome-regular-file-pdf:](https://arxiv.org/pdf/1812.09761.pdf)[:material-package-down:](https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE)[:material-github:](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-)| [:material-creative-commons:](https://creativecommons.org/licenses/by/4.0/) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_ucdavisicdm19_tgz/23538141/1) |
9 | |[`mirage19`](/tcbench/datasets/install/mirage19/)|20|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-2019.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - |
10 | |[`mirage22`](/tcbench/datasets/install/mirage22/)|9|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - |
11 | |[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)|17|[:fontawesome-regular-file-pdf:](https://ieeexplore.ieee.org/abstract/document/9490678/)[:material-package-down:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)[:material-github:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)| [:simple-gnu: GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_utmobilenet21_tgz/23648703/1) |
12 |
13 | At a glance, these datasets
14 |
15 | * Are collections of either *CSV or JSON* files.
16 |
17 | * Are reporting individual *packet level information or per-flow time series* and metrics.
18 |
19 | * May have been organized in subfolders, namely *partitions*,
20 | to reflect the related measurement campaign (see `ucdavis-icdm19`, `utmobilenet21`).
21 |
22 | * May have file names carrying semantics.
23 |
24 | * May require preprocessing to remove "background" noise, i.e.,
25 | traffic unrelated to a target application (see `mirage19` and `mirage22`).
26 |
27 | * Do not have reference train/validation/test splits.
28 |
29 | In other words, these datasets need to be *curated*
30 | to be used.
31 |
32 | !!! tip "Important"
33 |
34 | The integration of these datasets in tcbench does not break
35 | the original licensing of the data nor does it break their ownership.
36 | Rather, the integration aims at easing access to these datasets.
37 | We thus encourage researchers and practitioners interested in
38 | using these datasets to cite the original publications
39 | (see links in the table above).
40 |
41 | ## Terminology
42 |
43 | When describing datasets and related processing we
44 | use the following conventions:
45 |
46 | * A __partition__ is a set of samples
47 | pre-defined by the authors of the dataset.
48 | For instance, a partition can relate to a
49 | specific set of samples to use for training/test
50 | (see [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)).
51 |
52 | * A __split__ is a set of indexes of samples
53 | that need to be used for train/validation/test.
54 |
55 | * An __unfiltered__ dataset corresponds to a
56 | monolithic parquet file containing the
57 | original raw data of a dataset (no filtering
58 | is applied).
59 |
60 | * A __curated__ dataset is generated
61 | processing the unfiltered parquet
62 | to clean noise, remove small flows, etc.,
63 | and each dataset has slightly different
64 | curation rules.
65 |
--------------------------------------------------------------------------------
/docs.material/datasets/install/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: material/arrow-down-bold-box
3 | ---
4 |
5 | # Datasets installation
6 |
7 | Dataset installation is triggered with the `datasets install` subcommand
8 |
9 | ```
10 | tcbench datasets install --help
11 | ```
12 |
13 | !!! info "Output"
14 | ```
15 | Usage: tcbench datasets install [OPTIONS]
16 |
17 | Install a dataset.
18 |
19 | ╭─ Options ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
20 | │ * --name -n [ucdavis-icdm19|utmobilenet21|mirage19|mirage22] Dataset to install. [required] │
21 | │ --input-folder -i PATH Folder where to find pre-downloaded tarballs. │
22 | │ --help Show this message and exit. │
23 | ╰─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
24 | ```
25 |
26 | The raw data of the datasets is either hosted on websites
27 | or cloud environments. The automatic download from
28 | those locations is available only for some of the datasets.
29 |
30 | | Name | Auto download |
31 | |:----:|:-------------:|
32 | |[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)| :octicons-x-24: |
33 | |[`mirage19`](/tcbench/datasets/install/mirage19/)| :material-check: |
34 | |[`mirage22`](/tcbench/datasets/install/mirage22/)| :material-check: |
35 | |[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)| :octicons-x-24: |
36 |
37 | If auto download is not possible, to install the dataset
38 | you need to manually fetch the related archives, place them
39 | in a folder, e.g., `/download`, and provide the `--input-folder`
40 | option when triggering installation.
41 |
42 | When installing a dataset, `tcbench` also
43 | shows two types of reports as formatted tables.
44 |
45 | * __Samples count__: This tables collect
46 | the number of samples (i.e., flows)
47 | available.
48 |
49 | * __Stats__: The curation process
50 | can filter out flows (e.g., based
51 | on a minimum number of packets
52 | or remove classes without a minimum
53 | number of flows). As such, when
54 | installing, `tcbench` is showing
55 | general stats (mean, std, percentiles)
56 | about number of packets
57 | for each flow across classes.
58 |
59 | Please check the specific install page for each dataset for more details.
60 |
61 |
62 | ## Datasets deletion
63 |
64 | The datasets files are installed within the
65 | python environment where tcbench is installed.
66 |
67 | You can delete a dataset using the following command
68 |
69 | ```
70 | tcbench datasets delete --name
71 | ```
72 |
73 |
--------------------------------------------------------------------------------
/docs.material/datasets/samples_count/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: octicons/number-24
3 | ---
4 |
5 | # Samples count report
6 |
7 | An important dataset property to keep an eye
8 | on when aiming for modeling is the number of
9 | samples for each class available in the datasets.
10 |
11 | You can easily recover this using the `datasets samples-count` subcommand.
12 |
13 | For instance,
14 | the following command computes the samples count for the *unfiltered*
15 | version of the [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19) dataset.
16 |
17 | ```
18 | tcbench datasets samples-count --name ucdavis-icdm19
19 | ```
20 |
21 | !!! note "Output"
22 | ```
23 | unfiltered
24 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
25 | ┃ partition ┃ app ┃ samples ┃
26 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
27 | │ pretraining │ google-doc │ 1221 │
28 | │ │ google-drive │ 1634 │
29 | │ │ google-music │ 592 │
30 | │ │ google-search │ 1915 │
31 | │ │ youtube │ 1077 │
32 | │ │ __total__ │ 6439 │
33 | ├─────────────────────────────┼───────────────┼─────────┤
34 | │ retraining-human-triggered │ google-doc │ 15 │
35 | │ │ google-drive │ 18 │
36 | │ │ google-music │ 15 │
37 | │ │ google-search │ 15 │
38 | │ │ youtube │ 20 │
39 | │ │ __total__ │ 83 │
40 | ├─────────────────────────────┼───────────────┼─────────┤
41 | │ retraining-script-triggered │ google-doc │ 30 │
42 | │ │ google-drive │ 30 │
43 | │ │ google-music │ 30 │
44 | │ │ google-search │ 30 │
45 | │ │ youtube │ 30 │
46 | │ │ __total__ │ 150 │
47 | └─────────────────────────────┴───────────────┴─────────┘
48 | ```
49 |
50 | While to obtain the breakdown of the first train split
51 |
52 | ```
53 | tcbench datasets samples-count --name ucdavis-icdm19 --split 0
54 | ```
55 |
56 | !!! note "Output"
57 | ```
58 | filtered, split: 0
59 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
60 | ┃ app ┃ samples ┃
61 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
62 | │ google-doc │ 100 │
63 | │ google-drive │ 100 │
64 | │ google-music │ 100 │
65 | │ google-search │ 100 │
66 | │ youtube │ 100 │
67 | ├───────────────┼─────────┤
68 | │ __total__ │ 500 │
69 | └───────────────┴─────────┘
70 | ```
71 |
72 | ...or the `human` test split
73 |
74 | ```
75 | tcbench datasets samples-count --name ucdavis-icdm19 --split human
76 | ```
77 |
78 | !!! note "Output"
79 | ```
80 | filtered, split: human
81 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
82 | ┃ app ┃ samples ┃
83 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
84 | │ youtube │ 20 │
85 | │ google-drive │ 18 │
86 | │ google-doc │ 15 │
87 | │ google-music │ 15 │
88 | │ google-search │ 15 │
89 | ├───────────────┼─────────┤
90 | │ __total__ │ 83 │
91 | └───────────────┴─────────┘
92 | ```
93 |
--------------------------------------------------------------------------------
/docs.material/datasets/samples_count/ucdavis-icdm19.md:
--------------------------------------------------------------------------------
1 | # `ucdavis-icdm19`
2 |
3 | Below we report the samples count for each version of the dataset.
4 |
5 | !!! tip "Semantic of the splits"
6 |
7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23).
8 |
9 | ### unfiltered
10 |
11 | The unfiltered version contains all data before curation.
12 |
13 | ```
14 | tcbench datasets samples-count --name ucdavis-icdm19
15 | ```
16 |
17 | !!! note "Output"
18 | ```
19 | unfiltered
20 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
21 | ┃ partition ┃ app ┃ samples ┃
22 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
23 | │ pretraining │ google-doc │ 1221 │
24 | │ │ google-drive │ 1634 │
25 | │ │ google-music │ 592 │
26 | │ │ google-search │ 1915 │
27 | │ │ youtube │ 1077 │
28 | │ │ __total__ │ 6439 │
29 | ├─────────────────────────────┼───────────────┼─────────┤
30 | │ retraining-human-triggered │ google-doc │ 15 │
31 | │ │ google-drive │ 18 │
32 | │ │ google-music │ 15 │
33 | │ │ google-search │ 15 │
34 | │ │ youtube │ 20 │
35 | │ │ __total__ │ 83 │
36 | ├─────────────────────────────┼───────────────┼─────────┤
37 | │ retraining-script-triggered │ google-doc │ 30 │
38 | │ │ google-drive │ 30 │
39 | │ │ google-music │ 30 │
40 | │ │ google-search │ 30 │
41 | │ │ youtube │ 30 │
42 | │ │ __total__ │ 150 │
43 | └─────────────────────────────┴───────────────┴─────────┘
44 | ```
45 |
46 |
47 | ### First training split
48 |
49 | ```
50 | tcbench datasets samples-count --name ucdavis-icdm19 --split 0
51 | ```
52 |
53 | !!! note "Output"
54 | ```
55 | filtered, split: 0
56 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
57 | ┃ app ┃ samples ┃
58 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
59 | │ google-doc │ 100 │
60 | │ google-drive │ 100 │
61 | │ google-music │ 100 │
62 | │ google-search │ 100 │
63 | │ youtube │ 100 │
64 | ├───────────────┼─────────┤
65 | │ __total__ │ 500 │
66 | └───────────────┴─────────┘
67 | ```
68 |
69 | ### `human` test split
70 |
71 | This is equivalent to the `human` partition of the unfiltered dataset.
72 |
73 | ```
74 | tcbench datasets samples-count --name ucdavis-icdm19 --split human
75 | ```
76 |
77 | !!! note "Output"
78 | ```
79 | filtered, split: human
80 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
81 | ┃ app ┃ samples ┃
82 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
83 | │ youtube │ 20 │
84 | │ google-drive │ 18 │
85 | │ google-doc │ 15 │
86 | │ google-music │ 15 │
87 | │ google-search │ 15 │
88 | ├───────────────┼─────────┤
89 | │ __total__ │ 83 │
90 | └───────────────┴─────────┘
91 | ```
92 |
93 | ### `script` test split
94 |
95 | This is equivalent to the `script` partition of the unfiltered dataset.
96 |
97 | ```
98 | tcbench datasets samples-count --name ucdavis-icdm19 --split script
99 | ```
100 |
101 | !!! note "Output"
102 | ```
103 | filtered, split: script
104 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
105 | ┃ app ┃ samples ┃
106 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
107 | │ google-doc │ 30 │
108 | │ google-drive │ 30 │
109 | │ google-music │ 30 │
110 | │ google-search │ 30 │
111 | │ youtube │ 30 │
112 | ├───────────────┼─────────┤
113 | │ __total__ │ 150 │
114 | └───────────────┴─────────┘
115 | ```
116 |
--------------------------------------------------------------------------------
/docs.material/datasets/samples_count/utmobilenet21.md:
--------------------------------------------------------------------------------
1 | # `utmobilenet21`
2 |
3 | Below we report the samples count for each version of the dataset.
4 |
5 | !!! tip "Semantic of the splits"
6 |
7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23).
8 |
9 | ### unfiltered
10 |
11 | The unfiltered version contains all data before curation.
12 |
13 | ```
14 | tcbench datasets samples-count --name utmobilenet21
15 | ```
16 |
17 | !!! note "Output"
18 | ```
19 | unfiltered
20 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━┓
21 | ┃ app ┃ samples ┃
22 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━┩
23 | │ youtube │ 5591 │
24 | │ reddit │ 4370 │
25 | │ google-maps │ 4347 │
26 | │ spotify │ 2550 │
27 | │ netflix │ 2237 │
28 | │ pinterest │ 2165 │
29 | │ hulu │ 1839 │
30 | │ instagram │ 1778 │
31 | │ dropbox │ 1752 │
32 | │ facebook │ 1654 │
33 | │ twitter │ 1494 │
34 | │ gmail │ 1133 │
35 | │ pandora │ 949 │
36 | │ messenger │ 837 │
37 | │ google-drive │ 803 │
38 | │ hangout │ 720 │
39 | │ skype │ 159 │
40 | ├──────────────┼─────────┤
41 | │ __total__ │ 34378 │
42 | └──────────────┴─────────┘
43 | ```
44 |
45 |
46 | ### First training split
47 |
48 | ```
49 | tcbench datasets samples-count --name utmobilenet21 --split 0
50 | ```
51 |
52 | !!! note "Output"
53 | ```
54 | min_pkts: 10, split: 0
55 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓
56 | ┃ app ┃ train_samples ┃ val_samples ┃ test_samples ┃ all_samples ┃
57 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩
58 | │ youtube │ 2021 │ 225 │ 250 │ 2496 │
59 | │ google-maps │ 1456 │ 162 │ 180 │ 1798 │
60 | │ hulu │ 947 │ 105 │ 117 │ 1169 │
61 | │ reddit │ 661 │ 73 │ 82 │ 816 │
62 | │ spotify │ 538 │ 60 │ 66 │ 664 │
63 | │ netflix │ 391 │ 44 │ 48 │ 483 │
64 | │ pinterest │ 353 │ 39 │ 44 │ 436 │
65 | │ twitter │ 296 │ 33 │ 36 │ 365 │
66 | │ instagram │ 222 │ 25 │ 27 │ 274 │
67 | │ hangout │ 206 │ 23 │ 25 │ 254 │
68 | │ dropbox │ 193 │ 21 │ 24 │ 238 │
69 | │ pandora │ 162 │ 18 │ 20 │ 200 │
70 | │ facebook │ 111 │ 12 │ 14 │ 137 │
71 | │ google-drive │ 105 │ 12 │ 13 │ 130 │
72 | ├──────────────┼───────────────┼─────────────┼──────────────┼─────────────┤
73 | │ __total__ │ 7662 │ 852 │ 946 │ 9460 │
74 | └──────────────┴───────────────┴─────────────┴──────────────┴─────────────┘
75 | ```
76 |
--------------------------------------------------------------------------------
/docs.material/datasets/schemas/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: material/table
3 | title: Schemas
4 | ---
5 |
6 | # Datasets schemas
7 |
8 | Despite the [curation](/tcbench/datasets/curation_and_metadata/), datasets can have intrinsically
9 | different schemas.
10 |
11 | You can investigate those on the command line via
12 | the `datasets schema` sub-command.
13 |
14 | ```
15 | tcbench datasets schema --help
16 |
17 | Usage: tcbench datasets schema [OPTIONS]
18 |
19 | Show datasets schemas
20 |
21 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
22 | │ --name -n [ucdavis-icdm19|utmobilenet21|mirage19|mirage22] Dataset to install │
23 | │ --type -t [unfiltered|filtered|splits] Schema type (unfiltered: original raw data; filtered: │
24 | │ curated data; splits: train/val/test splits) │
25 | │ --help Show this message and exit. │
26 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
27 | ```
28 |
29 | Besides the dataset name `--name`, the selection
30 | of the schema is simplified via a single parameter `--type`
31 | which matches the parquet files as follows
32 |
33 | * `"unfiltered"` corresponds to the monolithic files
34 | before any filtering (i.e., the files under `/preprocessed`)
35 |
36 | * `"filtered"` corresponds to the filtered
37 | version of the monolithic files (i.e., the files
38 | having `minpkts` in the filename).
39 |
40 | * `"splits"` corresponds to the split files
41 | (i.e., the files having `xyz_split.parquet`
42 | in the filename).
43 |
44 |
45 |
--------------------------------------------------------------------------------
/docs.material/figs/aim_log1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log1.png
--------------------------------------------------------------------------------
/docs.material/figs/aim_log2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log2.png
--------------------------------------------------------------------------------
/docs.material/figs/aim_log3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log3.png
--------------------------------------------------------------------------------
/docs.material/figs/aim_run1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run1.png
--------------------------------------------------------------------------------
/docs.material/figs/aim_run2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run2.png
--------------------------------------------------------------------------------
/docs.material/figs/aim_run3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run3.png
--------------------------------------------------------------------------------
/docs.material/figs/dataset_properties_mirage19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage19.png
--------------------------------------------------------------------------------
/docs.material/figs/dataset_properties_mirage22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage22.png
--------------------------------------------------------------------------------
/docs.material/figs/dataset_properties_ucdavis-icdm19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_ucdavis-icdm19.png
--------------------------------------------------------------------------------
/docs.material/figs/dataset_properties_utmobilenet21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_utmobilenet21.png
--------------------------------------------------------------------------------
/docs.material/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: TCBench
3 | template: home.html
4 | ---
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/docs.material/index.md.DEPRECATED:
--------------------------------------------------------------------------------
1 | This website documents code and data artifacts related to the IMC23 submission #132 titled
2 |
3 | !!! quote ""
4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__
5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations”
6 | from IMC’22*
7 |
8 | Our submission investigates the role of data
9 | augmentation by using both supervised
10 | and contrastive learning techniques
11 | across [4 datasets](datasets/install).
12 |
13 | It replicates and reproduces the following paper
14 | from the IMC22 program
15 |
16 |
17 | ```
18 | @inproceedings{10.1145/3517745.3561436,
19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval},
20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations},
21 | year = {2022},
22 | isbn = {9781450392594},
23 | publisher = {Association for Computing Machinery},
24 | address = {New York, NY, USA},
25 | url = {https://doi.org/10.1145/3517745.3561436},
26 | doi = {10.1145/3517745.3561436},
27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference},
28 | pages = {647–654},
29 | numpages = {8},
30 | location = {Nice, France},
31 | series = {IMC '22}
32 | }
33 | ```
34 |
35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`,
36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of
37 | frequency histograms extracted from consecutive time windows of the flow --
38 | applied on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19).
39 |
40 | In the first part of the submission we investigate how augmentations
41 | affect classification performance -- the study considers 3 image transformations (*rotation,
42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*)
43 | applied to packets timestamps -- when used either in a fully supervised setting or via
44 | contrastive learning.
45 |
46 | !!! info "Key takeaways from reproducibility"
47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19).
48 | Specifically, we uncover a data shift present in the dataset itself which justifies our results;
49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`.
50 |
51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently
52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`,
53 |     we do not find statistically significant differences across the different augmentations.
54 |
55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet
56 | relying on more samples is beneficial to boost performance.
57 |
58 | Then, in the second part of the submission we replicate the
59 | analysis testing the same 6 augmentations across 3 other datasets.
60 |
61 | !!! info "Key takeaways from replicability"
62 |     Using multiple datasets allows us to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e.,
63 | *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19)
64 | is superior to the alternative transformations presented in the paper.
65 |
66 |
67 | ## Website conventions
68 |
69 | * :material-file-document-outline:`imc22-paper` is used to reference the replicated/reproduced paper.
70 |
71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available.
72 |
73 | * :material-link-off: suggests a link is expected to be added but is not yet available.
74 |
--------------------------------------------------------------------------------
/docs.material/install.md:
--------------------------------------------------------------------------------
1 | # Install and config
2 |
3 | ## Download code and artifacts
4 |
5 | If you see this documentation it means
6 | you downloaded the file from figshare so you already have the code
7 | in your hand :)
8 |
9 | !!! note
10 | It is our intent to push all the code into a proper repository
11 |
12 |
13 | ## Configure a python environment :material-language-python:
14 |
15 | We first create a `conda` environment to install
16 | all required dependencies
17 |
18 | ```
19 | conda create -n replicating-imc22-flowpic python=3.10 pip
20 | conda activate replicating-imc22-flowpic
21 | python -m pip install -r ./requirements.txt
22 | ```
23 |
24 | The code artifacts are also a python package
25 | that can be installed inside the environment.
26 | From inside `/replicate_imc22_flowpic` run
27 |
28 | ```
29 | python -m pip install .
30 | ```
31 |
--------------------------------------------------------------------------------
/docs.material/modeling/aim_repos/aim_webui.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: AIM Web UI
3 | icon: material/monitor-dashboard
4 | ---
5 |
6 | # AIM Web UI
7 |
8 | AIM web interface is quite intuitive and
9 | the official documentation already provides
10 | a [general purpose tutorial](https://aimstack.readthedocs.io/en/latest/ui/overview.html).
11 |
12 | In this mini guide we limit ourselves to showcasing a basic set
13 | of operations to navigate the ML artifacts using
14 | some artifacts from our [IMC23](/tcbench/papers/imc23) paper.
15 |
16 | To replicate the following, make sure you [installed
17 | the needed artifacts](/tcbench/papers/imc23/artifacts/#downloading-artifacts).
18 |
19 | ```
20 | aim up --repo notebooks/imc23/campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/
21 | ```
22 |
23 | !!! info "Output"
24 | ```
25 | Running Aim UI on repo ``
26 | Open http://127.0.0.1:43800
27 | Press Ctrl+C to exit
28 | ```
29 |
30 | Run `aim up --help` for more options (e.g., specifying a different port or hostname).
31 |
32 | When visiting the URL reported in the output
33 | you land on the home page of the AIM repository.
34 |
35 | This collects a variety of aggregate metrics
36 | and track activity over time.
37 | Hence, in our scenario
38 | the home page of the ML artifacts is mostly empty
39 | because all campaigns were generated in a specific moment in time.
40 |
41 | [![aim-home-page]][aim-home-page]
42 |
43 | [aim-home-page]: ../../figs/aim_home-page.png
44 |
45 | The left side bar allows switching the view.
46 | In particular, "Runs" shows a tabular
47 | view of the runs collected in the repository.
48 |
49 | [![aim-run1]][aim-run1]
50 |
51 | [aim-run1]: ../../figs/aim_run1.png
52 |
53 | From the view you can see the hash of each run
54 | and scrolling horizontally you can glance
55 | over the metadata stored for each run.
56 |
57 | [![aim-run2]][aim-run2]
58 |
59 | [aim-run2]: ../../figs/aim_run2.png
60 |
61 | The search bar on the top of the page
62 | allows to filter runs.
63 | It accepts python expressions bound
64 | to a `run` entry point.
65 |
66 | For instance, in the following example we filter
67 | one specific run based on hyper parameters.
68 |
69 | [![aim-run3]][aim-run3]
70 |
71 | [aim-run3]: ../../figs/aim_run3.png
72 |
73 |
74 | !!! tip "Using the search box"
75 |
76 |     The search box accepts python expressions and `run.hparams`
77 | is a dictionary of key-value pairs related to the different runs.
78 |
79 | As from the example, you can use the traditional python
80 | syntax of `dict[] == ` to filter, but the search
81 | box also supports a dot-notated syntax `hparams. == `
82 | which has an autocomplete.
83 |
84 | In the example, the search is based on equality but any other
85 | python operation is allowed.
86 |
87 | When clicking the hash of a run (e.g., the one we filtered)
88 | we switch to a per-run view which
89 | further details the collected metadata of the selected run.
90 |
91 | [![aim-log1]][aim-log1]
92 |
93 | [aim-log1]: ../../figs/aim_log1.png
94 |
95 | For instance, when scrolling at
96 | the bottom of the per-run page
97 | we can see that AIM details
98 |
99 | * The specific git commit used when executing the run.
100 |
101 | * The specific python packages and related versions
102 | available in the environment when executing the run.
103 |
104 | Both are automatically tracked by AIM with
105 | no extra code required (besides activating
106 | their collection when creating the run).
107 |
108 | [![aim-log2]][aim-log2]
109 |
110 | [aim-log2]: ../../figs/aim_log2.png
111 |
112 | The per-run view offers a variety of information
113 | organized in multiple tabs.
114 |
115 | For instance, the tab "Logs"
116 | details the console output.
117 |
118 | [![aim-log3]][aim-log3]
119 |
120 | [aim-log3]: ../../figs/aim_log3.png
121 |
122 |
--------------------------------------------------------------------------------
/docs.material/modeling/aim_repos/index.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: Explore AIM repos
3 | icon: simple/awsorganizations
4 | ---
5 |
6 | An AIM repository is merely a folder
7 | where AIM stores a [rocksdb database](https://rocksdb.org/docs/getting-started.html)
8 | (see [AIM reference doc](https://aimstack.readthedocs.io/en/v3.17.5/understanding/data_storage.html) for more info).
9 |
10 | AIM has great functionality for tracking metrics
11 | but has very little support for
12 | tracking general artifacts outside
13 | console output, nor native support for storing trained model
14 | files.
15 |
16 | Hence tcbench complements AIM by collecting
17 | run artifacts into run-specific folders.
18 |
19 | Specifically, a tcbench repository has the following structure
20 |
21 | ```
22 |
23 | ├── .aim
24 | ├── artifacts
25 | │ ├── 001baa39ed8d4b8bb9966e94
26 | │ ├── 025830cb840b4f3f8f0a1625
27 | │ ├── 050bae064b5246f88e821a29
28 | ...
29 | └── campaign_summary
30 | └──
31 | ```
32 |
33 | * Each subfolder of `/artifacts` collects
34 | the artifacts of a specific run and
35 | is named with the hash of the run itself.
36 |
37 | * The `/campaign_summary` subfolder collects
38 | reports generated by the [`aimrepo report` subcommand](/tcbench/modeling/aim_repos/aimrepo_subcmd/).
39 |
40 | Investigating the content of one run artifact folder
41 |
42 | ```
43 | ls -1 /artifacts/001baa39ed8d4b8bb9966e94
44 | ```
45 |
46 | !!! note "Output"
47 |
48 | ```
49 | log.txt
50 | params.yml
51 | test-human_class_rep.csv
52 | test-human_conf_mtx.csv
53 | test-script_class_rep.csv
54 | test-script_conf_mtx.csv
55 | test-train-val-leftover_class_rep.csv
56 | test-train-val-leftover_conf_mtx.csv
57 | train_class_rep.csv
58 | train_conf_mtx.csv
59 | val_class_rep.csv
60 | val_conf_mtx.csv
61 | best_model_weights_split_2.pt
62 | ```
63 |
64 | For each run tcbench creates the following artifacts:
65 |
66 | * `params.yml` is a YAML file collecting
67 | parameters used when triggering a run, i.e., both
68 | the arguments explicitly defined on the command line,
69 | as well as the ones with default values.
70 |
71 | * `log.txt` collects the console output generated by the run.
72 |
73 | * `_class_rep.csv` contains a [classification report](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). The filename is bound to the context (i.e., train, val, test)
74 | used to generate it.
75 |
76 | * `_conf_mtx.csv` contains a [confusion matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html). The filename is bound to the context (i.e., train, val, test)
77 | used to generate it.
78 |
79 | * `best_model_weights_split_.pt` stores the weights of the best
80 | trained pytorch model (for a deep learning model). The filename is bound to the specific
81 | split index configured when triggering the run.
82 |
83 | * `xgb_model_split_.json` stores an XGBoost model (when training
84 | via xgboost). The filename is bound to the specific
85 | split index configured when triggering the run.
86 |
87 |
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_home-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_home-page.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_log1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log1.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_log2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log2.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_log3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log3.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_run1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run1.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_run2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run2.png
--------------------------------------------------------------------------------
/docs.material/modeling/figs/aim_run3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run3.png
--------------------------------------------------------------------------------
/docs.material/modeling/overview.md:
--------------------------------------------------------------------------------
1 | # Modeling overview
2 |
3 | When training ML/DL models,
4 | finding the right combination of data
5 | preprocessing/splitting, algorithms and
6 | hyper-parameters can be challenging.
7 | Even more so when the modeling process
8 | aims to be [repeatable/replicable/reproducible](https://www.acm.org/publications/policies/artifact-review-badging).
9 |
10 | To ease this process it is key to
11 |
12 | * Collect __telemetry and metadata__.
13 | This includes both the parameters used to create models
14 | as well as lower level metrics such as the evolution of the
15 | training loss over time.
16 |
17 | * Generate __artifacts__ such as
18 | reports about the overall performance
19 | (e.g., confusion matrices).
20 |
21 | ## AIM stack tracking
22 |
23 | `tcbench` integrates
24 | with [AIM stack](https://aimstack.io/), an
25 | open-source and self-hosted model
26 | tracking framework enabling logging of metrics
27 | related to model training. Such telemetry
28 | can later be explored via a [web interface](https://aimstack.readthedocs.io/en/latest/ui/overview.html)
29 | or [programmatically extracted](https://aimstack.readthedocs.io/en/latest/using/query_runs.html) via AIM SDK.
30 |
31 | !!! info "__Why not using more popular frameworks?__"
32 |
33 | There are [many solutions for model tracking](https://neptune.ai/blog/best-ml-experiment-tracking-tools).
34 | While frameworks such as __Weights & Biases__ or __Neptune.ai__
35 | are extremely rich with features, unfortunately they typically
36 | are cloud-based solutions and not necessarily open-sourced.
37 |
38 | Alternative frameworks such as __Tensorboard__ and __MLFlow__
39 | have only primitive functionalities with respect to AIM stack.
40 |
41 | Aim stack is sitting in the middle of this spectrum:
42 | It is self-hosted (i.e., no need to push data to the cloud)
43 | and provides nice data exploration features.
44 |
45 | ## Runs and campaigns
46 |
47 | AIM collects modeling metadata into __repositories__
48 | which are fully controlled by end-users:
49 |
50 | * Repositories are not tied to specific projects.
51 | In other words, the end-user can store
52 | in a repository models completely unrelated to each other.
53 |
54 | * There is no limit on the number of repositories
55 | that can be created.
56 |
57 | `tcbench` tracks in an AIM repository two types of tasks,
58 | namely *runs* and *campaigns*:
59 |
60 | * A __run__ corresponds to the training of an
61 | individual ML/DL model and is the "minimal experiment object" used by AIM,
62 | i.e., any tracked metadata needs to be
63 | associated to an AIM run.
64 |
65 | * A __campaign__ corresponds to a
66 | collection of runs.
67 |
68 | AIM assigns a unique hash code to a run,
69 | but a run object can be further enriched with
70 | extra metadata using AIM SDK or web UI.
71 |
72 | A run can be enriched with both individual values
73 | (e.g., best validation loss observed or the final accuracy score)
74 | as well as series (e.g., loss value for each epoch).
75 | Moreover, values can have a *context* to further
76 | specify semantics (e.g., define if a registered metric
77 | relates to training, validation or test).
78 |
79 | While *run* is a term borrowed from AIM terminology,
80 | `tcbench` introduces *campaign* to
81 | group runs which are semantically related
82 | and need to be summarized together (e.g., results
83 | collected across different train/val/test splits).
84 |
85 | It follows that:
86 |
87 | * Runs are the fundamental building block for collecting
88 | modeling results. But they are also the fundamental
89 | unit when developing/debugging modeling tasks.
90 |
91 | * Campaigns bind multiple runs together. Hence,
92 | they are meant to be stored in separate AIM repositories
93 | (although this is NOT a strict requirement for `tcbench`).
94 |
--------------------------------------------------------------------------------
/docs.material/overrides/arrow-right-solid.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs.material/overrides/github-mark/github-mark.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs.material/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 |
5 | {% if page.nb_url %}
6 |
7 | {% include ".icons/simple/jupyter.svg" %}
8 | {% include ".icons/material/download.svg" %}
9 |
10 | {% endif %}
11 |
12 | {{ super() }}
13 |
14 |
35 |
36 |
45 |
46 |
47 | {% endblock content %}
48 |
--------------------------------------------------------------------------------
/docs.material/overrides/main.html.DEPRECATED:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 |
5 | {% if page.nb_url %}
6 |
7 | {% include ".icons/simple/jupyter.svg" %}
8 | {% include ".icons/material/download.svg" %}
9 |
10 | {% endif %}
11 |
12 | {{ super() }}
13 |
14 |
35 |
36 |
45 |
46 |
47 | {% endblock content %}
48 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.md:
--------------------------------------------------------------------------------
1 | # Figure 10(b): Classwise evaluation on human.
2 |
3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb)
4 |
5 |
6 | ```python
7 | import pathlib
8 |
9 | import matplotlib as mpl
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import pandas as pd
13 | import seaborn as sns
14 | import statsmodels.stats.api as sms
15 |
16 | %matplotlib inline
17 | %config InlineBackend.figure_format='retina'
18 | ```
19 |
20 |
21 | ```python
22 | def compute_confidence_intervals(array, alpha=0.05):
23 | array = np.array(array)
24 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha)
25 | mean = array.mean()
26 | ci = high - mean
27 | return ci
28 | ```
29 |
30 |
31 | ```python
32 | path = pathlib.Path(
33 | "./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/IncrementalSampling_Retraining(human-triggered)_20/"
34 | )
35 |
36 | class_reps = list(path.glob("*class_rep.csv"))
37 |
38 | per_cls = np.stack(
39 | [
40 | pd.read_csv(file)[:5][["Accuracy", "precision", "recall", "f1-score"]].values
41 | for file in class_reps
42 | ],
43 | axis=0,
44 | )
45 |
46 |
47 | means = np.mean(per_cls, axis=0)
48 |
49 | cis = np.zeros([per_cls.shape[1], per_cls.shape[2]])
50 | for i in range(per_cls.shape[1]):
51 | for j in range(per_cls.shape[2]):
52 | cis[i, j] = compute_confidence_intervals(per_cls[:, i, j])
53 | ```
54 |
55 |
56 | ```python
57 | X = ["G. Drive", "Youtube", "G. Doc", "G. Search", "G. Music"]
58 | X_axis = np.arange(len(X))
59 |
60 | plt.rcParams.update({'font.size': 16})
61 |
62 | fig, ax = plt.subplots(figsize=(7, 6.5))
63 | ax.bar(
64 | X_axis - 0.3,
65 | means[:, 0],
66 | 0.2,
67 | label="Accuracy",
68 | yerr=cis[:, 0],
69 | ecolor="black",
70 | alpha=0.5,
71 | capsize=10,
72 | )
73 | ax.bar(
74 | X_axis - 0.1,
75 | means[:, 1],
76 | 0.2,
77 | label="Precision",
78 | yerr=cis[:, 1],
79 | ecolor="black",
80 | alpha=0.5,
81 | capsize=10,
82 | )
83 | ax.bar(
84 | X_axis + 0.1,
85 | means[:, 2],
86 | 0.2,
87 | label="Recall",
88 | yerr=cis[:, 2],
89 | ecolor="black",
90 | alpha=0.5,
91 | capsize=10,
92 | )
93 | ax.bar(
94 | X_axis + 0.3,
95 | means[:, 3],
96 | 0.2,
97 | label="F1",
98 | yerr=cis[:, 3],
99 | ecolor="black",
100 | alpha=0.5,
101 | capsize=10,
102 | )
103 |
104 |
105 | plt.xticks(X_axis, X)
106 | ax.set_xlabel("Class")
107 | ax.set_ylabel("Value")
108 | ax.set_ylim([0, 1])
109 | plt.legend()
110 | ax.legend(bbox_to_anchor=(1, 1.02))
111 | plt.grid(axis="y")
112 |
113 | plt.savefig("icdm19_fig3b_replicate_human_ci.png", dpi=300, bbox_inches="tight")
114 | ```
115 |
116 |
117 |
118 | 
119 |
120 |
121 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure1_flowpic_example.md:
--------------------------------------------------------------------------------
1 | # Figure 1 : Example of a packet time series transformed into a flowpic representation for a randomly selected flow
2 |
3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure1_flowpic_example.ipynb)
4 |
5 |
6 | ```python
7 | import numpy as np
8 | import tcbench as tcb
9 | from matplotlib.colors import LogNorm, Normalize
10 | from tcbench import dataprep
11 | ```
12 |
13 |
14 | ```python
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import seaborn as sns
18 |
19 | %matplotlib inline
20 | %config InlineBackend.figure_format='retina'
21 | ```
22 |
23 |
24 | ```python
25 | import tcbench
26 | ```
27 |
28 |
29 | ```python
30 | # load unfiltered dataset
31 | FLOWPIC_BLOCK_DURATION = 15
32 | ```
33 |
34 |
35 | ```python
36 | df = tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19)
37 | ```
38 |
39 |
40 | ```python
41 | df_sample = df.sample(n=1, random_state=12345)
42 | ser = df_sample.iloc[0]
43 | ```
44 |
45 |
46 | ```python
47 | fig, axes = plt.subplots(
48 | nrows=1, ncols=5, figsize=(15, 3), gridspec_kw=dict(width_ratios=[1, 1, 1, 1, 1.1])
49 | )
50 |
51 | direction = np.where(ser["pkts_dir"] == 0, -1, 1)
52 | y = ser["pkts_size"] * direction
53 | x = ser["timetofirst"]
54 |
55 | ax = axes[0]
56 | ax.stem(
57 | np.where(y > 0, x, 0),
58 | np.where(y > 0, y, 0),
59 | markerfmt="",
60 | basefmt="lightgray",
61 | label="outgoing",
62 | linefmt="green",
63 | )
64 | ax.stem(
65 | np.where(y < 0, x, 0),
66 | np.where(y < 0, y, 0),
67 | markerfmt="",
68 | basefmt="lightgray",
69 | label="incoming",
70 | linefmt="lightgreen",
71 | )
72 | ax.legend()
73 | ax.set_ylabel("packet size [B]")
74 | ax.set_xlabel("time [s]")
75 |
76 | rect = mpl.patches.Rectangle(
77 | (0, -1500), 15, 3000, linewidth=1, edgecolor="r", facecolor="none"
78 | )
79 | ax.add_patch(rect)
80 | ax.annotate("first\n15s", (5, 1000))
81 |
82 | for idx, flowpic_dim in enumerate((32, 64, 256, 512), start=1):
83 | # create a single sample dataset
84 | dset = dataprep.FlowpicDataset(
85 | data=df_sample,
86 | timetofirst_colname="timetofirst",
87 | pkts_size_colname="pkts_size",
88 | pkts_dir_colname="pkts_dir",
89 | target_colname="app",
90 | flowpic_dim=flowpic_dim,
91 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION,
92 | )
93 |
94 | # fetch the flowpic representation
95 | flowpic, label = dset[0]
96 |
97 | # flattening the representation
98 | # to remove zero values (used for finding
99 | # min values)
100 | flowpic = flowpic.numpy().squeeze()
101 | flattened = flowpic.flatten()
102 | flattened = flattened[flattened > 0]
103 |
104 | ax = axes[idx]
105 |
106 | sns.heatmap(
107 | ax=ax,
108 | data=np.where(flowpic == 0, np.nan, flowpic),
109 | vmin=flattened.min(),
110 | vmax=flattened.max(),
111 | cbar=idx == 4,
112 | cbar_kws=dict(fraction=0.046, pad=0.01, aspect=20, label="Normalized packets count"),
113 | cmap=plt.get_cmap("viridis_r"),
114 | square=True,
115 | norm=LogNorm(flattened.min(), flattened.max()),
116 | )
117 | for _, spine in ax.spines.items():
118 | spine.set_visible(True)
119 | spine.set_linewidth(1)
120 | ax.yaxis.set_ticks([], None)
121 | ax.xaxis.set_ticks([], None)
122 | ax.set_ylabel(f"packets size (bins of {1500 // flowpic_dim}B)")
123 | ax.set_xlabel(f"time (bins of {FLOWPIC_BLOCK_DURATION / flowpic_dim * 1000:.1f}ms)")
124 | ax.set_title(f"{flowpic_dim}x{flowpic_dim}")
125 |
126 | plt.savefig("flowpic_example.png", dpi=300, bbox_inches="tight")
127 | ```
128 |
129 |
130 |
131 | 
132 |
133 |
134 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.md:
--------------------------------------------------------------------------------
1 | # Figure 3 : Average confusion matrixes for the 32x32 resolution across all experiments in Table 4
2 |
3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.ipynb)
4 |
5 |
6 | ```python
7 | import pathlib
8 |
9 | import matplotlib as mpl
10 | import matplotlib.pyplot as plt
11 | import numpy as np
12 | import pandas as pd
13 | import seaborn as sns
14 | from sklearn.preprocessing import normalize
15 |
16 | %matplotlib inline
17 | %config InlineBackend.figure_format='retina'
18 | ```
19 |
20 |
21 | ```python
22 | folder_artifacts = pathlib.Path(
23 | "./campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/artifacts/"
24 | )
25 | ```
26 |
27 |
28 | ```python
29 | filelists = [
30 | list(folder_artifacts.glob("*/test-human_conf_mtx.csv")),
31 | list(folder_artifacts.glob("*/test-script_conf_mtx.csv")),
32 | ]
33 |
34 | titles = ["human", "script"]
35 |
36 | CLASSES = {
37 | "google-doc": "G. Doc",
38 | "google-drive": "G. Drive",
39 | "google-music": "G. Music",
40 | "google-search": "G. Search",
41 | "youtube": "YouTube",
42 | }
43 | ```
44 |
45 |
46 | ```python
47 | plt.rcParams.update({"font.size": 14})
48 |
49 | fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5))
50 | # cbar_ax = fig.add_axes([0.93, 0.2, 0.02, 0.6]) # (left, bottom, width, height)
51 | for i in range(2):
52 | cm_mean = np.mean(
53 | np.stack(
54 | [pd.read_csv(file)[list(CLASSES.keys())].values for file in filelists[i]]
55 | ),
56 | axis=0,
57 | )
58 |
59 | normed_cm_mean = normalize(cm_mean, axis=1, norm="l1")
60 |
61 | ax = axes[i]
62 |
63 | sns.heatmap(
64 | data=normed_cm_mean,
65 | ax=ax,
66 | square=True,
67 | cmap="viridis",
68 | annot=True,
69 | annot_kws={"fontsize": 11},
70 | fmt=".2f",
71 | vmin=0,
72 | vmax=1,
73 | cbar_kws=dict(fraction=0.046, pad=0.03, aspect=20),
74 | )
75 |
76 | ax.set_xticklabels(list(CLASSES.values()), rotation=45, ha="right")
77 | ax.set_yticklabels(list(CLASSES.values()), rotation=0)
78 |
79 | ax.set_title(titles[i])
80 |
81 | ax.set_ylabel("Ground Truth")
82 | ax.set_xlabel("Prediction")
83 |
84 | plt.tight_layout()
85 | plt.savefig("ucdavis_dataset_confusion_matrix.png", bbox_inches="tight", dpi=150)
86 | ```
87 |
88 |
89 |
90 | 
91 |
92 |
93 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.md:
--------------------------------------------------------------------------------
1 | # Figure 8: Investigating root cause of G1 discrepancies: Kernel density estimation of the per-class packet size distributions.
2 |
3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.ipynb)
4 |
5 |
6 | ```python
7 | import itertools
8 |
9 | import numpy as np
10 | import pandas as pd
11 | ```
12 |
13 |
14 | ```python
15 | import matplotlib as mpl
16 | import matplotlib.pyplot as plt
17 | import seaborn as sns
18 | from matplotlib.colors import LogNorm, Normalize
19 |
20 | %matplotlib inline
21 | %config InlineBackend.figure_format='retina'
22 | ```
23 |
24 |
25 | ```python
26 | import tcbench as tcb
27 | from tcbench import dataprep
28 | ```
29 |
30 |
31 | ```python
32 | FLOWPIC_DIM = 32
33 | FLOWPIC_BLOCK_DURATION = 15
34 | ```
35 |
36 |
37 | ```python
38 | # load unfiltered dataset
39 | dset = dataprep.FlowpicDataset(
40 | data=tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19),
41 | timetofirst_colname="timetofirst",
42 | pkts_size_colname="pkts_size",
43 | pkts_dir_colname="pkts_dir",
44 | target_colname="app",
45 | flowpic_dim=FLOWPIC_DIM,
46 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION,
47 | )
48 | ```
49 |
50 |
51 | ```python
52 | REPLACE = {
53 | "google-doc": "G. Doc",
54 | "google-drive": "G. Drive",
55 | "google-music": "G. Music",
56 | "google-search": "G. Search",
57 | "youtube": "YouTube",
58 | "retraining-human-triggered": "Human",
59 | "retraining-script-triggered": "Script",
60 | }
61 |
62 | dset.df = dset.df.assign(
63 | app = dset.df["app"].replace(REPLACE),
64 | partition = dset.df["partition"].replace(REPLACE)
65 | )
66 | ```
67 |
68 |
69 | ```python
70 | TARGETS_LABEL = sorted(dset.df["app"].unique())
71 | PARTITIONS_NAME = sorted(dset.df["partition"].unique())
72 | ```
73 |
74 |
75 | ```python
76 | all_pkts_size = dict()
77 |
78 | for partition_name in PARTITIONS_NAME:
79 | all_pkts_size[partition_name] = dict()
80 |
81 | for app in TARGETS_LABEL:
82 | df_tmp = dset.df[
83 | (dset.df["partition"] == partition_name) & (dset.df["app"] == app)
84 | ]
85 |
86 | l = []
87 | for idx in df_tmp.index:
88 | ser = df_tmp.loc[idx]
89 | indexes = np.where(ser["timetofirst"] < FLOWPIC_BLOCK_DURATION)[0]
90 | pkts_size = ser["pkts_size"][indexes]
91 | l.append(pkts_size)
92 | all_pkts_size[partition_name][app] = np.concatenate(l)
93 | ```
94 |
95 |
96 | ```python
97 | # WARNING: computing the KDE will take a few minutes
98 |
99 | fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(15, 5))
100 |
101 | line_props = {
102 | "pretraining": dict(linestyle="-"),
103 | "Script": dict(
104 | linestyle=(0, (1, 1))
105 | ),
106 | "Human": dict(linestyle=(0, (1, 1))),
107 | }
108 |
109 | for ax, app in zip(axes, TARGETS_LABEL):
110 | for partition_name in [
111 | "pretraining",
112 | "Script",
113 | "Human",
114 | ]:
115 | props = line_props[partition_name]
116 | sns.kdeplot(
117 | ax=ax,
118 | data=all_pkts_size[partition_name][app],
119 | linewidth=2,
120 | label=partition_name,
121 | **props,
122 | fill=True,
123 | alpha=0.1
124 | )
125 | ax.legend(bbox_to_anchor=(0.5, 1.5), loc="upper center")
126 | ax.set_title(app, fontsize=10)
127 | ax.set_xlim((-500, 1800))
128 | ax.set_xlabel("packet size")
129 | ax.set_ylabel("kde")
130 |
131 | plt.tight_layout()
132 | plt.savefig("ucdavid-icdm19_kde_pkts_size.png", dpi=300, bbox_inches='tight')
133 | ```
134 |
135 |
136 |
137 | 
138 |
139 |
140 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.md:
--------------------------------------------------------------------------------
1 |
2 |
23 | # Table 10: Performance comparison across augmentations for different flowpic sizes.
24 |
25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.ipynb)
26 |
27 |
28 | ```python
29 | import pathlib
30 |
31 | import numpy as np
32 | import pandas as pd
33 | from scipy.stats import tukey_hsd
34 | ```
35 |
36 | ```python
37 | folder = pathlib.Path(
38 | "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout"
39 | )
40 | df = pd.concat(
41 | (
42 | pd.read_parquet(folder / "runsinfo_flowpic_dim_1500.parquet"),
43 | pd.read_parquet(folder / "runsinfo_flowpic_dim_64.parquet"),
44 | pd.read_parquet(folder / "runsinfo_flowpic_dim_32.parquet"),
45 | )
46 | )
47 | ```
48 |
49 | ```python
50 | # df = pd.read_parquet('campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/merged_runsinfo.parquet')
51 | ```
52 |
53 | ```python
54 | df_script = df[df["test_split_name"] == "test-script"]
55 |
56 | acc_32 = df_script[df_script["flowpic_dim"] == 32]["acc"].values.tolist()
57 | acc_64 = df_script[df_script["flowpic_dim"] == 64]["acc"].values.tolist()
58 | acc_1500 = df_script[df_script["flowpic_dim"] == 1500]["acc"].values.tolist()
59 | ```
60 |
61 | ```python
62 | res = tukey_hsd(acc_32, acc_64, acc_1500)
63 | ```
64 |
65 | ```python
66 | df = pd.DataFrame(
67 | np.array([res.pvalue[0, 1], res.pvalue[0, 2], res.pvalue[1, 2]]).reshape(-1, 1),
68 | columns=["pvalue"],
69 | index=pd.MultiIndex.from_arrays(
70 | [("32x32", "32x32", "64x64"), ("64x64", "1500x1500", "1500x1500")]
71 | ),
72 | )
73 | df = df.assign(is_different=df["pvalue"] < 0.05)
74 | ```
75 |
76 | ```python
77 | df
78 | ```
79 |
80 |
81 |
82 |
115 |
116 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/table3_xgboost_baseline.md:
--------------------------------------------------------------------------------
1 |
2 |
23 | # Table 3: (G0) Baseline ML performance without augmentation in a supervised setting.
24 |
25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table3_xgboost_baseline.ipynb)
26 |
27 |
28 | ```python
29 | import pandas as pd
30 | ```
31 |
32 | ```python
33 | df = pd.read_csv(
34 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-flowpic/campaign_summary/noaugmentation-flowpic/summary_flowpic_dim_32.csv",
35 | header=[0, 1],
36 | index_col=[0, 1],
37 | )
38 | ```
39 |
40 | ```python
41 | # reformatting
42 | df_tmp = df["acc"][["mean", "ci95"]].round(2)
43 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2)
44 | ```
45 |
46 |
47 |
48 |
73 |
74 |
75 |
76 | ```python
77 | df = pd.read_csv(
78 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-timeseries/campaign_summary/noaugmentation-timeseries/summary_max_n_pkts_10.csv",
79 | header=[0, 1],
80 | index_col=[0, 1],
81 | )
82 | ```
83 |
84 | ```python
85 | # reformatting
86 | df_tmp = df["acc"][["mean", "ci95"]].round(2)
87 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2)
88 | ```
89 |
90 |
91 |
92 |
117 |
118 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.md:
--------------------------------------------------------------------------------
1 |
2 |
23 | # Table 5: Impact of dropout and SimCLR projection layer dimension on fine-tuning.
24 |
25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.ipynb)
26 |
27 |
28 | ```python
29 | import itertools
30 |
31 | import pandas as pd
32 | ```
33 |
34 | ```python
35 | df = pd.read_csv(
36 | "campaigns/ucdavis-icdm19/simclr-dropout-and-projection/campaign_summary/simclr-dropout-and-projection/summary_flowpic_dim_32.csv",
37 | header=[0, 1],
38 | index_col=[0, 1, 2],
39 | )
40 |
41 | df = df["acc"][["mean", "ci95"]]
42 | df = df.T
43 | df.columns.set_names("test_split_name", level=0, inplace=True)
44 | df.columns.set_names("projection_layer_dim", level=1, inplace=True)
45 | df.columns.set_names("with_dropout", level=2, inplace=True)
46 | df = df.reorder_levels(
47 | ["test_split_name", "with_dropout", "projection_layer_dim"], axis=1
48 | )
49 |
50 | df = df[list(itertools.product(["test-script", "test-human"], [True, False], [30, 84]))]
51 | df = df.round(2)
52 |
53 | df.to_csv("table5_simclr_dropout_and_projectionlayer.csv")
54 | df
55 | ```
56 |
57 |
58 |
59 |
114 |
115 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.md:
--------------------------------------------------------------------------------
1 |
2 |
23 | # Table 6: Comparing the fine-tuning performance when using different pairs of augmentation for pretraining.
24 |
25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.ipynb)
26 |
27 |
28 | ```python
29 | import itertools
30 |
31 | import pandas as pd
32 | ```
33 |
34 | ```python
35 | RENAME = {
36 | "colorjitter": "Color jitter",
37 | "timeshift": "Time shift",
38 | "changertt": "Change RTT",
39 | "rotate": "Rotate",
40 | "packetloss": "Packet loss",
41 | }
42 | ```
43 |
44 | ```python
45 | df = pd.read_csv(
46 | "./campaigns/ucdavis-icdm19/simclr-other-augmentation-pairs/campaign_summary/simclr-other-augmentation-pairs/summary_flowpic_dim_32.csv",
47 | header=[0, 1],
48 | index_col=[0, 1],
49 | )
50 |
51 | df = df["acc"][["mean", "ci95"]].round(2)
52 | df = df.reset_index()
53 | df = df.assign(
54 | aug1=df["level_1"].apply(eval).str[0],
55 | aug2=df["level_1"].apply(eval).str[1],
56 | )
57 | df = df.drop("level_1", axis=1)
58 | df = df.rename({"level_0": "test_split_name"}, axis=1)
59 | df = df.replace(RENAME)
60 | df = df.pivot(index="test_split_name", columns=["aug1", "aug2"])
61 | df.columns.set_names(["stat", "aug1", "aug2"], inplace=True)
62 | df = df.reorder_levels(["aug1", "aug2", "stat"], axis=1)
63 | df.columns.set_names(["", "", ""], inplace=True)
64 | df.index.name = None
65 |
66 | df = df[
67 | list(itertools.product(["Change RTT"], ["Time shift"], ["mean", "ci95"]))
68 | + list(
69 | itertools.product(["Packet loss"], ["Color jitter", "Rotate"], ["mean", "ci95"])
70 | )
71 | + list(
72 | itertools.product(["Change RTT"], ["Color jitter", "Rotate"], ["mean", "ci95"])
73 | )
74 | + list(itertools.product(["Color jitter"], ["Rotate"], ["mean", "ci95"]))
75 | ]
76 | df = df.loc[["test-script", "test-human"]]
77 |
78 | df.to_csv("table5_simclr_other_augmentation_pairs.csv")
79 | df
80 | ```
81 |
82 |
83 |
84 |
155 |
156 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.md:
--------------------------------------------------------------------------------
1 |
2 |
23 | # Table 9: Macro-average Accuracy with different retraining dataset and different sampling methods
24 |
25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.ipynb)
26 |
27 |
28 | ```python
29 | import pathlib
30 |
31 | import matplotlib as mpl
32 | import matplotlib.pyplot as plt
33 | import numpy as np
34 | import pandas as pd
35 | import seaborn as sns
36 | import statsmodels.stats.api as sms
37 |
38 | %matplotlib inline
39 | %config InlineBackend.figure_format='retina'
40 | ```
41 |
42 | ```python
43 | def compute_confidence_intervals(array, alpha=0.05):
44 | array = np.array(array)
45 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha)
46 | mean = array.mean()
47 | ci = high - mean
48 | return ci
49 | ```
50 |
51 | ```python
52 | path = pathlib.Path("./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/")
53 |
54 | class_repss = list(path.glob("*10/"))
55 | ```
56 |
57 | ```python
58 | data = dict()
59 |
60 | for path in class_repss:
61 | if "script" in str(path):
62 | class_reps = list(path.glob("*class_rep.csv"))
63 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps]
64 |
65 | augmentation_name = path.name.split("_")[0].replace("Sampling", "")
66 | data[augmentation_name] = (
67 | np.mean(accs) * 100,
68 | compute_confidence_intervals(accs),
69 | )
70 |
71 | df_script = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2)
72 | df_script.columns = pd.MultiIndex.from_arrays([["script", "script"], df_script.columns])
73 | # df_script
74 | ```
75 |
76 | ```python
77 | data = dict()
78 | for path in class_repss:
79 | if "human" in str(path):
80 | class_reps = list(path.glob("*class_rep.csv"))
81 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps]
82 |
83 | augmentation_name = path.name.split("_")[0].replace("Sampling", "")
84 | data[augmentation_name] = (
85 | np.mean(accs) * 100,
86 | compute_confidence_intervals(accs),
87 | )
88 |
89 | df_human = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2)
90 | df_human.columns = pd.MultiIndex.from_arrays([["human", "human"], df_human.columns])
91 | ```
92 |
93 | ```python
94 | df_tmp = pd.concat((df_script, df_human), axis=1).T
95 | display(df_tmp)
96 | df_tmp.to_csv("icdm_finetuning_per_class_metrics_on_human.csv")
97 | ```
98 |
99 |
142 |
--------------------------------------------------------------------------------
/docs.material/papers/imc23/pytest.md:
--------------------------------------------------------------------------------
1 | ---
2 | title: pytest
3 | icon: simple/pytest
4 | ---
5 |
6 | # ML unit testing
7 |
8 | Multiple tests are available to verify different functionalities
9 | for both tcbench and the modeling campaigns created.
10 |
11 | Tests are not bundled with pypi installation. Rather, you need
12 | to follow the procedure described in the [artifact page](/tcbench/papers/imc23/artifacts/)
13 | to fetch the source code and install all artifacts and datasets.
14 |
15 | Tests are coded via [`pytest` :simple-pytest:](https://docs.pytest.org/en/7.4.x/)
16 | and are available under the `/tests` folder.
17 |
18 | !!! warning "Tests trigger model training"
19 |
20 | Most of the tests verify that the models trained for
21 | the campaigns described in the paper are indeed reproducible, i.e.,
22 | they provide the exact same models obtained for the paper.
23 |
24 | To do so, the pytest resources fetched from figshare
25 | contain a subset of reference models, so the tests
26 | trigger the modeling for those scenarios and check
27 | that what is trained matches what was created for the paper.
28 |
29 | So be aware that running these tests might take a while
30 | depending on your local environment.
31 |
32 |
33 | To trigger all tests run
34 |
35 | ```
36 | pytest tests
37 | ```
38 |
39 | !!! note "Output"
40 | ```
41 | ============================ test session starts ======================================
42 | platform linux -- Python 3.10.13, pytest-7.4.2, pluggy-1.3.0
43 | rootdir: /tmp/tcbench-pip/tcbench
44 | plugins: anyio-3.7.1, helpers-namespace-2021.12.29
45 | collected 101 items
46 |
47 | tests/test_augmentations_at_loading.py ........... [ 10%]
48 | tests/test_augmentations_at_loading_xgboost.py . [ 11%]
49 | tests/test_cli_command_campaign.py .... [ 15%]
50 | tests/test_cli_command_singlerun.py ............ [ 27%]
51 | tests/test_contrastive_learning_and_finetune.py .. [ 29%]
52 | tests/test_libtcdatasets_datasets_utils.py ................. [ 46%]
53 | tests/test_modeling_backbone.py ................ [ 62%]
54 | tests/test_modeling_dataprep.py .................................. [ 96%]
55 | tests/test_modeling_methods.py .... [100%]
56 | ============================== 101 passed, 8 warnings in 6523.55s (1:48:43) =========================
57 | ```
58 |
--------------------------------------------------------------------------------
/docs.material/papers/index.md:
--------------------------------------------------------------------------------
1 | # Research articles featuring tcbench
2 |
3 | [__Replication: Contrastive Learning and Data Augmentation in Traffic Classification__](/tcbench/papers/imc23)
4 |
5 | *A. Finamore, C. Wang, J. Krolikowski, J. M. Navarro, F. Cheng, D. Rossi*,
6 |
ACM Internet Measurement Conference (IMC), 2023
7 |
[:material-hexagon-outline: __Artifacts__](/tcbench/papers/imc23/artifacts) [:fontawesome-regular-file-pdf: __PDF__](https://arxiv.org/pdf/2309.09733)
8 |
9 | === "Bibtex"
10 | ```
11 | @misc{finamore2023contrastive,
12 | title={
13 | Contrastive Learning and Data Augmentation
14 | in Traffic Classification Using a
15 | Flowpic Input Representation
16 | },
17 | author={
18 | Alessandro Finamore and
19 | Chao Wang and
20 | Jonatan Krolikowski
21 | and Jose M. Navarro
22 | and Fuxing Chen and
23 | Dario Rossi
24 | },
25 | year={2023},
26 | eprint={2309.09733},
27 | archivePrefix={arXiv},
28 | primaryClass={cs.LG}
29 | }
30 | ```
31 |
32 | === "Abstract"
33 | Over the last years we witnessed a renewed interest towards
34 | Traffic Classification (TC) captivated by the rise of Deep
35 | Learning (DL). Yet, the vast majority of TC literature lacks
36 | code artifacts, performance assessments across datasets and
37 | reference comparisons against Machine Learning (ML) meth-
38 | ods. Among those works, a recent study from IMC'22 [17] is
39 | worth of attention since it adopts recent DL methodologies
40 | (namely, few-shot learning, self-supervision via contrastive
41 | learning and data augmentation) appealing for networking as
42 | they enable to learn from a few samples and transfer across
43 | datasets. The main result of [17] on the UCDAVIS19, ISCX-VPN
44 | and ISCX-Tor datasets is that, with such DL methodologies,
45 | 100 input samples are enough to achieve very high accuracy
46 | using an input representation called "flowpic" (i.e., a per-flow
47 | 2d histograms of the packets size evolution over time).
48 | In this paper (i) we reproduce [17] on the same datasets
49 | and (ii) we replicate its most salient aspect (the importance
50 | of data augmentation) on three additional public datasets,
51 | MIRAGE-19, MIRAGE-22 and UTMOBILENET21. While we con-
52 | firm most of the original results, we also found a 20% ac-
53 | curacy drop on some of the investigated scenarios due to
54 | a data shift of the original dataset that we uncovered. Ad-
55 | ditionally, our study validates that the data augmentation
56 | strategies studied in [17] perform well on other datasets too.
57 | In the spirit of reproducibility and replicability we make all
58 | artifacts (code and data) available at [10].
59 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/overview.md:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/tcbench/api/overview.md
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_clickutils.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.clickutils
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_command_aimrepo.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.command_aimrepo
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_command_campaign.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.command_campaign
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_command_datasets.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.command_datasets
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_command_singlerun.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.command_singlerun
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_cli_richutils.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.cli.richutils
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets.md:
--------------------------------------------------------------------------------
1 | ## Generating train/val/test splits
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_datasets_utils.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.datasets_utils
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_mirage19_json_to_parquet.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.mirage19_json_to_parquet
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_mirage22_json_to_parquet.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.mirage22_json_to_parquet
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.mirage19_generate_splits
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.mirage22_generate_splits
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_generate_splits
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.utmobilenet21_generate_splits
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_csv_to_parquet
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.libtcdatasets.utmobilenet21_csv_to_parquet
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_aimutils.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.aimutils
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_augmentation.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.augmentation
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_backbone.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.backbone
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_dataprep.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.dataprep
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_losses.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.losses
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_methods.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.methods
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_augmentations_at_loading
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading_xgboost.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_augmentations_at_loading_xgboost
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading_xgboost
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_campaign_contrastive_learning_and_finetune
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_run_contrastive_learning_and_finetune.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.run_contrastive_learning_and_finetune
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/api/tcbench_modeling_utils.md:
--------------------------------------------------------------------------------
1 | ::: tcbench.modeling.utils
2 |
--------------------------------------------------------------------------------
/docs.material/tcbench/cli_intro.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: octicons/terminal-16
3 | title: CLI Intro
4 | ---
5 |
6 | # CLI Introduction
7 |
8 | tcbench can be used as an SDK and
9 | from the command line.
10 |
11 | When installing tcbench, you also install
12 | a `tcbench` command line script
13 | created via [:material-cursor-default: click](https://click.palletsprojects.com/en/8.1.x/)
14 | and [:material-language-python: rich](https://github.com/Textualize/rich).
15 |
16 | For instance
17 | ```
18 | tcbench --help
19 | ```
20 |
21 | !!! info "Output"
22 | ```bash
23 | Usage: tcbench [OPTIONS] COMMAND [ARGS]...
24 |
25 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────╮
26 | │ --version Show tcbench version and exit. │
27 | │ --help Show this message and exit. │
28 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯
29 | ╭─ Commands ───────────────────────────────────────────────────────────────────────────────╮
30 | │ aimrepo Investigate AIM repository content. │
31 | │ campaign Triggers a modeling campaign. │
32 | │ datasets Install/Remove traffic classification datasets. │
33 | │ run Triggers a modeling run. │
34 | │ tree show the command tree of your CLI. │
35 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯
36 | ```
37 |
38 | The commands are organized in a nested structure which
39 | you can visualize using
40 |
41 | ```
42 | tcbench tree
43 | ```
44 |
45 | !!! info "Output"
46 | ```bash
47 | main
48 | ├── aimrepo - Investigate AIM repository content.
49 | │ ├── ls - List a subset of properties of each run.
50 | │ ├── merge - Coalesce different AIM repos into a single new repo.
51 | │ ├── properties - List properties across all runs.
52 | │ └── report - Summarize runs performance metrics.
53 | ├── campaign - Triggers a modeling campaign.
54 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set.
55 | │ └── contralearn-and-finetune - Modeling by pre-training via constrative learning and then finetune the final classifier from the pre-trained model.
56 | ├── datasets - Install/Remove traffic classification datasets.
57 | │ ├── delete - Delete a dataset.
58 | │ ├── import - Import datasets.
59 | │ ├── info - Show the meta-data related to supported datasets.
60 | │ ├── install - Install a dataset.
61 | │ ├── lsparquet - Tree view of the datasets parquet files.
62 | │ ├── samples-count - Show report on number of samples per class.
63 | │ └── schema - Show datasets schemas
64 | ├── run - Triggers a modeling run.
65 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set.
66 | │ └── contralearn-and-finetune - Modeling by pre-training via constrative learning and then finetune the final classifier from the pre-trained model.
67 | └── tree - show the command tree of your CLI
68 | ```
69 |
--------------------------------------------------------------------------------
/docs.material/tcbench/index.md:
--------------------------------------------------------------------------------
1 | # The tcbench framework
2 |
3 | tcbench is an ML/DL framework specific for __Traffic Classification (TC)__
4 | created as a research project by the AI4NET team of the Huawei Technologies
5 | research center in Paris, France.
6 |
7 | !!! info "What is Traffic Classification?"
8 |
9 | Nodes within a computer network operate by exchanging
10 | information, namely *packets*, which is regulated according
11 | to standardized protocols (e.g., HTTP for the web). So, to understand
12 | network health, it is necessary to constantly monitor
13 | this information flow and react accordingly. For instance, one
14 | might want to prioritize certain traffic (e.g., video meeting)
15 | or block it (e.g., social media in working environment).
16 |
17 | Traffic classification is the act of labeling an exchange of packets
18 | based on the Internet application which generated it.
19 |
20 |
21 | The academic literature is rife with methods and proposals for TC.
22 | Yet, code artifacts are scarce, and public datasets
23 | do not offer common conventions of use.
24 |
25 | We designed tcbench with the following goals in mind:
26 |
27 | | Goal | State of the art | tcbench |
28 | |:-----|:-----------------|:--------|
29 | |__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds. | An (opinionated) curation of datasets to create easy to use parquet files with associated train/val/test fold.|
30 | |__:octicons-file-code-24: Code__ | TC literature has no reference code base for ML/DL modeling | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy to use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)|
31 | |__:material-monitor-dashboard: Model tracking__ | Most ML frameworks require integration with cloud environments and subscription services | tcbench uses [aimstack](https://aimstack.io/) to save metrics on local servers during training, which can be later explored via its web UI or aggregated in report summaries using tcbench |
32 |
33 | ## Features and roadmap
34 |
35 | tcbench is still under development, but (as suggested by its name) ultimately aims
36 | to be a reference framework for benchmarking multiple ML/DL solutions
37 | related to TC.
38 |
39 | At the current stage, tcbench offers
40 |
41 | * Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`.
42 | You can use these datasets and their curated version independently from tcbench.
43 | Check out the [dataset install](/tcbench/datasets/install/) process and [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets/).
44 |
45 | * Good support for flowpic input representation and minimal support
46 | for 1d time series (based on network packets properties) input representation.
47 |
48 | * Data augmentation functionality for flowpic input representation.
49 |
50 | * Modeling via XGBoost, vanilla DL supervision and contrastive learning (via SimCLR or SupCon).
51 |
52 | Most of the above functionalities described relate to our __:material-file-document-outline: [IMC23 paper](/papers/imc23/)__.
53 |
54 | More exciting features, including more datasets and algorithms, will come in the next months.
55 |
56 | Stay tuned :wink:!
57 |
58 |
--------------------------------------------------------------------------------
/docs.material/tcbench/install.md:
--------------------------------------------------------------------------------
1 | ---
2 | icon: octicons/package-16
3 | ---
4 |
5 | # Install
6 |
7 | First, prepare a Python virtual environment, for example via :simple-anaconda: conda
8 | ```
9 | conda create -n tcbench python=3.10 pip
10 | conda activate tcbench
11 | ```
12 |
13 | tcbench is [available on pypi](https://pypi.org/project/tcbench/) so you can install it via pip
14 | ```
15 | python -m pip install tcbench
16 | ```
17 |
18 | All dependencies are automatically pulled.
19 |
20 | Verify the installation was successful by running
21 | ```
22 | tcbench --version
23 | ```
24 |
25 | !!! note "Output"
26 | ```
27 | version: 0.0.21
28 | ```
29 |
30 | # Developer
31 |
32 | For developing your own projects or contributing
33 | to tcbench, fork/clone the [official repository](https://github.com/tcbenchstack/tcbench)
34 | and install the developer version.
35 |
36 | ```
37 | python -m pip install .[dev]
38 | ```
39 |
40 | The only difference with respect to the base version
41 | is the installation of extra dependencies.
42 |
--------------------------------------------------------------------------------
/docs/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/.DS_Store
--------------------------------------------------------------------------------
/docs/arrow-right-solid.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/assets/_mkdocstrings.css:
--------------------------------------------------------------------------------
1 |
2 | /* Avoid breaking parameter names, etc. in table cells. */
3 | .doc-contents td code {
4 | word-break: normal !important;
5 | }
6 |
7 | /* No line break before first paragraph of descriptions. */
8 | .doc-md-description,
9 | .doc-md-description>p:first-child {
10 | display: inline;
11 | }
12 |
13 | /* Max width for docstring sections tables. */
14 | .doc .md-typeset__table,
15 | .doc .md-typeset__table table {
16 | display: table !important;
17 | width: 100%;
18 | }
19 |
20 | .doc .md-typeset__table tr {
21 | display: table-row;
22 | }
23 |
24 | /* Defaults in Spacy table style. */
25 | .doc-param-default {
26 | float: right;
27 | }
28 |
29 | /* Keep headings consistent. */
30 | h1.doc-heading,
31 | h2.doc-heading,
32 | h3.doc-heading,
33 | h4.doc-heading,
34 | h5.doc-heading,
35 | h6.doc-heading {
36 | font-weight: 400;
37 | line-height: 1.5;
38 | color: inherit;
39 | text-transform: none;
40 | }
41 |
42 | h1.doc-heading {
43 | font-size: 1.6rem;
44 | }
45 |
46 | h2.doc-heading {
47 | font-size: 1.2rem;
48 | }
49 |
50 | h3.doc-heading {
51 | font-size: 1.15rem;
52 | }
53 |
54 | h4.doc-heading {
55 | font-size: 1.10rem;
56 | }
57 |
58 | h5.doc-heading {
59 | font-size: 1.05rem;
60 | }
61 |
62 | h6.doc-heading {
63 | font-size: 1rem;
64 | }
--------------------------------------------------------------------------------
/docs/assets/images/favicon.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/assets/images/favicon.png
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.hi.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.hi=function(){this.pipeline.reset(),this.pipeline.add(e.hi.trimmer,e.hi.stopWordFilter,e.hi.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.hi.stemmer))},e.hi.wordCharacters="ऀ-ःऄ-एऐ-टठ-यर-िी-ॏॐ-य़ॠ-९॰-ॿa-zA-Za-zA-Z0-90-9",e.hi.trimmer=e.trimmerSupport.generateTrimmer(e.hi.wordCharacters),e.Pipeline.registerFunction(e.hi.trimmer,"trimmer-hi"),e.hi.stopWordFilter=e.generateStopWordFilter("अत अपना अपनी अपने अभी अंदर आदि आप इत्यादि इन इनका इन्हीं इन्हें इन्हों इस इसका इसकी इसके इसमें इसी इसे उन उनका उनकी उनके उनको उन्हीं उन्हें उन्हों उस उसके उसी उसे एक एवं एस ऐसे और कई कर करता करते करना करने करें कहते कहा का काफ़ी कि कितना किन्हें किन्हों किया किर किस किसी किसे की कुछ कुल के को कोई कौन कौनसा गया घर जब जहाँ जा जितना जिन जिन्हें जिन्हों जिस जिसे जीधर जैसा जैसे जो तक तब तरह तिन तिन्हें तिन्हों तिस तिसे तो था थी थे दबारा दिया दुसरा दूसरे दो द्वारा न नके नहीं ना निहायत नीचे ने पर पहले पूरा पे फिर बनी बही बहुत बाद बाला बिलकुल भी भीतर मगर मानो मे में यदि यह यहाँ यही या यिह ये रखें रहा रहे ऱ्वासा लिए लिये लेकिन व वग़ैरह वर्ग वह वहाँ वहीं वाले वुह वे वो सकता सकते सबसे सभी साथ साबुत साभ सारा से सो संग ही हुआ हुई हुए है हैं हो होता होती होते होना होने".split(" ")),e.hi.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var r=e.wordcut;r.init(),e.hi.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(r){return isLunr2?new e.Token(r.toLowerCase()):r.toLowerCase()});var 
t=i.toString().toLowerCase().replace(/^\s+/,"");return r.cut(t).split("|")},e.Pipeline.registerFunction(e.hi.stemmer,"stemmer-hi"),e.Pipeline.registerFunction(e.hi.stopWordFilter,"stopWordFilter-hi")}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.hy.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.hy=function(){this.pipeline.reset(),this.pipeline.add(e.hy.trimmer,e.hy.stopWordFilter)},e.hy.wordCharacters="[A-Za-z-֏ff-ﭏ]",e.hy.trimmer=e.trimmerSupport.generateTrimmer(e.hy.wordCharacters),e.Pipeline.registerFunction(e.hy.trimmer,"trimmer-hy"),e.hy.stopWordFilter=e.generateStopWordFilter("դու և եք էիր էիք հետո նաև նրանք որը վրա է որ պիտի են այս մեջ ն իր ու ի այդ որոնք այն կամ էր մի ես համար այլ իսկ էին ենք հետ ին թ էինք մենք նրա նա դուք եմ էի ըստ որպես ում".split(" ")),e.Pipeline.registerFunction(e.hy.stopWordFilter,"stopWordFilter-hy"),e.hy.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}(),e.Pipeline.registerFunction(e.hy.stemmer,"stemmer-hy")}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.ja.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.ja=function(){this.pipeline.reset(),this.pipeline.add(e.ja.trimmer,e.ja.stopWordFilter,e.ja.stemmer),r?this.tokenizer=e.ja.tokenizer:(e.tokenizer&&(e.tokenizer=e.ja.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.ja.tokenizer))};var t=new e.TinySegmenter;e.ja.tokenizer=function(i){var n,o,s,p,a,u,m,l,c,f;if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t.toLowerCase()):t.toLowerCase()});for(o=i.toString().toLowerCase().replace(/^\s+/,""),n=o.length-1;n>=0;n--)if(/\S/.test(o.charAt(n))){o=o.substring(0,n+1);break}for(a=[],s=o.length,c=0,l=0;c<=s;c++)if(u=o.charAt(c),m=c-l,u.match(/\s/)||c==s){if(m>0)for(p=t.segment(o.slice(l,c)).filter(function(e){return!!e}),f=l,n=0;n=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor++,!0}return!1},in_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e<=s&&e>=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor--,!0}return!1},out_grouping:function(t,i,s){if(this.cursors||e>3]&1<<(7&e)))return this.cursor++,!0}return!1},out_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e>s||e>3]&1<<(7&e)))return this.cursor--,!0}return!1},eq_s:function(t,i){if(this.limit-this.cursor>1),f=0,l=o0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n+_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n+_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},find_among_b:function(t,i){for(var 
s=0,e=i,n=this.cursor,u=this.limit_backward,o=0,h=0,c=!1;;){for(var a=s+(e-s>>1),f=0,l=o=0;m--){if(n-l==u){f=-1;break}if(f=r.charCodeAt(n-1-l)-_.s[m])break;l++}if(f<0?(e=a,h=l):(s=a,o=l),e-s<=1){if(s>0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n-_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n-_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},replace_s:function(t,i,s){var e=s.length-(i-t),n=r.substring(0,t),u=r.substring(i);return r=n+s+u,this.limit+=e,this.cursor>=i?this.cursor+=e:this.cursor>t&&(this.cursor=t),e},slice_check:function(){if(this.bra<0||this.bra>this.ket||this.ket>this.limit||this.limit>r.length)throw"faulty slice operation"},slice_from:function(r){this.slice_check(),this.replace_s(this.bra,this.ket,r)},slice_del:function(){this.slice_from("")},insert:function(r,t,i){var s=this.replace_s(r,t,i);r<=this.bra&&(this.bra+=s),r<=this.ket&&(this.ket+=s)},slice_to:function(){return this.slice_check(),r.substring(this.bra,this.ket)},eq_v_b:function(r){return this.eq_s_b(r.length,r)}}}},r.trimmerSupport={generateTrimmer:function(r){var t=new RegExp("^[^"+r+"]+"),i=new RegExp("[^"+r+"]+$");return function(r){return"function"==typeof r.update?r.update(function(r){return r.replace(t,"").replace(i,"")}):r.replace(t,"").replace(i,"")}}}}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.ta.min.js:
--------------------------------------------------------------------------------
1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ta=function(){this.pipeline.reset(),this.pipeline.add(e.ta.trimmer,e.ta.stopWordFilter,e.ta.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ta.stemmer))},e.ta.wordCharacters="-உஊ-ஏஐ-ஙச-ட-னப-யர-ஹ-ிீ-ொ-ௐ---௩௪-௯௰-௹௺-a-zA-Za-zA-Z0-90-9",e.ta.trimmer=e.trimmerSupport.generateTrimmer(e.ta.wordCharacters),e.Pipeline.registerFunction(e.ta.trimmer,"trimmer-ta"),e.ta.stopWordFilter=e.generateStopWordFilter("அங்கு அங்கே அது அதை அந்த அவர் அவர்கள் அவள் அவன் அவை ஆக ஆகவே ஆகையால் ஆதலால் ஆதலினால் ஆனாலும் ஆனால் இங்கு இங்கே இது இதை இந்த இப்படி இவர் இவர்கள் இவள் இவன் இவை இவ்வளவு உனக்கு உனது உன் உன்னால் எங்கு எங்கே எது எதை எந்த எப்படி எவர் எவர்கள் எவள் எவன் எவை எவ்வளவு எனக்கு எனது எனவே என் என்ன என்னால் ஏது ஏன் தனது தன்னால் தானே தான் நாங்கள் நாம் நான் நீ நீங்கள்".split(" ")),e.ta.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.ta.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.ta.stemmer,"stemmer-ta"),e.Pipeline.registerFunction(e.ta.stopWordFilter,"stopWordFilter-ta")}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.te.min.js:
--------------------------------------------------------------------------------
1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.te=function(){this.pipeline.reset(),this.pipeline.add(e.te.trimmer,e.te.stopWordFilter,e.te.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.te.stemmer))},e.te.wordCharacters="ఀ-ఄఅ-ఔక-హా-ౌౕ-ౖౘ-ౚౠ-ౡౢ-ౣ౦-౯౸-౿఼ఽ్ౝ౷",e.te.trimmer=e.trimmerSupport.generateTrimmer(e.te.wordCharacters),e.Pipeline.registerFunction(e.te.trimmer,"trimmer-te"),e.te.stopWordFilter=e.generateStopWordFilter("అందరూ అందుబాటులో అడగండి అడగడం అడ్డంగా అనుగుణంగా అనుమతించు అనుమతిస్తుంది అయితే ఇప్పటికే ఉన్నారు ఎక్కడైనా ఎప్పుడు ఎవరైనా ఎవరో ఏ ఏదైనా ఏమైనప్పటికి ఒక ఒకరు కనిపిస్తాయి కాదు కూడా గా గురించి చుట్టూ చేయగలిగింది తగిన తర్వాత దాదాపు దూరంగా నిజంగా పై ప్రకారం ప్రక్కన మధ్య మరియు మరొక మళ్ళీ మాత్రమే మెచ్చుకో వద్ద వెంట వేరుగా వ్యతిరేకంగా సంబంధం".split(" ")),e.te.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.te.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.te.stemmer,"stemmer-te"),e.Pipeline.registerFunction(e.te.stopWordFilter,"stopWordFilter-te")}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.th.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.th=function(){this.pipeline.reset(),this.pipeline.add(e.th.trimmer),r?this.tokenizer=e.th.tokenizer:(e.tokenizer&&(e.tokenizer=e.th.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.th.tokenizer))},e.th.wordCharacters="[-]",e.th.trimmer=e.trimmerSupport.generateTrimmer(e.th.wordCharacters),e.Pipeline.registerFunction(e.th.trimmer,"trimmer-th");var t=e.wordcut;t.init(),e.th.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t):t});var n=i.toString().replace(/^\s+/,"");return t.cut(n).split("|")}}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.vi.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.vi=function(){this.pipeline.reset(),this.pipeline.add(e.vi.stopWordFilter,e.vi.trimmer)},e.vi.wordCharacters="[A-Za-ẓ̀͐́͑̉̃̓ÂâÊêÔôĂ-ăĐ-đƠ-ơƯ-ư]",e.vi.trimmer=e.trimmerSupport.generateTrimmer(e.vi.wordCharacters),e.Pipeline.registerFunction(e.vi.trimmer,"trimmer-vi"),e.vi.stopWordFilter=e.generateStopWordFilter("là cái nhưng mà".split(" "))}});
--------------------------------------------------------------------------------
/docs/assets/javascripts/lunr/min/lunr.zh.min.js:
--------------------------------------------------------------------------------
1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r(require("@node-rs/jieba")):r()(e.lunr)}(this,function(e){return function(r,t){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var i="2"==r.version[0];r.zh=function(){this.pipeline.reset(),this.pipeline.add(r.zh.trimmer,r.zh.stopWordFilter,r.zh.stemmer),i?this.tokenizer=r.zh.tokenizer:(r.tokenizer&&(r.tokenizer=r.zh.tokenizer),this.tokenizerFn&&(this.tokenizerFn=r.zh.tokenizer))},r.zh.tokenizer=function(n){if(!arguments.length||null==n||void 0==n)return[];if(Array.isArray(n))return n.map(function(e){return i?new r.Token(e.toLowerCase()):e.toLowerCase()});t&&e.load(t);var o=n.toString().trim().toLowerCase(),s=[];e.cut(o,!0).forEach(function(e){s=s.concat(e.split(" "))}),s=s.filter(function(e){return!!e});var u=0;return s.map(function(e,t){if(i){var n=o.indexOf(e,u),s={};return s.position=[n,e.length],s.index=t,u=n,new r.Token(e,s)}return e})},r.zh.wordCharacters="\\w一-龥",r.zh.trimmer=r.trimmerSupport.generateTrimmer(r.zh.wordCharacters),r.Pipeline.registerFunction(r.zh.trimmer,"trimmer-zh"),r.zh.stemmer=function(){return function(e){return e}}(),r.Pipeline.registerFunction(r.zh.stemmer,"stemmer-zh"),r.zh.stopWordFilter=r.generateStopWordFilter("的 一 不 在 人 有 是 为 為 以 于 於 上 他 而 后 後 之 来 來 及 了 因 下 可 到 由 这 這 与 與 也 此 但 并 並 个 個 其 已 无 無 小 我 们 們 起 最 再 今 去 好 只 又 或 很 亦 某 把 那 你 乃 它 吧 被 比 别 趁 当 當 从 從 得 打 凡 儿 兒 尔 爾 该 該 各 给 給 跟 和 何 还 還 即 几 幾 既 看 据 據 距 靠 啦 另 么 麽 每 嘛 拿 哪 您 凭 憑 且 却 卻 让 讓 仍 啥 如 若 使 谁 誰 虽 雖 随 隨 同 所 她 哇 嗡 往 些 向 沿 哟 喲 用 咱 则 則 怎 曾 至 致 着 著 诸 諸 自".split(" ")),r.Pipeline.registerFunction(r.zh.stopWordFilter,"stopWordFilter-zh")}});
--------------------------------------------------------------------------------
/docs/assets/stylesheets/palette.06af60db.min.css.map:
--------------------------------------------------------------------------------
1 | {"version":3,"sources":["src/templates/assets/stylesheets/palette/_scheme.scss","../../../../src/templates/assets/stylesheets/palette.scss","src/templates/assets/stylesheets/palette/_accent.scss","src/templates/assets/stylesheets/palette/_primary.scss","src/templates/assets/stylesheets/utilities/_break.scss"],"names":[],"mappings":"AA2BA,cAGE,6BAME,sDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CACA,mDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CAGA,mDAAA,CACA,gDAAA,CAGA,0BAAA,CACA,mCAAA,CAGA,iCAAA,CACA,kCAAA,CACA,mCAAA,CACA,mCAAA,CACA,kCAAA,CACA,iCAAA,CACA,+CAAA,CACA,6DAAA,CACA,gEAAA,CACA,4DAAA,CACA,4DAAA,CACA,6DAAA,CAGA,6CAAA,CAGA,+CAAA,CAGA,uDAAA,CACA,6DAAA,CACA,2DAAA,CAGA,iCAAA,CAGA,yDAAA,CACA,iEAAA,CAGA,mDAAA,CACA,mDAAA,CAGA,qDAAA,CACA,uDAAA,CAGA,8DAAA,CAKA,8DAAA,CAKA,0DAAA,CAvEA,iBCeF,CD6DE,kHAEE,YC3DJ,CDkFE,yDACE,4BChFJ,CD+EE,2DACE,4BC7EJ,CD4EE,gEACE,4BC1EJ,CDyEE,2DACE,4BCvEJ,CDsEE,yDACE,4BCpEJ,CDmEE,0DACE,4BCjEJ,CDgEE,gEACE,4BC9DJ,CD6DE,0DACE,4BC3DJ,CD0DE,2OACE,4BC/CJ,CDsDA,+FAGE,iCCpDF,CACF,CC/CE,2BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD2CN,CCrDE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDkDN,CC5DE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDyDN,CCnEE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDgEN,CC1EE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDuEN,CCjFE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD8EN,CCxFE,kCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDqFN,CC/FE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD4FN,CCtGE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDmGN,CC7GE,6BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD0GN,CCpHE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDiHN,CC3HE,4BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCD2HN,CClIE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDkIN,CCzIE,6BACE,yBAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDyIN,CChJE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDgJN,CCvJE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDoJN,CEzJE,4BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsJN,CEjKE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8JN,CEzKE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAA
A,CAOE,0BAAA,CACA,sCFsKN,CEjLE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8KN,CEzLE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsLN,CEjME,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8LN,CEzME,mCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsMN,CEjNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8MN,CEzNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsNN,CEjOE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8NN,CEzOE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsON,CEjPE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiPN,CEzPE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyPN,CEjQE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiQN,CEzQE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyQN,CEjRE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8QN,CEzRE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsRN,CEjSE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BF0RN,CE1SE,kCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BFmSN,CEpRE,sEACE,4BFuRJ,CExRE,+DACE,4BF2RJ,CE5RE,iEACE,4BF+RJ,CEhSE,gEACE,4BFmSJ,CEpSE,iEACE,4BFuSJ,CE9RA,8BACE,mDAAA,CACA,4DAAA,CACA,0DAAA,CACA,oDAAA,CACA,2DAAA,CAGA,4BF+RF,CE5RE,yCACE,+BF8RJ,CE3RI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCF+RN,CG3MI,mCD1EA,+CACE,8CFwRJ,CErRI,qDACE,8CFuRN,CElRE,iEACE,mCFoRJ,CACF,CGtNI,sCDvDA,uCACE,oCFgRJ,CACF,CEvQA,8BACE,kDAAA,CACA,4DAAA,CACA,wDAAA,CACA,oDAAA,CACA,6DAAA,CAGA,4BFwQF,CErQE,yCACE,+BFuQJ,CEpQI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCFwQN,CEjQE,yCACE,6CFmQJ,CG5NI,0CDhCA,8CACE,gDF+PJ,CACF,CGjOI,0CDvBA,iFACE,6CF2PJ,CACF,CGzPI,sCDKA,uCACE,6CFuPJ,CACF","file":"palette.css"}
--------------------------------------------------------------------------------
/docs/css/fonts.css:
--------------------------------------------------------------------------------
1 | .md-typeset code,
2 | .md-typeset kbd,
3 | .md-typeset pre {
4 | font-feature-settings: "kern", "liga";
5 | font-variant-ligatures: normal;
6 | }
7 |
8 | :root{
9 | --md-text-font:"Roboto";
10 | --md-code-font:""
11 | }
12 |
--------------------------------------------------------------------------------
/docs/css/jupyter-notebook.css:
--------------------------------------------------------------------------------
1 | .jp-RenderedHTMLCommon p {
2 | margin: 0pt;
3 | }
4 |
5 | .jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {
6 | display: none;
7 | }
8 |
9 | .jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt {
10 | display: none;
11 | }
12 |
13 | .jupyter-wrapper .jp-OutputArea-output pre {
14 | border-left: solid 5px #e0e0e0;
15 | padding-left: 5pt;
16 | }
17 |
--------------------------------------------------------------------------------
/docs/css/material.css:
--------------------------------------------------------------------------------
1 | /* More space at the bottom of the page. */
2 | .md-main__inner {
3 | margin-bottom: 1.5rem;
4 | }
5 |
--------------------------------------------------------------------------------
/docs/css/mkdocstrings.css:
--------------------------------------------------------------------------------
1 | /* Indentation. */
2 | div.doc-contents:not(.first) {
3 | padding-left: 25px;
4 | border-left: 4px solid rgba(230, 230, 230);
5 | margin-bottom: 80px;
6 | }
7 |
8 | /* Avoid breaking parameters name, etc. in table cells. */
9 | td code {
10 | word-break: normal !important;
11 | }
12 |
--------------------------------------------------------------------------------
/docs/css/style.css:
--------------------------------------------------------------------------------
1 | /* Mark external links as such (also in nav) */
2 | a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after {
3 | /* https://primer.style/octicons/link-external-16 */
4 | background-image: url('data:image/svg+xml,');
5 | height: 0.8em;
6 | width: 0.8em;
7 | margin-left: 0.2em;
8 | content: ' ';
9 | display: inline-block;
10 | }
11 |
12 | /* More space at the bottom of the page */
13 | .md-main__inner {
14 | margin-bottom: 1.5rem;
15 | }
16 |
--------------------------------------------------------------------------------
/docs/css/tables_style.css:
--------------------------------------------------------------------------------
1 | th, td {
2 | border: 1px solid var(--md-typeset-table-color);
3 | border-spacing: 0;
4 | border-bottom: none;
5 | border-left: none;
6 | border-top: none;
7 | }
8 |
9 | th {
10 | background:var(--md-primary-fg-color);
11 | color:white;
12 | }
13 |
14 | .md-typeset table:not([class]) th {
15 | font-weight: 200;
16 | }
17 |
18 | .md-typeset__table {
19 | line-height: 1;
20 | }
21 |
22 | .md-typeset__table table:not([class]) {
23 | font-size: .74rem;
24 | border-right: none;
25 | }
26 |
27 | .md-typeset__table table:not([class]) td,
28 | .md-typeset__table table:not([class]) th {
29 | padding: 9px;
30 | }
31 |
32 | /* light mode alternating table bg colors */
33 | .md-typeset__table tr:nth-child(2n) {
34 | background-color: #f8f8f8;
35 | }
36 |
37 | /* dark mode alternating table bg colors */
38 | [data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) {
39 | background-color: hsla(var(--md-hue),25%,25%,1)
40 | }
41 |
--------------------------------------------------------------------------------
/docs/datasets/datasets.csv:
--------------------------------------------------------------------------------
1 | Name,Classes, PDF, Data, Code, Auto-download
2 | ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12:
3 | mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark:
4 | mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark:
5 | utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12:
6 |
--------------------------------------------------------------------------------
/docs/figs/aim_log1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log1.png
--------------------------------------------------------------------------------
/docs/figs/aim_log2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log2.png
--------------------------------------------------------------------------------
/docs/figs/aim_log3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log3.png
--------------------------------------------------------------------------------
/docs/figs/aim_run1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run1.png
--------------------------------------------------------------------------------
/docs/figs/aim_run2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run2.png
--------------------------------------------------------------------------------
/docs/figs/aim_run3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run3.png
--------------------------------------------------------------------------------
/docs/figs/dataset_properties_mirage19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage19.png
--------------------------------------------------------------------------------
/docs/figs/dataset_properties_mirage22.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage22.png
--------------------------------------------------------------------------------
/docs/figs/dataset_properties_ucdavis-icdm19.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_ucdavis-icdm19.png
--------------------------------------------------------------------------------
/docs/figs/dataset_properties_utmobilenet21.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_utmobilenet21.png
--------------------------------------------------------------------------------
/docs/github-mark/github-mark-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark-white.png
--------------------------------------------------------------------------------
/docs/github-mark/github-mark-white.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/github-mark/github-mark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark.png
--------------------------------------------------------------------------------
/docs/github-mark/github-mark.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/index.md.DEPRECATED:
--------------------------------------------------------------------------------
1 | This website documents code and data artifacts related to the IMC23 submission #132 titled
2 |
3 | !!! quote ""
4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__
5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations”
6 | from IMC’22*
7 |
8 | Our submission investigates the role of data
9 | augmentation by using both supervised
10 | and contrastive learning techniques
11 | across [4 datasets](datasets/install).
12 |
13 | It replicates and reproduces the following paper
14 | from the IMC22 program
15 |
16 |
17 | ```
18 | @inproceedings{10.1145/3517745.3561436,
19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval},
20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations},
21 | year = {2022},
22 | isbn = {9781450392594},
23 | publisher = {Association for Computing Machinery},
24 | address = {New York, NY, USA},
25 | url = {https://doi.org/10.1145/3517745.3561436},
26 | doi = {10.1145/3517745.3561436},
27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference},
28 | pages = {647–654},
29 | numpages = {8},
30 | location = {Nice, France},
31 | series = {IMC '22}
32 | }
33 | ```
34 |
35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`,
36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of
37 | frequency histograms extracted from consecutive time windows of the flow --
38 | applied on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19).
39 |
40 | In the first part of the submission we investigate how augmentations
41 | affect classification performance -- the study considers 3 image transformations (*rotation,
42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*)
43 | applied to packets timestamps -- when used either in a fully supervised setting or via
44 | contrastive learning.
45 |
46 | !!! info "Key takeaways from reproducibility"
47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19).
48 | Specifically, we uncover a data shift present in the dataset itself which justifies our results;
49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`.
50 |
51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently
52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`,
53 | we do not find statistically significant differences across the different augmentations.
54 |
55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet
56 | relying on more samples is beneficial to boost performance.
57 |
58 | Then, in the second part of the submission we replicate the
59 | analysis testing the same 6 augmentations across 3 other datasets.
60 |
61 | !!! info "Key takeaways from replicability"
62 | Using multiple datasets allows us to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e.,
63 | *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19)
64 | is superior to the alternative transformations presented in the paper.
65 |
66 |
67 | ## Website conventions
68 |
69 | * :material-file-document-outline:`imc22-paper` is used to reference the replicated/reproduced paper.
70 |
71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available.
72 |
73 | * :material-link-off: suggests a link is expected to be added but is not yet available.
74 |
--------------------------------------------------------------------------------
/docs/main.html.DEPRECATED:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 |
5 | {% if page.nb_url %}
6 |
7 | {% include ".icons/simple/jupyter.svg" %}
8 | {% include ".icons/material/download.svg" %}
9 |
10 | {% endif %}
11 |
12 | {{ super() }}
13 |
14 |
35 |
36 |
45 |
46 |
47 | {% endblock content %}
48 |
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_home-page.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_home-page.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_log1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log1.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_log2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log2.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_log3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log3.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_run1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run1.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_run2.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run2.png
--------------------------------------------------------------------------------
/docs/modeling/figs/aim_run3.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run3.png
--------------------------------------------------------------------------------
/docs/objects.inv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/objects.inv
--------------------------------------------------------------------------------
/docs/overrides/arrow-right-solid.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/overrides/github-mark/github-mark-white.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark-white.png
--------------------------------------------------------------------------------
/docs/overrides/github-mark/github-mark-white.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/overrides/github-mark/github-mark.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark.png
--------------------------------------------------------------------------------
/docs/overrides/github-mark/github-mark.svg:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/docs/overrides/main.html:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 |
5 | {% if page.nb_url %}
6 |
7 | {% include ".icons/simple/jupyter.svg" %}
8 | {% include ".icons/material/download.svg" %}
9 |
10 | {% endif %}
11 |
12 | {{ super() }}
13 |
14 |
35 |
36 |
45 |
46 |
47 | {% endblock content %}
48 |
--------------------------------------------------------------------------------
/docs/overrides/main.html.DEPRECATED:
--------------------------------------------------------------------------------
1 | {% extends "base.html" %}
2 |
3 | {% block content %}
4 |
5 | {% if page.nb_url %}
6 |
7 | {% include ".icons/simple/jupyter.svg" %}
8 | {% include ".icons/material/download.svg" %}
9 |
10 | {% endif %}
11 |
12 | {{ super() }}
13 |
14 |
35 |
36 |
45 |
46 |
47 | {% endblock content %}
48 |
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png
--------------------------------------------------------------------------------
/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png
--------------------------------------------------------------------------------
/docs/sitemap.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
--------------------------------------------------------------------------------
/docs/sitemap.xml.gz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/sitemap.xml.gz
--------------------------------------------------------------------------------
/notebooks/imc23/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 tcbenchstack team
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | # pyproject.toml
2 |
3 | [build-system]
4 | requires = ["setuptools>=61.0.0", "wheel"]
5 | build-backend = "setuptools.build_meta"
6 |
7 | [project]
8 | name = "tcbench"
9 | version = "0.0.22"
10 | description = "A ML/DL framework for Traffic Classification"
11 | readme = "README.md"
12 | authors = [{ name = "Alessandro Finamore", email = "alessandro.finamore@huawei.com" }]
13 | license = { file = "LICENSE" }
14 | classifiers = [
15 | "License :: OSI Approved :: MIT License",
16 | "Programming Language :: Python",
17 | "Programming Language :: Python :: 3",
18 | ]
19 | keywords = ["machine learning", "deep learning", "traffic classification", "time series"]
20 | dependencies = [
21 | "aim == 3.17.4",
22 | "autorank",
23 | "click",
24 | "dask <= 2023.5.1",
25 | "distributed",
26 | "numpy",
27 | "pandas==2.0.2",
28 | "pyarrow==12.0.0",
29 | "pyyaml",
30 | "requests",
31 | "rich",
32 | "rich-click",
33 | "scikit-learn",
34 | "statsmodels",
35 | "torch==2.0.1",
36 | "torchsummary",
37 | "torchvision==0.15.2",
38 | "xgboost==1.7.5"
39 | ]
40 | requires-python = ">=3.9"
41 |
42 | [project.optional-dependencies]
43 | dev = [
44 | "black",
45 | "bumpver",
46 | "click-plugins",
47 | "isort",
48 | "jupyterlab",
49 | "matplotlib",
50 | "mkdocs",
51 | "mkdocs-autorefs",
52 | "mkdocs-glightbox",
53 | "mkdocs-jupyter",
54 | "mkdocs-material",
55 | "mkdocs-material-extensions",
56 | "mkdocs-table-reader-plugin",
57 | "mkdocstrings",
58 | "mkdocstrings-python",
59 | "pip-tools",
60 | "pytest",
61 | "pytest-helpers-namespace",
62 | "seaborn"
63 | ]
64 |
65 | [project.urls]
66 | Homepage = "https://tcbenchstack.github.io/tcbench/"
67 |
68 | [project.scripts]
69 | tcbench = "tcbench.cli.main:main"
70 |
71 | [tool.bumpver]
72 | current_version = "0.0.22"
73 | version_pattern = "MAJOR.MINOR.PATCH"
74 | commit_message = "bump version {old_version} -> {new_version}"
75 | tag_message = "{new_version}"
76 | tag_scope = "default"
77 | pre_commit_hook = ""
78 | post_commit_hook = ""
79 | commit = true
80 | tag = true
81 | push = false
82 |
83 | [tool.bumpver.file_patterns]
84 | "pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"']
85 | "src/tcbench/__init__.py" = ["{version}"]
86 |
--------------------------------------------------------------------------------
/src/tcbench/FIGSHARE_RESOURCES.yml:
--------------------------------------------------------------------------------
1 | imc23:
2 | notebooks:
3 | url: "https://figshare.com/ndownloader/files/42550111"
4 | md5: "224764907e634fcab3ae1e20bc58bbbf"
5 | dst_folder: "./"
6 |
7 | pytest_resources:
8 | url: "https://figshare.com/ndownloader/files/42538741"
9 | md5: "3a2482ad6359ba48be8728221e42f727"
10 | dst_folder: "./tests"
11 |
12 | ml_artifacts:
13 | url: "https://figshare.com/ndownloader/files/42538675"
14 | md5: "a4b53b2d0b95995c5f14bbf2f8489c7c"
15 | dst_folder: "notebooks/imc23"
16 |
--------------------------------------------------------------------------------
/src/tcbench/__init__.py:
--------------------------------------------------------------------------------
import pathlib

__version__ = "0.0.22"

# Default locations of the AIM repository and of the artifacts stored in it.
DEFAULT_AIM_REPO = pathlib.Path("./aim-repo")
DEFAULT_ARTIFACTS_FOLDER = pathlib.Path("./aim-repo/artifacts")

# Defaults for the "augmentation at loading" campaign.
DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS = (
    "noaug",
    "rotate",
    "horizontalflip",
    "colorjitter",
    "packetloss",
    "changertt",
    "timeshift",
)
DEFAULT_CAMPAIGN_AUGATLOAD_SEEDS = (12345, 42, 666)
DEFAULT_CAMPAIGN_AUGATLOAD_FLOWPICDIMS = (32, 64, 1500)
DEFAULT_CAMPAIGN_AUGATLOAD_PKTSERIESLEN = (10, 30)

# Defaults for the "contrastive learning + fine-tuning" campaign.
DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_FLOWPICDIMS = (32, 64, 1500)
DEFAULT_CAMPAING_CONTRALEARNANDFINETUNE_SEEDS_CONTRALEARN = (12345, 1, 2, 3, 4)
# Correctly spelled alias for the (misspelled "CAMPAING") constant above;
# the old name is kept for backward compatibility with existing imports.
DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_SEEDS_CONTRALEARN = (
    DEFAULT_CAMPAING_CONTRALEARNANDFINETUNE_SEEDS_CONTRALEARN
)
DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_SEEDS_FINETUNE = (12345, 1, 2, 3, 4)
DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_AUGMENTATIONS = "changertt,timeshift"
# Every augmentation except "noaug" is valid for contrastive learning.
DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_VALID_AUGMENTATIONS = tuple(
    aug_name
    for aug_name in DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS
    if aug_name != "noaug"
)

from tcbench.libtcdatasets.datasets_utils import (
    get_datasets_root_folder,
    get_dataset_folder,
    DATASETS,
    load_parquet,
)

from tcbench.modeling import (
    MODELING_DATASET_TYPE,
    MODELING_INPUT_REPR_TYPE,
    MODELING_METHOD_TYPE,
)
--------------------------------------------------------------------------------
/src/tcbench/cli/__init__.py:
--------------------------------------------------------------------------------
def get_rich_console():
    """Build a rich Console configured with the theme file bundled next to this module."""
    import pathlib
    import sys

    from rich.console import Console
    from rich.theme import Theme

    theme_path = pathlib.Path(sys.modules[__name__].__file__).parent / "rich.theme"
    return Console(theme=Theme.read(theme_path))


# Shared console instance used across the CLI.
console = get_rich_console()
13 |
--------------------------------------------------------------------------------
/src/tcbench/cli/clickutils.py:
--------------------------------------------------------------------------------
1 | import rich_click as click
2 |
3 | from typing import List, Dict, Any
4 |
5 | from tcbench import DATASETS
6 | from tcbench.modeling import MODELING_METHOD_TYPE, MODELING_INPUT_REPR_TYPE
7 |
8 |
def _create_choice(enumeration):
    """Build a case-insensitive click Choice from the values of an enumeration."""
    values = [member.value for member in enumeration]
    return click.Choice(values, case_sensitive=False)
11 |
12 |
13 | def _create_choice_callback(enumeration):
14 | return lambda c, p, v: enumeration.from_str(v)
15 |
16 |
# click Choice/callback pairs for the CLI options selecting the dataset,
# the modeling method, and the input representation (built via the
# _create_choice / _create_choice_callback helpers above).
CLICK_TYPE_DATASET_NAME = _create_choice(DATASETS)
CLICK_CALLBACK_DATASET_NAME = _create_choice_callback(DATASETS)

CLICK_TYPE_METHOD_NAME = _create_choice(MODELING_METHOD_TYPE)
CLICK_CALLBACK_METHOD_NAME = _create_choice_callback(MODELING_METHOD_TYPE)

CLICK_TYPE_INPUT_REPR = _create_choice(MODELING_INPUT_REPR_TYPE)
CLICK_CALLBACK_INPUT_REPR = _create_choice_callback(MODELING_INPUT_REPR_TYPE)

# click callback coercing an option value to int.
CLICK_CALLBACK_TOINT = lambda c, p, v: int(v)
27 |
28 |
def compose_help_string_from_list(items: List[str]) -> str:
    """Compose a "\\[a|b|...]." help string from a list of option names.

    The opening bracket is backslash-escaped so rich markup does not
    interpret it as the start of a style tag. A raw string is used
    because "\\[" in a plain literal is an invalid escape sequence
    (SyntaxWarning on modern Python).
    """
    return r"\[" + "|".join(items) + "]."
32 |
33 |
def convert_params_dict_to_list(params:Dict[str,Any], skip_params:List[str]=None) -> List[str]:
    """Convert a dictionary of (name, value) parameter pairs into a list of
    "--name value" CLI option strings.

    Boolean True values become bare flags ("--name"); False and None values
    (and any name listed in skip_params) are dropped. Underscores in names
    are rewritten as dashes.
    """
    if skip_params is None:
        skip_params = set()

    options = []
    for par_name, par_value in params.items():
        # identity checks (not ==) so integer values 0/1 are emitted as
        # regular "--name value" options instead of being treated as booleans
        if par_name in skip_params or par_value is False or par_value is None:
            continue
        par_name = par_name.replace("_", "-")
        if par_value is True:
            options.append(f"--{par_name}")
        else:
            options.append(f"--{par_name} {par_value}")

    return options
50 |
51 |
def help_append_choices(help_string:str, values:List[str]) -> str:
    """Append to a help string a rich-styled "[a|b|...]" list of accepted values."""
    styled = "|".join(f"[bold]{value}[/bold]" for value in values)
    return f"{help_string} [yellow]Choices: [{styled}][/yellow]"
56 |
--------------------------------------------------------------------------------
/src/tcbench/cli/command_fetchartifacts.py:
--------------------------------------------------------------------------------
1 | import rich_click as click
2 |
3 | import pathlib
4 | import shutil
5 | import tempfile
6 |
7 | from tcbench.cli import clickutils
8 | from tcbench.cli import console
9 |
10 | click.rich_click.SHOW_ARGUMENTS = True
11 | click.rich_click.USE_RICH_MARKUP = True
12 |
13 | FIGSHARE_RESOURCES_FNAME = "FIGSHARE_RESOURCES.yml"
14 |
15 | def _copy_file(src, dst):
16 | keyword = "installing"
17 | if pathlib.Path(dst).exists():
18 | keyword = "overwriting"
19 | print(f"{keyword}: {dst}")
20 | shutil.copy2(src, dst)
21 |
@click.command("fetch-artifacts")
@click.pass_context
def fetchartifacts(ctx):
    """Download from figshare and install all required artifacts."""
    from tcbench.libtcdatasets import datasets_utils
    import requests

    # Sanity check: the command copies files into the repository tree, so it
    # must be run from the root of a cloned checkout where these paths exist.
    check_exists = [
        pathlib.Path("./src/tcbench"),
        pathlib.Path("./tests"),
        pathlib.Path("./notebooks/tutorials"),
        pathlib.Path("./pyproject.toml"),
    ]
    if any(not folder.exists() for folder in check_exists):
        raise RuntimeError("Run the command from within the cloned github repository")

    # FIGSHARE_RESOURCES.yml maps <primary>/<secondary> resource names to
    # records carrying a download url and a destination folder.
    fname = datasets_utils._get_module_folder().parent / FIGSHARE_RESOURCES_FNAME
    data = datasets_utils.load_yaml(fname)
    for primary_key in data:
        for secondary_key in data[primary_key]:
            print(f"fetching: {primary_key} / {secondary_key}")

            params = data[primary_key][secondary_key]

            url = params["url"]
            dst_folder = params["dst_folder"]
            with tempfile.TemporaryDirectory() as tmpfolder:
                tmpfolder = pathlib.Path(tmpfolder)
                try:
                    path = datasets_utils.download_url(url, tmpfolder)
                except requests.exceptions.SSLError:
                    # best-effort fallback: retry with certificate
                    # verification disabled when the first attempt fails
                    path = datasets_utils.download_url(url, tmpfolder, verify=False)

                # unpack the archive, drop the tarball, then overlay its
                # content onto dst_folder (existing files are overwritten,
                # with logging handled by _copy_file)
                untar_folder = tmpfolder / "__untar__"
                datasets_utils.untar(path, untar_folder)
                path.unlink()
                shutil.copytree(untar_folder, dst_folder, copy_function=_copy_file, dirs_exist_ok=True)
59 |
--------------------------------------------------------------------------------
/src/tcbench/cli/main.py:
--------------------------------------------------------------------------------
# NOTE(review): pkg_resources is deprecated in favor of
# importlib.metadata.entry_points — TODO confirm click_plugins compatibility
# before migrating.
from pkg_resources import iter_entry_points

import rich_click as click

import tcbench
from tcbench import cli
from click_plugins import with_plugins


# Root CLI group. Plugins exposed via the "click_command_tree" entry point
# are attached automatically; built-in subcommands are registered below.
@with_plugins(iter_entry_points('click_command_tree'))
@click.group(invoke_without_command=True)
@click.pass_context
@click.option(
    "--version", "show_version", is_flag=True, help="Show tcbench version and exit."
)
def main(ctx, show_version):
    """tcbench command line entry point."""
    if show_version:
        import sys
        cli.console.print(f"version: {tcbench.__version__}")
        sys.exit()


# NOTE(review): subcommand imports are placed after `main` is defined —
# presumably to avoid import cycles at module load time; confirm before
# reordering.
from tcbench.cli.command_datasets import datasets
from tcbench.cli.command_singlerun import singlerun
from tcbench.cli.command_campaign import campaign
from tcbench.cli.command_aimrepo import aimrepo
from tcbench.cli.command_fetchartifacts import fetchartifacts

main.add_command(datasets)
main.add_command(singlerun)
main.add_command(campaign)
main.add_command(aimrepo)
main.add_command(fetchartifacts)

if __name__ == "__main__":
    main()
37 |
--------------------------------------------------------------------------------
/src/tcbench/cli/rich.theme:
--------------------------------------------------------------------------------
1 | [styles]
2 | progress.description = none
3 | progress.filesize = none
4 | progress.filesize.total = none
5 | progress.download = none
6 | progress.elapsed = none
7 | progress.percentage = none
8 | progress.remaining = none
9 | progress.data.speed = none
10 | progress.spinner = none
11 | repr.ellipsis = none
12 | repr.indent = none
13 | repr.error = none
14 | repr.str = none
15 | repr.brace = none
16 | repr.comma = none
17 | repr.ipv4 = none
18 | repr.ipv6 = none
19 | repr.eui48 = none
20 | repr.eui64 = none
21 | repr.tag_start = none
22 | repr.tag_name = none
23 | repr.tag_contents = none
24 | repr.tag_end = none
25 | repr.attrib_name = none
26 | repr.attrib_equal = none
27 | repr.attrib_value = none
28 | repr.number = none
29 | repr.number_complex = none
30 | repr.bool_true = none
31 | repr.bool_false = none
32 | repr.none = none
33 | repr.url = none
34 | repr.uuid = none
35 | repr.call = none
36 | repr.path = none
37 | repr.filename = none
38 | rule.line = none
39 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/__init__.py:
--------------------------------------------------------------------------------
1 | # from . import datasets_utils
2 | #
3 | # from . import ucdavis_icdm19_csv_to_parquet
4 | # from . import ucdavis_icdm19_generate_splits
5 | #
6 | # from . import utmobilenet21_csv_to_parquet
7 | # from . import utmobilenet21_generate_splits
8 | #
9 | # from . import mirage19_json_to_parquet
10 | # from . import mirage19_generate_splits
11 | #
12 | # from . import mirage22_json_to_parquet
13 | # from . import mirage22_generate_splits
14 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/mirage22_json_to_parquet.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | import numpy as np
3 |
4 | import argparse
5 | import pathlib
6 | import tempfile
7 |
8 | from tcbench.libtcdatasets import mirage19_json_to_parquet
9 |
10 |
def postprocess(df: pd.DataFrame) -> pd.DataFrame:
    """Post-process the loaded MIRAGE JSON.

    A flow keeps its app label only when the android name matches the flow
    label AND the activity is not "Unknown"; every other flow is relabeled
    as "background". The label is encoded as a pandas category and a
    per-flow packet counter ("packets") is added.
    """
    # single boolean mask combining both relabeling conditions of the
    # original two-step np.where pipeline
    is_foreground = (df["android_name"] == df["flow_metadata_bf_label"]) & (
        df["flow_metadata_bf_activity"] != "Unknown"
    )
    labels = df["android_name"].where(is_foreground, "background")
    return df.assign(
        app=labels.astype("category"),
        packets=df["packet_data_l4_payload_bytes"].apply(len),
    )
33 |
34 |
def main(args: argparse.Namespace) -> None:
    """Convert the MIRAGE22 raw JSON dataset into a single parquet file.

    Args:
        args: parsed CLI options; uses `input_folder`, `output_folder`
            and `num_workers`.
    """
    # When pointed at the top of the unpacked archive, descend into the
    # subfolder that actually contains the raw JSON files.
    if (args.input_folder / "MIRAGE-COVID-CCMA-2022").exists():
        args.input_folder = args.input_folder / "MIRAGE-COVID-CCMA-2022" / "Raw_JSON"

    # JSON loading is shared with the MIRAGE19 converter;
    # only the post-processing below differs.
    df = mirage19_json_to_parquet.main(
        args.input_folder, save_as=None, workers=args.num_workers
    )
    df = postprocess(df)

    fname = args.output_folder / "mirage22.parquet"
    # exist_ok avoids the race between an existence check and the mkdir
    fname.parent.mkdir(parents=True, exist_ok=True)
    print(f"saving: {fname}")
    df.to_parquet(fname)
49 |
50 |
def cli_parser():
    # The CLI options are identical to the MIRAGE19 converter,
    # so its parser is reused as-is.
    return mirage19_json_to_parquet.cli_parser()

if __name__ == "__main__":
    args = cli_parser().parse_args()
    main(args)
57 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/resources/DATASETS.yml:
--------------------------------------------------------------------------------
1 | ucdavis-icdm19:
2 | num_classes: 5
3 | paper: "https://arxiv.org/pdf/1812.09761.pdf"
4 | website: "https://github.com/shrezaei/Semi-supervised-Learning-QUIC-"
5 | data: "https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE"
6 | data_curated: "https://figshare.com/ndownloader/files/42438621"
7 | data_curated_md5: "36294e70968fe0a30a054e626cb87afe"
8 |
9 | mirage19:
10 | num_classes: 20
11 | paper: "http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf"
12 | website: "https://traffic.comics.unina.it/mirage/mirage-2019.html"
13 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz"
14 | # data_curated: ""
15 | # data_curated_md5: ""
16 |
17 | mirage22:
18 | num_classes: 9
19 | paper: "http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf"
20 | website: "https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html"
21 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip"
22 | # data_curated: ""
23 | # data_curated_md5: ""
24 |
25 | utmobilenet21:
26 | num_classes: 17
27 | paper: "https://ieeexplore.ieee.org/abstract/document/9490678/"
28 | website: "https://github.com/YuqiangHeng/UTMobileNetTraffic2021"
29 | data: "https://utexas.app.box.com/s/okrimcsz1mn9ec4j667kbb00d9gt16ii"
30 | data_curated: "https://figshare.com/ndownloader/files/42438624"
31 | data_curated_md5: "789b01c4f7dedfbb781b89e6f2dcbb1a"
32 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/resources/DATASETS_FILES_MD5.yml:
--------------------------------------------------------------------------------
1 | ucdavis-icdm19:
2 | ucdavis-icdm19.parquet: "f4333724f03a0ccaa7d87ba878148f34"
3 | imc23:
4 | test_split_human.parquet: "5a6f27a51d6dde6bb3b59d6757c00c1f"
5 | test_split_script.parquet: "93a49d51513f7b1dec0dc7ccf6f139b5"
6 | train_split_0.parquet: "98bc4a849c2f2e3abf259be26eed2f06"
7 | train_split_1.parquet: "2f7b849325c1f4d710b761d2d48a84f2"
8 | train_split_2.parquet: "1d74dc9dc389a72a0f6b29e2be3b72e3"
9 | train_split_3.parquet: "323bb4504d23d25b25ef31b8b76205f5"
10 | train_split_4.parquet: "5a0b00ed58e365551f9ef12956caa0d0"
11 |
12 | mirage19:
13 | mirage19.parquet: "aa0c4cbffc6f5dffba6718a7ab43f451"
14 | imc23:
15 | mirage19_filtered_minpkts10_splits.parquet: "12c83fb39eb61924aa411ca2d663eb94"
16 | mirage19_filtered_minpkts10.parquet: "75851ec3312751a8a3dca79a4c24e2fb"
17 |
18 | mirage22:
19 | mirage22.parquet: "4b8f5bfa528989ee857934f7611b052e"
20 | imc23:
21 | mirage22_filtered_minpkts10.parquet: "e117cbe37eba5c1235e4df787cf3b2d6"
22 | mirage22_filtered_minpkts10_splits.parquet: "a445db52fe1ec342fed7eb1d765c9825"
23 | mirage22_filtered_minpkts1000.parquet: "6312e82a0526071ab269a92d5eb745c6"
24 | mirage22_filtered_minpkts1000_splits.parquet: "21396f8a9d5033cf049407c4dc573195"
25 |
26 | #utmobilenet21:
27 | # utmobilenet21.parquet: "863e35d558c7ef9f4f5d0e552a57f3cb"
28 | # imc23:
29 | # utmobilenet21_filtered_minpkts10.parquet: "102e125e3236a1e8211bfd5e8272afdb"
30 | # utmobilenet21_filtered_minpkts10_splits.parquet: "3ea1378753f1b4e1f2773bd750e56d1b"
31 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/resources/ucdavis-icdm19.yml:
--------------------------------------------------------------------------------
1 | __all__:
2 | row_id:
3 | dtype: int
4 | description: "Unique row id"
5 | app:
6 | dtype: category
7 | description: "Label of the flow"
8 | flow_id:
9 | dtype: str
10 | description: "Original filename"
11 | partition:
12 | dtype: str
13 | description: "Partition related to the flow"
14 | num_pkts:
15 | dtype: int
16 | description: "Number of packets in the flow"
17 | duration:
18 | dtype: float
19 | description: "Duration of the flow"
20 | bytes:
21 | dtype: int
22 | description: "Number of bytes of the flow"
23 | unixtime:
24 | dtype: str
25 | description: "Absolute time of each packet"
26 | timetofirst:
27 | dtype: np.array
    description: "Delta between a packet and the first packet of the flow"
29 | pkts_size:
30 | dtype: np.array
31 | description: "Packet size time series"
32 | pkts_dir:
33 | dtype: np.array
34 | description: "Packet direction time series"
35 | pkts_iat:
36 | dtype: np.array
37 | description: "Packet inter-arrival time series"
38 |
--------------------------------------------------------------------------------
/src/tcbench/libtcdatasets/resources/utmobilenet21.yml:
--------------------------------------------------------------------------------
1 | __unfiltered__:
2 | row_id:
3 | dtype: int
4 | description: "Unique flow id"
5 | src_ip:
6 | dtype: str
7 | description: "Source ip of the flow"
8 | src_port:
9 | dtype: int
10 | description: "Source port of the flow"
11 | dst_ip:
12 | dtype: str
13 | description: "Destination ip of the flow"
14 | dst_port:
15 | dtype: int
16 | description: "Destination port of the flow"
17 | ip_proto:
18 | dtype: int
19 | description: "Protocol of the flow (TCP or UDP)"
20 | first:
21 | dtype: float
22 | description: "Timestamp of the first packet"
23 | last:
24 | dtype: float
25 | description: "Timestamp of the last packet"
26 | duration:
27 | dtype: float
28 | description: "Duration of the flow"
29 | packets:
30 | dtype: int
31 | description: "Number of packets in the flow"
32 | bytes:
33 | dtype: int
34 | description: "Number of bytes in the flow"
35 | partition:
36 | dtype: str
37 | description: "From which folder the flow was originally stored"
38 | location:
39 | dtype: str
40 | description: "Label originally provided by the dataset (see the related paper for details)"
41 | fname:
42 | dtype: str
43 | description: "Original filename where the packets of the flow come from"
44 | app:
45 | dtype: category
46 | description: "Final label of the flow, encoded as pandas category"
47 | pkts_size:
48 | dtype: np.array
49 | description: "Packet size time series"
50 | pkts_dir:
51 | dtype: np.array
    description: "Packet direction time series"
53 | timetofirst:
54 | dtype: np.array
    description: "Delta between each packet timestamp and the first packet of the flow"
56 |
57 | __filtered__:
58 | row_id:
59 | dtype: int
60 | description: "Unique flow id"
61 | src_ip:
62 | dtype: str
63 | description: "Source ip of the flow"
64 | src_port:
65 | dtype: int
66 | description: "Source port of the flow"
67 | dst_ip:
68 | dtype: str
69 | description: "Destination ip of the flow"
70 | dst_port:
71 | dtype: int
72 | description: "Destination port of the flow"
73 | ip_proto:
74 | dtype: int
75 | description: "Protocol of the flow (TCP or UDP)"
76 | first:
77 | dtype: float
78 | description: "Timestamp of the first packet"
79 | last:
80 | dtype: float
81 | description: "Timestamp of the last packet"
82 | duration:
83 | dtype: float
84 | description: "Duration of the flow"
85 | packets:
86 | dtype: int
87 | description: "Number of packets in the flow"
88 | bytes:
89 | dtype: int
90 | description: "Number of bytes in the flow"
91 | partition:
92 | dtype: str
93 | description: "From which folder the flow was originally stored"
94 | location:
95 | dtype: str
96 | description: "Label originally provided by the dataset (see the related paper for details)"
97 | fname:
98 | dtype: str
99 | description: "Original filename where the packets of the flow come from"
100 | app:
101 | dtype: category
102 | description: "Final label of the flow, encoded as pandas category"
103 | pkts_size:
104 | dtype: np.array
105 | description: "Packet size time series"
106 | pkts_dir:
107 | dtype: np.array
    description: "Packet direction time series"
109 | timetofirst:
110 | dtype: np.array
    description: "Delta between each packet timestamp and the first packet of the flow"
112 |
113 | __splits__:
114 | train_indexes:
115 | dtype: np.array
116 | description: "row_id of training samples"
117 | val_indexes:
118 | dtype: np.array
119 | description: "row_id of validation samples"
120 | test_indexes:
121 | dtype: np.array
122 | description: "row_id of test samples"
123 | split_index:
124 | dtype: int
125 | description: "Split id"
126 |
--------------------------------------------------------------------------------
/src/tcbench/modeling/__init__.py:
--------------------------------------------------------------------------------
1 | from enum import Enum
2 |
3 |
class MODELING_DATASET_TYPE(Enum):
    """An enumeration to specify which type of dataset to load"""

    TRAIN_VAL = "train_val_datasets"
    TEST = "test_dataset"
    TRAIN_VAL_LEFTOVER = "train_val_leftover_dataset"
    FINETUNING = "for_finetuning_dataset"

    # from_str/__str__ added for consistency with the other enums
    # in this module (MODELING_INPUT_REPR_TYPE, MODELING_METHOD_TYPE)
    @classmethod
    def from_str(cls, text):
        """Return the member whose value equals `text`, or None."""
        for member in cls.__members__.values():
            if member.value == text:
                return member
        return None

    def __str__(self):
        return self.value
11 |
12 |
class MODELING_INPUT_REPR_TYPE(Enum):
    """Input representations selectable for modeling."""

    FLOWPIC = "flowpic"
    PKTSERIES = "pktseries"

    @classmethod
    def from_str(cls, text):
        """Return the member whose value equals `text`, or None."""
        # EAFP: Enum lookup-by-value raises ValueError on unknown values
        try:
            return cls(text)
        except ValueError:
            return None

    def __str__(self):
        return self.value
26 |
27 |
class MODELING_METHOD_TYPE(Enum):
    """Modeling methods selectable for modeling runs."""

    MONOLITHIC = "monolithic"
    XGBOOST = "xgboost"
    SIMCLR = "simclr"

    @classmethod
    def from_str(cls, text):
        """Return the member whose value equals `text`, or None."""
        return next((member for member in cls if member.value == text), None)

    def __str__(self):
        return self.value
42 |
--------------------------------------------------------------------------------
/tests/conftest.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pathlib
3 | import hashlib
4 |
5 | from tcbench.modeling import utils
6 |
7 |
def pytest_configure():
    # Expose the test-resources folder as a pytest-level constant so every
    # test module can reach it without recomputing the path.
    pytest.DIR_RESOURCES = (pathlib.Path(__file__).parent / "resources").resolve()
10 |
11 |
@pytest.helpers.register
def verify_deeplearning_model(fname, reference_fname, epsilon=None):
    """Assert that two saved model state dicts hold the same weights.

    With `epsilon=None` the comparison is exact; otherwise each weight may
    differ elementwise by strictly less than `epsilon`.
    """
    import torch

    state = torch.load(fname)
    ref_state = torch.load(reference_fname)

    assert len(state) == len(ref_state)
    assert sorted(state.keys()) == sorted(ref_state.keys())

    for name, weights in state.items():
        ref_weights = ref_state[name]
        if epsilon is None:
            assert (weights.flatten() == ref_weights.flatten()).all()
        else:
            delta = (weights.flatten() - ref_weights.flatten()).abs()
            assert (delta < epsilon).all()
30 |
31 |
32 | def _get_md5(fname):
33 | data = pathlib.Path(fname).read_bytes()
34 | md5 = hashlib.md5(data)
35 | return md5.hexdigest()
36 |
37 |
@pytest.helpers.register
def verify_md5_model(fname, reference_fname):
    # Byte-for-byte file comparison via MD5 digests.
    assert _get_md5(fname) == _get_md5(reference_fname)
41 |
42 |
@pytest.helpers.register
def verify_reports(
    folder, reference_folder, with_train=True, with_val=True, with_test=True
):
    """Verify classification report and confusion matrixes"""
    import pandas as pd

    # note: by using folder / test*.csv automatically
    # skips leftover if not found

    def _require(name):
        # a train/val report that should exist but doesn't is an error
        if not (folder / name).exists():
            raise RuntimeError(f"missing {name}")
        return name

    fnames = []
    if with_train:
        fnames.append(_require("train_class_rep.csv"))
        fnames.append(_require("train_conf_mtx.csv"))
    if with_val:
        fnames.append(_require("val_class_rep.csv"))
        fnames.append(_require("val_conf_mtx.csv"))
    if with_test:
        test_reports = sorted(folder.glob("test*.csv"))
        assert len(test_reports) > 0
        fnames += [path.name for path in test_reports]

    if not fnames:
        raise RuntimeError("empty list of files to verify")

    # each produced CSV must match its reference cell-by-cell
    for name in fnames:
        produced = pd.read_csv(folder / name)
        expected = pd.read_csv(reference_folder / name)
        assert (produced == expected).all().all()
77 |
78 |
@pytest.helpers.register
def match_run_hashes(folder, reference_folder, params_to_match=('seed', 'split_index', 'flowpic_dim', 'aug_name')):
    """Pair each run in `folder` with a run in `reference_folder` whose
    params.yml agrees on all `params_to_match` entries.

    Returns a list of [run_hash, matched_reference_hash_or_None] pairs;
    the hash is the run's folder name. Each reference run is consumed at
    most once. The default for `params_to_match` is a tuple (not a list)
    to avoid the mutable-default-argument pitfall.
    """
    # catalog of reference runs: run hash (folder name) -> parsed params.yml
    ref_catalog = {
        path.name: utils.load_yaml(path / 'params.yml')
        for path in reference_folder.iterdir()
    }

    pairs = []
    for path in folder.iterdir():
        curr_params = utils.load_yaml(path / 'params.yml')
        curr_hash = path.name

        curr_pair = [curr_hash, None]
        for ref_hash, ref_params in ref_catalog.items():
            # compare selected params as strings so that, e.g., int vs str
            # renderings of the same value still match
            tmp1 = {}
            tmp2 = {}
            for param_name in params_to_match:
                tmp1[param_name] = str(curr_params[param_name])
                tmp2[param_name] = str(ref_params[param_name])

            if tmp1 == tmp2:
                curr_pair[-1] = ref_hash
                # consume the matched reference; breaking immediately keeps
                # the delete-during-iteration safe
                del ref_catalog[ref_hash]
                break

        pairs.append(curr_pair)

    return pairs
108 |
--------------------------------------------------------------------------------
/tests/test_augmentations_at_loading_xgboost.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | import pytest
4 | import pathlib
5 |
6 | import tcbench
7 | from tcbench.modeling import (
8 | utils,
9 | run_augmentations_at_loading_xgboost,
10 | MODELING_DATASET_TYPE,
11 | )
12 |
13 |
@pytest.mark.parametrize(
    "params, expected_artifacts_folder",
    [
        (
            [
                f"--dataset {str(tcbench.DATASETS.UCDAVISICDM19)}",
                "--flow-representation pktseries",
                "--max-n-pkts 10",
                "--split-index 0",
                "--seed 12345",
            ],
            pytest.DIR_RESOURCES
            / pathlib.Path(
                "_reference_aim_run/ucdavis-icdm19/xgboost/noaugmentation-timeseries/5fa59c129a3e4aa6bb9b7640"
            ),
        ),
    ],
)
def test_main(tmp_path, params, expected_artifacts_folder):
    """End-to-end xgboost modeling run with a fixed seed/split; the produced
    reports are compared against a frozen reference run."""
    params.append(f"--artifacts-folder {tmp_path}/artifacts")
    params.append(f"--aim-repo {tmp_path}")

    parser = run_augmentations_at_loading_xgboost.cli_parser()
    args = parser.parse_args((" ".join(params)).split())

    state = run_augmentations_at_loading_xgboost.main(args)

    # the output folder is based on the aim run hash
    artifacts_folder = next((tmp_path / 'artifacts').iterdir())

    # verifying model files
    fname = f"xgb_model_split_{args.split_index}.json"
    # NOTE(review): model-file MD5 verification is currently disabled;
    # presumably the dumped model is not byte-stable across environments —
    # confirm before re-enabling.
    # pytest.helpers.verify_md5_model(
    #     artifacts_folder / fname, expected_artifacts_folder / fname
    # )

    pytest.helpers.verify_reports(artifacts_folder, expected_artifacts_folder)
51 |
--------------------------------------------------------------------------------
/tests/test_contrastive_learning_and_finetune.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 |
3 | import pytest
4 | import torch
5 | import pathlib
6 |
7 | import tcbench
8 | from tcbench.modeling import utils, run_contrastive_learning_and_finetune
9 |
10 |
@pytest.mark.parametrize(
    "params, expected_artifacts_folder",
    [
        (
            dict(
                dataset_name=tcbench.DATASETS.UCDAVISICDM19,
                learning_rate=0.001,
                batch_size=32,
                flowpic_dim=32,
                split_idx=0,
                seed=12345,
                loss_temperature=0.07,
                with_dropout=False,
                projection_layer_dim=30,
            ),
            pytest.DIR_RESOURCES
            / pathlib.Path(
                "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d"
            ),
        ),
    ],
)
def test_pretrain(tmp_path, params, expected_artifacts_folder):
    """SimCLR pretraining with a fixed seed must reproduce the reference
    model weights exactly."""
    params["artifacts_folder"] = tmp_path
    # seed RNGs before training so the run is reproducible bit-for-bit
    utils.seed_everything(params.get("seed", 12345))

    state = run_contrastive_learning_and_finetune.pretrain(**params)

    # verifying trained model weights
    fname = f'best_model_weights_pretrain_split_{params["split_idx"]}.pt'
    pytest.helpers.verify_deeplearning_model(
        tmp_path / fname, expected_artifacts_folder / fname
    )
44 |
45 |
46 |
@pytest.mark.parametrize(
    "params, expected_artifacts_folder",
    [
        (
            [
                "--dataset ucdavis-icdm19",
                "--contrastive-learning-seed 12345",
                "--finetune-seed 12345",
                "--batch-size 32",
                "--flowpic-dim 32",
                "--split-index 0",
                "--suppress-dropout",
                "--projection-layer-dim 30",
            ],
            pytest.DIR_RESOURCES
            / pathlib.Path(
                "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d"
            ),
        ),
    ],
)
def test_main(tmp_path, params, expected_artifacts_folder):
    """End-to-end SimCLR pretrain+finetune via the CLI entry point; saved
    model weights and test reports must match the frozen reference run."""
    params.append(f"--artifacts-folder {tmp_path}/artifacts")

    parser = run_contrastive_learning_and_finetune.cli_parser()
    args = parser.parse_args(" ".join(params).split())
    # presumably these fields are normally set by the outer CLI dispatcher
    # before main() is called — confirm against the dispatcher code
    args.method = "simclr"
    args.augmentations = args.augmentations.split(",")

    run_contrastive_learning_and_finetune.main(args)

    # artifacts are stored into a doubly nested folder
    # as /
    artifacts_folder = next((tmp_path / 'artifacts').iterdir())

    # the produced and reference runs must save the same set of models
    fname_models = sorted(path.name for path in artifacts_folder.glob("*.pt"))
    expected_fname_models = sorted(
        path.name for path in expected_artifacts_folder.glob("*.pt")
    )
    assert fname_models == expected_fname_models

    for fname in fname_models:
        pytest.helpers.verify_deeplearning_model(
            artifacts_folder / fname, expected_artifacts_folder / fname
        )

    # verifying reports
    # note: by using tmp_path / test*.csv automatically
    # skips leftover if suppressed with the command line option
    pytest.helpers.verify_reports(
        artifacts_folder,
        expected_artifacts_folder,
        with_train=False,
        with_val=False,
        with_test=True,
    )
103 |
--------------------------------------------------------------------------------
/tests/test_modeling_backbone.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import pathlib
3 |
4 | from tcbench.modeling import backbone
5 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini
6 | from tcbench.modeling.methods import ContrastiveLearningTrainer
7 |
@pytest.mark.parametrize(
    "net1, net2, expected",
    [
        (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), True),
        (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False),
        (
            LeNet5FlowpicIMC22_Mini(),
            ContrastiveLearningTrainer.prepare_net_for_train(LeNet5FlowpicIMC22_Mini()),
            False,
        ),
        (
            LeNet5FlowpicIMC22_Mini(),
            ContrastiveLearningTrainer.init_train(LeNet5FlowpicIMC22_Mini(), None)[0],
            False,
        ),
    ],
)
def test_have_same_layers_and_types(net1, net2, expected):
    """Architecture comparison: equal only for identically-configured nets."""
    assert backbone.have_same_layers_and_types(net1, net2) == expected
27 |
28 |
@pytest.mark.parametrize(
    "num_classes1, num_classes2",
    [
        (5, 5),
        (None, 5),
        (5, None),
        (None, None),
    ],
)
def test_have_same_layers_and_types_after_reloading_from_file(
    tmp_path, num_classes1, num_classes2
):
    """A net reloaded from saved weights must keep the same architecture,
    regardless of the num_classes the second net was constructed with."""
    net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1)
    net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1)
    net1.save_weights(tmp_path / "weights.pt")

    net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2)
    net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt")
    assert backbone.have_same_layers_and_types(net1, net2)
48 |
49 |
@pytest.mark.parametrize(
    "net1, net2, expected",
    [
        (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), False),
        (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False),
    ],
)
def test_are_equal(net1, net2, expected):
    """Two independently initialized nets are never equal (random weights)."""
    assert backbone.are_equal(net1, net2) == expected
59 |
60 |
@pytest.mark.parametrize(
    "num_classes1, num_classes2",
    [
        (5, 5),
        (None, 5),
        (5, None),
        (None, None),
    ],
)
def test_are_equal_after_reloading_from_file(tmp_path, num_classes1, num_classes2):
    """Reloading saved weights into a fresh net must reproduce the original
    net exactly (weights included)."""
    net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1)
    net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1)
    net1.save_weights(tmp_path / "weights.pt")

    net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2)
    net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt")
    assert backbone.are_equal(net1, net2)
78 |
79 |
@pytest.mark.parametrize(
    "net",
    [
        LeNet5FlowpicIMC22_Mini(),
        LeNet5FlowpicIMC22_Mini(num_classes=5),
    ],
)
def test_clone_net(net):
    """clone_net must return an equal but distinct object (a deep copy)."""
    new_net = backbone.clone_net(net)
    assert backbone.are_equal(net, new_net)
    assert id(net) != id(new_net)
91 |
--------------------------------------------------------------------------------
/tests/test_modeling_methods.py:
--------------------------------------------------------------------------------
1 | import pytest
2 |
3 | import torch
4 |
5 | from tcbench.modeling import backbone, methods
6 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini
7 |
8 |
@pytest.mark.parametrize(
    "net, optimizer_class",
    [
        (LeNet5FlowpicIMC22_Mini(), None),
        (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam),
    ],
)
def test_simclr_init_pretrain(net, optimizer_class):
    """init_train and init_pretrain must produce architecturally identical
    (but distinct) networks that share the backbone weights.

    Bug fix: the parametrized `net` used to be shadowed by a fresh instance
    created inside the test body, so the fixture value was never exercised.
    """
    optimizer = None
    if optimizer_class:
        optimizer = optimizer_class(net.parameters(), lr=0.001)

    new_net1, optimizer1 = methods.ContrastiveLearningTrainer.init_train(net, optimizer)
    new_net2, optimizer2 = methods.SimCLRTrainer.init_pretrain(net, optimizer)
    # the two networks need to have the same architecture
    # but weights are not be the same overall because
    # new layers are added

    assert backbone.have_same_layers_and_types(new_net1, new_net2)
    assert id(new_net1) != id(new_net2)

    # compare first convolutional layer
    assert (list(new_net1.parameters())[0] == list(new_net2.parameters())[0]).all()

    # compare last linear layer weights (bias is 0)
    assert (list(new_net1.parameters())[-2] != list(new_net2.parameters())[-2]).any()

    if optimizer:
        # the optimizers must be fresh objects wrapping the same parameters
        assert id(optimizer1) != id(optimizer2)
        assert id(optimizer) != id(optimizer1)
        assert id(optimizer) != id(optimizer2)
        params1 = optimizer1.param_groups[0]["params"]
        params2 = optimizer2.param_groups[0]["params"]
        assert len(params1) == len(params2)
        assert (params1[0] == params2[0]).all()
47 |
@pytest.mark.parametrize(
    "net, optimizer_class",
    [
        (LeNet5FlowpicIMC22_Mini(), None),
        (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam),
    ],
)
def test_simclr_init_finetune(net, optimizer_class):
    """init_finetune must attach a classifier head and rebuild the optimizer
    over the new head's parameters.

    Bug fix: the parametrized `net` used to be shadowed by a fresh instance
    created inside the test body, so the fixture value was never exercised.
    """
    optimizer = None
    if optimizer_class:
        optimizer = optimizer_class(net.parameters(), lr=0.001)

    new_net, new_optimizer = methods.SimCLRTrainer.init_finetune(
        net, optimizer=optimizer, num_classes=5
    )
    assert not new_net.is_equal_to(net)
    assert new_net.classifier is not None
    if optimizer:
        # weight + bias of the classifier head
        assert len(new_optimizer.param_groups[0]["params"]) == 2
        for p1, p2 in zip(
            new_net.classifier.parameters(), new_optimizer.param_groups[0]["params"]
        ):
            assert (p1 == p2).all()
73 |
--------------------------------------------------------------------------------