├── .gitignore ├── LICENSE ├── MANIFEST.in ├── README.md ├── docs.material ├── about.md ├── artifacts.md ├── css │ ├── fonts.css │ ├── jupyter-notebook.css │ ├── material.css │ ├── mkdocstrings.css │ ├── style.css │ └── tables_style.css ├── datasets │ ├── curation_and_metadata.md │ ├── datasets.csv │ ├── datasets.md │ ├── datasets_splits.md │ ├── guides │ │ ├── index.md │ │ ├── tutorial_load_datasets.ipynb │ │ └── tutorial_load_datasets.md │ ├── import.md │ ├── index.md │ ├── install.md │ ├── install │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ ├── metadata.md │ ├── metadata.md.DEPRECATED │ ├── samples_count │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ ├── schemas │ │ ├── index.md │ │ ├── mirage19.md │ │ ├── mirage22.md │ │ ├── ucdavis-icdm19.md │ │ └── utmobilenet21.md │ └── tutorial_load_parquet.ipynb ├── figs │ ├── aim_log1.png │ ├── aim_log2.png │ ├── aim_log3.png │ ├── aim_run1.png │ ├── aim_run2.png │ ├── aim_run3.png │ ├── dataset_properties_mirage19.png │ ├── dataset_properties_mirage22.png │ ├── dataset_properties_ucdavis-icdm19.png │ └── dataset_properties_utmobilenet21.png ├── index.md ├── index.md.DEPRECATED ├── install.md ├── modeling │ ├── aim_repos │ │ ├── aim_webui.md │ │ ├── aimrepo_subcmd.md │ │ └── index.md │ ├── aim_repositories_content.md │ ├── campaigns.md │ ├── exploring_artifacts.md │ ├── figs │ │ ├── aim_home-page.png │ │ ├── aim_log1.png │ │ ├── aim_log2.png │ │ ├── aim_log3.png │ │ ├── aim_run1.png │ │ ├── aim_run2.png │ │ └── aim_run3.png │ ├── index.md │ ├── overview.md │ └── runs.md ├── overrides │ ├── arrow-right-solid.svg │ ├── github-mark │ │ └── github-mark.svg │ ├── home.html │ ├── home.js │ ├── main.html │ ├── main.html.DEPRECATED │ ├── tcbench.svg │ └── tcbench_logo.svg ├── papers │ ├── imc23 │ │ ├── artifacts.md │ │ ├── campaigns.md │ │ ├── index.md │ │ ├── ml_artifacts.md │ │ ├── notebooks.md │ │ ├── notebooks │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.md │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files │ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png │ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb │ │ │ ├── figure11_dropout_impact_supervised_setting.md │ │ │ ├── figure11_dropout_impact_supervised_setting_files │ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png │ │ │ ├── figure1_flowpic_example.ipynb │ │ │ ├── figure1_flowpic_example.md │ │ │ ├── figure1_flowpic_example_files │ │ │ │ └── figure1_flowpic_example_8_0.png │ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ │ │ ├── figure3_confusion_matrix_supervised_setting.md │ │ │ ├── figure3_confusion_matrix_supervised_setting_files │ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.md │ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files │ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png │ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ │ │ ├── figure5_ucdavis_augmentations_comparison.md │ │ │ ├── figure5_ucdavis_augmentations_comparison_files │ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.md 
│ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files │ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.md │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files │ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.md │ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files │ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png │ │ │ ├── miscellaneous_stats.ipynb │ │ │ ├── miscellaneous_stats.md │ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb │ │ │ ├── table10_ucdavis-icdm19_tukey.md │ │ │ ├── table2_datasets_properties.ipynb │ │ │ ├── table2_datasets_properties.md │ │ │ ├── table3_xgboost_baseline.ipynb │ │ │ ├── table3_xgboost_baseline.md │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.md │ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ │ │ ├── table5_simclr_dropout_and_projectionlayer.md │ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb │ │ │ ├── table6_simclr_other_augmentation_pairs.md │ │ │ ├── table7_larger_trainset.ipynb │ │ │ ├── table7_larger_trainset.md │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.md │ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human.md │ │ └── pytest.md │ └── index.md ├── quick_tour.md └── tcbench │ ├── api │ ├── overview.md │ ├── tcbench_cli_clickutils.md │ ├── tcbench_cli_command_aimrepo.md │ ├── tcbench_cli_command_campaign.md │ ├── tcbench_cli_command_datasets.md │ ├── tcbench_cli_command_singlerun.md │ ├── tcbench_cli_richutils.md │ ├── tcbench_libtcdatasets.md │ ├── tcbench_libtcdatasets_datasets_utils.md │ ├── tcbench_libtcdatasets_mirage19_json_to_parquet.md │ ├── tcbench_libtcdatasets_mirage22_json_to_parquet.md │ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md │ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md │ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md │ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md │ ├── tcbench_modeling_aimutils.md │ ├── tcbench_modeling_augmentation.md │ ├── tcbench_modeling_backbone.md │ ├── tcbench_modeling_dataprep.md │ ├── tcbench_modeling_losses.md │ ├── tcbench_modeling_methods.md │ ├── tcbench_modeling_run_augmentations_at_loading.md │ ├── tcbench_modeling_run_augmentations_at_loading_xgboost.md │ ├── tcbench_modeling_run_campaign_augmentations_at_loading.md │ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md │ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md │ ├── tcbench_modeling_run_contrastive_learning_and_finetune.md │ └── tcbench_modeling_utils.md │ ├── cli_intro.md │ ├── index.md │ ├── install.md │ ├── internals.md │ └── overview.md ├── docs ├── .DS_Store ├── 404.html ├── about │ └── index.html ├── arrow-right-solid.svg ├── artifacts │ └── index.html ├── assets │ ├── _mkdocstrings.css │ ├── images │ │ └── favicon.png │ ├── javascripts │ │ ├── bundle.83f73b43.min.js │ │ ├── 
bundle.83f73b43.min.js.map │ │ ├── glightbox.min.js │ │ ├── lunr │ │ │ ├── min │ │ │ │ ├── lunr.ar.min.js │ │ │ │ ├── lunr.da.min.js │ │ │ │ ├── lunr.de.min.js │ │ │ │ ├── lunr.du.min.js │ │ │ │ ├── lunr.el.min.js │ │ │ │ ├── lunr.es.min.js │ │ │ │ ├── lunr.fi.min.js │ │ │ │ ├── lunr.fr.min.js │ │ │ │ ├── lunr.he.min.js │ │ │ │ ├── lunr.hi.min.js │ │ │ │ ├── lunr.hu.min.js │ │ │ │ ├── lunr.hy.min.js │ │ │ │ ├── lunr.it.min.js │ │ │ │ ├── lunr.ja.min.js │ │ │ │ ├── lunr.jp.min.js │ │ │ │ ├── lunr.kn.min.js │ │ │ │ ├── lunr.ko.min.js │ │ │ │ ├── lunr.multi.min.js │ │ │ │ ├── lunr.nl.min.js │ │ │ │ ├── lunr.no.min.js │ │ │ │ ├── lunr.pt.min.js │ │ │ │ ├── lunr.ro.min.js │ │ │ │ ├── lunr.ru.min.js │ │ │ │ ├── lunr.sa.min.js │ │ │ │ ├── lunr.stemmer.support.min.js │ │ │ │ ├── lunr.sv.min.js │ │ │ │ ├── lunr.ta.min.js │ │ │ │ ├── lunr.te.min.js │ │ │ │ ├── lunr.th.min.js │ │ │ │ ├── lunr.tr.min.js │ │ │ │ ├── lunr.vi.min.js │ │ │ │ └── lunr.zh.min.js │ │ │ ├── tinyseg.js │ │ │ └── wordcut.js │ │ └── workers │ │ │ ├── search.6ce7567c.min.js │ │ │ └── search.6ce7567c.min.js.map │ └── stylesheets │ │ ├── glightbox.min.css │ │ ├── main.0253249f.min.css │ │ ├── main.0253249f.min.css.map │ │ ├── palette.06af60db.min.css │ │ └── palette.06af60db.min.css.map ├── css │ ├── fonts.css │ ├── jupyter-notebook.css │ ├── material.css │ ├── mkdocstrings.css │ ├── style.css │ └── tables_style.css ├── datasets │ ├── curation_and_metadata │ │ └── index.html │ ├── datasets.csv │ ├── datasets │ │ └── index.html │ ├── datasets_splits │ │ └── index.html │ ├── guides │ │ ├── index.html │ │ ├── tutorial_load_datasets.ipynb │ │ └── tutorial_load_datasets │ │ │ └── index.html │ ├── import │ │ └── index.html │ ├── index.html │ ├── install │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ ├── metadata.md.DEPRECATED │ ├── metadata │ │ └── index.html │ ├── samples_count │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ ├── schemas │ │ ├── index.html │ │ ├── mirage19 │ │ │ └── index.html │ │ ├── mirage22 │ │ │ └── index.html │ │ ├── ucdavis-icdm19 │ │ │ └── index.html │ │ └── utmobilenet21 │ │ │ └── index.html │ └── tutorial_load_parquet.ipynb ├── figs │ ├── aim_log1.png │ ├── aim_log2.png │ ├── aim_log3.png │ ├── aim_run1.png │ ├── aim_run2.png │ ├── aim_run3.png │ ├── dataset_properties_mirage19.png │ ├── dataset_properties_mirage22.png │ ├── dataset_properties_ucdavis-icdm19.png │ └── dataset_properties_utmobilenet21.png ├── github-mark │ ├── github-mark-white.png │ ├── github-mark-white.svg │ ├── github-mark.png │ └── github-mark.svg ├── home.js ├── index.html ├── index.md.DEPRECATED ├── install │ └── index.html ├── main.html.DEPRECATED ├── modeling │ ├── aim_repos │ │ ├── aim_webui │ │ │ └── index.html │ │ ├── aimrepo_subcmd │ │ │ └── index.html │ │ └── index.html │ ├── aim_repositories_content │ │ └── index.html │ ├── campaigns │ │ └── index.html │ ├── exploring_artifacts │ │ └── index.html │ ├── figs │ │ ├── aim_home-page.png │ │ ├── aim_log1.png │ │ ├── aim_log2.png │ │ ├── aim_log3.png │ │ ├── aim_run1.png │ │ ├── aim_run2.png │ │ └── aim_run3.png │ ├── index.html │ ├── overview │ │ └── index.html │ └── runs │ │ └── index.html ├── objects.inv ├── overrides │ ├── arrow-right-solid.svg │ ├── github-mark │ │ ├── github-mark-white.png │ │ ├── 
github-mark-white.svg │ │ ├── github-mark.png │ │ └── github-mark.svg │ ├── home.html │ ├── home.js │ ├── main.html │ ├── main.html.DEPRECATED │ ├── tcbench.svg │ └── tcbench_logo.svg ├── papers │ ├── imc23 │ │ ├── artifacts │ │ │ └── index.html │ │ ├── campaigns │ │ │ └── index.html │ │ ├── index.html │ │ ├── ml_artifacts │ │ │ └── index.html │ │ ├── notebooks │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human │ │ │ │ └── index.html │ │ │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human_files │ │ │ │ └── figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png │ │ │ ├── figure11_dropout_impact_supervised_setting.ipynb │ │ │ ├── figure11_dropout_impact_supervised_setting │ │ │ │ └── index.html │ │ │ ├── figure11_dropout_impact_supervised_setting_files │ │ │ │ └── figure11_dropout_impact_supervised_setting_15_1.png │ │ │ ├── figure1_flowpic_example.ipynb │ │ │ ├── figure1_flowpic_example │ │ │ │ └── index.html │ │ │ ├── figure1_flowpic_example_files │ │ │ │ └── figure1_flowpic_example_8_0.png │ │ │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ │ │ ├── figure3_confusion_matrix_supervised_setting │ │ │ │ └── index.html │ │ │ ├── figure3_confusion_matrix_supervised_setting_files │ │ │ │ └── figure3_confusion_matrix_supervised_setting_5_0.png │ │ │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ │ │ ├── figure4_ucdavis_per_class_average_flowpic │ │ │ │ └── index.html │ │ │ ├── figure4_ucdavis_per_class_average_flowpic_files │ │ │ │ └── figure4_ucdavis_per_class_average_flowpic_12_1.png │ │ │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ │ │ ├── figure5_ucdavis_augmentations_comparison │ │ │ │ └── index.html │ │ │ ├── figure5_ucdavis_augmentations_comparison_files │ │ │ │ └── figure5_ucdavis_augmentations_comparison_6_1.png │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance │ │ │ │ └── index.html │ │ │ ├── figure6_augmentations_comparison_across_datasets_critical_distance_files │ │ │ │ └── figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank │ │ │ │ └── index.html │ │ │ ├── figure7_augmentations_comparison_across_datasets_average_rank_files │ │ │ │ └── figure7_augmentations_comparison_across_datasets_average_rank_8_0.png │ │ │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ │ │ ├── figure8_ucdavis_kde_on_pkts_size │ │ │ │ └── index.html │ │ │ ├── figure8_ucdavis_kde_on_pkts_size_files │ │ │ │ └── figure8_ucdavis_kde_on_pkts_size_10_0.png │ │ │ ├── index.html │ │ │ ├── miscellaneous_stats.ipynb │ │ │ ├── miscellaneous_stats │ │ │ │ └── index.html │ │ │ ├── table10_ucdavis-icdm19_tukey.ipynb │ │ │ ├── table10_ucdavis-icdm19_tukey │ │ │ │ └── index.html │ │ │ ├── table2_datasets_properties.ipynb │ │ │ ├── table2_datasets_properties │ │ │ │ └── index.html │ │ │ ├── table3_xgboost_baseline.ipynb │ │ │ ├── table3_xgboost_baseline │ │ │ │ └── index.html │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ │ │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions │ │ │ │ └── index.html │ │ │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ │ │ ├── table5_simclr_dropout_and_projectionlayer │ │ │ │ └── index.html │ │ │ ├── table6_simclr_other_augmentation_pairs.ipynb │ │ │ ├── 
table6_simclr_other_augmentation_pairs │ │ │ │ └── index.html │ │ │ ├── table7_larger_trainset.ipynb │ │ │ ├── table7_larger_trainset │ │ │ │ └── index.html │ │ │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ │ │ ├── table8_augmentation-at-loading_on_other_datasets │ │ │ │ └── index.html │ │ │ ├── table9_icdm_finetuning_per_class_metrics_on_human.ipynb │ │ │ └── table9_icdm_finetuning_per_class_metrics_on_human │ │ │ │ └── index.html │ │ └── pytest │ │ │ └── index.html │ └── index.html ├── quick_tour │ └── index.html ├── search │ └── search_index.json ├── sitemap.xml ├── sitemap.xml.gz ├── tcbench.svg ├── tcbench │ ├── api │ │ ├── overview │ │ │ └── index.html │ │ ├── tcbench_cli_clickutils │ │ │ └── index.html │ │ ├── tcbench_cli_command_aimrepo │ │ │ └── index.html │ │ ├── tcbench_cli_command_campaign │ │ │ └── index.html │ │ ├── tcbench_cli_command_datasets │ │ │ └── index.html │ │ ├── tcbench_cli_command_singlerun │ │ │ └── index.html │ │ ├── tcbench_cli_richutils │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_datasets_utils │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_mirage19_json_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_mirage22_json_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_mirage19_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_mirage22_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet │ │ │ └── index.html │ │ ├── tcbench_libtcdatasets_utmobilenet21_csv_to_parquet │ │ │ └── index.html │ │ ├── tcbench_modeling_aimutils │ │ │ └── index.html │ │ ├── tcbench_modeling_augmentation │ │ │ └── index.html │ │ ├── tcbench_modeling_backbone │ │ │ └── index.html │ │ ├── tcbench_modeling_dataprep │ │ │ └── index.html │ │ ├── tcbench_modeling_losses │ │ │ └── index.html │ │ ├── tcbench_modeling_methods │ │ │ └── index.html │ │ ├── tcbench_modeling_run_augmentations_at_loading │ │ │ └── index.html │ │ ├── tcbench_modeling_run_augmentations_at_loading_xgboost │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_augmentations_at_loading_xgboost │ │ │ └── index.html │ │ ├── tcbench_modeling_run_campaign_contrastive_learning_and_finetune │ │ │ └── index.html │ │ ├── tcbench_modeling_run_contrastive_learning_and_finetune │ │ │ └── index.html │ │ └── tcbench_modeling_utils │ │ │ └── index.html │ ├── cli_intro │ │ └── index.html │ ├── index.html │ ├── install │ │ └── index.html │ ├── internals │ │ └── index.html │ └── overview │ │ └── index.html └── tcbench_logo.svg ├── mkdocs.yml ├── notebooks ├── imc23 │ ├── LICENSE │ ├── figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb │ ├── figure11_dropout_impact_supervised_setting.ipynb │ ├── figure1_flowpic_example.ipynb │ ├── figure3_confusion_matrix_supervised_setting.ipynb │ ├── figure3_ucdavis_augmentations_comparison.ipynb │ ├── figure4_ucdavis_per_class_average_flowpic.ipynb │ ├── figure5_ucdavis_augmentations_comparison.ipynb │ ├── figure6_augmentations_comparison_across_datasets_critical_distance.ipynb │ ├── figure7_augmentations_comparison_across_datasets_average_rank.ipynb │ ├── figure8_ucdavis_kde_on_pkts_size.ipynb │ ├── miscellaneous_stats.ipynb │ ├── 
table10_ucdavis-icdm19_tukey.ipynb │ ├── table2_datasets_properties.ipynb │ ├── table3_xgboost_baseline.ipynb │ ├── table4_ucdavis-icdm19_comparing_data_augmentations_functions.ipynb │ ├── table5_simclr_dropout_and_projectionlayer.ipynb │ ├── table6_simclr_other_augmentation_pairs.ipynb │ ├── table7_larger_trainset.ipynb │ ├── table8_augmentation-at-loading_on_other_datasets.ipynb │ └── table9_icdm_finetuning_per_class_metrics_on_human.ipynb └── tutorials │ └── tutorial_load_parquet.ipynb ├── pyproject.toml ├── src └── tcbench │ ├── FIGSHARE_RESOURCES.yml │ ├── __init__.py │ ├── cli │ ├── __init__.py │ ├── clickutils.py │ ├── command_aimrepo.py │ ├── command_campaign.py │ ├── command_datasets.py │ ├── command_fetchartifacts.py │ ├── command_singlerun.py │ ├── main.py │ ├── rich.theme │ └── richutils.py │ ├── libtcdatasets │ ├── __init__.py │ ├── datasets_utils.py │ ├── mirage19_generate_splits.py │ ├── mirage19_json_to_parquet.py │ ├── mirage22_generate_splits.py │ ├── mirage22_json_to_parquet.py │ ├── resources │ │ ├── DATASETS.yml │ │ ├── DATASETS_FILES_MD5.yml │ │ ├── mirage19.yml │ │ ├── mirage22.yml │ │ ├── ucdavis-icdm19.yml │ │ └── utmobilenet21.yml │ ├── ucdavis_icdm19_csv_to_parquet.py │ ├── ucdavis_icdm19_generate_splits.py │ ├── utmobilenet21_csv_to_parquet.py │ └── utmobilenet21_generate_splits.py │ └── modeling │ ├── __init__.py │ ├── aimutils.py │ ├── augmentation.py │ ├── backbone.py │ ├── dataprep.py │ ├── losses.py │ ├── methods.py │ ├── run_augmentations_at_loading.py │ ├── run_augmentations_at_loading_xgboost.py │ ├── run_campaign_augmentations_at_loading.py │ ├── run_campaign_augmentations_at_loading_xgboost.py │ ├── run_campaign_contrastive_learning_and_finetune.py │ ├── run_contrastive_learning_and_finetune.py │ └── utils.py └── tests ├── conftest.py ├── test_augmentations_at_loading.py ├── test_augmentations_at_loading_xgboost.py ├── test_cli_command_campaign.py ├── test_cli_command_singlerun.py ├── test_contrastive_learning_and_finetune.py ├── test_libtcdatasets_datasets_utils.py ├── test_modeling_backbone.py ├── test_modeling_dataprep.py └── test_modeling_methods.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | *.swp 6 | 7 | # C extensions 8 | *.so 9 | 10 | # Distribution / packaging 11 | .Python 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | wheels/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | cover/ 54 | 55 | # Translations 56 | *.mo 57 | *.pot 58 | 59 | # Django stuff: 60 | *.log 61 | local_settings.py 62 | db.sqlite3 63 | db.sqlite3-journal 64 | 65 | # Flask stuff: 66 | instance/ 67 | .webassets-cache 68 | 69 | # Scrapy stuff: 70 | .scrapy 71 | 72 | # Sphinx documentation 73 | docs/_build/ 74 | 75 | # PyBuilder 76 | .pybuilder/ 77 | target/ 78 | 79 | # Jupyter Notebook 80 | .ipynb_checkpoints 81 | 82 | # IPython 83 | profile_default/ 84 | ipython_config.py 85 | 86 | # pyenv 87 | # For a library or package, you might want to ignore these files since the code is 88 | # intended to run in multiple environments; otherwise, check them in: 89 | # .python-version 90 | 91 | # pipenv 92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 95 | # install all needed dependencies. 96 | #Pipfile.lock 97 | 98 | # poetry 99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 100 | # This is especially recommended for binary packages to ensure reproducibility, and is more 101 | # commonly ignored for libraries. 102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 103 | #poetry.lock 104 | 105 | # pdm 106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 107 | #pdm.lock 108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 109 | # in version control. 110 | # https://pdm.fming.dev/#use-with-ide 111 | .pdm.toml 112 | 113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 114 | __pypackages__/ 115 | 116 | # Celery stuff 117 | celerybeat-schedule 118 | celerybeat.pid 119 | 120 | # SageMath parsed files 121 | *.sage.py 122 | 123 | # Environments 124 | .env 125 | .venv 126 | env/ 127 | venv/ 128 | ENV/ 129 | env.bak/ 130 | venv.bak/ 131 | 132 | # Spyder project settings 133 | .spyderproject 134 | .spyproject 135 | 136 | # Rope project settings 137 | .ropeproject 138 | 139 | # mkdocs documentation 140 | /site 141 | 142 | # mypy 143 | .mypy_cache/ 144 | .dmypy.json 145 | dmypy.json 146 | 147 | # Pyre type checker 148 | .pyre/ 149 | 150 | # pytype static type analyzer 151 | .pytype/ 152 | 153 | # Cython debug symbols 154 | cython_debug/ 155 | 156 | # PyCharm 157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 159 | # and can be added to the global gitignore or merged into this file. For a more nuclear 160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
#.idea/

######
# EXTRAS
__ATTIC__
__TMP__

--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
MIT License

Copyright (c) 2023 tcbenchstack

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
include requirements.txt
include src/tcbench/libtcdatasets/resources/*yml
include src/tcbench/cli/rich.theme
include src/tcbench/FIGSHARE_RESOURCES.yml
global-exclude *.swp
global-exclude *.bck

--------------------------------------------------------------------------------
/docs.material/about.md:
--------------------------------------------------------------------------------
# The tcbench framework

tcbench is an ML/DL framework specifically for __Traffic Classification (TC)__,
created as a research project by the AI4NET team of the Huawei Technologies
research center in Paris, France.

!!! info "What is Traffic Classification?"

    Nodes within a computer network operate by exchanging
    information, namely *packets*, according to
    standardized protocols (e.g., HTTP for the web). To understand
    network health, this information flow needs to be constantly
    monitored so that one can react accordingly. For instance, one
    might want to prioritize certain traffic (e.g., video meetings)
    or block it (e.g., social media in a working environment).

    Traffic classification is the act of labeling an exchange of packets
    based on the Internet application that generated it.


The academic literature is rife with methods and proposals for TC.
Yet code artifacts are scarce, and public datasets
do not offer common conventions of use.

We designed tcbench with the following goals in mind:

| Goal | State of the art | tcbench |
|:-----|:-----------------|:--------|
|__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds. | An (opinionated) curation of datasets to create easy-to-use parquet files with associated train/val/test folds.|
|__:octicons-file-code-24: Code__ | The TC literature has no reference code base for ML/DL modeling. | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy-to-use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)|
|__:material-monitor-dashboard: Model tracking__ | Most ML frameworks require integration with cloud environments and subscription services. | tcbench uses [aimstack](https://aimstack.io/) to save training metrics on local servers; these can later be explored via its web UI or aggregated into report summaries using tcbench |

## Features and roadmap

tcbench is still under development, but (as suggested by its name) it ultimately aims
to be a reference framework for benchmarking multiple ML/DL solutions
related to TC.

At the current stage, tcbench offers

* Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`.
  You can use these datasets and their curated versions independently of tcbench.
  Check out the [dataset install](/tcbench/datasets/install) process and the [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets).

* Good support for the flowpic input representation, and minimal support
  for the 1d time series input representation (based on network packet properties).

* Data augmentation functionality for the flowpic input representation.

* Modeling via XGBoost, vanilla DL supervision, and contrastive learning (via SimCLR or SupCon).

Most of the functionalities described above relate to our __:material-file-document-outline: [IMC23 paper](/tcbench/papers/imc23/)__.
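For a quick first taste of the CLI, a minimal session looks like the following (a sketch using only subcommands documented on this site; `mirage22` is picked here because it supports automatic download):

```
tcbench datasets install --name mirage22
tcbench datasets samples-count --name mirage22
tcbench datasets schema --name mirage22 --type unfiltered
```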
More exciting features, including more datasets and algorithms, will come in the next months.

Stay tuned :wink:!

--------------------------------------------------------------------------------
/docs.material/artifacts.md:
--------------------------------------------------------------------------------
The submission is associated with three types of artifacts

* __:spider_web: Website__: This website serves as the primary source
of documentation. It collects
    * Documentation about [datasets :simple-artifacthub:](../datasets/install).
    * Documentation about our modeling framework called :material-link-off:[`tcbench`]().
    * Guides on how to [run experiments :fontawesome-solid-flask:](/tcbench/modeling/campaigns/) via `tcbench`.

* __:octicons-file-code-24: Code__: This includes
    * All source code related to :material-link-off:[`tcbench` :material-language-python:]().
    * A collection of [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference)
    used for the tables and figures of the submission.

* __:octicons-stack-24: Data__: This includes
    * The [dataset install, curation and split generation :material-rhombus-split-outline:](../datasets/install) used in our modeling.
    * All [models and logs :material-file-multiple-outline:](/tcbench/modeling/exploring_artifacts/) generated through our modeling campaigns.

## :simple-figshare: Figshare material

A key objective of our submission is to make all artifacts
available to the research community.
For instance, all code will be pushed to a :material-github: github repository,
this website will be published on github pages or a similar solution,
and data artifacts will be hosted on a public cloud storage solution.

Yet, due to the double-blind policy, we temporarily uploaded our artifacts to a
:simple-figshare: [figshare repository](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).

More specifically, on figshare you find the following tarballs.

* `website_documentation.tgz`: Well...if you are reading this page
you already know the tarball contains this website :octicons-smiley-24:.

* `code_artifacts_paper132.tgz`: All code developed. See
    * [Quick tour](../quick_tour) for `tcbench`.
    * [Tables and figures](../paper_tables_and_figures/reference/) for the jupyter notebooks.

* `curated_datasets.tgz`: The preprocessed version of the datasets.
Please see the datasets pages on this website.

* `ml_artifacts.tgz`: All output data generated via modeling campaigns.
For a fine-grained view, those can be explored via the [AIM web UI](/tcbench/modeling/exploring_artifacts/#aim-web-ui),
while results are generated via [:simple-jupyter: Jupyter notebooks](../paper_tables_and_figures/reference/).

## :material-package-variant: Unpack artifacts

In the figshare folder we also provide an `unpack_scripts.tgz`
tarball containing the following scripts

```
unpack_all.sh
_unpack_code_artifacts_paper132.sh
_unpack_curated_datasets.sh
_unpack_ml_artifacts.sh
```

These are simple bash scripts that simplify the
extraction and installation of all the material.

Use the following process

1. First of all, prepare a python virtual environment, for example via :simple-anaconda: conda
```
conda create -n tcbench python=3.10 pip
conda activate tcbench
```

2. Download all figshare tarballs into the same folder and run
```
tar -xzvf unpack_scripts.tgz
bash ./unpack_all.sh
```

--------------------------------------------------------------------------------
/docs.material/css/fonts.css:
--------------------------------------------------------------------------------
.md-typeset code,
.md-typeset kbd,
.md-typeset pre {
    font-feature-settings: "kern", "liga";
    font-variant-ligatures: normal;
}

:root{
    --md-text-font:"Roboto";
    --md-code-font:""
}

--------------------------------------------------------------------------------
/docs.material/css/jupyter-notebook.css:
--------------------------------------------------------------------------------
.jp-RenderedHTMLCommon p {
    margin: 0pt;
}

.jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt {
    display: none;
}

.jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt {
    display: none;
}

.jupyter-wrapper .jp-OutputArea-output pre {
    border-left: solid 5px #e0e0e0;
    padding-left: 5pt;
}

--------------------------------------------------------------------------------
/docs.material/css/material.css:
--------------------------------------------------------------------------------
/* More space at the bottom of the page. */
.md-main__inner {
    margin-bottom: 1.5rem;
}

--------------------------------------------------------------------------------
/docs.material/css/mkdocstrings.css:
--------------------------------------------------------------------------------
/* Indentation. */
div.doc-contents:not(.first) {
    padding-left: 25px;
    border-left: 4px solid rgb(230, 230, 230);
    margin-bottom: 80px;
}

/* Avoid breaking parameters name, etc. in table cells. */
td code {
    word-break: normal !important;
}

--------------------------------------------------------------------------------
/docs.material/css/style.css:
--------------------------------------------------------------------------------
/* Mark external links as such (also in nav) */
a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after {
    /* https://primer.style/octicons/link-external-16 */
    background-image: url('data:image/svg+xml,');
    height: 0.8em;
    width: 0.8em;
    margin-left: 0.2em;
    content: ' ';
    display: inline-block;
}

/* More space at the bottom of the page */
.md-main__inner {
    margin-bottom: 1.5rem;
}

--------------------------------------------------------------------------------
/docs.material/css/tables_style.css:
--------------------------------------------------------------------------------
th, td {
    border: 1px solid var(--md-typeset-table-color);
    border-spacing: 0;
    border-bottom: none;
    border-left: none;
    border-top: none;
}

th {
    background:var(--md-primary-fg-color);
    color:white;
}

.md-typeset table:not([class]) th {
    font-weight: 200;
}

.md-typeset__table {
    line-height: 1;
}

.md-typeset__table table:not([class]) {
    font-size: .74rem;
    border-right: none;
}

.md-typeset__table table:not([class]) td,
.md-typeset__table table:not([class]) th {
    padding: 9px;
}

/* light mode alternating table bg colors */
.md-typeset__table tr:nth-child(2n) {
    background-color: #f8f8f8;
}

/* dark mode alternating table bg colors */
[data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) {
    background-color: hsla(var(--md-hue),25%,25%,1)
}

--------------------------------------------------------------------------------
/docs.material/datasets/datasets.csv:
--------------------------------------------------------------------------------
Name,Classes, PDF, Data, Code, Auto-download
ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12:
mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark:
mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark:
utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12:
--------------------------------------------------------------------------------
/docs.material/datasets/datasets_splits.md:
--------------------------------------------------------------------------------
The splits described here are specific to our submission,
and they aim to replicate the earlier IMC22 paper.


### ucdavis-icdm19

Differently from the other datasets described here,
`ucdavis-icdm19` does NOT require any filtering/adaptation
after transforming the original CSVs into a monolithic parquet.

The testing partitions are also predefined ("human" and "script").

We do, however, need to define splits of 100 samples per class
for modeling. To do so we perform a random shuffle of
the data and generate 5 non-overlapping groups of 100 samples.

```
python datasets/generate_splits.py --config config.yml
```

???+ note "output"
    ```
    loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
    saving: datasets/ucdavis-icdm19/train_split_0.parquet
    saving: datasets/ucdavis-icdm19/train_split_1.parquet
    saving: datasets/ucdavis-icdm19/train_split_2.parquet
    saving: datasets/ucdavis-icdm19/train_split_3.parquet
    saving: datasets/ucdavis-icdm19/train_split_4.parquet
    loading: datasets/ucdavis-icdm19/ucdavis-icdm19.parquet
    saving: datasets/ucdavis-icdm19/test_split_human.parquet
    saving: datasets/ucdavis-icdm19/test_split_script.parquet
    ```
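The core of this procedure is small enough to sketch inline. The following is only an illustration, not the actual `generate_splits.py` (whose details live in the repo); the `app` column name and the 100-samples-per-class budget follow the documentation on this site, and in the real pipeline only the pretraining partition feeds these folds (the human/script partitions are kept for testing, as noted above):

```
import pandas as pd

def make_splits(df, n_splits=5, per_class=100, seed=1):
    # shuffle once, then carve out non-overlapping chunks per class
    shuffled = df.sample(frac=1, random_state=seed).reset_index(drop=True)
    # rank rows within each class after the shuffle
    rank = shuffled.groupby("app").cumcount()
    return [
        shuffled[(rank >= i * per_class) & (rank < (i + 1) * per_class)]
        for i in range(n_splits)
    ]

df = pd.read_parquet("datasets/ucdavis-icdm19/ucdavis-icdm19.parquet")
for i, split in enumerate(make_splits(df)):
    split.to_parquet(f"datasets/ucdavis-icdm19/train_split_{i}.parquet")
```

Because every split is a disjoint slice of the same shuffled frame, the 5 groups are guaranteed not to overlap.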
--------------------------------------------------------------------------------
/docs.material/datasets/guides/index.md:
--------------------------------------------------------------------------------
---
icon: material/book-outline
---

# Guides

[:simple-jupyter: Datasets loading](/tcbench/datasets/guides/tutorial_load_datasets): A jupyter notebook
showing the APIs used to load the parquet files composing a dataset.

--------------------------------------------------------------------------------
/docs.material/datasets/import.md:
--------------------------------------------------------------------------------
---
icon: material/cloud-download-outline
title: Import
---

# Import curated datasets

The `datasets` command also offers the option
to import a pre-computed curation of the datasets.

This is useful

* To avoid redundant computation.
Some of the preprocessing requires ingenuity and
a multiprocessing/multicore architecture.

* To further strengthen replicability (although
the curation process of tcbench is deterministic).

The [datasets summary table](/tcbench/datasets/#table-datasets-properties) indicates that
not all datasets have their curated data already available.
This is because some datasets (namely MIRAGE) have
tighter licensing. For those datasets,
please refer to the related installation page.

## The `import` subcommand

For datasets whose license allows redistributing
modified versions, the curated data is stored
in a public [:simple-figshare: figshare collection](https://figshare.com/collections/IMC23_artifacts_-_Replication_Contrastive_Learning_and_Data_Augmentation_in_Traffic_Classification_Using_a_Flowpic_Input_Representation/6849252).

You can manually fetch the datasets from the collection or
automate their installation with the `datasets import` subcommand.

```
tcbench datasets import --name ucdavis-icdm19
```

!!! info "Output"
    ```
    Downloading... ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 554.2 MB / 554.2 MB eta 0:00:00
    opening: /tmp/tmpb586lqhh/42438621

    Files installed
    Datasets
    └── ucdavis-icdm19
        └── 📁 preprocessed/
            ├── ucdavis-icdm19.parquet
            ├── LICENSE
            └── 📁 imc23/
                ├── test_split_human.parquet
                ├── test_split_script.parquet
                ├── train_split_0.parquet
                ├── train_split_1.parquet
                ├── train_split_2.parquet
                ├── train_split_3.parquet
                └── train_split_4.parquet
    ```


Notice that `installed` is not set. Indeed,
the prepared curated datasets do NOT repack
the original datasets, just the preprocessed ones
(see the [meta-data](/tcbench/datasets/metadata/#samples-count-reports) page).

You can also import the curated data by downloading the individual
archives from figshare and using the `--archive` option

```
tcbench datasets import \
    --name ucdavis-icdm19 \
    --archive <archive-path>
```

!!! warning ":simple-figshare: Figshare versioning"

    Figshare updates the version of a published entry for any modification
    to any of the elements related to the entry (including changes to
    the description).

    tcbench is configured to automatically fetch the latest version of
    the curated datasets. But if you download them manually, make
    sure to download the latest versions.
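When fetching archives by hand, it can also be worth verifying their integrity against the curated data MD5 reported by `tcbench datasets info` (shown below). A minimal sketch in plain Python, where the archive filename is a placeholder:

```
import hashlib
from pathlib import Path

def md5sum(path, chunk_size=1 << 20):
    # compute the MD5 of a file without loading it fully in memory
    digest = hashlib.md5()
    with Path(path).open("rb") as fin:
        for chunk in iter(lambda: fin.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# value as reported by `tcbench datasets info --name ucdavis-icdm19`
EXPECTED = "9828cce0c3a092ff19ed77f9e07f317c"
assert md5sum("curated_datasets_ucdavis-icdm19.tgz") == EXPECTED
```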
info "Output" 102 | ``` 103 | Datasets 104 | └── ucdavis-icdm19 105 | └── 🚩 classes: 5 106 | 🔗 paper_url: https://arxiv.org/pdf/1812.09761.pdf 107 | 🔗 website: https://github.com/shrezaei/Semi-supervised-Learning-QUIC- 108 | 🔗 data: https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE 109 | 🔗 curated data: https://figshare.com/ndownloader/files/42437043 110 | ➕ curated data MD5: 9828cce0c3a092ff19ed77f9e07f317c 111 | 📁 installed: None 112 | 📁 preprocessed: None 113 | 📁 data splits: None 114 | ``` 115 | -------------------------------------------------------------------------------- /docs.material/datasets/index.md: -------------------------------------------------------------------------------- 1 | # Datasets 2 | 3 | TCBench supports the following *public* traffic classification datasets 4 | 5 | ##### Table : Datasets properties 6 | | Name | Applications | Links | License | Our curation | 7 | |:----:|:------------:|:-----:|:-------:|:------------:| 8 | |[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)|5|[:fontawesome-regular-file-pdf:](https://arxiv.org/pdf/1812.09761.pdf)[:material-package-down:](https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE)[:material-github:](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-)| [:material-creative-commons:](https://creativecommons.org/licenses/by/4.0/) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_ucdavisicdm19_tgz/23538141/1) | 9 | |[`mirage19`](/tcbench/datasets/install/mirage19/)|20|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-2019.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - | 10 | |[`mirage22`](/tcbench/datasets/install/mirage22/)|9|[:fontawesome-regular-file-pdf:](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf)[:material-package-down:](https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip)[:material-web:](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html)| [:material-creative-commons: NC-ND](http://creativecommons.org/licenses/by-nc-nd/4.0/) | - | 11 | |[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)|17|[:fontawesome-regular-file-pdf:](https://ieeexplore.ieee.org/abstract/document/9490678/)[:material-package-down:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)[:material-github:](https://github.com/YuqiangHeng/UTMobileNetTraffic2021)| [:simple-gnu: GPLv3](https://www.gnu.org/licenses/gpl-3.0.en.html) | [:simple-figshare:](https://figshare.com/articles/dataset/curated_datasets_utmobilenet21_tgz/23648703/1) | 12 | 13 | At a glance, these datasets 14 | 15 | * Are collections of either *CSV or JSON* files. 16 | 17 | * Are reporting individual *packet level information or per-flow time series* and metrics. 18 | 19 | * May have been organized in subfolders, namely *partitions*, 20 | to reflect the related measurement campaign (see `ucdavis-icdm19`, `utmobilenet21`). 21 | 22 | * May have file names carrying semantic. 23 | 24 | * May require preprocessing to remove "background" noise, i.e., 25 | traffic unrelated to a target application (see `mirage19` and `mirage22`). 26 | 27 | * Do not have reference train/validation/test splits. 
In other words, these datasets need to be *curated*
before they can be used.

!!! tip "Important"

    The integration of these datasets in tcbench does not break
    the original licensing of the data, nor does it break their ownership.
    Rather, the integration aims at easing access to these datasets.
    We thus encourage researchers and practitioners interested in
    using these datasets to cite the original publications
    (see links in the table above).

## Terminology

When describing datasets and related processing we
use the following conventions (a code sketch follows the list):

* A __partition__ is a set of samples
pre-defined by the authors of the dataset.
For instance, a partition can relate to a
specific set of samples to use for training/test
(see [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)).

* A __split__ is a set of indexes of samples
that need to be used for train/validation/test.

* An __unfiltered__ dataset corresponds to a
monolithic parquet file containing the
original raw data of a dataset (no filtering
is applied).

* A __curated__ dataset is generated by
processing the unfiltered parquet
to clean noise, remove small flows, etc.;
each dataset has slightly different
curation rules.
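To make the distinction concrete, here is how the different flavors can be touched programmatically once `ucdavis-icdm19` has been imported (a minimal sketch with pandas; the relative paths mirror the `datasets import` output shown on the import page and may differ on your setup):

```
import pandas as pd

# unfiltered: the monolithic parquet with the original raw data
df_all = pd.read_parquet("ucdavis-icdm19/preprocessed/ucdavis-icdm19.parquet")

# a split: one predefined train fold used for modeling
df_train0 = pd.read_parquet("ucdavis-icdm19/preprocessed/imc23/train_split_0.parquet")

# per-class sample count of the split (100 samples per class, see samples-count)
print(df_train0["app"].value_counts())
```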
--------------------------------------------------------------------------------
/docs.material/datasets/install/index.md:
--------------------------------------------------------------------------------
---
icon: material/arrow-down-bold-box
---

# Datasets installation

Dataset installation is triggered with the `datasets install` subcommand

```
tcbench datasets install --help
```

!!! info "Output"
    ```
    Usage: tcbench datasets install [OPTIONS]

    Install a dataset.

    ╭─ Options ─────────────────────────────────────────────────────────────────────────────────────────────╮
    │ *  --name          -n  [ucdavis-icdm19|utmobilenet21|mirage19|mirage22]  Dataset to install. [required]│
    │    --input-folder  -i  PATH                                              Folder where to find          │
    │                                                                          pre-downloaded tarballs.      │
    │    --help                                                                Show this message and exit.   │
    ╰────────────────────────────────────────────────────────────────────────────────────────────────────────╯
    ```

The raw data of the datasets is hosted either on websites
or in cloud environments. Automatic download from
those locations is available only for some of the datasets.

| Name | Auto download |
|:----:|:-------------:|
|[`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19/)| :octicons-x-24: |
|[`mirage19`](/tcbench/datasets/install/mirage19/)| :material-check: |
|[`mirage22`](/tcbench/datasets/install/mirage22/)| :material-check: |
|[`utmobilenet21`](/tcbench/datasets/install/utmobilenet21/)| :octicons-x-24: |

If auto download is not possible, to install the dataset
you need to manually fetch the related archives, place them
in a folder, e.g., `/download`, and provide the `--input-folder`
option when triggering the installation.

When installing a dataset, `tcbench` also
shows two types of reports as formatted tables.

* __Samples count__: These tables collect
the number of samples (i.e., flows)
available.

* __Stats__: The curation process
can filter out flows (e.g., based
on a minimum number of packets)
or remove classes without a minimum
number of flows. As such, when
installing, `tcbench` shows
general stats (mean, std, percentiles)
about the number of packets
per flow across classes.

Please check the specific install page of each dataset for more details.


## Datasets deletion

The dataset files are installed within the
python environment where tcbench is installed.

You can delete a dataset using the following command

```
tcbench datasets delete --name <dataset-name>
```

--------------------------------------------------------------------------------
/docs.material/datasets/samples_count/index.md:
--------------------------------------------------------------------------------
---
icon: octicons/number-24
---

# Samples count report

An important dataset property to keep an eye
on when aiming for modeling is the number of
samples available for each class in a dataset.

You can easily recover this using the `datasets samples-count` subcommand.

For instance,
the following command computes the samples count for the *unfiltered*
version of the [`ucdavis-icdm19`](/tcbench/datasets/install/ucdavis-icdm19) dataset.

```
tcbench datasets samples-count --name ucdavis-icdm19
```

!!! note "Output"
    ```
    unfiltered
    ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ partition                   ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ pretraining                 │ google-doc    │    1221 │
    │                             │ google-drive  │    1634 │
    │                             │ google-music  │     592 │
    │                             │ google-search │    1915 │
    │                             │ youtube       │    1077 │
    │                             │ __total__     │    6439 │
    ├─────────────────────────────┼───────────────┼─────────┤
    │ retraining-human-triggered  │ google-doc    │      15 │
    │                             │ google-drive  │      18 │
    │                             │ google-music  │      15 │
    │                             │ google-search │      15 │
    │                             │ youtube       │      20 │
    │                             │ __total__     │      83 │
    ├─────────────────────────────┼───────────────┼─────────┤
    │ retraining-script-triggered │ google-doc    │      30 │
    │                             │ google-drive  │      30 │
    │                             │ google-music  │      30 │
    │                             │ google-search │      30 │
    │                             │ youtube       │      30 │
    │                             │ __total__     │     150 │
    └─────────────────────────────┴───────────────┴─────────┘
    ```

To obtain the breakdown of the first train split instead

```
tcbench datasets samples-count --name ucdavis-icdm19 --split 0
```

!!! note "Output"
    ```
    filtered, split: 0
    ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ google-doc    │     100 │
    │ google-drive  │     100 │
    │ google-music  │     100 │
    │ google-search │     100 │
    │ youtube       │     100 │
    ├───────────────┼─────────┤
    │ __total__     │     500 │
    └───────────────┴─────────┘
    ```

...or the `human` test split

```
tcbench datasets samples-count --name ucdavis-icdm19 --split human
```

!!! note "Output"
    ```
    filtered, split: human
    ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓
    ┃ app           ┃ samples ┃
    ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩
    │ youtube       │      20 │
    │ google-drive  │      18 │
    │ google-doc    │      15 │
    │ google-music  │      15 │
    │ google-search │      15 │
    ├───────────────┼─────────┤
    │ __total__     │      83 │
    └───────────────┴─────────┘
    ```
note "Output" 79 | ``` 80 | filtered, split: human 81 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 82 | ┃ app ┃ samples ┃ 83 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 84 | │ youtube │ 20 │ 85 | │ google-drive │ 18 │ 86 | │ google-doc │ 15 │ 87 | │ google-music │ 15 │ 88 | │ google-search │ 15 │ 89 | ├───────────────┼─────────┤ 90 | │ __total__ │ 83 │ 91 | └───────────────┴─────────┘ 92 | ``` 93 | -------------------------------------------------------------------------------- /docs.material/datasets/samples_count/ucdavis-icdm19.md: -------------------------------------------------------------------------------- 1 | # `ucdavis-icdm19` 2 | 3 | Below we report the samples count for each version of the dataset. 4 | 5 | !!! tip "Semantic of the splits" 6 | 7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23). 8 | 9 | ### unfiltered 10 | 11 | The unfitered version contains all data before curation. 12 | 13 | ``` 14 | tcbench datasets samples-count --name ucdavis-icdm19 15 | ``` 16 | 17 | !!! note "Output" 18 | ``` 19 | unfiltered 20 | ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 21 | ┃ partition ┃ app ┃ samples ┃ 22 | ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 23 | │ pretraining │ google-doc │ 1221 │ 24 | │ │ google-drive │ 1634 │ 25 | │ │ google-music │ 592 │ 26 | │ │ google-search │ 1915 │ 27 | │ │ youtube │ 1077 │ 28 | │ │ __total__ │ 6439 │ 29 | ├─────────────────────────────┼───────────────┼─────────┤ 30 | │ retraining-human-triggered │ google-doc │ 15 │ 31 | │ │ google-drive │ 18 │ 32 | │ │ google-music │ 15 │ 33 | │ │ google-search │ 15 │ 34 | │ │ youtube │ 20 │ 35 | │ │ __total__ │ 83 │ 36 | ├─────────────────────────────┼───────────────┼─────────┤ 37 | │ retraining-script-triggered │ google-doc │ 30 │ 38 | │ │ google-drive │ 30 │ 39 | │ │ google-music │ 30 │ 40 | │ │ google-search │ 30 │ 41 | │ │ youtube │ 30 │ 42 | │ │ __total__ │ 150 │ 43 | └─────────────────────────────┴───────────────┴─────────┘ 44 | ``` 45 | 46 | 47 | ### First training split 48 | 49 | ``` 50 | tcbench datasets samples-count --name ucdavis-icdm19 --split 0 51 | ``` 52 | 53 | !!! note "Output" 54 | ``` 55 | filtered, split: 0 56 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 57 | ┃ app ┃ samples ┃ 58 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 59 | │ google-doc │ 100 │ 60 | │ google-drive │ 100 │ 61 | │ google-music │ 100 │ 62 | │ google-search │ 100 │ 63 | │ youtube │ 100 │ 64 | ├───────────────┼─────────┤ 65 | │ __total__ │ 500 │ 66 | └───────────────┴─────────┘ 67 | ``` 68 | 69 | ### `human` test split 70 | 71 | This is equivalent to the `human` partition of the unfiltered dataset. 72 | 73 | ``` 74 | tcbench datasets samples-count --name ucdavis-icdm19 --split human 75 | ``` 76 | 77 | !!! note "Output" 78 | ``` 79 | filtered, split: human 80 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 81 | ┃ app ┃ samples ┃ 82 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 83 | │ youtube │ 20 │ 84 | │ google-drive │ 18 │ 85 | │ google-doc │ 15 │ 86 | │ google-music │ 15 │ 87 | │ google-search │ 15 │ 88 | ├───────────────┼─────────┤ 89 | │ __total__ │ 83 │ 90 | └───────────────┴─────────┘ 91 | ``` 92 | 93 | ### `script` test split 94 | 95 | This is equivalent to the `script` partition of the unfiltered dataset. 96 | 97 | ``` 98 | tcbench datasets samples-count --name ucdavis-icdm19 --split script 99 | ``` 100 | 101 | !!! 
note "Output" 102 | ``` 103 | filtered, split: script 104 | ┏━━━━━━━━━━━━━━━┳━━━━━━━━━┓ 105 | ┃ app ┃ samples ┃ 106 | ┡━━━━━━━━━━━━━━━╇━━━━━━━━━┩ 107 | │ google-doc │ 30 │ 108 | │ google-drive │ 30 │ 109 | │ google-music │ 30 │ 110 | │ google-search │ 30 │ 111 | │ youtube │ 30 │ 112 | ├───────────────┼─────────┤ 113 | │ __total__ │ 150 │ 114 | └───────────────┴─────────┘ 115 | ``` 116 | -------------------------------------------------------------------------------- /docs.material/datasets/samples_count/utmobilenet21.md: -------------------------------------------------------------------------------- 1 | # `utmobilenet21` 2 | 3 | Below we report the samples count for each version of the dataset. 4 | 5 | !!! tip "Semantic of the splits" 6 | 7 | The split available for this datasets relate to our [:material-file-document-outline: IMC23 paper](/tcbench/papers/imc23). 8 | 9 | ### unfiltered 10 | 11 | The unfitered version contains all data before curation. 12 | 13 | ``` 14 | tcbench datasets samples-count --name utmobilenet21 15 | ``` 16 | 17 | !!! note "Output" 18 | ``` 19 | unfiltered 20 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━┓ 21 | ┃ app ┃ samples ┃ 22 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━┩ 23 | │ youtube │ 5591 │ 24 | │ reddit │ 4370 │ 25 | │ google-maps │ 4347 │ 26 | │ spotify │ 2550 │ 27 | │ netflix │ 2237 │ 28 | │ pinterest │ 2165 │ 29 | │ hulu │ 1839 │ 30 | │ instagram │ 1778 │ 31 | │ dropbox │ 1752 │ 32 | │ facebook │ 1654 │ 33 | │ twitter │ 1494 │ 34 | │ gmail │ 1133 │ 35 | │ pandora │ 949 │ 36 | │ messenger │ 837 │ 37 | │ google-drive │ 803 │ 38 | │ hangout │ 720 │ 39 | │ skype │ 159 │ 40 | ├──────────────┼─────────┤ 41 | │ __total__ │ 34378 │ 42 | └──────────────┴─────────┘ 43 | ``` 44 | 45 | 46 | ### First training split 47 | 48 | ``` 49 | tcbench datasets samples-count --name utmobilenet21 --split 0 50 | ``` 51 | 52 | !!! note "Output" 53 | ``` 54 | min_pkts: 10, split: 0 55 | ┏━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┓ 56 | ┃ app ┃ train_samples ┃ val_samples ┃ test_samples ┃ all_samples ┃ 57 | ┡━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━╇━━━━━━━━━━━━━┩ 58 | │ youtube │ 2021 │ 225 │ 250 │ 2496 │ 59 | │ google-maps │ 1456 │ 162 │ 180 │ 1798 │ 60 | │ hulu │ 947 │ 105 │ 117 │ 1169 │ 61 | │ reddit │ 661 │ 73 │ 82 │ 816 │ 62 | │ spotify │ 538 │ 60 │ 66 │ 664 │ 63 | │ netflix │ 391 │ 44 │ 48 │ 483 │ 64 | │ pinterest │ 353 │ 39 │ 44 │ 436 │ 65 | │ twitter │ 296 │ 33 │ 36 │ 365 │ 66 | │ instagram │ 222 │ 25 │ 27 │ 274 │ 67 | │ hangout │ 206 │ 23 │ 25 │ 254 │ 68 | │ dropbox │ 193 │ 21 │ 24 │ 238 │ 69 | │ pandora │ 162 │ 18 │ 20 │ 200 │ 70 | │ facebook │ 111 │ 12 │ 14 │ 137 │ 71 | │ google-drive │ 105 │ 12 │ 13 │ 130 │ 72 | ├──────────────┼───────────────┼─────────────┼──────────────┼─────────────┤ 73 | │ __total__ │ 7662 │ 852 │ 946 │ 9460 │ 74 | └──────────────┴───────────────┴─────────────┴──────────────┴─────────────┘ 75 | ``` 76 | -------------------------------------------------------------------------------- /docs.material/datasets/schemas/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: material/table 3 | title: Schemas 4 | --- 5 | 6 | # Datasets schemas 7 | 8 | Despite the [curation](/tcbench/datasets/curation_and_metadata/), datasets can have intrinsically 9 | different schemas. 10 | 11 | You can investigate those on the command line via 12 | the `datasets schema` sub-command. 
13 | 14 | ``` 15 | tcbench datasets schema --help 16 | 17 | Usage: tcbench datasets schema [OPTIONS] 18 | 19 | Show datasets schemas 20 | 21 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 22 | │ --name -n [ucdavis-icdm19|utmobilenet21|mirage19|mirage22] Dataset to install │ 23 | │ --type -t [unfiltered|filtered|splits] Schema type (unfiltered: original raw data; filtered: │ 24 | │ curated data; splits: train/val/test splits) │ 25 | │ --help Show this message and exit. │ 26 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 27 | ``` 28 | 29 | Besides the dataset name `--name`, selecting 30 | a schema is simplified via the single parameter `--type`, 31 | which maps to the parquet files as follows: 32 | 33 | * `"unfiltered"` corresponds to the monolithic files 34 | before any filtering (i.e., the files under `/preprocessed`) 35 | 36 | * `"filtered"` corresponds to the filtered 37 | version of the monolithic files (i.e., the files 38 | having `minpkts` in the filename). 39 | 40 | * `"splits"` corresponds to the split files 41 | (i.e., the files having `xyz_split.parquet` 42 | in the filename). 43 | 44 | 45 | -------------------------------------------------------------------------------- /docs.material/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log1.png -------------------------------------------------------------------------------- /docs.material/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log2.png -------------------------------------------------------------------------------- /docs.material/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_log3.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run1.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run2.png -------------------------------------------------------------------------------- /docs.material/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/aim_run3.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_mirage19.png: --------------------------------------------------------------------------------
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage19.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_mirage22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_mirage22.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_ucdavis-icdm19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_ucdavis-icdm19.png -------------------------------------------------------------------------------- /docs.material/figs/dataset_properties_utmobilenet21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/figs/dataset_properties_utmobilenet21.png -------------------------------------------------------------------------------- /docs.material/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: TCBench 3 | template: home.html 4 | --- 5 | 6 | 7 | 8 | -------------------------------------------------------------------------------- /docs.material/index.md.DEPRECATED: -------------------------------------------------------------------------------- 1 | This website documents code and data artifacts related to the IMC23 submission #132 titled 2 | 3 | !!! quote "" 4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__ 5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations” 6 | from IMC’22* 7 | 8 | Our submission investigates the role of data 9 | augmentation by using both supervised 10 | and contrastive learning techniques 11 | across [4 datasets](datasets/install). 12 | 13 | It replicates and reproduces the following paper 14 | from the IMC22 program 15 | 16 | 17 | ``` 18 | @inproceedings{10.1145/3517745.3561436, 19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval}, 20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations}, 21 | year = {2022}, 22 | isbn = {9781450392594}, 23 | publisher = {Association for Computing Machinery}, 24 | address = {New York, NY, USA}, 25 | url = {https://doi.org/10.1145/3517745.3561436}, 26 | doi = {10.1145/3517745.3561436}, 27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference}, 28 | pages = {647–654}, 29 | numpages = {8}, 30 | location = {Nice, France}, 31 | series = {IMC '22} 32 | } 33 | ``` 34 | 35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`, 36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of 37 | frequency histograms extracted from consecutive time windows of the flow -- 38 | applied on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 
39 | 40 | In the first part of the submission we investigate how augmentations 41 | affect classification performance -- the study considers 3 image transformations (*rotation, 42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*) 43 | applied to packet timestamps -- when used either in a fully supervised setting or via 44 | contrastive learning. 45 | 46 | !!! info "Key takeaways from reproducibility" 47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 48 | Specifically, we uncover a data shift present in the dataset itself which justifies our results; 49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`. 50 | 51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently 52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`, 53 | we do not find statistically significant differences across the different augmentations. 54 | 55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet 56 | relying on more samples is beneficial to boost performance. 57 | 58 | Then, in the second part of the submission we replicate the 59 | analysis testing the same 6 augmentations across 3 other datasets. 60 | 61 | !!! info "Key takeaways from replicability" 62 | Using multiple datasets allows us to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e., 63 | the *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) 64 | is superior to the alternative transformations presented in the paper. 65 | 66 | 67 | ## Website conventions 68 | 69 | * :material-file-document-outline:`imc22-paper` is used to reference the replicated/reproduced paper. 70 | 71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available. 72 | 73 | * :material-link-off: suggests a link is expected to be added but is not yet available. 74 | -------------------------------------------------------------------------------- /docs.material/install.md: -------------------------------------------------------------------------------- 1 | # Install and config 2 | 3 | ## Download code and artifacts 4 | 5 | If you see this documentation it means 6 | you downloaded the file from figshare so you already have the code 7 | in your hands :) 8 | 9 | !!! note 10 | It is our intent to push all the code into a proper repository 11 | 12 | 13 | ## Configure a python environment :material-language-python: 14 | 15 | We first create a `conda` environment to install 16 | all required dependencies 17 | 18 | ``` 19 | conda create -n replicating-imc22-flowpic python=3.10 pip 20 | conda activate replicating-imc22-flowpic 21 | python -m pip install -r ./requirements.txt 22 | ``` 23 | 24 | The code artifacts are also a python package 25 | that can be installed inside the environment. 26 | From inside `/replicate_imc22_flowpic` run 27 | 28 | ``` 29 | python -m pip install .
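# optional sanity check -- an assumption on our part: the artifacts install
# the `tcbench` python package (the docs' notebooks use `import tcbench`),
# so a clean import suggests the environment is ready
python -c "import tcbench"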
30 | ``` 31 | -------------------------------------------------------------------------------- /docs.material/modeling/aim_repos/aim_webui.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: AIM Web UI 3 | icon: material/monitor-dashboard 4 | --- 5 | 6 | # AIM Web UI 7 | 8 | The AIM web interface is quite intuitive and 9 | the official documentation already provides 10 | a [general purpose tutorial](https://aimstack.readthedocs.io/en/latest/ui/overview.html). 11 | 12 | In this mini guide we limit ourselves to showcasing a basic set 13 | of operations to navigate the ML artifacts using 14 | some artifacts from our [IMC23](/tcbench/papers/imc23) paper. 15 | 16 | To replicate the following, make sure you [installed 17 | the needed artifacts](/tcbench/papers/imc23/artifacts/#downloading-artifacts). 18 | 19 | ``` 20 | aim up --repo notebooks/imc23/campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/ 21 | ``` 22 | 23 | !!! info "Output" 24 | ``` 25 | Running Aim UI on repo `` 26 | Open http://127.0.0.1:43800 27 | Press Ctrl+C to exit 28 | ``` 29 | 30 | Run `aim up --help` for more options (e.g., specifying a different port or hostname). 31 | 32 | When visiting the URL reported in the output 33 | you land on the home page of the AIM repository. 34 | 35 | This collects a variety of aggregate metrics 36 | and tracks activity over time. 37 | Hence, in our scenario 38 | the home page of the ML artifacts is mostly empty 39 | because all campaigns were generated at a specific moment in time. 40 | 41 | [![aim-home-page]][aim-home-page] 42 | 43 | [aim-home-page]: ../../figs/aim_home-page.png 44 | 45 | The left sidebar allows switching the view. 46 | In particular, "Runs" shows a tabular 47 | view of the runs collected in the repository. 48 | 49 | [![aim-run1]][aim-run1] 50 | 51 | [aim-run1]: ../../figs/aim_run1.png 52 | 53 | From the view you can see the hash of each run 54 | and scrolling horizontally you can glance 55 | over the metadata stored for each run. 56 | 57 | [![aim-run2]][aim-run2] 58 | 59 | [aim-run2]: ../../figs/aim_run2.png 60 | 61 | The search bar at the top of the page 62 | allows filtering runs. 63 | It accepts python expressions bound 64 | to a `run` entry point. 65 | 66 | For instance, in the following example we filter 67 | one specific run based on hyperparameters. 68 | 69 | [![aim-run3]][aim-run3] 70 | 71 | [aim-run3]: ../../figs/aim_run3.png 72 | 73 | 74 | !!! tip "Using the search box" 75 | 76 | The search box accepts python expressions and `run.hparams` 77 | is a dictionary of key-value pairs related to the different runs. 78 | 79 | As in the example, you can use the traditional python 80 | syntax of `dict[] == ` to filter, but the search 81 | box also supports a dot-notated syntax `hparams. == ` 82 | which has autocomplete. 83 | 84 | In the example, the search is based on equality but any other 85 | python operation is allowed. 86 | 87 | When clicking the hash of a run (e.g., the one we filtered) 88 | we switch to a per-run view which 89 | further details the collected metadata of the selected run. 90 | 91 | [![aim-log1]][aim-log1] 92 | 93 | [aim-log1]: ../../figs/aim_log1.png 94 | 95 | For instance, when scrolling at 96 | the bottom of the per-run page 97 | we can see that AIM details 98 | 99 | * The specific git commit used when executing the run. 100 | 101 | * The specific python packages and related versions 102 | available in the environment when executing the run.
103 | 104 | Both are automatically tracked by AIM with 105 | no extra code required (besides activating 106 | their collection when creating the run). 107 | 108 | [![aim-log2]][aim-log2] 109 | 110 | [aim-log2]: ../../figs/aim_log2.png 111 | 112 | The per-run view offers a variety of information 113 | organized in multiple tabs. 114 | 115 | For instance, the tab "Logs" 116 | details the console output. 117 | 118 | [![aim-log3]][aim-log3] 119 | 120 | [aim-log3]: ../../figs/aim_log3.png 121 | 122 | -------------------------------------------------------------------------------- /docs.material/modeling/aim_repos/index.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: Explore AIM repos 3 | icon: simple/awsorganizations 4 | --- 5 | 6 | An AIM repository is merely a folder 7 | where AIM stores a [rocksdb database](https://rocksdb.org/docs/getting-started.html) 8 | (see [AIM reference doc](https://aimstack.readthedocs.io/en/v3.17.5/understanding/data_storage.html) for more info). 9 | 10 | AIM has great functionality for tracking metrics 11 | but has very little support for 12 | tracking general artifacts beyond 13 | console output, nor native support for storing trained model 14 | files. 15 | 16 | Hence tcbench complements AIM by collecting 17 | run artifacts into run-specific folders. 18 | 19 | Specifically, a tcbench repository has the following structure 20 | 21 | ``` 22 | 23 | ├── .aim 24 | ├── artifacts 25 | │   ├── 001baa39ed8d4b8bb9966e94 26 | │   ├── 025830cb840b4f3f8f0a1625 27 | │   ├── 050bae064b5246f88e821a29 28 | ... 29 | └── campaign_summary 30 | └── 31 | ``` 32 | 33 | * Each subfolder of `/artifacts` collects 34 | the artifacts of a specific run and 35 | is named with the hash of the run itself. 36 | 37 | * The `/campaign_summary` subfolder collects 38 | reports generated by the [`aimrepo report` subcommand](/tcbench/modeling/aim_repos/aimrepo_subcmd/). 39 | 40 | Investigating the content of one run artifact folder 41 | 42 | ``` 43 | ls -1 /artifacts/001baa39ed8d4b8bb9966e94 44 | ``` 45 | 46 | !!! note "Output" 47 | 48 | ``` 49 | log.txt 50 | params.yml 51 | test-human_class_rep.csv 52 | test-human_conf_mtx.csv 53 | test-script_class_rep.csv 54 | test-script_conf_mtx.csv 55 | test-train-val-leftover_class_rep.csv 56 | test-train-val-leftover_conf_mtx.csv 57 | train_class_rep.csv 58 | train_conf_mtx.csv 59 | val_class_rep.csv 60 | val_conf_mtx.csv 61 | best_model_weights_split_2.pt 62 | ``` 63 | 64 | For each run tcbench creates the following artifacts: 65 | 66 | * `params.yml` is a YAML file collecting 67 | parameters used when triggering a run, i.e., both 68 | the arguments explicitly defined on the command line, 69 | as well as the ones with default values. 70 | 71 | * `log.txt` collects the console output generated by the run. 72 | 73 | * `_class_rep.csv` contains a [classification report](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.classification_report.html). The filename is bound to the context (i.e., train, val, test) 74 | used to generate it. 75 | 76 | * `_conf_mtx.csv` contains a [confusion matrix](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.confusion_matrix.html). The filename is bound to the context (i.e., train, val, test) 77 | used to generate it. 78 | 79 | * `best_model_weights_split_.pt` stores the weights of the best 80 | trained pytorch model (for a deep learning model).
The filename is bound to the specific 81 | split index configured when triggering the run. 82 | 83 | * `xgb_model_split_.json` stores an XGBoost model (when training 84 | via xgboost). The filename is bound to the specific 85 | split index configured when triggering the run. 86 | 87 | -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_home-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_home-page.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log1.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log2.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_log3.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run1.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run2.png -------------------------------------------------------------------------------- /docs.material/modeling/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/modeling/figs/aim_run3.png -------------------------------------------------------------------------------- /docs.material/modeling/overview.md: -------------------------------------------------------------------------------- 1 | # Modeling overview 2 | 3 | When training ML/DL models, 4 | finding the right combination of data 5 | preprocessing/splitting, algorithms and 6 | hyper-parameters can be challenging. 7 | Even more so when the modeling process 8 | aims to be [repeatable/replicable/reproducible](https://www.acm.org/publications/policies/artifact-review-badging). 9 | 10 | To ease this process it is key to 11 | 12 | * Collect __telemetry and metadata__. 13 | This includes both the parameters used to create models 14 | as well as lower level metrics such as the evolution of the 15 | training loss over time.
16 | 17 | * Generate __artifacts__ such as 18 | reports about the overall performance 19 | (e.g., confusion matrices). 20 | 21 | ## AIM stack tracking 22 | 23 | `tcbench` integrates 24 | with [AIM stack](https://aimstack.io/), an 25 | open-source and self-hosted model 26 | tracking framework enabling logging of metrics 27 | related to model training. Such telemetry 28 | can later be explored via a [web interface](https://aimstack.readthedocs.io/en/latest/ui/overview.html) 29 | or [programmatically extracted](https://aimstack.readthedocs.io/en/latest/using/query_runs.html) via the AIM SDK. 30 | 31 | !!! info "__Why not use more popular frameworks?__" 32 | 33 | There are [many solutions for model tracking](https://neptune.ai/blog/best-ml-experiment-tracking-tools). 34 | While frameworks such as __Weights & Biases__ or __Neptune.ai__ 35 | are extremely rich with features, unfortunately they typically 36 | are cloud-based solutions and not necessarily open-sourced. 37 | 38 | Alternative frameworks such as __Tensorboard__ and __MLFlow__ 39 | have only primitive functionalities compared to AIM stack. 40 | 41 | AIM stack sits in the middle of this spectrum: 42 | it is self-hosted (i.e., no need to push data to the cloud) 43 | and provides nice data exploration features. 44 | 45 | ## Runs and campaigns 46 | 47 | AIM collects modeling metadata into __repositories__ 48 | which are fully controlled by end-users: 49 | 50 | * Repositories are not tied to specific projects. 51 | In other words, the end-user can store 52 | in a repository models completely unrelated to each other. 53 | 54 | * There is no limit on the number of repositories 55 | that can be created. 56 | 57 | `tcbench` tracks in an AIM repository two types of tasks, 58 | namely *runs* and *campaigns*: 59 | 60 | * A __run__ corresponds to the training of an 61 | individual ML/DL model and is the "minimal experiment object" used by AIM, 62 | i.e., any tracked metadata needs to be 63 | associated with an AIM run. 64 | 65 | * A __campaign__ corresponds to a 66 | collection of runs. 67 | 68 | AIM assigns a unique hash code to a run, 69 | but a run object can be further enriched with 70 | extra metadata using the AIM SDK or web UI. 71 | 72 | A run can be enriched with both individual values 73 | (e.g., best validation loss observed or the final accuracy score) 74 | as well as series (e.g., loss value for each epoch). 75 | Moreover, values can have a *context* to further 76 | specify semantics (e.g., define if a registered metric 77 | relates to training, validation or test). 78 | 79 | While *run* is a term borrowed from AIM terminology, 80 | `tcbench` introduces *campaign* to 81 | group runs which are semantically related 82 | and need to be summarized together (e.g., results 83 | collected across different train/val/test splits). 84 | 85 | It follows that: 86 | 87 | * Runs are the fundamental building block for collecting 88 | modeling results. But they are also the fundamental 89 | unit when developing/debugging modeling tasks. 90 | 91 | * Campaigns bind multiple runs together. Hence, 92 | they are meant to be stored in separate AIM repositories 93 | (although this is NOT a strict requirement for `tcbench`).
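As a minimal sketch of such programmatic access (an assumption-laden example, not part of the tcbench API: it assumes the `aim` package is installed, that the path points to a repository folder containing an `.aim` database -- e.g., one of the campaign repositories shipped with the IMC23 artifacts -- and that runs track an `hparams` dictionary as exposed by the web UI):

```python
from aim import Repo

# open an AIM repository folder (illustrative path; adapt to your setup)
repo = Repo("notebooks/imc23/campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout")

for run in repo.iter_runs():
    # each run is identified by a unique hash; get() reads tracked metadata
    # ("hparams" mirrors what the web UI exposes as `run.hparams`)
    print(run.hash, run.get("hparams", {}))
```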
94 | -------------------------------------------------------------------------------- /docs.material/overrides/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs.material/overrides/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs.material/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs.material/overrides/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.md: -------------------------------------------------------------------------------- 1 | # Figure 10(b): Classwise evaluation on human. 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human.ipynb) 4 | 5 | 6 | ```python 7 | import pathlib 8 | 9 | import matplotlib as mpl 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import seaborn as sns 14 | import statsmodels.stats.api as sms 15 | 16 | %matplotlib inline 17 | %config InlineBackend.figure_format='retina' 18 | ``` 19 | 20 | 21 | ```python 22 | def compute_confidence_intervals(array, alpha=0.05): 23 | array = np.array(array) 24 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha) 25 | mean = array.mean() 26 | ci = high - mean 27 | return ci 28 | ``` 29 | 30 | 31 | ```python 32 | path = pathlib.Path( 33 | "./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/IncrementalSampling_Retraining(human-triggered)_20/" 34 | ) 35 | 36 | class_reps = list(path.glob("*class_rep.csv")) 37 | 38 | per_cls = np.stack( 39 | [ 40 | pd.read_csv(file)[:5][["Accuracy", "precision", "recall", "f1-score"]].values 41 | for file in class_reps 42 | ], 43 | axis=0, 44 | ) 45 | 46 | 47 | means = np.mean(per_cls, axis=0) 48 | 49 | cis = np.zeros([per_cls.shape[1], per_cls.shape[2]]) 50 | for i in range(per_cls.shape[1]): 51 | for j in range(per_cls.shape[2]): 52 | cis[i, j] = compute_confidence_intervals(per_cls[:, i, j]) 53 | ``` 54 | 55 | 56 | ```python 57 | X = ["G. Drive", "Youtube", "G. Doc", "G. Search", "G. 
Music"] 58 | X_axis = np.arange(len(X)) 59 | 60 | plt.rcParams.update({'font.size': 16}) 61 | 62 | fig, ax = plt.subplots(figsize=(7, 6.5)) 63 | ax.bar( 64 | X_axis - 0.3, 65 | means[:, 0], 66 | 0.2, 67 | label="Accuracy", 68 | yerr=cis[:, 0], 69 | ecolor="black", 70 | alpha=0.5, 71 | capsize=10, 72 | ) 73 | ax.bar( 74 | X_axis - 0.1, 75 | means[:, 1], 76 | 0.2, 77 | label="Precision", 78 | yerr=cis[:, 1], 79 | ecolor="black", 80 | alpha=0.5, 81 | capsize=10, 82 | ) 83 | ax.bar( 84 | X_axis + 0.1, 85 | means[:, 2], 86 | 0.2, 87 | label="Recall", 88 | yerr=cis[:, 2], 89 | ecolor="black", 90 | alpha=0.5, 91 | capsize=10, 92 | ) 93 | ax.bar( 94 | X_axis + 0.3, 95 | means[:, 3], 96 | 0.2, 97 | label="F1", 98 | yerr=cis[:, 3], 99 | ecolor="black", 100 | alpha=0.5, 101 | capsize=10, 102 | ) 103 | 104 | 105 | plt.xticks(X_axis, X) 106 | ax.set_xlabel("Class") 107 | ax.set_ylabel("Value") 108 | ax.set_ylim([0, 1]) 109 | plt.legend() 110 | ax.legend(bbox_to_anchor=(1, 1.02)) 111 | plt.grid(axis="y") 112 | 113 | plt.savefig("icdm19_fig3b_replicate_human_ci.png", dpi=300, bbox_inches="tight") 114 | ``` 115 | 116 | 117 | 118 | ![png](figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png) 119 | 120 | 121 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure1_flowpic_example.md: -------------------------------------------------------------------------------- 1 | # Figure 1 : Example of a packet time series transformed into a flowpic representation for a randomly selected flow 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure1_flowpic_example.ipynb) 4 | 5 | 6 | ```python 7 | import numpy as np 8 | import tcbench as tcb 9 | from matplotlib.colors import LogNorm, Normalize 10 | from tcbench import dataprep 11 | ``` 12 | 13 | 14 | ```python 15 | import matplotlib as mpl 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns 18 | 19 | %matplotlib inline 20 | %config InlineBackend.figure_format='retina' 21 | ``` 22 | 23 | 24 | ```python 25 | import tcbench 26 | ``` 27 | 28 | 29 | ```python 30 | # load unfiltered dataset 31 | FLOWPIC_BLOCK_DURATION = 15 32 | ``` 33 | 34 | 35 | ```python 36 | df = tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19) 37 | ``` 38 | 39 | 40 | ```python 41 | df_sample = df.sample(n=1, random_state=12345) 42 | ser = 
df_sample.iloc[0] 43 | ``` 44 | 45 | 46 | ```python 47 | fig, axes = plt.subplots( 48 | nrows=1, ncols=5, figsize=(15, 3), gridspec_kw=dict(width_ratios=[1, 1, 1, 1, 1.1]) 49 | ) 50 | 51 | direction = np.where(ser["pkts_dir"] == 0, -1, 1) 52 | y = ser["pkts_size"] * direction 53 | x = ser["timetofirst"] 54 | 55 | ax = axes[0] 56 | ax.stem( 57 | np.where(y > 0, x, 0), 58 | np.where(y > 0, y, 0), 59 | markerfmt="", 60 | basefmt="lightgray", 61 | label="outgoing", 62 | linefmt="green", 63 | ) 64 | ax.stem( 65 | np.where(y < 0, x, 0), 66 | np.where(y < 0, y, 0), 67 | markerfmt="", 68 | basefmt="lightgray", 69 | label="incoming", 70 | linefmt="lightgreen", 71 | ) 72 | ax.legend() 73 | ax.set_ylabel("packet size [B]") 74 | ax.set_xlabel("time [s]") 75 | 76 | rect = mpl.patches.Rectangle( 77 | (0, -1500), 15, 3000, linewidth=1, edgecolor="r", facecolor="none" 78 | ) 79 | ax.add_patch(rect) 80 | ax.annotate("first\n15s", (5, 1000)) 81 | 82 | for idx, flowpic_dim in enumerate((32, 64, 256, 512), start=1): 83 | # create a single sample dataset 84 | dset = dataprep.FlowpicDataset( 85 | data=df_sample, 86 | timetofirst_colname="timetofirst", 87 | pkts_size_colname="pkts_size", 88 | pkts_dir_colname="pkts_dir", 89 | target_colname="app", 90 | flowpic_dim=flowpic_dim, 91 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION, 92 | ) 93 | 94 | # fetch the flowpic representation 95 | flowpic, label = dset[0] 96 | 97 | # flattening the representation 98 | # to remove zero values (used for finding 99 | # min values) 100 | flowpic = flowpic.numpy().squeeze() 101 | flattened = flowpic.flatten() 102 | flattened = flattened[flattened > 0] 103 | 104 | ax = axes[idx] 105 | 106 | sns.heatmap( 107 | ax=ax, 108 | data=np.where(flowpic == 0, np.nan, flowpic), 109 | vmin=flattened.min(), 110 | vmax=flattened.max(), 111 | cbar=idx == 4, 112 | cbar_kws=dict(fraction=0.046, pad=0.01, aspect=20, label="Normalized packets count"), 113 | cmap=plt.get_cmap("viridis_r"), 114 | square=True, 115 | norm=LogNorm(flattened.min(), flattened.max()), 116 | ) 117 | for _, spine in ax.spines.items(): 118 | spine.set_visible(True) 119 | spine.set_linewidth(1) 120 | ax.yaxis.set_ticks([], None) 121 | ax.xaxis.set_ticks([], None) 122 | ax.set_ylabel(f"packets size (bins of {1500 // flowpic_dim}B)") 123 | ax.set_xlabel(f"time (bins of {FLOWPIC_BLOCK_DURATION / flowpic_dim * 1000:.1f}ms)") 124 | ax.set_title(f"{flowpic_dim}x{flowpic_dim}") 125 | 126 | plt.savefig("flowpic_example.png", dpi=300, bbox_inches="tight") 127 | ``` 128 | 129 | 130 | 131 | ![png](figure1_flowpic_example_files/figure1_flowpic_example_8_0.png) 132 | 133 | 134 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.md: -------------------------------------------------------------------------------- 1 | # Figure 3 : Average confusion matrixes for the 32x32 resolution across all experiments in Table 4 2 | 3 | [:simple-jupyter: 
:material-download:](/tcbench/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting.ipynb) 4 | 5 | 6 | ```python 7 | import pathlib 8 | 9 | import matplotlib as mpl 10 | import matplotlib.pyplot as plt 11 | import numpy as np 12 | import pandas as pd 13 | import seaborn as sns 14 | from sklearn.preprocessing import normalize 15 | 16 | %matplotlib inline 17 | %config InlineBackend.figure_format='retina' 18 | ``` 19 | 20 | 21 | ```python 22 | folder_artifacts = pathlib.Path( 23 | "./campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/artifacts/" 24 | ) 25 | ``` 26 | 27 | 28 | ```python 29 | filelists = [ 30 | list(folder_artifacts.glob("*/test-human_conf_mtx.csv")), 31 | list(folder_artifacts.glob("*/test-script_conf_mtx.csv")), 32 | ] 33 | 34 | titles = ["human", "script"] 35 | 36 | CLASSES = { 37 | "google-doc": "G. Doc", 38 | "google-drive": "G. Drive", 39 | "google-music": "G. Music", 40 | "google-search": "G. Search", 41 | "youtube": "YouTube", 42 | } 43 | ``` 44 | 45 | 46 | ```python 47 | plt.rcParams.update({"font.size": 14}) 48 | 49 | fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(9, 5)) 50 | # cbar_ax = fig.add_axes([0.93, 0.2, 0.02, 0.6]) # (left, bottom, width, height) 51 | for i in range(2): 52 | cm_mean = np.mean( 53 | np.stack( 54 | [pd.read_csv(file)[list(CLASSES.keys())].values for file in filelists[i]] 55 | ), 56 | axis=0, 57 | ) 58 | 59 | normed_cm_mean = normalize(cm_mean, axis=1, norm="l1") 60 | 61 | ax = axes[i] 62 | 63 | sns.heatmap( 64 | data=normed_cm_mean, 65 | ax=ax, 66 | square=True, 67 | cmap="viridis", 68 | annot=True, 69 | annot_kws={"fontsize": 11}, 70 | fmt=".2f", 71 | vmin=0, 72 | vmax=1, 73 | cbar_kws=dict(fraction=0.046, pad=0.03, aspect=20), 74 | ) 75 | 76 | ax.set_xticklabels(list(CLASSES.values()), rotation=45, ha="right") 77 | ax.set_yticklabels(list(CLASSES.values()), rotation=0) 78 | 79 | ax.set_title(titles[i]) 80 | 81 | ax.set_ylabel("Ground Truth") 82 | ax.set_xlabel("Prediction") 83 | 84 | plt.tight_layout() 85 | plt.savefig("ucdavis_dataset_confusion_matrix.png", bbox_inches="tight", dpi=150) 86 | ``` 87 | 88 | 89 | 90 | ![png](figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png) 91 | 92 | 93 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.md: -------------------------------------------------------------------------------- 1 | # Figure 8: Investigating root cause of G1 discrepancies: Kernel density estimation of the per-class packet size distributions. 2 | 3 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size.ipynb) 4 | 5 | 6 | ```python 7 | import itertools 8 | 9 | import numpy as np 10 | import pandas as pd 11 | ``` 12 | 13 | 14 | ```python 15 | import matplotlib as mpl 16 | import matplotlib.pyplot as plt 17 | import seaborn as sns 18 | from matplotlib.colors import LogNorm, Normalize 19 | 20 | %matplotlib inline 21 | %config InlineBackend.figure_format='retina' 22 | ``` 23 | 24 | 25 | ```python 26 | import tcbench as tcb 27 | from tcbench import dataprep 28 | ``` 29 | 30 | 31 | ```python 32 | FLOWPIC_DIM = 32 33 | FLOWPIC_BLOCK_DURATION = 15 34 | ``` 35 | 36 | 37 | ```python 38 | # load unfiltered dataset 39 | dset = dataprep.FlowpicDataset( 40 | data=tcb.load_parquet(tcb.DATASETS.UCDAVISICDM19), 41 | timetofirst_colname="timetofirst", 42 | pkts_size_colname="pkts_size", 43 | pkts_dir_colname="pkts_dir", 44 | target_colname="app", 45 | flowpic_dim=FLOWPIC_DIM, 46 | flowpic_block_duration=FLOWPIC_BLOCK_DURATION, 47 | ) 48 | ``` 49 | 50 | 51 | ```python 52 | REPLACE = { 53 | "google-doc": "G. Doc", 54 | "google-drive": "G. Drive", 55 | "google-music": "G. Music", 56 | "google-search": "G. 
Search", 57 | "youtube": "YouTube", 58 | "retraining-human-triggered": "Human", 59 | "retraining-script-triggered": "Script", 60 | } 61 | 62 | dset.df = dset.df.assign( 63 | app = dset.df["app"].replace(REPLACE), 64 | partition = dset.df["partition"].replace(REPLACE) 65 | ) 66 | ``` 67 | 68 | 69 | ```python 70 | TARGETS_LABEL = sorted(dset.df["app"].unique()) 71 | PARTITIONS_NAME = sorted(dset.df["partition"].unique()) 72 | ``` 73 | 74 | 75 | ```python 76 | all_pkts_size = dict() 77 | 78 | for partition_name in PARTITIONS_NAME: 79 | all_pkts_size[partition_name] = dict() 80 | 81 | for app in TARGETS_LABEL: 82 | df_tmp = dset.df[ 83 | (dset.df["partition"] == partition_name) & (dset.df["app"] == app) 84 | ] 85 | 86 | l = [] 87 | for idx in df_tmp.index: 88 | ser = df_tmp.loc[idx] 89 | indexes = np.where(ser["timetofirst"] < FLOWPIC_BLOCK_DURATION)[0] 90 | pkts_size = ser["pkts_size"][indexes] 91 | l.append(pkts_size) 92 | all_pkts_size[partition_name][app] = np.concatenate(l) 93 | ``` 94 | 95 | 96 | ```python 97 | # WARNING: computing the KDE will take a few minutes 98 | 99 | fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(15, 5)) 100 | 101 | line_props = { 102 | "pretraining": dict(linestyle="-"), 103 | "Script": dict( 104 | linestyle=(0, (1, 1)) 105 | ), 106 | "Human": dict(linestyle=(0, (1, 1))), 107 | } 108 | 109 | for ax, app in zip(axes, TARGETS_LABEL): 110 | for partition_name in [ 111 | "pretraining", 112 | "Script", 113 | "Human", 114 | ]: 115 | props = line_props[partition_name] 116 | sns.kdeplot( 117 | ax=ax, 118 | data=all_pkts_size[partition_name][app], 119 | linewidth=2, 120 | label=partition_name, 121 | **props, 122 | fill=True, 123 | alpha=0.1 124 | ) 125 | ax.legend(bbox_to_anchor=(0.5, 1.5), loc="upper center") 126 | ax.set_title(app, fontsize=10) 127 | ax.set_xlim((-500, 1800)) 128 | ax.set_xlabel("packet size") 129 | ax.set_ylabel("kde") 130 | 131 | plt.tight_layout() 132 | plt.savefig("ucdavid-icdm19_kde_pkts_size.png", dpi=300, bbox_inches='tight') 133 | ``` 134 | 135 | 136 | 137 | ![png](figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png) 138 | 139 | 140 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 10: Performance comparison across augmentations for different flowpic sizes. 
24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table10_ucdavis-icdm19_tukey.ipynb) 26 | 27 | 28 | ```python 29 | import pathlib 30 | 31 | import numpy as np 32 | import pandas as pd 33 | from scipy.stats import tukey_hsd 34 | ``` 35 | 36 | ```python 37 | folder = pathlib.Path( 38 | "campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/augment-at-loading-with-dropout" 39 | ) 40 | df = pd.concat( 41 | ( 42 | pd.read_parquet(folder / "runsinfo_flowpic_dim_1500.parquet"), 43 | pd.read_parquet(folder / "runsinfo_flowpic_dim_64.parquet"), 44 | pd.read_parquet(folder / "runsinfo_flowpic_dim_32.parquet"), 45 | ) 46 | ) 47 | ``` 48 | 49 | ```python 50 | # df = pd.read_parquet('campaigns/ucdavis-icdm19/augmentation-at-loading-with-dropout/campaign_summary/1684447037/merged_runsinfo.parquet') 51 | ``` 52 | 53 | ```python 54 | df_script = df[df["test_split_name"] == "test-script"] 55 | 56 | acc_32 = df_script[df_script["flowpic_dim"] == 32]["acc"].values.tolist() 57 | acc_64 = df_script[df_script["flowpic_dim"] == 64]["acc"].values.tolist() 58 | acc_1500 = df_script[df_script["flowpic_dim"] == 1500]["acc"].values.tolist() 59 | ``` 60 | 61 | ```python 62 | res = tukey_hsd(acc_32, acc_64, acc_1500) 63 | ``` 64 | 65 | ```python 66 | df = pd.DataFrame( 67 | np.array([res.pvalue[0, 1], res.pvalue[0, 2], res.pvalue[1, 2]]).reshape(-1, 1), 68 | columns=["pvalue"], 69 | index=pd.MultiIndex.from_arrays( 70 | [("32x32", "32x32", "64x64"), ("64x64", "1500x1500", "1500x1500")] 71 | ), 72 | ) 73 | df = df.assign(is_different=df["pvalue"] < 0.05) 74 | ``` 75 | 76 | ```python 77 | df 78 | ``` 79 | 80 | 81 | 82 |
|       |           | pvalue       | is_different |
|-------|-----------|--------------|--------------|
| 32x32 | 64x64     | 5.772842e-01 | False        |
| 32x32 | 1500x1500 | 1.936038e-06 | True         |
| 64x64 | 1500x1500 | 1.044272e-08 | True         |
115 | 116 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table3_xgboost_baseline.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 3: (G0) Baseline ML performance without augmentation in a supervised setting. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table3_xgboost_baseline.ipynb) 26 | 27 | 28 | ```python 29 | import pandas as pd 30 | ``` 31 | 32 | ```python 33 | df = pd.read_csv( 34 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-flowpic/campaign_summary/noaugmentation-flowpic/summary_flowpic_dim_32.csv", 35 | header=[0, 1], 36 | index_col=[0, 1], 37 | ) 38 | ``` 39 | 40 | ```python 41 | # reformatting 42 | df_tmp = df["acc"][["mean", "ci95"]].round(2) 43 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2) 44 | ``` 45 | 46 | 47 | 48 |
|             | mean  | ci95 |
|-------------|-------|------|
| test-script | 96.80 | 0.37 |
| test-human  | 73.65 | 2.14 |
73 | 74 | 75 | 76 | ```python 77 | df = pd.read_csv( 78 | "./campaigns/ucdavis-icdm19/xgboost/noaugmentation-timeseries/campaign_summary/noaugmentation-timeseries/summary_max_n_pkts_10.csv", 79 | header=[0, 1], 80 | index_col=[0, 1], 81 | ) 82 | ``` 83 | 84 | ```python 85 | # reformatting 86 | df_tmp = df["acc"][["mean", "ci95"]].round(2) 87 | df_tmp.loc[["test-script", "test-human"]].droplevel(1, axis=0).astype(float).round(2) 88 | ``` 89 | 90 | 91 | 92 |
|             | mean  | ci95 |
|-------------|-------|------|
| test-script | 94.53 | 0.56 |
| test-human  | 66.91 | 1.40 |
117 | 118 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 5: Impact of dropout and SimCLR projection layer dimension on fine-tuning. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table5_simclr_dropout_and_projectionlayer.ipynb) 26 | 27 | 28 | ```python 29 | import itertools 30 | 31 | import pandas as pd 32 | ``` 33 | 34 | ```python 35 | df = pd.read_csv( 36 | "campaigns/ucdavis-icdm19/simclr-dropout-and-projection/campaign_summary/simclr-dropout-and-projection/summary_flowpic_dim_32.csv", 37 | header=[0, 1], 38 | index_col=[0, 1, 2], 39 | ) 40 | 41 | df = df["acc"][["mean", "ci95"]] 42 | df = df.T 43 | df.columns.set_names("test_split_name", level=0, inplace=True) 44 | df.columns.set_names("projection_layer_dim", level=1, inplace=True) 45 | df.columns.set_names("with_dropout", level=2, inplace=True) 46 | df = df.reorder_levels( 47 | ["test_split_name", "with_dropout", "projection_layer_dim"], axis=1 48 | ) 49 | 50 | df = df[list(itertools.product(["test-script", "test-human"], [True, False], [30, 84]))] 51 | df = df.round(2) 52 | 53 | df.to_csv("table5_simclr_dropout_and_projectionlayer.csv") 54 | df 55 | ``` 56 | 57 | 58 | 59 |
| test_split_name | with_dropout | projection_layer_dim | mean  | ci95 |
|-----------------|--------------|----------------------|-------|------|
| test-script     | True         | 30                   | 91.81 | 0.38 |
| test-script     | True         | 84                   | 92.02 | 0.36 |
| test-script     | False        | 30                   | 92.18 | 0.31 |
| test-script     | False        | 84                   | 92.54 | 0.33 |
| test-human      | True         | 30                   | 72.12 | 1.37 |
| test-human      | True         | 84                   | 73.31 | 1.04 |
| test-human      | False        | 30                   | 74.69 | 1.13 |
| test-human      | False        | 84                   | 74.35 | 1.38 |
114 | 115 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 6: Comparing the fine-tuning performance when using different pairs of augmentation for pretraining. 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table6_simclr_other_augmentation_pairs.ipynb) 26 | 27 | 28 | ```python 29 | import itertools 30 | 31 | import pandas as pd 32 | ``` 33 | 34 | ```python 35 | RENAME = { 36 | "colorjitter": "Color jitter", 37 | "timeshift": "Time shift", 38 | "changertt": "Change RTT", 39 | "rotate": "Rotate", 40 | "packetloss": "Packet loss", 41 | } 42 | ``` 43 | 44 | ```python 45 | df = pd.read_csv( 46 | "./campaigns/ucdavis-icdm19/simclr-other-augmentation-pairs/campaign_summary/simclr-other-augmentation-pairs/summary_flowpic_dim_32.csv", 47 | header=[0, 1], 48 | index_col=[0, 1], 49 | ) 50 | 51 | df = df["acc"][["mean", "ci95"]].round(2) 52 | df = df.reset_index() 53 | df = df.assign( 54 | aug1=df["level_1"].apply(eval).str[0], 55 | aug2=df["level_1"].apply(eval).str[1], 56 | ) 57 | df = df.drop("level_1", axis=1) 58 | df = df.rename({"level_0": "test_split_name"}, axis=1) 59 | df = df.replace(RENAME) 60 | df = df.pivot(index="test_split_name", columns=["aug1", "aug2"]) 61 | df.columns.set_names(["stat", "aug1", "aug2"], inplace=True) 62 | df = df.reorder_levels(["aug1", "aug2", "stat"], axis=1) 63 | df.columns.set_names(["", "", ""], inplace=True) 64 | df.index.name = None 65 | 66 | df = df[ 67 | list(itertools.product(["Change RTT"], ["Time shift"], ["mean", "ci95"])) 68 | + list( 69 | itertools.product(["Packet loss"], ["Color jitter", "Rotate"], ["mean", "ci95"]) 70 | ) 71 | + list( 72 | itertools.product(["Change RTT"], ["Color jitter", "Rotate"], ["mean", "ci95"]) 73 | ) 74 | + list(itertools.product(["Color jitter"], ["Rotate"], ["mean", "ci95"])) 75 | ] 76 | df = df.loc[["test-script", "test-human"]] 77 | 78 | df.to_csv("table5_simclr_other_augmentation_pairs.csv") 79 | df 80 | ``` 81 | 82 | 83 | 84 |
| aug1         | aug2         | mean (test-script) | ci95 (test-script) | mean (test-human) | ci95 (test-human) |
|--------------|--------------|--------------------|--------------------|-------------------|-------------------|
| Change RTT   | Time shift   | 92.18              | 0.31               | 74.69             | 1.13              |
| Packet loss  | Color jitter | 90.17              | 0.41               | 73.67             | 1.24              |
| Packet loss  | Rotate       | 91.94              | 0.3                | 71.22             | 1.2               |
| Change RTT   | Color jitter | 91.72              | 0.36               | 75.56             | 1.23              |
| Change RTT   | Rotate       | 92.38              | 0.32               | 74.33             | 1.26              |
| Color jitter | Rotate       | 91.79              | 0.34               | 71.64             | 1.23              |
155 | 156 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.md: -------------------------------------------------------------------------------- 1 | 2 | 23 | # Table 9: Macro-average Accuracy with different retraining dataset and different sampling methods 24 | 25 | [:simple-jupyter: :material-download:](/tcbench/papers/imc23/notebooks/table9_icdm_finetuning_per_class_metrics_on_human.ipynb) 26 | 27 | 28 | ```python 29 | import pathlib 30 | 31 | import matplotlib as mpl 32 | import matplotlib.pyplot as plt 33 | import numpy as np 34 | import pandas as pd 35 | import seaborn as sns 36 | import statsmodels.stats.api as sms 37 | 38 | %matplotlib inline 39 | %config InlineBackend.figure_format='retina' 40 | ``` 41 | 42 | ```python 43 | def compute_confidence_intervals(array, alpha=0.05): 44 | array = np.array(array) 45 | low, high = sms.DescrStatsW(array).tconfint_mean(alpha) 46 | mean = array.mean() 47 | ci = high - mean 48 | return ci 49 | ``` 50 | 51 | ```python 52 | path = pathlib.Path("./campaigns/ucdavis-icdm19-git-repo-forked/artifacts/") 53 | 54 | class_repss = list(path.glob("*10/")) 55 | ``` 56 | 57 | ```python 58 | data = dict() 59 | 60 | for path in class_repss: 61 | if "script" in str(path): 62 | class_reps = list(path.glob("*class_rep.csv")) 63 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps] 64 | 65 | augmentation_name = path.name.split("_")[0].replace("Sampling", "") 66 | data[augmentation_name] = ( 67 | np.mean(accs) * 100, 68 | compute_confidence_intervals(accs), 69 | ) 70 | 71 | df_script = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2) 72 | df_script.columns = pd.MultiIndex.from_arrays([["script", "script"], df_script.columns]) 73 | # df_script 74 | ``` 75 | 76 | ```python 77 | data = dict() 78 | for path in class_repss: 79 | if "human" in str(path): 80 | class_reps = list(path.glob("*class_rep.csv")) 81 | accs = [pd.read_csv(file).iloc[6].values[2] for file in class_reps] 82 | 83 | augmentation_name = path.name.split("_")[0].replace("Sampling", "") 84 | data[augmentation_name] = ( 85 | np.mean(accs) * 100, 86 | compute_confidence_intervals(accs), 87 | ) 88 | 89 | df_human = pd.DataFrame(data, index=["mean", "ci95"]).T.round(2) 90 | df_human.columns = pd.MultiIndex.from_arrays([["human", "human"], df_human.columns]) 91 | ``` 92 | 93 | ```python 94 | df_tmp = pd.concat((df_script, df_human), axis=1).T 95 | display(df_tmp) 96 | df_tmp.to_csv("icdm_finetuning_per_class_metrics_on_human.csv") 97 | ``` 98 | 99 |
|        |      | FixedStep | Random | Incremental |
|--------|------|-----------|--------|-------------|
| script | mean | 87.11     | 94.63  | 96.22       |
| script | ci95 | 0.09      | 0.02   | 0.01        |
| human  | mean | 82.60     | 87.29  | 92.56       |
| human  | ci95 | 0.03      | 0.04   | 0.03        |
142 | -------------------------------------------------------------------------------- /docs.material/papers/imc23/pytest.md: -------------------------------------------------------------------------------- 1 | --- 2 | title: pytest 3 | icon: simple/pytest 4 | --- 5 | 6 | # ML unit testing 7 | 8 | Multiple tests are available to verify different functionalities 9 | of both tcbench and the modeling campaigns created. 10 | 11 | Tests are not bundled with the pypi installation. Rather, you need 12 | to follow the procedure described in the [artifact page](/tcbench/papers/imc23/artifacts/) 13 | to fetch the source code and install all artifacts and datasets. 14 | 15 | Tests are coded via [`pytest` :simple-pytest:](https://docs.pytest.org/en/7.4.x/) 16 | and are available under the `/tests` folder. 17 | 18 | !!! warning "Tests trigger model training" 19 | 20 | Most of the tests verify that the models trained for 21 | the campaigns described in the paper are indeed reproducible, i.e., 22 | they provide the exact same models obtained for the paper. 23 | 24 | To do so, the pytest resources fetched from figshare 25 | contain a subset of reference models, so the tests 26 | trigger the modeling for those scenarios and check 27 | that what is trained matches what was created for the paper. 28 | 29 | So be aware that running these tests might take a while 30 | depending on your local environment. 31 | 32 | 33 | To trigger all tests run 34 | 35 | ``` 36 | pytest tests 37 | ``` 38 | 39 | !!! note "Output" 40 | ``` 41 | ============================ test session starts ====================================== 42 | platform linux -- Python 3.10.13, pytest-7.4.2, pluggy-1.3.0 43 | rootdir: /tmp/tcbench-pip/tcbench 44 | plugins: anyio-3.7.1, helpers-namespace-2021.12.29 45 | collected 101 items 46 | 47 | tests/test_augmentations_at_loading.py ........... [ 10%] 48 | tests/test_augmentations_at_loading_xgboost.py . [ 11%] 49 | tests/test_cli_command_campaign.py .... [ 15%] 50 | tests/test_cli_command_singlerun.py ............ [ 27%] 51 | tests/test_contrastive_learning_and_finetune.py .. [ 29%] 52 | tests/test_libtcdatasets_datasets_utils.py ................. [ 46%] 53 | tests/test_modeling_backbone.py ................ [ 62%] 54 | tests/test_modeling_dataprep.py .................................. [ 96%] 55 | tests/test_modeling_methods.py .... [100%] 56 | ============================== 101 passed, 8 warnings in 6523.55s (1:48:43) ========================= 57 | ``` 58 | -------------------------------------------------------------------------------- /docs.material/papers/index.md: -------------------------------------------------------------------------------- 1 | # Research articles featuring tcbench 2 | 3 | [__Replication: Contrastive Learning and Data Augmentation in Traffic Classification__](/tcbench/papers/imc23) 4 |
5 | *A. Finamore, C. Wang, J. Krolikowski, J. M. Navarro, F. Cheng, D. Rossi*,
ACM Internet Measurement Conference (IMC), 2023 7 |
[:material-hexagon-outline: __Artifacts__](/tcbench/papers/imc23/artifacts) [:fontawesome-regular-file-pdf: __PDF__](https://arxiv.org/pdf/2309.09733) 8 | 9 | === "Bibtex" 10 | ``` 11 | @misc{finamore2023contrastive, 12 | title={ 13 | Contrastive Learning and Data Augmentation 14 | in Traffic Classification Using a 15 | Flowpic Input Representation 16 | }, 17 | author={ 18 | Alessandro Finamore and 19 | Chao Wang and 20 | Jonatan Krolikowski 21 | and Jose M. Navarro 22 | and Fuxing Chen and 23 | Dario Rossi 24 | }, 25 | year={2023}, 26 | eprint={2309.09733}, 27 | archivePrefix={arXiv}, 28 | primaryClass={cs.LG} 29 | } 30 | ``` 31 | 32 | === "Abstract" 33 | Over the last years we witnessed a renewed interest towards 34 | Traffic Classification (TC) captivated by the rise of Deep 35 | Learning (DL). Yet, the vast majority of TC literature lacks 36 | code artifacts, performance assessments across datasets and 37 | reference comparisons against Machine Learning (ML) methods. 38 | Among those works, a recent study from IMC'22 [17] is 39 | worthy of attention since it adopts recent DL methodologies 40 | (namely, few-shot learning, self-supervision via contrastive 41 | learning and data augmentation) appealing for networking as 42 | they enable to learn from a few samples and transfer across 43 | datasets. The main result of [17] on the UCDAVIS19, ISCX-VPN 44 | and ISCX-Tor datasets is that, with such DL methodologies, 45 | 100 input samples are enough to achieve very high accuracy 46 | using an input representation called "flowpic" (i.e., a per-flow 47 | 2d histogram of the packet size evolution over time). 48 | In this paper (i) we reproduce [17] on the same datasets 49 | and (ii) we replicate its most salient aspect (the importance 50 | of data augmentation) on three additional public datasets, 51 | MIRAGE-19, MIRAGE-22 and UTMOBILENET21. While we confirm 52 | most of the original results, we also found a 20% accuracy 53 | drop on some of the investigated scenarios due to 54 | a data shift of the original dataset that we uncovered. 55 | Additionally, our study validates that the data augmentation 56 | strategies studied in [17] perform well on other datasets too. 57 | In the spirit of reproducibility and replicability we make all 58 | artifacts (code and data) available at [10].
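To make the "flowpic" input representation mentioned in the abstract concrete, the sketch below builds one with plain numpy. It is only an illustration under assumed parameters (a 32x32 grid, a 15 s observation window, a 1500-byte maximum packet size, and a made-up packet series); it is not the tcbench implementation:

```python
import numpy as np

# Made-up packet series for one flow: arrival times (seconds) and sizes (bytes).
timestamps = np.array([0.00, 0.02, 0.05, 0.30, 0.31, 0.90, 1.20, 1.45])
pkt_sizes = np.array([74, 1500, 1500, 60, 1500, 74, 1500, 1500])

def flowpic(timestamps, pkt_sizes, dim=32, max_time=15.0, max_size=1500):
    """Build a dim x dim 2d histogram of packet size vs. arrival time."""
    size_bins = np.linspace(0.0, max_size, dim + 1)
    time_bins = np.linspace(0.0, max_time, dim + 1)
    # Each cell counts the packets of a given size range observed in a
    # given time window of the flow.
    img, _, _ = np.histogram2d(pkt_sizes, timestamps, bins=(size_bins, time_bins))
    return img

print(flowpic(timestamps, pkt_sizes).shape)  # (32, 32)
```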
59 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/overview.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs.material/tcbench/api/overview.md -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_clickutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.clickutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_aimrepo.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_aimrepo 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_campaign.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_campaign 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_datasets.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_datasets 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_command_singlerun.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.command_singlerun 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_cli_richutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.cli.richutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets.md: -------------------------------------------------------------------------------- 1 | ## Generating train/val/test splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_datasets_utils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.datasets_utils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_mirage19_json_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage19_json_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_mirage22_json_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage22_json_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage19_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage19_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_mirage22_generate_splits.md: 
-------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.mirage22_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_ucdavis_icdm19_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_tcbench_utmobilenet21_generate_splits.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.utmobilenet21_generate_splits 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_ucdavis_icdm19_csv_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.ucdavis_icdm19_csv_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_libtcdatasets_utmobilenet21_csv_to_parquet.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.libtcdatasets.utmobilenet21_csv_to_parquet 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_aimutils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.aimutils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_augmentation.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.augmentation 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_backbone.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.backbone 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_dataprep.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.dataprep 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_losses.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.losses 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_methods.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.methods 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_augmentations_at_loading 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_augmentations_at_loading_xgboost.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_augmentations_at_loading_xgboost 2 | 
-------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_augmentations_at_loading_xgboost.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_augmentations_at_loading_xgboost 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_campaign_contrastive_learning_and_finetune.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_campaign_contrastive_learning_and_finetune 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_run_contrastive_learning_and_finetune.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.run_contrastive_learning_and_finetune 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/api/tcbench_modeling_utils.md: -------------------------------------------------------------------------------- 1 | ::: tcbench.modeling.utils 2 | -------------------------------------------------------------------------------- /docs.material/tcbench/cli_intro.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: octicons/terminal-16 3 | title: CLI Intro 4 | --- 5 | 6 | # CLI Introduction 7 | 8 | tcbench can be used as an SDK and 9 | from the command line. 10 | 11 | When installing tcbench you also install 12 | a `tcbench` command-line script 13 | created via [:material-cursor-default: click](https://click.palletsprojects.com/en/8.1.x/) 14 | and [:material-language-python: rich](https://github.com/Textualize/rich). 15 | 16 | For instance 17 | ``` 18 | tcbench --help 19 | ``` 20 | 21 | !!! info "Output" 22 | ```bash 23 | Usage: tcbench [OPTIONS] COMMAND [ARGS]... 24 | 25 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────╮ 26 | │ --version Show tcbench version and exit. │ 27 | │ --help Show this message and exit. │ 28 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯ 29 | ╭─ Commands ───────────────────────────────────────────────────────────────────────────────╮ 30 | │ aimrepo Investigate AIM repository content. │ 31 | │ campaign Triggers a modeling campaign. │ 32 | │ datasets Install/Remove traffic classification datasets. │ 33 | │ run Triggers a modeling run. │ 34 | │ tree show the command tree of your CLI. │ 35 | ╰──────────────────────────────────────────────────────────────────────────────────────────╯ 36 | ``` 37 | 38 | The commands are organized in a nested structure which 39 | you can visualize using 40 | 41 | ``` 42 | tcbench tree 43 | ``` 44 | 45 | !!! info "Output" 46 | ```bash 47 | main 48 | ├── aimrepo - Investigate AIM repository content. 49 | │ ├── ls - List a subset of properties of each run. 50 | │ ├── merge - Coalesce different AIM repos into a single new repo. 51 | │ ├── properties - List properties across all runs.
52 | │ └── report - Summarize runs performance metrics. 53 | ├── campaign - Triggers a modeling campaign. 54 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set. 55 | │ └── contralearn-and-finetune - Modeling by pre-training via contrastive learning and then finetune the final classifier from the pre-trained model. 56 | ├── datasets - Install/Remove traffic classification datasets. 57 | │ ├── delete - Delete a dataset. 58 | │ ├── import - Import datasets. 59 | │ ├── info - Show the meta-data related to supported datasets. 60 | │ ├── install - Install a dataset. 61 | │ ├── lsparquet - Tree view of the datasets parquet files. 62 | │ ├── samples-count - Show report on number of samples per class. 63 | │ └── schema - Show datasets schemas 64 | ├── run - Triggers a modeling run. 65 | │ ├── augment-at-loading - Modeling by applying data augmentation when loading the training set. 66 | │ └── contralearn-and-finetune - Modeling by pre-training via contrastive learning and then finetune the final classifier from the pre-trained model. 67 | └── tree - show the command tree of your CLI 68 | ``` 69 | -------------------------------------------------------------------------------- /docs.material/tcbench/index.md: -------------------------------------------------------------------------------- 1 | # The tcbench framework 2 | 3 | tcbench is an ML/DL framework specific to __Traffic Classification (TC)__, 4 | created as a research project by the AI4NET team of the Huawei Technologies 5 | research center in Paris, France. 6 | 7 | !!! info "What is Traffic Classification?" 8 | 9 | Nodes within a computer network operate by exchanging 10 | information, namely *packets*, whose exchange is regulated according 11 | to standardized protocols (e.g., HTTP for the web). To understand 12 | network health, this information flow needs to be constantly monitored 13 | so that one can react accordingly. For instance, one 14 | might want to prioritize certain traffic (e.g., a video meeting) 15 | or block it (e.g., social media in a working environment). 16 | 17 | Traffic classification is the act of labeling an exchange of packets 18 | based on the Internet application which generated it. 19 | 20 | 21 | The academic literature is rife with methods and proposals for TC. 22 | Yet, code artifacts are scarce, and public datasets 23 | do not offer common conventions of use. 24 | 25 | We designed tcbench with the following goals in mind: 26 | 27 | | Goal | State of the art | tcbench | 28 | |:-----|:-----------------|:--------| 29 | |__:octicons-stack-24: Data curation__ | There are a few public datasets for TC, yet no common format/schema, cleaning process, or standard train/val/test folds.
| An (opinionated) curation of datasets to create easy-to-use parquet files with associated train/val/test folds.| 30 | |__:octicons-file-code-24: Code__ | TC literature has no reference code base for ML/DL modeling | tcbench is [:material-github: open source](https://github.com/tcbenchstack/tcbench) with an easy-to-use CLI based on [:fontawesome-solid-arrow-pointer: click](https://click.palletsprojects.com/en/8.1.x/)| 31 | |__:material-monitor-dashboard: Model tracking__ | Most ML frameworks require integration with cloud environments and subscription services | tcbench uses [aimstack](https://aimstack.io/) to save training metrics on local servers, which can later be explored via its web UI or aggregated into report summaries using tcbench | 32 | 33 | ## Features and roadmap 34 | 35 | tcbench is still under development, but (as suggested by its name) ultimately aims 36 | to be a reference framework for benchmarking multiple ML/DL solutions 37 | related to TC. 38 | 39 | At the current stage, tcbench offers 40 | 41 | * Integration with 4 datasets, namely `ucdavis-icdm19`, `mirage19`, `mirage22` and `utmobilenet21`. 42 | You can use these datasets and their curated versions independently of tcbench. 43 | Check out the [dataset install](/tcbench/datasets/install/) process and [dataset loading tutorial](/tcbench/datasets/guides/tutorial_load_datasets/). 44 | 45 | * Good support for the flowpic input representation and minimal support 46 | for a 1d time series input representation (based on network packet properties). 47 | 48 | * Data augmentation functionality for the flowpic input representation. 49 | 50 | * Modeling via XGBoost, vanilla DL supervision and contrastive learning (via SimCLR or SupCon). 51 | 52 | Most of the functionalities described above relate to our __:material-file-document-outline: [IMC23 paper](/papers/imc23/)__. 53 | 54 | More exciting features, including more datasets and algorithms, will come in the next months. 55 | 56 | Stay tuned :wink:! 57 | 58 | -------------------------------------------------------------------------------- /docs.material/tcbench/install.md: -------------------------------------------------------------------------------- 1 | --- 2 | icon: octicons/package-16 3 | --- 4 | 5 | # Install 6 | 7 | First prepare a python virtual environment, for example via :simple-anaconda: conda 8 | ``` 9 | conda create -n tcbench python=3.10 pip 10 | conda activate tcbench 11 | ``` 12 | 13 | tcbench is [available on pypi](https://pypi.org/project/tcbench/) so you can install it via pip 14 | ``` 15 | python -m pip install tcbench 16 | ``` 17 | 18 | All dependencies are automatically pulled. 19 | 20 | Verify the installation was successful by running 21 | ``` 22 | tcbench --version 23 | ``` 24 | 25 | !!! note "Output" 26 | ``` 27 | version: 0.0.21 28 | ``` 29 | 30 | # Developer 31 | 32 | For developing your own projects or contributing 33 | to tcbench, fork/clone the [official repository](https://github.com/tcbenchstack/tcbench) 34 | and install the developer version. 35 | 36 | ``` 37 | python -m pip install .[dev] 38 | ``` 39 | 40 | The only difference with respect to the base version 41 | is the installation of extra dependencies.
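Once installed, a natural next step is exploring the `datasets` subcommands. The snippet below uses the subcommand names listed by `tcbench tree`; the `--name` option is indicative of how a dataset is selected, so double-check the exact flags with `--help`:

```
tcbench datasets info
tcbench datasets install --name ucdavis-icdm19
```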
42 | -------------------------------------------------------------------------------- /docs/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/.DS_Store -------------------------------------------------------------------------------- /docs/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/assets/_mkdocstrings.css: -------------------------------------------------------------------------------- 1 | 2 | /* Avoid breaking parameter names, etc. in table cells. */ 3 | .doc-contents td code { 4 | word-break: normal !important; 5 | } 6 | 7 | /* No line break before first paragraph of descriptions. */ 8 | .doc-md-description, 9 | .doc-md-description>p:first-child { 10 | display: inline; 11 | } 12 | 13 | /* Max width for docstring sections tables. */ 14 | .doc .md-typeset__table, 15 | .doc .md-typeset__table table { 16 | display: table !important; 17 | width: 100%; 18 | } 19 | 20 | .doc .md-typeset__table tr { 21 | display: table-row; 22 | } 23 | 24 | /* Defaults in Spacy table style. */ 25 | .doc-param-default { 26 | float: right; 27 | } 28 | 29 | /* Keep headings consistent. */ 30 | h1.doc-heading, 31 | h2.doc-heading, 32 | h3.doc-heading, 33 | h4.doc-heading, 34 | h5.doc-heading, 35 | h6.doc-heading { 36 | font-weight: 400; 37 | line-height: 1.5; 38 | color: inherit; 39 | text-transform: none; 40 | } 41 | 42 | h1.doc-heading { 43 | font-size: 1.6rem; 44 | } 45 | 46 | h2.doc-heading { 47 | font-size: 1.2rem; 48 | } 49 | 50 | h3.doc-heading { 51 | font-size: 1.15rem; 52 | } 53 | 54 | h4.doc-heading { 55 | font-size: 1.10rem; 56 | } 57 | 58 | h5.doc-heading { 59 | font-size: 1.05rem; 60 | } 61 | 62 | h6.doc-heading { 63 | font-size: 1rem; 64 | } -------------------------------------------------------------------------------- /docs/assets/images/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/assets/images/favicon.png -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.hi.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.hi=function(){this.pipeline.reset(),this.pipeline.add(e.hi.trimmer,e.hi.stopWordFilter,e.hi.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.hi.stemmer))},e.hi.wordCharacters="ऀ-ःऄ-एऐ-टठ-यर-िी-ॏॐ-य़ॠ-९॰-ॿa-zA-Za-zA-Z0-90-9",e.hi.trimmer=e.trimmerSupport.generateTrimmer(e.hi.wordCharacters),e.Pipeline.registerFunction(e.hi.trimmer,"trimmer-hi"),e.hi.stopWordFilter=e.generateStopWordFilter("अत अपना अपनी अपने अभी अंदर आदि आप इत्यादि इन इनका इन्हीं इन्हें इन्हों इस इसका इसकी इसके इसमें इसी इसे उन उनका उनकी उनके उनको उन्हीं उन्हें उन्हों उस उसके उसी उसे एक एवं एस ऐसे और कई कर करता करते करना करने करें कहते कहा का काफ़ी कि कितना किन्हें किन्हों किया किर किस किसी किसे की कुछ कुल के को कोई कौन कौनसा गया घर जब जहाँ जा जितना जिन जिन्हें जिन्हों जिस जिसे जीधर जैसा जैसे जो तक तब तरह तिन तिन्हें तिन्हों तिस तिसे तो था थी थे दबारा दिया दुसरा दूसरे दो द्वारा न नके नहीं ना निहायत नीचे ने पर पहले पूरा पे फिर बनी बही बहुत बाद बाला बिलकुल भी भीतर मगर मानो मे में यदि यह यहाँ यही या यिह ये रखें रहा रहे ऱ्वासा लिए लिये लेकिन व वग़ैरह वर्ग वह वहाँ वहीं वाले वुह वे वो सकता सकते सबसे सभी साथ साबुत साभ सारा से सो संग ही हुआ हुई हुए है हैं हो होता होती होते होना होने".split(" ")),e.hi.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var r=e.wordcut;r.init(),e.hi.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(r){return isLunr2?new e.Token(r.toLowerCase()):r.toLowerCase()});var t=i.toString().toLowerCase().replace(/^\s+/,"");return r.cut(t).split("|")},e.Pipeline.registerFunction(e.hi.stemmer,"stemmer-hi"),e.Pipeline.registerFunction(e.hi.stopWordFilter,"stopWordFilter-hi")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.hy.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.hy=function(){this.pipeline.reset(),this.pipeline.add(e.hy.trimmer,e.hy.stopWordFilter)},e.hy.wordCharacters="[A-Za-z԰-֏ff-ﭏ]",e.hy.trimmer=e.trimmerSupport.generateTrimmer(e.hy.wordCharacters),e.Pipeline.registerFunction(e.hy.trimmer,"trimmer-hy"),e.hy.stopWordFilter=e.generateStopWordFilter("դու և եք էիր էիք հետո նաև նրանք որը վրա է որ պիտի են այս մեջ ն իր ու ի այդ որոնք այն կամ էր մի ես համար այլ իսկ էին ենք հետ ին թ էինք մենք նրա նա դուք եմ էի ըստ որպես ում".split(" ")),e.Pipeline.registerFunction(e.hy.stopWordFilter,"stopWordFilter-hy"),e.hy.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}(),e.Pipeline.registerFunction(e.hy.stemmer,"stemmer-hy")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.ja.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.ja=function(){this.pipeline.reset(),this.pipeline.add(e.ja.trimmer,e.ja.stopWordFilter,e.ja.stemmer),r?this.tokenizer=e.ja.tokenizer:(e.tokenizer&&(e.tokenizer=e.ja.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.ja.tokenizer))};var t=new e.TinySegmenter;e.ja.tokenizer=function(i){var n,o,s,p,a,u,m,l,c,f;if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t.toLowerCase()):t.toLowerCase()});for(o=i.toString().toLowerCase().replace(/^\s+/,""),n=o.length-1;n>=0;n--)if(/\S/.test(o.charAt(n))){o=o.substring(0,n+1);break}for(a=[],s=o.length,c=0,l=0;c<=s;c++)if(u=o.charAt(c),m=c-l,u.match(/\s/)||c==s){if(m>0)for(p=t.segment(o.slice(l,c)).filter(function(e){return!!e}),f=l,n=0;n=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor++,!0}return!1},in_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e<=s&&e>=i&&(e-=i,t[e>>3]&1<<(7&e)))return this.cursor--,!0}return!1},out_grouping:function(t,i,s){if(this.cursors||e>3]&1<<(7&e)))return this.cursor++,!0}return!1},out_grouping_b:function(t,i,s){if(this.cursor>this.limit_backward){var e=r.charCodeAt(this.cursor-1);if(e>s||e>3]&1<<(7&e)))return this.cursor--,!0}return!1},eq_s:function(t,i){if(this.limit-this.cursor>1),f=0,l=o0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n+_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n+_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},find_among_b:function(t,i){for(var s=0,e=i,n=this.cursor,u=this.limit_backward,o=0,h=0,c=!1;;){for(var a=s+(e-s>>1),f=0,l=o=0;m--){if(n-l==u){f=-1;break}if(f=r.charCodeAt(n-1-l)-_.s[m])break;l++}if(f<0?(e=a,h=l):(s=a,o=l),e-s<=1){if(s>0||e==s||c)break;c=!0}}for(;;){var _=t[s];if(o>=_.s_size){if(this.cursor=n-_.s_size,!_.method)return _.result;var b=_.method();if(this.cursor=n-_.s_size,b)return _.result}if((s=_.substring_i)<0)return 0}},replace_s:function(t,i,s){var e=s.length-(i-t),n=r.substring(0,t),u=r.substring(i);return 
r=n+s+u,this.limit+=e,this.cursor>=i?this.cursor+=e:this.cursor>t&&(this.cursor=t),e},slice_check:function(){if(this.bra<0||this.bra>this.ket||this.ket>this.limit||this.limit>r.length)throw"faulty slice operation"},slice_from:function(r){this.slice_check(),this.replace_s(this.bra,this.ket,r)},slice_del:function(){this.slice_from("")},insert:function(r,t,i){var s=this.replace_s(r,t,i);r<=this.bra&&(this.bra+=s),r<=this.ket&&(this.ket+=s)},slice_to:function(){return this.slice_check(),r.substring(this.bra,this.ket)},eq_v_b:function(r){return this.eq_s_b(r.length,r)}}}},r.trimmerSupport={generateTrimmer:function(r){var t=new RegExp("^[^"+r+"]+"),i=new RegExp("[^"+r+"]+$");return function(r){return"function"==typeof r.update?r.update(function(r){return r.replace(t,"").replace(i,"")}):r.replace(t,"").replace(i,"")}}}}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.ta.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");e.ta=function(){this.pipeline.reset(),this.pipeline.add(e.ta.trimmer,e.ta.stopWordFilter,e.ta.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.ta.stemmer))},e.ta.wordCharacters="஀-உஊ-ஏஐ-ஙச-ட஠-னப-யர-ஹ஺-ிீ-௉ொ-௏ௐ-௙௚-௟௠-௩௪-௯௰-௹௺-௿a-zA-Za-zA-Z0-90-9",e.ta.trimmer=e.trimmerSupport.generateTrimmer(e.ta.wordCharacters),e.Pipeline.registerFunction(e.ta.trimmer,"trimmer-ta"),e.ta.stopWordFilter=e.generateStopWordFilter("அங்கு அங்கே அது அதை அந்த அவர் அவர்கள் அவள் அவன் அவை ஆக ஆகவே ஆகையால் ஆதலால் ஆதலினால் ஆனாலும் ஆனால் இங்கு இங்கே இது இதை இந்த இப்படி இவர் இவர்கள் இவள் இவன் இவை இவ்வளவு உனக்கு உனது உன் உன்னால் எங்கு எங்கே எது எதை எந்த எப்படி எவர் எவர்கள் எவள் எவன் எவை எவ்வளவு எனக்கு எனது எனவே என் என்ன என்னால் ஏது ஏன் தனது தன்னால் தானே தான் நாங்கள் நாம் நான் நீ நீங்கள்".split(" ")),e.ta.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.ta.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.ta.stemmer,"stemmer-ta"),e.Pipeline.registerFunction(e.ta.stopWordFilter,"stopWordFilter-ta")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.te.min.js: -------------------------------------------------------------------------------- 1 | !function(e,t){"function"==typeof define&&define.amd?define(t):"object"==typeof exports?module.exports=t():t()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.te=function(){this.pipeline.reset(),this.pipeline.add(e.te.trimmer,e.te.stopWordFilter,e.te.stemmer),this.searchPipeline&&(this.searchPipeline.reset(),this.searchPipeline.add(e.te.stemmer))},e.te.wordCharacters="ఀ-ఄఅ-ఔక-హా-ౌౕ-ౖౘ-ౚౠ-ౡౢ-ౣ౦-౯౸-౿఼ఽ్ౝ౷౤౥",e.te.trimmer=e.trimmerSupport.generateTrimmer(e.te.wordCharacters),e.Pipeline.registerFunction(e.te.trimmer,"trimmer-te"),e.te.stopWordFilter=e.generateStopWordFilter("అందరూ అందుబాటులో అడగండి అడగడం అడ్డంగా అనుగుణంగా అనుమతించు అనుమతిస్తుంది అయితే ఇప్పటికే ఉన్నారు ఎక్కడైనా ఎప్పుడు ఎవరైనా ఎవరో ఏ ఏదైనా ఏమైనప్పటికి ఒక ఒకరు కనిపిస్తాయి కాదు కూడా గా గురించి చుట్టూ చేయగలిగింది తగిన తర్వాత దాదాపు దూరంగా నిజంగా పై ప్రకారం ప్రక్కన మధ్య మరియు మరొక మళ్ళీ మాత్రమే మెచ్చుకో వద్ద వెంట వేరుగా వ్యతిరేకంగా సంబంధం".split(" ")),e.te.stemmer=function(){return function(e){return"function"==typeof e.update?e.update(function(e){return e}):e}}();var t=e.wordcut;t.init(),e.te.tokenizer=function(r){if(!arguments.length||null==r||void 0==r)return[];if(Array.isArray(r))return r.map(function(t){return isLunr2?new e.Token(t.toLowerCase()):t.toLowerCase()});var i=r.toString().toLowerCase().replace(/^\s+/,"");return t.cut(i).split("|")},e.Pipeline.registerFunction(e.te.stemmer,"stemmer-te"),e.Pipeline.registerFunction(e.te.stopWordFilter,"stopWordFilter-te")}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.th.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var r="2"==e.version[0];e.th=function(){this.pipeline.reset(),this.pipeline.add(e.th.trimmer),r?this.tokenizer=e.th.tokenizer:(e.tokenizer&&(e.tokenizer=e.th.tokenizer),this.tokenizerFn&&(this.tokenizerFn=e.th.tokenizer))},e.th.wordCharacters="[฀-๿]",e.th.trimmer=e.trimmerSupport.generateTrimmer(e.th.wordCharacters),e.Pipeline.registerFunction(e.th.trimmer,"trimmer-th");var t=e.wordcut;t.init(),e.th.tokenizer=function(i){if(!arguments.length||null==i||void 0==i)return[];if(Array.isArray(i))return i.map(function(t){return r?new e.Token(t):t});var n=i.toString().replace(/^\s+/,"");return t.cut(n).split("|")}}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.vi.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r():r()(e.lunr)}(this,function(){return function(e){if(void 0===e)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===e.stemmerSupport)throw new Error("Lunr stemmer support is not present. 
Please include / require Lunr stemmer support before this script.");e.vi=function(){this.pipeline.reset(),this.pipeline.add(e.vi.stopWordFilter,e.vi.trimmer)},e.vi.wordCharacters="[A-Za-ẓ̀͐́͑̉̃̓ÂâÊêÔôĂ-ăĐ-đƠ-ơƯ-ư]",e.vi.trimmer=e.trimmerSupport.generateTrimmer(e.vi.wordCharacters),e.Pipeline.registerFunction(e.vi.trimmer,"trimmer-vi"),e.vi.stopWordFilter=e.generateStopWordFilter("là cái nhưng mà".split(" "))}}); -------------------------------------------------------------------------------- /docs/assets/javascripts/lunr/min/lunr.zh.min.js: -------------------------------------------------------------------------------- 1 | !function(e,r){"function"==typeof define&&define.amd?define(r):"object"==typeof exports?module.exports=r(require("@node-rs/jieba")):r()(e.lunr)}(this,function(e){return function(r,t){if(void 0===r)throw new Error("Lunr is not present. Please include / require Lunr before this script.");if(void 0===r.stemmerSupport)throw new Error("Lunr stemmer support is not present. Please include / require Lunr stemmer support before this script.");var i="2"==r.version[0];r.zh=function(){this.pipeline.reset(),this.pipeline.add(r.zh.trimmer,r.zh.stopWordFilter,r.zh.stemmer),i?this.tokenizer=r.zh.tokenizer:(r.tokenizer&&(r.tokenizer=r.zh.tokenizer),this.tokenizerFn&&(this.tokenizerFn=r.zh.tokenizer))},r.zh.tokenizer=function(n){if(!arguments.length||null==n||void 0==n)return[];if(Array.isArray(n))return n.map(function(e){return i?new r.Token(e.toLowerCase()):e.toLowerCase()});t&&e.load(t);var o=n.toString().trim().toLowerCase(),s=[];e.cut(o,!0).forEach(function(e){s=s.concat(e.split(" "))}),s=s.filter(function(e){return!!e});var u=0;return s.map(function(e,t){if(i){var n=o.indexOf(e,u),s={};return s.position=[n,e.length],s.index=t,u=n,new r.Token(e,s)}return e})},r.zh.wordCharacters="\\w一-龥",r.zh.trimmer=r.trimmerSupport.generateTrimmer(r.zh.wordCharacters),r.Pipeline.registerFunction(r.zh.trimmer,"trimmer-zh"),r.zh.stemmer=function(){return function(e){return e}}(),r.Pipeline.registerFunction(r.zh.stemmer,"stemmer-zh"),r.zh.stopWordFilter=r.generateStopWordFilter("的 一 不 在 人 有 是 为 為 以 于 於 上 他 而 后 後 之 来 來 及 了 因 下 可 到 由 这 這 与 與 也 此 但 并 並 个 個 其 已 无 無 小 我 们 們 起 最 再 今 去 好 只 又 或 很 亦 某 把 那 你 乃 它 吧 被 比 别 趁 当 當 从 從 得 打 凡 儿 兒 尔 爾 该 該 各 给 給 跟 和 何 还 還 即 几 幾 既 看 据 據 距 靠 啦 另 么 麽 每 嘛 拿 哪 您 凭 憑 且 却 卻 让 讓 仍 啥 如 若 使 谁 誰 虽 雖 随 隨 同 所 她 哇 嗡 往 些 向 沿 哟 喲 用 咱 则 則 怎 曾 至 致 着 著 诸 諸 自".split(" ")),r.Pipeline.registerFunction(r.zh.stopWordFilter,"stopWordFilter-zh")}}); -------------------------------------------------------------------------------- /docs/assets/stylesheets/palette.06af60db.min.css.map: -------------------------------------------------------------------------------- 1 | 
{"version":3,"sources":["src/templates/assets/stylesheets/palette/_scheme.scss","../../../../src/templates/assets/stylesheets/palette.scss","src/templates/assets/stylesheets/palette/_accent.scss","src/templates/assets/stylesheets/palette/_primary.scss","src/templates/assets/stylesheets/utilities/_break.scss"],"names":[],"mappings":"AA2BA,cAGE,6BAME,sDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CACA,mDAAA,CACA,6DAAA,CACA,+DAAA,CACA,gEAAA,CAGA,mDAAA,CACA,gDAAA,CAGA,0BAAA,CACA,mCAAA,CAGA,iCAAA,CACA,kCAAA,CACA,mCAAA,CACA,mCAAA,CACA,kCAAA,CACA,iCAAA,CACA,+CAAA,CACA,6DAAA,CACA,gEAAA,CACA,4DAAA,CACA,4DAAA,CACA,6DAAA,CAGA,6CAAA,CAGA,+CAAA,CAGA,uDAAA,CACA,6DAAA,CACA,2DAAA,CAGA,iCAAA,CAGA,yDAAA,CACA,iEAAA,CAGA,mDAAA,CACA,mDAAA,CAGA,qDAAA,CACA,uDAAA,CAGA,8DAAA,CAKA,8DAAA,CAKA,0DAAA,CAvEA,iBCeF,CD6DE,kHAEE,YC3DJ,CDkFE,yDACE,4BChFJ,CD+EE,2DACE,4BC7EJ,CD4EE,gEACE,4BC1EJ,CDyEE,2DACE,4BCvEJ,CDsEE,yDACE,4BCpEJ,CDmEE,0DACE,4BCjEJ,CDgEE,gEACE,4BC9DJ,CD6DE,0DACE,4BC3DJ,CD0DE,2OACE,4BC/CJ,CDsDA,+FAGE,iCCpDF,CACF,CC/CE,2BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD2CN,CCrDE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDkDN,CC5DE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDyDN,CCnEE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDgEN,CC1EE,8BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDuEN,CCjFE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD8EN,CCxFE,kCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDqFN,CC/FE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD4FN,CCtGE,4BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDmGN,CC7GE,6BACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCD0GN,CCpHE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDiHN,CC3HE,4BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCD2HN,CClIE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDkIN,CCzIE,6BACE,yBAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDyIN,CChJE,8BACE,4BAAA,CACA,2CAAA,CAIE,8BAAA,CACA,qCDgJN,CCvJE,mCACE,4BAAA,CACA,2CAAA,CAOE,yBAAA,CACA,qCDoJN,CEzJE,4BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsJN,CEjKE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8JN,CEzKE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsKN,CEjLE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8KN,CEzLE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsLN,CEjME,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8LN,CEzME,mCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsMN,CEjNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8MN,CEzNE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsNN,CEjOE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8NN,CEzOE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsON,CEjPE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiPN,CEzPE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyPN,CEjQE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFiQN,CEzQE,+BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAIE,+BAAA,CACA,sCFyQN,CEjRE,oCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCF8QN,CEzRE,8BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCFsRN,CEjSE,6BACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BF0RN,CE1SE,kCACE,6BAAA,CACA,oCAAA,CACA,mCAAA,CAOE,0BAAA,CACA,sCAAA,CAKA,4BFmSN,CEpRE,sEACE,4BFuRJ,CExRE,+DACE,4BF2RJ,CE5RE,iEACE,4BF+RJ,CEhSE,gEACE,4BFmSJ,CEpSE,iEACE,4BFuSJ,CE9RA,8BACE,mDAAA,CACA,4DAAA,CACA,0DAAA,CACA,oDAAA,CACA,2DAAA,CAGA,4BF+RF,CE5RE,yCACE,+BF8RJ,CE3RI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCF+RN,CG3MI,mCD1EA,+CACE,8CFwRJ,CErRI,qDACE,8CFuRN,CElRE,iEACE,mCFoRJ,CACF,CGtNI,sCDvDA,uCACE,oCFgRJ,CACF,CEvQA,8BACE,kDAAA,CACA,4DAAA,CACA,wDAAA,CACA,oDAAA,CACA,6DAAA,CAGA,4BFwQF,CErQE,yCACE,+BFuQJ,CEpQI,kDAEE,0CAAA,CACA,sCAAA,CAFA,mCFwQN,CEjQE,yCACE,6CFmQJ,CG5NI,0CDhCA,8CACE,gDF
+PJ,CACF,CGjOI,0CDvBA,iFACE,6CF2PJ,CACF,CGzPI,sCDKA,uCACE,6CFuPJ,CACF","file":"palette.css"} -------------------------------------------------------------------------------- /docs/css/fonts.css: -------------------------------------------------------------------------------- 1 | .md-typeset code, 2 | .md-typeset kbd, 3 | .md-typeset pre { 4 | font-feature-settings: "kern", "liga"; 5 | font-variant-ligatures: normal; 6 | } 7 | 8 | :root{ 9 | --md-text-font:"Roboto"; 10 | --md-code-font:"" 11 | } 12 | -------------------------------------------------------------------------------- /docs/css/jupyter-notebook.css: -------------------------------------------------------------------------------- 1 | .jp-RenderedHTMLCommon p { 2 | margin: 0pt; 3 | } 4 | 5 | .jupyter-wrapper .jp-CodeCell .jp-Cell-inputWrapper .jp-InputPrompt { 6 | display: none; 7 | } 8 | 9 | .jupyter-wrapper .jp-CodeCell .jp-Cell-outputWrapper .jp-OutputPrompt { 10 | display: none; 11 | } 12 | 13 | .jupyter-wrapper .jp-OutputArea-output pre { 14 | border-left: solid 5px #e0e0e0; 15 | padding-left: 5pt; 16 | } 17 | -------------------------------------------------------------------------------- /docs/css/material.css: -------------------------------------------------------------------------------- 1 | /* More space at the bottom of the page. */ 2 | .md-main__inner { 3 | margin-bottom: 1.5rem; 4 | } 5 | -------------------------------------------------------------------------------- /docs/css/mkdocstrings.css: -------------------------------------------------------------------------------- 1 | /* Indentation. */ 2 | div.doc-contents:not(.first) { 3 | padding-left: 25px; 4 | border-left: 4px solid rgba(230, 230, 230); 5 | margin-bottom: 80px; 6 | } 7 | 8 | /* Avoid breaking parameters name, etc. in table cells. 
*/ 9 | td code { 10 | word-break: normal !important; 11 | } 12 | -------------------------------------------------------------------------------- /docs/css/style.css: -------------------------------------------------------------------------------- 1 | /* Mark external links as such (also in nav) */ 2 | a.external:hover::after, a.md-nav__link[href^="https:"]:hover::after { 3 | /* https://primer.style/octicons/link-external-16 */ 4 | background-image: url('data:image/svg+xml,'); 5 | height: 0.8em; 6 | width: 0.8em; 7 | margin-left: 0.2em; 8 | content: ' '; 9 | display: inline-block; 10 | } 11 | 12 | /* More space at the bottom of the page */ 13 | .md-main__inner { 14 | margin-bottom: 1.5rem; 15 | } 16 | -------------------------------------------------------------------------------- /docs/css/tables_style.css: -------------------------------------------------------------------------------- 1 | th, td { 2 | border: 1px solid var(--md-typeset-table-color); 3 | border-spacing: 0; 4 | border-bottom: none; 5 | border-left: none; 6 | border-top: none; 7 | } 8 | 9 | th { 10 | background:var(--md-primary-fg-color); 11 | color:white; 12 | } 13 | 14 | .md-typeset table:not([class]) th { 15 | font-weight: 200; 16 | } 17 | 18 | .md-typeset__table { 19 | line-height: 1; 20 | } 21 | 22 | .md-typeset__table table:not([class]) { 23 | font-size: .74rem; 24 | border-right: none; 25 | } 26 | 27 | .md-typeset__table table:not([class]) td, 28 | .md-typeset__table table:not([class]) th { 29 | padding: 9px; 30 | } 31 | 32 | /* light mode alternating table bg colors */ 33 | .md-typeset__table tr:nth-child(2n) { 34 | background-color: #f8f8f8; 35 | } 36 | 37 | /* dark mode alternating table bg colors */ 38 | [data-md-color-scheme="slate"] .md-typeset__table tr:nth-child(2n) { 39 | background-color: hsla(var(--md-hue),25%,25%,1) 40 | } 41 | -------------------------------------------------------------------------------- /docs/datasets/datasets.csv: -------------------------------------------------------------------------------- 1 | Name,Classes, PDF, Data, Code, Auto-download 2 | ucdavis-icdm19,5,[pdf](https://arxiv.org/pdf/1812.09761.pdf), [data](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), [code](https://github.com/shrezaei/Semi-supervised-Learning-QUIC-), :octicons-x-12: 3 | mirage19, 20, [pdf](http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-2019.html), -, :heavy_check_mark: 4 | mirage22, 9, [pdf](http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf), [data](https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html), -, :heavy_check_mark: 5 | utmobilenet21, 17, [pdf](https://ieeexplore.ieee.org/abstract/document/9490678/), [data](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), [code](https://github.com/YuqiangHeng/UTMobileNetTraffic2021), :octicons-x-12: 6 | -------------------------------------------------------------------------------- /docs/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log1.png -------------------------------------------------------------------------------- /docs/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log2.png 
-------------------------------------------------------------------------------- /docs/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_log3.png -------------------------------------------------------------------------------- /docs/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run1.png -------------------------------------------------------------------------------- /docs/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run2.png -------------------------------------------------------------------------------- /docs/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/aim_run3.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_mirage19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage19.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_mirage22.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_mirage22.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_ucdavis-icdm19.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_ucdavis-icdm19.png -------------------------------------------------------------------------------- /docs/figs/dataset_properties_utmobilenet21.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/figs/dataset_properties_utmobilenet21.png -------------------------------------------------------------------------------- /docs/github-mark/github-mark-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark-white.png -------------------------------------------------------------------------------- /docs/github-mark/github-mark-white.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/github-mark/github-mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/github-mark/github-mark.png 
-------------------------------------------------------------------------------- /docs/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/index.md.DEPRECATED: -------------------------------------------------------------------------------- 1 | This website documents code and data artifacts related to the IMC23 submission #132 titled 2 | 3 | !!! quote "" 4 | __Contrastive Learning and Data Augmentation in Traffic Classification via a Flowpic Representation__ 5 | *Replicating and Reproducing “A Few Shots Traffic Classification with mini-FlowPic Augmentations” 6 | from IMC’22* 7 | 8 | Our submission investigates the role of data 9 | augmentation by using both supervised 10 | and contrastive learning techniques 11 | across [4 datasets](datasets/install). 12 | 13 | It replicates and reproduces the following paper 14 | from the IMC22 program 15 | 16 | 17 | ``` 18 | @inproceedings{10.1145/3517745.3561436, 19 | author = {Horowicz, Eyal and Shapira, Tal and Shavitt, Yuval}, 20 | title = {A Few Shots Traffic Classification with Mini-FlowPic Augmentations}, 21 | year = {2022}, 22 | isbn = {9781450392594}, 23 | publisher = {Association for Computing Machinery}, 24 | address = {New York, NY, USA}, 25 | url = {https://doi.org/10.1145/3517745.3561436}, 26 | doi = {10.1145/3517745.3561436}, 27 | booktitle = {Proceedings of the 22nd ACM Internet Measurement Conference}, 28 | pages = {647–654}, 29 | numpages = {8}, 30 | location = {Nice, France}, 31 | series = {IMC '22} 32 | } 33 | ``` 34 | 35 | We adopt the same traffic representation used in :material-file-document-outline:`imc22-paper`, 36 | namely a Flowpic -- a summarization of the packet size time series of a flow by means of 37 | frequency histograms extracted from consecutive time windows of the flow -- 38 | applied to the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset. 39 | 40 | In the first part of the submission we investigate how augmentations 41 | affect classification performance -- the study considers 3 image transformations (*rotation, 42 | color jitter, horizontal flip*) and 3 time series transformations (*time shift, packet drop, change rtt*) 43 | applied to packet timestamps -- when used either in a fully supervised setting or via 44 | contrastive learning. 45 | 46 | !!! info "Key takeaways from reproducibility" 47 | 1. We can only partially reproduce the results from :material-file-document-outline:`imc22-paper` on [`ucdavis-icdm19`](datasets/#ucdavis-icdm19). 48 | Specifically, we uncover a data shift present in the dataset itself which explains our results; 49 | yet, we cannot comment on why this was not detected in :material-file-document-outline:`imc22-paper`. 50 | 51 | 2. Simply based on the [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) dataset, and differently 52 | from the argumentation presented in :material-file-document-outline:`imc22-paper`, 53 | we do not find statistically significant differences across the different augmentations. 54 | 55 | 3. Contrastive learning can help to "bootstrap" a model in an unsupervised fashion, yet 56 | relying on more samples is beneficial to boost performance. 57 | 58 | Then, in the second part of the submission we replicate the 59 | analysis testing the same 6 augmentations across 3 other datasets. 60 | 61 | !!!
info "Key takeaways from replicability" 62 | Using multiple datasets allow to confirm the argument of the :material-file-document-outline:`imc22-paper`, i.e., 63 | *Change RTT* augmentation used in [`ucdavis-icdm19`](datasets/#ucdavis-icdm19) 64 | is superior to the alternative transformations presented in the paper. 65 | 66 | 67 | ## Website conventions 68 | 69 | * :material-file-document-outline:`imc22-paper` is used to the reference the replicated/reproduced paper. 70 | 71 | * WIP (Work in progress) and :construction: suggest documentation that is incomplete or not yet available. 72 | 73 | * :material-link-off: suggests a link is expected to be added but is not yet available. 74 | -------------------------------------------------------------------------------- /docs/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/modeling/figs/aim_home-page.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_home-page.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log1.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log2.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_log3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_log3.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run1.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run2.png -------------------------------------------------------------------------------- /docs/modeling/figs/aim_run3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/modeling/figs/aim_run3.png -------------------------------------------------------------------------------- /docs/objects.inv: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/objects.inv -------------------------------------------------------------------------------- /docs/overrides/arrow-right-solid.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark-white.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark-white.png -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark-white.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/overrides/github-mark/github-mark.png -------------------------------------------------------------------------------- /docs/overrides/github-mark/github-mark.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /docs/overrides/main.html: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/overrides/main.html.DEPRECATED: -------------------------------------------------------------------------------- 1 | {% extends "base.html" %} 2 | 3 | {% block content %} 4 | 5 | {% if page.nb_url %} 6 | 7 | {% include ".icons/simple/jupyter.svg" %} 8 | {% include ".icons/material/download.svg" %} 9 | 10 | {% endif %} 11 | 12 | {{ super() }} 13 | 14 | 35 | 36 | 45 | 46 | 47 | {% endblock content %} 48 | -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure10b_icdm_finetuning_per_class_metrics_on_human_files/figure10b_icdm_finetuning_per_class_metrics_on_human_5_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure11_dropout_impact_supervised_setting_files/figure11_dropout_impact_supervised_setting_15_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure1_flowpic_example_files/figure1_flowpic_example_8_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure3_confusion_matrix_supervised_setting_files/figure3_confusion_matrix_supervised_setting_5_0.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure4_ucdavis_per_class_average_flowpic_files/figure4_ucdavis_per_class_average_flowpic_12_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure5_ucdavis_augmentations_comparison_files/figure5_ucdavis_augmentations_comparison_6_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure6_augmentations_comparison_across_datasets_critical_distance_files/figure6_augmentations_comparison_across_datasets_critical_distance_6_1.png -------------------------------------------------------------------------------- /docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure7_augmentations_comparison_across_datasets_average_rank_files/figure7_augmentations_comparison_across_datasets_average_rank_8_0.png -------------------------------------------------------------------------------- 
/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/papers/imc23/notebooks/figure8_ucdavis_kde_on_pkts_size_files/figure8_ucdavis_kde_on_pkts_size_10_0.png -------------------------------------------------------------------------------- /docs/sitemap.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | -------------------------------------------------------------------------------- /docs/sitemap.xml.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/tcbenchstack/tcbench/45b77da46d7e3796c9f67981bede30a42c0c1071/docs/sitemap.xml.gz -------------------------------------------------------------------------------- /notebooks/imc23/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 tcbenchstack team 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | # pyproject.toml 2 | 3 | [build-system] 4 | requires = ["setuptools>=61.0.0", "wheel"] 5 | build-backend = "setuptools.build_meta" 6 | 7 | [project] 8 | name = "tcbench" 9 | version = "0.0.22" 10 | description = "An ML/DL framework for Traffic Classification" 11 | readme = "README.md" 12 | authors = [{ name = "Alessandro Finamore", email = "alessandro.finamore@huawei.com" }] 13 | license = { file = "LICENSE" } 14 | classifiers = [ 15 | "License :: OSI Approved :: MIT License", 16 | "Programming Language :: Python", 17 | "Programming Language :: Python :: 3", 18 | ] 19 | keywords = ["machine learning", "deep learning", "traffic classification", "time series"] 20 | dependencies = [ 21 | "aim == 3.17.4", 22 | "autorank", 23 | "click", 24 | "dask <= 2023.5.1", 25 | "distributed", 26 | "numpy", 27 | "pandas==2.0.2", 28 | "pyarrow==12.0.0", 29 | "pyyaml", 30 | "requests", 31 | "rich", 32 | "rich-click", 33 | "scikit-learn", 34 | "statsmodels", 35 | "torch==2.0.1", 36 | "torchsummary", 37 | "torchvision==0.15.2", 38 | "xgboost==1.7.5" 39 | ] 40 | requires-python = ">=3.9" 41 | 42 | [project.optional-dependencies] 43 | dev = [ 44 | "black", 45 | "bumpver", 46 | "click-plugins", 47 | "isort", 48 | "jupyterlab", 49 | "matplotlib", 50 | "mkdocs", 51 | "mkdocs-autorefs", 52 | "mkdocs-glightbox", 53 | "mkdocs-jupyter", 54 | "mkdocs-material", 55 | "mkdocs-material-extensions", 56 | "mkdocs-table-reader-plugin", 57 | "mkdocstrings", 58 | "mkdocstrings-python", 59 | "pip-tools", 60 | "pytest", 61 | "pytest-helpers-namespace", 62 | "seaborn" 63 | ] 64 | 65 | [project.urls] 66 | Homepage = "https://tcbenchstack.github.io/tcbench/" 67 | 68 | [project.scripts] 69 | tcbench = "tcbench.cli.main:main" 70 | 71 | [tool.bumpver] 72 | current_version = "0.0.22" 73 | version_pattern = "MAJOR.MINOR.PATCH" 74 | commit_message = "bump version {old_version} -> {new_version}" 75 | tag_message = "{new_version}" 76 | tag_scope = "default" 77 | pre_commit_hook = "" 78 | post_commit_hook = "" 79 | commit = true 80 | tag = true 81 | push = false 82 | 83 | [tool.bumpver.file_patterns] 84 | "pyproject.toml" = ['current_version = "{version}"', 'version = "{version}"'] 85 | "src/tcbench/__init__.py" = ["{version}"] 86 | -------------------------------------------------------------------------------- /src/tcbench/FIGSHARE_RESOURCES.yml: -------------------------------------------------------------------------------- 1 | imc23: 2 | notebooks: 3 | url: "https://figshare.com/ndownloader/files/42550111" 4 | md5: "224764907e634fcab3ae1e20bc58bbbf" 5 | dst_folder: "./" 6 | 7 | pytest_resources: 8 | url: "https://figshare.com/ndownloader/files/42538741" 9 | md5: "3a2482ad6359ba48be8728221e42f727" 10 | dst_folder: "./tests" 11 | 12 | ml_artifacts: 13 | url: "https://figshare.com/ndownloader/files/42538675" 14 | md5: "a4b53b2d0b95995c5f14bbf2f8489c7c" 15 | dst_folder: "notebooks/imc23" 16 | -------------------------------------------------------------------------------- /src/tcbench/__init__.py: -------------------------------------------------------------------------------- 1 | import pathlib 2 | 3 | __version__ = "0.0.22" 4 | 5 | DEFAULT_AIM_REPO = pathlib.Path("./aim-repo") 6 | DEFAULT_ARTIFACTS_FOLDER = pathlib.Path("./aim-repo/artifacts") 7 | 8 | DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS = ( 9 | "noaug", 10 | "rotate", 11 | "horizontalflip", 12 | 
"colorjitter", 13 | "packetloss", 14 | "changertt", 15 | "timeshift", 16 | ) 17 | DEFAULT_CAMPAIGN_AUGATLOAD_SEEDS = (12345, 42, 666) 18 | DEFAULT_CAMPAIGN_AUGATLOAD_FLOWPICDIMS = (32, 64, 1500) 19 | DEFAULT_CAMPAIGN_AUGATLOAD_PKTSERIESLEN = (10, 30) 20 | 21 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_FLOWPICDIMS = (32, 64, 1500) 22 | DEFAULT_CAMPAING_CONTRALEARNANDFINETUNE_SEEDS_CONTRALEARN = (12345, 1, 2, 3, 4) 23 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_SEEDS_FINETUNE = (12345, 1, 2, 3, 4) 24 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_AUGMENTATIONS = "changertt,timeshift" 25 | DEFAULT_CAMPAIGN_CONTRALEARNANDFINETUNE_VALID_AUGMENTATIONS = tuple([ 26 | aug_name 27 | for aug_name in DEFAULT_CAMPAIGN_AUGATLOAD_AUGMENTATIONS 28 | if aug_name != "noaug" 29 | ]) 30 | 31 | from tcbench.libtcdatasets.datasets_utils import ( 32 | get_datasets_root_folder, 33 | get_dataset_folder, 34 | DATASETS, 35 | load_parquet, 36 | ) 37 | 38 | from tcbench.modeling import ( 39 | MODELING_DATASET_TYPE, 40 | MODELING_INPUT_REPR_TYPE, 41 | MODELING_METHOD_TYPE, 42 | ) 43 | -------------------------------------------------------------------------------- /src/tcbench/cli/__init__.py: -------------------------------------------------------------------------------- 1 | def get_rich_console(): 2 | from rich.console import Console 3 | from rich.theme import Theme 4 | import sys 5 | import pathlib 6 | 7 | curr_module = sys.modules[__name__] 8 | folder_module = pathlib.Path(curr_module.__file__).parent 9 | return Console(theme=Theme.read(folder_module / "rich.theme")) 10 | 11 | 12 | console = get_rich_console() 13 | -------------------------------------------------------------------------------- /src/tcbench/cli/clickutils.py: -------------------------------------------------------------------------------- 1 | import rich_click as click 2 | 3 | from typing import List, Dict, Any 4 | 5 | from tcbench import DATASETS 6 | from tcbench.modeling import MODELING_METHOD_TYPE, MODELING_INPUT_REPR_TYPE 7 | 8 | 9 | def _create_choice(enumeration): 10 | return click.Choice(list(map(lambda x: x.value, enumeration)), case_sensitive=False) 11 | 12 | 13 | def _create_choice_callback(enumeration): 14 | return lambda c, p, v: enumeration.from_str(v) 15 | 16 | 17 | CLICK_TYPE_DATASET_NAME = _create_choice(DATASETS) 18 | CLICK_CALLBACK_DATASET_NAME = _create_choice_callback(DATASETS) 19 | 20 | CLICK_TYPE_METHOD_NAME = _create_choice(MODELING_METHOD_TYPE) 21 | CLICK_CALLBACK_METHOD_NAME = _create_choice_callback(MODELING_METHOD_TYPE) 22 | 23 | CLICK_TYPE_INPUT_REPR = _create_choice(MODELING_INPUT_REPR_TYPE) 24 | CLICK_CALLBACK_INPUT_REPR = _create_choice_callback(MODELING_INPUT_REPR_TYPE) 25 | 26 | CLICK_CALLBACK_TOINT = lambda c, p, v: int(v) 27 | 28 | 29 | def compose_help_string_from_list(items:List[str]) -> str: 30 | """Compose a string from a list""" 31 | return "\[" + f'{"|".join(items)}' + "]." 
32 | 33 | 34 | def convert_params_dict_to_list(params:Dict[str,Any], skip_params:List[str]=None) -> List[str]: 35 | """Convert a dictionary of (name, value) parameter pairs into a list of "--name value" strings""" 36 | if skip_params is None: 37 | skip_params = set() 38 | 39 | l = [] 40 | for par_name, par_value in params.items(): 41 | if par_name in skip_params or par_value == False or par_value is None: 42 | continue 43 | par_name = par_name.replace("_", "-") 44 | if par_value == True: 45 | l.append(f"--{par_name}") 46 | else: 47 | l.append(f"--{par_name} {str(par_value)}") 48 | 49 | return l 50 | 51 | 52 | def help_append_choices(help_string:str, values:List[str]) -> str: 53 | """Append to a help string a styled version of a list of values""" 54 | text = "|".join([f"[bold]{text}[/bold]" for text in values]) 55 | return f"{help_string} [yellow]Choices: [{text}][/yellow]" 56 | -------------------------------------------------------------------------------- /src/tcbench/cli/command_fetchartifacts.py: -------------------------------------------------------------------------------- 1 | import rich_click as click 2 | 3 | import pathlib 4 | import shutil 5 | import tempfile 6 | 7 | from tcbench.cli import clickutils 8 | from tcbench.cli import console 9 | 10 | click.rich_click.SHOW_ARGUMENTS = True 11 | click.rich_click.USE_RICH_MARKUP = True 12 | 13 | FIGSHARE_RESOURCES_FNAME = "FIGSHARE_RESOURCES.yml" 14 | 15 | def _copy_file(src, dst): 16 | keyword = "installing" 17 | if pathlib.Path(dst).exists(): 18 | keyword = "overwriting" 19 | print(f"{keyword}: {dst}") 20 | shutil.copy2(src, dst) 21 | 22 | @click.command("fetch-artifacts") 23 | @click.pass_context 24 | def fetchartifacts(ctx): 25 | """Download from figshare and install all required artifacts.""" 26 | from tcbench.libtcdatasets import datasets_utils 27 | import requests 28 | 29 | check_exists = [ 30 | pathlib.Path("./src/tcbench"), 31 | pathlib.Path("./tests"), 32 | pathlib.Path("./notebooks/tutorials"), 33 | pathlib.Path("./pyproject.toml"), 34 | ] 35 | if any(not folder.exists() for folder in check_exists): 36 | raise RuntimeError("Run the command from within the cloned github repository") 37 | 38 | fname = datasets_utils._get_module_folder().parent / FIGSHARE_RESOURCES_FNAME 39 | data = datasets_utils.load_yaml(fname) 40 | for primary_key in data: 41 | for secondary_key in data[primary_key]: 42 | print(f"fetching: {primary_key} / {secondary_key}") 43 | 44 | params = data[primary_key][secondary_key] 45 | 46 | url = params["url"] 47 | dst_folder = params["dst_folder"] 48 | with tempfile.TemporaryDirectory() as tmpfolder: 49 | tmpfolder = pathlib.Path(tmpfolder) 50 | try: 51 | path = datasets_utils.download_url(url, tmpfolder) 52 | except requests.exceptions.SSLError: 53 | path = datasets_utils.download_url(url, tmpfolder, verify=False) 54 | 55 | untar_folder = tmpfolder / "__untar__" 56 | datasets_utils.untar(path, untar_folder) 57 | path.unlink() 58 | shutil.copytree(untar_folder, dst_folder, copy_function=_copy_file, dirs_exist_ok=True) 59 | -------------------------------------------------------------------------------- /src/tcbench/cli/main.py: -------------------------------------------------------------------------------- 1 | from pkg_resources import iter_entry_points 2 | 3 | import rich_click as click 4 | 5 | import tcbench 6 | from tcbench import cli 7 | from click_plugins import with_plugins 8 | 9 | 10 | @with_plugins(iter_entry_points('click_command_tree')) 11 | @click.group(invoke_without_command=True) 12 | @click.pass_context 13 | 
@click.option( 14 | "--version", "show_version", is_flag=True, help="Show tcbench version and exit." 15 | ) 16 | def main(ctx, show_version): 17 | if show_version: 18 | import sys 19 | cli.console.print(f"version: {tcbench.__version__}") 20 | sys.exit() 21 | 22 | 23 | from tcbench.cli.command_datasets import datasets 24 | from tcbench.cli.command_singlerun import singlerun 25 | from tcbench.cli.command_campaign import campaign 26 | from tcbench.cli.command_aimrepo import aimrepo 27 | from tcbench.cli.command_fetchartifacts import fetchartifacts 28 | 29 | main.add_command(datasets) 30 | main.add_command(singlerun) 31 | main.add_command(campaign) 32 | main.add_command(aimrepo) 33 | main.add_command(fetchartifacts) 34 | 35 | if __name__ == "__main__": 36 | main() 37 | -------------------------------------------------------------------------------- /src/tcbench/cli/rich.theme: -------------------------------------------------------------------------------- 1 | [styles] 2 | progress.description = none 3 | progress.filesize = none 4 | progress.filesize.total = none 5 | progress.download = none 6 | progress.elapsed = none 7 | progress.percentage = none 8 | progress.remaining = none 9 | progress.data.speed = none 10 | progress.spinner = none 11 | repr.ellipsis = none 12 | repr.indent = none 13 | repr.error = none 14 | repr.str = none 15 | repr.brace = none 16 | repr.comma = none 17 | repr.ipv4 = none 18 | repr.ipv6 = none 19 | repr.eui48 = none 20 | repr.eui64 = none 21 | repr.tag_start = none 22 | repr.tag_name = none 23 | repr.tag_contents = none 24 | repr.tag_end = none 25 | repr.attrib_name = none 26 | repr.attrib_equal = none 27 | repr.attrib_value = none 28 | repr.number = none 29 | repr.number_complex = none 30 | repr.bool_true = none 31 | repr.bool_false = none 32 | repr.none = none 33 | repr.url = none 34 | repr.uuid = none 35 | repr.call = none 36 | repr.path = none 37 | repr.filename = none 38 | rule.line = none 39 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/__init__.py: -------------------------------------------------------------------------------- 1 | # from . import datasets_utils 2 | # 3 | # from . import ucdavis_icdm19_csv_to_parquet 4 | # from . import ucdavis_icdm19_generate_splits 5 | # 6 | # from . import utmobilenet21_csv_to_parquet 7 | # from . import utmobilenet21_generate_splits 8 | # 9 | # from . import mirage19_json_to_parquet 10 | # from . import mirage19_generate_splits 11 | # 12 | # from . import mirage22_json_to_parquet 13 | # from . 
import mirage22_generate_splits 14 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/mirage22_json_to_parquet.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import numpy as np 3 | 4 | import argparse 5 | import pathlib 6 | import tempfile 7 | 8 | from tcbench.libtcdatasets import mirage19_json_to_parquet 9 | 10 | 11 | def postprocess(df: pd.DataFrame) -> pd.DataFrame: 12 | """Process the loaded MIRAGE JSON by 13 | (1) adding a background class; 14 | (2) adding an "app" column with label information, and encoding it as pandas category 15 | """ 16 | df = df.assign( 17 | app=np.where( 18 | df["android_name"] == df["flow_metadata_bf_label"], 19 | df["android_name"], 20 | "background", 21 | ) 22 | ) 23 | df = df.assign( 24 | app=np.where( 25 | df["flow_metadata_bf_activity"] == "Unknown", "background", df["app"] 26 | ) 27 | ) 28 | df = df.assign( 29 | app=df["app"].astype("category"), 30 | packets=df["packet_data_l4_payload_bytes"].apply(len), 31 | ) 32 | return df 33 | 34 | 35 | def main(args: argparse.Namespace) -> None: 36 | if (args.input_folder / "MIRAGE-COVID-CCMA-2022").exists(): 37 | args.input_folder = args.input_folder / "MIRAGE-COVID-CCMA-2022" / "Raw_JSON" 38 | 39 | df = mirage19_json_to_parquet.main( 40 | args.input_folder, save_as=None, workers=args.num_workers 41 | ) 42 | df = postprocess(df) 43 | 44 | fname = args.output_folder / "mirage22.parquet" 45 | if not fname.parent.exists(): 46 | fname.parent.mkdir(parents=True) 47 | print(f"saving: {fname}") 48 | df.to_parquet(fname) 49 | 50 | 51 | def cli_parser(): 52 | return mirage19_json_to_parquet.cli_parser() 53 | 54 | if __name__ == "__main__": 55 | args = cli_parser().parse_args() 56 | main(args) 57 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/DATASETS.yml: -------------------------------------------------------------------------------- 1 | ucdavis-icdm19: 2 | num_classes: 5 3 | paper: "https://arxiv.org/pdf/1812.09761.pdf" 4 | website: "https://github.com/shrezaei/Semi-supervised-Learning-QUIC-" 5 | data: "https://drive.google.com/drive/folders/1Pvev0hJ82usPh6dWDlz7Lv8L6h3JpWhE" 6 | data_curated: "https://figshare.com/ndownloader/files/42438621" 7 | data_curated_md5: "36294e70968fe0a30a054e626cb87afe" 8 | 9 | mirage19: 10 | num_classes: 20 11 | paper: "http://wpage.unina.it/antonio.montieri/pubs/MIRAGE_ICCCS_2019.pdf" 12 | website: "https://traffic.comics.unina.it/mirage/mirage-2019.html" 13 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-2019_traffic_dataset_downloadable_v2.tar.gz" 14 | # data_curated: "" 15 | # data_curated_md5: "" 16 | 17 | mirage22: 18 | num_classes: 9 19 | paper: "http://wpage.unina.it/antonio.montieri/pubs/_C__IEEE_CAMAD_2021___Traffic_Classification_Covid_app.pdf" 20 | website: "https://traffic.comics.unina.it/mirage/mirage-covid-ccma-2022.html" 21 | data: "https://traffic.comics.unina.it/mirage/MIRAGE/MIRAGE-COVID-CCMA-2022.zip" 22 | # data_curated: "" 23 | # data_curated_md5: "" 24 | 25 | utmobilenet21: 26 | num_classes: 17 27 | paper: "https://ieeexplore.ieee.org/abstract/document/9490678/" 28 | website: "https://github.com/YuqiangHeng/UTMobileNetTraffic2021" 29 | data: "https://utexas.app.box.com/s/okrimcsz1mn9ec4j667kbb00d9gt16ii" 30 | data_curated: "https://figshare.com/ndownloader/files/42438624" 31 | data_curated_md5: "789b01c4f7dedfbb781b89e6f2dcbb1a" 32 | 
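The DATASETS.yml registry above backs the dataset helpers re-exported from the package root (see src/tcbench/__init__.py earlier in this dump). A minimal, hedged usage sketch follows; the load_parquet() call shape is an assumption inferred from those re-exports and from how the tests reference tcbench.DATASETS, so treat it as illustrative rather than the documented API:

import tcbench

# enum members such as tcbench.DATASETS.UCDAVISICDM19 mirror the YAML keys above;
# this assumes the curated dataset has already been installed
df = tcbench.load_parquet(tcbench.DATASETS.UCDAVISICDM19)  # assumed signature
print(df["app"].value_counts())  # "app" is the label column per the schema files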
-------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/DATASETS_FILES_MD5.yml: -------------------------------------------------------------------------------- 1 | ucdavis-icdm19: 2 | ucdavis-icdm19.parquet: "f4333724f03a0ccaa7d87ba878148f34" 3 | imc23: 4 | test_split_human.parquet: "5a6f27a51d6dde6bb3b59d6757c00c1f" 5 | test_split_script.parquet: "93a49d51513f7b1dec0dc7ccf6f139b5" 6 | train_split_0.parquet: "98bc4a849c2f2e3abf259be26eed2f06" 7 | train_split_1.parquet: "2f7b849325c1f4d710b761d2d48a84f2" 8 | train_split_2.parquet: "1d74dc9dc389a72a0f6b29e2be3b72e3" 9 | train_split_3.parquet: "323bb4504d23d25b25ef31b8b76205f5" 10 | train_split_4.parquet: "5a0b00ed58e365551f9ef12956caa0d0" 11 | 12 | mirage19: 13 | mirage19.parquet: "aa0c4cbffc6f5dffba6718a7ab43f451" 14 | imc23: 15 | mirage19_filtered_minpkts10_splits.parquet: "12c83fb39eb61924aa411ca2d663eb94" 16 | mirage19_filtered_minpkts10.parquet: "75851ec3312751a8a3dca79a4c24e2fb" 17 | 18 | mirage22: 19 | mirage22.parquet: "4b8f5bfa528989ee857934f7611b052e" 20 | imc23: 21 | mirage22_filtered_minpkts10.parquet: "e117cbe37eba5c1235e4df787cf3b2d6" 22 | mirage22_filtered_minpkts10_splits.parquet: "a445db52fe1ec342fed7eb1d765c9825" 23 | mirage22_filtered_minpkts1000.parquet: "6312e82a0526071ab269a92d5eb745c6" 24 | mirage22_filtered_minpkts1000_splits.parquet: "21396f8a9d5033cf049407c4dc573195" 25 | 26 | #utmobilenet21: 27 | # utmobilenet21.parquet: "863e35d558c7ef9f4f5d0e552a57f3cb" 28 | # imc23: 29 | # utmobilenet21_filtered_minpkts10.parquet: "102e125e3236a1e8211bfd5e8272afdb" 30 | # utmobilenet21_filtered_minpkts10_splits.parquet: "3ea1378753f1b4e1f2773bd750e56d1b" -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/ucdavis-icdm19.yml: -------------------------------------------------------------------------------- 1 | __all__: 2 | row_id: 3 | dtype: int 4 | description: "Unique row id" 5 | app: 6 | dtype: category 7 | description: "Label of the flow" 8 | flow_id: 9 | dtype: str 10 | description: "Original filename" 11 | partition: 12 | dtype: str 13 | description: "Partition related to the flow" 14 | num_pkts: 15 | dtype: int 16 | description: "Number of packets in the flow" 17 | duration: 18 | dtype: float 19 | description: "Duration of the flow" 20 | bytes: 21 | dtype: int 22 | description: "Number of bytes of the flow" 23 | unixtime: 24 | dtype: str 25 | description: "Absolute time of each packet" 26 | timetofirst: 27 | dtype: np.array 28 | description: "Delta between a packet and the first packet of the flow" 29 | pkts_size: 30 | dtype: np.array 31 | description: "Packet size time series" 32 | pkts_dir: 33 | dtype: np.array 34 | description: "Packet direction time series" 35 | pkts_iat: 36 | dtype: np.array 37 | description: "Packet inter-arrival time series" 38 | -------------------------------------------------------------------------------- /src/tcbench/libtcdatasets/resources/utmobilenet21.yml: -------------------------------------------------------------------------------- 1 | __unfiltered__: 2 | row_id: 3 | dtype: int 4 | description: "Unique flow id" 5 | src_ip: 6 | dtype: str 7 | description: "Source ip of the flow" 8 | src_port: 9 | dtype: int 10 | description: "Source port of the flow" 11 | dst_ip: 12 | dtype: str 13 | description: "Destination ip of the flow" 14 | dst_port: 15 | dtype: int 16 | description: "Destination port of the flow" 17 | ip_proto: 18 | dtype: int 19 | description: "Protocol 
of the flow (TCP or UDP)" 20 | first: 21 | dtype: float 22 | description: "Timestamp of the first packet" 23 | last: 24 | dtype: float 25 | description: "Timestamp of the last packet" 26 | duration: 27 | dtype: float 28 | description: "Duration of the flow" 29 | packets: 30 | dtype: int 31 | description: "Number of packets in the flow" 32 | bytes: 33 | dtype: int 34 | description: "Number of bytes in the flow" 35 | partition: 36 | dtype: str 37 | description: "From which folder the flow was originally stored" 38 | location: 39 | dtype: str 40 | description: "Label originally provided by the dataset (see the related paper for details)" 41 | fname: 42 | dtype: str 43 | description: "Original filename where the packets of the flow come from" 44 | app: 45 | dtype: category 46 | description: "Final label of the flow, encoded as pandas category" 47 | pkts_size: 48 | dtype: np.array 49 | description: "Packet size time series" 50 | pkts_dir: 51 | dtype: np.array 52 | description: "Packet diretion time series" 53 | timetofirst: 54 | dtype: np.array 55 | description: "Delta between the each packet timestamp the first packet of the flow" 56 | 57 | __filtered__: 58 | row_id: 59 | dtype: int 60 | description: "Unique flow id" 61 | src_ip: 62 | dtype: str 63 | description: "Source ip of the flow" 64 | src_port: 65 | dtype: int 66 | description: "Source port of the flow" 67 | dst_ip: 68 | dtype: str 69 | description: "Destination ip of the flow" 70 | dst_port: 71 | dtype: int 72 | description: "Destination port of the flow" 73 | ip_proto: 74 | dtype: int 75 | description: "Protocol of the flow (TCP or UDP)" 76 | first: 77 | dtype: float 78 | description: "Timestamp of the first packet" 79 | last: 80 | dtype: float 81 | description: "Timestamp of the last packet" 82 | duration: 83 | dtype: float 84 | description: "Duration of the flow" 85 | packets: 86 | dtype: int 87 | description: "Number of packets in the flow" 88 | bytes: 89 | dtype: int 90 | description: "Number of bytes in the flow" 91 | partition: 92 | dtype: str 93 | description: "From which folder the flow was originally stored" 94 | location: 95 | dtype: str 96 | description: "Label originally provided by the dataset (see the related paper for details)" 97 | fname: 98 | dtype: str 99 | description: "Original filename where the packets of the flow come from" 100 | app: 101 | dtype: category 102 | description: "Final label of the flow, encoded as pandas category" 103 | pkts_size: 104 | dtype: np.array 105 | description: "Packet size time series" 106 | pkts_dir: 107 | dtype: np.array 108 | description: "Packet diretion time series" 109 | timetofirst: 110 | dtype: np.array 111 | description: "Delta between the each packet timestamp the first packet of the flow" 112 | 113 | __splits__: 114 | train_indexes: 115 | dtype: np.array 116 | description: "row_id of training samples" 117 | val_indexes: 118 | dtype: np.array 119 | description: "row_id of validation samples" 120 | test_indexes: 121 | dtype: np.array 122 | description: "row_id of test samples" 123 | split_index: 124 | dtype: int 125 | description: "Split id" 126 | -------------------------------------------------------------------------------- /src/tcbench/modeling/__init__.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class MODELING_DATASET_TYPE(Enum): 5 | """An enumeration to specify which type of dataset to load""" 6 | 7 | TRAIN_VAL = "train_val_datasets" 8 | TEST = "test_dataset" 9 | TRAIN_VAL_LEFTOVER = 
"train_val_leftover_dataset" 10 | FINETUNING = "for_finetuning_dataset" 11 | 12 | 13 | class MODELING_INPUT_REPR_TYPE(Enum): 14 | FLOWPIC = "flowpic" 15 | PKTSERIES = "pktseries" 16 | 17 | @classmethod 18 | def from_str(cls, text): 19 | for member in cls.__members__.values(): 20 | if member.value == text: 21 | return member 22 | return None 23 | 24 | def __str__(self): 25 | return self.value 26 | 27 | 28 | class MODELING_METHOD_TYPE(Enum): 29 | MONOLITHIC = "monolithic" 30 | XGBOOST = "xgboost" 31 | SIMCLR = "simclr" 32 | 33 | @classmethod 34 | def from_str(cls, text): 35 | for member in cls.__members__.values(): 36 | if member.value == text: 37 | return member 38 | return None 39 | 40 | def __str__(self): 41 | return self.value 42 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pathlib 3 | import hashlib 4 | 5 | from tcbench.modeling import utils 6 | 7 | 8 | def pytest_configure(): 9 | pytest.DIR_RESOURCES = (pathlib.Path(__file__).parent / "resources").resolve() 10 | 11 | 12 | @pytest.helpers.register 13 | def verify_deeplearning_model(fname, reference_fname, epsilon=None): 14 | """Verifying trained model weights""" 15 | import torch 16 | 17 | net = torch.load(fname) 18 | ref_net = torch.load(reference_fname) 19 | 20 | assert len(net) == len(ref_net) 21 | assert sorted(net.keys()) == sorted(ref_net.keys()) 22 | 23 | for name in net.keys(): 24 | weights = net[name] 25 | ref_weights = ref_net[name] 26 | if epsilon is None: 27 | assert (weights.flatten() == ref_weights.flatten()).all() 28 | else: 29 | assert ((weights.flatten() - ref_weights.flatten()).abs() < epsilon).all() 30 | 31 | 32 | def _get_md5(fname): 33 | data = pathlib.Path(fname).read_bytes() 34 | md5 = hashlib.md5(data) 35 | return md5.hexdigest() 36 | 37 | 38 | @pytest.helpers.register 39 | def verify_md5_model(fname, reference_fname): 40 | assert _get_md5(fname) == _get_md5(reference_fname) 41 | 42 | 43 | @pytest.helpers.register 44 | def verify_reports( 45 | folder, reference_folder, with_train=True, with_val=True, with_test=True 46 | ): 47 | """Verify classification report and confusion matrixes""" 48 | import pandas as pd 49 | 50 | # note: by using folder / test*.csv automatically 51 | # skips leftover if not found 52 | 53 | def _add_file(folder, fname, fname_list): 54 | if not (folder / fname).exists(): 55 | raise RuntimeError(f"missing {fname}") 56 | fname_list.append(fname) 57 | 58 | fnames = [] 59 | if with_train: 60 | _add_file(folder, "train_class_rep.csv", fnames) 61 | _add_file(folder, "train_conf_mtx.csv", fnames) 62 | if with_val: 63 | _add_file(folder, "val_class_rep.csv", fnames) 64 | _add_file(folder, "val_conf_mtx.csv", fnames) 65 | if with_test: 66 | tmp = list(folder.glob("test*.csv")) 67 | assert len(tmp) != 0 68 | fnames.extend([item.name for item in tmp]) 69 | 70 | if len(fnames) == 0: 71 | raise RuntimeError("empty list of files to verify") 72 | 73 | for fname in fnames: 74 | df = pd.read_csv(folder / fname) 75 | ref_df = pd.read_csv(reference_folder / fname) 76 | assert (df == ref_df).all().all() 77 | 78 | 79 | @pytest.helpers.register 80 | def match_run_hashes(folder, reference_folder, params_to_match=['seed', 'split_index', 'flowpic_dim', 'aug_name']): 81 | 82 | ref_catalog = { 83 | path.name: utils.load_yaml(path / 'params.yml') 84 | for path in reference_folder.iterdir() 85 | } 86 | 87 | pairs = [] 88 | for path in folder.iterdir(): 89 
| curr_params = utils.load_yaml(path / 'params.yml') 90 | curr_hash = path.name 91 | 92 | curr_pair = [curr_hash, None] 93 | for ref_hash, ref_params in ref_catalog.items(): 94 | tmp1 = {} 95 | tmp2 = {} 96 | for param_name in params_to_match: 97 | tmp1[param_name] = str(curr_params[param_name]) 98 | tmp2[param_name] = str(ref_params[param_name]) 99 | 100 | if tmp1 == tmp2: 101 | curr_pair[-1] = ref_hash 102 | del(ref_catalog[ref_hash]) 103 | break 104 | 105 | pairs.append(curr_pair) 106 | 107 | return pairs 108 | -------------------------------------------------------------------------------- /tests/test_augmentations_at_loading_xgboost.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import pytest 4 | import pathlib 5 | 6 | import tcbench 7 | from tcbench.modeling import ( 8 | utils, 9 | run_augmentations_at_loading_xgboost, 10 | MODELING_DATASET_TYPE, 11 | ) 12 | 13 | 14 | @pytest.mark.parametrize( 15 | "params, expected_artifacts_folder", 16 | [ 17 | ( 18 | [ 19 | f"--dataset {str(tcbench.DATASETS.UCDAVISICDM19)}", 20 | "--flow-representation pktseries", 21 | "--max-n-pkts 10", 22 | "--split-index 0", 23 | "--seed 12345", 24 | ], 25 | pytest.DIR_RESOURCES 26 | / pathlib.Path( 27 | "_reference_aim_run/ucdavis-icdm19/xgboost/noaugmentation-timeseries/5fa59c129a3e4aa6bb9b7640" 28 | ), 29 | ), 30 | ], 31 | ) 32 | def test_main(tmp_path, params, expected_artifacts_folder): 33 | params.append(f"--artifacts-folder {tmp_path}/artifacts") 34 | params.append(f"--aim-repo {tmp_path}") 35 | 36 | parser = run_augmentations_at_loading_xgboost.cli_parser() 37 | args = parser.parse_args((" ".join(params)).split()) 38 | 39 | state = run_augmentations_at_loading_xgboost.main(args) 40 | 41 | # the output folder is based on the aim run hash 42 | artifacts_folder = next((tmp_path / 'artifacts').iterdir()) 43 | 44 | # verifying model files 45 | fname = f"xgb_model_split_{args.split_index}.json" 46 | # pytest.helpers.verify_md5_model( 47 | # artifacts_folder / fname, expected_artifacts_folder / fname 48 | # ) 49 | 50 | pytest.helpers.verify_reports(artifacts_folder, expected_artifacts_folder) 51 | -------------------------------------------------------------------------------- /tests/test_contrastive_learning_and_finetune.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | 3 | import pytest 4 | import torch 5 | import pathlib 6 | 7 | import tcbench 8 | from tcbench.modeling import utils, run_contrastive_learning_and_finetune 9 | 10 | 11 | @pytest.mark.parametrize( 12 | "params, expected_artifacts_folder", 13 | [ 14 | ( 15 | dict( 16 | dataset_name=tcbench.DATASETS.UCDAVISICDM19, 17 | learning_rate=0.001, 18 | batch_size=32, 19 | flowpic_dim=32, 20 | split_idx=0, 21 | seed=12345, 22 | loss_temperature=0.07, 23 | with_dropout=False, 24 | projection_layer_dim=30, 25 | ), 26 | pytest.DIR_RESOURCES 27 | / pathlib.Path( 28 | "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d" 29 | ), 30 | ), 31 | ], 32 | ) 33 | def test_pretrain(tmp_path, params, expected_artifacts_folder): 34 | params["artifacts_folder"] = tmp_path 35 | utils.seed_everything(params.get("seed", 12345)) 36 | 37 | state = run_contrastive_learning_and_finetune.pretrain(**params) 38 | 39 | # verifying trained model weights 40 | fname = f'best_model_weights_pretrain_split_{params["split_idx"]}.pt' 41 | pytest.helpers.verify_deeplearning_model( 42 | tmp_path / fname, 
expected_artifacts_folder / fname 43 | ) 44 | 45 | 46 | 47 | @pytest.mark.parametrize( 48 | "params, expected_artifacts_folder", 49 | [ 50 | ( 51 | [ 52 | "--dataset ucdavis-icdm19", 53 | "--contrastive-learning-seed 12345", 54 | "--finetune-seed 12345", 55 | "--batch-size 32", 56 | "--flowpic-dim 32", 57 | "--split-index 0", 58 | "--suppress-dropout", 59 | "--projection-layer-dim 30", 60 | ], 61 | pytest.DIR_RESOURCES 62 | / pathlib.Path( 63 | "_reference_aim_run/ucdavis-icdm19/simclr-dropout-and-projection/9e2dc14286ab452f992e5c2d" 64 | ), 65 | ), 66 | ], 67 | ) 68 | def test_main(tmp_path, params, expected_artifacts_folder): 69 | params.append(f"--artifacts-folder {tmp_path}/artifacts") 70 | 71 | parser = run_contrastive_learning_and_finetune.cli_parser() 72 | args = parser.parse_args(" ".join(params).split()) 73 | args.method = "simclr" 74 | args.augmentations = args.augmentations.split(",") 75 | 76 | run_contrastive_learning_and_finetune.main(args) 77 | 78 | # artifacts are stored into a doubly nested folder 79 | # as / 80 | artifacts_folder = next((tmp_path / 'artifacts').iterdir()) 81 | 82 | fname_models = sorted(path.name for path in artifacts_folder.glob("*.pt")) 83 | expected_fname_models = sorted( 84 | path.name for path in expected_artifacts_folder.glob("*.pt") 85 | ) 86 | assert fname_models == expected_fname_models 87 | 88 | for fname in fname_models: 89 | pytest.helpers.verify_deeplearning_model( 90 | artifacts_folder / fname, expected_artifacts_folder / fname 91 | ) 92 | 93 | # verifying reports 94 | # note: by using tmp_path / test*.csv automatically 95 | # skips leftover if suppressed with the command line option 96 | pytest.helpers.verify_reports( 97 | artifacts_folder, 98 | expected_artifacts_folder, 99 | with_train=False, 100 | with_val=False, 101 | with_test=True, 102 | ) 103 | -------------------------------------------------------------------------------- /tests/test_modeling_backbone.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import pathlib 3 | 4 | from tcbench.modeling import backbone 5 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini 6 | from tcbench.modeling.methods import ContrastiveLearningTrainer 7 | 8 | @pytest.mark.parametrize( 9 | "net1, net2, expected", 10 | [ 11 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), True), 12 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False), 13 | ( 14 | LeNet5FlowpicIMC22_Mini(), 15 | ContrastiveLearningTrainer.prepare_net_for_train(LeNet5FlowpicIMC22_Mini()), 16 | False, 17 | ), 18 | ( 19 | LeNet5FlowpicIMC22_Mini(), 20 | ContrastiveLearningTrainer.init_train(LeNet5FlowpicIMC22_Mini(), None)[0], 21 | False, 22 | ), 23 | ], 24 | ) 25 | def test_have_same_layers_and_types(net1, net2, expected): 26 | assert backbone.have_same_layers_and_types(net1, net2) == expected 27 | 28 | 29 | @pytest.mark.parametrize( 30 | "num_classes1, num_classes2", 31 | [ 32 | (5, 5), 33 | (None, 5), 34 | (5, None), 35 | (None, None), 36 | ], 37 | ) 38 | def test_have_same_layers_and_types_after_reloading_from_file( 39 | tmp_path, num_classes1, num_classes2 40 | ): 41 | net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1) 42 | net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1) 43 | net1.save_weights(tmp_path / "weights.pt") 44 | 45 | net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2) 46 | net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt") 47 | assert 
backbone.have_same_layers_and_types(net1, net2) 48 | 49 | 50 | @pytest.mark.parametrize( 51 | "net1, net2, expected", 52 | [ 53 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(), False), 54 | (LeNet5FlowpicIMC22_Mini(), LeNet5FlowpicIMC22_Mini(num_classes=5), False), 55 | ], 56 | ) 57 | def test_are_equal(net1, net2, expected): 58 | assert backbone.are_equal(net1, net2) == expected 59 | 60 | 61 | @pytest.mark.parametrize( 62 | "num_classes1, num_classes2", 63 | [ 64 | (5, 5), 65 | (None, 5), 66 | (5, None), 67 | (None, None), 68 | ], 69 | ) 70 | def test_are_equal_after_reloading_from_file(tmp_path, num_classes1, num_classes2): 71 | net1 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes1) 72 | net1 = ContrastiveLearningTrainer.prepare_net_for_train(net1) 73 | net1.save_weights(tmp_path / "weights.pt") 74 | 75 | net2 = LeNet5FlowpicIMC22_Mini(num_classes=num_classes2) 76 | net2, _ = ContrastiveLearningTrainer.init_train(net2, None, tmp_path / "weights.pt") 77 | assert backbone.are_equal(net1, net2) 78 | 79 | 80 | @pytest.mark.parametrize( 81 | "net", 82 | [ 83 | LeNet5FlowpicIMC22_Mini(), 84 | LeNet5FlowpicIMC22_Mini(num_classes=5), 85 | ], 86 | ) 87 | def test_clone_net(net): 88 | new_net = backbone.clone_net(net) 89 | assert backbone.are_equal(net, new_net) 90 | assert id(net) != id(new_net) 91 | -------------------------------------------------------------------------------- /tests/test_modeling_methods.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | import torch 4 | 5 | from tcbench.modeling import backbone, methods 6 | from tcbench.modeling.backbone import LeNet5FlowpicIMC22_Mini 7 | 8 | 9 | @pytest.mark.parametrize( 10 | "net, optimizer_class", 11 | [ 12 | (LeNet5FlowpicIMC22_Mini(), None), 13 | (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam), 14 | ], 15 | ) 16 | def test_simclr_init_pretrain(net, optimizer_class): 17 | net = LeNet5FlowpicIMC22_Mini() 18 | 19 | optimizer = None 20 | if optimizer_class: 21 | optimizer = optimizer_class(net.parameters(), lr=0.001) 22 | 23 | new_net1, optimizer1 = methods.ContrastiveLearningTrainer.init_train(net, optimizer) 24 | new_net2, optimizer2 = methods.SimCLRTrainer.init_pretrain(net, optimizer) 25 | # the two networks need to have the same architecture, 26 | # but the weights will not be the same overall because 27 | # new layers are added 28 | 29 | assert backbone.have_same_layers_and_types(new_net1, new_net2) 30 | assert id(new_net1) != id(new_net2) 31 | 32 | # compare first convolutional layer 33 | assert (list(new_net1.parameters())[0] == list(new_net2.parameters())[0]).all() 34 | 35 | # compare last linear layer weights (bias is 0) 36 | assert (list(new_net1.parameters())[-2] != list(new_net2.parameters())[-2]).any() 37 | 38 | if optimizer: 39 | assert id(optimizer1) != id(optimizer2) 40 | assert id(optimizer) != id(optimizer1) 41 | assert id(optimizer) != id(optimizer2) 42 | params1 = optimizer1.param_groups[0]["params"] 43 | params2 = optimizer2.param_groups[0]["params"] 44 | assert len(params1) == len(params2) 45 | assert (params1[0] == params2[0]).all() 46 | 47 | 48 | @pytest.mark.parametrize( 49 | "net, optimizer_class", 50 | [ 51 | (LeNet5FlowpicIMC22_Mini(), None), 52 | (LeNet5FlowpicIMC22_Mini(), torch.optim.Adam), 53 | ], 54 | ) 55 | def test_simclr_init_finetune(net, optimizer_class): 56 | net = LeNet5FlowpicIMC22_Mini() 57 | 58 | optimizer = None 59 | if optimizer_class: 60 | optimizer = optimizer_class(net.parameters(), lr=0.001) 61 | 62 | new_net, new_optimizer = 
methods.SimCLRTrainer.init_finetune( 63 | net, optimizer=optimizer, num_classes=5 64 | ) 65 | assert not new_net.is_equal_to(net) 66 | assert new_net.classifier is not None 67 | if optimizer: 68 | assert len(new_optimizer.param_groups[0]["params"]) == 2 69 | for p1, p2 in zip( 70 | new_net.classifier.parameters(), new_optimizer.param_groups[0]["params"] 71 | ): 72 | assert (p1 == p2).all() 73 | --------------------------------------------------------------------------------
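A closing usage note (not a file from the repository): the modules above are standard pytest suites, so with the dev extras from pyproject.toml installed they would typically be run from the repository root with:

python -m pytest tests/

The pytest.helpers.register decorators used throughout tests/conftest.py are provided by the pytest-helpers-namespace plugin listed in those dev dependencies, and the reference artifacts the helpers compare against are installed under ./tests by the fetch-artifacts CLI command shown earlier.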