├── .dockerignore ├── .gitignore ├── Dockerfile ├── INSTALL ├── LICENSE ├── README-docker.md ├── README.md ├── REQUIREMENTS ├── STATUS ├── app.py ├── codeart ├── .gitignore ├── README.md ├── code │ ├── arguments.py │ ├── codeart_tokenizer │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json │ ├── data_utils.py │ ├── modeling_utils.py │ ├── models │ │ ├── __init__.py │ │ ├── configuration_codeart.py │ │ ├── configuration_rabert.py │ │ ├── modeling_codeart.py │ │ ├── modeling_jtrans.py │ │ ├── modeling_rabert.py │ │ ├── tokenization_codeart.py │ │ └── tokenization_rabert.py │ ├── run.py │ └── trainer.py ├── evaluation-jtrans │ ├── malware-family-classification │ │ ├── config │ │ │ ├── eval.json │ │ │ └── train.json │ │ ├── eval_config.sh │ │ ├── evaluate_multilabel.py │ │ ├── run.py │ │ ├── run_config.sh │ │ ├── run_multilabel.py │ │ ├── temp_evaluate.py │ │ └── utils.py │ └── provenance-attribution │ │ ├── run.py │ │ └── run.sh ├── evaluation │ ├── binary-similarity │ │ ├── .gitignore │ │ ├── README.md │ │ ├── binsim_dataset.py │ │ ├── binsim_trainer.py │ │ ├── config │ │ │ └── train.json │ │ ├── dump_files.py │ │ ├── encode.sh │ │ ├── eval.py │ │ ├── inference.py │ │ ├── model_utils.py │ │ ├── pretty_print.sh │ │ ├── pretty_print_all.py │ │ ├── run.py │ │ ├── run_config.sh │ │ ├── sample_and_report.py │ │ ├── sample_and_report.sh │ │ └── utils.py │ ├── malware-family-classification │ │ ├── config │ │ │ ├── eval-2f-100c.json │ │ │ └── train-2f-100c.json │ │ ├── eval_config.sh │ │ ├── evaluate_multilabel.py │ │ ├── run_config.sh │ │ ├── run_multilabel.py │ │ └── utils.py │ └── type-inference │ │ ├── config │ │ ├── eval-O0.json │ │ └── train-O0.json │ │ ├── eval_config.sh │ │ ├── labels.json │ │ ├── run.py │ │ ├── run_config.sh │ │ └── utils.py ├── preprocess │ ├── README.md │ ├── analysis │ │ ├── expr_lang_analyzer.py │ │ └── prog_model.py │ ├── analyze.py │ ├── binary_base.py │ ├── collect.py │ ├── disassemble.py │ ├── type_inference │ │ ├── base_calculator.py │ │ ├── die_globals.py │ │ ├── gen_dataset.py │ │ ├── parse_dwarf.py │ │ ├── upload_dataset.py │ │ └── utils.py │ └── utils │ │ ├── asm_parser.py │ │ └── data_utils.py └── scripts │ ├── config │ └── default.json │ └── train_config.sh └── requirements.txt /.dockerignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | .save/ 3 | convert.ipynb 4 | *.npy 5 | 6 | __pycache__ -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .vscode/ 2 | save/ 3 | convert.ipynb 4 | *.npy 5 | 6 | */__pycache__ -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/Dockerfile -------------------------------------------------------------------------------- /INSTALL: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/INSTALL -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/LICENSE -------------------------------------------------------------------------------- /README-docker.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/README-docker.md -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/README.md -------------------------------------------------------------------------------- /REQUIREMENTS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/REQUIREMENTS -------------------------------------------------------------------------------- /STATUS: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/STATUS -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/app.py -------------------------------------------------------------------------------- /codeart/.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/.gitignore -------------------------------------------------------------------------------- /codeart/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/README.md -------------------------------------------------------------------------------- /codeart/code/arguments.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/arguments.py -------------------------------------------------------------------------------- /codeart/code/codeart_tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/codeart_tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /codeart/code/codeart_tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/codeart_tokenizer/tokenizer.json -------------------------------------------------------------------------------- /codeart/code/codeart_tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/codeart_tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /codeart/code/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/data_utils.py -------------------------------------------------------------------------------- /codeart/code/modeling_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/modeling_utils.py -------------------------------------------------------------------------------- /codeart/code/models/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/__init__.py -------------------------------------------------------------------------------- /codeart/code/models/configuration_codeart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/configuration_codeart.py -------------------------------------------------------------------------------- /codeart/code/models/configuration_rabert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/configuration_rabert.py -------------------------------------------------------------------------------- /codeart/code/models/modeling_codeart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/modeling_codeart.py -------------------------------------------------------------------------------- /codeart/code/models/modeling_jtrans.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/modeling_jtrans.py -------------------------------------------------------------------------------- /codeart/code/models/modeling_rabert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/modeling_rabert.py -------------------------------------------------------------------------------- /codeart/code/models/tokenization_codeart.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/tokenization_codeart.py -------------------------------------------------------------------------------- /codeart/code/models/tokenization_rabert.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/models/tokenization_rabert.py -------------------------------------------------------------------------------- /codeart/code/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/run.py -------------------------------------------------------------------------------- /codeart/code/trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/code/trainer.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/config/eval.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/config/eval.json -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/config/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/config/train.json -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/eval_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/eval_config.sh -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/evaluate_multilabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/evaluate_multilabel.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/run.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/run_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/run_config.sh -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/run_multilabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/run_multilabel.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/temp_evaluate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/temp_evaluate.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/malware-family-classification/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/malware-family-classification/utils.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/provenance-attribution/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/provenance-attribution/run.py -------------------------------------------------------------------------------- /codeart/evaluation-jtrans/provenance-attribution/run.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation-jtrans/provenance-attribution/run.sh -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/.gitignore: -------------------------------------------------------------------------------- 1 | report_*pool*.txt 2 | cache/** 3 | output/** -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/README.md -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/binsim_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/binsim_dataset.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/binsim_trainer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/binsim_trainer.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/config/train.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/config/train.json -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/dump_files.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/dump_files.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/encode.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/encode.sh -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/eval.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/inference.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/inference.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/model_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/model_utils.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/pretty_print.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/pretty_print.sh -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/pretty_print_all.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/pretty_print_all.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/run.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/run_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/run_config.sh -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/sample_and_report.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/sample_and_report.py -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/sample_and_report.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/sample_and_report.sh -------------------------------------------------------------------------------- /codeart/evaluation/binary-similarity/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/binary-similarity/utils.py -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/config/eval-2f-100c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/config/eval-2f-100c.json -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/config/train-2f-100c.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/config/train-2f-100c.json -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/eval_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/eval_config.sh -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/evaluate_multilabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/evaluate_multilabel.py -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/run_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/run_config.sh -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/run_multilabel.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/run_multilabel.py -------------------------------------------------------------------------------- /codeart/evaluation/malware-family-classification/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/malware-family-classification/utils.py -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/config/eval-O0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/config/eval-O0.json -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/config/train-O0.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/config/train-O0.json -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/eval_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/eval_config.sh -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/labels.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/labels.json -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/run.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/run.py -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/run_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/run_config.sh -------------------------------------------------------------------------------- /codeart/evaluation/type-inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/evaluation/type-inference/utils.py -------------------------------------------------------------------------------- /codeart/preprocess/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/README.md -------------------------------------------------------------------------------- /codeart/preprocess/analysis/expr_lang_analyzer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/analysis/expr_lang_analyzer.py -------------------------------------------------------------------------------- /codeart/preprocess/analysis/prog_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/analysis/prog_model.py -------------------------------------------------------------------------------- /codeart/preprocess/analyze.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/analyze.py -------------------------------------------------------------------------------- /codeart/preprocess/binary_base.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/binary_base.py -------------------------------------------------------------------------------- /codeart/preprocess/collect.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/collect.py -------------------------------------------------------------------------------- /codeart/preprocess/disassemble.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/disassemble.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/base_calculator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/base_calculator.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/die_globals.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/die_globals.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/gen_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/gen_dataset.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/parse_dwarf.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/parse_dwarf.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/upload_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/upload_dataset.py -------------------------------------------------------------------------------- /codeart/preprocess/type_inference/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/type_inference/utils.py -------------------------------------------------------------------------------- /codeart/preprocess/utils/asm_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/utils/asm_parser.py -------------------------------------------------------------------------------- /codeart/preprocess/utils/data_utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/preprocess/utils/data_utils.py -------------------------------------------------------------------------------- /codeart/scripts/config/default.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/scripts/config/default.json -------------------------------------------------------------------------------- /codeart/scripts/train_config.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/codeart/scripts/train_config.sh -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ziansu/codeart/HEAD/requirements.txt --------------------------------------------------------------------------------