├── .gitignore ├── Dockerfile ├── README.md ├── requirements.txt ├── varbert ├── README.md ├── __init__.py ├── cmlm │ ├── eval.py │ ├── preprocess.py │ └── training.py ├── fine-tune │ ├── __init__.py │ ├── eval.py │ ├── preprocess.py │ └── training.py ├── generate_vocab.py ├── mlm │ ├── preprocess.py │ └── training.py ├── resize_model.py └── tokenizer │ ├── preprocess.py │ └── train_bpe_tokenizer.py └── varcorpus ├── README.md ├── __init__.py └── dataset-gen ├── __init__.py ├── binary.py ├── create_dataset_splits.py ├── decompiler ├── ghidra_dec.py ├── ida_analysis.py ├── ida_dec.py ├── ida_unrecogn_func.py └── run_decompilers.py ├── dwarf_info.py ├── generate.py ├── joern_parser.py ├── log.py ├── parse_decompiled_code.py ├── pathmanager.py ├── preprocess_vars.py ├── runner.py ├── strip_types.py ├── utils.py └── variable_matching.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/.gitignore -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/Dockerfile -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/README.md -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/requirements.txt -------------------------------------------------------------------------------- /varbert/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/README.md -------------------------------------------------------------------------------- /varbert/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /varbert/cmlm/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/cmlm/eval.py -------------------------------------------------------------------------------- /varbert/cmlm/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/cmlm/preprocess.py -------------------------------------------------------------------------------- /varbert/cmlm/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/cmlm/training.py -------------------------------------------------------------------------------- /varbert/fine-tune/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /varbert/fine-tune/eval.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/fine-tune/eval.py -------------------------------------------------------------------------------- /varbert/fine-tune/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/fine-tune/preprocess.py -------------------------------------------------------------------------------- /varbert/fine-tune/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/fine-tune/training.py -------------------------------------------------------------------------------- /varbert/generate_vocab.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/generate_vocab.py -------------------------------------------------------------------------------- /varbert/mlm/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/mlm/preprocess.py -------------------------------------------------------------------------------- /varbert/mlm/training.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/mlm/training.py -------------------------------------------------------------------------------- /varbert/resize_model.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/resize_model.py -------------------------------------------------------------------------------- /varbert/tokenizer/preprocess.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/tokenizer/preprocess.py -------------------------------------------------------------------------------- /varbert/tokenizer/train_bpe_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varbert/tokenizer/train_bpe_tokenizer.py -------------------------------------------------------------------------------- /varcorpus/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/README.md -------------------------------------------------------------------------------- /varcorpus/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /varcorpus/dataset-gen/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /varcorpus/dataset-gen/binary.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/binary.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/create_dataset_splits.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/create_dataset_splits.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/decompiler/ghidra_dec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/decompiler/ghidra_dec.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/decompiler/ida_analysis.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/decompiler/ida_analysis.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/decompiler/ida_dec.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/decompiler/ida_dec.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/decompiler/ida_unrecogn_func.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/decompiler/ida_unrecogn_func.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/decompiler/run_decompilers.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/decompiler/run_decompilers.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/dwarf_info.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/dwarf_info.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/generate.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/generate.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/joern_parser.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/joern_parser.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/log.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/log.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/parse_decompiled_code.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/parse_decompiled_code.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/pathmanager.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/pathmanager.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/preprocess_vars.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/preprocess_vars.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/runner.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/runner.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/strip_types.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/strip_types.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/utils.py -------------------------------------------------------------------------------- /varcorpus/dataset-gen/variable_matching.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/sefcom/VarBERT/HEAD/varcorpus/dataset-gen/variable_matching.py --------------------------------------------------------------------------------