├── .gitignore ├── LICENSE ├── README.md ├── demo.gif ├── dev_scripts ├── README.md ├── cflow.py ├── dataset_generation │ ├── DatasetDescription.py │ ├── __init__.py │ ├── bytecode2csv.py │ ├── create_code_dataset.py │ ├── normalize_source.py │ └── upload_raw_dataset.py ├── evaluate_cflow.py ├── prepare_dataset.py ├── sample_jsons │ ├── py36-sample-data.json │ ├── py36-sample-segmentation.json │ └── py36-sample-statement.json ├── segmentation │ ├── SegmentationConfiguration.py │ ├── tokenize_seg.py │ ├── train_mlm.py │ ├── train_seg.py │ └── train_tokenizer.py ├── statement │ ├── README.md │ ├── StatementConfiguration.py │ ├── __init__.py │ ├── tokenize_seq2seq.py │ ├── tokenizer │ │ ├── special_tokens_map.json │ │ ├── tokenizer.json │ │ └── tokenizer_config.json │ ├── train_seq2seq.py │ └── train_tokenizer_auto.py └── train_models.py ├── poetry.lock ├── pylingual ├── __init__.py ├── control_flow_reconstruction │ ├── cfg.py │ ├── cft.py │ ├── source.py │ ├── structure.py │ ├── templates │ │ ├── Block.py │ │ ├── CDG.py │ │ ├── Conditional.py │ │ ├── Exception.py │ │ ├── Generator.py │ │ ├── Loop.py │ │ ├── Match.py │ │ ├── With.py │ │ └── __init__.py │ └── utils.py ├── decompiler.py ├── decompiler_config.yaml ├── editable_bytecode │ ├── EditableBytecode.py │ ├── Instruction.py │ ├── PYCFile.py │ ├── __init__.py │ ├── bytecode_patches │ │ ├── __init__.py │ │ ├── fix_indirect_jump.py │ │ ├── fix_unreachable.py │ │ ├── remove_docstrings.py │ │ ├── remove_extended_arg.py │ │ ├── remove_nop.py │ │ └── replace_firstlno.py │ ├── control_flow_graph.py │ └── utils.py ├── equivalence_check.py ├── main.py ├── masking │ ├── ast_masker.py │ ├── global_masker.py │ └── model_disasm.py ├── models.py ├── segmentation │ ├── segmentation_search_strategies.py │ └── sliding_window.py └── utils │ ├── ascii_art.py │ ├── generate_bytecode.py │ ├── get_logger.py │ ├── lazy.py │ ├── lists.py │ ├── tracked_list.py │ ├── use_escape_sequences.py │ └── version.py ├── pyproject.toml └── test ├── Conditional.py ├── Exception.py ├── Generator.py ├── Loop.py ├── Match.py └── With.py /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/.gitignore -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/LICENSE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/README.md -------------------------------------------------------------------------------- /demo.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/demo.gif -------------------------------------------------------------------------------- /dev_scripts/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/README.md -------------------------------------------------------------------------------- /dev_scripts/cflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/cflow.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/DatasetDescription.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/DatasetDescription.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/__init__.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/bytecode2csv.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/bytecode2csv.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/create_code_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/create_code_dataset.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/normalize_source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/normalize_source.py -------------------------------------------------------------------------------- /dev_scripts/dataset_generation/upload_raw_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/dataset_generation/upload_raw_dataset.py -------------------------------------------------------------------------------- /dev_scripts/evaluate_cflow.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/evaluate_cflow.py -------------------------------------------------------------------------------- /dev_scripts/prepare_dataset.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/prepare_dataset.py -------------------------------------------------------------------------------- /dev_scripts/sample_jsons/py36-sample-data.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/sample_jsons/py36-sample-data.json -------------------------------------------------------------------------------- /dev_scripts/sample_jsons/py36-sample-segmentation.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/sample_jsons/py36-sample-segmentation.json -------------------------------------------------------------------------------- /dev_scripts/sample_jsons/py36-sample-statement.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/sample_jsons/py36-sample-statement.json -------------------------------------------------------------------------------- /dev_scripts/segmentation/SegmentationConfiguration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/segmentation/SegmentationConfiguration.py -------------------------------------------------------------------------------- /dev_scripts/segmentation/tokenize_seg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/segmentation/tokenize_seg.py -------------------------------------------------------------------------------- /dev_scripts/segmentation/train_mlm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/segmentation/train_mlm.py -------------------------------------------------------------------------------- /dev_scripts/segmentation/train_seg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/segmentation/train_seg.py -------------------------------------------------------------------------------- /dev_scripts/segmentation/train_tokenizer.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/segmentation/train_tokenizer.py -------------------------------------------------------------------------------- /dev_scripts/statement/README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/README.md -------------------------------------------------------------------------------- /dev_scripts/statement/StatementConfiguration.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/StatementConfiguration.py -------------------------------------------------------------------------------- /dev_scripts/statement/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dev_scripts/statement/tokenize_seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/tokenize_seq2seq.py -------------------------------------------------------------------------------- /dev_scripts/statement/tokenizer/special_tokens_map.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/tokenizer/special_tokens_map.json -------------------------------------------------------------------------------- /dev_scripts/statement/tokenizer/tokenizer.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/tokenizer/tokenizer.json -------------------------------------------------------------------------------- /dev_scripts/statement/tokenizer/tokenizer_config.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/tokenizer/tokenizer_config.json -------------------------------------------------------------------------------- /dev_scripts/statement/train_seq2seq.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/train_seq2seq.py -------------------------------------------------------------------------------- /dev_scripts/statement/train_tokenizer_auto.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/statement/train_tokenizer_auto.py -------------------------------------------------------------------------------- /dev_scripts/train_models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/dev_scripts/train_models.py -------------------------------------------------------------------------------- /poetry.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/poetry.lock -------------------------------------------------------------------------------- /pylingual/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/__init__.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/cfg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/cfg.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/cft.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/cft.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/source.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/source.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/structure.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/structure.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Block.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Block.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/CDG.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/CDG.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Conditional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Conditional.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Exception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Exception.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Generator.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Loop.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/Match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/Match.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/With.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/With.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/templates/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/templates/__init__.py -------------------------------------------------------------------------------- /pylingual/control_flow_reconstruction/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/control_flow_reconstruction/utils.py -------------------------------------------------------------------------------- /pylingual/decompiler.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/decompiler.py -------------------------------------------------------------------------------- /pylingual/decompiler_config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/decompiler_config.yaml -------------------------------------------------------------------------------- /pylingual/editable_bytecode/EditableBytecode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/EditableBytecode.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/Instruction.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/Instruction.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/PYCFile.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/PYCFile.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/__init__.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/__init__.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/fix_indirect_jump.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/fix_indirect_jump.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/fix_unreachable.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/fix_unreachable.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/remove_docstrings.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/remove_docstrings.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/remove_extended_arg.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/remove_extended_arg.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/remove_nop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/remove_nop.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/bytecode_patches/replace_firstlno.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/bytecode_patches/replace_firstlno.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/control_flow_graph.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/control_flow_graph.py -------------------------------------------------------------------------------- /pylingual/editable_bytecode/utils.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/editable_bytecode/utils.py -------------------------------------------------------------------------------- /pylingual/equivalence_check.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/equivalence_check.py -------------------------------------------------------------------------------- /pylingual/main.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/main.py -------------------------------------------------------------------------------- /pylingual/masking/ast_masker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/masking/ast_masker.py -------------------------------------------------------------------------------- /pylingual/masking/global_masker.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/masking/global_masker.py -------------------------------------------------------------------------------- /pylingual/masking/model_disasm.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/masking/model_disasm.py -------------------------------------------------------------------------------- /pylingual/models.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/models.py -------------------------------------------------------------------------------- /pylingual/segmentation/segmentation_search_strategies.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/segmentation/segmentation_search_strategies.py -------------------------------------------------------------------------------- /pylingual/segmentation/sliding_window.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/segmentation/sliding_window.py -------------------------------------------------------------------------------- /pylingual/utils/ascii_art.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/ascii_art.py -------------------------------------------------------------------------------- /pylingual/utils/generate_bytecode.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/generate_bytecode.py -------------------------------------------------------------------------------- /pylingual/utils/get_logger.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/get_logger.py -------------------------------------------------------------------------------- /pylingual/utils/lazy.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/lazy.py -------------------------------------------------------------------------------- /pylingual/utils/lists.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/lists.py -------------------------------------------------------------------------------- /pylingual/utils/tracked_list.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/tracked_list.py -------------------------------------------------------------------------------- /pylingual/utils/use_escape_sequences.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/use_escape_sequences.py -------------------------------------------------------------------------------- /pylingual/utils/version.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pylingual/utils/version.py -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/pyproject.toml -------------------------------------------------------------------------------- /test/Conditional.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/Conditional.py -------------------------------------------------------------------------------- /test/Exception.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/Exception.py -------------------------------------------------------------------------------- /test/Generator.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/Generator.py -------------------------------------------------------------------------------- /test/Loop.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/Loop.py -------------------------------------------------------------------------------- /test/Match.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/Match.py -------------------------------------------------------------------------------- /test/With.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/syssec-utd/pylingual/HEAD/test/With.py --------------------------------------------------------------------------------