├── .gitignore ├── LICENSE ├── README.md ├── autofix.py ├── demo.gif ├── examples ├── example.java ├── example.js └── example.py ├── howitworks.png └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | .DS_Store 131 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | **Active development on this project has now moved to [Patchwork](https://github.com/patched-codes/patchwork). Patchwork is a more generic framework that combines tools and LLMs in agentic workflows for software development.** 2 | 3 | # AutoFix 4 | 5 | Static Analysis + LLM = AutoFix 6 | 7 | _Note: If you are looking for a cloud service for vulnerability remediation, please try [patched](https://www.patched.codes/)._ 8 | 9 | - The new [StarCoder](https://huggingface.co/bigcode/starcoderbase-1b) model is now supported. Pass `--model bigcode/starcoderbase-1b` to AutoFix to try the 1B parameter base model. 10 | 11 | - We now support using the [CodeGen2](https://github.com/salesforce/CodeGen2) model from Salesforce. Just use `--model Salesforce/codegen2-1B` with AutoFix. Note that the inference on CPU with `CodeGen2` is very slow compared to `SantaFixer`. 
"""AutoFix command-line tool: Semgrep finds a vulnerability, a code LLM proposes the patch.

Flow (see ``process``): scan the input file with Semgrep, comment out the first
reported finding and put an infill marker in its place, ask the model for
``NUM_CANDIDATES`` replacements, and keep the first candidate that Semgrep no
longer complains about.
"""
import argparse
import json
import os
import subprocess
import sys

import torch


HF_TOKEN = os.environ.get("HF_TOKEN")

# NOTE(review): in the copy of this file under review every angle-bracket
# literal below (and the infill marker) was an empty string, which cannot work:
# str.split("") raises ValueError and `"" in prompt` is always true.  The values
# here are the sentinel strings published for SantaCoder (FIM_*) and CodeGen2
# (MASK_1 / SEP / EOM) -- confirm against the model cards before release.
FIM_PREFIX = "<fim-prefix>"
FIM_MIDDLE = "<fim-middle>"
FIM_SUFFIX = "<fim-suffix>"
FIM_PAD = "<fim-pad>"
EOD = "<|endoftext|>"
MASK_1 = "<mask_1>"
SEP = "<sep>"
EOM = "<eom>"

# Marker that process() writes into the prompt file and code_generation() looks
# for.  fim_generation() splits the prompt on it before anything is tokenized,
# so any string that cannot occur in real source code works.
# NOTE(review): presumably the upstream value was "<FILL-HERE>" -- confirm.
FILL_TOKEN = "<FILL-HERE>"

# Number of candidate patches sampled per finding.
NUM_CANDIDATES = 10
# Characters of context kept on each side of the infill marker.
CONTEXT_CHARS = 1600
# Semgrep report path (relative to the working directory); deleted before each scan.
REPORT_FILE = 'results.json'
# File extension -> (Semgrep registry pack, line-comment prefix).
_LANGUAGES = {
    ".java": ("java", "//"),
    ".js": ("javascript", "//"),
    ".py": ("python", "#"),
}

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Parsed command-line options.  The default keeps the helpers importable; the
# __main__ guard at the bottom rebinds it to the real argparse result.
opt = argparse.Namespace(model=None, input=None)


def format(prefix, suffix):
    """Build the mask-style infill prompt used by the non-FIM branch of fim_generation.

    NOTE: the name shadows the ``format`` builtin; it is kept for compatibility.
    """
    return prefix + MASK_1 + suffix + EOD + SEP + MASK_1


def post_processing_fim(prefix, middle, suffix):
    """Stitch a generated ``middle`` back between its ``prefix`` and ``suffix``."""
    return f"{prefix}{middle}{suffix}"


def _uses_fim_tokens(model_name):
    """Return True for models driven through infill() (the default, bigcode/*, santafixer)."""
    return (
        model_name is None
        or model_name.startswith("bigcode")
        or model_name == "lambdasec/santafixer"
    )


def fim_generation(model, tokenizer_fim, prompt, max_new_tokens, temperature):
    """Generate NUM_CANDIDATES completions for the FILL_TOKEN hole in ``prompt``.

    Args:
        model: causal LM exposing ``generate``.
        tokenizer_fim: tokenizer matching ``model``.
        prompt: text containing FILL_TOKEN; everything before the first
            occurrence is the prefix, everything after it is the suffix.
        max_new_tokens: generation budget per candidate.
        temperature: sampling temperature.

    Returns:
        A list of strings, each ``prefix + generated_middle + suffix``.
    """
    # partition() keeps the whole remainder as suffix even if the marker shows
    # up again later, where split(...)[1] would silently drop that text.
    prefix, _, suffix = prompt.partition(FILL_TOKEN)
    if _uses_fim_tokens(getattr(opt, "model", None)):
        list_of_middles = infill(model, tokenizer_fim, (prefix, suffix), max_new_tokens, temperature)
    else:
        text = format(prefix, suffix)
        inputs = tokenizer_fim(text, return_tensors="pt", padding=True, return_token_type_ids=False,
                               max_length=1024, truncation=True).to(device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                do_sample=True,
                temperature=temperature,
                top_p=1,
                num_return_sequences=NUM_CANDIDATES,
                max_new_tokens=max_new_tokens,
                pad_token_id=tokenizer_fim.eos_token_id
            )
        pats = [r"\n\n^#", "^'''", "\n\n\n"]
        # NOTE(review): truncate_before_pattern is not a generic decode()
        # argument; presumably only the CodeGen tokenizer honours it -- confirm.
        list_of_middles = [
            extract_mask(
                tokenizer_fim.decode(tensor, skip_special_tokens=False, truncate_before_pattern=pats),
                text,
            )
            for tensor in outputs
        ]
    return [post_processing_fim(prefix, middle, suffix) for middle in list_of_middles]


def extract_mask(s: str, text: str):
    """Return what the model generated after the prompt ``text``, up to EOM.

    Args:
        s: decoded model output, expected to start with ``text``.
        text: the prompt that was fed to the model.

    Returns:
        ``s[len(text):]`` cut at the first EOM found at or after that offset,
        or up to the end of ``s`` (with a warning) when there is no EOM.
    """
    start = len(text)
    stop = s.find(EOM, start)
    if stop == -1:
        # str.find signals "missing" with -1, which is truthy, so the previous
        # `find(...) or len(s)` never fell back and chopped the last character.
        print("*** File truncated ***")
        stop = len(s)
    return s[start:stop]


def extract_fim_part(s: str):
    """Return the text between FIM_MIDDLE and the next EOD in a decoded sequence.

    Returns an empty string (with a warning) when FIM_MIDDLE is absent, and
    everything after FIM_MIDDLE when no EOD follows it.
    """
    marker_at = s.find(FIM_MIDDLE)
    if marker_at == -1:
        print("*** File truncated ***")
        return ""
    start = marker_at + len(FIM_MIDDLE)
    stop = s.find(EOD, start)
    if stop == -1:
        stop = len(s)
    return s[start:stop]


def infill(model, tokenizer_fim, prefix_suffix_tuples, max_new_tokens, temperature):
    """Sample NUM_CANDIDATES fill-in-the-middle completions per (prefix, suffix) pair.

    Args:
        model: causal LM exposing ``generate``.
        tokenizer_fim: tokenizer that knows the FIM_* tokens and has a pad token.
        prefix_suffix_tuples: one ``(prefix, suffix)`` tuple or a list of them.
        max_new_tokens: generation budget per candidate.
        temperature: sampling temperature.

    Returns:
        A flat list with the generated middle of every returned sequence.
    """
    if isinstance(prefix_suffix_tuples, tuple):
        prefix_suffix_tuples = [prefix_suffix_tuples]

    prompts = [f"{FIM_PREFIX}{prefix}{FIM_SUFFIX}{suffix}{FIM_MIDDLE}" for prefix, suffix in prefix_suffix_tuples]
    # `return_token_type_ids=False` is essential, or we get nonsense output.
    inputs = tokenizer_fim(prompts, return_tensors="pt", padding=True, return_token_type_ids=False,
                           max_length=1024, truncation=True).to(device)
    # Generation stops at the first newline token.
    end_sequence = tokenizer_fim.encode('\n', return_tensors='pt')[0]
    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            do_sample=True,
            temperature=temperature,
            top_p=1,
            num_return_sequences=NUM_CANDIDATES,
            max_new_tokens=max_new_tokens,
            pad_token_id=tokenizer_fim.eos_token_id,
            eos_token_id=end_sequence
        )
    # WARNING: cannot use skip_special_tokens, because it blows away the FIM special tokens.
    return [
        extract_fim_part(tokenizer_fim.decode(tensor, skip_special_tokens=False)) for tensor in outputs
    ]


def code_generation(prompt, max_new_tokens, temperature, model, tokenizer_fim):
    """Run fim_generation() when ``prompt`` contains FILL_TOKEN.

    Returns:
        The list produced by fim_generation(), or None (after printing a hint)
        when the prompt has no infill marker.
    """
    if FILL_TOKEN not in prompt:
        print("No infilling token found, please add the token " + FILL_TOKEN
              + " in the code at the place you want the model to do infilling")
        return None
    return fim_generation(model, tokenizer_fim, prompt, max_new_tokens, temperature)


def _run_semgrep(config_str, target, report_path):
    """Scan ``target`` with the ``p/<config_str>`` pack and return the parsed JSON report.

    The command is passed as an argument list (no shell), so file names with
    spaces or shell metacharacters are handled safely.  Exits with status 4 when
    Semgrep is not installed or leaves no report behind.
    """
    if os.path.exists(report_path):
        os.remove(report_path)
    command = ["semgrep", "--config", "p/" + config_str, target, "--output", report_path, "--json"]
    try:
        subprocess.run(command, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL, check=False)
    except FileNotFoundError:
        print("semgrep executable not found, install it with: pip install -r requirements.txt")
        sys.exit(4)
    if not os.path.isfile(report_path):
        print("semgrep did not produce a report for " + target)
        sys.exit(4)
    with open(report_path, 'r') as jf:
        return json.load(jf)


def _finding_label(finding):
    """Return the first CWE of a Semgrep result, falling back to its rule id."""
    cwe = finding.get("extra", {}).get("metadata", {}).get("cwe")
    if isinstance(cwe, (list, tuple)):
        cwe = cwe[0] if cwe else None
    if cwe is None:
        return str(finding.get("check_id", "unknown"))
    return str(cwe)


def _split_around_finding(file_content, finding):
    """Split ``file_content`` into (prefix, flagged_text, suffix) around a Semgrep result.

    The text reported in ``extra.lines`` is used when it occurs verbatim in the
    file; otherwise the 1-based ``start.line`` / ``end.line`` of the result
    select the flagged lines.
    """
    reported = finding.get("extra", {}).get("lines")
    if reported and reported in file_content:
        prefix, _, suffix = file_content.partition(reported)
        return prefix, reported, suffix

    source_lines = file_content.splitlines(keepends=True)
    first = max(finding["start"]["line"] - 1, 0)
    last = finding["end"]["line"]
    block = "".join(source_lines[first:last])
    flagged = block.rstrip("\r\n")
    # Keep the line terminator with the suffix, as the verbatim branch does.
    suffix = block[len(flagged):] + "".join(source_lines[last:])
    return "".join(source_lines[:first]), flagged, suffix


def _build_prompt(prefix, flagged, suffix, label, comment_str):
    """Comment out ``flagged``, annotate it with ``label`` and leave FILL_TOKEN in its place."""
    # Every flagged line gets the comment prefix, not only the first one.
    commented = '\n'.join(comment_str + line for line in flagged.split('\n'))
    return (prefix + '\n' + comment_str + ' BUG: ' + label + '\n' + commented + '\n'
            + comment_str + ' FIXED: \n' + FILL_TOKEN + '\n' + suffix)


def process():
    """Scan ``opt.input``, try to patch its first finding, and write ``<name>_fixed<ext>``.

    Exit statuses: 0 no findings, 1 unsupported extension, 2 input is not a
    file, 3 Semgrep reported errors, 4 Semgrep missing or produced no report.
    Files written next to the input: ``<name>_prompt<ext>`` and ``<name>_fixed<ext>``;
    REPORT_FILE is (re)written in the working directory.
    """
    max_new_tokens = 32
    temperature = 0.8
    input_file = opt.input
    # splitext() copes with dots elsewhere in the path ("./examples/x.java").
    root, ext = os.path.splitext(input_file)
    tmp_prompt_file = root + '_prompt' + ext
    output_file = root + '_fixed' + ext

    if ext not in _LANGUAGES:
        print("Only .java, .js and .py files are supported as input")
        sys.exit(1)
    config_str, comment_str = _LANGUAGES[ext]

    if not os.path.isfile(input_file):
        print(input_file + " is not a valid file")
        sys.exit(2)

    print("Scanning file " + input_file + "...")
    data = _run_semgrep(config_str, input_file, REPORT_FILE)
    if len(data["errors"]) != 0:
        print(input_file + " has parsing errors")
        sys.exit(3)
    if len(data["results"]) == 0:
        print(input_file + " has no vulnerabilities")
        sys.exit(0)

    print("Vulnerability found in " + input_file + "...")
    finding = data["results"][0]
    with open(input_file, 'r') as rf:
        file_content = rf.read()
    prefix, flagged, suffix = _split_around_finding(file_content, finding)
    s = _build_prompt(prefix, flagged, suffix, _finding_label(finding), comment_str)
    with open(tmp_prompt_file, 'w') as wf:
        wf.write(s)

    print("Attempting fix with prompt file " + tmp_prompt_file + "...")
    # Imported here so --help/--version and the pure helpers do not pay for
    # (or require) the transformers package.
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = "lambdasec/santafixer" if opt.model is None else opt.model
    # SECURITY: trust_remote_code=True executes Python shipped with the model
    # repository; only point --model at repositories you trust.
    tokenizer_fim = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True, padding_side="left",
                                                  use_auth_token=HF_TOKEN)
    if model_name == "lambdasec/santafixer" or model_name == "bigcode/santacoder":
        tokenizer_fim.add_special_tokens({
            "additional_special_tokens": [EOD, FIM_PREFIX, FIM_MIDDLE, FIM_SUFFIX, FIM_PAD]
        })
    # NOTE(review): the original indentation was lost, so whether this call sat
    # inside the `if` above is unknown.  Both generation paths tokenize with
    # padding=True, which needs a pad token, so it is applied for every model.
    tokenizer_fim.add_special_tokens({
        "pad_token": EOD
    })
    model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True,
                                                 use_auth_token=HF_TOKEN).to(device)

    # Keep at most CONTEXT_CHARS on each side of the marker.  The old test
    # `suffix_index + 1600 > len(s)` let the suffix grow to twice that size.
    marker_at = s.find(FILL_TOKEN)
    prefix_index = max(0, marker_at - CONTEXT_CHARS)
    suffix_index = min(len(s), marker_at + CONTEXT_CHARS)
    text = s[prefix_index:suffix_index]

    generations = code_generation(text, max_new_tokens, temperature, model, tokenizer_fim) or []
    for attempt, candidate in enumerate(generations, start=1):
        fixed_code = s[:prefix_index] + candidate + s[suffix_index:]
        with open(output_file, 'w') as wf:
            wf.write(fixed_code)
        data = _run_semgrep(config_str, output_file, REPORT_FILE)
        if len(data["errors"]) == 0 and len(data["results"]) == 0:
            print("\n Auto fixed file " + output_file + " with code generated at attempt " + str(attempt))
            break
    else:
        # Reached when no candidate passed the re-scan (or none was generated).
        print("Auto fix couldn't fix the file " + input_file)


if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', type=str, help='Specify the Hugging Face model')
    parser.add_argument('--input', type=str, help='The file to scan and fix')
    parser.add_argument('--version', action='version', version='%(prog)s 0.1')
    opt = parser.parse_args()
    if opt.input is None:
        print('No input file specified, use --input filename to scan and fix')
    else:
        if opt.model is None:
            print('No model is specified, using lambdasec/santafixer (see https://huggingface.co/lambdasec/santafixer)')
        process()
FilenameUtils.getName(image)); 40 | 41 | if (!file.exists()) { 42 | log.info(image + " could not be created."); 43 | response.sendError(); 44 | } 45 | 46 | response.sendRedirect("/index.html"); 47 | } 48 | } -------------------------------------------------------------------------------- /examples/example.js: -------------------------------------------------------------------------------- 1 | const express = require('express'); 2 | const router = express.Router() 3 | 4 | 5 | router.get("/tstMe", (req, res) => { 6 | var r = /([a-z]+)+$/; 7 | 8 | let match = r.test(req.params.id); 9 | res.send(match) 10 | 11 | }); 12 | 13 | 14 | module.exports = router -------------------------------------------------------------------------------- /examples/example.py: -------------------------------------------------------------------------------- 1 | import hashlib 2 | 3 | hs = hashlib.md5(b"foo") 4 | assert hs.hexdigest() == "acbd18db4cc2f85cedef654fccc4a4d8" 5 | 6 | hs.hexdigest() == "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33" 7 | 8 | hs = hashlib.sha256(b"foo") 9 | assert hs.hexdigest() == "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae" 10 | -------------------------------------------------------------------------------- /howitworks.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/lambdasec/autofix/f475008e19956aa39028afe7a424da5eed53f033/howitworks.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | transformers 2 | torch 3 | semgrep>=1.31.0 4 | --------------------------------------------------------------------------------