├── .github
    └── workflows
    │   └── validate.yaml
├── .gitignore
├── LICENSE
├── README.md
├── cli.md
├── codetf.md
├── codetf.schema.json
├── examples
    ├── pygoat.codetf.json
    └── webgoat.codetf.json
├── logging.md
└── scripts
    ├── requirements.txt
    └── validate_schema.py


/.github/workflows/validate.yaml:
--------------------------------------------------------------------------------
 1 | name: Validate Schema
 2 | 
 3 | on:  # triggers: pull requests targeting main, and pushes to main
 4 |   pull_request:
 5 |     branches:
 6 |       - main
 7 |   push:
 8 |     branches:
 9 |       - main
10 | 
11 | concurrency:  # one concurrency group per workflow + ref
12 |   cancel-in-progress: true  # a newer run in the same group cancels the one in progress
13 |   group: ${{ github.workflow }}-${{ github.ref }}
14 | 
15 | jobs:
16 |   validate:  # runs the repository's own schema validation script
17 |     name: Validate Schema
18 |     timeout-minutes: 5
19 |     runs-on: ubuntu-latest
20 |     steps:
21 |       - uses: actions/setup-python@v5
22 |         name: Set Up Python
23 |         with:
24 |           python-version: "3.12"  # quoted so the version stays a string
25 |       - uses: actions/checkout@v4
26 |         name: Check out code
27 |       - name: Install dependencies
28 |         run: pip install -r scripts/requirements.txt
29 |       - name: Validate schema
30 |         run: ./scripts/validate_schema.py codetf.schema.json
31 |   test-examples:  # validates each bundled example file against codetf.schema.json
32 |     name: Test CodeTF Examples
33 |     runs-on: ubuntu-latest  # same runner image as the validate job
34 |     timeout-minutes: 5
35 |     steps:
36 |       - name: Set Up Python
37 |         uses: actions/setup-python@v5
38 |         with:
39 |           python-version: '3.12'  # quoted so the version stays a string
40 |       - name: Check out code
41 |         uses: actions/checkout@v4
42 |       - name: Install dependencies
43 |         run: pip install check-jsonschema
44 |       - name: Validate PyGoat example
45 |         run: check-jsonschema --schemafile codetf.schema.json examples/pygoat.codetf.json
46 |       - name: Validate WebGoat example  # must target the WebGoat file, not the PyGoat one again
47 |         run: check-jsonschema --schemafile codetf.schema.json examples/webgoat.codetf.json
48 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.idea/
2 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 Pixee
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Codemodder Specifications
 2 | 
 3 | Codemodder is a framework for developing codemods. There are implementations in different languages, because the best tools for providing context and performing code manipulation are likely to be written in a given language. There is intended to be tight specification alignment across different languages to ensure a similar experience and help new language teams avoid running into already-solved problems.
 4 | 
 5 | This repository is intended to hold those specifications and supporting guidance.
 6 | 
 7 | * [Code Transformation Format (CodeTF)](codetf.md)
 8 | * [CLI Parameters](cli.md)
 9 | * [Logging](logging.md)
10 | 


--------------------------------------------------------------------------------
/cli.md:
--------------------------------------------------------------------------------
  1 | # Codemodder CLI Parameters
  2 | 
  3 | To guarantee a consistent user experience when using codemodder codemods, we offer a specification for what CLI parameters must be offered by any codemodder framework.
  4 | 
  5 | | Parameter | Description |
  6 | | --------- | ----------- |
  7 | | --help            | print help, then exit  |
  8 | | --list            | print codemod names, then exit |
  9 | | --describe        | print detailed codemod metadata, then exit |
 10 | | --output          | the output file to produce (optional) |
 11 | | --output-format   | the format for the data output file (codetf or diff) |
 12 | | --sarif           | comma-separated set of path(s) to SARIF file(s) to feed to the codemods |
 13 | | --contrast-vulnerabilities-xml | the path to a file containing the result of a call to the Contrast Assess XML export API |
 14 | | --sonar-json | the path to a file containing output from Sonar's Issues or Hotspots API, or a merged combination of two such files |
 15 | | --veracode-json | the path to a file containing output from Veracode's Findings API |
 16 | | --defectdojo-findings-json | the path to a file containing output from DefectDojo's v2 Findings API |
 17 | | --path-include    | comma-separated, exact-match, set of UNIX glob patterns to include. In the case of a conflict with excludes, excludes are given precedence.|
 18 | | --path-exclude    | comma-separated, exact-match, set of UNIX glob patterns to exclude. In the case of a conflict with includes, excludes are given precedence.|
 19 | | --dry-run         | do everything except make changes to files|
 20 | | --codemod-include | comma-separated set of codemod ID(s) to include|
 21 | | --codemod-exclude | comma-separated set of codemod ID(s) to exclude|
 22 | | --verbose         | print more log messages|
 23 | | --log-format      | human (default), or json|
 24 | | --project-name    | a descriptive and ideally unique name for the project being scanned to capture in reporting |
 25 | | --version         | print the version of the codemodder framework, then exit|
 26 | | --parameter       | a parameter for individual codemod (can provide multiple)|
 27 | | --max-workers     | specify the maximum number of workers (threads) to use for parallel processing |
 28 | 
 29 | ## Specifying parameters
 30 | The codemods must run in the given format:
 31 | 
 32 | ```
 33 | [executable] [arguments] <project directory>
 34 | ```
 35 | 
 36 | The `executable` could involve multiple command line tokens (e.g., `npm run` or `java -jar my-codemod.jar`) in order to invoke the executable.
 37 | 
 38 | ## Implementation details
 39 | - Passing one of `--help`, `--list`, `--describe`, and `--version` will cause the given action to be run, then exit.
 40 | - You can only run one of `--help`, `--list`, `--describe`, `--version`. Running multiple will cause an error and should show help like any other argument error.
 41 | - The only required field is `<project directory>`. However, this field is not required if running either `--help`, `--list`, `--describe`, or `--version`.
 42 | - You cannot legally specify any argument more than one time.
 43 | - If `--output` is given, it indicates the path where a codetf or diff file will be created (depending on the value of `--output-format`). Otherwise no output file is generated.
 44 | - All codemod rules are loaded by default unless `--codemod-include` specifies a list. `--codemod-exclude` works off all default codemods.
 45 | - Specifying a `--codemod-include` will set the order of codemod execution, otherwise the order will be the order of the codemod collection passed to the entry point API.
 46 | - Specifying a `--codemod-include` or `--codemod-exclude` that references a non-existent codemod will result in an error.
 47 | - You can specify a simple wildcard for `--codemod-include` and `--codemod-exclude` (e.g., `--codemod-include=acme:*`). If this pattern doesn't match any codemods, a warning will be issued.
 48 | - If the `<project directory>` doesn’t exist, an error should be thrown
 49 | - You can provide multiple `--parameter` arguments, but only one per codemod/name/file/line combination
 50 | - The `--parameter` argument contains a set of `name=value` pairs following the LDAPv3 Distinguished Name spec (see [RFC 4514](https://datatracker.ietf.org/doc/html/rfc4514.html)).
 51 |     - The attributes of these parameters are as follows. Unexpected attributes should cause an error.
 52 |         - **“codemod”:** the codemod to which the parameter applies (required)
 53 |         - **“file”:** a file in which the change will be applied (optional — assumed “all” if not present)
 54 |         - **“line”:** the line in the file to which the parameter applies (optional — assumed “all” if not present)
 55 |         - **“name”:** the name of the parameter (required)
 56 |         - **“value”:** the value of the parameter (required)
 57 | - The `--max-workers` argument specifies the maximum number of workers to use for parallel codemod processing. For most codemodders "workers" will be threads. When this parameter is not explicitly provided codemodders should rely on the default behavior of the underlying threading/concurrency provider for their language. Most providers will use reasonable defaults that automatically scale to system resources.
 58 | - The `--describe` argument causes detailed codemod metadata to be printed to `stdout` as a JSON blob before exiting. This is intended to be used by upstream tooling to collect detailed metadata about available codemods. This argument honors the `--codemod-include` and `--codemod-exclude` flags to determine which codemods should be included in the output. The format of the JSON mirrors the `results` section of the codetf format, except each entry only includes the following fields: `codemod`, `summary`, `description`, and `references`. For example, the output might look like this:
 59 | ```json
 60 | {
 61 |   "results": [
 62 |     {
 63 |       "codemod": "pixee:java/fix-my-java",
 64 |       "summary": "Fixes the Java",
 65 |       "description": "A longer detailed description of how to fix Java...",
 66 |       "references": [
 67 |         {
 68 |           "url": "https://www.java.com",
 69 |           "description": "Everyone's favorite Java website"
 70 |         }
 71 |       ]
 72 |     }
 73 |   ]
 74 | }
 75 | ```
 76 | 
 77 | ## Path inclusion and exclusion
 78 | 
 79 | The `--path-include` and `--path-exclude` patterns are interpreted as relative to the given `<project directory>`. In practice this means that the patterns should be joined with the `<project directory>` when used internally and also when passed to external tools.
 80 | 
 81 | In general, codemods that remediate the results of other tools respect the file paths specified in findings by those tools. Explicit configuration provided by the user via `--path-include` and `--path-exclude` takes precedence over any defaults a codemod may define. Remediation codemods must not impose their own defaults: the philosophy here is that an external tool has its own defaults and/or configuration, and this should be respected by codemodder.
 82 | 
 83 | Codemods that perform their own detection (i.e. "find-and-fix" codemods) may wish to define reasonable defaults for the paths to be included and excluded for analysis. It is recommended that such codemods should include only relevant source files and ignore test directories and build artifacts by default. For example, such codemods will generally want to exclude `**/tests/**` by default. This will be interpreted relative to the given `<project directory>`, which means that the effective pattern will be `<project directory>/**/tests/**`.
 84 | 
 85 | For `--path-include` and `--path-exclude`, specific line numbers can be supplied. For instance, to include `src/Foo.java` but only allow changes found on line 11, you would pass `--path-include src/Foo.java:11`.
 86 | - *Included* patterns that contain line numbers should be stripped of the line number before being used by either codemodder or external tools to determine which paths are included.
 87 | - *Excluded* patterns that contain line numbers should *not* be used by either codemodder or external tools when determining paths to be excluded. In other words, a single excluded line should not cause the entire file to be excluded by either codemodder or any external tools.
 88 | 
 89 | The line includes/excludes only specify whether nodes are scanned/considered by the codemods. They do not guarantee that nodes that match the rules remain unchanged. For example, for `--path-exclude src/Foo.java:11`, any vulnerable node on line `11` of `Foo.java` will be ignored by individual codemods. However, it may still be changed as part of a fix for another vulnerable node.
 90 | 
 91 | It is up to the individual codemodders to handle edge cases in the line includes/excludes.
 92 | 
 93 | ## Tool result parameters
 94 | 
 95 | Codemodder accepts several parameters that are used to provide tool result inputs to the codemods. These include `--sarif`, `--sonar-json`, and a handful of others that are tied to tool-specific formats. The available parameters may be gradually expanded as new tools are supported.
 96 | 
 97 | In general, each tool result flag accepts a comma-separated list of paths to files that contain the tool results. It is also possible to combine multiple tool result flags in a single invocation of the codemodder (e.g. to use both `--sarif` and `--sonar-json`), subject to the restriction below.
 98 | 
 99 | **NOTE:** It is _not_ allowed to provide multiple SARIF inputs _for the same tool_ in a single invocation of the codemodder. For example, it is not possible to provide two Semgrep SARIF files, although it would be possible to provide e.g. a Semgrep SARIF file and a CodeQL SARIF file in the same invocation.
100 | 
101 | ## Configuring OpenAI
102 | 
103 | You can optionally allow codemods to access OpenAI by running with the following environment variable during execution:
104 | ```bash
105 | CODEMODDER_OPENAI_API_KEY=<KEY>
106 | ```
107 | 
108 | In the case of problems communicating with an AI subprocessor, all other codemods should still run normally.
109 | 
110 | ### Using Azure OpenAI
111 | 
112 | Codemods request access to OpenAI models by name, and by default are given access to types that talk to OpenAI directly. It's possible to use Azure OpenAI in codemods by specifying the following environment variables:
113 | 
114 | ```bash
115 | CODEMODDER_AZURE_OPENAI_API_KEY=<KEY>
116 | CODEMODDER_AZURE_OPENAI_ENDPOINT=<ENDPOINT>
117 | CODEMODDER_AZURE_OPENAI_API_VERSION=<VERSION>
118 | CODEMODDER_AZURE_OPENAI_GPT_3_5_TURBO_2024_12_12_DEPLOYMENT=<DEPLOYMENT_NAME>
119 | CODEMODDER_AZURE_OPENAI_GPT_4_TURBO_2024_04_12_API_DEPLOYMENT=<DEPLOYMENT_NAME>
120 | ```
121 | 
122 | - Providing `CODEMODDER_AZURE_OPENAI_API_KEY` without `CODEMODDER_AZURE_OPENAI_ENDPOINT` (and vice versa) will cause a failure on startup.
123 | - The `CODEMODDER_AZURE_OPENAI_API_VERSION` is optional and codemodders can choose a reasonable default as a fallback. This spec does not intend to be prescriptive about particular fallback versions.
124 | - If using Azure OpenAI and a codemod requests access to a model for which there is no corresponding `CODEMODDER_AZURE_OPENAI_(MODELNAME)_DEPLOYMENT` variable, the deployment name will be assumed to be the name of the model (e.g., "gpt-4o").
125 | - If both Azure and OpenAI instructions are available, Azure will be preferred.
126 | 
127 | ## Console output
128 | 
129 | The codemodder CLI output is described in [its own specification](logging.md).
130 | 
131 | ## Exit codes
132 | 
133 | The following are error codes the codemodder will report. Implementors can report errors under codes not specifically enumerated here. Warnings will not cause error codes.
134 | 
135 | | Code | Meaning |
136 | | --- | --- |
137 | | 0 | success |
138 | | 1 | project directory doesn’t exist or can’t be read |
139 | | 2 | can’t write output file |
140 | | 3 | codemod instructions conflicted |
141 | 
142 | 
143 | ## Plugin/Extension arguments
144 | 
145 | Because codemodder is pluggable, there may be need for plugins/extensions to act on CLI arguments. This behavior isn't specified yet.
146 | 


--------------------------------------------------------------------------------
/codetf.md:
--------------------------------------------------------------------------------
 1 | # Code Transformation Format (CodeTF)
 2 | 
 3 | This describes Code Transformation Format (CodeTF).
 4 | 
 5 | This open format describes code changes or suggestions made by an automated tool. Much like the SARIF format published by Microsoft helped static analysis tools interoperate consistently, we believe an open format can help developers begin contributing to platforms that make and suggest changes, as well as reduce the need for vendor-specific viewers, parsers, etc. We hope to one day publish this specification through an open body like [OASIS](https://www.oasis-open.org/).
 6 | 
 7 | # The specification JSON
 8 | 
 9 | The [specification](codetf.schema.json) is expressed in terms of [JSON Schema](https://json-schema.org/). The schema is currently not versioned. We are avoiding investment in ceremony, versioning, governance, etc., until we feel it has reached a more stable footing. Following [SARIF](https://docs.oasis-open.org/sarif/sarif/v2.1.0/csprd01/sarif-v2.1.0-csprd01.html) stylistically as a long term goal makes sense, not only because it's a successful standard, but also because our results will be closely linked with SARIF, so we could have many users, consumers, and implementors in common.
10 | 
11 | Note that like SARIF, this format is not intended to be a replacement for a diagnostic log. It's not intended to have anything more than minimum diagnostics to help with reproducibility.
12 | 
13 | # Structure
14 | 
15 | It may help to understand the major components of CodeTF from a high level before exploring or attempting to implement the specification. The `results` and `changeset` fields can be seen as a series of patches against a project's directory. Each patch builds on any previous patches seen. Therefore, applying a patch from the middle of a `changeset` without the others may be invalid. Multiple locations can be changed in a single file within the scope of a single codemod and be represented by a single `changeset` array entry.
16 | 
17 | # Codemod URIs
18 | 
19 | Codemods are uniquely identified by a URI, which is represented in CodeTF as the `codemod` property of the `result` object.
20 | 
21 | URIs are descriptive and must conform to the following schema: `<detector>:<language>/<name>`
22 | 
23 | Each component of the URI has a particular meaning:
24 | 
25 | * `<detector>`: Describes the source of the analysis that drives the transformation. Codemods that remediate issues found by a specific analysis tool should be labeled with the detector corresponding to that tool name (e.g. "semgrep", "codeql", etc.). Implementers of custom codemods that perform their own internal detection should use a unique identifier for their detector. For example, Pixee's "find and fix" codemods use "pixee".
26 | * `<language>`: The language that is transformed by the codemod. This should be a short, unique identifier for the language. Valid languages include `java`, `python`, and `javascript`.
27 | * `<name>`: The name of the codemod. This should be a short, unique identifier for the transformation that is performed. Individual words in the name should be separated by hyphens. For example: `remove-unused-imports`.
28 | 
29 | # Notes
30 | Note that the `changeset` array can have multiple entries for the same given file.
31 | 
32 | Note that the `lineNumber` field should be deterministic, and this can't be traded for accuracy. The purpose of the line number is only to capture a location to be used in a `--path-include` or `--path-exclude` argument.
33 | 


--------------------------------------------------------------------------------
/codetf.schema.json:
--------------------------------------------------------------------------------
  1 | {
  2 |   "$schema": "https://json-schema.org/draft/2020-12/schema",
  3 |   "$id": "https://pixee.ai/codetf.schema.json",
  4 |   "title": "Code Transformation Format (CodeTF)",
  5 |   "description": "A format for representing the results of code transformations",
  6 |   "type": "object",
  7 | 
  8 |   "properties": {
  9 |     "run": {
 10 |       "description": "Metadata about the run that produced the results",
 11 |       "$ref": "#/definitions/run"
 12 |     },
 13 |     "results": {
 14 |       "description": "The results of the code transformations that were applied during analysis",
 15 |       "type": "array",
 16 |       "items": { "$ref": "#/definitions/result" },
 17 |       "minItems": 0
 18 |     }
 19 |   },
 20 | 
 21 |   "definitions": {
 22 |     "run": {
 23 |       "type": "object",
 24 |       "properties": {
 25 |         "vendor": {
 26 |           "type": "string",
 27 |           "description": "Name of the organization that produced the tool"
 28 |         },
 29 |         "tool": {
 30 |           "type": "string",
 31 |           "description": "Name of the tool that produced the CodeTF results"
 32 |         },
 33 |         "version": {
 34 |           "type": "string",
 35 |           "description": "Version of the tool that produced the CodeTF results"
 36 |         },
 37 |         "projectName": {
 38 |           "type": "string",
 39 |           "description": "Name of the project that was analyzed"
 40 |         },
 41 |         "commandLine": {
 42 |           "type": "string",
 43 |           "description": "The command line used to re-create this run"
 44 |         },
 45 |         "elapsed": {
 46 |           "type": ["integer", "string"],
 47 |           "description": "How long, in milliseconds, the analysis took to execute"
 48 |         },
 49 |         "directory": {
 50 |           "type": "string",
 51 |           "description": "The location on disk of the code directory being scanned"
 52 |         },
 53 |         "sarifs": {
 54 |           "type": "array",
 55 |           "items": { "$ref": "#/definitions/sarif" }
 56 |         }
 57 |       },
 58 |       "required": ["vendor", "tool", "version", "commandLine", "elapsed", "directory"]
 59 | 
 60 |     },
 61 | 
 62 |     "result": {
 63 |       "type": "object",
 64 |       "properties": {
 65 |         "codemod": {
 66 |           "type": "string",
 67 |           "description": "ID of the transformation that was applied"
 68 |         },
 69 |         "summary": {
 70 |           "type": "string",
 71 |           "description": "A short phrase describing the changes made"
 72 |         },
 73 |         "description": {
 74 |           "type": "string",
 75 |           "description": "A longer description of the changes made"
 76 |         },
 77 |         "detectionTool": {
 78 |           "description": "Metadata about the tool that detected the issue",
 79 |           "$ref": "#/definitions/detectionTool"
 80 |         },
 81 |         "references": {
 82 |           "type": "array",
 83 |           "items": { "$ref": "#/definitions/reference" }
 84 |         },
 85 |         "properties": {
 86 |           "type": "object",
 87 |           "description": "Vendor-specific properties to help storytelling",
 88 |           "additionalProperties": true
 89 |         },
 90 |         "failure": {
 91 |           "type": "object",
 92 |           "description": "If present indicates that the codemod failed to run",
 93 |           "properties": {
 94 |             "reason": {
 95 |               "type": "string",
 96 |               "description": "A short description of why the codemod failed"
 97 |             },
 98 |             "exception": {
 99 |               "type": "string",
100 |               "description": "More detailed information about the exception that caused the failure"
101 |             }
102 |           },
103 |           "required": ["reason"]
104 |         },
105 |         "failedFiles": {
106 |           "type": "array",
107 |           "description": "A set of file paths for files that the tool failed to parse or transform",
108 |           "items": { "type": "string" }
109 |         },
110 |         "changeset": {
111 |           "type": "array",
112 |           "description": "The changes for a given codemod",
113 |           "items": { "$ref": "#/definitions/changeset" },
114 |           "minItems": 0
115 |         },
116 |         "unfixed": {
117 |           "type": "array",
118 |           "description": "A set of findings that the tool failed to fix",
119 |           "items": { "$ref": "#/definitions/detector/unfixedFinding" }
120 |         }
121 |       },
122 |       "required": ["codemod", "summary", "description", "changeset"]
123 |     },
124 | 
125 |     "sarif": {
126 |       "type": "object",
127 |       "properties": {
128 |         "artifact": {
129 |           "type": "string",
130 |           "description": "The file containing the SARIF or other input"
131 |         },
132 |         "sha1": {
133 |           "type": "string",
134 |           "description": "SHA1 hash representing the artifact"
135 |         }
136 |       },
137 |       "required": ["artifact", "sha1"]
138 |     },
139 | 
140 |     "reference": {
141 |       "type": "object",
142 |       "properties": {
143 |         "url": {
144 |           "type": "string",
145 |           "description": "The URL of the reference"
146 |         },
147 |         "description": {
148 |           "type": "string",
149 |           "description": "A description of the reference"
150 |         }
151 |       },
152 |       "required": ["url"]
153 |     },
154 | 
155 |     "changeset": {
156 |       "type": "object",
157 |       "properties": {
158 |         "path": {
159 |           "type": "string",
160 |           "description": "The path of the file (relative to the directory) that was changed"
161 |         },
162 |         "diff": {
163 |           "type": "string",
164 |           "description": "The unified diff format of the changes made to the file"
165 |         },
166 |         "changes": {
167 |           "type": "array",
168 |           "items": { "$ref": "#/definitions/change" },
169 |           "minItems": 1
170 |         },
171 |         "ai": {
172 |           "description": "Metadata about AI tool used to generate the fix",
173 |           "$ref": "#/definitions/ai"
174 |         },
175 |         "strategy": {
176 |           "type": "string",
177 |           "description": "The strategy used to generate the fix",
178 |           "enum": ["ai", "hybrid", "deterministic"]
179 |         },
180 |         "provisional": {
181 |           "type": "boolean",
182 |           "description": "Indicates that the fix is provisional"
183 |         },
184 |         "fixedFindings": {
185 |           "type": "array",
186 |           "description": "List of findings that were fixed by this changeset. Used for findings that can't be attached to a specific change.",
187 |           "items": { "$ref": "#/definitions/detector/fixedFinding" }
188 |         },
189 |         "fixQuality": {
190 |           "type": "object",
191 |           "description": "Metadata about the quality of the fix",
192 |           "properties": {
193 |             "safetyRating": {
194 |               "description": "Can this change be accepted without breaking the code?",
195 |               "$ref": "#/definitions/rating"
196 |             },
197 |             "effectivenessRating": {
198 |               "description": "Does it fix the problem without introducing syntactic or semantic issues?",
199 |               "$ref": "#/definitions/rating"
200 |             },
201 |             "cleanlinessRating": {
202 |               "description": "Does the code make unnecessary cosmetic changes (e.g. add/remove whitespace, comments)?",
203 |               "$ref": "#/definitions/rating"
204 |             }
205 |           },
206 |           "required": ["safetyRating", "effectivenessRating", "cleanlinessRating"]
207 |         }
208 |       },
209 |       "required": ["path", "diff", "changes"]
210 |     },
211 | 
212 |     "change": {
213 |       "type": "object",
214 |       "properties": {
215 |         "lineNumber": {
216 |           "type": ["integer", "string"],
217 |           "description": "The line number that was changed"
218 |         },
219 |         "description": {
220 |           "type": "string",
221 |           "description": "A human-readable description of the change"
222 |         },
223 |         "diffSide": {
224 |           "type": "string",
225 |           "description": "The side of the diff that the change is on. 'right' indicates that the lineNumber and description correspond to a position in the new or modified code. 'left' indicates these fields correspond to a position in the original code. Descriptions of the change itself should generally use 'right', except in cases where the only change is a deletion. In contrast, 'left' can be used to describe the original problem being fixed.",
226 |           "enum": ["left", "right", "LEFT", "RIGHT"]
227 |         },
228 |         "properties": {
229 |           "type": "object",
230 |           "description": "Vendor-specific properties to help storytelling",
231 |           "additionalProperties": true
232 |         },
233 |         "packageActions": {
234 |           "type": "array",
235 |           "description": "The package actions that were needed to support changes to the file",
236 |           "items": { "$ref": "#/definitions/packageAction" }
237 |         },
238 |         "fixedFindings": {
239 |           "type": "array",
240 |           "description": "List of findings that were fixed at this location",
241 |           "items": { "$ref": "#/definitions/detector/fixedFinding" }
242 |         }
243 |       },
244 |       "required": ["lineNumber", "diffSide"]
245 |     },
246 | 
247 |     "packageAction": {
248 |       "type": "object",
249 |       "properties": {
250 |         "action": {
251 |           "type": "string",
252 |           "description": "One of 'add' or 'remove'",
253 |           "enum": ["add", "remove", "ADD", "REMOVE"]
254 |         },
255 |         "result": {
256 |           "type": "string",
257 |           "description": "One of 'completed', 'failed', or 'skipped'",
258 |           "enum": ["completed", "failed", "skipped", "COMPLETED", "FAILED", "SKIPPED"]
259 |         },
260 |         "package": {
261 |           "type": "string",
262 |           "description": "The package that was added or removed"
263 |         }
264 |       },
265 |       "required": ["action", "result", "package"]
266 |     },
267 | 
268 |     "ai": {
269 |       "type": "object",
270 |       "properties": {
271 |         "provider": {
272 |           "type": "string",
273 |           "description": "Name of the AI provider used to generate this fix"
274 |         },
275 |         "model": {
276 |           "type": "string",
277 |           "description": "Name of the model used to generate this fix"
278 |         },
279 |         "tokens": {
280 |           "type": "integer",
281 |           "description": "Total number of tokens used to generate the fix"
282 |         }
283 |       }
284 |     },
285 | 
286 |     "detectionTool": {
287 |       "type": "object",
288 |       "properties": {
289 |         "name": {
290 |           "type": "string",
291 |           "description": "Name of the tool that detected the issue"
292 |         }
293 |       },
294 |       "additionalProperties": true,
295 |       "required": ["name"]
296 |     },
297 | 
298 |     "detector": {
299 |       "fixedFinding": {
300 |         "type": "object",
301 |         "properties": {
302 |           "id": {
303 |             "type": "string",
304 |             "description": "A unique identifier for the finding, if provided by the detection tool. Concerning logically equivalent findings across scans, this identifier has the same semantics as `guid` from SARIF."
305 |           },
306 |           "rule": {
307 |             "$ref": "#/definitions/detector/rule",
308 |             "description": "The rule that detected the issue"
309 |           }
310 |         },
311 |         "additionalProperties": true,
312 |         "required": ["rule"]
313 |       },
314 |       "unfixedFinding": {
315 |         "type": "object",
316 |         "properties": {
317 |           "id": {
318 |             "type": "string",
319 |             "description": "A unique identifier for the finding, if provided by the detection tool. Concerning logically equivalent findings across scans, this identifier has the same semantics as `guid` from SARIF."
320 |           },
321 |           "rule": {
322 |             "$ref": "#/definitions/detector/rule",
323 |             "description": "The rule that detected the issue"
324 |           },
325 |           "path": {
326 |             "type": "string",
327 |             "description": "The path of the file (relative to the directory) that was not fixed"
328 |           },
329 |           "lineNumber": {
330 |             "type": "integer",
331 |             "description": "The line number that was not fixed"
332 |           },
333 |           "reason": {
334 |             "type": "string",
335 |             "description": "A human-readable description of the reason the file was not fixed"
336 |           }
337 |         },
338 |         "additionalProperties": true,
339 |         "required": ["rule", "path", "reason"]
340 |       },
341 |       "rule": {
342 |         "type": "object",
343 |         "properties": {
344 |           "id": {
345 |             "type": "string",
346 |             "description": "The ID of the rule"
347 |           },
348 |           "name": {
349 |             "type": "string",
350 |             "description": "The name of the rule. Potentially the same as the ID, but more human-readable"
351 |           },
352 |           "url": {
353 |             "type": "string",
354 |             "description": "Link to the rule documentation"
355 |           }
356 |         },
357 |         "additionalProperties": false,
358 |         "required": ["id", "name"]
359 |       },
360 |       "rating": {
361 |         "type": "object",
362 |         "properties": {
363 |           "score": {
364 |             "type": "number",
365 |             "description": "A score from 0 to 100 representing the quality of the fix"
366 |           },
367 |           "description": {
368 |             "type": "string",
369 |             "description": "A human-readable description of the quality of the fix"
370 |           }
371 |         },
372 |         "additionalProperties": false,
373 |         "required": ["score"]
374 |       }
375 |     }
376 |   },
377 | 
378 |   "required": ["run", "results"]
379 | }
380 | 


--------------------------------------------------------------------------------
/examples/pygoat.codetf.json:
--------------------------------------------------------------------------------
1 | {"run": {"vendor": "pixee", "tool": "codemodder-python", "version": "0.81.1.dev13+g03f0a72", "sarifs": [], "elapsed": "34437", "commandLine": "codemodder /Users/danieldavella/pygoat --output=pygoat.codetf.json --dry-run", "directory": "/Users/danieldavella/pygoat"}, "results": [{"codemod": "pixee:python/add-requests-timeouts", "summary": "Add timeout to `requests` calls", "description": "Many developers will be surprised to learn that `requests` library calls do not include timeouts by default. This means that an attempted request could hang indefinitely if no connection is established or if no data is received from the server. \n\nThe [requests documentation](https://requests.readthedocs.io/en/latest/user/advanced/#timeouts) suggests that most calls should explicitly include a `timeout` parameter. This codemod adds a default timeout value in order to set an upper bound on connection times and ensure that requests connect or fail in a timely manner. This value also ensures the connection will timeout if the server does not respond with data within a reasonable amount of time. \n\nWhile timeout values will be application dependent, we believe that this codemod adds a reasonable default that serves as an appropriate ceiling for most situations. 
\n\nOur changes look like the following:\n```diff\n import requests\n \n- requests.get(\"http://example.com\")\n+ requests.get(\"http://example.com\", timeout=60)\n```\n", "references": [{"url": "https://docs.python-requests.org/en/master/user/quickstart/#timeouts", "description": "https://docs.python-requests.org/en/master/user/quickstart/#timeouts"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/apis.py", "diff": "--- \n+++ \n@@ -73,10 +73,10 @@\n         f.close()\n         url = \"http://127.0.0.1:8000/2021/discussion/A9/target\"\n         payload={'csrfmiddlewaretoken': csrf_token }\n-        requests.request(\"GET\", url)\n-        requests.request(\"POST\", url)\n-        requests.request(\"PATCH\", url, data=payload)\n-        requests.request(\"DELETE\", url)\n+        requests.request(\"GET\", url, timeout=60)\n+        requests.request(\"POST\", url, timeout=60)\n+        requests.request(\"PATCH\", url, data=payload, timeout=60)\n+        requests.request(\"DELETE\", url, timeout=60)\n         f = open('test.log', 'r')\n         lines = f.readlines()\n         f.close()\n", "changes": [{"lineNumber": "76", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "77", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "78", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "79", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/playground/A6/soln.py", "diff": "--- \n+++ \n@@ -5,7 +5,7 @@\n     for i in list_of_modules:\n         k = i.split(\"==\")\n         url = f\"https://pypi.org/pypi/{k[0]}/{k[1]}/json\"\n-        response = requests.get(url)\n+        response = requests.get(url, timeout=60)\n         
response.raise_for_status()\n         info = response.json()\n         existing_vuln = info['vulnerabilities']\n", "changes": [{"lineNumber": "8", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/playground/A6/utility.py", "diff": "--- \n+++ \n@@ -5,7 +5,7 @@\n     for i in list_of_modules:\n         k = i.split(\"==\")\n         url = f\"https://pypi.org/pypi/{k[0]}/{k[1]}/json\"\n-        response = requests.get(url)\n+        response = requests.get(url, timeout=60)\n         response.raise_for_status()\n         info = response.json()\n         existing_vuln = info['vulnerabilities']\n", "changes": [{"lineNumber": "8", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -951,7 +951,7 @@\n     elif request.method == \"POST\":\n         url = request.POST[\"url\"]\n         try:\n-            response = requests.get(url)\n+            response = requests.get(url, timeout=60)\n             return render(request, \"Lab/ssrf/ssrf_lab2.html\", {\"response\": response.content.decode()})\n         except:\n             return render(request, \"Lab/ssrf/ssrf_lab2.html\", {\"error\": \"Invalid URL\"})\n", "changes": [{"lineNumber": "954", "description": "Add timeout to `requests` call", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/django-debug-flag-on", "summary": "Disable Django Debug Mode", "description": "This codemod will flip Django's `DEBUG` flag to `False` if it's `True` on the `settings.py` file within Django's default directory structure.\n\nHaving the debug flag on may result in sensitive information exposure. When an exception occurs while the `DEBUG` flag in on, it will dump metadata of your environment, including the settings module. 
The attacker can purposefully request a non-existing url to trigger an exception and gather information about your system.\n\n```diff\n- DEBUG = True\n+ DEBUG = False\n```\n", "references": [{"url": "https://owasp.org/www-project-top-ten/2017/A3_2017-Sensitive_Data_Exposure", "description": "https://owasp.org/www-project-top-ten/2017/A3_2017-Sensitive_Data_Exposure"}, {"url": "https://docs.djangoproject.com/en/4.2/ref/settings/#std-setting-DEBUG", "description": "https://docs.djangoproject.com/en/4.2/ref/settings/#std-setting-DEBUG"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "pygoat/settings.py", "diff": "--- \n+++ \n@@ -26,7 +26,7 @@\n SENSITIVE_DATA = 'FLAGTHATNEEDSTOBEFOUND'\n \n # SECURITY WARNING: don't run with debug turned on in production!\n-DEBUG = True\n+DEBUG = False\n \n ALLOWED_HOSTS = ['pygoat.herokuapp.com', '0.0.0.0.']\n \n", "changes": [{"lineNumber": "29", "description": "Flip `Django` debug flag to off.", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/django-session-cookie-secure-off", "summary": "Secure Setting for Django `SESSION_COOKIE_SECURE` flag", "description": "This codemod will set Django's `SESSION_COOKIE_SECURE` flag to `True` if it's `False` or missing on the `settings.py` file within Django's default directory structure.\n\n```diff\n+ SESSION_COOKIE_SECURE = True\n```\n\nSetting this flag on ensures that the session cookies are only sent under an HTTPS connection. 
Leaving this flag off may enable an attacker to use a sniffer to capture the unencrypted session cookie and hijack the user's session.\n", "references": [{"url": "https://owasp.org/www-community/controls/SecureCookieAttribute", "description": "https://owasp.org/www-community/controls/SecureCookieAttribute"}, {"url": "https://docs.djangoproject.com/en/4.2/ref/settings/#session-cookie-secure", "description": "https://docs.djangoproject.com/en/4.2/ref/settings/#session-cookie-secure"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "pygoat/settings.py", "diff": "--- \n+++ \n@@ -167,4 +167,5 @@\n }\n \n SECRET_COOKIE_KEY = \"PYGOAT\"\n-CSRF_TRUSTED_ORIGINS = [\"http://127.0.0.1:8000\",\"http://0.0.0.0:8000\",\"http://172.16.189.10\"]\n+CSRF_TRUSTED_ORIGINS = [\"http://127.0.0.1:8000\",\"http://0.0.0.0:8000\",\"http://172.16.189.10\"]\n+SESSION_COOKIE_SECURE = True", "changes": [{"lineNumber": "171", "description": "Sets Django's `SESSION_COOKIE_SECURE` flag if off or missing.", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/enable-jinja2-autoescape", "summary": "Enable Jinja2 Autoescape", "description": "This codemod enables autoescaping of HTML content in `jinja2`. Unfortunately, the jinja2 default behavior is to not autoescape when rendering templates, which makes your applications potentially vulnerable to Cross-Site Scripting (XSS) attacks.\n\nOur codemod checks if you forgot to enable autoescape or if you explicitly disabled it. 
The change looks as follows:\n\n```diff\n  from jinja2 import Environment\n\n- env = Environment()\n- env = Environment(autoescape=False, loader=some_loader)\n+ env = Environment(autoescape=True)\n+ env = Environment(autoescape=True, loader=some_loader)\n  ...\n```\n", "references": [{"url": "https://owasp.org/www-community/attacks/xss/", "description": "https://owasp.org/www-community/attacks/xss/"}, {"url": "https://jinja.palletsprojects.com/en/3.1.x/api/#autoescaping", "description": "https://jinja.palletsprojects.com/en/3.1.x/api/#autoescaping"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-deprecated-abstractproperty", "summary": "Replace Deprecated `abc` Decorators", "description": "The `@abstractproperty`, `@abstractclassmethod`, and `@abstractstaticmethod` decorators from `abc` have been [deprecated](https://docs.python.org/3/library/abc.html) since Python 3.3. This is because it's possible to use `@property`, `@classmethod`, and `@staticmethod`  in combination with `@abstractmethod`. 
\n\nOur changes look like the following:\n```diff\n import abc\n\n class Foo:\n-   @abc.abstractproperty\n+   @property\n+   @abc.abstractmethod\n    def bar():\n        ...\n```\n\nand similarly for `@abstractclassmethod` and `@abstractstaticmethod`.\n", "references": [{"url": "https://docs.python.org/3/library/abc.html#abc.abstractproperty", "description": "https://docs.python.org/3/library/abc.html#abc.abstractproperty"}, {"url": "https://docs.python.org/3/library/abc.html#abc.abstractclassmethod", "description": "https://docs.python.org/3/library/abc.html#abc.abstractclassmethod"}, {"url": "https://docs.python.org/3/library/abc.html#abc.abstractstaticmethod", "description": "https://docs.python.org/3/library/abc.html#abc.abstractstaticmethod"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-mutable-params", "summary": "Replace Mutable Default Parameters", "description": "Using mutable values for default arguments is not a safe practice.\nLook at the following very simple example code:\n\n```python\ndef foo(x, y=[]):\n    y.append(x)\n    print(y)\n```\n\nThe function `foo` doesn't do anything very interesting; it just prints the result of `x` appended to `y`. Naively we might expect this to simply print an array containing only `x` every time `foo` is called, like this:\n\n```python\n>>> foo(1)\n[1]\n>>> foo(2)\n[2]\n```\n\nBut that's not what happens!\n\n```python\n>>> foo(1)\n[1]\n>>> foo(2)\n[1, 2]\n```\n\nThe value of `y` is preserved between calls! This might seem surprising, and it is. It's due to the way that scope works for function arguments in Python.\n\nThe result is that any default argument value will be preserved between function calls. This is problematic for *mutable* types, including things like `list`, `dict`, and `set`.\n\nRelying on this behavior is unpredictable and generally considered to be unsafe. 
Most of us who write code like this were not anticipating the surprising behavior, so it's best to fix it.\n\nOur codemod makes an update that looks like this:\n```diff\n- def foo(x, y=[]):\n+ def foo(x, y=None):\n+   y = [] if y is None else y\n    y.append(x)\n    print(y)\n```\n\nUsing `None` is a much safer default. The new code checks if `None` is passed, and if so uses an empty `list` for the value of `y`. This will guarantee consistent and safe behavior between calls.\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/harden-pickle-load", "summary": "Harden `pickle.load()` against deserialization attacks", "description": "Python's `pickle` module is notoriously insecure. While it is very useful for serializing and deserializing Python objects, it is not safe to use `pickle` to load data from untrusted sources. This is because `pickle` can execute arbitrary code when loading data. This can be exploited by an attacker to execute arbitrary code on your system. Unlike `yaml` there is no concept of a \"safe\" loader in `pickle`. Therefore, it is recommended to avoid `pickle` and to use a different serialization format such as `json` or `yaml` when working with untrusted data.\n\nHowever, if you must use `pickle` to load data from an untrusted source, we recommend using the open-source `fickling` library. `fickling` is a drop-in replacement for `pickle` that validates the data before loading it and checks for the possibility of code execution. This makes it much safer (although still not entirely safe) to use `pickle` to load data from untrusted sources.\n\nThis codemod replaces calls to `pickle.load` with `fickling.load` in Python code. It also adds an import statement for `fickling` if it is not already present. 
\n\nThe changes look like the following:\n```diff\n- import pickle\n+ import fickling\n \n- data = pickle.load(file)\n+ data = fickling.load(file)\n```\n", "references": [{"url": "https://docs.python.org/3/library/pickle.html", "description": "https://docs.python.org/3/library/pickle.html"}, {"url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data", "description": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html#clear-box-review_1", "description": "https://cheatsheetseries.owasp.org/cheatsheets/Deserialization_Cheat_Sheet.html#clear-box-review_1"}, {"url": "https://github.com/trailofbits/fickling", "description": "https://github.com/trailofbits/fickling"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/harden-pyyaml", "summary": "Replace unsafe `pyyaml` loader with `SafeLoader`", "description": "The default loaders in PyYAML are not safe to use with untrusted data. They potentially make your application vulnerable to arbitrary code execution attacks. If you open a YAML file from an untrusted source, and the file is loaded with the default loader, an attacker could execute arbitrary code on your machine.\n\nThis codemod hardens all [`yaml.load()`](https://pyyaml.org/wiki/PyYAMLDocumentation) calls against such attacks by replacing the default loader with `yaml.SafeLoader`. This is the recommended loader for loading untrusted data. For most use cases it functions as a drop-in replacement for the default loader.\n\nCalling `yaml.load()` without an explicit loader argument is equivalent to calling it with `Loader=yaml.Loader`, which is unsafe. This usage [has been deprecated](https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input\\)-Deprecation) since PyYAML 5.1. 
This codemod will add an explicit `SafeLoader` argument to all `yaml.load()` calls that don't use an explicit loader.\n\nThe changes from this codemod look like the following:\n```diff\n  import yaml\n  data = b'!!python/object/apply:subprocess.Popen \\\\n- ls'\n- deserialized_data = yaml.load(data, yaml.Loader)\n+ deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)\n```\n", "references": [{"url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data", "description": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"}, {"url": "https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation", "description": "https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input)-Deprecation"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/lab_code/test.py", "diff": "--- \n+++ \n@@ -17,7 +17,7 @@\n '''\n import yaml, subprocess\n stream = open('/home/fox/test.yaml', 'r')\n-data = yaml.load(stream)\n+data = yaml.load(stream, Loader=yaml.SafeLoader)\n \n '''\n stdout, stderr = data.communicate()\n", "changes": [{"lineNumber": "20", "description": "Replace unsafe `pyyaml` loader with `SafeLoader` in calls to `yaml.load` or custom loader classes.", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -548,7 +548,7 @@\n             try :\n                 file=request.FILES[\"file\"]\n                 try :\n-                    data = yaml.load(file,yaml.Loader)\n+                    data = yaml.load(file,yaml.SafeLoader)\n                     \n                     return render(request,\"Lab/A9/a9_lab.html\",{\"data\":data})\n                 except:\n", "changes": [{"lineNumber": "551", "description": "Replace unsafe `pyyaml` loader with `SafeLoader` in calls to `yaml.load` or custom loader classes.", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": 
"pixee:python/harden-ruamel", "summary": "Use `typ='safe'` in ruamel.yaml() Calls", "description": "This codemod hardens any unsafe [`ruamel.yaml.YAML()`](https://yaml.readthedocs.io/en/latest/) calls against attacks that could result from deserializing untrusted data.\n\nThe fix uses a safety check that already exists in the `ruamel` module, replacing an unsafe `typ` argument with `typ=\"safe\"`.\nThe changes from this codemod look like this:\n\n```diff\n  from ruamel.yaml import YAML\n- serializer = YAML(typ=\"unsafe\")\n- serializer = YAML(typ=\"base\")\n+ serializer = YAML(typ=\"safe\")\n+ serializer = YAML(typ=\"safe\")\n```\n", "references": [{"url": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data", "description": "https://owasp.org/www-community/vulnerabilities/Deserialization_of_untrusted_data"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/https-connection", "summary": "Enforce HTTPS Connection for `urllib3`", "description": "This codemod replaces calls to `urllib3.connectionpool.HTTPConnectionPool` and `urllib3.HTTPConnectionPool` with their secure variant (`HTTPSConnectionPool`).\n\nProgrammers should opt to use HTTPS over HTTP for secure encrypted communication whenever possible.\n\n```diff\nimport urllib3\n- urllib3.HTTPConnectionPool(\"www.example.com\",\"80\")\n+ urllib3.HTTPSConnectionPool(\"www.example.com\",\"80\")\n```\n", "references": [{"url": "https://owasp.org/www-community/vulnerabilities/Insecure_Transport", "description": "https://owasp.org/www-community/vulnerabilities/Insecure_Transport"}, {"url": "https://urllib3.readthedocs.io/en/stable/reference/urllib3.connectionpool.html#urllib3.HTTPConnectionPool", "description": "https://urllib3.readthedocs.io/en/stable/reference/urllib3.connectionpool.html#urllib3.HTTPConnectionPool"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/jwt-decode-verify", "summary": "Verify JWT Decode", 
"description": "This codemod ensures calls to [jwt.decode](https://pyjwt.readthedocs.io/en/stable/api.html#jwt.decode) do not disable signature validation and other verifications. It checks that both the `verify` parameter (soon to be deprecated) and any `verify` key in the `options` dict parameter are not assigned to `False`.\n\nOur change looks as follows:\n\n```diff\n  import jwt\n  ...\n- decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=[\"HS256\"], verify=False)\n+ decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=[\"HS256\"], verify=True)\n  ...\n- decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=[\"HS256\"], options={\"verify_signature\": False, \"verify_exp\": False})\n+ decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=[\"HS256\"], options={\"verify_signature\": True, \"verify_exp\": True})\n```\n\nAny `verify` parameter not listed relies on the secure `True` default value.\n", "references": [{"url": "https://pyjwt.readthedocs.io/en/stable/api.html", "description": "https://pyjwt.readthedocs.io/en/stable/api.html"}, {"url": "https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/06-Session_Management_Testing/10-Testing_JSON_Web_Tokens", "description": "https://owasp.org/www-project-web-security-testing-guide/latest/4-Web_Application_Security_Testing/06-Session_Management_Testing/10-Testing_JSON_Web_Tokens"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/limit-readline", "summary": "Limit readline()", "description": "This codemod hardens all [`readline()`](https://docs.python.org/3/library/io.html#io.IOBase.readline) calls from file objects returned from an `open()` call, `StringIO` and `BytesIO` against denial of service attacks. 
A stream influenced by an attacker could keep providing bytes until the system runs out of memory, causing a crash.\n\nFixing it is straightforward by adding a size argument to any `readline()` calls.\nThe changes from this codemod look like this:\n\n```diff\n  file = open('some_file.txt')\n- file.readline()\n+ file.readline(5_000_000)\n```\n", "references": [{"url": "https://cwe.mitre.org/data/definitions/400.html", "description": "https://cwe.mitre.org/data/definitions/400.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/safe-lxml-parser-defaults", "summary": "Use Safe Defaults for `lxml` Parsers", "description": "This codemod configures safe parameter values when initializing `lxml.etree.XMLParser`, `lxml.etree.ETCompatXMLParser`, `lxml.etree.XMLTreeBuilder`, or `lxml.etree.XMLPullParser`. If parameters `resolve_entities`, `no_network`, and `dtd_validation` are not set to safe values, your code may be vulnerable to entity expansion attacks and external entity (XXE) attacks.\n\nParameters `no_network` and `dtd_validation` have safe default values of `True` and `False`, respectively, so this codemod will set each to the default safe value if your code has assigned either to an unsafe value.\n\nParameter `resolve_entities` has an unsafe default value of `True`. 
This codemod will set `resolve_entities=False` if set to `True` or omitted.\n\nThe changes look as follows:\n\n```diff\n  import lxml.etree\n\n- parser = lxml.etree.XMLParser()\n- parser = lxml.etree.XMLParser(resolve_entities=True)\n- parser = lxml.etree.XMLParser(resolve_entities=True, no_network=False, dtd_validation=True)\n+ parser = lxml.etree.XMLParser(resolve_entities=False)\n+ parser = lxml.etree.XMLParser(resolve_entities=False)\n+ parser = lxml.etree.XMLParser(resolve_entities=False, no_network=True, dtd_validation=False)\n```\n", "references": [{"url": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser", "description": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser"}, {"url": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing", "description": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/safe-lxml-parsing", "summary": "Use Safe Parsers in `lxml` Parsing Functions", "description": "This codemod sets the `parser` parameter in calls to  `lxml.etree.parse`  and `lxml.etree.fromstring` if omitted or set to `None` (the default value). 
Unfortunately, the default `parser=None` means `lxml` will rely on an unsafe parser, making your code potentially vulnerable to entity expansion attacks and external entity (XXE) attacks.\n\nThe changes look as follows:\n\n```diff\n  import lxml.etree\n- lxml.etree.parse(\"path_to_file\")\n- lxml.etree.fromstring(\"xml_str\")\n+ lxml.etree.parse(\"path_to_file\", parser=lxml.etree.XMLParser(resolve_entities=False))\n+ lxml.etree.fromstring(\"xml_str\", parser=lxml.etree.XMLParser(resolve_entities=False))\n```\n", "references": [{"url": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser", "description": "https://lxml.de/apidoc/lxml.etree.html#lxml.etree.XMLParser"}, {"url": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing", "description": "https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/sandbox-process-creation", "summary": "Sandbox Process Creation", "description": "This codemod sandboxes all instances of [subprocess.run](https://docs.python.org/3/library/subprocess.html#subprocess.run) and [subprocess.call](https://docs.python.org/3/library/subprocess.html#subprocess.call) to offer protection against attack.\n\nLeft unchecked, `subprocess.run` and `subprocess.call` can execute any arbitrary system command. 
If an attacker can control part of the strings used as program paths or arguments, they could execute arbitrary programs, install malware, and anything else they could do if they had a shell open on the application host.\n\nOur change introduces a sandbox which protects the application:\n\n```diff\n  import subprocess\n+ from security import safe_command\n  ...\n- subprocess.run(\"echo 'hi'\", shell=True)\n+ safe_command.run(subprocess.run, \"echo 'hi'\", shell=True)\n  ...\n- subprocess.call([\"ls\", \"-l\"])\n+ safe_command.call(subprocess.call, [\"ls\", \"-l\"])\n```\n\nThe default `safe_command` restrictions applied are the following:\n* **Prevent command chaining**. Many exploits work by injecting command separators and causing the shell to interpret a second, malicious command. The `safe_command` functions attempt to parse the given command, and throw a `SecurityException` if multiple commands are present.\n* **Prevent arguments targeting sensitive files.** There is little reason for custom code to target sensitive system files like `/etc/passwd`, so the sandbox prevents arguments that point to these files that may be targets for exfiltration.\n\nThere are [more options for sandboxing](https://github.com/pixee/python-security/blob/main/src/security/safe_command/api.py#L5) if you are interested in locking down system commands even more.\n\n## Dependency Updates\n\nThis codemod relies on an external dependency. We have automatically added this dependency to your project's `requirements.txt` file. \n\nThis library holds security tools for protecting Python API calls. \n\nThere are a number of places where Python project dependencies can be expressed, including `setup.py`, `pyproject.toml`, `setup.cfg`, and `requirements.txt` files. 
If this change is incorrect, or if you are using another packaging system such as `poetry`, it may be necessary for you to manually add the dependency to the proper location in your project.\n", "references": [{"url": "https://github.com/pixee/python-security/blob/main/src/security/safe_command/api.py", "description": "https://github.com/pixee/python-security/blob/main/src/security/safe_command/api.py"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/OS_Command_Injection_Defense_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/OS_Command_Injection_Defense_Cheat_Sheet.html"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/mitre.py", "diff": "--- \n+++ \n@@ -8,6 +8,8 @@\n import subprocess\n from .models import CSRF_user_tbl\n from django.views.decorators.csrf import csrf_exempt\n+from security import safe_command\n+\n # import os\n \n ## Mitre top1 | CWE:787\n@@ -227,7 +229,7 @@\n     return render(request, 'mitre/mitre_lab_17.html')\n \n def command_out(command):\n-    process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n+    process = safe_command.run(subprocess.Popen, command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n     return process.communicate()\n     \n \n", "changes": [{"lineNumber": "230", "description": "Replaces subprocess.{func} with more secure safe_command library functions.", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -39,6 +39,8 @@\n import logging\n import requests\n import re\n+from security import safe_command\n+\n #*****************************************Login and Registration****************************************************#\n \n def register(request):\n@@ -418,8 +420,7 @@\n             \n             try:\n                 # output=subprocess.check_output(command,shell=True,encoding=\"UTF-8\")\n-                process = 
subprocess.Popen(\n-                    command,\n+                process = safe_command.run(subprocess.Popen, command,\n                     shell=True,\n                     stdout=subprocess.PIPE, \n                     stderr=subprocess.PIPE)\n", "changes": [{"lineNumber": "421", "description": "Replaces subprocess.{func} with more secure safe_command library functions.", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "uninstaller.py", "diff": "--- \n+++ \n@@ -6,6 +6,7 @@\n import colorama\n import subprocess\n from shutil import rmtree, which\n+from security import safe_command\n \n \n # Platform indepent way to check if user is admin\n@@ -37,7 +38,7 @@\n         # It is important to upgrade pip first to avoid environment errors\n         if (platform.system != 'Windows'):\n             pip_v = \"pip3\" if (which('pip3') is not None) else \"pip\"\n-            subprocess.run([pip_v,\n+            safe_command.run(subprocess.run, [pip_v,\n                             \"install\",\n                             \"--upgrade\",\n                             \"pip\"],\n", "changes": [{"lineNumber": "40", "description": "Replaces subprocess.{func} with more secure safe_command library functions.", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "requirements.txt", "diff": "--- \n+++ \n@@ -31,3 +31,4 @@\n Werkzeug==2.1.2\n whitenoise==6.2.0\n zipp==3.8.0\n+security==1.2.1 \\\n    --hash=sha256:4ca5f8cfc6b836e2192a84bb5a28b72c17f3cd1abbfe3281f917394c6e6c9238 \\\n    --hash=sha256:0a9dc7b457330e6d0f92bdae3603fecb85394beefad0fd3b5058758a58781ded", "changes": [{"lineNumber": "34", "description": "This library holds security tools for protecting Python API calls.\n\nLicense: [MIT](https://opensource.org/license/MIT/) \u2705 [Open Source](https://github.com/pixee/python-security) \u2705 [More facts](https://pypi.org/project/security/)\n", "properties": {"contextual_description": true, "contextual_description_position": 
"right"}, "diffSide": "right", "packageActions": [{"action": "ADD", "result": "COMPLETED", "package": "security==1.2.1"}]}]}]}, {"codemod": "pixee:python/remove-future-imports", "summary": "Remove deprecated `__future__` imports", "description": "Many older codebases have `__future__` imports for forwards compatibility with features. As of this writing, all but one of those features is now stable in all currently supported versions of Python and so the imports are no longer needed. While such imports are harmless, they are also unnecessary and in most cases you probably just forgot to remove them. \n\nThis codemod removes all such `__future__` imports, preserving only those that are still necessary for forwards compatibility. \n\nOur changes look like the following:\n```diff\n import os\n-from __future__ import print_function\n\n print(\"HELLO\")\n```\n", "references": [{"url": "https://docs.python.org/3/library/__future__.html", "description": "https://docs.python.org/3/library/__future__.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/remove-unnecessary-f-str", "summary": "Remove Unnecessary F-strings", "description": "This codemod converts any f-strings without interpolated variables into regular strings.\nIn these cases the use of f-string is not necessary; a simple string literal is sufficient. 
\n\nWhile in some (extreme) cases we might expect a very modest performance\nimprovement, in general this is a fix that improves the overall cleanliness and\nquality of your code.\n\n```diff\n- var = f\"hello\"\n+ var = \"hello\"\n  ...\n```\n", "references": [{"url": "https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/f-string-without-interpolation.html", "description": "https://pylint.readthedocs.io/en/latest/user_guide/messages/warning/f-string-without-interpolation.html"}, {"url": "https://github.com/Instagram/LibCST/blob/main/libcst/codemod/commands/unnecessary_format_string.py", "description": "https://github.com/Instagram/LibCST/blob/main/libcst/codemod/commands/unnecessary_format_string.py"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/requests-verify", "summary": "Verify SSL Certificates for Requests.", "description": "This codemod checks that calls to the `requests` module API or the `httpx` library use `verify=True` or a path to a CA bundle to ensure TLS certificate validation.\n\nThe [requests documentation](https://requests.readthedocs.io/en/latest/api/) warns that the `verify` flag\n> When set to False, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to man-in-the-middle (MitM) attacks. 
Setting verify to False may be useful during local development or testing.\n\nSimilarly, setting `verify=False` when using the `httpx` library to make requests disables certificate verification.\n\nThe changes from this codemod look like this:\n\n\n```diff\n  import requests\n  \n- requests.get(\"www.google.com\", ...,verify=False)\n+ requests.get(\"www.google.com\", ...,verify=True)\n...\nimport httpx\n  \n- httpx.get(\"www.google.com\", ...,verify=False)\n+ httpx.get(\"www.google.com\", ...,verify=True)\n\n```\n\nThis codemod also checks other methods in the `requests` module and `httpx` library that accept a `verify` flag (e.g. `requests.post`, `httpx.AsyncClient`, etc.)\n", "references": [{"url": "https://requests.readthedocs.io/en/latest/api/", "description": "https://requests.readthedocs.io/en/latest/api/"}, {"url": "https://www.python-httpx.org/", "description": "https://www.python-httpx.org/"}, {"url": "https://owasp.org/www-community/attacks/Manipulator-in-the-middle_attack", "description": "https://owasp.org/www-community/attacks/Manipulator-in-the-middle_attack"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/secure-flask-cookie", "summary": "Use Safe Parameters in `flask` Response `set_cookie` Call", "description": "This codemod sets the most secure parameters when Flask applications call `set_cookie` on a response object. 
Without these parameters, your Flask\napplication cookies may be vulnerable to being intercepted and used to gain access to sensitive data.\n\nThe changes from this codemod look like this:\n\n```diff\n  from flask import Flask, session, make_response\n  app = Flask(__name__)\n  @app.route('/')\n    def index():\n      resp = make_response('Custom Cookie Set')\n    - resp.set_cookie('custom_cookie', 'value')\n    + resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n      return resp\n```\n", "references": [{"url": "https://flask.palletsprojects.com/en/3.0.x/api/#flask.Response.set_cookie", "description": "https://flask.palletsprojects.com/en/3.0.x/api/#flask.Response.set_cookie"}, {"url": "https://owasp.org/www-community/controls/SecureCookieAttribute", "description": "https://owasp.org/www-community/controls/SecureCookieAttribute"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/secure-random", "summary": "Secure Source of Randomness", "description": "This codemod replaces all instances of functions in the `random` module (e.g. `random.random()`) with their much more secure equivalents from the `secrets` module (e.g. `secrets.SystemRandom().random()`).\n\nThere is significant algorithmic complexity in getting computers to generate genuinely unguessable random bits. The `random.random()` function uses a method of pseudo-random number generation that unfortunately emits fairly predictable numbers.\n\nIf the numbers it emits are predictable, then it's obviously not safe to use in cryptographic operations, file name creation, token construction, password generation, and anything else that's related to security. 
In fact, it may affect security even if it's not directly obvious.\n\nSwitching to a more secure version is simple and the changes look something like this:\n\n```diff\n- import random\n+ import secrets\n  ...\n- random.random()\n+ secrets.SystemRandom().random()\n```\n", "references": [{"url": "https://owasp.org/www-community/vulnerabilities/Insecure_Randomness", "description": "https://owasp.org/www-community/vulnerabilities/Insecure_Randomness"}, {"url": "https://docs.python.org/3/library/random.html", "description": "https://docs.python.org/3/library/random.html"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -6,7 +6,6 @@\n from requests.structures import CaseInsensitiveDict\n from django.contrib.auth import login,authenticate\n from django.contrib.auth.forms import UserCreationForm\n-import random\n import string\n import os\n from hashlib import md5\n@@ -16,7 +15,6 @@\n #*****************************************Lab Requirements****************************************************#\n \n from .models import  FAANG,info,login,comments,otp\n-from random import randint\n from xml.dom.pulldom import parseString, START_ELEMENT\n from xml.sax.handler import feature_external_ges\n from xml.sax import make_parser\n@@ -39,6 +37,8 @@\n import logging\n import requests\n import re\n+import secrets\n+\n #*****************************************Login and Registration****************************************************#\n \n def register(request):\n@@ -484,7 +484,7 @@\n def Otp(request):\n     if request.method==\"GET\":\n         email=request.GET.get('email')\n-        otpN=randint(100,999)\n+        otpN=secrets.SystemRandom().randint(100,999)\n         if email and otpN:\n             if email==\"admin@pygoat.com\":\n                 otp.objects.filter(id=2).update(otp=otpN)\n@@ -668,7 +668,7 @@\n 
#*********************************************************A11*************************************************#\n \n def gentckt():\n-    return (''.join(random.choices(string.ascii_uppercase + string.ascii_lowercase, k=10)))\n+    return (''.join(secrets.SystemRandom().choices(string.ascii_uppercase + string.ascii_lowercase, k=10)))\n \n def insec_desgine(request):\n     if request.user.is_authenticated:\n", "changes": [{"lineNumber": "487", "description": "Replace random.{func} with more secure secrets library functions.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "671", "description": "Replace random.{func} with more secure secrets library functions.", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/secure-tempfile", "summary": "Upgrade and Secure Temp File Creation", "description": "This codemod replaces all `tempfile.mktemp` calls to the more secure `tempfile.mkstemp`.\n\nThe Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit\nthat `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition.\nThe changes from this codemod look like this:\n\n\n```diff\n  import tempfile\n- tempfile.mktemp(...)\n+ tempfile.mkstemp(...)\n```\n", "references": [{"url": "https://docs.python.org/3/library/tempfile.html#tempfile.mktemp", "description": "https://docs.python.org/3/library/tempfile.html#tempfile.mktemp"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/upgrade-sslcontext-minimum-version", "summary": "Upgrade SSLContext Minimum Version", "description": "This codemod replaces all unsafe and/or deprecated SSL/TLS versions when used\nto set the `ssl.SSLContext.minimum_version` attribute. 
It uses\n`ssl.TLSVersion.TLSv1_2` instead, which ensures a safe default minimum TLS\nversion.\n\nOur change involves modifying the `minimum_version` attribute of\n`ssl.SSLContext` instances to use `ssl.TLSVersion.TLSv1_2`.\n\n```diff\n  import ssl\n  context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT)\n- context.minimum_version = ssl.TLSVersion.SSLv3\n+ context.minimum_version = ssl.TLSVersion.TLSv1_2\n```\n\nThere is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2.\n", "references": [{"url": "https://docs.python.org/3/library/ssl.html#security-considerations", "description": "https://docs.python.org/3/library/ssl.html#security-considerations"}, {"url": "https://datatracker.ietf.org/doc/rfc8996/", "description": "https://datatracker.ietf.org/doc/rfc8996/"}, {"url": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1", "description": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/upgrade-sslcontext-tls", "summary": "Upgrade TLS Version In SSLContext", "description": "This codemod replaces the use of all unsafe and/or deprecated SSL/TLS versions\nin the `ssl.SSLContext` constructor. It uses `PROTOCOL_TLS_CLIENT` instead,\nwhich ensures a safe default TLS version. 
It also sets the `protocol` parameter\nto `PROTOCOL_TLS_CLIENT` in calls that omit it, since omitting the parameter is now deprecated.\n\nOur change involves modifying the argument to `ssl.SSLContext()` to\nuse `PROTOCOL_TLS_CLIENT`.\n\n```diff\n  import ssl\n- context = ssl.SSLContext()  \n+ context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT)\n- context = ssl.SSLContext(protocol=PROTOCOL_SSLv3)\n+ context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT)\n```\n\nThere is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2.\n\nThe use of explicit TLS versions (even safe ones) is deprecated by the `ssl`\nmodule, so it is necessary to choose either `PROTOCOL_TLS_CLIENT` or\n`PROTOCOL_TLS_SERVER`. Using `PROTOCOL_TLS_CLIENT` is expected to be the\ncorrect choice for most applications but in some cases it will be necessary to\nuse `PROTOCOL_TLS_SERVER` instead.\n", "references": [{"url": "https://docs.python.org/3/library/ssl.html#security-considerations", "description": "https://docs.python.org/3/library/ssl.html#security-considerations"}, {"url": "https://datatracker.ietf.org/doc/rfc8996/", "description": "https://datatracker.ietf.org/doc/rfc8996/"}, {"url": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1", "description": "https://www.digicert.com/blog/depreciating-tls-1-0-and-1-1"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/url-sandbox", "summary": "Sandbox URL Creation", "description": "This codemod sandboxes calls to [`requests.get`](https://requests.readthedocs.io/en/latest/api/#requests.get) to be more resistant to Server-Side Request Forgery (SSRF) attacks.\n\nMost of the time when you make a `GET` request to a URL, you're intending to reference an HTTP endpoint, like an internal microservice. However, URLs can point to local file system files, a Gopher stream in your local network, a JAR file on a remote Internet site, and all kinds of other unexpected and undesirable outcomes. 
When the URL values are influenced by attackers, they can trick your application into fetching internal resources, running malicious code, or otherwise harming the system.\nConsider the following code for a Flask app:\n\n```python\nfrom flask import Flask, request\nimport requests\n\napp = Flask(__name__)\n\n@app.route(\"/request-url\")\ndef request_url():\n    url = request.args[\"loc\"]\n    resp = requests.get(url)\n    ...\n```\n\nIn this case, an attacker could supply a value like `\"http://169.254.169.254/user-data/\"` and attempt to access user information.\n\nOur changes introduce sandboxing around URL creation that force developers to specify some boundaries on the types of URLs they expect to create:\n\n```diff\n  from flask import Flask, request\n- import requests\n+ from security import safe_requests\n\n  app = Flask(__name__)\n\n  @app.route(\"/request-url\")\n  def request_url():\n    url = request.args[\"loc\"]\n-   resp = requests.get(url)\n+   resp = safe_requests.get(url)\n    ...\n```\n\nThis change alone reduces attack surface significantly because the default behavior of `safe_requests.get` raises a `SecurityException` if\na user attempts to access a known infrastructure location, unless specifically disabled.\n\n\nIf you have feedback on this codemod, [please let us know](mailto:feedback@pixee.ai)!\n\n## F.A.Q. \n\n### Why does this codemod require a Pixee dependency?\n\nWe always prefer to use built-in Python functions or one from a well-known and trusted community dependency. However, we cannot find any such control. If you know of one, [please let us know](https://ask.pixee.ai/feedback).\n\n### Why is this codemod marked as Merge After Cursory Review?\n\nBy default, the protection only weaves in 2 checks, which we believe will not cause any issues with the vast majority of code:\n1. The given URL must be HTTP/HTTPS.\n2. 
The given URL must not point to a \"well-known infrastructure target\", which includes things like AWS Metadata Service endpoints, and internal routers (e.g., 192.168.1.1) which are common targets of attacks.\n\nHowever, on rare occasions an application may use a URL protocol like \"file://\" or \"ftp://\" in backend or middleware code.\n\nIf you want to allow those protocols, change the incoming PR to look more like this and get the best security possible:\n\n```diff\n-resp = requests.get(url)\n+resp = safe_requests.get(url, allowed_protocols=(\"ftp\",))\n```\n\n## Dependency Updates\n\nThis codemod relies on an external dependency. However, we were unable to automatically add the dependency to your project. \n\nThis library holds security tools for protecting Python API calls. \n\nThere are a number of places where Python project dependencies can be expressed, including `setup.py`, `pyproject.toml`, `setup.cfg`, and `requirements.txt` files. You may need to manually add this dependency to the proper location in your project.\n\n### Manual Installation\n\nFor `setup.py`:\n```diff\n install_requires=[\n+    \"security==1.2.1\",\n ],\n```\n\nFor `pyproject.toml` (using `setuptools`):\n```diff\n [project]\n dependencies = [\n+    \"security==1.2.1\",\n ]\n```\n\nFor `setup.cfg`:\n```diff\n [options]\n install_requires =\n+    security==1.2.1\n```\n\nFor `requirements.txt`:\n```diff\n+security==1.2.1\n```\n\nFor more information on adding dependencies to `setuptools` projects, see [the setuptools documentation](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#declaring-required-dependency). 
\n\nIf you are using another build system, please refer to the documentation for that system to determine how to add dependencies.\n", "references": [{"url": "https://github.com/pixee/python-security/blob/main/src/security/safe_requests/api.py", "description": "https://github.com/pixee/python-security/blob/main/src/security/safe_requests/api.py"}, {"url": "https://portswigger.net/web-security/ssrf", "description": "https://portswigger.net/web-security/ssrf"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"}, {"url": "https://www.rapid7.com/blog/post/2021/11/23/owasp-top-10-deep-dive-defending-against-server-side-request-forgery/", "description": "https://www.rapid7.com/blog/post/2021/11/23/owasp-top-10-deep-dive-defending-against-server-side-request-forgery/"}, {"url": "https://blog.assetnote.io/2021/01/13/blind-ssrf-chains/", "description": "https://blog.assetnote.io/2021/01/13/blind-ssrf-chains/"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/playground/A6/soln.py", "diff": "--- \n+++ \n@@ -1,11 +1,11 @@\n-import requests\n+from security import safe_requests\n \n def check_vuln(list_of_modules)->list:\n     vulns = []\n     for i in list_of_modules:\n         k = i.split(\"==\")\n         url = f\"https://pypi.org/pypi/{k[0]}/{k[1]}/json\"\n-        response = requests.get(url)\n+        response = safe_requests.get(url)\n         response.raise_for_status()\n         info = response.json()\n         existing_vuln = info['vulnerabilities']\n", "changes": [{"lineNumber": "8", "description": "Switch use of requests for security.safe_requests", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/playground/A6/utility.py", "diff": "--- \n+++ \n@@ -1,11 +1,11 @@\n-import requests\n+from security import safe_requests\n \n 
def check_vuln(list_of_modules)->list:\n     vulns = []\n     for i in list_of_modules:\n         k = i.split(\"==\")\n         url = f\"https://pypi.org/pypi/{k[0]}/{k[1]}/json\"\n-        response = requests.get(url)\n+        response = safe_requests.get(url)\n         response.raise_for_status()\n         info = response.json()\n         existing_vuln = info['vulnerabilities']\n", "changes": [{"lineNumber": "8", "description": "Switch use of requests for security.safe_requests", "properties": {}, "diffSide": "right", "packageActions": []}]}, {"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -1,11 +1,8 @@\n import hashlib\n from django.shortcuts import render,redirect\n-from django.http import HttpResponse, HttpResponseBadRequest, JsonResponse\n-from .models import  FAANG, AF_session_id,info,login,comments,authLogin, tickits, sql_lab_table,Blogs,CF_user,AF_admin\n-from django.core import serializers\n-from requests.structures import CaseInsensitiveDict\n-from django.contrib.auth import login,authenticate\n-from django.contrib.auth.forms import UserCreationForm\n+from django.http import HttpResponse, HttpResponseBadRequest\n+from .models import  FAANG, AF_session_id,login,comments,authLogin, tickits, sql_lab_table,Blogs,CF_user,AF_admin\n+from django.contrib.auth import login\n import random\n import string\n import os\n@@ -15,19 +12,17 @@\n from django.contrib import messages\n #*****************************************Lab Requirements****************************************************#\n \n-from .models import  FAANG,info,login,comments,otp\n+from .models import  FAANG,login,comments,otp\n from random import randint\n from xml.dom.pulldom import parseString, START_ELEMENT\n from xml.sax.handler import feature_external_ges\n from xml.sax import make_parser\n from django.views.decorators.csrf import csrf_exempt\n-from django.template import loader\n from django.template.loader import render_to_string\n import subprocess\n import pickle\n import base64\n 
import yaml\n-import json\n from dataclasses import dataclass\n import uuid\n from .utility import filter_blog, customHash\n@@ -37,8 +32,9 @@\n from io import BytesIO\n from argon2 import PasswordHasher\n import logging\n-import requests\n import re\n+from security import safe_requests\n+\n #*****************************************Login and Registration****************************************************#\n \n def register(request):\n@@ -951,7 +947,7 @@\n     elif request.method == \"POST\":\n         url = request.POST[\"url\"]\n         try:\n-            response = requests.get(url)\n+            response = safe_requests.get(url)\n             return render(request, \"Lab/ssrf/ssrf_lab2.html\", {\"response\": response.content.decode()})\n         except:\n             return render(request, \"Lab/ssrf/ssrf_lab2.html\", {\"error\": \"Invalid URL\"})\n", "changes": [{"lineNumber": "954", "description": "Switch use of requests for security.safe_requests", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/use-defusedxml", "summary": "Use `defusedxml` for Parsing XML", "description": "You might be surprised to learn that Python's built-in XML libraries are [considered insecure](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) against various kinds of attacks.\n\nIn fact, the [Python documentation itself](https://docs.python.org/3/library/xml.html#the-defusedxml-package) recommends the use of [defusedxml](https://pypi.org/project/defusedxml/) for parsing untrusted XML data. `defusedxml` is an [open-source](https://github.com/tiran/defusedxml), permissively licensed project that is intended as a drop-in replacement for Python's standard library XML parsers.\n\nThis codemod updates all relevant uses of the standard library parsers with safe versions from `defusedxml`. 
It also adds the `defusedxml` dependency to your project where possible.\n\nThe changes from this codemod look like this:\n```diff\n- from xml.etree.ElementTree import parse\n+ import defusedxml.ElementTree\n\n- et = parse('data.xml')\n+ et = defusedxml.ElementTree.parse('data.xml')\n```\n\n## Dependency Updates\n\nThis codemod relies on an external dependency. However, we were unable to automatically add the dependency to your project. \n\nThis package is [recommended by the Python community](https://docs.python.org/3/library/xml.html#the-defusedxml-package) to protect against XML vulnerabilities. \n\nThere are a number of places where Python project dependencies can be expressed, including `setup.py`, `pyproject.toml`, `setup.cfg`, and `requirements.txt` files. You may need to manually add this dependency to the proper location in your project.\n\n### Manual Installation\n\nFor `setup.py`:\n```diff\n install_requires=[\n+    \"defusedxml==0.7.1\",\n ],\n```\n\nFor `pyproject.toml` (using `setuptools`):\n```diff\n [project]\n dependencies = [\n+    \"defusedxml==0.7.1\",\n ]\n```\n\nFor `setup.cfg`:\n```diff\n [options]\n install_requires =\n+    defusedxml==0.7.1\n```\n\nFor `requirements.txt`:\n```diff\n+defusedxml==0.7.1\n```\n\nFor more information on adding dependencies to `setuptools` projects, see [the setuptools documentation](https://setuptools.pypa.io/en/latest/userguide/dependency_management.html#declaring-required-dependency). 
\n\nIf you are using another build system, please refer to the documentation for that system to determine how to add dependencies.\n", "references": [{"url": "https://docs.python.org/3/library/xml.html#xml-vulnerabilities", "description": "https://docs.python.org/3/library/xml.html#xml-vulnerabilities"}, {"url": "https://docs.python.org/3/library/xml.html#the-defusedxml-package", "description": "https://docs.python.org/3/library/xml.html#the-defusedxml-package"}, {"url": "https://pypi.org/project/defusedxml/", "description": "https://pypi.org/project/defusedxml/"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -17,9 +17,8 @@\n \n from .models import  FAANG,info,login,comments,otp\n from random import randint\n-from xml.dom.pulldom import parseString, START_ELEMENT\n+from xml.dom.pulldom import START_ELEMENT\n from xml.sax.handler import feature_external_ges\n-from xml.sax import make_parser\n from django.views.decorators.csrf import csrf_exempt\n from django.template import loader\n from django.template.loader import render_to_string\n@@ -39,6 +38,9 @@\n import logging\n import requests\n import re\n+import defusedxml.pulldom\n+import defusedxml.sax\n+\n #*****************************************Login and Registration****************************************************#\n \n def register(request):\n@@ -247,9 +249,9 @@\n @csrf_exempt\n def xxe_parse(request):\n \n-    parser = make_parser()\n+    parser = defusedxml.sax.make_parser()\n     parser.setFeature(feature_external_ges, True)\n-    doc = parseString(request.body.decode('utf-8'), parser=parser)\n+    doc = defusedxml.pulldom.parseString(request.body.decode('utf-8'), parser=parser)\n     for event, node in doc:\n         if event 
== START_ELEMENT and node.tagName == 'text':\n             doc.expandNode(node)\n", "changes": [{"lineNumber": "250", "description": "Replace builtin XML method with safe `defusedxml` method", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "252", "description": "Replace builtin XML method with safe `defusedxml` method", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/use-generator", "summary": "Use Generator Expressions Instead of List Comprehensions", "description": "Imagine that someone handed you a pile of 100 apples and then asked you to count how many of them were green without putting any of them down. You'd probably find this quite challenging and you'd struggle to hold the pile of apples at all. Now imagine someone handed you the apples one at a time and asked you to just count the green ones. This would be a much easier task.\n\nIn Python, when we use list comprehensions, it's like we've created the entire pile of apples and asked the interpreter to hold onto it. Sometimes, a better practice involves using generator expressions, which create iterators that yield objects one at a time. For large data sets, this can turn a slow, memory-intensive operation into a relatively fast one.\n\nUsing generator expressions instead of list comprehensions can lead to better performance. This is especially true for functions such as `any` where it's not always necessary to evaluate the entire list before returning. For other functions such as `max` or `sum` it means that the program does not need to store the entire list in memory. These performance effects become more noticeable as the sizes of the lists involved grow large.\n\nThis codemod replaces the use of a list comprehension expression with a generator expression within certain function calls. 
Generators allow for lazy evaluation of the iterator, which can have performance benefits.\n\nThe changes from this codemod look like this:\n```diff\n- result = sum([x for x in range(1000)])\n+ result = sum(x for x in range(1000))\n```\n", "references": [{"url": "https://pylint.readthedocs.io/en/latest/user_guide/messages/refactor/use-a-generator.html", "description": "https://pylint.readthedocs.io/en/latest/user_guide/messages/refactor/use-a-generator.html"}, {"url": "https://docs.python.org/3/glossary.html#term-generator-expression", "description": "https://docs.python.org/3/glossary.html#term-generator-expression"}, {"url": "https://docs.python.org/3/glossary.html#term-list-comprehension", "description": "https://docs.python.org/3/glossary.html#term-list-comprehension"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/use-set-literal", "summary": "Use Set Literals Instead of Sets from Lists", "description": "This codemod converts Python set constructions using literal list arguments into more efficient and readable set literals. It simplifies expressions like `set([1, 2, 3])` to `{1, 2, 3}`, enhancing both performance and code clarity.\n\nOur changes look like this:\n```diff\n-x = set([1, 2, 3])\n+x = {1, 2, 3}\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/use-walrus-if", "summary": "Use Assignment Expression (Walrus) In Conditional", "description": "This codemod updates places where two separate statements involving an assignment and conditional can be replaced with a single Assignment Expression (commonly known as the walrus operator).\n\nMany developers use this operator in new code that they write but don't have the time to find and update every place in existing code. So we do it for you! 
We believe this leads to more concise and readable code.\n\nThe changes from this codemod look like this:\n\n```diff\n- x = foo()\n- if x is not None:\n+ if (x := foo()) is not None:\n      print(x)\n```\n", "references": [{"url": "https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions", "description": "https://docs.python.org/3/whatsnew/3.8.html#assignment-expressions"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -524,8 +524,7 @@\n         return redirect('login')\n \n def secret(request):\n-    XHost = request.headers.get('X-Host', 'None')\n-    if(XHost == 'admin.localhost:8000'):\n+    if((XHost := request.headers.get('X-Host', 'None')) == 'admin.localhost:8000'):\n         return render(request,\"Lab/sec_mis/sec_mis_lab.html\", {\"secret\": \"S3CR37K3Y\"})\n     else:\n         return render(request,\"Lab/sec_mis/sec_mis_lab.html\", {\"no_secret\": \"Only admin.localhost:8000 can access, Your X-Host is \" + XHost})\n@@ -931,9 +930,8 @@\n \n \n def ssrf_target(request):\n-    x_forwarded_for = request.META.get('HTTP_X_FORWARDED_FOR')\n-\n-    if x_forwarded_for:\n+\n+    if x_forwarded_for := request.META.get('HTTP_X_FORWARDED_FOR'):\n         ip = x_forwarded_for.split(',')[0]\n     else:\n         ip = request.META.get('REMOTE_ADDR')\n", "changes": [{"lineNumber": "94", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "107", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "205", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "527", "description": "Replaces multiple expressions involving `if` operator with 'walrus' 
operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "639", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "650", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "934", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "1137", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}, {"lineNumber": "1173", "description": "Replaces multiple expressions involving `if` operator with 'walrus' operator.", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/bad-lock-with-statement", "summary": "Separate Lock Instantiation from `with` Call", "description": "This codemod separates creating a threading lock instance from calling it as a context manager. Calling `with threading.Lock()` does not have the effect you would expect. The lock is not acquired. 
Instead, to correctly acquire a lock, create the instance separately, before calling it as a context manager.\n\nThe change will apply to any of these `threading` classes: `Lock`, `RLock`, `Condition`, `Semaphore`, and `BoundedSemaphore`.\n\nThe change looks like this:\n\n```diff\n  import threading\n- with threading.Lock():\n+ lock = threading.Lock()\n+ with lock:\n     ...\n```\n", "references": [{"url": "https://pylint.pycqa.org/en/latest/user_guide/messages/warning/useless-with-lock.", "description": "https://pylint.pycqa.org/en/latest/user_guide/messages/warning/useless-with-lock."}, {"url": "https://docs.python.org/3/library/threading.html#using-locks-conditions-and-semaphores-in-the-with-statement", "description": "https://docs.python.org/3/library/threading.html#using-locks-conditions-and-semaphores-in-the-with-statement"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/sql-parameterization", "summary": "Parameterize SQL Queries", "description": "This codemod refactors SQL statements to be parameterized, rather than built by hand.\n\nWithout parameterization, developers must remember to escape string inputs using the rules for that column type and database. This usually results in bugs -- and sometimes vulnerabilities. 
Although we can't tell for sure if your code is actually exploitable, this change will make the code more robust in case the conditions which prevent exploitation today ever go away.\n\nOur changes look something like this:\n\n```diff\nimport sqlite3\n\nname = input()\nconnection = sqlite3.connect(\"my_db.db\")\ncursor = connection.cursor()\n- cursor.execute(\"SELECT * from USERS WHERE name ='\" + name + \"'\")\n+ cursor.execute(\"SELECT * from USERS WHERE name =?\", (name, ))\n```\n", "references": [{"url": "https://cwe.mitre.org/data/definitions/89.html", "description": "https://cwe.mitre.org/data/definitions/89.html"}, {"url": "https://owasp.org/www-community/attacks/SQL_Injection", "description": "https://owasp.org/www-community/attacks/SQL_Injection"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/secure-flask-session-configuration", "summary": "Flip Insecure `Flask` Session Configurations", "description": "Flask applications can configure sessions behavior at the application level. 
\nThis codemod looks for Flask application configuration that set `SESSION_COOKIE_HTTPONLY`, `SESSION_COOKIE_SECURE`, or `SESSION_COOKIE_SAMESITE` to an insecure value and changes it to a secure one.\n\nThe changes from this codemod look like this:\n\n```diff\n  from flask import Flask\n  app = Flask(__name__)\n- app.config['SESSION_COOKIE_HTTPONLY'] = False\n- app.config.update(SESSION_COOKIE_SECURE=False)\n+ app.config['SESSION_COOKIE_HTTPONLY'] = True\n+ app.config.update(SESSION_COOKIE_SECURE=True)\n```\n", "references": [{"url": "https://owasp.org/www-community/controls/SecureCookieAttribute", "description": "https://owasp.org/www-community/controls/SecureCookieAttribute"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/Session_Management_Cheat_Sheet.html", "description": "https://cheatsheetseries.owasp.org/cheatsheets/Session_Management_Cheat_Sheet.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/subprocess-shell-false", "summary": "Use `shell=False` in `subprocess` Function Calls", "description": "This codemod sets the `shell` keyword argument to `False` in `subprocess` module function calls that have set it to `True`.\n\nSetting `shell=True` will execute the provided command through the system shell which can lead to shell injection vulnerabilities. In the worst case this can give an attacker the ability to run arbitrary commands on your system. 
In most cases using `shell=False` is sufficient and leads to much safer code.\n\nThe changes from this codemod look like this:\n\n```diff\n import subprocess\n- subprocess.run(\"echo 'hi'\", shell=True)\n+ subprocess.run(\"echo 'hi'\", shell=False)\n```\n", "references": [{"url": "https://docs.python.org/3/library/subprocess.html#security-considerations", "description": "https://docs.python.org/3/library/subprocess.html#security-considerations"}, {"url": "https://en.wikipedia.org/wiki/Code_injection#Shell_injection", "description": "https://en.wikipedia.org/wiki/Code_injection#Shell_injection"}, {"url": "https://stackoverflow.com/a/3172488", "description": "https://stackoverflow.com/a/3172488"}], "properties": {}, "failedFiles": [], "changeset": [{"path": "introduction/views.py", "diff": "--- \n+++ \n@@ -420,8 +420,7 @@\n                 # output=subprocess.check_output(command,shell=True,encoding=\"UTF-8\")\n                 process = subprocess.Popen(\n                     command,\n-                    shell=True,\n-                    stdout=subprocess.PIPE, \n+                    shell=False, stdout=subprocess.PIPE, \n                     stderr=subprocess.PIPE)\n                 stdout, stderr = process.communicate()\n                 data = stdout.decode('utf-8')\n", "changes": [{"lineNumber": "421", "description": "Set `shell` keyword argument to `False`", "properties": {}, "diffSide": "right", "packageActions": []}]}]}, {"codemod": "pixee:python/fix-file-resource-leak", "summary": "Automatically Close Resources", "description": "This codemod wraps assignments of `open` calls in a with statement. Without explicit closing, these resources will be \"leaked\" and won't be re-claimed until garbage collection. In situations where these resources are leaked rapidly (either through malicious repetitive action or unusually spiky usage), connection pool or file handle exhaustion will occur. 
These types of failures tend to be catastrophic, resulting in downtime and often affecting downstream applications.\n\nOur changes look something like this:\n\n```diff\nimport tempfile\npath = tempfile.NamedTemporaryFile().name\n-file = open(path, 'w', encoding='utf-8')\n-file.write('Hello World')\n+with open(path, 'w', encoding='utf-8') as file:\n+   file.write('Hello World')\n```\n", "references": [{"url": "https://cwe.mitre.org/data/definitions/772.html", "description": "https://cwe.mitre.org/data/definitions/772.html"}, {"url": "https://cwe.mitre.org/data/definitions/404.html", "description": "https://cwe.mitre.org/data/definitions/404.html"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/django-receiver-on-top", "summary": "Ensure Django @receiver is the first decorator", "description": "Django uses signals to notify and handle actions that happen elsewhere in the application. You can define a response to a given signal by decorating a function with the `@receiver(signal)` decorator. The order in which the decorators are declared for this function is important. If the `@receiver` decorator is not on top, any decorators before it will be ignored. \nOur changes look something like this:\n\n```diff\nfrom django.dispatch import receiver\nfrom django.views.decorators.csrf import csrf_exempt\nfrom django.core.signals import request_finished\n\n+@receiver(request_finished)\n@csrf_exempt\n-@receiver(request_finished)\ndef foo():\n    pass\n```\n", "references": [{"url": "https://docs.djangoproject.com/en/4.1/topics/signals/", "description": "https://docs.djangoproject.com/en/4.1/topics/signals/"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/numpy-nan-equality", "summary": "Replace == comparison with numpy.isnan()", "description": "Comparisons against `numpy.nan` always result in `False`. Thus comparing an expression directly against `numpy.nan` is always unintended. 
The correct way to compare a value for `NaN` is to use the `numpy.isnan` function.\n\nOur changes look something like this:\n\n```diff\nimport numpy as np\n\na = np.nan\n-if a == np.nan:\n+if np.isnan(a):\n    pass\n```\n", "references": [{"url": "https://numpy.org/doc/stable/reference/constants.html#numpy.nan", "description": "https://numpy.org/doc/stable/reference/constants.html#numpy.nan"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/django-json-response-type", "summary": "Set content type to `application/json` for `django.http.HttpResponse` with JSON data", "description": "The default `content_type` for `HttpResponse` in Django is `'text/html'`. This is true even when the response contains JSON data.\nIf the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS). \nThis fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. 
Our changes look something like this:\n\n```diff\nfrom django.http import HttpResponse\nimport json\n\ndef foo(request):\n    json_response = json.dumps({ \"user_input\": request.GET.get(\"input\") })\n-    return HttpResponse(json_response)\n+    return HttpResponse(json_response, content_type=\"application/json\")\n```\n", "references": [{"url": "https://docs.djangoproject.com/en/4.0/ref/request-response/#django.http.HttpResponse.__init__", "description": "https://docs.djangoproject.com/en/4.0/ref/request-response/#django.http.HttpResponse.__init__"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts", "description": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/flask-json-response-type", "summary": "Set content type to `application/json` for `flask.make_response` with JSON data", "description": "The default `mimetype` for `make_response` in Flask is `'text/html'`. This is true even when the response contains JSON data.\nIf the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS). \nThis fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. 
Our changes look something like this:\n\n```diff\nfrom flask import make_response, Flask\nimport json\n\napp = Flask(__name__)\n\n@app.route(\"/test\")\ndef foo(request):\n    json_response = json.dumps({ \"user_input\": request.GET.get(\"input\") })\n-   return make_response(json_response)\n+   return make_response(json_response, {'Content-Type':'application/json'})\n```\n", "references": [{"url": "https://flask.palletsprojects.com/en/2.3.x/patterns/javascript/#return-json-from-views", "description": "https://flask.palletsprojects.com/en/2.3.x/patterns/javascript/#return-json-from-views"}, {"url": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts", "description": "https://cheatsheetseries.owasp.org/cheatsheets/Cross_Site_Scripting_Prevention_Cheat_Sheet.html#output-encoding-for-javascript-contexts"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/exception-without-raise", "summary": "Ensure bare exception statements are raised", "description": "This codemod fixes cases where an exception is referenced by itself in a statement without being raised. This most likely indicates a bug: you probably meant to actually raise the exception. \n\nOur changes look something like this:\n```diff\ntry:\n-   ValueError\n+   raise ValueError\nexcept:\n    pass\n```\n", "references": [{"url": "https://docs.python.org/3/tutorial/errors.html#raising-exceptions", "description": "https://docs.python.org/3/tutorial/errors.html#raising-exceptions"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/literal-or-new-object-identity", "summary": "Replace `is` with `==` for literal or new object comparisons", "description": "The `is` and `is not` operators only evaluate to `True` when the expressions on each side have the same `id`. In other words, `a is b` is equivalent to `id(a) == id(b)`. 
With few exceptions, objects and literals have unique identities and thus shouldn't generally be compared by using the `is` or `is not` operators.\n\nOur changes look something like this:\n\n```diff\ndef foo(l):\n-    return l is [1,2,3]\n+    return l == [1,2,3]\n```\n", "references": [{"url": "https://docs.python.org/3/library/stdtypes.html#comparisons", "description": "https://docs.python.org/3/library/stdtypes.html#comparisons"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/remove-module-global", "summary": "Remove `global` Usage at Module Level", "description": "Using the `global` keyword is necessary only when you intend to modify a module-level (aka global) variable within a non-global scope, such as within a class or function. It is unnecessary to use `global` at the module level.\n\nOur changes look something like this:\n\n```diff\n price = 25\n print(\"hello\")\n- global price\n price = 30\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/remove-debug-breakpoint", "summary": "Remove Calls to `builtin` `breakpoint` and `pdb.set_trace`", "description": "This codemod removes any calls to `breakpoint()` or `pdb.set_trace()` which are generally only used for interactive debugging and should not be deployed in production code.\n\nIn most cases if these calls are included in committed code, they were left there by mistake and indicate a potential problem.\n\n```diff\n print(\"hello\")\n- breakpoint()\n print(\"world\")\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/combine-startswith-endswith", "summary": "Simplify Boolean Expressions Using `startswith` and `endswith`", "description": "Many developers are not necessarily aware that the `startswith` and `endswith` methods of `str` objects can accept a tuple of strings to match. 
This means that there is a lot of code that uses boolean expressions such as `x.startswith('foo') or x.startswith('bar')` instead of the simpler expression `x.startswith(('foo', 'bar'))`.\n\nThis codemod simplifies the boolean expressions where possible which leads to cleaner and more concise code.\n\nThe changes from this codemod look like this:\n\n```diff\n  x = 'foo'\n- if x.startswith(\"foo\") or x.startswith(\"bar\"):\n+ if x.startswith((\"foo\", \"bar\")):\n     ...\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-deprecated-logging-warn", "summary": "Replace Deprecated `logging.warn`", "description": "The `warn` method from `logging` has been [deprecated](https://docs.python.org/3/library/logging.html#logging.Logger.warning) in favor of `warning` since Python 3.3. Since the old method `warn` has been retained for a long time, there are a lot of developers that are unaware of this change and consequently a lot of code using the older method.\n\nOur changes look like the following:\n```diff\n import logging\n\n- logging.warn(\"hello\")\n+ logging.warning(\"hello\")\n ...\n log = logging.getLogger(\"my logger\")\n- log.warn(\"hello\")\n+ log.warning(\"hello\") \n```\n", "references": [{"url": "https://docs.python.org/3/library/logging.html#logging.Logger.warning", "description": "https://docs.python.org/3/library/logging.html#logging.Logger.warning"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/flask-enable-csrf-protection", "summary": "Enable CSRF protection globally for a Flask app.", "description": "Cross-site request forgery (CSRF) is an attack where a user is tricked by a malicious agent into submitting an unintended request (e.g., login requests). A common way to mitigate this issue is to embed an additional token into requests to identify requests from unauthorized locations.\n\nFlask views using `FlaskForm` have CSRF protection enabled by default. 
However, other views may use AJAX to perform unsafe HTTP methods. FlaskWTF provides a way to enable CSRF protection globally for all views of a Flask app.\n\nThe changes in this codemod may require manual additions to maintain proper functionality. You need to set up either a Flask `SECRET_KEY` or a `WTF_CSRF_SECRET_KEY` in your app configuration and adjust any views with HTML forms and JavaScript requests to include the CSRF token. See the [FlaskWTF docs](https://flask-wtf.readthedocs.io/en/1.2.x/csrf/) for examples on how to do it.\n\nOur changes look something like this:\n\n```diff\nfrom flask import Flask\n+from flask_wtf.csrf import CSRFProtect\n\napp = Flask(__name__)\n+csrf_app = CSRFProtect(app)\n```\n", "references": [{"url": "https://owasp.org/www-community/attacks/csrf", "description": "https://owasp.org/www-community/attacks/csrf"}, {"url": "https://flask-wtf.readthedocs.io/en/1.2.x/csrf/", "description": "https://flask-wtf.readthedocs.io/en/1.2.x/csrf/"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/replace-flask-send-file", "summary": "Replace unsafe usage of `flask.send_file`", "description": "The `send_file` function from Flask is susceptible to a path traversal attack if its input is not properly validated.\nIn a path traversal attack, the malicious agent can craft a path containing special paths like `./` or `../` to resolve a file outside of the expected directory path. This potentially allows the agent to overwrite, delete or read arbitrary files. 
In the case of `flask.send_file`, the result is that a malicious user could potentially download sensitive files that exist on the filesystem where the application is being hosted.\nFlask offers a native solution with the `flask.send_from_directory` function that validates the given path.\n\nOur changes look something like this:\n\n```diff\n-from flask import Flask, send_file\n+from flask import Flask\n+import flask\n+from pathlib import Path\n\napp = Flask(__name__)\n\n@app.route(\"/uploads/<path:name>\")\ndef download_file(name):\n-    return send_file(f'path/to/{name}.txt')\n+    return flask.send_from_directory((p := Path(f'path/to/{name}.txt')).parent, p.name)\n```\n", "references": [{"url": "https://flask.palletsprojects.com/en/3.0.x/api/#flask.send_from_directory", "description": "https://flask.palletsprojects.com/en/3.0.x/api/#flask.send_from_directory"}, {"url": "https://owasp.org/www-community/attacks/Path_Traversal", "description": "https://owasp.org/www-community/attacks/Path_Traversal"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/remove-assertion-in-pytest-raises", "summary": "Moves assertions out of `pytest.raises` scope", "description": "The context manager object `pytest.raises(<exception>)` asserts that the code contained within its scope raises an exception of type `<exception>`. The documentation points out that the exception must be raised in the last line of its scope and any line afterwards won't be executed. \nIncluding asserts at the end of the scope is a common error. 
This codemod addresses that by moving them out of the scope.\nOur changes look something like this:\n\n```diff\nimport pytest\n\ndef test_foo():\n    with pytest.raises(ZeroDivisionError):\n        error = 1/0\n-       assert 1\n-       assert 2\n+   assert 1\n+   assert 2\n```\n", "references": [{"url": "https://docs.pytest.org/en/7.4.x/reference/reference.html#pytest-raises", "description": "https://docs.pytest.org/en/7.4.x/reference/reference.html#pytest-raises"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-assert-tuple", "summary": "Fix `assert` on Non-Empty Tuple Literal", "description": "An assertion on a non-empty tuple will always evaluate to `True`. This means that `assert` statements involving non-empty tuple literals are likely unintentional and should be rewritten. This codemod rewrites the original `assert` statement by creating a new `assert` for each item in the original tuple.\n\nThe changes from this codemod look like this:\n\n```diff\n- assert (1 == 1, 2 == 2)\n+ assert 1 == 1\n+ assert 2 == 2\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/lazy-logging", "summary": "Convert Eager Logging to Lazy Logging", "description": "This codemod converts \"eager\" logging into \"lazy\" logging, which is preferred for performance efficiency and resource optimization.\nLazy logging defers the actual construction and formatting of log messages until it's confirmed that the message will be logged based on the current log level, thereby avoiding unnecessary computation for messages that will not be logged. 
\n\nOur changes look something like this:\n\n```diff\nimport logging\ne = \"Some error\"\n- logging.error(\"Error occurred: %s\" % e)\n- logging.error(\"Error occurred: \" + e)\n+ logging.error(\"Error occurred: %s\", e)\n+ logging.error(\"Error occurred: %s\", e)\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/str-concat-in-sequence-literals", "summary": "Convert Implicit String Concat Inside Sequence into Individual Elements", "description": "This codemod fixes cases of implicit string concatenation inside lists, sets, or tuples. This is most likely a mistake: you probably meant to include a comma in between the concatenated strings. \n\nOur changes look something like this:\n```diff\nbad = [\n-    \"ab\"\n+    \"ab\",\n     \"cd\",\n     \"ef\",\n-    \"gh\"\n+    \"gh\",\n     \"ij\",\n]\n```\n", "references": [], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-async-task-instantiation", "summary": "Use High-Level `asyncio` API Functions to Create Tasks", "description": "The `asyncio` [documentation](https://docs.python.org/3/library/asyncio-task.html#asyncio.Task) explicitly discourages manual instantiation of a `Task` instance and instead recommends calling `create_task`. 
This keeps your code in line with recommended best practices and promotes maintainability.\n\nOur changes look like the following:\n```diff\n import asyncio\n\n- task = asyncio.Task(my_coroutine(), name=\"my task\")\n+ task = asyncio.create_task(my_coroutine(), name=\"my task\")\n```\n", "references": [{"url": "https://docs.python.org/3/library/asyncio-task.html#asyncio.Task", "description": "https://docs.python.org/3/library/asyncio-task.html#asyncio.Task"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/django-model-without-dunder-str", "summary": "Ensure Django Model Classes Implement a `__str__` Method", "description": "If you've ever actively developed or debugged a Django application, you may have noticed that the string representations of Django models and their instances can sometimes be hard to read or to distinguish from one another. Loading models in the interactive Django console or viewing them in the admin interface can be puzzling. This is because the default string representation of Django models is fairly generic.\n\nThis codemod is intended to make the string representation of your model objects more human-readable. 
It will automatically detect all of your model's fields and display them as a descriptive string.\n\nFor example, the default string representation of the `Question` model from Django's popular Poll App tutorial looks like this:\n```diff\nfrom django.db import models\n\nclass Question(models.Model):\n    question_text = models.CharField(max_length=200)\n    pub_date = models.DateTimeField(\"date published\")\n+ \n+    def __str__(self):\n+        model_name = self.__class__.__name__\n+        fields_str = \", \".join((f\"{field.name}={getattr(self, field.name)}\" for field in self._meta.fields))\n+        return f\"{model_name}({fields_str})\"\n```\n\nWithout this change, the string representation of `Question` objects looks like this in the interactive Django shell:\n```\n>>> Question.objects.all()\n<QuerySet [<Question: Question object (1)>]>\n```\nWith this codemod's addition of `__str__`, it now looks like:\n```\n>>> Question.objects.all()\n<QuerySet [<Question: Question(id=1, question_text=What's new?, pub_date=2024-02-21 14:28:45.631782+00:00)>]>\n```\n\nYou'll notice this change works great for models with only a handful of fields. We encourage you to use this codemod's change as a starting point for further customization.\n", "references": [{"url": "https://docs.djangoproject.com/en/5.0/ref/models/instances/#django.db.models.Model.__str__", "description": "https://docs.djangoproject.com/en/5.0/ref/models/instances/#django.db.models.Model.__str__"}], "properties": {}, "failedFiles": [], "changeset": []}, {"codemod": "pixee:python/fix-hasattr-call", "summary": "Use `callable` builtin to check for callables", "description": "This codemod fixes cases where `hasattr` is used to check if an object is a callable. You likely want to use `callable` instead. This is because using `hasattr` will return different results in some cases, such as when the class implements a `__getattr__` method. 
\n\nOur changes look something like this:\n```diff\n class Test:\n     pass\n\n obj = Test()\n- hasattr(obj, \"__call__\")\n+ callable(obj)\n```\n", "references": [{"url": "https://docs.python.org/3/library/functions.html#callable", "description": "https://docs.python.org/3/library/functions.html#callable"}, {"url": "https://docs.python.org/3/library/functions.html#hasattr", "description": "https://docs.python.org/3/library/functions.html#hasattr"}], "properties": {}, "failedFiles": [], "changeset": []}]}


--------------------------------------------------------------------------------
/examples/webgoat.codetf.json:
--------------------------------------------------------------------------------
1 | {"run":{"vendor":"io.codemodder","tool":"codemodder","version":"1.0.0","commandLine":"--codemod-include pixee:java/harden-xmlinputfactory,pixee:java/harden-zip-entry-paths,pixee:java/encode-jsp-scriptlet,pixee:java/fix-verb-tampering,pixee:java/secure-random,pixee:java/sql-parameterizer,pixee:java/limit-readline,pixee:java/validate-jakarta-forward-path,pixee:java/upgrade-sslengine-tls,pixee:java/upgrade-sslparameters-tls,pixee:java/upgrade-sslsocket-tls,pixee:java/upgrade-sslcontext-tls,pixee:java/harden-process-creation,pixee:java/make-prng-seed-unpredictable,pixee:java/harden-xmldecoder-stream,pixee:java/sandbox-url-creation,pixee:java/sanitize-apache-multipart-filename,pixee:java/sanitize-spring-multipart-filename,pixee:java/strip-http-header-newlines,pixee:java/harden-xstream,codeql:java/missing-jwt-signature-check,codeql:java/database-resource-leak,codeql:java/stack-trace-exposure,codeql:java/insecure-cookie,codeql:java/output-resource-leak,pixee:java/upgrade-tempfile-to-nio,pixee:java/switch-literal-first,pixee:java/prevent-filewriter-leak-with-nio,pixee:java/hql-parameterizer,pixee:java/disable-dircontext-deserialization,pixee:java/move-switch-default-last,pixee:java/verbose-request-mapping,pixee:java/add-clarifying-braces,semgrep:java/java.lang.security.audit.overly-permissive-file-permission.overly-permissive-file-permission,sonar:java/harden-string-parse-to-primitives-s2130,sonar:java/overrides-match-synchronization-s3551,sonar:java/remove-redundant-variable-creation-s1488,sonar:java/simplify-rest-controller-annotations-s6833,sonar:java/remove-commented-code-s125,sonar:java/remove-unused-private-method-s1144,sonar:java/declare-variable-on-separate-line-s1659,sonar:java/remove-unused-local-variable-s1481,sonar:java/define-constant-for-duplicate-literal-s1192,sonar:java/remove-useless-parentheses-s1110,sonar:java/avoid-implicit-public-constructor-s1118,sonar:java/replace-stream-collectors-to-list-s6204,sonar:java/add-missing-override-s1161,sonar:java/sub
stitute-replaceAll-s5361,sonar:java/remove-redundant-static-s2786 --output-format codetf --output /mnt/code-workspaces/workspace-11795311773268099153/workspace/results/results.codetf.json /mnt/code-workspaces/workspace-11795311773268099153/workspace","elapsed":579153,"directory":"/mnt/code-workspaces/workspace-11795311773268099153/workspace","sarifs":[]},"results":[{"codemod":"pixee:java/harden-process-creation","summary":"Introduced protections against system command injection","description":"This change hardens all instances of [Runtime#exec()](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/lang/Runtime.html) to offer protection against attack.\n\nLeft unchecked, `Runtime#exec()` can execute any arbitrary system command. If an attacker can control part of the strings used as program paths or arguments, they could execute arbitrary programs, install malware, and anything else they could do if they had a shell open on the application host.\n\nOur change introduces a sandbox which protects the application:\n\n```diff\n+ import io.github.pixee.security.SystemCommand;\n  ...\n- Process p = Runtime.getRuntime().exec(command);\n+ Process p = SystemCommand.runCommand(Runtime.getRuntime(), command);\n```\n\nThe default restrictions applied are the following:\n* **Prevent command chaining**. Many exploits work by injecting command separators and causing the shell to interpret a second, malicious command. 
The `SystemCommand#runCommand()` attempts to parse the given command, and throw a `SecurityException` if multiple commands are present.\n* **Prevent arguments targeting sensitive files.** There is little reason for custom code to target sensitive system files like `/etc/passwd`, so the sandbox prevents arguments that point to these files that may be targets for exfiltration.\n\nThere are [more options for sandboxing](https://github.com/pixee/java-security-toolkit/blob/main/src/main/java/io/github/pixee/security/SystemCommand.java#L15) if you are interested in locking down system commands even more.\n","failedFiles":[],"references":[{"url":"https://cheatsheetseries.owasp.org/cheatsheets/OS_Command_Injection_Defense_Cheat_Sheet.html","description":"https://cheatsheetseries.owasp.org/cheatsheets/OS_Command_Injection_Defense_Cheat_Sheet.html"},{"url":"https://wiki.sei.cmu.edu/confluence/display/java/IDS07-J.+Sanitize+untrusted+data+passed+to+the+Runtime.exec%28%29+method","description":"https://wiki.sei.cmu.edu/confluence/display/java/IDS07-J.+Sanitize+untrusted+data+passed+to+the+Runtime.exec%28%29+method"}],"properties":{},"changeset":[{"path":"src/main/java/org/dummy/insecure/framework/VulnerableTaskHolder.java","diff":"--- VulnerableTaskHolder.java\n+++ VulnerableTaskHolder.java\n@@ -1,5 +1,6 @@\n package org.dummy.insecure.framework;\n \n+import io.github.pixee.security.SystemCommand;\n import java.io.BufferedReader;\n import java.io.IOException;\n import java.io.InputStreamReader;\n@@ -62,7 +63,7 @@\n         && taskAction.length() < 22) {\n       log.info(\"about to execute: {}\", taskAction);\n       try {\n-        Process p = Runtime.getRuntime().exec(taskAction);\n+        Process p = SystemCommand.runCommand(Runtime.getRuntime(), taskAction);\n         BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));\n         String line = null;\n         while ((line = in.readLine()) != null) 
{","changes":[{"lineNumber":65,"properties":{},"description":"Hardened this system call to make it resistant to injected commands and commands that target sensitive files","diffSide":"left","packageActions":[{"action":"ADD","result":"COMPLETED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]},{"path":"pom.xml","diff":"--- pom.xml\n+++ pom.xml\n@@ -149,6 +149,7 @@\n     <xstream.version>1.4.5</xstream.version>\n     <!-- do not update necessary for lesson -->\n     <zxcvbn.version>1.8.0</zxcvbn.version>\n+    <versions.java-security-toolkit>1.1.2</versions.java-security-toolkit>\n   </properties>\n \n   <dependencyManagement>\n@@ -267,6 +268,11 @@\n         <artifactId>jruby</artifactId>\n         <version>9.4.3.0</version>\n       </dependency>\n+      <dependency>\n+        <groupId>io.github.pixee</groupId>\n+        <artifactId>java-security-toolkit</artifactId>\n+        <version>${versions.java-security-toolkit}</version>\n+      </dependency>\n     </dependencies>\n   </dependencyManagement>\n   <dependencies>\n@@ -444,6 +450,10 @@\n       <artifactId>spring-boot-properties-migrator</artifactId>\n       <scope>runtime</scope>\n     </dependency>\n+    <dependency>\n+      <groupId>io.github.pixee</groupId>\n+      <artifactId>java-security-toolkit</artifactId>\n+    </dependency>\n   </dependencies>\n \n   <repositories>","changes":[{"lineNumber":447,"properties":{"contextual_description":"true"},"description":"This library holds security tools for protecting Java API calls.\n\nLicense: MIT ✅ | [Open source](https://github.com/pixee/java-security-toolkit) ✅ | [More facts](https://mvnrepository.com/artifact/io.github.pixee/java-security-toolkit/1.1.2)\n","diffSide":"right","packageActions":[],"parameters":[]}]}]},{"codemod":"pixee:java/harden-xmlinputfactory","summary":"Introduced protections against XXE attacks","description":"This change updates all instances of 
[XMLInputFactory](https://docs.oracle.com/javase/8/docs/api/javax/xml/stream/XMLInputFactory.html) to prevent them from resolving external entities, which can protect you from arbitrary code execution, sensitive data exfiltration, and probably a bunch more evil things attackers are still discovering.\n\nWithout this protection, attackers can cause your `XMLInputFactory` parser to retrieve sensitive information with attacks like this:\n\n```xml\n<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE foo [ <!ENTITY xxe SYSTEM \"file:///etc/passwd\"> ]>\n<book>\n    <title>&xxe;</title>\n</book>\n```\n\nYes, it's pretty insane that this is the default behavior. Our change hardens the factories created with the necessary security features to prevent your parser from resolving external entities.\n\n```diff\n+ import io.github.pixee.security.XMLInputFactorySecurity;\n  ...\n- XMLInputFactory xmlInputFactory = XMLInputFactory.newFactory();\n+ XMLInputFactory xmlInputFactory = XMLInputFactorySecurity.hardenFactory(XMLInputFactory.newFactory());\n```\n\nYou could take our protections one step further by changing our supplied code to prevent the user from supplying a `DOCTYPE`, which is more aggressive and more secure, but also more likely to affect existing code behavior:\n```diff\n+ import io.github.pixee.security.XMLInputFactorySecurity;\n+ import io.github.pixee.security.XMLRestrictions;\n  ...\n  XMLInputFactory xmlInputFactory = XMLInputFactorySecurity.hardenFactory(XMLInputFactory.newFactory(), 
XMLRestrictions.DISALLOW_DOCTYPE);\n```\n","failedFiles":[],"references":[{"url":"https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html","description":"https://cheatsheetseries.owasp.org/cheatsheets/XML_External_Entity_Prevention_Cheat_Sheet.html"},{"url":"https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing","description":"https://owasp.org/www-community/vulnerabilities/XML_External_Entity_(XXE)_Processing"},{"url":"https://github.com/swisskyrepo/PayloadsAllTheThings/blob/master/XXE%20Injection/README.md","description":"https://github.com/swisskyrepo/PayloadsAllTheThings/blob/master/XXE%20Injection/README.md"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/xxe/CommentsCache.java","diff":"--- CommentsCache.java\n+++ CommentsCache.java\n@@ -22,6 +22,7 @@\n \n package org.owasp.webgoat.lessons.xxe;\n \n+import static io.github.pixee.security.XMLInputFactorySecurity.hardenFactory;\n import static java.util.Optional.empty;\n import static java.util.Optional.of;\n \n@@ -95,7 +96,7 @@\n    */\n   protected Comment parseXml(String xml) throws XMLStreamException, JAXBException {\n     var jc = JAXBContext.newInstance(Comment.class);\n-    var xif = XMLInputFactory.newInstance();\n+    var xif = hardenFactory(XMLInputFactory.newInstance());\n \n     if (webSession.isSecurityEnabled()) {\n       xif.setProperty(XMLConstants.ACCESS_EXTERNAL_DTD, \"\"); // Compliant","changes":[{"lineNumber":98,"properties":{},"description":"Hardened the XML processor to prevent external entities from being resolved, which can prevent data exfiltration and arbitrary code execution","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]}]},{"codemod":"pixee:java/harden-xstream","summary":"Hardened XStream with a converter to prevent exploitation","description":"This change hardens usage of 
the `XStream` library to prevent remote code execution attacks.\n\nXStream is a very flexible library, but it has a history of serious vulnerabilities when handling untrusted data because it was never intended for that use case. There are some fundamental issues with the design that make it difficult to make safe when using it by default.\n\nOur change hardens new instances of `XStream` so that they can't deserialize types that are commonly used in exploits (and never in normal usage) and it looks like this:\n\n```diff\n+ import io.github.pixee.security.xstream.HardeningConverter;\n  XStream xstream = new XStream();\n+ xstream.registerConverter(new HardeningConverter());\n  return (AcmeObject)xstream.fromXML(xml);\n```\n\nLooking at the [history of exploits](https://x-stream.github.io/security.html#CVEs) shows that this change will either stop most exploits or raise the bar of exploitation. If you believe there should be more types added to the denylist, please [fill out a ticket](https://github.com/pixee/java-security-toolkit/issues/new) with your suggestions.\n","failedFiles":[],"references":[{"url":"https://x-stream.github.io/security.html","description":"https://x-stream.github.io/security.html"},{"url":"http://diniscruz.blogspot.com/2013/12/xstream-remote-code-execution-exploit.html","description":"http://diniscruz.blogspot.com/2013/12/xstream-remote-code-execution-exploit.html"},{"url":"https://www.contrastsecurity.com/security-influencers/serialization-must-die-act-2-xstream","description":"https://www.contrastsecurity.com/security-influencers/serialization-must-die-act-2-xstream"},{"url":"https://x-stream.github.io/CVE-2013-7285.html","description":"https://x-stream.github.io/CVE-2013-7285.html"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/vulnerablecomponents/VulnerableComponentsLesson.java","diff":"--- VulnerableComponentsLesson.java\n+++ VulnerableComponentsLesson.java\n@@ -23,6 +23,7 @@\n package 
org.owasp.webgoat.lessons.vulnerablecomponents;\n \n import com.thoughtworks.xstream.XStream;\n+import io.github.pixee.security.xstream.HardeningConverter;\n import org.apache.commons.lang3.StringUtils;\n import org.owasp.webgoat.container.assignments.AssignmentEndpoint;\n import org.owasp.webgoat.container.assignments.AssignmentHints;\n@@ -39,6 +40,7 @@\n   @PostMapping(\"/VulnerableComponents/attack1\")\n   public @ResponseBody AttackResult completed(@RequestParam String payload) {\n     XStream xstream = new XStream();\n+    xstream.registerConverter(new HardeningConverter());\n     xstream.setClassLoader(Contact.class.getClassLoader());\n     xstream.alias(\"contact\", ContactImpl.class);\n     xstream.ignoreUnknownElements();","changes":[{"lineNumber":41,"properties":{},"description":"Added an XStream [Converter](https://x-stream.github.io/converter-tutorial.html) which prevents common exploits","diffSide":"left","packageActions":[{"action":"ADD","result":"COMPLETED","package":"pkg:maven/io.github.pixee/java-security-toolkit-xstream@1.0.2"}],"parameters":[]}]},{"path":"pom.xml","diff":"--- pom.xml\n+++ pom.xml\n@@ -150,6 +150,7 @@\n     <!-- do not update necessary for lesson -->\n     <zxcvbn.version>1.8.0</zxcvbn.version>\n     <versions.java-security-toolkit>1.1.2</versions.java-security-toolkit>\n+    <versions.java-security-toolkit-xstream>1.0.2</versions.java-security-toolkit-xstream>\n   </properties>\n \n   <dependencyManagement>\n@@ -273,6 +274,11 @@\n         <artifactId>java-security-toolkit</artifactId>\n         <version>${versions.java-security-toolkit}</version>\n       </dependency>\n+      <dependency>\n+        <groupId>io.github.pixee</groupId>\n+        <artifactId>java-security-toolkit-xstream</artifactId>\n+        <version>${versions.java-security-toolkit-xstream}</version>\n+      </dependency>\n     </dependencies>\n   </dependencyManagement>\n   <dependencies>\n@@ -454,6 +460,10 @@\n       <groupId>io.github.pixee</groupId>\n       
<artifactId>java-security-toolkit</artifactId>\n     </dependency>\n+    <dependency>\n+      <groupId>io.github.pixee</groupId>\n+      <artifactId>java-security-toolkit-xstream</artifactId>\n+    </dependency>\n   </dependencies>\n \n   <repositories>","changes":[{"lineNumber":457,"properties":{"contextual_description":"true"},"description":"This library holds security APIs for hardening XStream operations.\n\nLicense: MIT ✅ | [Open source](https://github.com/pixee/java-security-toolkit-xstream) ✅ | No transitive dependencies ✅ | [More facts](https://mvnrepository.com/artifact/io.github.pixee/java-security-toolkit-xstream/1.0.2)\n","diffSide":"right","packageActions":[],"parameters":[]}]}]},{"codemod":"pixee:java/limit-readline","summary":"Protect `readLine()` against DoS","description":"This change hardens all [`BufferedReader#readLine()`](https://docs.oracle.com/javase/8/docs/api/java/io/BufferedReader.html#readLine--) operations against memory exhaustion.\n\nThere is no way to call `readLine()` safely since it is, by its nature, a read that must be terminated by the stream provider. Furthermore, a stream of data provided by an untrusted source could lead to a denial of service attack, as attackers can provide an infinite stream of bytes until the process runs out of memory.\n\nFixing it is straightforward using an API which limits the amount of expected characters to some sane limit. 
This is what our changes look like:\n\n```diff\n+ import io.github.pixee.security.BoundedLineReader;\n  ...\n  BufferedReader reader = getReader();\n- String line = reader.readLine(); // unlimited read, can lead to DoS\n+ String line = BoundedLineReader.readLine(reader, 5_000_000); // limited to 5MB\n```\n","failedFiles":[],"references":[{"url":"https://vulncat.fortify.com/en/detail?id=desc.dataflow.abap.denial_of_service","description":"https://vulncat.fortify.com/en/detail?id=desc.dataflow.abap.denial_of_service"},{"url":"https://cwe.mitre.org/data/definitions/400.html","description":"https://cwe.mitre.org/data/definitions/400.html"}],"properties":{},"changeset":[{"path":"src/main/java/org/dummy/insecure/framework/VulnerableTaskHolder.java","diff":"--- VulnerableTaskHolder.java\n+++ VulnerableTaskHolder.java\n@@ -1,5 +1,6 @@\n package org.dummy.insecure.framework;\n \n+import io.github.pixee.security.BoundedLineReader;\n import io.github.pixee.security.SystemCommand;\n import java.io.BufferedReader;\n import java.io.IOException;\n@@ -66,7 +67,7 @@\n         Process p = SystemCommand.runCommand(Runtime.getRuntime(), taskAction);\n         BufferedReader in = new BufferedReader(new InputStreamReader(p.getInputStream()));\n         String line = null;\n-        while ((line = in.readLine()) != null) {\n+        while ((line = BoundedLineReader.readLine(in, 5_000_000)) != null) {\n           log.info(line);\n         }\n       } catch (IOException e) {","changes":[{"lineNumber":69,"properties":{},"description":"Replaced with a call that offers an upper bound on the number of characters that will be read before giving up and throwing a security exception","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]}]},{"codemod":"pixee:java/sanitize-spring-multipart-filename","summary":"Sanitized user-provided file names in HTTP multipart uploads","description":"This change 
hardens usage of the [Spring Web](https://github.com/spring-projects/spring-framework) multipart request and file uploading feature to prevent file overwrite attacks.\n\nAlthough end users uploading a file through the browser can't fully control the file name, attackers armed with HTTP proxies, scripts or `curl` could manipulate the file to contain directory escape sequences and send in values like `../../../../../etc/passwd`. This is a common place that developers forget to distrust user input and end up including the attacker's file name in the path they end up writing.\n\nOur change sanitizes the output of `MultipartFile#getOriginalFilename()`, stripping the value of null bytes and directory escape sequences, leaving a simple file name in the expected form. The code change is very simple and looks like this:\n\n```diff\n+ import io.github.pixee.security.Filenames;\n  ...\n  MultipartFile uploadedFile = parseFile(request);\n- String name = uploadedFile.getOriginalFilename(); // vulnerable\n+ String name = Filenames.toSimpleFileName(uploadedFile.getOriginalFilename()); // safe\n  writeFile(new File(\"my_upload_dir\", name));\n```\n","failedFiles":[],"references":[{"url":"https://owasp.org/www-community/vulnerabilities/Unrestricted_File_Upload","description":"https://owasp.org/www-community/vulnerabilities/Unrestricted_File_Upload"},{"url":"https://portswigger.net/web-security/file-upload","description":"https://portswigger.net/web-security/file-upload"},{"url":"https://github.com/spring-projects/spring-framework/blob/c989470f94926ee5c7474bead278b00e9aaac787/spring-web/src/main/java/org/springframework/web/multipart/MultipartFile.java#L68","description":"https://github.com/spring-projects/spring-framework/blob/c989470f94926ee5c7474bead278b00e9aaac787/spring-web/src/main/java/org/springframework/web/multipart/MultipartFile.java#L68"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/pathtraversal/ProfileUploadRemoveUserInput.java","diff":"--- 
ProfileUploadRemoveUserInput.java\n+++ ProfileUploadRemoveUserInput.java\n@@ -1,5 +1,6 @@\n package org.owasp.webgoat.lessons.pathtraversal;\n \n+import io.github.pixee.security.Filenames;\n import static org.springframework.http.MediaType.ALL_VALUE;\n import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;\n \n@@ -33,6 +34,6 @@\n   @ResponseBody\n   public AttackResult uploadFileHandler(\n       @RequestParam(\"uploadedFileRemoveUserInput\") MultipartFile file) {\n-    return super.execute(file, file.getOriginalFilename());\n+    return super.execute(file, Filenames.toSimpleFileName(file.getOriginalFilename()));\n   }\n }","changes":[{"lineNumber":36,"properties":{},"description":"Wrapped the file name with a sanitizer call that takes out path escaping characters","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/pathtraversal/ProfileZipSlip.java","diff":"--- ProfileZipSlip.java\n+++ ProfileZipSlip.java\n@@ -1,5 +1,6 @@\n package org.owasp.webgoat.lessons.pathtraversal;\n \n+import io.github.pixee.security.Filenames;\n import static org.springframework.http.MediaType.ALL_VALUE;\n import static org.springframework.http.MediaType.APPLICATION_JSON_VALUE;\n \n@@ -49,7 +50,7 @@\n       produces = APPLICATION_JSON_VALUE)\n   @ResponseBody\n   public AttackResult uploadFileHandler(@RequestParam(\"uploadedFileZipSlip\") MultipartFile file) {\n-    if (!file.getOriginalFilename().toLowerCase().endsWith(\".zip\")) {\n+    if (!Filenames.toSimpleFileName(file.getOriginalFilename()).toLowerCase().endsWith(\".zip\")) {\n       return failed(this).feedback(\"path-traversal-zip-slip.no-zip\").build();\n     } else {\n       return processZipUpload(file);\n@@ -63,7 +64,7 @@\n     var currentImage = getProfilePictureAsBase64();\n \n     try {\n-      var uploadedZipFile = 
tmpZipDirectory.resolve(file.getOriginalFilename());\n+      var uploadedZipFile = tmpZipDirectory.resolve(Filenames.toSimpleFileName(file.getOriginalFilename()));\n       FileCopyUtils.copy(file.getBytes(), uploadedZipFile.toFile());\n \n       ZipFile zip = new ZipFile(uploadedZipFile.toFile());","changes":[{"lineNumber":52,"properties":{},"description":"Wrapped the file name with a sanitizer call that takes out path escaping characters","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]},{"lineNumber":66,"properties":{},"description":"Wrapped the file name with a sanitizer call that takes out path escaping characters","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/webwolf/FileServer.java","diff":"--- FileServer.java\n+++ FileServer.java\n@@ -22,6 +22,7 @@\n \n package org.owasp.webgoat.webwolf;\n \n+import io.github.pixee.security.Filenames;\n import static java.util.Comparator.comparing;\n import static org.springframework.http.MediaType.ALL_VALUE;\n \n@@ -86,8 +87,8 @@\n     String username = authentication.getName();\n     var destinationDir = new File(fileLocation, username);\n     destinationDir.mkdirs();\n-    myFile.transferTo(new File(destinationDir, myFile.getOriginalFilename()));\n-    log.debug(\"File saved to {}\", new File(destinationDir, myFile.getOriginalFilename()));\n+    myFile.transferTo(new File(destinationDir, Filenames.toSimpleFileName(myFile.getOriginalFilename())));\n+    log.debug(\"File saved to {}\", new File(destinationDir, Filenames.toSimpleFileName(myFile.getOriginalFilename())));\n \n     return new ModelAndView(\n         new RedirectView(\"files\", true),","changes":[{"lineNumber":89,"properties":{},"description":"Wrapped the file name with a sanitizer call that takes out path 
escaping characters","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]},{"lineNumber":90,"properties":{},"description":"Wrapped the file name with a sanitizer call that takes out path escaping characters","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]}]},{"codemod":"pixee:java/sql-parameterizer","summary":"Refactored to use parameterized SQL APIs","description":"This change refactors SQL statements to be parameterized, rather than built by hand.\n\nWithout parameterization, developers must remember to escape inputs using the rules for that database. It's usually buggy, at the least -- and sometimes vulnerable.\n\nOur changes look something like this:\n\n```diff\n- Statement stmt = connection.createStatement();\n- ResultSet rs = stmt.executeQuery(\"SELECT * FROM users WHERE name = '\" + user + \"'\");\n+ PreparedStatement stmt = connection.prepareStatement(\"SELECT * FROM users WHERE name = ?\");\n+ stmt.setString(1, user);\n+ ResultSet rs = stmt.executeQuery();\n```\n","failedFiles":[],"references":[{"url":"https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html","description":"https://cheatsheetseries.owasp.org/cheatsheets/SQL_Injection_Prevention_Cheat_Sheet.html"},{"url":"https://cwe.mitre.org/data/definitions/89.html","description":"https://cwe.mitre.org/data/definitions/89.html"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/sqlinjection/advanced/SqlInjectionChallenge.java","diff":"--- SqlInjectionChallenge.java\n+++ SqlInjectionChallenge.java\n@@ -23,6 +23,7 @@\n package org.owasp.webgoat.lessons.sqlinjection.advanced;\n \n import java.sql.*;\n+import java.sql.PreparedStatement;\n import lombok.extern.slf4j.Slf4j;\n import org.owasp.webgoat.container.LessonDataSource;\n import 
org.owasp.webgoat.container.assignments.AssignmentEndpoint;\n@@ -64,9 +65,10 @@\n \n       try (Connection connection = dataSource.getConnection()) {\n         String checkUserQuery =\n-            \"select userid from sql_challenge_users where userid = '\" + username_reg + \"'\";\n-        Statement statement = connection.createStatement();\n-        ResultSet resultSet = statement.executeQuery(checkUserQuery);\n+            \"select userid from sql_challenge_users where userid = ?\";\n+        PreparedStatement statement = connection.prepareStatement(checkUserQuery);\n+        statement.setString(1, username_reg);\n+        ResultSet resultSet = statement.execute();\n \n         if (resultSet.next()) {\n           if (username_reg.contains(\"tom'\")) {","changes":[{"lineNumber":69,"properties":{},"description":"Parameterized SQL usage to prevent any bugs or vulnerabilities","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/sqlinjection/introduction/SqlInjectionLesson8.java","diff":"--- SqlInjectionLesson8.java\n+++ SqlInjectionLesson8.java\n@@ -22,6 +22,7 @@\n \n package org.owasp.webgoat.lessons.sqlinjection.introduction;\n \n+import java.sql.PreparedStatement;\n import static java.sql.ResultSet.CONCUR_UPDATABLE;\n import static java.sql.ResultSet.TYPE_SCROLL_SENSITIVE;\n \n@@ -148,14 +149,15 @@\n     action = action.replace('\\'', '\"');\n     Calendar cal = Calendar.getInstance();\n     SimpleDateFormat sdf = new SimpleDateFormat(\"yyyy-MM-dd HH:mm:ss\");\n-    String time = sdf.format(cal.getTime());\n \n     String logQuery =\n-        \"INSERT INTO access_log (time, action) VALUES ('\" + time + \"', '\" + action + \"')\";\n+        \"INSERT INTO access_log (time, action) VALUES (?\" + \", ?\" + \")\";\n \n     try {\n-      Statement statement = connection.createStatement(TYPE_SCROLL_SENSITIVE, CONCUR_UPDATABLE);\n-      statement.executeUpdate(logQuery);\n+      PreparedStatement statement = 
connection.prepareStatement(logQuery, TYPE_SCROLL_SENSITIVE, CONCUR_UPDATABLE);\n+      statement.setString(1, sdf.format(cal.getTime()));\n+      statement.setString(2, action);\n+      statement.execute();\n     } catch (SQLException e) {\n       System.err.println(e.getMessage());\n     }","changes":[{"lineNumber":158,"properties":{},"description":"Parameterized SQL usage to prevent any bugs or vulnerabilities","diffSide":"left","packageActions":[],"parameters":[]}]}]},{"codemod":"pixee:java/sandbox-url-creation","summary":"Sandboxed URL creation to prevent SSRF attacks","description":"This change sandboxes the creation of [`java.net.URL`](https://docs.oracle.com/en/java/javase/17/docs/api/java.base/java/net/URL.html) objects so they will be more resistant to Server-Side Request Forgery (SSRF) attacks.\n\nMost of the time when you create a URL, you're intending to reference an HTTP endpoint, like an internal microservice. However, URLs can point to local file system files, a Gopher stream in your local network, a JAR file on a remote Internet site, and all kinds of other unexpected and undesirable stuff. When the URL values are influenced by attackers, they can trick your application into fetching internal resources, running malicious code, or otherwise harming the system. 
Consider the following code:\n\n```java\nString url = userInput.getServiceAddress();\nreturn IOUtils.toString(new URL(url).openConnection());\n```\n\nIn this case, an attacker could supply a value like `jar:file:/path/to/appserver/lib.jar` and attempt to read the contents of your application's code.\n\nOur changes introduce sandboxing around URL creation that force the developers to specify some boundaries on the types of URLs they expect to create:\n\n```diff\n+ import io.github.pixee.security.Urls;\n+ import io.github.pixee.security.HostValidator;\n  ...\n  String url = userInput.getServiceAddress();\n- URL u = new URL(url);\n+ URL u = Urls.create(url, Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS);\n  InputStream is = u.openConnection();\n```\n\nThis change alone reduces attack surface significantly, but can be enhanced to create even more security by specifying some controls around the hosts we expect to connect with:\n\n```diff\n+ import io.github.pixee.security.Urls;\n+ import io.github.pixee.security.HostValidator;\n  ...\n  HostValidator allowsOnlyGoodDotCom = HostValidator.fromAllowedHostPattern(Pattern.compile(\"good\\\\.com\"));\n  URL u = Urls.create(url, Urls.HTTP_PROTOCOLS, allowsOnlyGoodDotCom);\n```\n\nNote: Beware temptation to write some validation on your own. 
Parsing URLs is difficult and differences between parsers in validation and execution will certainly lead to exploits as attackers [have repeatedly proven](https://www.blackhat.com/docs/us-17/thursday/us-17-Tsai-A-New-Era-Of-SSRF-Exploiting-URL-Parser-In-Trending-Programming-Languages.pdf).\n","failedFiles":[],"references":[{"url":"https://www.hacksplaining.com/prevention/ssrf","description":"https://www.hacksplaining.com/prevention/ssrf"},{"url":"https://portswigger.net/web-security/ssrf","description":"https://portswigger.net/web-security/ssrf"},{"url":"https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html","description":"https://cheatsheetseries.owasp.org/cheatsheets/Server_Side_Request_Forgery_Prevention_Cheat_Sheet.html"},{"url":"https://www.rapid7.com/blog/post/2021/11/23/owasp-top-10-deep-dive-defending-against-server-side-request-forgery/","description":"https://www.rapid7.com/blog/post/2021/11/23/owasp-top-10-deep-dive-defending-against-server-side-request-forgery/"},{"url":"https://blog.assetnote.io/2021/01/13/blind-ssrf-chains/","description":"https://blog.assetnote.io/2021/01/13/blind-ssrf-chains/"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/jwt/claimmisuse/JWTHeaderJKUEndpoint.java","diff":"--- JWTHeaderJKUEndpoint.java\n+++ JWTHeaderJKUEndpoint.java\n@@ -6,6 +6,8 @@\n import com.auth0.jwt.JWT;\n import com.auth0.jwt.algorithms.Algorithm;\n import com.auth0.jwt.exceptions.JWTVerificationException;\n+import io.github.pixee.security.HostValidator;\n+import io.github.pixee.security.Urls;\n import java.net.MalformedURLException;\n import java.net.URL;\n import java.security.interfaces.RSAPublicKey;\n@@ -48,7 +50,7 @@\n       try {\n         var decodedJWT = JWT.decode(token);\n         var jku = decodedJWT.getHeaderClaim(\"jku\");\n-        JwkProvider jwkProvider = new JwkProviderBuilder(new URL(jku.asString())).build();\n+        JwkProvider jwkProvider = new 
JwkProviderBuilder(Urls.create(jku.asString(), Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS)).build();\n         var jwk = jwkProvider.get(decodedJWT.getKeyId());\n         var algorithm = Algorithm.RSA256((RSAPublicKey) jwk.getPublicKey());\n         JWT.require(algorithm).build().verify(decodedJWT);","changes":[{"lineNumber":51,"properties":{},"description":"Wrapped the URL creation with a method that forces the caller to pick allowed protocols and domains that this URL can reach","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/ssrf/SSRFTask2.java","diff":"--- SSRFTask2.java\n+++ SSRFTask2.java\n@@ -22,6 +22,8 @@\n \n package org.owasp.webgoat.lessons.ssrf;\n \n+import io.github.pixee.security.HostValidator;\n+import io.github.pixee.security.Urls;\n import java.io.IOException;\n import java.io.InputStream;\n import java.net.MalformedURLException;\n@@ -48,7 +50,7 @@\n   protected AttackResult furBall(String url) {\n     if (url.matches(\"http://ifconfig\\\\.pro\")) {\n       String html;\n-      try (InputStream in = new URL(url).openStream()) {\n+      try (InputStream in = Urls.create(url, Urls.HTTP_PROTOCOLS, HostValidator.DENY_COMMON_INFRASTRUCTURE_TARGETS).openStream()) {\n         html =\n             new String(in.readAllBytes(), StandardCharsets.UTF_8)\n                 .replaceAll(\"\\n\", \"<br>\"); // Otherwise the \\n gets escaped in the response","changes":[{"lineNumber":51,"properties":{},"description":"Wrapped the URL creation with a method that forces the caller to pick allowed protocols and domains that this URL can reach","diffSide":"left","packageActions":[{"action":"ADD","result":"SKIPPED","package":"pkg:maven/io.github.pixee/java-security-toolkit@1.1.2"}],"parameters":[]}]}]},{"codemod":"pixee:java/switch-literal-first","summary":"Switch order of literals to 
prevent NullPointerException","description":"This change defensively switches the order of literals in comparison expressions to ensure that no null pointer exceptions are unexpectedly thrown. Runtime exceptions especially can cause exceptional and unexpected code paths to be taken, and this can result in unexpected behavior. \n\nBoth simple vulnerabilities (like information disclosure) and complex vulnerabilities (like business logic flaws) can take advantage of these unexpected code paths.\n\nOur changes look something like this:\n\n```diff\n  String fieldName = header.getFieldName();\n  String fieldValue = header.getFieldValue();\n- if(fieldName.equals(\"requestId\")) {\n+ if(\"requestId\".equals(fieldName)) {\n    logRequest(fieldValue);\n  }\n```\n","failedFiles":[],"references":[{"url":"http://cwe.mitre.org/data/definitions/476.html","description":"http://cwe.mitre.org/data/definitions/476.html"},{"url":"https://en.wikibooks.org/wiki/Java_Programming/Preventing_NullPointerException","description":"https://en.wikibooks.org/wiki/Java_Programming/Preventing_NullPointerException"},{"url":"https://rules.sonarsource.com/java/RSPEC-1132/","description":"https://rules.sonarsource.com/java/RSPEC-1132/"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/container/AsciiDoctorTemplateResolver.java","diff":"--- AsciiDoctorTemplateResolver.java\n+++ AsciiDoctorTemplateResolver.java\n@@ -123,7 +123,7 @@\n \n   private String computeResourceName(String resourceName, String language) {\n     String computedResourceName;\n-    if (language.equals(\"en\")) {\n+    if (\"en\".equals(language)) {\n       computedResourceName = resourceName;\n     } else {\n       computedResourceName = resourceName.replace(\".adoc\", \"_\".concat(language).concat(\".adoc\"));","changes":[{"lineNumber":126,"properties":{},"description":"Switch order of literals to prevent 
NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/container/UserInterceptor.java","diff":"--- UserInterceptor.java\n+++ UserInterceptor.java\n@@ -35,7 +35,7 @@\n       if (null != env) {\n         String githubClientId =\n             env.getProperty(\"spring.security.oauth2.client.registration.github.client-id\");\n-        if (null != githubClientId && !githubClientId.equals(\"dummy\")) {\n+        if (null != githubClientId && !\"dummy\".equals(githubClientId)) {\n           modelAndView.getModel().put(\"oauth\", Boolean.TRUE);\n         }\n       } else {","changes":[{"lineNumber":38,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/container/lessons/Assignment.java","diff":"--- Assignment.java\n+++ Assignment.java\n@@ -63,7 +63,7 @@\n   }\n \n   public Assignment(String name, String path, List<String> hints) {\n-    if (path.equals(\"\") || path.equals(\"/\") || path.equals(\"/WebGoat/\")) {\n+    if (\"\".equals(path) || \"/\".equals(path) || \"/WebGoat/\".equals(path)) {\n       throw new IllegalStateException(\n           \"The path of assignment '\"\n               + name","changes":[{"lineNumber":66,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":66,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":66,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/bypassrestrictions/BypassRestrictionsFieldRestrictions.java","diff":"--- BypassRestrictionsFieldRestrictions.java\n+++ 
BypassRestrictionsFieldRestrictions.java\n@@ -40,13 +40,13 @@\n       @RequestParam String checkbox,\n       @RequestParam String shortInput,\n       @RequestParam String readOnlyInput) {\n-    if (select.equals(\"option1\") || select.equals(\"option2\")) {\n+    if (\"option1\".equals(select) || \"option2\".equals(select)) {\n       return failed(this).build();\n     }\n-    if (radio.equals(\"option1\") || radio.equals(\"option2\")) {\n+    if (\"option1\".equals(radio) || \"option2\".equals(radio)) {\n       return failed(this).build();\n     }\n-    if (checkbox.equals(\"on\") || checkbox.equals(\"off\")) {\n+    if (\"on\".equals(checkbox) || \"off\".equals(checkbox)) {\n       return failed(this).build();\n     }\n     if (shortInput.length() <= 5) {","changes":[{"lineNumber":43,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":43,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":46,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":46,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":49,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":49,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/challenges/challenge7/Assignment7.java","diff":"--- Assignment7.java\n+++ Assignment7.java\n@@ -57,7 +57,7 @@\n \n   @GetMapping(\"/challenge/7/reset-password/{link}\")\n   public ResponseEntity<String> 
resetPassword(@PathVariable(value = \"link\") String link) {\n-    if (link.equals(ADMIN_PASSWORD_LINK)) {\n+    if (ADMIN_PASSWORD_LINK.equals(link)) {\n       return ResponseEntity.accepted()\n           .body(\n               \"<h1>Success!!</h1>\"","changes":[{"lineNumber":60,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/challenges/challenge7/PasswordResetLink.java","diff":"--- PasswordResetLink.java\n+++ PasswordResetLink.java\n@@ -12,7 +12,7 @@\n \n   public String createPasswordReset(String username, String key) {\n     Random random = new Random();\n-    if (username.equalsIgnoreCase(\"admin\")) {\n+    if (\"admin\".equalsIgnoreCase(username)) {\n       // Admin has a fix reset link\n       random.setSeed(key.length());\n     }","changes":[{"lineNumber":15,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/cryptography/SecureDefaultsAssignment.java","diff":"--- SecureDefaultsAssignment.java\n+++ SecureDefaultsAssignment.java\n@@ -44,7 +44,7 @@\n   public AttackResult completed(\n       @RequestParam String secretFileName, @RequestParam String secretText)\n       throws NoSuchAlgorithmException {\n-    if (secretFileName != null && secretFileName.equals(\"default_secret\")) {\n+    if (secretFileName != null && \"default_secret\".equals(secretFileName)) {\n       if (secretText != null\n           && HashingAssignment.getHash(secretText, \"SHA-256\")\n               .equalsIgnoreCase(","changes":[{"lineNumber":47,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/cryptography/XOREncodingAssignment.java","diff":"--- 
XOREncodingAssignment.java\n+++ XOREncodingAssignment.java\n@@ -37,7 +37,7 @@\n   @PostMapping(\"/crypto/encoding/xor\")\n   @ResponseBody\n   public AttackResult completed(@RequestParam String answer_pwd1) {\n-    if (answer_pwd1 != null && answer_pwd1.equals(\"databasepassword\")) {\n+    if (answer_pwd1 != null && \"databasepassword\".equals(answer_pwd1)) {\n       return success(this).feedback(\"crypto-encoding-xor.success\").build();\n     }\n     return failed(this).feedback(\"crypto-encoding-xor.empty\").build();","changes":[{"lineNumber":40,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/csrf/CSRFGetFlag.java","diff":"--- CSRFGetFlag.java\n+++ CSRFGetFlag.java\n@@ -52,7 +52,7 @@\n     String referer = (req.getHeader(\"referer\") == null) ? \"NULL\" : req.getHeader(\"referer\");\n     String[] refererArr = referer.split(\"/\");\n \n-    if (referer.equals(\"NULL\")) {\n+    if (\"NULL\".equals(referer)) {\n       if (\"true\".equals(req.getParameter(\"csrf\"))) {\n         Random random = new Random();\n         userSessionData.setValue(\"csrf-get-success\", random.nextInt(65536));","changes":[{"lineNumber":55,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/csrf/ForgedReviews.java","diff":"--- ForgedReviews.java\n+++ ForgedReviews.java\n@@ -103,7 +103,7 @@\n     reviews.add(review);\n     userReviews.put(webSession.getUserName(), reviews);\n     // short-circuit\n-    if (validateReq == null || !validateReq.equals(weakAntiCSRF)) {\n+    if (validateReq == null || !weakAntiCSRF.equals(validateReq)) {\n       return failed(this).feedback(\"csrf-you-forgot-something\").build();\n     }\n     // we have the spoofed 
files","changes":[{"lineNumber":106,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/idor/IDORDiffAttributes.java","diff":"--- IDORDiffAttributes.java\n+++ IDORDiffAttributes.java\n@@ -47,10 +47,10 @@\n     if (diffAttribs.length < 2) {\n       return failed(this).feedback(\"idor.diff.attributes.missing\").build();\n     }\n-    if (diffAttribs[0].toLowerCase().trim().equals(\"userid\")\n-            && diffAttribs[1].toLowerCase().trim().equals(\"role\")\n-        || diffAttribs[1].toLowerCase().trim().equals(\"userid\")\n-            && diffAttribs[0].toLowerCase().trim().equals(\"role\")) {\n+    if (\"userid\".equals(diffAttribs[0].toLowerCase().trim())\n+            && \"role\".equals(diffAttribs[1].toLowerCase().trim())\n+        || \"userid\".equals(diffAttribs[1].toLowerCase().trim())\n+            && \"role\".equals(diffAttribs[0].toLowerCase().trim())) {\n       return success(this).feedback(\"idor.diff.success\").build();\n     } else {\n       return failed(this).feedback(\"idor.diff.failure\").build();","changes":[{"lineNumber":50,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":51,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":52,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":53,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/idor/IDOREditOtherProfile.java","diff":"--- IDOREditOtherProfile.java\n+++ IDOREditOtherProfile.java\n@@ -103,7 
+103,7 @@\n       return failed(this).feedback(\"idor.edit.profile.failure4\").build();\n     }\n \n-    if (currentUserProfile.getColor().equals(\"black\") && currentUserProfile.getRole() <= 1) {\n+    if (\"black\".equals(currentUserProfile.getColor()) && currentUserProfile.getRole() <= 1) {\n       return success(this)\n           .feedback(\"idor.edit.profile.success2\")\n           .output(userSessionData.getValue(\"idor-updated-own-profile\").toString())","changes":[{"lineNumber":106,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/idor/IDORViewOtherProfile.java","diff":"--- IDORViewOtherProfile.java\n+++ IDORViewOtherProfile.java\n@@ -66,7 +66,7 @@\n         // secure code would ensure there was a horizontal access control check prior to dishing up\n         // the requested profile\n         if (requestedProfile.getUserId() != null\n-            && requestedProfile.getUserId().equals(\"2342388\")) {\n+            && \"2342388\".equals(requestedProfile.getUserId())) {\n           return success(this)\n               .feedback(\"idor.view.profile.success\")\n               .output(requestedProfile.profileToMap().toString())","changes":[{"lineNumber":69,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/idor/IDORViewOwnProfileAltUrl.java","diff":"--- IDORViewOwnProfileAltUrl.java\n+++ IDORViewOwnProfileAltUrl.java\n@@ -52,9 +52,9 @@\n         String authUserId = (String) userSessionData.getValue(\"idor-authenticated-user-id\");\n         // don't care about http://localhost:8080 ... 
just want WebGoat/\n         String[] urlParts = url.split(\"/\");\n-        if (urlParts[0].equals(\"WebGoat\")\n-            && urlParts[1].equals(\"IDOR\")\n-            && urlParts[2].equals(\"profile\")\n+        if (\"WebGoat\".equals(urlParts[0])\n+            && \"IDOR\".equals(urlParts[1])\n+            && \"profile\".equals(urlParts[2])\n             && urlParts[3].equals(authUserId)) {\n           UserProfile userProfile = new UserProfile(authUserId);\n           return success(this)","changes":[{"lineNumber":55,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":56,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":57,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/idor/UserProfile.java","diff":"--- UserProfile.java\n+++ UserProfile.java\n@@ -44,14 +44,14 @@\n   //\n   private void setProfileFromId(String id) {\n     // emulate look up from database\n-    if (id.equals(\"2342384\")) {\n+    if (\"2342384\".equals(id)) {\n       this.userId = id;\n       this.color = \"yellow\";\n       this.name = \"Tom Cat\";\n       this.size = \"small\";\n       this.isAdmin = false;\n       this.role = 3;\n-    } else if (id.equals(\"2342388\")) {\n+    } else if (\"2342388\".equals(id)) {\n       this.userId = id;\n       this.color = \"brown\";\n       this.name = \"Buffalo Bill\";","changes":[{"lineNumber":47,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":54,"properties":{},"description":"Switch order of literals to prevent 
NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/logging/LogBleedingTask.java","diff":"--- LogBleedingTask.java\n+++ LogBleedingTask.java\n@@ -57,7 +57,7 @@\n       return failed(this).output(\"Please provide username (Admin) and password\").build();\n     }\n \n-    if (username.equals(\"Admin\") && password.equals(this.password)) {\n+    if (\"Admin\".equals(username) && password.equals(this.password)) {\n       return success(this).build();\n     }\n ","changes":[{"lineNumber":60,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/missingac/MissingFunctionACHiddenMenus.java","diff":"--- MissingFunctionACHiddenMenus.java\n+++ MissingFunctionACHiddenMenus.java\n@@ -43,11 +43,11 @@\n       produces = {\"application/json\"})\n   @ResponseBody\n   public AttackResult completed(String hiddenMenu1, String hiddenMenu2) {\n-    if (hiddenMenu1.equals(\"Users\") && hiddenMenu2.equals(\"Config\")) {\n+    if (\"Users\".equals(hiddenMenu1) && \"Config\".equals(hiddenMenu2)) {\n       return success(this).output(\"\").feedback(\"access-control.hidden-menus.success\").build();\n     }\n \n-    if (hiddenMenu1.equals(\"Config\") && hiddenMenu2.equals(\"Users\")) {\n+    if (\"Config\".equals(hiddenMenu1) && \"Users\".equals(hiddenMenu2)) {\n       return failed(this).output(\"\").feedback(\"access-control.hidden-menus.close\").build();\n     }\n ","changes":[{"lineNumber":46,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":46,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":50,"properties":{},"description":"Switch order of literals to prevent 
NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":50,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/passwordreset/ResetLinkAssignment.java","diff":"--- ResetLinkAssignment.java\n+++ ResetLinkAssignment.java\n@@ -85,7 +85,7 @@\n     if (TOM_EMAIL.equals(email)) {\n       String passwordTom =\n           usersToTomPassword.getOrDefault(getWebSession().getUserName(), PASSWORD_TOM_9);\n-      if (passwordTom.equals(PASSWORD_TOM_9)) {\n+      if (PASSWORD_TOM_9.equals(passwordTom)) {\n         return failed(this).feedback(\"login_failed\").build();\n       } else if (passwordTom.equals(password)) {\n         return success(this).build();","changes":[{"lineNumber":88,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/spoofcookie/SpoofCookieAssignment.java","diff":"--- SpoofCookieAssignment.java\n+++ SpoofCookieAssignment.java\n@@ -114,7 +114,7 @@\n       return failed(this).output(e.getMessage()).build();\n     }\n     if (users.containsKey(cookieUsername)) {\n-      if (cookieUsername.equals(ATTACK_USERNAME)) {\n+      if (ATTACK_USERNAME.equals(cookieUsername)) {\n         return success(this).build();\n       }\n       return failed(this)","changes":[{"lineNumber":117,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/sqlinjection/introduction/SqlInjectionLesson2.java","diff":"--- SqlInjectionLesson2.java\n+++ SqlInjectionLesson2.java\n@@ -67,7 +67,7 @@\n \n       results.first();\n \n-      if (results.getString(\"department\").equals(\"Marketing\")) {\n+      if 
(\"Marketing\".equals(results.getString(\"department\"))) {\n         output.append(\"<span class='feedback-positive'>\" + query + \"</span>\");\n         output.append(SqlInjectionLesson8.generateTable(results));\n         return success(this).feedback(\"sql-injection.2.success\").output(output.toString()).build();","changes":[{"lineNumber":70,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/sqlinjection/introduction/SqlInjectionLesson3.java","diff":"--- SqlInjectionLesson3.java\n+++ SqlInjectionLesson3.java\n@@ -66,7 +66,7 @@\n         StringBuilder output = new StringBuilder();\n         // user completes lesson if the department of Tobi Barnett now is 'Sales'\n         results.first();\n-        if (results.getString(\"department\").equals(\"Sales\")) {\n+        if (\"Sales\".equals(results.getString(\"department\"))) {\n           output.append(\"<span class='feedback-positive'>\" + query + \"</span>\");\n           output.append(SqlInjectionLesson8.generateTable(results));\n           return success(this).output(output.toString()).build();","changes":[{"lineNumber":69,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/xss/CrossSiteScriptingLesson3.java","diff":"--- CrossSiteScriptingLesson3.java\n+++ CrossSiteScriptingLesson3.java\n@@ -68,10 +68,10 @@\n           && include.contains(\"%>\")) {\n         includeCorrect = true;\n       }\n-      if (fistNameElement.equals(\"${e:forHtml(param.first_name)}\")) {\n+      if (\"${e:forHtml(param.first_name)}\".equals(fistNameElement)) {\n         firstNameCorrect = true;\n       }\n-      if (lastNameElement.equals(\"${e:forHtml(param.last_name)}\")) {\n+      if (\"${e:forHtml(param.last_name)}\".equals(lastNameElement)) {\n     
    lastNameCorrect = true;\n       }\n ","changes":[{"lineNumber":71,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":74,"properties":{},"description":"Switch order of literals to prevent NullPointerException","diffSide":"left","packageActions":[],"parameters":[]}]}]},{"codemod":"pixee:java/secure-random","summary":"Introduced protections against predictable RNG abuse","description":"This change replaces all new instances of `java.util.Random` with the marginally slower, but much more secure `java.security.SecureRandom`.\n\nWe have to work pretty hard to get computers to generate genuinely unguessable random bits. The `java.util.Random` type uses a method of pseudo-random number generation that unfortunately emits fairly predictable numbers.\n\nIf the numbers it emits are predictable, then it's obviously not safe to use in cryptographic operations, file name creation, token construction, password generation, and anything else that's related to security. 
In fact, it may affect security even if it's not directly obvious.\n\nSwitching to a more secure version is simple and our changes all look something like this:\n\n```diff\n- Random r = new Random();\n+ Random r = new java.security.SecureRandom();\n```\n","failedFiles":[],"references":[{"url":"https://owasp.org/www-community/vulnerabilities/Insecure_Randomness","description":"https://owasp.org/www-community/vulnerabilities/Insecure_Randomness"},{"url":"https://metebalci.com/blog/everything-about-javas-securerandom/","description":"https://metebalci.com/blog/everything-about-javas-securerandom/"},{"url":"https://cwe.mitre.org/data/definitions/330.html","description":"https://cwe.mitre.org/data/definitions/330.html"}],"properties":{},"changeset":[{"path":"src/main/java/org/owasp/webgoat/lessons/challenges/challenge1/ImageServlet.java","diff":"--- ImageServlet.java\n+++ ImageServlet.java\n@@ -1,5 +1,6 @@\n package org.owasp.webgoat.lessons.challenges.challenge1;\n \n+import java.security.SecureRandom;\n import static org.springframework.web.bind.annotation.RequestMethod.GET;\n import static org.springframework.web.bind.annotation.RequestMethod.POST;\n \n@@ -14,7 +15,7 @@\n @RestController\n public class ImageServlet {\n \n-  public static final int PINCODE = new Random().nextInt(10000);\n+  public static final int PINCODE = new SecureRandom().nextInt(10000);\n \n   @RequestMapping(\n       method = {GET, POST},","changes":[{"lineNumber":17,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/challenges/challenge7/PasswordResetLink.java","diff":"--- PasswordResetLink.java\n+++ PasswordResetLink.java\n@@ -1,5 +1,6 @@\n package org.owasp.webgoat.lessons.challenges.challenge7;\n \n+import java.security.SecureRandom;\n import java.util.Random;\n \n /**\n@@ -11,7 +12,7 @@\n public class PasswordResetLink {\n \n   public 
String createPasswordReset(String username, String key) {\n-    Random random = new Random();\n+    Random random = new SecureRandom();\n     if (\"admin\".equalsIgnoreCase(username)) {\n       // Admin has a fix reset link\n       random.setSeed(key.length());","changes":[{"lineNumber":14,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/cryptography/EncodingAssignment.java","diff":"--- EncodingAssignment.java\n+++ EncodingAssignment.java\n@@ -23,6 +23,7 @@\n package org.owasp.webgoat.lessons.cryptography;\n \n import jakarta.servlet.http.HttpServletRequest;\n+import java.security.SecureRandom;\n import java.util.Base64;\n import java.util.Random;\n import org.owasp.webgoat.container.assignments.AssignmentEndpoint;\n@@ -49,7 +50,7 @@\n     String username = request.getUserPrincipal().getName();\n     if (basicAuth == null) {\n       String password =\n-          HashingAssignment.SECRETS[new Random().nextInt(HashingAssignment.SECRETS.length)];\n+          HashingAssignment.SECRETS[new SecureRandom().nextInt(HashingAssignment.SECRETS.length)];\n       basicAuth = getBasicAuth(username, password);\n       request.getSession().setAttribute(\"basicAuth\", basicAuth);\n     }","changes":[{"lineNumber":52,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/cryptography/HashingAssignment.java","diff":"--- HashingAssignment.java\n+++ HashingAssignment.java\n@@ -25,6 +25,7 @@\n import jakarta.servlet.http.HttpServletRequest;\n import java.security.MessageDigest;\n import java.security.NoSuchAlgorithmException;\n+import java.security.SecureRandom;\n import java.util.Random;\n import javax.xml.bind.DatatypeConverter;\n import 
org.owasp.webgoat.container.assignments.AssignmentEndpoint;\n@@ -50,7 +51,7 @@\n     String md5Hash = (String) request.getSession().getAttribute(\"md5Hash\");\n     if (md5Hash == null) {\n \n-      String secret = SECRETS[new Random().nextInt(SECRETS.length)];\n+      String secret = SECRETS[new SecureRandom().nextInt(SECRETS.length)];\n \n       MessageDigest md = MessageDigest.getInstance(\"MD5\");\n       md.update(secret.getBytes());\n@@ -68,7 +69,7 @@\n \n     String sha256 = (String) request.getSession().getAttribute(\"sha256\");\n     if (sha256 == null) {\n-      String secret = SECRETS[new Random().nextInt(SECRETS.length)];\n+      String secret = SECRETS[new SecureRandom().nextInt(SECRETS.length)];\n       sha256 = getHash(secret, \"SHA-256\");\n       request.getSession().setAttribute(\"sha256Hash\", sha256);\n       request.getSession().setAttribute(\"sha256Secret\", secret);","changes":[{"lineNumber":53,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":71,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/csrf/CSRFGetFlag.java","diff":"--- CSRFGetFlag.java\n+++ CSRFGetFlag.java\n@@ -23,6 +23,7 @@\n package org.owasp.webgoat.lessons.csrf;\n \n import jakarta.servlet.http.HttpServletRequest;\n+import java.security.SecureRandom;\n import java.util.HashMap;\n import java.util.Map;\n import java.util.Random;\n@@ -54,13 +55,13 @@\n \n     if (\"NULL\".equals(referer)) {\n       if (\"true\".equals(req.getParameter(\"csrf\"))) {\n-        Random random = new Random();\n+        Random random = new SecureRandom();\n         userSessionData.setValue(\"csrf-get-success\", random.nextInt(65536));\n         response.put(\"success\", true);\n         response.put(\"message\", 
pluginMessages.getMessage(\"csrf-get-null-referer.success\"));\n         response.put(\"flag\", userSessionData.getValue(\"csrf-get-success\"));\n       } else {\n-        Random random = new Random();\n+        Random random = new SecureRandom();\n         userSessionData.setValue(\"csrf-get-success\", random.nextInt(65536));\n         response.put(\"success\", true);\n         response.put(\"message\", pluginMessages.getMessage(\"csrf-get-other-referer.success\"));\n@@ -71,7 +72,7 @@\n       response.put(\"message\", \"Appears the request came from the original host\");\n       response.put(\"flag\", null);\n     } else {\n-      Random random = new Random();\n+      Random random = new SecureRandom();\n       userSessionData.setValue(\"csrf-get-success\", random.nextInt(65536));\n       response.put(\"success\", true);\n       response.put(\"message\", pluginMessages.getMessage(\"csrf-get-other-referer.success\"));","changes":[{"lineNumber":57,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":63,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]},{"lineNumber":74,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/hijacksession/cas/HijackSessionAuthenticationProvider.java","diff":"--- HijackSessionAuthenticationProvider.java\n+++ HijackSessionAuthenticationProvider.java\n@@ -23,6 +23,7 @@\n \n package org.owasp.webgoat.lessons.hijacksession.cas;\n \n+import java.security.SecureRandom;\n import java.time.Instant;\n import java.util.LinkedList;\n import java.util.Queue;\n@@ -45,7 +46,7 @@\n public class HijackSessionAuthenticationProvider implements AuthenticationProvider<Authentication> {\n \n   
private Queue<String> sessions = new LinkedList<>();\n-  private static long id = new Random().nextLong() & Long.MAX_VALUE;\n+  private static long id = new SecureRandom().nextLong() & Long.MAX_VALUE;\n   protected static final int MAX_SESSIONS = 50;\n \n   private static final DoublePredicate PROBABILITY_DOUBLE_PREDICATE = pr -> pr < 0.75;","changes":[{"lineNumber":48,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]},{"path":"src/main/java/org/owasp/webgoat/lessons/jwt/JWTSecretKeyEndpoint.java","diff":"--- JWTSecretKeyEndpoint.java\n+++ JWTSecretKeyEndpoint.java\n@@ -27,6 +27,7 @@\n import io.jsonwebtoken.Jwts;\n import io.jsonwebtoken.SignatureAlgorithm;\n import io.jsonwebtoken.impl.TextCodec;\n+import java.security.SecureRandom;\n import java.time.Instant;\n import java.util.Calendar;\n import java.util.Date;\n@@ -50,7 +51,7 @@\n     \"victory\", \"business\", \"available\", \"shipping\", \"washington\"\n   };\n   public static final String JWT_SECRET =\n-      TextCodec.BASE64.encode(SECRETS[new Random().nextInt(SECRETS.length)]);\n+      TextCodec.BASE64.encode(SECRETS[new SecureRandom().nextInt(SECRETS.length)]);\n   private static final String WEBGOAT_USER = \"WebGoat\";\n   private static final List<String> expectedClaims =\n       List.of(\"iss\", \"iat\", \"exp\", \"aud\", \"sub\", \"username\", \"Email\", \"Role\");","changes":[{"lineNumber":53,"properties":{},"description":"Replaced the weak pseudo-random number generator with a strong one","diffSide":"left","packageActions":[],"parameters":[]}]}]}]}


--------------------------------------------------------------------------------
/logging.md:
--------------------------------------------------------------------------------
  1 | # Logging
  2 | 
  3 | This specification dictates logging requirements for codemodder frameworks. Frameworks need to align on a general design, some important messages to log, and output formats. This way, users get a relatively consistent experience, design problems are solved once and only once, and downstream tooling can make assumptions about expected output.
  4 | 
  5 | ## Output format
  6 | 
  7 | The `--log-format` parameter tells the framework whether the output should be human readable by specifying `human` (which is the default) or `json`, which causes each message to be logged as its own self-contained JSON object. This structured logging will be beneficial for downstream tooling that wants to parse, query or programmatically act on codemod output.
  8 | 
  9 | ### `human` output
 10 | 
 11 | For `human` output, we expect a message like the following:
 12 | ```
 13 | $ ./run-my-codemod.sh (...) /my/project
 14 | [starting] 
 15 | codemodder: java/1.2
 16 | ```
 17 | 
 18 | Notice there is very little boilerplate context for the log message. The log message doesn't show the log level, timestamp, origin, thread, or any other contextual metadata. We believe CLI users would prefer a very simple, undecorated set of messages.
 19 | 
 20 | ### `json` output
 21 | 
 22 | For `json` output, we expect a message like the following:
 23 | 
 24 | ```
 25 | $ ./run-my-codemod.sh (...) /my/project
 26 | { "level" : "INFO", "file" : "Startup.java", "line" : "21", "message" : "[starting]" }
 27 | { "level" : "INFO", "file" : "Startup.java", "line" : "27", "message" : "codemodder: java/1.2" }
 28 | ```
 29 | 
 30 | The `message` field represents the text of the log message. 
 31 | 
 32 | The `level` field should be one of `DEBUG`, `INFO`, or `ERROR`. In general:
 33 |  - `ERROR` messages should contain unexpected errors
 34 |  - `INFO` should contain information about changes
 35 |  - `DEBUG` should contain diagnostic or forensic information
 36 | 
 37 | The `file` field should be the code file that is generating the message.
 38 | 
 39 | The `line` field should be the line in the code file that is generating the message.
 40 | 
 41 | Note: If the `--project-name` was specified in the [CLI](cli.md) arguments, it should be provided in a `project_name` key.
 42 | 
 43 | Frameworks can log extra, language-specific keys as desired as long as they don't conflict with standard keys.
 44 | 
 45 | ## Verbosity
 46 | 
 47 | The `--verbose` flag tells the codemodder framework to emit all log messages, regardless of their level. This flag has nothing to do with the output format, and strictly controls the volume of messages emitted.
 48 | 
 49 | ## Output streams
 50 | 
 51 | Generally, logs are expected to go to `stdout`. When the user specifies that the output format should be `human`, including the default case where no output format is specified, it's preferred that error messages be emitted to `stderr`.
 52 | 
 53 | ## Message Design
 54 | 
 55 | We take inspiration from YAML as a format for designing log messages. Outputting in a YAML-like structure gives us some helpful strategies for how to visualize familiar structures, like lists of files, section breaks, and displaying simple key-value pairs.
 56 | 
 57 | ## Messages
 58 | 
 59 | ### Without `--verbose`
 60 | 
 61 | The following example output shows all the required messages when running without `--verbose`. 
 62 | 
 63 | ```
 64 | $ ./run-codemodder-codemod.sh -o /tmp/my_project.codetf /tmp/my_project
 65 | codemodder: javascript/1.2
 66 | 
 67 | [scanning]
 68 | running codemod: pixee:javascript/limit-readline
 69 | changed:
 70 |   - /tmp/my_project/file_d:57,59
 71 |   - /tmp/my_project/file_e:194
 72 | ```
 73 | 
 74 | ### With `--verbose`
 75 | 
 76 | The following example output shows all the required messages when running with `--verbose`. Frameworks can emit more messages, but the following messages serve as a reasonable first milestone or baseline for good-enough logging to support users and downstream tooling. Most of these messages feel self-explanatory, but the specification can go into more detail over time as actual usage elicits a clearer vision of what's important.
 77 | 
 78 | ```
 79 | $ ./run-codemodder-codemod.sh --verbose -o /tmp/my_project.codetf /tmp/my_project
 80 | 
 81 | [startup] 
 82 | codemodder: javascript/1.2
 83 | 
 84 | [setup]
 85 | running: pixee:javascript/limit-readline, pixee:javascript/secure-random
 86 | including paths: **/*.js
 87 | excluding paths: **/tests/**,**/test.js
 88 | 
 89 | [scanning]
 90 | running codemod: pixee:javascript/limit-readline
 91 | scanned: 100 files...
 92 | scanned: 200 files...
 93 | scanned: 300 files...
 94 | failed:
 95 |   - /tmp/my_project/file_a
 96 |   - /tmp/my_project/file_b
 97 |   - /tmp/my_project/file_c
 98 | changed:
 99 |   - /tmp/my_project/file_d:57,59
100 |     diff:
101 |       --- from.txt    2015-03-17 04:34:47.076997087 -0430
102 |       +++ to.txt      2015-03-17 04:35:27.872996388 -0430
103 |       @@ -1,2 +1,2 @@
104 |        a
105 |       -b
106 |       +c
107 |   
108 |   - /tmp/my_project/file_e:194
109 |     diff:
110 |       --- from.txt    2015-03-17 04:34:47.076997087 -0430
111 |       +++ to.txt      2015-03-17 04:35:27.872996388 -0430
112 |       @@ -1,2 +1,2 @@
113 |        a
114 |       -b
115 |       +c
116 | 
117 | running codemod: pixee:javascript/secure-random
118 | scanned: 100 files...
119 | scanned: 200 files...  
120 | scanned: 300 files...
121 | failed:
122 |   - /tmp/my_project/file_a
123 |   - /tmp/my_project/file_b
124 |   - /tmp/my_project/file_c
125 | changed:
126 |   - /tmp/my_project/file_d:81
127 |     diff:
128 |       --- from.txt    2015-03-17 04:34:47.076997087 -0430
129 |       +++ to.txt      2015-03-17 04:35:27.872996388 -0430
130 |       @@ -1,2 +1,2 @@
131 |        a
132 |       -b
133 |       +c
134 | 
135 | [report]
136 | scanned: 356 files
137 | failed: 6 files (3 unique)
138 | changed: 3 files (3 unique)
139 | report file: /tmp/my_project.codetf
140 | elapsed: 14050ms 
141 | ```
142 | 


--------------------------------------------------------------------------------
/scripts/requirements.txt:
--------------------------------------------------------------------------------
1 | click~=8.0.0
2 | jsonschema~=4.21.0
3 | 


--------------------------------------------------------------------------------
/scripts/validate_schema.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python3
 2 | import json
 3 | 
 4 | import click
 5 | import jsonschema
 6 | 
 7 | 
 8 | @click.command()
 9 | @click.argument('schema_file', type=click.File('r'))
10 | def validate_schema(schema_file):
11 |     schema = json.load(schema_file)
12 |     jsonschema.Draft202012Validator.check_schema(schema)
13 |     print('✅ Schema is valid')
14 | 
15 | 
16 | if __name__ == '__main__':
17 |     validate_schema()
18 | 


--------------------------------------------------------------------------------