├── tests ├── __init__.py ├── codemods │ ├── __init__.py │ ├── conftest.py │ ├── sonar │ │ ├── test_sonar_exception_without_raise.py │ │ ├── test_sonar_literal_or_new_object_identity.py │ │ ├── test_sonar_numpy_nan_equality.py │ │ ├── test_sonar_break_or_continue_out_of_loop.py │ │ ├── test_sonar_fix_float_equality.py │ │ └── test_sonar_fix_math_isclose.py │ ├── test_use_set_literal.py │ ├── test_django_debug_flag_on.py │ ├── test_remove_unnecessary_f_str.py │ └── test_limit_readline.py ├── transformations │ ├── __init__.py │ └── test_remove_unused_imports.py ├── project_analysis │ ├── __init__.py │ ├── file_parsers │ │ └── __init__.py │ └── test_python_repo_manager.py ├── dependency_management │ └── __init__.py ├── samples │ ├── fix_assert_tuple.py │ ├── break_or_continue_out_of_loop.py │ ├── fix_float_equality.py │ ├── exception_without_raise.py │ ├── literal_or_new_object_identity.py │ ├── tempfile_mktemp.py │ ├── secure_random.py │ ├── fix_math_isclose.py │ ├── numpy_nan_equality.py │ ├── use_secure_protocols.py │ ├── multiple_codemods.py │ ├── jinja2_autoescape.py │ ├── make_request.py │ ├── remove_assertion_in_pytest_raises.py │ ├── fix_missing_self_or_cls.py │ ├── django_json_response_type.py │ ├── flask_request.py │ ├── django_receiver_on_top.py │ ├── secure_cookie.py │ ├── flask_json_response_type.py │ ├── fix_sonar_sql_parameterization.py │ ├── django_model.py │ ├── jwt_decode_verify.py │ └── disable_graphql_introspection.py ├── test_version.py ├── test_file_context.py ├── test_logging.py ├── test_llm.py ├── test_codemod_docs.py ├── test_semgrep.py └── test_registry.py ├── integration_tests ├── __init__.py ├── README.md ├── conftest.py ├── test_fix_assert_tuple.py ├── test_fix_empty_sequence_comparison.py ├── test_unnecessary_f_str.py ├── test_use_set_literal.py ├── test_remove_module_global.py ├── test_remove_debug_breakpoint.py ├── sonar │ ├── test_sonar_fix_assert_tuple.py │ ├── test_sonar_fix_math_isclose.py │ ├── test_sonar_tempfile_mktemp.py │ ├── 
test_sonar_secure_random.py │ ├── test_sonar_numpy_nan_equality.py │ ├── test_sonar_exception_without_raise.py │ ├── test_sonar_url_sandbox.py │ ├── test_sonar_break_or_continue_out_of_loop.py │ ├── test_sonar_use_secure_protocols.py │ ├── test_sonar_fix_missing_self_or_cls.py │ ├── test_sonar_literal_or_new_object_identity.py │ ├── test_sonar_jinja2_autoescape.py │ ├── test_sonar_fix_float_equality.py │ ├── test_sonar_secure_cookie.py │ ├── test_sonar_sql_parameterization.py │ ├── test_sonar_django_receiver_on_top.py │ ├── test_sonar_django_json_response_type.py │ ├── test_sonar_remove_assertion_in_pytest_raises.py │ ├── test_sonar_flask_json_response_type.py │ └── test_sonar_jwt_decode_verify.py ├── test_remove_unused_imports.py ├── test_lxml_safe_parser_defaults.py ├── test_upgrade_sslcontext_tls.py ├── test_fix_deprecated_logging_warn.py ├── test_combine_startswith_endswith.py ├── test_combine_isinstance_issubclass.py ├── test_with_threading_lock.py ├── test_tempfile_mktemp.py ├── test_use_generator.py ├── test_django_debug_flag_on.py ├── test_remove_future_imports.py ├── test_limit_readline.py ├── test_fix_hasattr_call.py ├── test_subprocess_shell_false.py ├── test_break_or_continue_out_of_loop.py ├── test_harden_ruamel.py ├── test_exception_without_raise.py ├── test_literal_or_new_object_identity.py ├── test_django_session_cookie_secure_off.py ├── test_secure_random.py ├── test_secure_flask_session_config.py ├── test_numpy_nan_equality.py ├── test_fix_math_isclose.py ├── test_lazy_logging.py ├── test_fix_float_equality.py ├── test_jinja2_autoescape.py ├── test_lxml_safe_parsing.py ├── test_str_concat_in_seq_literals.py ├── test_harden_pyyaml.py ├── test_secure_flask_cookie.py ├── test_upgrade_sslcontext_minimum_version.py ├── test_remove_assertion_in_pytest_raises.py ├── test_file_resource_leak.py ├── test_use_walrus_if.py ├── test_django_json_response_type.py ├── test_django_receiver_on_top.py ├── test_fix_task_instantiation.py ├── test_request_verify.py ├── 
test_fix_missing_self_or_cls.py ├── test_flask_json_response_type.py └── test_fix_mutable_params.py ├── src ├── codemodder │ ├── codemods │ │ ├── __init__.py │ │ ├── transformations │ │ │ └── __init__.py │ │ ├── test │ │ │ ├── __init__.py │ │ │ └── validations.py │ │ ├── base_detector.py │ │ └── codeql.py │ ├── scripts │ │ └── __init__.py │ ├── project_analysis │ │ ├── __init__.py │ │ └── file_parsers │ │ │ ├── utils.py │ │ │ ├── __init__.py │ │ │ ├── setup_cfg_file_parser.py │ │ │ └── base_parser.py │ ├── codetf │ │ └── __init__.py │ ├── dependency_management │ │ └── __init__.py │ ├── __init__.py │ └── utils │ │ ├── abc_dataclass.py │ │ ├── timer.py │ │ └── update_finding_metadata.py └── core_codemods │ ├── docs │ ├── __init__.py │ ├── pixee_python_unused-imports.md │ ├── pixee_python_invert-boolean-check.md │ ├── pixee_python_break-or-continue-out-of-loop.md │ ├── pixee_python_use-set-literal.md │ ├── pixee_python_exception-without-raise.md │ ├── pixee_python_remove-module-global.md │ ├── pixee_python_numpy-nan-equality.md │ ├── pixee_python_str-concat-in-sequence-literals.md │ ├── pixee_python_https-connection.md │ ├── pixee_python_fix-hasattr-call.md │ ├── pixee_python_remove-debug-breakpoint.md │ ├── pixee_python_fix-assert-tuple.md │ ├── pixee_python_remove-unnecessary-f-str.md │ ├── pixee_python_django-session-cookie-secure-off.md │ ├── pixee_python_literal-or-new-object-identity.md │ ├── pixee_python_fix-async-task-instantiation.md │ ├── pixee_python_django-debug-flag-on.md │ ├── pixee_python_harden-ruamel.md │ ├── pixee_python_fix-missing-self-or-cls.md │ ├── pixee_python_use-walrus-if.md │ ├── pixee_python_limit-readline.md │ ├── pixee_python_fix-deprecated-logging-warn.md │ ├── defectdojo_python_django-secure-set-cookie.md │ ├── pixee_python_fix-deprecated-abstractproperty.md │ ├── pixee_python_bad-lock-with-statement.md │ ├── pixee_python_fix-empty-sequence-comparison.md │ ├── pixee_python_subprocess-shell-false.md │ ├── 
pixee_python_secure-flask-session-configuration.md │ ├── pixee_python_enable-jinja2-autoescape.md │ ├── pixee_python_remove-future-imports.md │ ├── pixee_python_combine-isinstance-issubclass.md │ ├── pixee_python_combine-startswith-endswith.md │ ├── pixee_python_lazy-logging.md │ ├── pixee_python_secure-flask-cookie.md │ ├── pixee_python_remove-assertion-in-pytest-raises.md │ ├── pixee_python_safe-lxml-parsing.md │ ├── pixee_python_django-receiver-on-top.md │ ├── pixee_python_fix-math-isclose.md │ ├── pixee_python_upgrade-sslcontext-minimum-version.md │ ├── pixee_python_timezone-aware-datetime.md │ ├── pixee_python_fix-file-resource-leak.md │ ├── pixee_python_django-json-response-type.md │ ├── pixee_python_sql-parameterization.md │ ├── pixee_python_flask-json-response-type.md │ ├── pixee_python_fix-float-equality.md │ ├── pixee_python_jwt-decode-verify.md │ ├── pixee_python_secure-random.md │ ├── sonar_python_use-secure-protocols.md │ ├── pixee_python_use-defusedxml.md │ ├── pixee_python_add-requests-timeouts.md │ ├── pixee_python_secure-tempfile.md │ ├── pixee_python_fix-dataclass-defaults.md │ ├── pixee_python_replace-flask-send-file.md │ ├── pixee_python_flask-enable-csrf-protection.md │ ├── pixee_python_upgrade-sslcontext-tls.md │ ├── pixee_python_requests-verify.md │ ├── pixee_python_safe-lxml-parser-defaults.md │ ├── pixee_python_harden-pickle-load.md │ ├── pixee_python_harden-pyyaml.md │ └── pixee_python_disable-graphql-introspection.md │ ├── refactor │ └── __init__.py │ ├── api │ ├── __init__.py │ └── core_codemod.py │ ├── sonar │ ├── sonar_fix_assert_tuple.py │ ├── sonar_url_sandbox.py │ ├── sonar_secure_random.py │ ├── sonar_tempfile_mktemp.py │ ├── sonar_fix_float_equality.py │ ├── sonar_invert_boolean_check.py │ ├── sonar_numpy_nan_equality.py │ ├── sonar_exception_without_raise.py │ ├── sonar_fix_missing_self_or_cls.py │ ├── sonar_sandbox_process_creation.py │ ├── sonar_sql_parameterization.py │ ├── sonar_enable_jinja2_autoescape.py │ ├── 
sonar_flask_json_response_type.py │ ├── sonar_break_or_continue_out_of_loop.py │ ├── sonar_django_json_response_type.py │ ├── sonar_disable_graphql_introspection.py │ ├── sonar_django_model_without_dunder_str.py │ ├── sonar_django_receiver_on_top.py │ ├── sonar_literal_or_new_object_identity.py │ ├── sonar_timezone_aware_datetime.py │ ├── sonar_remove_assertion_in_pytest_raises.py │ ├── sonar_jwt_decode_verify.py │ └── sonar_fix_math_isclose.py │ ├── semgrep │ ├── semgrep_use_defused_xml.py │ ├── semgrep_sandbox_process_creation.py │ ├── semgrep_subprocess_shell_false.py │ ├── semgrep_enable_jinja2_autoescape.py │ ├── semgrep_django_secure_set_cookie.py │ ├── semgrep_url_sandbox.py │ ├── semgrep_jwt_decode_verify.py │ └── semgrep_harden_pyyaml.py │ ├── secure_cookie_mixin.py │ ├── fix_hasattr_call.py │ ├── combine_isinstance_issubclass.py │ ├── remove_module_global.py │ ├── limit_readline.py │ ├── use_set_literal.py │ ├── combine_startswith_endswith.py │ ├── django_debug_flag_on.py │ └── remove_debug_breakpoint.py ├── .github ├── CODEOWNERS ├── pixeebot.yaml ├── pull_request_template.md └── workflows │ ├── sonar_pixee.yml │ ├── pre-commit-autoupdate.yml │ ├── autoformat-pixeebot-prs.yaml │ ├── deploy_to_pypi.yml │ ├── integration_test.yml │ ├── codemod_pygoat.yml │ └── lint.yml ├── .sonarcloud.properties ├── img ├── codemodder.png ├── base-codemod.jpg ├── codemodder-dark.png └── codemodder-light.png ├── Dockerfile ├── MANIFEST.in ├── codecov.yaml ├── .coveragerc ├── .semgrepignore ├── renovate.json ├── Makefile ├── ci_tests └── test_pygoat_findings.py └── CONTRIBUTING.md /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/codemods/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- 
/integration_tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/transformations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codemodder/codemods/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codemodder/scripts/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/core_codemods/docs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/core_codemods/refactor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/project_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codemodder/project_analysis/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/dependency_management/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/project_analysis/file_parsers/__init__.py: -------------------------------------------------------------------------------- 1 | 
-------------------------------------------------------------------------------- /src/codemodder/codemods/transformations/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/codemodder/project_analysis/file_parsers/utils.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.github/CODEOWNERS: -------------------------------------------------------------------------------- 1 | * @drdavella @andrecsilva @clavedeluna 2 | -------------------------------------------------------------------------------- /.sonarcloud.properties: -------------------------------------------------------------------------------- 1 | sonar.exclusions=tests/samples/** 2 | -------------------------------------------------------------------------------- /tests/samples/fix_assert_tuple.py: -------------------------------------------------------------------------------- 1 | assert (1 == 1, 2 == 2) 2 | -------------------------------------------------------------------------------- /src/codemodder/codetf/__init__.py: -------------------------------------------------------------------------------- 1 | from .v2.codetf import * # noqa: F403 2 | -------------------------------------------------------------------------------- /tests/samples/break_or_continue_out_of_loop.py: -------------------------------------------------------------------------------- 1 | def f(): 2 | continue 3 | -------------------------------------------------------------------------------- /tests/samples/fix_float_equality.py: -------------------------------------------------------------------------------- 1 | def foo(a, b): 2 | return a == b - 0.1 3 | -------------------------------------------------------------------------------- /tests/samples/exception_without_raise.py: 
-------------------------------------------------------------------------------- 1 | try: 2 | ValueError 3 | except: 4 | pass 5 | -------------------------------------------------------------------------------- /tests/samples/literal_or_new_object_identity.py: -------------------------------------------------------------------------------- 1 | def foo(l): 2 | return l is [1,2,3] 3 | -------------------------------------------------------------------------------- /tests/samples/tempfile_mktemp.py: -------------------------------------------------------------------------------- 1 | import tempfile 2 | 3 | filename = tempfile.mktemp() 4 | -------------------------------------------------------------------------------- /img/codemodder.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixee/codemodder-python/HEAD/img/codemodder.png -------------------------------------------------------------------------------- /img/base-codemod.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixee/codemodder-python/HEAD/img/base-codemod.jpg -------------------------------------------------------------------------------- /tests/samples/secure_random.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | random.random() 4 | random.getrandbits(1) 5 | -------------------------------------------------------------------------------- /img/codemodder-dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixee/codemodder-python/HEAD/img/codemodder-dark.png -------------------------------------------------------------------------------- /img/codemodder-light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/pixee/codemodder-python/HEAD/img/codemodder-light.png 
-------------------------------------------------------------------------------- /tests/samples/fix_math_isclose.py: -------------------------------------------------------------------------------- 1 | import math 2 | 3 | 4 | def foo(a): 5 | return math.isclose(a, 0) 6 | -------------------------------------------------------------------------------- /tests/samples/numpy_nan_equality.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | a = np.nan 4 | if a == np.nan: 5 | pass 6 | -------------------------------------------------------------------------------- /tests/samples/use_secure_protocols.py: -------------------------------------------------------------------------------- 1 | import ftplib 2 | import smtplib 3 | 4 | url = "http://example.com" 5 | -------------------------------------------------------------------------------- /src/codemodder/dependency_management/__init__.py: -------------------------------------------------------------------------------- 1 | from .dependency_manager import DependencyManager # noqa: F401 2 | -------------------------------------------------------------------------------- /tests/samples/multiple_codemods.py: -------------------------------------------------------------------------------- 1 | import random 2 | 3 | 4 | def func(foo=[]): 5 | return random.random() 6 | -------------------------------------------------------------------------------- /tests/samples/jinja2_autoescape.py: -------------------------------------------------------------------------------- 1 | from jinja2 import Environment 2 | 3 | env = Environment() 4 | env = Environment(autoescape=False) 5 | -------------------------------------------------------------------------------- /tests/test_version.py: -------------------------------------------------------------------------------- 1 | from codemodder import __version__ 2 | 3 | 4 | def test_version(): 5 | assert __version__ != "unknown" 6 | 
-------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.14 2 | WORKDIR /codemodder 3 | COPY . . 4 | 5 | RUN pip install . 6 | 7 | ENTRYPOINT ["codemodder"] 8 | CMD ["--help"] 9 | -------------------------------------------------------------------------------- /integration_tests/README.md: -------------------------------------------------------------------------------- 1 | # Integration tests 2 | 3 | These tests should use a minimal amount of mocking (none is best) to test the full program. 4 | -------------------------------------------------------------------------------- /tests/samples/make_request.py: -------------------------------------------------------------------------------- 1 | from test_sources import untrusted_data 2 | import requests 3 | 4 | url = untrusted_data() 5 | requests.get(url) 6 | var = "hello" 7 | -------------------------------------------------------------------------------- /.github/pixeebot.yaml: -------------------------------------------------------------------------------- 1 | codemods: 2 | prepend: 3 | - pixee:python/use-walrus-if 4 | exclude: 5 | - pixee:python/sandbox-process-creation 6 | - pixee:python/url-sandbox 7 | -------------------------------------------------------------------------------- /MANIFEST.in: -------------------------------------------------------------------------------- 1 | include README.md 2 | include CHANGELOG.md 3 | include LICENSE 4 | 5 | recursive-include src/core_codemods/semgrep *.yaml 6 | recursive-include src/core_codemods/docs *.md 7 | -------------------------------------------------------------------------------- /src/core_codemods/api/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa: F401 2 | from codemodder.codemods.api import Metadata, Reference, ReviewGuidance 3 | 4 | from .core_codemod import 
CoreCodemod, SASTCodemod, SimpleCodemod 5 | -------------------------------------------------------------------------------- /tests/samples/remove_assertion_in_pytest_raises.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | def test_foo(): 4 | with pytest.raises(ZeroDivisionError): 5 | error = 1/0 6 | assert 1 7 | assert 2 8 | -------------------------------------------------------------------------------- /tests/samples/fix_missing_self_or_cls.py: -------------------------------------------------------------------------------- 1 | class MyClass: 2 | def instance_method(): 3 | print("instance_method") 4 | 5 | @classmethod 6 | def class_method(): 7 | print("class_method") 8 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_unused-imports.md: -------------------------------------------------------------------------------- 1 | Removes unused imports from a module. Imports involving the `__future__` module are ignored. 
2 | 3 | ```diff 4 | - import a 5 | import b 6 | 7 | b.function() 8 | ``` 9 | -------------------------------------------------------------------------------- /codecov.yaml: -------------------------------------------------------------------------------- 1 | coverage: 2 | status: 3 | project: 4 | default: 5 | target: 90% 6 | patch: 7 | default: 8 | target: 90% 9 | 10 | comment: 11 | layout: "reach, diff, flags, files" 12 | -------------------------------------------------------------------------------- /src/codemodder/__init__.py: -------------------------------------------------------------------------------- 1 | try: 2 | from ._version import __version__ 3 | except ImportError: # pragma: no cover 4 | __version__ = "unknown" 5 | 6 | from codemodder.codemodder import run 7 | 8 | __all__ = ["run", "__version__"] 9 | -------------------------------------------------------------------------------- /tests/samples/django_json_response_type.py: -------------------------------------------------------------------------------- 1 | from django.http import HttpResponse 2 | import json 3 | 4 | def foo(request): 5 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 6 | return HttpResponse(json_response) 7 | -------------------------------------------------------------------------------- /tests/samples/flask_request.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from flask import Flask, request 3 | 4 | app = Flask(__name__) 5 | 6 | 7 | @app.route("/example") 8 | def example(): 9 | url = request.args["url"] 10 | requests.get(url) 11 | -------------------------------------------------------------------------------- /tests/samples/django_receiver_on_top.py: -------------------------------------------------------------------------------- 1 | from django.dispatch import receiver 2 | from django.views.decorators.csrf import csrf_exempt 3 | from django.core.signals import request_finished 4 | 5 | 
@csrf_exempt 6 | @receiver(request_finished) 7 | def foo(): 8 | pass 9 | -------------------------------------------------------------------------------- /src/codemodder/codemods/test/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa: F401 2 | from .integration_utils import BaseIntegrationTest, SonarIntegrationTest 3 | from .utils import ( 4 | BaseCodemodTest, 5 | BaseDjangoCodemodTest, 6 | BaseSASTCodemodTest, 7 | DiffError, 8 | ) 9 | -------------------------------------------------------------------------------- /tests/samples/secure_cookie.py: -------------------------------------------------------------------------------- 1 | from flask import Flask, session, make_response 2 | 3 | app = Flask(__name__) 4 | 5 | @app.route('/') 6 | def index(): 7 | resp = make_response('Custom Cookie Set') 8 | resp.set_cookie('custom_cookie', 'value') 9 | return resp 10 | -------------------------------------------------------------------------------- /src/codemodder/project_analysis/file_parsers/__init__.py: -------------------------------------------------------------------------------- 1 | # ruff: noqa: F401 2 | from .pyproject_toml_file_parser import PyprojectTomlParser 3 | from .requirements_txt_file_parser import RequirementsTxtParser 4 | from .setup_cfg_file_parser import SetupCfgParser 5 | from .setup_py_file_parser import SetupPyParser 6 | -------------------------------------------------------------------------------- /tests/samples/flask_json_response_type.py: -------------------------------------------------------------------------------- 1 | from flask import make_response, Flask 2 | import json 3 | 4 | app = Flask(__name__) 5 | 6 | @app.route("/test") 7 | def foo(request): 8 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 9 | return make_response(json_response) 10 | -------------------------------------------------------------------------------- /.github/pull_request_template.md: 
-------------------------------------------------------------------------------- 1 | ## Overview 2 | *One sentence, high-level explanation of WHY changes are introduced and/or business need* 3 | 4 | ## Description 5 | 6 | * What/WHY/how these changes are needed 7 | 8 | ## Additional Details 9 | * Any follow up tickets or discussion 10 | * Any specific merge / deploy details 11 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_invert-boolean-check.md: -------------------------------------------------------------------------------- 1 | This codemod flips boolean `not` comparisons to their more readable equivalent comparisons. 2 | 3 | The changes from this codemod look like this: 4 | 5 | ```diff 6 | - assert not user_input == "yes" 7 | - z = not m <= n 8 | + assert user_input != "yes" 9 | + z = m > n 10 | ``` 11 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_break-or-continue-out-of-loop.md: -------------------------------------------------------------------------------- 1 | Any `break` or `continue` statements that are not inside a `for` or `while` loop will result in a `SyntaxError`. This codemod will remove them. 
2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | def f(): 7 | print('not in a loop') 8 | - break 9 | ``` 10 | -------------------------------------------------------------------------------- /tests/codemods/conftest.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | 4 | @pytest.fixture(autouse=True) 5 | def disable_semgrep_run(): 6 | """ 7 | Override the fixture defined in conftest.py 8 | """ 9 | 10 | 11 | @pytest.fixture(autouse=True) 12 | def disable_update_code(): 13 | """ 14 | Override the fixture defined in conftest.py 15 | """ 16 | -------------------------------------------------------------------------------- /tests/project_analysis/test_python_repo_manager.py: -------------------------------------------------------------------------------- 1 | from codemodder.project_analysis.python_repo_manager import PythonRepoManager 2 | 3 | 4 | class TestPythonRepoManager: 5 | def test_package_stores(self, pkg_with_reqs_txt): 6 | rm = PythonRepoManager(pkg_with_reqs_txt) 7 | stores = rm.package_stores 8 | assert len(stores) == 1 9 | -------------------------------------------------------------------------------- /.coveragerc: -------------------------------------------------------------------------------- 1 | [run] 2 | source = codemodder 3 | patch = subprocess 4 | omit = 5 | */codemodder/scripts/* 6 | */codemodder/_version.py 7 | */core_codemods/refactor/* 8 | 9 | [paths] 10 | codemodder = 11 | */src/codemodder 12 | */site-packages/codemodder 13 | core_codemods = 14 | */src/core_codemods 15 | */site-packages/core_codemods 16 | -------------------------------------------------------------------------------- /integration_tests/conftest.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | import requests 5 | 6 | 7 | @pytest.fixture(scope="session") 8 | def codetf_schema(): 9 | schema_path = 
"https://raw.githubusercontent.com/pixee/codemodder-specs/main/codetf.schema.json" 10 | response = requests.get(schema_path, timeout=60) 11 | yield json.loads(response.text) 12 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_use-set-literal.md: -------------------------------------------------------------------------------- 1 | This codemod converts Python set constructions using literal list arguments into more efficient and readable set literals. It simplifies expressions like `set([1, 2, 3])` to `{1, 2, 3}`, enhancing both performance and code clarity. 2 | 3 | Our changes look like this: 4 | ```diff 5 | -x = set([1, 2, 3]) 6 | +x = {1, 2, 3} 7 | ``` 8 | -------------------------------------------------------------------------------- /.semgrepignore: -------------------------------------------------------------------------------- 1 | # This is the file that will be used for the codemodder internal semgrep run. 2 | 3 | # Common large paths 4 | node_modules/ 5 | build/ 6 | dist/ 7 | vendor/ 8 | .env/ 9 | .venv/ 10 | .tox/ 11 | *.min.js 12 | .npm/ 13 | 14 | # Common test paths 15 | # NONE 16 | 17 | # Semgrep rules folder 18 | .semgrep 19 | 20 | # Semgrep-action log folder 21 | .semgrep_logs/ 22 | -------------------------------------------------------------------------------- /tests/samples/fix_sonar_sql_parameterization.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | 3 | from flask import Flask, request 4 | 5 | app = Flask(__name__) 6 | 7 | 8 | @app.route("/example") 9 | def f(): 10 | user = request.args["user"] 11 | sql = """SELECT user FROM users WHERE user = \'%s\'""" 12 | 13 | conn = sqlite3.connect("example") 14 | conn.cursor().execute(sql % (user)) 15 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_fix_assert_tuple.py: 
-------------------------------------------------------------------------------- 1 | from core_codemods.fix_assert_tuple import FixAssertTuple 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarFixAssertTuple = SonarCodemod.from_core_codemod( 5 | name="fix-assert-tuple", 6 | other=FixAssertTuple, 7 | rule_id="python:S5905", 8 | rule_name="Assert should not be called on a tuple literal", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_url_sandbox.py: -------------------------------------------------------------------------------- 1 | from core_codemods.sonar.api import SonarCodemod 2 | from core_codemods.url_sandbox import UrlSandbox 3 | 4 | SonarUrlSandbox = SonarCodemod.from_core_codemod( 5 | name="url-sandbox", 6 | other=UrlSandbox, 7 | rule_id="pythonsecurity:S5144", 8 | rule_name="Server-side requests should not be vulnerable to forging attacks", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_secure_random.py: -------------------------------------------------------------------------------- 1 | from core_codemods.secure_random import SecureRandom 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarSecureRandom = SonarCodemod.from_core_codemod( 5 | name="secure-random", 6 | other=SecureRandom, 7 | rule_id="python:S2245", 8 | rule_name="Using pseudorandom number generators (PRNGs) is security-sensitive", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_exception-without-raise.md: -------------------------------------------------------------------------------- 1 | This codemod fixes cases where an exception is referenced by itself in a statement without being raised. This most likely indicates a bug: you probably meant to actually raise the exception. 
2 | 3 | Our changes look something like this: 4 | ```diff 5 | try: 6 | - ValueError 7 | + raise ValueError 8 | except: 9 | pass 10 | ``` 11 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_tempfile_mktemp.py: -------------------------------------------------------------------------------- 1 | from core_codemods.sonar.api import SonarCodemod 2 | from core_codemods.tempfile_mktemp import TempfileMktemp 3 | 4 | SonarTempfileMktemp = SonarCodemod.from_core_codemod( 5 | name="secure-tempfile", 6 | other=TempfileMktemp, 7 | rule_id="python:S5445", 8 | rule_name="Insecure temporary file creation methods should not be used", 9 | ) 10 | -------------------------------------------------------------------------------- /src/codemodder/codemods/base_detector.py: -------------------------------------------------------------------------------- 1 | from abc import ABCMeta, abstractmethod 2 | 3 | from codemodder.context import CodemodExecutionContext 4 | from codemodder.result import ResultSet 5 | 6 | 7 | class BaseDetector(metaclass=ABCMeta): 8 | @abstractmethod 9 | def apply( 10 | self, 11 | codemod_id: str, 12 | context: CodemodExecutionContext, 13 | ) -> ResultSet: ... 
14 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_fix_float_equality.py: -------------------------------------------------------------------------------- 1 | from core_codemods.fix_float_equality import FixFloatEquality 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarFixFloatEquality = SonarCodemod.from_core_codemod( 5 | name="fix-float-equality", 6 | other=FixFloatEquality, 7 | rule_id="python:S1244", 8 | rule_name="Floating point numbers should not be tested for equality", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_invert_boolean_check.py: -------------------------------------------------------------------------------- 1 | from core_codemods.invert_boolean_check import InvertedBooleanCheck 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarInvertedBooleanCheck = SonarCodemod.from_core_codemod( 5 | name="invert-boolean-check", 6 | other=InvertedBooleanCheck, 7 | rule_id="python:S1940", 8 | rule_name="Boolean checks should not be inverted", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_numpy_nan_equality.py: -------------------------------------------------------------------------------- 1 | from core_codemods.numpy_nan_equality import NumpyNanEquality 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarNumpyNanEquality = SonarCodemod.from_core_codemod( 5 | name="numpy-nan-equality", 6 | other=NumpyNanEquality, 7 | rule_id="python:S6725", 8 | rule_name="Equality checks should not be made against `numpy.nan`", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_remove-module-global.md: -------------------------------------------------------------------------------- 1 | Using the `global` keyword is necessary only when you 
intend to modify a module-level (aka global) variable within a non-global scope, such as within a class or function. It is unnecessary to call `global` at the module-level. 2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | price = 25 7 | print("hello") 8 | - global price 9 | price = 30 10 | ``` 11 | -------------------------------------------------------------------------------- /renovate.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://docs.renovatebot.com/renovate-schema.json", 3 | "extends": [ 4 | "config:recommended", 5 | "group:allNonMajor" 6 | ], 7 | "packageRules": [ 8 | { 9 | "matchPackageNames": ["pydantic"], 10 | "enabled": false 11 | }, 12 | { 13 | "matchPackageNames": ["numpy"], 14 | "matchCurrentValue": "==2.2.6", 15 | "enabled": false 16 | } 17 | ] 18 | } 19 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_exception_without_raise.py: -------------------------------------------------------------------------------- 1 | from core_codemods.exception_without_raise import ExceptionWithoutRaise 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarExceptionWithoutRaise = SonarCodemod.from_core_codemod( 5 | name="exception-without-raise", 6 | other=ExceptionWithoutRaise, 7 | rule_id="python:S3984", 8 | rule_name="Exceptions should not be created without being raised", 9 | ) 10 | -------------------------------------------------------------------------------- /tests/test_file_context.py: -------------------------------------------------------------------------------- 1 | from codemodder.file_context import FileContext 2 | 3 | 4 | def test_file_context(mocker): 5 | directory = mocker.MagicMock() 6 | path = mocker.MagicMock() 7 | file_context = FileContext(directory, path) 8 | assert file_context.base_directory is directory 9 | assert file_context.file_path is path 10 | assert file_context.line_exclude 
== [] 11 | assert file_context.line_include == [] 12 | -------------------------------------------------------------------------------- /tests/samples/django_model.py: -------------------------------------------------------------------------------- 1 | import django 2 | from django.conf import settings 3 | from django.db import models 4 | 5 | # required to run this module standalone for testing 6 | if not settings.configured: 7 | settings.configure() 8 | django.setup() 9 | 10 | 11 | class User(models.Model): 12 | name = models.CharField(max_length=100) 13 | phone = models.IntegerField(blank=True) 14 | class Meta: 15 | app_label = 'myapp' 16 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_numpy-nan-equality.md: -------------------------------------------------------------------------------- 1 | Comparisons against `numpy.nan` always result in `False`. Thus comparing an expression directly against `numpy.nan` is always unintended. The correct way to compare a value for `NaN` is to use the `numpy.isnan` function. 
2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | import numpy as np 7 | 8 | a = np.nan 9 | -if a == np.nan: 10 | +if np.isnan(a): 11 | pass 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_fix_missing_self_or_cls.py: -------------------------------------------------------------------------------- 1 | from core_codemods.fix_missing_self_or_cls import FixMissingSelfOrCls 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarFixMissingSelfOrCls = SonarCodemod.from_core_codemod( 5 | name="fix-missing-self-or-cls", 6 | other=FixMissingSelfOrCls, 7 | rule_id="python:S5719", 8 | rule_name="Instance and class methods should have at least one positional parameter", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_sandbox_process_creation.py: -------------------------------------------------------------------------------- 1 | from core_codemods.process_creation_sandbox import ProcessSandbox 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarSandboxProcessCreation = SonarCodemod.from_core_codemod( 5 | name="sandbox-process-creation", 6 | other=ProcessSandbox(), 7 | rule_id="pythonsecurity:S2076", 8 | rule_name="OS commands should not be vulnerable to command injection attacks", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_sql_parameterization.py: -------------------------------------------------------------------------------- 1 | from core_codemods.sonar.api import SonarCodemod 2 | from core_codemods.sql_parameterization import SQLQueryParameterization 3 | 4 | SonarSQLParameterization = SonarCodemod.from_core_codemod( 5 | name="sql-parameterization", 6 | other=SQLQueryParameterization, 7 | rule_id="pythonsecurity:S3649", 8 | rule_name="Database queries should not be vulnerable to injection attacks", 9 | ) 10 
| -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_str-concat-in-sequence-literals.md: -------------------------------------------------------------------------------- 1 | This codemod fixes cases of implicit string concatenation inside lists, sets, or tuples. This is most likely a mistake: you probably meant to include a comma in between the concatenated strings. 2 | 3 | Our changes look something like this: 4 | ```diff 5 | bad = [ 6 | - "ab" 7 | + "ab", 8 | "cd", 9 | "ef", 10 | - "gh" 11 | + "gh", 12 | "ij", 13 | ] 14 | ``` 15 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_enable_jinja2_autoescape.py: -------------------------------------------------------------------------------- 1 | from core_codemods.enable_jinja2_autoescape import EnableJinja2Autoescape 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarEnableJinja2Autoescape = SonarCodemod.from_core_codemod( 5 | name="enable-jinja2-autoescape", 6 | other=EnableJinja2Autoescape, 7 | rule_id="python:S5247", 8 | rule_name="Disabling auto-escaping in template engines is security-sensitive", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_flask_json_response_type.py: -------------------------------------------------------------------------------- 1 | from core_codemods.flask_json_response_type import FlaskJsonResponseType 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarFlaskJsonResponseType = SonarCodemod.from_core_codemod( 5 | name="flask-json-response-type", 6 | other=FlaskJsonResponseType, 7 | rule_id="pythonsecurity:S5131", 8 | rule_name="Endpoints should not be vulnerable to reflected XSS attacks (Flask)", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_break_or_continue_out_of_loop.py:
-------------------------------------------------------------------------------- 1 | from core_codemods.break_or_continue_out_of_loop import BreakOrContinueOutOfLoop 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarBreakOrContinueOutOfLoop = SonarCodemod.from_core_codemod( 5 | name="break-or-continue-out-of-loop", 6 | other=BreakOrContinueOutOfLoop, 7 | rule_id="python:S1716", 8 | rule_name='"break" and "continue" should not be used outside a loop', 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_django_json_response_type.py: -------------------------------------------------------------------------------- 1 | from core_codemods.django_json_response_type import DjangoJsonResponseType 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarDjangoJsonResponseType = SonarCodemod.from_core_codemod( 5 | name="django-json-response-type", 6 | other=DjangoJsonResponseType, 7 | rule_id="pythonsecurity:S5131", 8 | rule_name="Endpoints should not be vulnerable to reflected XSS attacks (Django)", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_disable_graphql_introspection.py: -------------------------------------------------------------------------------- 1 | from core_codemods.disable_graphql_introspection import DisableGraphQLIntrospection 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarDisableGraphQLIntrospection = SonarCodemod.from_core_codemod( 5 | name="disable-graphql-introspection", 6 | other=DisableGraphQLIntrospection, 7 | rule_id="python:S6786", 8 | rule_name="GraphQL introspection should be disabled in production", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_django_model_without_dunder_str.py: -------------------------------------------------------------------------------- 1 | from 
core_codemods.django_model_without_dunder_str import DjangoModelWithoutDunderStr 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarDjangoModelWithoutDunderStr = SonarCodemod.from_core_codemod( 5 | name="django-model-without-dunder-str", 6 | other=DjangoModelWithoutDunderStr, 7 | rule_id="python:S6554", 8 | rule_name='Django models should define a "__str__" method', 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_django_receiver_on_top.py: -------------------------------------------------------------------------------- 1 | from core_codemods.django_receiver_on_top import DjangoReceiverOnTop 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarDjangoReceiverOnTop = SonarCodemod.from_core_codemod( 5 | name="django-receiver-on-top", 6 | other=DjangoReceiverOnTop, 7 | rule_id="python:S6552", 8 | rule_name="Django signal handler functions should have the `@receiver` decorator on top of all other decorators", 9 | ) 10 | -------------------------------------------------------------------------------- /tests/samples/jwt_decode_verify.py: -------------------------------------------------------------------------------- 1 | import jwt 2 | 3 | SECRET_KEY = "mysecretkey" 4 | payload = { 5 | "user_id": 123, 6 | "username": "john", 7 | } 8 | 9 | encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256") 10 | 11 | decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False) 12 | decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False}) 13 | 14 | var = "something" 15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_https-connection.md: -------------------------------------------------------------------------------- 1 | This codemod replaces calls to `urllib3.connectionpool.HTTPConnectionPool` and 
`urllib3.HTTPConnectionPool` with their secure variant (`HTTPSConnectionPool`). 2 | 3 | Programmers should opt to use HTTPS over HTTP for secure encrypted communication whenever possible. 4 | 5 | ```diff 6 | import urllib3 7 | - urllib3.HTTPConnectionPool("www.example.com","80") 8 | + urllib3.HTTPSConnectionPool("www.example.com","80") 9 | ``` 10 | -------------------------------------------------------------------------------- /src/codemodder/utils/abc_dataclass.py: -------------------------------------------------------------------------------- 1 | from abc import ABC 2 | from dataclasses import dataclass 3 | 4 | 5 | @dataclass(frozen=True) 6 | class ABCDataclass(ABC): 7 | """Inspired by https://stackoverflow.com/a/60669138""" 8 | 9 | def __new__(cls, *args, **kwargs): 10 | del args, kwargs 11 | if cls == ABCDataclass or cls.__bases__[0] == ABCDataclass: 12 | raise TypeError("Cannot instantiate abstract class.") 13 | return super().__new__(cls) 14 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_literal_or_new_object_identity.py: -------------------------------------------------------------------------------- 1 | from core_codemods.literal_or_new_object_identity import LiteralOrNewObjectIdentity 2 | from core_codemods.sonar.api import SonarCodemod 3 | 4 | SonarLiteralOrNewObjectIdentity = SonarCodemod.from_core_codemod( 5 | name="literal-or-new-object-identity", 6 | other=LiteralOrNewObjectIdentity, 7 | rule_id="python:S5796", 8 | rule_name="New objects should not be created only to check their identity", 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-hasattr-call.md: -------------------------------------------------------------------------------- 1 | This codemod fixes cases where `hasattr` is used to check if an object is a callable. You likely want to use `callable` instead. 
This is because using `hasattr` will return different results in some cases, such as when the class implements a `__getattr__` method. 2 | 3 | Our changes look something like this: 4 | ```diff 5 | class Test: 6 | pass 7 | 8 | obj = Test() 9 | - hasattr(obj, "__call__") 10 | + callable(obj) 11 | ``` 12 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_remove-debug-breakpoint.md: -------------------------------------------------------------------------------- 1 | This codemod removes any calls to `breakpoint()` or `pdb.set_trace()` which are generally only used for interactive debugging and should not be deployed in production code. 2 | 3 | In most cases if these calls are included in committed code, they were left there by mistake and indicate a potential problem. 4 | 5 | Our changes look something like this: 6 | 7 | ```diff 8 | print("hello") 9 | - breakpoint() 10 | print("world") 11 | ``` 12 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-assert-tuple.md: -------------------------------------------------------------------------------- 1 | An assertion on a non-empty tuple will always evaluate to `True`. This means that `assert` statements involving non-empty tuple literals are likely unintentional and should be rewritten. This codemod rewrites the original `assert` statement by creating a new `assert` for each item in the original tuple. 2 | 3 | The changes from this codemod look like this: 4 | 5 | ```diff 6 | - assert (1 == 1, 2 == 2) 7 | + assert 1 == 1 8 | + assert 2 == 2 9 | ``` 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_remove-unnecessary-f-str.md: -------------------------------------------------------------------------------- 1 | This codemod converts any f-strings without interpolated variables into regular strings. 
2 | In these cases the use of f-string is not necessary; a simple string literal is sufficient. 3 | 4 | While in some (extreme) cases we might expect a very modest performance 5 | improvement, in general this is a fix that improves the overall cleanliness and 6 | quality of your code. 7 | 8 | ```diff 9 | - var = f"hello" 10 | + var = "hello" 11 | ... 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_django-session-cookie-secure-off.md: -------------------------------------------------------------------------------- 1 | This codemod will set Django's `SESSION_COOKIE_SECURE` flag to `True` if it's `False` or missing on the `settings.py` file within Django's default directory structure. 2 | 3 | ```diff 4 | + SESSION_COOKIE_SECURE = True 5 | ``` 6 | 7 | Setting this flag on ensures that the session cookies are only sent under an HTTPS connection. Leaving this flag off may enable an attacker to use a sniffer to capture the unencrypted session cookie and hijack the user's session. 
8 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_timezone_aware_datetime.py: -------------------------------------------------------------------------------- 1 | from core_codemods.sonar.api import SonarCodemod 2 | from core_codemods.timezone_aware_datetime import TimezoneAwareDatetime 3 | 4 | SonarTimezoneAwareDatetime = SonarCodemod.from_core_codemod( 5 | name="timezone-aware-datetime", 6 | other=TimezoneAwareDatetime, 7 | rule_id="python:S6903", 8 | rule_name='Using timezone-aware "datetime" objects should be preferred over using "datetime.datetime.utcnow" and "datetime.datetime.utcfromtimestamp"', 9 | ) 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_literal-or-new-object-identity.md: -------------------------------------------------------------------------------- 1 | The `is` and `is not` operators only evaluate to `True` when the expressions on each side have the same `id`. In other words, `a is b` is equivalent to `id(a) == id(b)`. With few exceptions, objects and literals have unique identities and thus shouldn't generally be compared by using the `is` or `is not` operators. 
2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | def foo(l): 7 | - return l is [1,2,3] 8 | + return l == [1,2,3] 9 | ``` 10 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_remove_assertion_in_pytest_raises.py: -------------------------------------------------------------------------------- 1 | from core_codemods.remove_assertion_in_pytest_raises import ( 2 | RemoveAssertionInPytestRaises, 3 | ) 4 | from core_codemods.sonar.api import SonarCodemod 5 | 6 | SonarRemoveAssertionInPytestRaises = SonarCodemod.from_core_codemod( 7 | name="remove-assertion-in-pytest-raises", 8 | other=RemoveAssertionInPytestRaises, 9 | rule_id="python:S5915", 10 | rule_name="Assertions should not be made at the end of blocks expecting an exception", 11 | ) 12 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-async-task-instantiation.md: -------------------------------------------------------------------------------- 1 | The `asyncio` [documentation](https://docs.python.org/3/library/asyncio-task.html#asyncio.Task) explicitly discourages manual instantiation of a `Task` instance and instead recommends calling `create_task`. This keeps your code in line with recommended best practices and promotes maintainability. 2 | 3 | Our changes look like the following: 4 | ```diff 5 | import asyncio 6 | 7 | - task = asyncio.Task(my_coroutine(), name="my task") 8 | + task = asyncio.create_task(my_coroutine(), name="my task") 9 | ``` 10 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_django-debug-flag-on.md: -------------------------------------------------------------------------------- 1 | This codemod will flip Django's `DEBUG` flag to `False` if it's `True` on the `settings.py` file within Django's default directory structure. 
2 | 3 | Having the debug flag on may result in sensitive information exposure. When an exception occurs while the `DEBUG` flag is on, it will dump metadata of your environment, including the settings module. The attacker can purposefully request a non-existing url to trigger an exception and gather information about your system. 4 | 5 | ```diff 6 | - DEBUG = True 7 | + DEBUG = False 8 | ``` 9 | -------------------------------------------------------------------------------- /src/core_codemods/sonar/sonar_jwt_decode_verify.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.libcst_transformer import LibcstTransformerPipeline 2 | from core_codemods.jwt_decode_verify import ( 3 | JwtDecodeVerify, 4 | JwtDecodeVerifySASTTransformer, 5 | ) 6 | from core_codemods.sonar.api import SonarCodemod 7 | 8 | SonarJwtDecodeVerify = SonarCodemod.from_core_codemod( 9 | name="jwt-decode-verify", 10 | other=JwtDecodeVerify, 11 | rule_id="python:S5659", 12 | rule_name="JWT should be signed and verified", 13 | transformer=LibcstTransformerPipeline(JwtDecodeVerifySASTTransformer), 14 | ) 15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_harden-ruamel.md: -------------------------------------------------------------------------------- 1 | This codemod hardens any unsafe [`ruamel.yaml.YAML()`](https://yaml.readthedocs.io/en/latest/) calls against attacks that could result from deserializing untrusted data. 2 | 3 | The fix uses a safety check that already exists in the `ruamel` module, replacing an unsafe `typ` argument with `typ="safe"`.
4 | The changes from this codemod look like this: 5 | 6 | ```diff 7 | from ruamel.yaml import YAML 8 | - serializer = YAML(typ="unsafe") 9 | - serializer = YAML(typ="base") 10 | + serializer = YAML(typ="safe") 11 | + serializer = YAML(typ="safe") 12 | ``` 13 | -------------------------------------------------------------------------------- /.github/workflows/sonar_pixee.yml: -------------------------------------------------------------------------------- 1 | name: "Publish Sonar JSON to Pixee" 2 | on: 3 | check_run: 4 | types: [completed] 5 | 6 | permissions: 7 | contents: read 8 | id-token: write 9 | 10 | jobs: 11 | share: 12 | name: Upload Sonar Results to Pixeebot 13 | runs-on: ubuntu-latest 14 | if: ${{ github.event.check_run.name == 'SonarCloud Code Analysis' }} 15 | steps: 16 | - uses: pixee/upload-tool-results-action@v2.5.0 17 | with: 18 | tool: sonar 19 | sonar-token: ${{ secrets.SONAR_TOKEN }} 20 | sonar-component-key: ${{ secrets.SONAR_COMPONENT_KEY }} 21 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-missing-self-or-cls.md: -------------------------------------------------------------------------------- 1 | Python instance methods must be defined with `self` as the first argument. Likewise, class methods must have `cls` as the first argument. This codemod will add these arguments when the method/class method has no arguments defined. 
2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | class MyClass: 7 | - def instance_method(): 8 | + def instance_method(self): 9 | print("instance_method") 10 | 11 | @classmethod 12 | - def class_method(): 13 | + def class_method(cls): 14 | print("class_method") 15 | ``` 16 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_use_defused_xml.py: -------------------------------------------------------------------------------- 1 | from core_codemods.semgrep.api import SemgrepCodemod, ToolRule, semgrep_url_from_id 2 | from core_codemods.use_defused_xml import UseDefusedXml 3 | 4 | SemgrepUseDefusedXml = SemgrepCodemod.from_core_codemod( 5 | name="use-defusedxml", 6 | other=UseDefusedXml, 7 | rules=[ 8 | ToolRule( 9 | id=( 10 | rule_id := "python.lang.security.use-defused-xml-parse.use-defused-xml-parse" 11 | ), 12 | name="use-defused-xml-parse", 13 | url=semgrep_url_from_id(rule_id), 14 | ) 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_use-walrus-if.md: -------------------------------------------------------------------------------- 1 | This codemod updates places where two separate statements involving an assignment and conditional can be replaced with a single Assignment Expression (commonly known as the walrus operator). 2 | 3 | Many developers use this operator in new code that they write but don't have the time to find and update every place in existing code. So we do it for you! We believe this leads to more concise and readable code. 
4 | 5 | The changes from this codemod look like this: 6 | 7 | ```diff 8 | - x = foo() 9 | - if x is not None: 10 | + if (x := foo()) is not None: 11 | print(x) 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_limit-readline.md: -------------------------------------------------------------------------------- 1 | This codemod hardens all [`readline()`](https://docs.python.org/3/library/io.html#io.IOBase.readline) calls from file objects returned from an `open()` call, `StringIO` and `BytesIO` against denial of service attacks. A stream influenced by an attacker could keep providing bytes until the system runs out of memory, causing a crash. 2 | 3 | Fixing it is straightforward by adding a size argument to any `readline()` calls. 4 | The changes from this codemod look like this: 5 | 6 | ```diff 7 | file = open('some_file.txt') 8 | - file.readline() 9 | + file.readline(5_000_000) 10 | ``` 11 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-deprecated-logging-warn.md: -------------------------------------------------------------------------------- 1 | The `warn` method from `logging` has been [deprecated](https://docs.python.org/3/library/logging.html#logging.Logger.warning) in favor of `warning` since Python 3.3. Since the old method `warn` has been retained for a long time, there are a lot of developers that are unaware of this change and consequently a lot of code using the older method. 2 | 3 | Our changes look like the following: 4 | ```diff 5 | import logging 6 | 7 | - logging.warn("hello") 8 | + logging.warning("hello") 9 | ...
10 | log = logging.getLogger("my logger") 11 | - log.warn("hello") 12 | + log.warning("hello") 13 | ``` 14 | -------------------------------------------------------------------------------- /src/core_codemods/docs/defectdojo_python_django-secure-set-cookie.md: -------------------------------------------------------------------------------- 1 | This codemod sets the most secure parameters when Django applications call `set_cookie` on a response object. Without these parameters, your Django application cookies may be vulnerable to being intercepted and used to gain access to sensitive data. 2 | 3 | The changes from this codemod look like this: 4 | 5 | ```diff 6 | from django.shortcuts import render 7 | def index(request): 8 | resp = render(request, 'index.html') 9 | - resp.set_cookie('custom_cookie', 'value') 10 | + resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax') 11 | return resp 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_sandbox_process_creation.py: -------------------------------------------------------------------------------- 1 | from core_codemods.process_creation_sandbox import ProcessSandbox 2 | from core_codemods.semgrep.api import SemgrepCodemod, ToolRule, semgrep_url_from_id 3 | 4 | SemgrepSandboxProcessCreation = SemgrepCodemod.from_core_codemod( 5 | name="sandbox-process-creation", 6 | other=ProcessSandbox(), 7 | rules=[ 8 | ToolRule( 9 | id=( 10 | rule_id := "python.lang.security.dangerous-system-call.dangerous-system-call" 11 | ), 12 | name="dangerous-system-call", 13 | url=semgrep_url_from_id(rule_id), 14 | ), 15 | ], 16 | ) 17 | -------------------------------------------------------------------------------- /tests/samples/disable_graphql_introspection.py: -------------------------------------------------------------------------------- 1 | from graphql_server.flask.views import GraphQLView 2 | from flask import Flask 3 | from 
graphql import ( 4 | GraphQLSchema, GraphQLObjectType, GraphQLField, GraphQLString) 5 | 6 | schema = GraphQLSchema( 7 | query=GraphQLObjectType( 8 | name='RootQueryType', 9 | fields={ 10 | 'hello': GraphQLField( 11 | GraphQLString, 12 | resolve=lambda obj, info: 'world') 13 | })) 14 | 15 | app = Flask(__name__) 16 | 17 | app.add_url_rule("/api", 18 | view_func=GraphQLView.as_view( 19 | name="api", 20 | schema=schema, 21 | ), 22 | ) 23 | -------------------------------------------------------------------------------- /integration_tests/test_fix_assert_tuple.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_assert_tuple import FixAssertTuple, FixAssertTupleTransform 3 | 4 | 5 | class TestFixAssertTuple(BaseIntegrationTest): 6 | codemod = FixAssertTuple 7 | original_code = """ 8 | assert (1 == 1, 2 == 2) 9 | """ 10 | replacement_lines = [(1, "assert 1 == 1\n"), (2, "assert 2 == 2\n")] 11 | expected_diff = "--- \n+++ \n@@ -1 +1,2 @@\n-assert (1 == 1, 2 == 2)\n+assert 1 == 1\n+assert 2 == 2\n" 12 | expected_line_change = "1" 13 | change_description = FixAssertTupleTransform.change_description 14 | num_changes = 2 15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-deprecated-abstractproperty.md: -------------------------------------------------------------------------------- 1 | The `@abstractproperty`, `@abstractclassmethod`, and `@abstractstaticmethod` decorators from `abc` have been [deprecated](https://docs.python.org/3/library/abc.html) since Python 3.3. This is because it's possible to use `@property`, `@classmethod`, and `@staticmethod` in combination with `@abstractmethod`. 2 | 3 | Our changes look like the following: 4 | ```diff 5 | import abc 6 | 7 | class Foo: 8 | - @abc.abstractproperty 9 | + @property 10 | + @abc.abstractmethod 11 | def bar(): 12 | ...
13 | ``` 14 | 15 | and similarly for `@abstractclassmethod` and `@abstractstaticmethod`. 16 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_bad-lock-with-statement.md: -------------------------------------------------------------------------------- 1 | This codemod separates creating a threading lock instance from calling it as a context manager. Calling `with threading.Lock()` does not have the effect you would expect. The lock is not acquired. Instead, to correctly acquire a lock, create the instance separately, before calling it as a context manager. 2 | 3 | The change will apply to any of these `threading` classes: `Lock`, `RLock`, `Condition`, `Semaphore`, and `BoundedSemaphore`. 4 | 5 | The change looks like this: 6 | 7 | ```diff 8 | import threading 9 | - with threading.Lock(): 10 | + lock = threading.Lock() 11 | + with lock: 12 | ... 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/test_logging.py: -------------------------------------------------------------------------------- 1 | from pythonjsonlogger import json 2 | 3 | from codemodder.logging import OutputFormat, configure_logger 4 | 5 | 6 | def test_json_logger(mocker): 7 | basic_config = mocker.patch("logging.basicConfig") 8 | configure_logger(False, OutputFormat.JSON, "test-project") 9 | assert basic_config.call_count == 1 10 | assert basic_config.call_args[1]["format"] == "%(message)s" 11 | assert isinstance( 12 | basic_config.call_args[1]["handlers"][0].formatter, 13 | json.JsonFormatter, 14 | ) 15 | assert ( 16 | basic_config.call_args[1]["handlers"][0].formatter.project_name 17 | == "test-project" 18 | ) 19 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-empty-sequence-comparison.md: -------------------------------------------------------------------------------- 1 | Empty sequences in Python always 
evaluate to `False`. This means that comparison expressions that use empty sequences can sometimes be simplified. In these cases no explicit comparison is required: instead we can rely on the [truth value](https://docs.python.org/3/library/stdtypes.html#truth-value-testing) of the object under comparison. This is sometimes referred to as "implicit" comparison. Using implicit boolean comparison expressions is considered best practice and can lead to better code. 2 | 3 | Our changes look like the following: 4 | ```diff 5 | x = [1] 6 | 7 | - if x != []: 8 | + if x: 9 | pass 10 | ``` 11 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_subprocess-shell-false.md: -------------------------------------------------------------------------------- 1 | This codemod sets the `shell` keyword argument to `False` in `subprocess` module function calls that have set it to `True`. 2 | 3 | Setting `shell=True` will execute the provided command through the system shell which can lead to shell injection vulnerabilities. In the worst case this can give an attacker the ability to run arbitrary commands on your system. In most cases using `shell=False` is sufficient and leads to much safer code. 
4 | 5 | The changes from this codemod look like this: 6 | 7 | ```diff 8 | import subprocess 9 | - subprocess.run("echo 'hi'", shell=True) 10 | + subprocess.run("echo 'hi'", shell=False) 11 | ``` 12 | -------------------------------------------------------------------------------- /integration_tests/test_fix_empty_sequence_comparison.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_empty_sequence_comparison import FixEmptySequenceComparison 3 | 4 | 5 | class TestFixEmptySequenceComparison(BaseIntegrationTest): 6 | codemod = FixEmptySequenceComparison 7 | original_code = """ 8 | x = [1] 9 | if x != []: 10 | pass 11 | """ 12 | replacement_lines = [(2, "if x:\n")] 13 | 14 | expected_diff = ( 15 | "--- \n+++ \n@@ -1,3 +1,3 @@\n x = [1]\n-if x != []:\n+if x:\n pass\n" 16 | ) 17 | expected_line_change = "2" 18 | change_description = FixEmptySequenceComparison.change_description 19 | -------------------------------------------------------------------------------- /integration_tests/test_unnecessary_f_str.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_unnecessary_f_str import ( 3 | RemoveUnnecessaryFStr, 4 | RemoveUnnecessaryFStrTransform, 5 | ) 6 | 7 | 8 | class TestFStr(BaseIntegrationTest): 9 | codemod = RemoveUnnecessaryFStr 10 | original_code = """ 11 | bad = f"hello" 12 | good = f"{2+3}" 13 | """ 14 | replacement_lines = [(1, 'bad = "hello"\n')] 15 | 16 | expected_diff = '--- \n+++ \n@@ -1,2 +1,2 @@\n-bad = f"hello"\n+bad = "hello"\n good = f"{2+3}"\n' 17 | expected_line_change = "1" 18 | change_description = RemoveUnnecessaryFStrTransform.change_description 19 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_secure-flask-session-configuration.md: 
-------------------------------------------------------------------------------- 1 | Flask applications can configure session behavior at the application level. 2 | This codemod looks for Flask application configuration that sets `SESSION_COOKIE_HTTPONLY`, `SESSION_COOKIE_SECURE`, or `SESSION_COOKIE_SAMESITE` to an insecure value and changes it to a secure one. 3 | 4 | The changes from this codemod look like this: 5 | 6 | ```diff 7 | from flask import Flask 8 | app = Flask(__name__) 9 | - app.config['SESSION_COOKIE_HTTPONLY'] = False 10 | - app.config.update(SESSION_COOKIE_SECURE=False) 11 | + app.config['SESSION_COOKIE_HTTPONLY'] = True 12 | + app.config.update(SESSION_COOKIE_SECURE=True) 13 | ``` 14 | -------------------------------------------------------------------------------- /integration_tests/test_use_set_literal.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.use_set_literal import UseSetLiteral 3 | 4 | 5 | class TestUseSetLiteral(BaseIntegrationTest): 6 | codemod = UseSetLiteral 7 | original_code = """ 8 | x = set([1, 2, 3]) 9 | y = set([]) 10 | """ 11 | replacement_lines = [(1, "x = {1, 2, 3}\n"), (2, "y = set()\n")] 12 | 13 | expected_diff = """\ 14 | --- 15 | +++ 16 | @@ -1,2 +1,2 @@ 17 | -x = set([1, 2, 3]) 18 | -y = set([]) 19 | +x = {1, 2, 3} 20 | +y = set() 21 | """ 22 | 23 | expected_line_change = "1" 24 | num_changes = 2 25 | change_description = UseSetLiteral.change_description 26 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_enable-jinja2-autoescape.md: -------------------------------------------------------------------------------- 1 | This codemod enables autoescaping of HTML content in `jinja2`.
Unfortunately, the jinja2 default behavior is to not autoescape when rendering templates, which makes your applications potentially vulnerable to Cross-Site Scripting (XSS) attacks. 2 | 3 | Our codemod checks if you forgot to enable autoescape or if you explicitly disabled it. The change looks as follows: 4 | 5 | ```diff 6 | from jinja2 import Environment 7 | 8 | - env = Environment() 9 | - env = Environment(autoescape=False, loader=some_loader) 10 | + env = Environment(autoescape=True) 11 | + env = Environment(autoescape=True, loader=some_loader) 12 | ... 13 | ``` 14 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_remove-future-imports.md: -------------------------------------------------------------------------------- 1 | Many older codebases have `__future__` imports for forwards compatibility with features. As of this writing, all but one of those features is now stable in all currently supported versions of Python and so the imports are no longer needed. While such imports are harmless, they are also unnecessary and in most cases you probably just forgot to remove them. 2 | 3 | This codemod removes all such `__future__` imports, preserving only those that are still necessary for forwards compatibility. 4 | 5 | Our changes look like the following: 6 | ```diff 7 | import os 8 | -from __future__ import print_function 9 | 10 | print("HELLO") 11 | ``` 12 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_combine-isinstance-issubclass.md: -------------------------------------------------------------------------------- 1 | Many developers are not necessarily aware that the `isinstance` and `issubclass` builtin methods can accept a tuple of classes to match. 
This means that there is a lot of code that uses boolean expressions such as `isinstance(x, str) or isinstance(x, bytes)` instead of the simpler expression `isinstance(x, (str, bytes))`. 2 | 3 | This codemod simplifies the boolean expressions where possible which leads to cleaner and more concise code. 4 | 5 | The changes from this codemod look like this: 6 | 7 | ```diff 8 | x = 'foo' 9 | - if isinstance(x, str) or isinstance(x, bytes): 10 | + if isinstance(x, (str, bytes)): 11 | ... 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_combine-startswith-endswith.md: -------------------------------------------------------------------------------- 1 | Many developers are not necessarily aware that the `startswith` and `endswith` methods of `str` objects can accept a tuple of strings to match. This means that there is a lot of code that uses boolean expressions such as `x.startswith('foo') or x.startswith('bar')` instead of the simpler expression `x.startswith(('foo', 'bar'))`. 2 | 3 | This codemod simplifies the boolean expressions where possible which leads to cleaner and more concise code. 4 | 5 | The changes from this codemod look like this: 6 | 7 | ```diff 8 | x = 'foo' 9 | - if x.startswith("foo") or x.startswith("bar"): 10 | + if x.startswith(("foo", "bar")): 11 | ... 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_lazy-logging.md: -------------------------------------------------------------------------------- 1 | This codemod converts "eager" logging into "lazy" logging, which is preferred for performance efficiency and resource optimization. 2 | Lazy logging defers the actual construction and formatting of log messages until it's confirmed that the message will be logged based on the current log level, thereby avoiding unnecessary computation for messages that will not be logged. 
3 | 4 | Our changes look something like this: 5 | 6 | ```diff 7 | import logging 8 | e = "Some error" 9 | - logging.error("Error occurred: %s" % e) 10 | - logging.error("Error occurred: " + e) 11 | + logging.error("Error occurred: %s", e) 12 | + logging.error("Error occurred: %s", e) 13 | ``` 14 | -------------------------------------------------------------------------------- /integration_tests/test_remove_module_global.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_module_global import RemoveModuleGlobal 3 | 4 | 5 | class TestRemoveModuleGlobal(BaseIntegrationTest): 6 | codemod = RemoveModuleGlobal 7 | original_code = """ 8 | price = 25 9 | print("hello") 10 | global price 11 | price = 30 12 | """ 13 | expected_new_code = """ 14 | price = 25 15 | print("hello") 16 | price = 30 17 | """ 18 | expected_diff = '--- \n+++ \n@@ -1,4 +1,3 @@\n price = 25\n print("hello")\n-global price\n price = 30' 19 | expected_line_change = "3" 20 | change_description = RemoveModuleGlobal.change_description 21 | -------------------------------------------------------------------------------- /integration_tests/test_remove_debug_breakpoint.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_debug_breakpoint import RemoveDebugBreakpoint 3 | 4 | 5 | class TestRemoveDebugBreakpoint(BaseIntegrationTest): 6 | codemod = RemoveDebugBreakpoint 7 | original_code = """ 8 | print("hello") 9 | breakpoint() 10 | print("world") 11 | """ 12 | expected_new_code = """ 13 | print("hello") 14 | print("world") 15 | """ 16 | expected_diff = ( 17 | '--- \n+++ \n@@ -1,3 +1,2 @@\n print("hello")\n-breakpoint()\n print("world")' 18 | ) 19 | expected_line_change = "2" 20 | change_description = RemoveDebugBreakpoint.change_description 21 | 
-------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_subprocess_shell_false.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.base_codemod import ToolRule 2 | from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id 3 | from core_codemods.subprocess_shell_false import SubprocessShellFalse 4 | 5 | SemgrepSubprocessShellFalse = SemgrepCodemod.from_core_codemod( 6 | name="subprocess-shell-false", 7 | other=SubprocessShellFalse, 8 | rules=[ 9 | ToolRule( 10 | id=( 11 | rule_id := "python.lang.security.audit.subprocess-shell-true.subprocess-shell-true" 12 | ), 13 | name="subprocess-shell-true", 14 | url=semgrep_url_from_id(rule_id), 15 | ) 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_fix_assert_tuple.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.fix_assert_tuple import FixAssertTupleTransform 3 | from core_codemods.sonar.sonar_fix_assert_tuple import SonarFixAssertTuple 4 | 5 | 6 | class TestFixAssertTuple(SonarIntegrationTest): 7 | codemod = SonarFixAssertTuple 8 | code_path = "tests/samples/fix_assert_tuple.py" 9 | replacement_lines = [(1, "assert 1 == 1\n"), (2, "assert 2 == 2\n")] 10 | expected_diff = "--- \n+++ \n@@ -1 +1,2 @@\n-assert (1 == 1, 2 == 2)\n+assert 1 == 1\n+assert 2 == 2\n" 11 | expected_line_change = "1" 12 | change_description = FixAssertTupleTransform.change_description 13 | num_changes = 2 14 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_secure-flask-cookie.md: -------------------------------------------------------------------------------- 1 | This codemod sets the most secure parameters when Flask applications call 
`set_cookie` on a response object. Without these parameters, your Flask 2 | application cookies may be vulnerable to being intercepted and used to gain access to sensitive data. 3 | 4 | The changes from this codemod look like this: 5 | 6 | ```diff 7 | from flask import Flask, session, make_response 8 | app = Flask(__name__) 9 | @app.route('/') 10 | def index(): 11 | resp = make_response('Custom Cookie Set') 12 | - resp.set_cookie('custom_cookie', 'value') 13 | + resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax') 14 | return resp 15 | ``` 16 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_enable_jinja2_autoescape.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.base_codemod import ToolRule 2 | from core_codemods.enable_jinja2_autoescape import EnableJinja2Autoescape 3 | from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id 4 | 5 | SemgrepEnableJinja2Autoescape = SemgrepCodemod.from_core_codemod( 6 | name="enable-jinja2-autoescape", 7 | other=EnableJinja2Autoescape, 8 | rules=[ 9 | ToolRule( 10 | id=( 11 | rule_id := "python.flask.security.xss.audit.direct-use-of-jinja2.direct-use-of-jinja2" 12 | ), 13 | name="direct-use-of-jinja2", 14 | url=semgrep_url_from_id(rule_id), 15 | ) 16 | ], 17 | ) 18 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_remove-assertion-in-pytest-raises.md: -------------------------------------------------------------------------------- 1 | The context manager object `pytest.raises(<exception>)` will assert if the code contained within its scope will raise an exception of type `<exception>`. The documentation points out that the exception must be raised in the last line of its scope and any line afterwards won't be executed. 2 | Including asserts at the end of the scope is a common error.
This codemod addresses that by moving them out of the scope. 3 | Our changes look something like this: 4 | 5 | ```diff 6 | import pytest 7 | 8 | def test_foo(): 9 | with pytest.raises(ZeroDivisionError): 10 | error = 1/0 11 | - assert 1 12 | - assert 2 13 | + assert 1 14 | + assert 2 15 | ``` 16 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_safe-lxml-parsing.md: -------------------------------------------------------------------------------- 1 | This codemod sets the `parser` parameter in calls to `lxml.etree.parse` and `lxml.etree.fromstring` if omitted or set to `None` (the default value). Unfortunately, the default `parser=None` means `lxml` will rely on an unsafe parser, making your code potentially vulnerable to entity expansion attacks and external entity (XXE) attacks. 2 | 3 | The changes look as follows: 4 | 5 | ```diff 6 | import lxml.etree 7 | - lxml.etree.parse("path_to_file") 8 | - lxml.etree.fromstring("xml_str") 9 | + lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False)) 10 | + lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False)) 11 | ``` 12 | -------------------------------------------------------------------------------- /integration_tests/test_remove_unused_imports.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_unused_imports import RemoveUnusedImports 3 | 4 | 5 | class TestRemoveUnusedImports(BaseIntegrationTest): 6 | codemod = RemoveUnusedImports 7 | original_code = """ 8 | import abc 9 | from builtins import complex, dict 10 | 11 | abc 12 | complex 13 | """ 14 | replacement_lines = [(2, """from builtins import complex\n""")] 15 | expected_diff = "--- \n+++ \n@@ -1,5 +1,5 @@\n import abc\n-from builtins import complex, dict\n+from builtins import complex\n \n abc\n complex\n" 16 | 
expected_line_change = 2 17 | change_description = RemoveUnusedImports.change_description 18 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_fix_math_isclose.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.sonar.sonar_fix_math_isclose import ( 3 | FixMathIsCloseSonarTransformer, 4 | SonarFixMathIsClose, 5 | ) 6 | 7 | 8 | class TestSonarFixMathIsClose(SonarIntegrationTest): 9 | codemod = SonarFixMathIsClose 10 | code_path = "tests/samples/fix_math_isclose.py" 11 | replacement_lines = [(5, " return math.isclose(a, 0, abs_tol=1e-09)\n")] 12 | expected_diff = "--- \n+++ \n@@ -2,4 +2,4 @@\n \n \n def foo(a):\n- return math.isclose(a, 0)\n+ return math.isclose(a, 0, abs_tol=1e-09)\n" 13 | expected_line_change = "5" 14 | change_description = FixMathIsCloseSonarTransformer.change_description 15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_django-receiver-on-top.md: -------------------------------------------------------------------------------- 1 | Django uses signals to notify and handle actions that happen elsewhere in the application. You can define a response to a given signal by decorating a function with the `@receiver(signal)` decorator. The order in which the decorators are declared for this function is important. If the `@receiver` decorator is not on top, any decorators before it will be ignored.
2 | Our changes look something like this: 3 | 4 | ```diff 5 | from django.dispatch import receiver 6 | from django.views.decorators.csrf import csrf_exempt 7 | from django.core.signals import request_finished 8 | 9 | +@receiver(request_finished) 10 | @csrf_exempt 11 | -@receiver(request_finished) 12 | def foo(): 13 | pass 14 | ``` 15 | -------------------------------------------------------------------------------- /integration_tests/test_lxml_safe_parser_defaults.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.lxml_safe_parser_defaults import LxmlSafeParserDefaults 3 | 4 | 5 | class TestLxmlSafeParserDefaults(BaseIntegrationTest): 6 | codemod = LxmlSafeParserDefaults 7 | original_code = """ 8 | import lxml.etree 9 | parser = lxml.etree.XMLParser() 10 | """ 11 | replacement_lines = [(2, "parser = lxml.etree.XMLParser(resolve_entities=False)\n")] 12 | expected_diff = "--- \n+++ \n@@ -1,2 +1,2 @@\n import lxml.etree\n-parser = lxml.etree.XMLParser()\n+parser = lxml.etree.XMLParser(resolve_entities=False)\n" 13 | expected_line_change = "2" 14 | change_description = LxmlSafeParserDefaults.change_description 15 | -------------------------------------------------------------------------------- /integration_tests/test_upgrade_sslcontext_tls.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.upgrade_sslcontext_tls import UpgradeSSLContextTLS 3 | 4 | 5 | class TestUpgradeWeakTLS(BaseIntegrationTest): 6 | codemod = UpgradeSSLContextTLS 7 | 8 | original_code = """ 9 | import ssl 10 | 11 | ssl.SSLContext(ssl.PROTOCOL_SSLv2) 12 | """ 13 | expected_new_code = """ 14 | import ssl 15 | 16 | ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) 17 | """ 18 | expected_diff = "--- \n+++ \n@@ -1,3 +1,3 @@\n import ssl\n 
\n-ssl.SSLContext(ssl.PROTOCOL_SSLv2)\n+ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)" 19 | expected_line_change = "3" 20 | change_description = UpgradeSSLContextTLS.change_description 21 | -------------------------------------------------------------------------------- /integration_tests/test_fix_deprecated_logging_warn.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_deprecated_logging_warn import FixDeprecatedLoggingWarn 3 | 4 | 5 | class TestFixDeprecatedLoggingWarn(BaseIntegrationTest): 6 | codemod = FixDeprecatedLoggingWarn 7 | original_code = """ 8 | import logging 9 | 10 | log = logging.getLogger("my logger") 11 | log.warn("hello") 12 | """ 13 | replacement_lines = [(4, 'log.warning("hello")\n')] 14 | expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n import logging\n \n log = logging.getLogger("my logger")\n-log.warn("hello")\n+log.warning("hello")\n' 15 | expected_line_change = "4" 16 | change_description = FixDeprecatedLoggingWarn.change_description 17 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-math-isclose.md: -------------------------------------------------------------------------------- 1 | The default value for the `abs_tol` argument to a `math.isclose` call is `0`. Using this default when comparing a value against `0`, such as in `math.isclose(a, 0)` is equivalent to a strict equality check to `0`, which is not the intended use of the `math.isclose` function. 2 | 3 | This codemod adds `abs_tol=1e-09` to any call to `math.isclose` with one of the first arguments evaluating to `0` if `abs_tol` is not already specified. `1e-09` is a starting point for you to consider depending on your calculation needs.
4 | 5 | Our changes look like the following: 6 | ```diff 7 | +import math 8 | + 9 | def foo(a): 10 | - return math.isclose(a, 0) 11 | + return math.isclose(a, 0, abs_tol=1e-09) 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_django_secure_set_cookie.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.base_codemod import ToolRule 2 | from core_codemods.defectdojo.semgrep.django_secure_set_cookie import ( 3 | DjangoSecureSetCookie, 4 | ) 5 | from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id 6 | 7 | SemgrepDjangoSecureSetCookie = SemgrepCodemod.from_core_codemod( 8 | name="django-secure-set-cookie", 9 | other=DjangoSecureSetCookie, 10 | rules=[ 11 | ToolRule( 12 | id=( 13 | rule_id := "python.django.security.audit.secure-cookies.django-secure-set-cookie" 14 | ), 15 | name="django-secure-set-cookie", 16 | url=semgrep_url_from_id(rule_id), 17 | ) 18 | ], 19 | ) 20 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_upgrade-sslcontext-minimum-version.md: -------------------------------------------------------------------------------- 1 | This codemod replaces all unsafe and/or deprecated SSL/TLS versions when used 2 | to set the `ssl.SSLContext.minimum_version` attribute. It uses 3 | `ssl.TLSVersion.TLSv1_2` instead, which ensures a safe default minimum TLS 4 | version. 5 | 6 | Our change involves modifying the `minimum_version` attribute of 7 | `ssl.SSLContext` instances to use `ssl.TLSVersion.TLSv1_2`. 8 | 9 | ```diff 10 | import ssl 11 | context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT) 12 | - context.minimum_version = ssl.TLSVersion.SSLv3 13 | + context.minimum_version = ssl.TLSVersion.TLSv1_2 14 | ``` 15 | 16 | There is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2. 
17 | -------------------------------------------------------------------------------- /.github/workflows/pre-commit-autoupdate.yml: -------------------------------------------------------------------------------- 1 | name: Pre-commit auto-update 2 | 3 | on: 4 | # every day at midnight 5 | schedule: 6 | - cron: "0 0 * * *" 7 | # on demand 8 | workflow_dispatch: 9 | 10 | jobs: 11 | auto-update: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v6 15 | - uses: actions/setup-python@v6 16 | - uses: browniebroke/pre-commit-autoupdate-action@main 17 | - uses: peter-evans/create-pull-request@v7 18 | with: 19 | token: ${{ secrets.GITHUB_TOKEN }} 20 | branch: update/pre-commit-hooks 21 | title: Update pre-commit hooks 22 | commit-message: "chore: update pre-commit hooks" 23 | body: Update versions of pre-commit hooks to latest version. 24 | -------------------------------------------------------------------------------- /integration_tests/test_combine_startswith_endswith.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.combine_startswith_endswith import CombineStartswithEndswith 3 | 4 | 5 | class TestCombineStartswithEndswith(BaseIntegrationTest): 6 | codemod = CombineStartswithEndswith 7 | original_code = """ 8 | x = 'foo' 9 | if x.startswith("foo") or x.startswith("bar"): 10 | print("Yes") 11 | """ 12 | replacement_lines = [(2, 'if x.startswith(("foo", "bar")):\n')] 13 | 14 | expected_diff = '--- \n+++ \n@@ -1,3 +1,3 @@\n x = \'foo\'\n-if x.startswith("foo") or x.startswith("bar"):\n+if x.startswith(("foo", "bar")):\n print("Yes")\n' 15 | expected_line_change = "2" 16 | change_description = CombineStartswithEndswith.change_description 17 | -------------------------------------------------------------------------------- /integration_tests/test_combine_isinstance_issubclass.py: 
-------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.combine_isinstance_issubclass import CombineIsinstanceIssubclass 3 | 4 | 5 | class TestCombineStartswithEndswith(BaseIntegrationTest): 6 | codemod = CombineIsinstanceIssubclass 7 | original_code = """ 8 | x = 'foo' 9 | if isinstance(x, str) or isinstance(x, bytes): 10 | print("Yes") 11 | """ 12 | replacement_lines = [(2, "if isinstance(x, (str, bytes)):\n")] 13 | 14 | expected_diff = "--- \n+++ \n@@ -1,3 +1,3 @@\n x = 'foo'\n-if isinstance(x, str) or isinstance(x, bytes):\n+if isinstance(x, (str, bytes)):\n print(\"Yes\")\n" 15 | expected_line_change = "2" 16 | change_description = CombineIsinstanceIssubclass.change_description 17 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_timezone-aware-datetime.md: -------------------------------------------------------------------------------- 1 | Some `datetime` object calls use the machine's local timezone instead of a reasonable default like UTC. This may be okay in some cases, but it can lead to bugs. Misinterpretation of dates has been the culprit for serious issues in banking, satellite communications, and other industries. 2 | 3 | The `datetime` [documentation](https://docs.python.org/3/library/datetime.html#datetime.datetime.utcnow) explicitly encourages using timezone aware objects to prevent bugs.
4 | 5 | Our changes look like the following: 6 | ```diff 7 | from datetime import datetime 8 | import time 9 | 10 | - datetime.utcnow() 11 | - datetime.utcfromtimestamp(time.time()) 12 | + datetime.now(tz=timezone.utc) 13 | + datetime.fromtimestamp(time.time(), tz=timezone.utc) 14 | ``` 15 | -------------------------------------------------------------------------------- /integration_tests/test_with_threading_lock.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.with_threading_lock import WithThreadingLock 3 | 4 | 5 | class TestWithThreadingLock(BaseIntegrationTest): 6 | codemod = WithThreadingLock 7 | original_code = """ 8 | import threading 9 | with threading.Lock(): 10 | print("Hello") 11 | """ 12 | replacement_lines = [ 13 | (2, "lock = threading.Lock()\n"), 14 | (3, "with lock:\n"), 15 | (5, ' print("Hello")\n'), 16 | ] 17 | 18 | expected_diff = '--- \n+++ \n@@ -1,3 +1,4 @@\n import threading\n-with threading.Lock():\n+lock = threading.Lock()\n+with lock:\n print("Hello")\n' 19 | expected_line_change = "2" 20 | change_description = WithThreadingLock.change_description 21 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-file-resource-leak.md: -------------------------------------------------------------------------------- 1 | This codemod wraps assignments of `open` calls in a with statement. Without explicit closing, these resources will be "leaked" and won't be re-claimed until garbage collection. In situations where these resources are leaked rapidly (either through malicious repetitive action or unusually spiky usage), connection pool or file handle exhaustion will occur. These types of failures tend to be catastrophic, resulting in downtime and many times affect downstream applications. 
2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | import tempfile 7 | path = tempfile.NamedTemporaryFile().name 8 | -file = open(path, 'w', encoding='utf-8') 9 | -file.write('Hello World') 10 | +with open(path, 'w', encoding='utf-8') as file: 11 | + file.write('Hello World') 12 | ``` 13 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_django-json-response-type.md: -------------------------------------------------------------------------------- 1 | The default `content_type` for `HttpResponse` in Django is `'text/html'`. This is true even when the response contains JSON data. 2 | If the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS). 3 | This fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. Our changes look something like this: 4 | 5 | ```diff 6 | from django.http import HttpResponse 7 | import json 8 | 9 | def foo(request): 10 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 11 | - return HttpResponse(json_response) 12 | + return HttpResponse(json_response, content_type="application/json") 13 | ``` 14 | -------------------------------------------------------------------------------- /integration_tests/test_tempfile_mktemp.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.tempfile_mktemp import TempfileMktemp, TempfileMktempTransformer 3 | 4 | 5 | class TestTempfileMktemp(BaseIntegrationTest): 6 | codemod = TempfileMktemp 7 | original_code = """ 8 | import tempfile 9 | 10 | filename = tempfile.mktemp() 11 | """ 12 | replacement_lines = [ 13 | (3, "with tempfile.NamedTemporaryFile(delete=False) as tf:\n"), 14 | (4, " filename = tf.name\n"), 15 | ] 16 | 
expected_diff = "--- \n+++ \n@@ -1,3 +1,4 @@\n import tempfile\n \n-filename = tempfile.mktemp()\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n" 17 | expected_line_change = "3" 18 | change_description = TempfileMktempTransformer.change_description 19 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_sql-parameterization.md: -------------------------------------------------------------------------------- 1 | This codemod refactors SQL statements to be parameterized, rather than built by hand. 2 | 3 | Without parameterization, developers must remember to escape string inputs using the rules for that column type and database. This usually results in bugs -- and sometimes vulnerabilities. Although we can't tell for sure if your code is actually exploitable, this change will make the code more robust in case the conditions which prevent exploitation today ever go away. 4 | 5 | Our changes look something like this: 6 | 7 | ```diff 8 | import sqlite3 9 | 10 | name = input() 11 | connection = sqlite3.connect("my_db.db") 12 | cursor = connection.cursor() 13 | - cursor.execute("SELECT * from USERS WHERE name ='" + name + "'") 14 | + cursor.execute("SELECT * from USERS WHERE name =?", (name, )) 15 | ``` 16 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | PYTEST = pytest -v 2 | COV_FLAGS = --cov=codemodder --cov=core_codemods 3 | XDIST_FLAGS = --numprocesses auto 4 | 5 | test: 6 | COVERAGE_CORE=sysmon ${PYTEST} ${COV_FLAGS} tests ${XDIST_FLAGS} && coverage json && coverage-threshold 7 | 8 | integration-test: 9 | COVERAGE_CORE=sysmon ${PYTEST} integration_tests --cov=core_codemods ${XDIST_FLAGS} && coverage json && coverage-threshold --line-coverage-min 80 10 | 11 | pygoat-test: 12 | ${PYTEST} -v ci_tests/test_pygoat_findings.py 13 | 14 | lint: 15 | 
ruff check src tests integration_tests --exclude tests/samples/ 16 | 17 | radon: 18 | radon cc codemodder --min A --total-average 19 | 20 | # threshold for pipeline to fail if we go below average, module, or block complexity 21 | # https://github.com/rubik/xenon 22 | xenon: 23 | xenon codemodder --max-average A --max-modules C --max-absolute C 24 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_tempfile_mktemp.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.sonar.sonar_tempfile_mktemp import SonarTempfileMktemp 3 | from core_codemods.tempfile_mktemp import TempfileMktempTransformer 4 | 5 | 6 | class TestTempfileMktemp(SonarIntegrationTest): 7 | codemod = SonarTempfileMktemp 8 | code_path = "tests/samples/tempfile_mktemp.py" 9 | replacement_lines = [ 10 | (3, "with tempfile.NamedTemporaryFile(delete=False) as tf:\n"), 11 | (4, " filename = tf.name\n"), 12 | ] 13 | expected_diff = "--- \n+++ \n@@ -1,3 +1,4 @@\n import tempfile\n \n-filename = tempfile.mktemp()\n+with tempfile.NamedTemporaryFile(delete=False) as tf:\n+ filename = tf.name\n" 14 | expected_line_change = "3" 15 | change_description = TempfileMktempTransformer.change_description 16 | -------------------------------------------------------------------------------- /integration_tests/test_use_generator.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.use_generator import UseGenerator 3 | 4 | 5 | class TestUseGenerator(BaseIntegrationTest): 6 | codemod = UseGenerator 7 | original_code = """ 8 | def some(iterable): 9 | for i in iterable: 10 | yield i 11 | 12 | 13 | x = sum([i for i in range(1000)]) 14 | y = some([i for i in range(1000)]) 15 | """ 16 | replacement_lines = [(6, "x = sum(i for i in 
range(1000))\n")] 17 | 18 | expected_diff = """\ 19 | --- 20 | +++ 21 | @@ -3,5 +3,5 @@ 22 | yield i 23 | 24 | 25 | -x = sum([i for i in range(1000)]) 26 | +x = sum(i for i in range(1000)) 27 | y = some([i for i in range(1000)]) 28 | """ 29 | 30 | expected_line_change = "6" 31 | change_description = UseGenerator.change_description 32 | -------------------------------------------------------------------------------- /.github/workflows/autoformat-pixeebot-prs.yaml: -------------------------------------------------------------------------------- 1 | name: Format Pixeebot PRs 2 | 3 | on: 4 | pull_request: 5 | types: [opened, synchronize] 6 | 7 | jobs: 8 | apply-black: 9 | if: github.event.pull_request.user.login == 'pixeebot[bot]' 10 | runs-on: ubuntu-latest 11 | permissions: 12 | contents: write 13 | 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v6 17 | 18 | - name: Set up Python 19 | uses: actions/setup-python@v6 20 | with: 21 | python-version: "3.14" 22 | 23 | - name: Install black 24 | run: pip install black 25 | 26 | - name: Apply black formatting 27 | run: black . 
28 | 29 | - name: Commit and push changes 30 | uses: stefanzweifel/git-auto-commit-action@v7 31 | with: 32 | commit_message: ":art: Apply formatting" 33 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_url_sandbox.py: -------------------------------------------------------------------------------- 1 | from core_codemods.semgrep.api import SemgrepCodemod, ToolRule, semgrep_url_from_id 2 | from core_codemods.url_sandbox import UrlSandbox 3 | 4 | SemgrepUrlSandbox = SemgrepCodemod.from_core_codemod( 5 | name="url-sandbox", 6 | other=UrlSandbox, 7 | rules=[ 8 | ToolRule( 9 | id=( 10 | rule_id := "python.django.security.injection.ssrf.ssrf-injection-requests.ssrf-injection-requests" 11 | ), 12 | name="ssrf-injection-requests", 13 | url=semgrep_url_from_id(rule_id), 14 | ), 15 | ToolRule( 16 | id=( 17 | rule_id := "python.flask.security.injection.ssrf-requests.ssrf-requests" 18 | ), 19 | name="ssrf-requests", 20 | url=semgrep_url_from_id(rule_id), 21 | ), 22 | ], 23 | ) 24 | -------------------------------------------------------------------------------- /integration_tests/test_django_debug_flag_on.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.django_debug_flag_on import DjangoDebugFlagOn 3 | 4 | 5 | class TestDjangoDebugFlagFlip(BaseIntegrationTest): 6 | codemod = DjangoDebugFlagOn 7 | code_filename = "settings.py" 8 | original_code = """ 9 | # SECURITY WARNING: don't run with debug turned on in production! 
10 | DEBUG = True 11 | """ 12 | replacement_lines = [(2, "DEBUG = False\n")] 13 | # fmt: off 14 | expected_diff = ( 15 | """--- \n""" 16 | """+++ \n""" 17 | """@@ -1,2 +1,2 @@\n""" 18 | """ # SECURITY WARNING: don't run with debug turned on in production!\n""" 19 | """-DEBUG = True\n""" 20 | """+DEBUG = False\n""" 21 | ) 22 | # fmt: on 23 | expected_line_change = "2" 24 | change_description = DjangoDebugFlagOn.change_description 25 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_flask-json-response-type.md: -------------------------------------------------------------------------------- 1 | The default `mimetype` for `make_response` in Flask is `'text/html'`. This is true even when the response contains JSON data. 2 | If the JSON contains (unsanitized) user-supplied input, a malicious user may supply HTML code which leaves the application vulnerable to cross-site scripting (XSS). 3 | This fix explicitly sets the response type to `application/json` when the response body is JSON data to avoid this vulnerability. 
Our changes look something like this: 4 | 5 | ```diff 6 | from flask import make_response, Flask 7 | import json 8 | 9 | app = Flask(__name__) 10 | 11 | @app.route("/test") 12 | def foo(request): 13 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 14 | - return make_response(json_response) 15 | + return make_response(json_response, {'Content-Type':'application/json'}) 16 | ``` 17 | -------------------------------------------------------------------------------- /integration_tests/test_remove_future_imports.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_future_imports import RemoveFutureImports 3 | 4 | 5 | class TestRemoveFutureImports(BaseIntegrationTest): 6 | codemod = RemoveFutureImports 7 | original_code = """ 8 | from __future__ import absolute_import 9 | from __future__ import * 10 | 11 | print("HEY") 12 | """ 13 | expected_new_code = """ 14 | from __future__ import annotations 15 | 16 | print("HEY") 17 | """ 18 | 19 | expected_diff = """\ 20 | --- 21 | +++ 22 | @@ -1,4 +1,3 @@ 23 | -from __future__ import absolute_import 24 | -from __future__ import * 25 | +from __future__ import annotations 26 | 27 | print("HEY")""" 28 | 29 | num_changes = 2 30 | expected_line_change = "1" 31 | change_description = RemoveFutureImports.change_description 32 | -------------------------------------------------------------------------------- /integration_tests/test_limit_readline.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.limit_readline import LimitReadline 3 | 4 | 5 | class TestLimitReadline(BaseIntegrationTest): 6 | codemod = LimitReadline 7 | original_code = """ 8 | file = open("some_file.txt") 9 | file.readline() 10 | """ 11 | replacement_lines = [(2, "file.readline(5_000_000)\n")] 12 | # fmt: off 13 
| expected_diff = ( 14 | """--- \n""" 15 | """+++ \n""" 16 | """@@ -1,2 +1,2 @@\n""" 17 | """ file = open("some_file.txt")\n""" 18 | """-file.readline()\n""" 19 | """+file.readline(5_000_000)\n""") 20 | # fmt: on 21 | expected_line_change = "2" 22 | change_description = LimitReadline.change_description 23 | # expected because output code points to fake file 24 | allowed_exceptions = (FileNotFoundError,) 25 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_jwt_decode_verify.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.base_codemod import ToolRule 2 | from codemodder.codemods.libcst_transformer import LibcstTransformerPipeline 3 | from core_codemods.jwt_decode_verify import ( 4 | JwtDecodeVerify, 5 | JwtDecodeVerifySASTTransformer, 6 | ) 7 | from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id 8 | 9 | SemgrepJwtDecodeVerify = SemgrepCodemod.from_core_codemod( 10 | name="jwt-decode-verify", 11 | other=JwtDecodeVerify, 12 | rules=[ 13 | ToolRule( 14 | id=( 15 | rule_id := "python.jwt.security.unverified-jwt-decode.unverified-jwt-decode" 16 | ), 17 | name="unverified-jwt-decode", 18 | url=semgrep_url_from_id(rule_id), 19 | ) 20 | ], 21 | transformer=LibcstTransformerPipeline(JwtDecodeVerifySASTTransformer), 22 | ) 23 | -------------------------------------------------------------------------------- /.github/workflows/deploy_to_pypi.yml: -------------------------------------------------------------------------------- 1 | name: Deploy to PyPI 2 | 3 | on: 4 | push: 5 | tags: 6 | - "[0-9]+.[0-9]+.[0-9]+" 7 | 8 | jobs: 9 | build-and-release: 10 | name: Build and Release 11 | runs-on: ubuntu-24.04 12 | timeout-minutes: 5 13 | steps: 14 | - name: Set Up Python 15 | uses: actions/setup-python@v6 16 | with: 17 | python-version: '3.14' 18 | - name: Check out code 19 | uses: actions/checkout@v6 20 | - name: Install build 
dependencies 21 | run: pip install build twine 22 | - name: Build package 23 | run: python -m build . 24 | - name: Twine Check 25 | run: twine check dist/* 26 | - name: Publish to PyPI 27 | env: 28 | TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }} 29 | TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} 30 | run: twine upload dist/* 31 | -------------------------------------------------------------------------------- /src/core_codemods/secure_cookie_mixin.py: -------------------------------------------------------------------------------- 1 | from libcst import matchers 2 | 3 | from codemodder.codemods.libcst_transformer import NewArg 4 | 5 | 6 | class SecureCookieMixin: 7 | def _choose_new_args(self, original_node): 8 | new_args = [ 9 | NewArg(name="secure", value="True", add_if_missing=True), 10 | NewArg(name="httponly", value="True", add_if_missing=True), 11 | ] 12 | 13 | samesite = matchers.Arg( 14 | keyword=matchers.Name(value="samesite"), 15 | value=matchers.SimpleString(value="'Strict'"), 16 | ) 17 | 18 | # samesite=Strict is OK because it's more restrictive than Lax. 
19 | if not any(matchers.matches(arg, samesite) for arg in original_node.args): 20 | new_args.append( 21 | NewArg(name="samesite", value="'Lax'", add_if_missing=True), 22 | ) 23 | 24 | return new_args 25 | -------------------------------------------------------------------------------- /integration_tests/test_fix_hasattr_call.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_hasattr_call import TransformFixHasattrCall 3 | 4 | 5 | class TestTransformFixHasattrCall(BaseIntegrationTest): 6 | codemod = TransformFixHasattrCall 7 | original_code = """ 8 | class Test: 9 | pass 10 | 11 | obj = Test() 12 | hasattr(obj, "__call__") 13 | """ 14 | 15 | replacement_lines = [ 16 | (5, """callable(obj)\n"""), 17 | ] 18 | 19 | # fmt: off 20 | expected_diff = ( 21 | """--- \n""" 22 | """+++ \n""" 23 | """@@ -2,4 +2,4 @@\n""" 24 | """ pass\n""" 25 | """ \n""" 26 | """ obj = Test()\n""" 27 | """-hasattr(obj, "__call__")\n""" 28 | """+callable(obj)\n""" 29 | ) 30 | # fmt: on 31 | 32 | expected_line_change = "5" 33 | change_description = TransformFixHasattrCall.change_description 34 | -------------------------------------------------------------------------------- /integration_tests/test_subprocess_shell_false.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.subprocess_shell_false import ( 3 | SubprocessShellFalse, 4 | SubprocessShellFalseTransformer, 5 | ) 6 | 7 | 8 | class TestSubprocessShellFalse(BaseIntegrationTest): 9 | codemod = SubprocessShellFalse 10 | original_code = """ 11 | import subprocess 12 | subprocess.run(['ls', '-l'], shell=True) 13 | """ 14 | replacement_lines = [(2, "subprocess.run(['ls', '-l'], shell=False)\n")] 15 | 16 | expected_diff = "--- \n+++ \n@@ -1,2 +1,2 @@\n import subprocess\n-subprocess.run(['ls', '-l'], 
shell=True)\n+subprocess.run(['ls', '-l'], shell=False)\n" 17 | expected_line_change = "2" 18 | change_description = SubprocessShellFalseTransformer.change_description 19 | # expected because output code points to fake file 20 | allowed_exceptions = (FileNotFoundError,) 21 | -------------------------------------------------------------------------------- /integration_tests/test_break_or_continue_out_of_loop.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import BaseIntegrationTest 2 | from core_codemods.break_or_continue_out_of_loop import ( 3 | BreakOrContinueOutOfLoop, 4 | BreakOrContinueOutOfLoopTransformer, 5 | ) 6 | 7 | 8 | class TestBreakOrContinueOutOfLoop(BaseIntegrationTest): 9 | codemod = BreakOrContinueOutOfLoop 10 | original_code = """ 11 | def f(): 12 | continue 13 | """ 14 | replacement_lines = [ 15 | (2, """ pass\n"""), 16 | ] 17 | 18 | # fmt: off 19 | expected_diff = ( 20 | """--- \n""" 21 | """+++ \n""" 22 | """@@ -1,2 +1,2 @@\n""" 23 | """ def f():\n""" 24 | """- continue\n""" 25 | """+ pass\n""" 26 | ) 27 | # fmt: on 28 | 29 | expected_line_change = "2" 30 | change_description = BreakOrContinueOutOfLoopTransformer.change_description 31 | num_changed_files = 1 32 | -------------------------------------------------------------------------------- /src/codemodder/codemods/codeql.py: -------------------------------------------------------------------------------- 1 | from functools import cache 2 | 3 | from codemodder.codemods.base_detector import BaseDetector 4 | from codemodder.codeql import CodeQLResultSet 5 | from codemodder.context import CodemodExecutionContext 6 | from codemodder.result import ResultSet 7 | 8 | 9 | class CodeQLSarifFileDetector(BaseDetector): 10 | def apply( 11 | self, 12 | codemod_id: str, 13 | context: CodemodExecutionContext, 14 | ) -> ResultSet: 15 | del codemod_id 16 | return process_codeql_findings( 17 | 
tuple(context.tool_result_files_map.get("codeql", ())) 18 | ) # Convert list to tuple for cache hashability 19 | 20 | 21 | @cache # memoized on the tuple of SARIF paths; callers share the returned ResultSet 22 | def process_codeql_findings(codeql_sarif_files: tuple[str, ...]) -> ResultSet: 23 | results = CodeQLResultSet() 24 | for file in codeql_sarif_files or (): 25 | results |= CodeQLResultSet.from_sarif(file) 26 | return results 27 | -------------------------------------------------------------------------------- /src/codemodder/utils/timer.py: -------------------------------------------------------------------------------- 1 | import time 2 | from collections import defaultdict 3 | from contextlib import contextmanager 4 | 5 | from typing_extensions import Self 6 | 7 | 8 | class Timer: 9 | _times: defaultdict 10 | 11 | def __init__(self): 12 | self._times = defaultdict(float) 13 | 14 | @contextmanager 15 | def measure(self, name: str): 16 | start = time.monotonic() 17 | try: 18 | yield 19 | finally: 20 | end = time.monotonic() 21 | self._add_time(name, end - start) 22 | 23 | def _add_time(self, name: str, val: float) -> None: 24 | self._times[name] = self._times.get(name, 0) + val 25 | 26 | def get_time_ms(self, name: str) -> int: 27 | return int(self._times.get(name, 0) * 1000) 28 | 29 | def aggregate(self, other: Self) -> None: 30 | for key, val in other._times.items(): 31 | self._add_time(key, val) 32 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-float-equality.md: -------------------------------------------------------------------------------- 1 | In most programming languages, floating point arithmetic is imprecise due to the way floating point numbers are stored as binary representations. Moreover, the result of calculations with floats can vary based on when rounding happens. Using equality or inequality to compare floats or their operations will almost always be imprecise and lead to bugs.
2 | 3 | For these reasons, this codemod changes any operations involving equality or inequality with floats to the recommended `math.isclose` function. This codemod uses the default parameter values `rel_tol=1e-09` and `abs_tol=0.0` but makes them explicit as a starting point for you to consider depending on your calculation needs. 4 | 5 | Our changes look like the following: 6 | ```diff 7 | +import math 8 | + 9 | def foo(a, b): 10 | - return a == b - 0.1 11 | + return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0) 12 | ``` 13 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_secure_random.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest 2 | from core_codemods.secure_random import SecureRandomTransformer 3 | from core_codemods.sonar.sonar_secure_random import SonarSecureRandom 4 | 5 | 6 | class TestSonarSecureRandom(SonarRemediationIntegrationTest): 7 | codemod = SonarSecureRandom 8 | code_path = "tests/samples/secure_random.py" 9 | expected_diff_per_change = [ 10 | "--- \n+++ \n@@ -1,4 +1,5 @@\n import random\n+import secrets\n \n-random.random()\n+secrets.SystemRandom().random()\n random.getrandbits(1)\n", 11 | "--- \n+++ \n@@ -1,4 +1,5 @@\n import random\n+import secrets\n \n random.random()\n-random.getrandbits(1)\n+secrets.SystemRandom().getrandbits(1)\n", 12 | ] 13 | 14 | expected_lines_changed = [3, 4] 15 | change_description = SecureRandomTransformer.change_description 16 | num_changes = 2 17 | -------------------------------------------------------------------------------- /integration_tests/test_harden_ruamel.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest 2 | from core_codemods.harden_ruamel import HardenRuamel 3 | 4 | 5 | class 
TestHardenRuamel(BaseRemediationIntegrationTest): 6 | codemod = HardenRuamel 7 | original_code = """ 8 | from ruamel.yaml import YAML 9 | 10 | serializer = YAML(typ="unsafe") 11 | serializer = YAML(typ="base") 12 | """ 13 | expected_diff_per_change = [ 14 | '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n-serializer = YAML(typ="unsafe")\n+serializer = YAML(typ="safe")\n serializer = YAML(typ="base")', 15 | '--- \n+++ \n@@ -1,4 +1,4 @@\n from ruamel.yaml import YAML\n \n serializer = YAML(typ="unsafe")\n-serializer = YAML(typ="base")\n+serializer = YAML(typ="safe")', 16 | ] 17 | 18 | expected_lines_changed = [3, 4] 19 | num_changes = 2 20 | change_description = HardenRuamel.change_description 21 | -------------------------------------------------------------------------------- /integration_tests/test_exception_without_raise.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.exception_without_raise import ( 3 | ExceptionWithoutRaise, 4 | ExceptionWithoutRaiseTransformer, 5 | ) 6 | 7 | 8 | class TestExceptionWithoutRaise(BaseIntegrationTest): 9 | codemod = ExceptionWithoutRaise 10 | original_code = """ 11 | try: 12 | ValueError 13 | except: 14 | pass 15 | """ 16 | replacement_lines = [(2, """ raise ValueError\n""")] 17 | # fmt: off 18 | expected_diff = ( 19 | """--- \n""" 20 | """+++ \n""" 21 | """@@ -1,4 +1,4 @@\n""" 22 | """ try:\n""" 23 | """- ValueError\n""" 24 | """+ raise ValueError\n""" 25 | """ except:\n""" 26 | """ pass\n""" 27 | ) 28 | # fmt: on 29 | 30 | expected_line_change = "2" 31 | change_description = ExceptionWithoutRaiseTransformer.change_description 32 | num_changed_files = 1 33 | -------------------------------------------------------------------------------- /integration_tests/test_literal_or_new_object_identity.py: -------------------------------------------------------------------------------- 1 | from 
codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.literal_or_new_object_identity import ( 3 | LiteralOrNewObjectIdentity, 4 | LiteralOrNewObjectIdentityTransformer, 5 | ) 6 | 7 | 8 | class TestLiteralOrNewObjectIdentity(BaseIntegrationTest): 9 | codemod = LiteralOrNewObjectIdentity 10 | original_code = """ 11 | def foo(l): 12 | return l is [1,2,3] 13 | """ 14 | replacement_lines = [(2, """ return l == [1,2,3]\n""")] 15 | 16 | # fmt: off 17 | expected_diff = ( 18 | """--- \n""" 19 | """+++ \n""" 20 | """@@ -1,2 +1,2 @@\n""" 21 | """ def foo(l):\n""" 22 | """- return l is [1,2,3]\n""" 23 | """+ return l == [1,2,3]\n""" 24 | 25 | ) 26 | # fmt: on 27 | 28 | expected_line_change = "2" 29 | change_description = LiteralOrNewObjectIdentityTransformer.change_description 30 | num_changed_files = 1 31 | -------------------------------------------------------------------------------- /src/core_codemods/semgrep/semgrep_harden_pyyaml.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.base_codemod import ToolRule 2 | from core_codemods.harden_pyyaml import HardenPyyaml 3 | from core_codemods.semgrep.api import SemgrepCodemod, semgrep_url_from_id 4 | 5 | SemgrepHardenPyyaml = SemgrepCodemod.from_core_codemod( 6 | name="harden-pyyaml", 7 | other=HardenPyyaml, 8 | rules=[ 9 | ToolRule( 10 | id=( 11 | rule_id := "python.lang.security.deserialization.avoid-pyyaml-load.avoid-pyyaml-load" 12 | ), 13 | name=" avoid-pyyaml-load", 14 | url=semgrep_url_from_id(rule_id), 15 | ), 16 | ToolRule( 17 | id=( 18 | rule_id := "python.django.security.audit.avoid-insecure-deserialization.avoid-insecure-deserialization" 19 | ), 20 | name="avoid-insecure-deserialization", 21 | url=semgrep_url_from_id(rule_id), 22 | ), 23 | ], 24 | ) 25 | -------------------------------------------------------------------------------- /tests/test_llm.py: -------------------------------------------------------------------------------- 
1 | import os 2 | 3 | import pytest 4 | 5 | from codemodder.llm import MODELS, TokenUsage, models 6 | 7 | 8 | class TestModels: 9 | def test_get_model_name(self): 10 | assert MODELS.gpt_4_turbo_2024_04_09 == "gpt-4-turbo-2024-04-09" 11 | 12 | @pytest.mark.parametrize("model", models) 13 | def test_model_get_name_from_env(self, mocker, model): 14 | name = "my-awesome-deployment" 15 | attr_name = model.replace("-", "_") 16 | mocker.patch.dict( 17 | os.environ, 18 | { 19 | f"CODEMODDER_AZURE_OPENAI_{attr_name.upper()}_DEPLOYMENT": name, 20 | }, 21 | ) 22 | assert getattr(MODELS, attr_name) == name 23 | 24 | 25 | def test_token_usage(): 26 | token_usage = TokenUsage() 27 | token_usage += TokenUsage(10, 5) 28 | assert token_usage.completion_tokens == 10 29 | assert token_usage.prompt_tokens == 5 30 | assert token_usage.total == 15 31 | -------------------------------------------------------------------------------- /integration_tests/test_django_session_cookie_secure_off.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.django_session_cookie_secure_off import DjangoSessionCookieSecureOff 3 | 4 | 5 | class TestDjangoSessionCookieSecureOff(BaseIntegrationTest): 6 | codemod = DjangoSessionCookieSecureOff 7 | code_filename = "settings.py" 8 | 9 | original_code = """ 10 | # django settings 11 | # SESSION_COOKIE_SECURE is not defined 12 | """ 13 | replacement_lines = [(3, "SESSION_COOKIE_SECURE = True\n")] 14 | 15 | # fmt: off 16 | expected_diff = ( 17 | """--- \n""" 18 | """+++ \n""" 19 | """@@ -1,2 +1,3 @@\n""" 20 | """ # django settings\n""" 21 | """ # SESSION_COOKIE_SECURE is not defined\n""" 22 | """+SESSION_COOKIE_SECURE = True\n""" 23 | ) 24 | # fmt: on 25 | expected_line_change = "3" 26 | change_description = DjangoSessionCookieSecureOff.change_description 27 | -------------------------------------------------------------------------------- 
/integration_tests/sonar/test_sonar_numpy_nan_equality.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.numpy_nan_equality import NumpyNanEqualityTransformer 3 | from core_codemods.sonar.sonar_numpy_nan_equality import SonarNumpyNanEquality 4 | 5 | 6 | class TestNumpyNanEquality(SonarIntegrationTest): 7 | codemod = SonarNumpyNanEquality 8 | code_path = "tests/samples/numpy_nan_equality.py" 9 | replacement_lines = [ 10 | (4, """if np.isnan(a):\n"""), 11 | ] 12 | 13 | # fmt: off 14 | expected_diff = ( 15 | """--- \n""" 16 | """+++ \n""" 17 | """@@ -1,5 +1,5 @@\n""" 18 | """ import numpy as np\n""" 19 | """ \n""" 20 | """ a = np.nan\n""" 21 | """-if a == np.nan:\n""" 22 | """+if np.isnan(a):\n""" 23 | """ pass\n""" 24 | ) 25 | # fmt: on 26 | 27 | expected_line_change = "4" 28 | change_description = NumpyNanEqualityTransformer.change_description 29 | num_changed_files = 1 30 | -------------------------------------------------------------------------------- /integration_tests/test_secure_random.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.secure_random import SecureRandom, SecureRandomTransformer 3 | 4 | 5 | class TestSecureRandom(BaseIntegrationTest): 6 | codemod = SecureRandom 7 | original_code = """ 8 | import random 9 | random.random() 10 | var = "hello" 11 | """ 12 | replacement_lines = [ 13 | (1, """import secrets\n\n"""), 14 | (2, """secrets.SystemRandom().random()\n"""), 15 | ] 16 | # fmt: off 17 | expected_diff = ( 18 | """--- \n""" 19 | """+++ \n""" 20 | """@@ -1,3 +1,4 @@\n""" 21 | """-import random\n""" 22 | """-random.random()\n""" 23 | """+import secrets\n""" 24 | """+\n""" 25 | """+secrets.SystemRandom().random()\n""" 26 | """ var = "hello"\n""") 27 | # fmt: on 28 | expected_line_change = "2" 29 | 
change_description = SecureRandomTransformer.change_description 30 | -------------------------------------------------------------------------------- /integration_tests/test_secure_flask_session_config.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.secure_flask_session_config import SecureFlaskSessionConfig 3 | 4 | 5 | class TestSecureFlaskSessionConfig(BaseIntegrationTest): 6 | codemod = SecureFlaskSessionConfig 7 | original_code = """ 8 | from flask import Flask 9 | app = Flask(__name__) 10 | app.config['SESSION_COOKIE_HTTPONLY'] = False 11 | @app.route('/') 12 | def hello_world(): 13 | return 'Hello World!' 14 | """ 15 | replacement_lines = [(3, "app.config['SESSION_COOKIE_HTTPONLY'] = True\n")] 16 | expected_diff = "--- \n+++ \n@@ -1,6 +1,6 @@\n from flask import Flask\n app = Flask(__name__)\n-app.config['SESSION_COOKIE_HTTPONLY'] = False\n+app.config['SESSION_COOKIE_HTTPONLY'] = True\n @app.route('/')\n def hello_world():\n return 'Hello World!'\n" 17 | expected_line_change = "3" 18 | change_description = SecureFlaskSessionConfig.change_description 19 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_exception_without_raise.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.exception_without_raise import ExceptionWithoutRaiseTransformer 3 | from core_codemods.sonar.sonar_exception_without_raise import SonarExceptionWithoutRaise 4 | 5 | 6 | class TestSonarExceptionWithoutRaise(SonarIntegrationTest): 7 | codemod = SonarExceptionWithoutRaise 8 | code_path = "tests/samples/exception_without_raise.py" 9 | replacement_lines = [ 10 | (2, """ raise ValueError\n"""), 11 | ] 12 | # fmt: off 13 | expected_diff = ( 14 | """--- \n""" 15 | """+++ \n""" 16 | """@@ -1,4 
+1,4 @@\n""" 17 | """ try:\n""" 18 | """- ValueError\n""" 19 | """+ raise ValueError\n""" 20 | """ except:\n""" 21 | """ pass\n""" 22 | ) 23 | # fmt: on 24 | 25 | expected_line_change = "2" 26 | change_description = ExceptionWithoutRaiseTransformer.change_description 27 | num_changed_files = 1 28 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_url_sandbox.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.sonar.sonar_url_sandbox import SonarUrlSandbox 3 | from core_codemods.url_sandbox import UrlSandboxTransformer 4 | 5 | 6 | class TestSonarUrlSandbox(SonarIntegrationTest): 7 | codemod = SonarUrlSandbox 8 | code_path = "tests/samples/flask_request.py" 9 | replacement_lines = [ 10 | (1, "from flask import Flask, request\n"), 11 | (2, "from security import safe_requests\n"), 12 | (10, " safe_requests.get(url)\n"), 13 | ] 14 | expected_diff = '--- \n+++ \n@@ -1,5 +1,5 @@\n-import requests\n from flask import Flask, request\n+from security import safe_requests\n \n app = Flask(__name__)\n \n@@ -7,4 +7,4 @@\n @app.route("/example")\n def example():\n url = request.args["url"]\n- requests.get(url)\n+ safe_requests.get(url)\n' 15 | expected_line_change = "10" 16 | change_description = UrlSandboxTransformer.change_description 17 | -------------------------------------------------------------------------------- /integration_tests/test_numpy_nan_equality.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.numpy_nan_equality import ( 3 | NumpyNanEquality, 4 | NumpyNanEqualityTransformer, 5 | ) 6 | 7 | 8 | class TestNumpyNanEquality(BaseIntegrationTest): 9 | codemod = NumpyNanEquality 10 | original_code = """ 11 | import numpy as np 12 | 13 | a = np.nan 14 | if a == np.nan: 
15 | pass 16 | """ 17 | replacement_lines = [ 18 | (4, """if np.isnan(a):\n"""), 19 | ] 20 | # fmt: off 21 | expected_diff = ( 22 | """--- \n""" 23 | """+++ \n""" 24 | """@@ -1,5 +1,5 @@\n""" 25 | """ import numpy as np\n""" 26 | """ \n""" 27 | """ a = np.nan\n""" 28 | """-if a == np.nan:\n""" 29 | """+if np.isnan(a):\n""" 30 | """ pass\n""" 31 | ) 32 | # fmt: on 33 | 34 | expected_line_change = "4" 35 | change_description = NumpyNanEqualityTransformer.change_description 36 | num_changed_files = 1 37 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_break_or_continue_out_of_loop.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.break_or_continue_out_of_loop import ( 3 | BreakOrContinueOutOfLoopTransformer, 4 | ) 5 | from core_codemods.sonar.sonar_break_or_continue_out_of_loop import ( 6 | SonarBreakOrContinueOutOfLoop, 7 | ) 8 | 9 | 10 | class TestSonarSQLParameterization(SonarIntegrationTest): 11 | codemod = SonarBreakOrContinueOutOfLoop 12 | code_path = "tests/samples/break_or_continue_out_of_loop.py" 13 | replacement_lines = [ 14 | (2, """ pass\n"""), 15 | ] 16 | 17 | # fmt: off 18 | expected_diff = ( 19 | """--- \n""" 20 | """+++ \n""" 21 | """@@ -1,2 +1,2 @@\n""" 22 | """ def f():\n""" 23 | """- continue\n""" 24 | """+ pass\n""" 25 | ) 26 | # fmt: on 27 | 28 | expected_line_change = "2" 29 | change_description = BreakOrContinueOutOfLoopTransformer.change_description 30 | num_changed_files = 1 31 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_use_secure_protocols.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.sonar.sonar_use_secure_protocols import ( 3 | SonarUseSecureProtocols, 4 | 
SonarUseSecureProtocolsTransformer, 5 | ) 6 | 7 | 8 | class TestSonarUseSecureProtocols(SonarIntegrationTest): 9 | codemod = SonarUseSecureProtocols 10 | code_path = "tests/samples/use_secure_protocols.py" 11 | replacement_lines = [ 12 | ( 13 | 4, 14 | """url = "https://example.com"\n""", 15 | ), 16 | ] 17 | # fmt: off 18 | expected_diff = ( 19 | """--- \n""" 20 | """+++ \n""" 21 | """@@ -1,4 +1,4 @@\n""" 22 | ''' import ftplib\n''' 23 | ''' import smtplib\n''' 24 | ''' \n''' 25 | '''-url = "http://example.com"\n''' 26 | '''+url = "https://example.com"\n''' 27 | ) 28 | # fmt: on 29 | expected_line_change = "4" 30 | change_description = SonarUseSecureProtocolsTransformer.change_description 31 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_jwt-decode-verify.md: -------------------------------------------------------------------------------- 1 | This codemod ensures calls to [jwt.decode](https://pyjwt.readthedocs.io/en/stable/api.html#jwt.decode) do not disable signature validation and other verifications. It checks that both the `verify` parameter (soon to be deprecated) and any `verify` key in the `options` dict parameter are not assigned to `False`. 2 | 3 | Our change looks as follows: 4 | 5 | ```diff 6 | import jwt 7 | ... 8 | - decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False) 9 | + decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True) 10 | ... 11 | - decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False, "verify_exp": False}) 12 | + decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True, "verify_exp": True}) 13 | ``` 14 | 15 | Any `verify` parameter not listed relies on the secure `True` default value. 
16 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_secure-random.md: -------------------------------------------------------------------------------- 1 | This codemod replaces all instances of functions in the `random` module (e.g. `random.random()`) with their much more secure equivalents from the `secrets` module (e.g. `secrets.SystemRandom().random()`). 2 | 3 | There is significant algorithmic complexity in getting computers to generate genuinely unguessable random bits. The `random.random()` function uses a method of pseudo-random number generation that unfortunately emits fairly predictable numbers. 4 | 5 | If the numbers it emits are predictable, then it's obviously not safe to use in cryptographic operations, file name creation, token construction, password generation, and anything else that's related to security. In fact, it may affect security even if it's not directly obvious. 6 | 7 | Switching to a more secure version is simple and the changes look something like this: 8 | 9 | ```diff 10 | - import random 11 | + import secrets 12 | ... 
13 | - random.random() 14 | + secrets.SystemRandom().random() 15 | ``` 16 | -------------------------------------------------------------------------------- /integration_tests/test_fix_math_isclose.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_math_isclose import FixMathIsClose, FixMathIsCloseTransformer 3 | 4 | 5 | class TestFixMathIsClose(BaseIntegrationTest): 6 | codemod = FixMathIsClose 7 | original_code = """ 8 | import math 9 | 10 | def foo(a): 11 | return math.isclose(a, 0) 12 | """ 13 | expected_new_code = """ 14 | import math 15 | 16 | def foo(a): 17 | return math.isclose(a, 0, abs_tol=1e-09) 18 | """ 19 | # fmt: off 20 | expected_diff = ( 21 | """--- \n""" 22 | """+++ \n""" 23 | """@@ -1,4 +1,4 @@\n""" 24 | """ import math\n""" 25 | """ \n""" 26 | """ def foo(a):\n""" 27 | """- return math.isclose(a, 0)\n""" 28 | """+ return math.isclose(a, 0, abs_tol=1e-09)""" 29 | ) 30 | # fmt: on 31 | expected_line_change = "4" 32 | change_description = FixMathIsCloseTransformer.change_description 33 | -------------------------------------------------------------------------------- /src/core_codemods/api/core_codemod.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.api import FindAndFixCodemod, RemediationCodemod 2 | from codemodder.codemods.api import SimpleCodemod as _SimpleCodemod 3 | 4 | 5 | class CoreCodemodDocsMixin: 6 | """ 7 | Mixin for all codemods with docs provided by this package. 8 | """ 9 | 10 | @property 11 | def docs_module_path(self): 12 | return "core_codemods.docs" 13 | 14 | 15 | class CoreCodemod(CoreCodemodDocsMixin, FindAndFixCodemod): 16 | """ 17 | Base class for all core codemods provided by this package. 
18 | """ 19 | 20 | @property 21 | def origin(self): 22 | return "pixee" 23 | 24 | 25 | class SASTCodemod(CoreCodemodDocsMixin, RemediationCodemod): 26 | """ 27 | Base class for all SAST codemods provided by this package. 28 | """ 29 | 30 | 31 | class SimpleCodemod(_SimpleCodemod): 32 | """ 33 | Base class for all core codemods with a single detector and transformer. 34 | """ 35 | 36 | codemod_base = CoreCodemod 37 | -------------------------------------------------------------------------------- /integration_tests/test_lazy_logging.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest 2 | from core_codemods.lazy_logging import LazyLogging 3 | 4 | 5 | class TestLazyLogging(BaseRemediationIntegrationTest): 6 | codemod = LazyLogging 7 | original_code = """ 8 | import logging 9 | e = "Some error" 10 | logging.error("Error occurred: %s" % e) 11 | logging.error("Error occurred: " + e) 12 | """ 13 | expected_diff_per_change = [ 14 | '--- \n+++ \n@@ -1,4 +1,4 @@\n import logging\n e = "Some error"\n-logging.error("Error occurred: %s" % e)\n+logging.error("Error occurred: %s", e)\n logging.error("Error occurred: " + e)', 15 | '--- \n+++ \n@@ -1,4 +1,4 @@\n import logging\n e = "Some error"\n logging.error("Error occurred: %s" % e)\n-logging.error("Error occurred: " + e)\n+logging.error("Error occurred: %s", e)', 16 | ] 17 | 18 | expected_lines_changed = [3, 4] 19 | change_description = LazyLogging.change_description 20 | num_changes = 2 21 | -------------------------------------------------------------------------------- /src/core_codemods/docs/sonar_python_use-secure-protocols.md: -------------------------------------------------------------------------------- 1 | Communication using clear-text protocols allows an attacker to sniff or tamper with the transported data. 
2 | 3 | This codemod will replace any detected clear text protocol with their cryptographic enabled version. 4 | 5 | Our changes look like the following: 6 | ```diff 7 | - url = "http://example.com" 8 | + url = "https://example.com" 9 | 10 | - ftp_con = ftplib.FTP("ftp.example.com") 11 | + ftp_con = ftplib.FTP_TLS("ftp.example.com") 12 | + smtp_context = ssl.create_default_context() 13 | + smtp_context.verify_mode = ssl.CERT_REQUIRED 14 | + smtp_context.check_hostname = True 15 | smtp_con = smtplib.SMTP("smtp.example.com", port=587) 16 | + smtp.starttls(context=smtp_context) 17 | 18 | 19 | + smtp_context_1 = ssl.create_default_context() 20 | + smtp_context_1.verify_mode = ssl.CERT_REQUIRED 21 | + smtp_context_1.check_hostname = True 22 | - smtp_con_2 = smtplib.SMTP("smtp.gmail.com") 23 | + smtp_con_2 = smtplib.SMTP_SSL("smtp.gmail.com", context=smtp_context_1) 24 | ``` 25 | -------------------------------------------------------------------------------- /tests/test_codemod_docs.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from codemodder.codemods.api import BaseCodemod 4 | from codemodder.registry import load_registered_codemods 5 | from codemodder.scripts.generate_docs import ALL_CODEMODS_METADATA 6 | 7 | 8 | def pytest_generate_tests(metafunc): 9 | registry = load_registered_codemods() 10 | if "codemod" in metafunc.fixturenames: 11 | ids = [codemod.id for codemod in registry.codemods] 12 | metafunc.parametrize("codemod", registry.codemods, ids=ids) 13 | 14 | 15 | def test_load_codemod_docs_info(codemod: BaseCodemod): 16 | if codemod.name in ["order-imports"]: 17 | pytest.xfail(reason=f"{codemod.name} has no description") 18 | 19 | assert codemod.description 20 | assert codemod.review_guidance in ( 21 | "Merge After Review", 22 | "Merge After Cursory Review", 23 | "Merge Without Review", 24 | ) 25 | assert ( 26 | codemod.name in ALL_CODEMODS_METADATA 27 | ), f"{codemod.name} has not been added to 
generate_docs.py" 28 | -------------------------------------------------------------------------------- /.github/workflows/integration_test.yml: -------------------------------------------------------------------------------- 1 | name: Integration Tests 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - develop 8 | pull_request: 9 | branches: 10 | - main 11 | - develop 12 | merge_group: 13 | 14 | concurrency: 15 | group: ${{ github.workflow }}-${{ github.ref }} 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | test: 20 | name: Run pytest 21 | runs-on: ubuntu-24.04 22 | timeout-minutes: 15 23 | strategy: 24 | matrix: 25 | python-version: ['3.12', '3.13', '3.14'] 26 | steps: 27 | - name: Check out code 28 | uses: actions/checkout@v6 29 | - name: Set Up Python 30 | uses: actions/setup-python@v6 31 | with: 32 | python-version: ${{ matrix.python-version }} 33 | cache: 'pip' 34 | - name: Install Codemodder Package 35 | run: pip install . 36 | - name: Install Dependencies 37 | run: pip install ".[test]" 38 | - name: Run integration tests 39 | run: make integration-test 40 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_fix_missing_self_or_cls.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest 2 | from core_codemods.fix_missing_self_or_cls import FixMissingSelfOrClsTransformer 3 | from core_codemods.sonar.sonar_fix_missing_self_or_cls import SonarFixMissingSelfOrCls 4 | 5 | 6 | class TestSonarFixMissingSelfOrCls(SonarRemediationIntegrationTest): 7 | codemod = SonarFixMissingSelfOrCls 8 | code_path = "tests/samples/fix_missing_self_or_cls.py" 9 | 10 | expected_diff_per_change = [ 11 | '--- \n+++ \n@@ -1,5 +1,5 @@\n class MyClass:\n- def instance_method():\n+ def instance_method(self):\n print("instance_method")\n \n @classmethod\n', 12 | '--- \n+++ \n@@ -3,5 +3,5 @@\n 
print("instance_method")\n \n @classmethod\n- def class_method():\n+ def class_method(cls):\n print("class_method")\n', 13 | ] 14 | 15 | expected_lines_changed = [2, 6] 16 | change_description = FixMissingSelfOrClsTransformer.change_description 17 | num_changes = 2 18 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_literal_or_new_object_identity.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.literal_or_new_object_identity import ( 3 | LiteralOrNewObjectIdentityTransformer, 4 | ) 5 | from core_codemods.sonar.sonar_literal_or_new_object_identity import ( 6 | SonarLiteralOrNewObjectIdentity, 7 | ) 8 | 9 | 10 | class TestLiteralOrNewObjectIdentity(SonarIntegrationTest): 11 | codemod = SonarLiteralOrNewObjectIdentity 12 | code_path = "tests/samples/literal_or_new_object_identity.py" 13 | replacement_lines = [ 14 | (2, """ return l == [1,2,3]\n"""), 15 | ] 16 | 17 | # fmt: off 18 | expected_diff = ( 19 | """--- \n""" 20 | """+++ \n""" 21 | """@@ -1,2 +1,2 @@\n""" 22 | """ def foo(l):\n""" 23 | """- return l is [1,2,3]\n""" 24 | """+ return l == [1,2,3]\n""" 25 | 26 | ) 27 | # fmt: on 28 | 29 | expected_line_change = "2" 30 | change_description = LiteralOrNewObjectIdentityTransformer.change_description 31 | num_changed_files = 1 32 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_jinja2_autoescape.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest 2 | from core_codemods.enable_jinja2_autoescape import EnableJinja2AutoescapeTransformer 3 | from core_codemods.sonar.sonar_enable_jinja2_autoescape import ( 4 | SonarEnableJinja2Autoescape, 5 | ) 6 | 7 | 8 | class 
TestSonarEnableJinja2Autoescape(SonarRemediationIntegrationTest): 9 | codemod = SonarEnableJinja2Autoescape 10 | code_path = "tests/samples/jinja2_autoescape.py" 11 | expected_diff_per_change = [ 12 | "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n+env = Environment(autoescape=True)\n env = Environment(autoescape=False)\n", 13 | "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)\n", 14 | ] 15 | 16 | expected_lines_changed = [3, 4] 17 | num_changes = 2 18 | change_description = EnableJinja2AutoescapeTransformer.change_description 19 | -------------------------------------------------------------------------------- /integration_tests/test_fix_float_equality.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_float_equality import ( 3 | FixFloatEquality, 4 | FixFloatEqualityTransformer, 5 | ) 6 | 7 | 8 | class TestFixFloatEquality(BaseIntegrationTest): 9 | codemod = FixFloatEquality 10 | original_code = """ 11 | def foo(a, b): 12 | return a == b - 0.1 13 | """ 14 | expected_new_code = """ 15 | import math 16 | 17 | def foo(a, b): 18 | return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0) 19 | """ 20 | # fmt: off 21 | expected_diff = ( 22 | """--- \n""" 23 | """+++ \n""" 24 | """@@ -1,2 +1,4 @@\n""" 25 | """+import math\n""" 26 | """+\n""" 27 | """ def foo(a, b):\n""" 28 | """- return a == b - 0.1\n""" 29 | """+ return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0)""" 30 | ) 31 | # fmt: on 32 | expected_line_change = "2" 33 | change_description = FixFloatEqualityTransformer.change_description 34 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_use-defusedxml.md: 
-------------------------------------------------------------------------------- 1 | You might be surprised to learn that Python's built-in XML libraries are [considered insecure](https://docs.python.org/3/library/xml.html#xml-vulnerabilities) against various kinds of attacks. 2 | 3 | In fact, the [Python documentation itself](https://docs.python.org/3/library/xml.html#the-defusedxml-package) recommends the use of [defusedxml](https://pypi.org/project/defusedxml/) for parsing untrusted XML data. `defusedxml` is an [open-source](https://github.com/tiran/defusedxml), permissively licensed project that is intended as a drop-in replacement for Python's standard library XML parsers. 4 | 5 | This codemod updates all relevant uses of the standard library parsers with safe versions from `defusedxml`. It also adds the `defusedxml` dependency to your project where possible. 6 | 7 | The changes from this codemod look like this: 8 | ```diff 9 | - from xml.etree.ElementTree import parse 10 | + import defusedxml.ElementTree 11 | 12 | - et = parse('data.xml') 13 | + et = defusedxml.ElementTree.parse('data.xml') 14 | ``` 15 | -------------------------------------------------------------------------------- /integration_tests/test_jinja2_autoescape.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest 2 | from core_codemods.enable_jinja2_autoescape import ( 3 | EnableJinja2Autoescape, 4 | EnableJinja2AutoescapeTransformer, 5 | ) 6 | 7 | 8 | class TestEnableJinja2Autoescape(BaseRemediationIntegrationTest): 9 | codemod = EnableJinja2Autoescape 10 | original_code = """ 11 | from jinja2 import Environment 12 | 13 | env = Environment() 14 | env = Environment(autoescape=False) 15 | """ 16 | 17 | expected_diff_per_change = [ 18 | "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n-env = Environment()\n+env = Environment(autoescape=True)\n env = 
Environment(autoescape=False)", 19 | "--- \n+++ \n@@ -1,4 +1,4 @@\n from jinja2 import Environment\n \n env = Environment()\n-env = Environment(autoescape=False)\n+env = Environment(autoescape=True)", 20 | ] 21 | 22 | expected_lines_changed = [3, 4] 23 | num_changes = 2 24 | change_description = EnableJinja2AutoescapeTransformer.change_description 25 | -------------------------------------------------------------------------------- /integration_tests/test_lxml_safe_parsing.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest 2 | from core_codemods.lxml_safe_parsing import LxmlSafeParsing 3 | 4 | 5 | class TestLxmlSafeParsing(BaseRemediationIntegrationTest): 6 | codemod = LxmlSafeParsing 7 | original_code = """ 8 | import lxml.etree 9 | lxml.etree.parse("path_to_file") 10 | lxml.etree.fromstring("xml_str") 11 | """ 12 | expected_lines_changed = [2, 3] 13 | expected_diff_per_change = [ 14 | '--- \n+++ \n@@ -1,3 +1,3 @@\n import lxml.etree\n-lxml.etree.parse("path_to_file")\n+lxml.etree.parse("path_to_file", parser=lxml.etree.XMLParser(resolve_entities=False))\n lxml.etree.fromstring("xml_str")', 15 | '--- \n+++ \n@@ -1,3 +1,3 @@\n import lxml.etree\n lxml.etree.parse("path_to_file")\n-lxml.etree.fromstring("xml_str")\n+lxml.etree.fromstring("xml_str", parser=lxml.etree.XMLParser(resolve_entities=False))', 16 | ] 17 | num_changes = 2 18 | change_description = LxmlSafeParsing.change_description 19 | allowed_exceptions = (OSError,) 20 | -------------------------------------------------------------------------------- /integration_tests/test_str_concat_in_seq_literals.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.str_concat_in_seq_literal import StrConcatInSeqLiteral 3 | 4 | 5 | class TestStrConcatInSeqLiteral(BaseIntegrationTest): 
6 | codemod = StrConcatInSeqLiteral 7 | original_code = """ 8 | bad = [ 9 | "ab" 10 | "cd", 11 | "ef", 12 | "gh" 13 | "ij", 14 | ] 15 | """ 16 | replacement_lines = [ 17 | (2, """ "ab",\n"""), 18 | (5, """ "gh",\n"""), 19 | ] 20 | # fmt: off 21 | expected_diff = ( 22 | """--- \n""" 23 | """+++ \n""" 24 | """@@ -1,7 +1,7 @@\n""" 25 | """ bad = [\n""" 26 | """- "ab"\n""" 27 | """+ "ab",\n""" 28 | """ "cd",\n""" 29 | """ "ef",\n""" 30 | """- "gh"\n""" 31 | """+ "gh",\n""" 32 | """ "ij",\n""" 33 | """ ]\n""") 34 | # fmt: on 35 | 36 | expected_line_change = "1" 37 | change_description = StrConcatInSeqLiteral.change_description 38 | num_changes = 2 39 | -------------------------------------------------------------------------------- /integration_tests/test_harden_pyyaml.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | from codemodder.codemods.test import BaseIntegrationTest 4 | from core_codemods.harden_pyyaml import HardenPyyaml, HardenPyyamlTransformer 5 | 6 | 7 | class TestHardenPyyaml(BaseIntegrationTest): 8 | codemod = HardenPyyaml 9 | original_code = """ 10 | import yaml 11 | 12 | data = b"!!python/object/apply:subprocess.Popen \\\\n- ls" 13 | deserialized_data = yaml.load(data, Loader=yaml.Loader) 14 | """ 15 | replacement_lines = [ 16 | (4, "deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)\n") 17 | ] 18 | expected_diff = '--- \n+++ \n@@ -1,4 +1,4 @@\n import yaml\n \n data = b"!!python/object/apply:subprocess.Popen \\\\n- ls"\n-deserialized_data = yaml.load(data, Loader=yaml.Loader)\n+deserialized_data = yaml.load(data, Loader=yaml.SafeLoader)\n' 19 | expected_line_change = "4" 20 | change_description = HardenPyyamlTransformer.change_description 21 | # expected exception because the yaml.SafeLoader protects against unsafe code 22 | allowed_exceptions = (yaml.constructor.ConstructorError,) 23 | -------------------------------------------------------------------------------- 
/tests/transformations/test_remove_unused_imports.py: -------------------------------------------------------------------------------- 1 | from libcst.codemod import CodemodTest 2 | 3 | from codemodder.codemods.transformations.remove_unused_imports import ( 4 | RemoveUnusedImportsCodemod, 5 | ) 6 | 7 | 8 | class TestCleanImports(CodemodTest): 9 | TRANSFORM = RemoveUnusedImportsCodemod 10 | 11 | def test_remove_unused(self): 12 | before = """ 13 | import a 14 | from b import c 15 | """ 16 | 17 | after = "" 18 | 19 | self.assertCodemod(before, after) 20 | 21 | def test_keep_used(self): 22 | before = """ 23 | import a 24 | from b import c 25 | print(c) 26 | """ 27 | 28 | after = """ 29 | from b import c 30 | print(c) 31 | """ 32 | 33 | self.assertCodemod(before, after) 34 | 35 | def test_keep_future(self): 36 | before = """ 37 | from __future__ import absolute_import 38 | """ 39 | 40 | after = """ 41 | from __future__ import absolute_import 42 | """ 43 | 44 | self.assertCodemod(before, after) 45 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_add-requests-timeouts.md: -------------------------------------------------------------------------------- 1 | Many developers will be surprised to learn that `requests` library calls do not include timeouts by default. This means that an attempted request could hang indefinitely if no connection is established or if no data is received from the server. 2 | 3 | The [requests documentation](https://requests.readthedocs.io/en/latest/user/advanced/#timeouts) suggests that most calls should explicitly include a `timeout` parameter. This codemod adds a default timeout value in order to set an upper bound on connection times and ensure that requests connect or fail in a timely manner. This value also ensures the connection will timeout if the server does not respond with data within a reasonable amount of time. 
4 | 5 | While timeout values will be application dependent, we believe that this codemod adds a reasonable default that serves as an appropriate ceiling for most situations. 6 | 7 | Our changes look like the following: 8 | ```diff 9 | import requests 10 | 11 | - requests.get("http://example.com") 12 | + requests.get("http://example.com", timeout=60) 13 | ``` 14 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_secure-tempfile.md: -------------------------------------------------------------------------------- 1 | This codemod replaces all `tempfile.mktemp` calls with the more secure `tempfile.NamedTemporaryFile`. 2 | 3 | The Python [tempfile documentation](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) is explicit that `tempfile.mktemp` should be deprecated to avoid an unsafe and unexpected race condition. `tempfile.mktemp` does not handle the possibility that the returned file name could already be used by another process by the time your code opens the file. A more secure approach to create temporary files is to use `tempfile.NamedTemporaryFile` which will create the file for you and handle all security conditions. 4 | 5 | The changes from this codemod look like this: 6 | 7 | ```diff 8 | import tempfile 9 | - filename = tempfile.mktemp() 10 | + with tempfile.NamedTemporaryFile(delete=False) as tf: 11 | + filename = tf.name 12 | ``` 13 | 14 | The change sets `delete=False` to closely follow your code's intention when calling `tempfile.mktemp`. However, you should use this as a starting point to determine when your temporary file should be deleted. 
15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_fix-dataclass-defaults.md: -------------------------------------------------------------------------------- 1 | When defining a Python dataclass it is not safe to use mutable datatypes (such as `list`, `dict`, or `set`) as defaults for the attributes. This is because the defined attribute will be shared by all instances of the dataclass type. Using such a mutable default will ultimately result in a `ValueError` at runtime. This codemod updates attributes of `dataclasses.dataclass` with mutable defaults to use `dataclasses.field` instead. The [dataclass documentation](https://docs.python.org/3/library/dataclasses.html#mutable-default-values) provides more details about why using `field(default_factory=...)` is the recommended pattern. 2 | 3 | Our changes look something like this: 4 | 5 | ```diff 6 | -from dataclasses import dataclass 7 | +from dataclasses import field, dataclass 8 | 9 | @dataclass 10 | class Person: 11 | name: str = "" 12 | - phones: list = [] 13 | - friends: dict = {} 14 | - family: set = set() 15 | + phones: list = field(default_factory=list) 16 | + friends: dict = field(default_factory=dict) 17 | + family: set = field(default_factory=set) 18 | ``` 19 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_replace-flask-send-file.md: -------------------------------------------------------------------------------- 1 | The `send_file` function from Flask is susceptible to a path traversal attack if its input is not properly validated. 2 | In a path traversal attack, the malicious agent can craft a path containing special paths like `./` or `../` to resolve a file outside of the expected directory path. This potentially allows the agent to overwrite, delete or read arbitrary files. 
In the case of `flask.send_file`, the result is that a malicious user could potentially download sensitive files that exist on the filesystem where the application is being hosted. 3 | Flask offers a native solution with the `flask.send_from_directory` function that validates the given path. 4 | 5 | Our changes look something like this: 6 | 7 | ```diff 8 | -from flask import Flask, send_file 9 | +from flask import Flask 10 | +import flask 11 | +from pathlib import Path 12 | 13 | app = Flask(__name__) 14 | 15 | @app.route("/uploads/") 16 | def download_file(name): 17 | - return send_file(f'path/to/{name}.txt') 18 | + return flask.send_from_directory((p := Path(f'path/to/{name}.txt')).parent, p.name) 19 | ``` 20 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_flask-enable-csrf-protection.md: -------------------------------------------------------------------------------- 1 | Cross-site request forgery (CSRF) is an attack where a user is tricked by a malicious agent into submitting an unintended request (e.g. login requests). A common way to mitigate this issue is to embed an additional token into requests to identify requests from unauthorized locations. 2 | 3 | Flask views using `FlaskForm` have CSRF protection enabled by default. However other views may use AJAX to perform unsafe HTTP methods. FlaskWTF provides a way to enable CSRF protection globally for all views of a Flask app. 4 | 5 | The changes in this codemod may require manual additions to maintain proper functionality. You need to set up either a Flask `SECRET_KEY` or a `WTF_CSRF_SECRET_KEY` in your app configuration and adjust any views with HTML forms and JavaScript requests to include the CSRF token. See the [FlaskWTF docs](https://flask-wtf.readthedocs.io/en/1.2.x/csrf/) for examples on how to do it. 
6 | 7 | Our changes look something like this: 8 | 9 | ```diff 10 | from flask import Flask 11 | +from flask_wtf.csrf import CSRFProtect 12 | 13 | app = Flask(__name__) 14 | +csrf_app = CSRFProtect(app) 15 | ``` 16 | -------------------------------------------------------------------------------- /ci_tests/test_pygoat_findings.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | import pytest 4 | 5 | EXPECTED_FINDINGS = [ 6 | "pixee:python/add-requests-timeouts", 7 | "pixee:python/secure-random", 8 | "pixee:python/sandbox-process-creation", 9 | "pixee:python/subprocess-shell-false", 10 | "pixee:python/django-session-cookie-secure-off", 11 | "pixee:python/django-model-without-dunder-str", 12 | "pixee:python/harden-pyyaml", 13 | "pixee:python/django-debug-flag-on", 14 | "pixee:python/url-sandbox", 15 | "pixee:python/use-defusedxml", 16 | "pixee:python/use-walrus-if", 17 | "pixee:python/timezone-aware-datetime", 18 | ] 19 | 20 | 21 | @pytest.fixture(scope="session") 22 | def pygoat_findings(): 23 | with open("output.codetf") as ff: 24 | results = json.load(ff) 25 | 26 | yield set([x["codemod"] for x in results["results"] if x["changeset"]]) 27 | 28 | 29 | def test_num_pygoat_findings(pygoat_findings): 30 | assert len(pygoat_findings) == len(EXPECTED_FINDINGS) 31 | 32 | 33 | @pytest.mark.parametrize("finding", EXPECTED_FINDINGS) 34 | def test_pygoat_findings(pygoat_findings, finding): 35 | assert finding in pygoat_findings 36 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_fix_float_equality.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.fix_float_equality import FixFloatEqualityTransformer 3 | from core_codemods.sonar.sonar_fix_float_equality import SonarFixFloatEquality 4 | 5 | 6 | class 
TestSonarFixFloatEquality(SonarIntegrationTest): 7 | codemod = SonarFixFloatEquality 8 | code_path = "tests/samples/fix_float_equality.py" 9 | replacement_lines = [ 10 | (1, "import math\n"), 11 | (2, "\n"), 12 | (3, "def foo(a, b):\n"), 13 | (4, " return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0)\n"), 14 | ] 15 | # fmt: off 16 | expected_diff = ( 17 | """--- \n""" 18 | """+++ \n""" 19 | """@@ -1,2 +1,4 @@\n""" 20 | """+import math\n""" 21 | """+\n""" 22 | """ def foo(a, b):\n""" 23 | """- return a == b - 0.1\n""" 24 | """+ return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0)\n""" 25 | ) 26 | # fmt: on 27 | expected_line_change = "2" 28 | change_description = FixFloatEqualityTransformer.change_description 29 | num_changes = 1 30 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_secure_cookie.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test.integration_utils import SonarRemediationIntegrationTest 2 | from core_codemods.sonar.sonar_secure_cookie import ( 3 | SonarSecureCookie, 4 | SonarSecureCookieTransformer, 5 | ) 6 | 7 | 8 | class TestSonarSecureCookie(SonarRemediationIntegrationTest): 9 | codemod = SonarSecureCookie 10 | code_path = "tests/samples/secure_cookie.py" 11 | expected_diff_per_change = [ 12 | "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n", 13 | "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n", 14 | ] 15 | 16 | expected_lines_changed = [8, 8] 17 | num_changes = 2 18 | change_description = 
SonarSecureCookieTransformer.change_description 19 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Developing codemodder-python 2 | 3 | ## Pre-commit install 4 | 5 | We use [pre-commit](https://pre-commit.com/) to run some quick linting such as `black` and `ruff`. 6 | This also runs in CI. 7 | 8 | 9 | ## Local Development 10 | 11 | In general you can rely on Github workflows to see how to run things like linting and tests, 12 | but for local development here is a good workflow. 13 | 14 | 1. To use virtualenv, create an environment with `virtualenv pixeeenv` or `/usr/bin/env python3 -m venv pixeeenv` 15 | to specify a specific Python version. If using `bash` or any compatible shell, activate with `source pixeeenv/bin/activate`. Otherwise, look at [`venv`'s documentation](https://docs.python.org/3/library/venv.html) for instructions. 16 | 17 | 2. `cd codemodder-python` and `pip install -e .` to install the package in development mode 18 | 19 | 3. Run `pip install ".[all]"` to install packages used for development and testing 20 | 21 | 4. You should now be able to run `ruff`, `pytest`, etc. 22 | 23 | 24 | ## Docker 25 | 26 | You can build the docker image with `docker build -t codemodder .` and run it with `docker run`. 
You can also do 27 | `docker run codemodder ...` 28 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_sql_parameterization.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.sonar.sonar_sql_parameterization import SonarSQLParameterization 3 | from core_codemods.sql_parameterization import SQLQueryParameterizationTransformer 4 | 5 | 6 | class TestSonarSQLParameterization(SonarIntegrationTest): 7 | codemod = SonarSQLParameterization 8 | code_path = "tests/samples/fix_sonar_sql_parameterization.py" 9 | replacement_lines = [ 10 | (11, ' sql = """SELECT user FROM users WHERE user = ?"""\n'), 11 | (14, " conn.cursor().execute(sql, ((user), ))\n"), 12 | ] 13 | expected_diff = """\ 14 | --- 15 | +++ 16 | @@ -8,7 +8,7 @@ 17 | @app.route("/example") 18 | def f(): 19 | user = request.args["user"] 20 | - sql = \"\"\"SELECT user FROM users WHERE user = \\'%s\\'\"\"\" 21 | + sql = \"\"\"SELECT user FROM users WHERE user = ?\"\"\" 22 | 23 | conn = sqlite3.connect("example") 24 | - conn.cursor().execute(sql % (user)) 25 | + conn.cursor().execute(sql, ((user), )) 26 | """ 27 | expected_line_change = "14" 28 | change_description = SQLQueryParameterizationTransformer.change_description 29 | -------------------------------------------------------------------------------- /integration_tests/test_secure_flask_cookie.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.secure_flask_cookie import SecureCookieTransformer, SecureFlaskCookie 3 | 4 | 5 | class TestSecureFlaskCookie(BaseIntegrationTest): 6 | codemod = SecureFlaskCookie 7 | original_code = """ 8 | from flask import Flask, session, make_response 9 | 10 | app = Flask(__name__) 11 | 12 | @app.route('/') 13 | def index(): 14 | resp = 
make_response('Custom Cookie Set') 15 | resp.set_cookie('custom_cookie', 'value') 16 | return resp 17 | """ 18 | replacement_lines = [ 19 | ( 20 | 8, 21 | """ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n""", 22 | ), 23 | ] 24 | expected_diff = "--- \n+++ \n@@ -5,5 +5,5 @@\n @app.route('/')\n def index():\n resp = make_response('Custom Cookie Set')\n- resp.set_cookie('custom_cookie', 'value')\n+ resp.set_cookie('custom_cookie', 'value', secure=True, httponly=True, samesite='Lax')\n return resp\n" 25 | expected_line_change = "8" 26 | change_description = SecureCookieTransformer.change_description 27 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_upgrade-sslcontext-tls.md: -------------------------------------------------------------------------------- 1 | This codemod replaces the use of all unsafe and/or deprecated SSL/TLS versions 2 | in the `ssl.SSLContext` constructor. It uses `PROTOCOL_TLS_CLIENT` instead, 3 | which ensures a safe default TLS version. It also sets the `protocol` parameter 4 | to `PROTOCOL_TLS_CLIENT` in calls that omit it, since omitting `protocol` is now deprecated. 5 | 6 | Our change involves modifying the argument to `ssl.SSLContext()` to 7 | use `PROTOCOL_TLS_CLIENT`. 8 | 9 | ```diff 10 | import ssl 11 | - context = ssl.SSLContext() 12 | + context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT) 13 | - context = ssl.SSLContext(protocol=PROTOCOL_SSLv3) 14 | + context = ssl.SSLContext(protocol=PROTOCOL_TLS_CLIENT) 15 | ``` 16 | 17 | There is no functional difference between the unsafe and safe versions, and all modern servers offer TLSv1.2. 18 | 19 | The use of explicit TLS versions (even safe ones) is deprecated by the `ssl` 20 | module, so it is necessary to choose either `PROTOCOL_TLS_CLIENT` or 21 | `PROTOCOL_TLS_SERVER`. 
Using `PROTOCOL_TLS_CLIENT` is expected to be the 22 | correct choice for most applications but in some cases it will be necessary to 23 | use `PROTOCOL_TLS_SERVER` instead. 24 | -------------------------------------------------------------------------------- /src/core_codemods/fix_hasattr_call.py: -------------------------------------------------------------------------------- 1 | import libcst as cst 2 | 3 | from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod 4 | 5 | 6 | class TransformFixHasattrCall(SimpleCodemod): 7 | metadata = Metadata( 8 | name="fix-hasattr-call", 9 | summary="Use `callable` builtin to check for callables", 10 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 11 | references=[ 12 | Reference(url="https://docs.python.org/3/library/functions.html#callable"), 13 | Reference(url="https://docs.python.org/3/library/functions.html#hasattr"), 14 | ], 15 | ) 16 | detector_pattern = """ 17 | - patterns: 18 | - pattern: hasattr(..., "__call__") 19 | - pattern-not: $MODULE.hasattr(...) 
20 | """ 21 | 22 | change_description = "Replace `hasattr` function call with `callable`" 23 | 24 | def on_result_found(self, original_node, updated_node): 25 | del original_node 26 | return updated_node.with_changes( 27 | func=updated_node.func.with_changes(value="callable"), 28 | args=[updated_node.args[0].with_changes(comma=cst.MaybeSentinel.DEFAULT)], 29 | ) 30 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_django_receiver_on_top.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.django_receiver_on_top import DjangoReceiverOnTopTransformer 3 | from core_codemods.sonar.sonar_django_receiver_on_top import SonarDjangoReceiverOnTop 4 | 5 | 6 | class TestDjangoReceiverOnTop(SonarIntegrationTest): 7 | codemod = SonarDjangoReceiverOnTop 8 | code_path = "tests/samples/django_receiver_on_top.py" 9 | replacement_lines = [ 10 | (5, """@receiver(request_finished)\n"""), 11 | (6, """@csrf_exempt\n"""), 12 | ] 13 | # fmt: off 14 | expected_diff = ( 15 | """--- \n""" 16 | """+++ \n""" 17 | """@@ -2,7 +2,7 @@\n""" 18 | """ from django.views.decorators.csrf import csrf_exempt\n""" 19 | """ from django.core.signals import request_finished\n""" 20 | """ \n""" 21 | """+@receiver(request_finished)\n""" 22 | """ @csrf_exempt\n""" 23 | """-@receiver(request_finished)\n""" 24 | """ def foo():\n""" 25 | """ pass\n""" 26 | ) 27 | # fmt: on 28 | 29 | expected_line_change = "6" 30 | change_description = DjangoReceiverOnTopTransformer.change_description 31 | num_changed_files = 1 32 | num_changes = 2 33 | -------------------------------------------------------------------------------- /tests/codemods/sonar/test_sonar_exception_without_raise.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from codemodder.codemods.test import 
BaseSASTCodemodTest 4 | from core_codemods.sonar.sonar_exception_without_raise import SonarExceptionWithoutRaise 5 | 6 | 7 | class TestSonarExceptionWithoutRaise(BaseSASTCodemodTest): 8 | codemod = SonarExceptionWithoutRaise 9 | tool = "sonar" 10 | 11 | def test_name(self): 12 | assert self.codemod.name == "exception-without-raise" 13 | 14 | def test_simple(self, tmpdir): 15 | input_code = """ 16 | ValueError 17 | """ 18 | expected = """ 19 | raise ValueError 20 | """ 21 | issues = { 22 | "issues": [ 23 | { 24 | "rule": "python:S3984", 25 | "status": "OPEN", 26 | "component": "code.py", 27 | "textRange": { 28 | "startLine": 2, 29 | "endLine": 2, 30 | "startOffset": 1, 31 | "endOffset": 10, 32 | }, 33 | } 34 | ] 35 | } 36 | self.run_and_assert(tmpdir, input_code, expected, results=json.dumps(issues)) 37 | -------------------------------------------------------------------------------- /.github/workflows/codemod_pygoat.yml: -------------------------------------------------------------------------------- 1 | name: Codemod Pygoat 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - develop 8 | pull_request: 9 | branches: 10 | - main 11 | - develop 12 | merge_group: 13 | 14 | concurrency: 15 | group: (${{ github.workflow }}-${{ github.event.inputs.branch || github.event.pull_request.head.ref }}) 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | test: 20 | name: Run Codemod on Pygoat 21 | runs-on: ubuntu-24.04 22 | timeout-minutes: 10 23 | steps: 24 | - name: Check out codemodder 25 | uses: actions/checkout@v6 26 | - name: Set Up Python 27 | uses: actions/setup-python@v6 28 | with: 29 | python-version: '3.14' 30 | cache: 'pip' 31 | - name: Install Codemodder Package 32 | run: pip install . 
33 | - name: Install Test Dependencies 34 | run: pip install ".[test]" 35 | - name: Check out Pygoat 36 | uses: actions/checkout@v6 37 | with: 38 | repository: pixee/pygoat 39 | path: pygoat 40 | - name: Run Codemodder 41 | run: codemodder --dry-run --output output.codetf pygoat 42 | - name: Check PyGoat Findings 43 | run: make pygoat-test 44 | -------------------------------------------------------------------------------- /.github/workflows/lint.yml: -------------------------------------------------------------------------------- 1 | name: Lint 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - develop 8 | pull_request: 9 | branches: 10 | - main 11 | - develop 12 | merge_group: 13 | 14 | concurrency: 15 | group: (${{ github.workflow }}-${{ github.event.inputs.branch || github.event.pull_request.head.ref }}) 16 | cancel-in-progress: true 17 | 18 | jobs: 19 | complexity: 20 | name: Code Complexity 21 | runs-on: ubuntu-24.04 22 | timeout-minutes: 3 23 | steps: 24 | - name: Check out code 25 | uses: actions/checkout@v6 26 | - name: Set Up Python 27 | uses: actions/setup-python@v6 28 | with: 29 | python-version: '3.14' 30 | cache: 'pip' 31 | - name: Install Dependencies 32 | run: | 33 | pip install ".[complexity]" 34 | - name: Run Radon 35 | run: make radon 36 | - name: Run Xenon 37 | # threshold for pipeline to fail if we go below average, module, or block complexity 38 | # https://github.com/rubik/xenon 39 | run: make xenon 40 | 41 | pre-commit: 42 | runs-on: ubuntu-latest 43 | steps: 44 | - uses: actions/checkout@v6 45 | - uses: actions/setup-python@v6 46 | - uses: pre-commit/action@v3.0.1 47 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_django_json_response_type.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.django_json_response_type import 
DjangoJsonResponseTypeTransformer 3 | from core_codemods.sonar.sonar_django_json_response_type import ( 4 | SonarDjangoJsonResponseType, 5 | ) 6 | 7 | 8 | class TestSonarDjangoJsonResponseType(SonarIntegrationTest): 9 | codemod = SonarDjangoJsonResponseType 10 | code_path = "tests/samples/django_json_response_type.py" 11 | replacement_lines = [ 12 | ( 13 | 6, 14 | """ return HttpResponse(json_response, content_type="application/json")\n""", 15 | ), 16 | ] 17 | 18 | # fmt: off 19 | expected_diff = ( 20 | """--- \n""" 21 | """+++ \n""" 22 | """@@ -3,4 +3,4 @@\n""" 23 | """ \n""" 24 | """ def foo(request):\n""" 25 | """ json_response = json.dumps({ "user_input": request.GET.get("input") })\n""" 26 | """- return HttpResponse(json_response)\n""" 27 | """+ return HttpResponse(json_response, content_type="application/json")\n""" 28 | ) 29 | # fmt: on 30 | 31 | expected_line_change = "6" 32 | change_description = DjangoJsonResponseTypeTransformer.change_description 33 | num_changed_files = 1 34 | -------------------------------------------------------------------------------- /src/core_codemods/combine_isinstance_issubclass.py: -------------------------------------------------------------------------------- 1 | import libcst as cst 2 | from libcst import matchers as m 3 | 4 | from core_codemods.api import Metadata, ReviewGuidance 5 | 6 | from .combine_calls_base import CombineCallsBaseCodemod 7 | 8 | 9 | class CombineIsinstanceIssubclass(CombineCallsBaseCodemod): 10 | metadata = Metadata( 11 | name="combine-isinstance-issubclass", 12 | summary="Simplify Boolean Expressions Using `isinstance` and `issubclass`", 13 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 14 | references=[], 15 | ) 16 | change_description = "Use tuple of matches instead of boolean expression with `isinstance` or `issubclass`" 17 | 18 | combinable_funcs = ["isinstance", "issubclass"] 19 | dedupilcation_attr = "value" 20 | args_to_combine = [1] 21 | args_to_keep_as_is = [0] 22 | 23 | def 
make_call_matcher(self, func_name: str) -> m.Call: 24 | return m.Call( 25 | func=m.Name(func_name), 26 | args=[m.Arg(value=m.Name()), m.Arg(value=m.Name() | m.Tuple())], 27 | ) 28 | 29 | def check_calls_same_instance( 30 | self, left_call: cst.Call, right_call: cst.Call 31 | ) -> bool: 32 | return left_call.args[0].value.value == right_call.args[0].value.value 33 | -------------------------------------------------------------------------------- /integration_tests/test_upgrade_sslcontext_minimum_version.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.upgrade_sslcontext_minimum_version import ( 3 | UpgradeSSLContextMinimumVersion, 4 | ) 5 | 6 | 7 | class TestUpgradeSSLContextMininumVersion(BaseIntegrationTest): 8 | codemod = UpgradeSSLContextMinimumVersion 9 | original_code = """ 10 | from ssl import PROTOCOL_TLS_CLIENT, SSLContext, TLSVersion 11 | 12 | my_ctx = SSLContext(protocol=PROTOCOL_TLS_CLIENT) 13 | 14 | print("FOO") 15 | 16 | my_ctx.maximum_version = TLSVersion.MAXIMUM_SUPPORTED 17 | my_ctx.minimum_version = TLSVersion.TLSv1_1 18 | """ 19 | replacement_lines = [ 20 | (2, "import ssl\n\n"), 21 | (8, "my_ctx.minimum_version = ssl.TLSVersion.TLSv1_2\n"), 22 | ] 23 | 24 | expected_diff = '--- \n+++ \n@@ -1,8 +1,9 @@\n from ssl import PROTOCOL_TLS_CLIENT, SSLContext, TLSVersion\n+import ssl\n \n my_ctx = SSLContext(protocol=PROTOCOL_TLS_CLIENT)\n \n print("FOO")\n \n my_ctx.maximum_version = TLSVersion.MAXIMUM_SUPPORTED\n-my_ctx.minimum_version = TLSVersion.TLSv1_1\n+my_ctx.minimum_version = ssl.TLSVersion.TLSv1_2\n' 25 | expected_line_change = "8" 26 | change_description = UpgradeSSLContextMinimumVersion.change_description 27 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_requests-verify.md: -------------------------------------------------------------------------------- 
1 | This codemod checks that calls to the `requests` module API or the `httpx` library use `verify=True` or a path to a CA bundle to ensure TLS certificate validation. 2 | 3 | The [requests documentation](https://requests.readthedocs.io/en/latest/api/) warns that the `verify` flag 4 | > When set to False, requests will accept any TLS certificate presented by the server, and will ignore hostname mismatches and/or expired certificates, which will make your application vulnerable to man-in-the-middle (MitM) attacks. Setting verify to False may be useful during local development or testing. 5 | 6 | Similarly, setting `verify=False` when using the `httpx` library to make requests disables certificate verification. 7 | 8 | The changes from this codemod look like this: 9 | 10 | 11 | ```diff 12 | import requests 13 | 14 | - requests.get("www.google.com", ...,verify=False) 15 | + requests.get("www.google.com", ...,verify=True) 16 | ... 17 | import httpx 18 | 19 | - httpx.get("www.google.com", ...,verify=False) 20 | + httpx.get("www.google.com", ...,verify=True) 21 | 22 | ``` 23 | 24 | This codemod also checks other methods in the `requests` module and `httpx` library that accept a `verify` flag (e.g. `requests.post`, `httpx.AsyncClient`, etc.) 
25 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_remove_assertion_in_pytest_raises.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.remove_assertion_in_pytest_raises import ( 3 | RemoveAssertionInPytestRaisesTransformer, 4 | ) 5 | from core_codemods.sonar.sonar_remove_assertion_in_pytest_raises import ( 6 | SonarRemoveAssertionInPytestRaises, 7 | ) 8 | 9 | 10 | class TestSonarRemoveAssertionInPytestRaises(SonarIntegrationTest): 11 | codemod = SonarRemoveAssertionInPytestRaises 12 | code_path = "tests/samples/remove_assertion_in_pytest_raises.py" 13 | replacement_lines = [ 14 | (6, """ assert 1\n"""), 15 | (7, """ assert 2\n"""), 16 | ] 17 | 18 | # fmt: off 19 | expected_diff = ( 20 | """--- \n""" 21 | """+++ \n""" 22 | """@@ -3,5 +3,5 @@\n""" 23 | """ def test_foo():\n""" 24 | """ with pytest.raises(ZeroDivisionError):\n""" 25 | """ error = 1/0\n""" 26 | """- assert 1\n""" 27 | """- assert 2\n""" 28 | """+ assert 1\n""" 29 | """+ assert 2\n""" 30 | ) 31 | # fmt: on 32 | 33 | expected_line_change = "4" 34 | change_description = RemoveAssertionInPytestRaisesTransformer.change_description 35 | num_changed_files = 1 36 | num_changes = 1 37 | -------------------------------------------------------------------------------- /src/codemodder/project_analysis/file_parsers/setup_cfg_file_parser.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | from pathlib import Path 3 | 4 | from codemodder.logging import logger 5 | from codemodder.project_analysis.file_parsers.package_store import ( 6 | FileType, 7 | PackageStore, 8 | ) 9 | 10 | from .base_parser import BaseParser 11 | 12 | 13 | class SetupCfgParser(BaseParser): 14 | @property 15 | def file_type(self): 16 | return FileType.SETUP_CFG 17 | 18 | def _parse_file(self, file: 
Path) -> PackageStore | None: 19 | config = configparser.ConfigParser() 20 | try: 21 | config.read(file) 22 | except configparser.ParsingError: 23 | logger.debug("Unable to parse setup.cfg file.") 24 | return None # pragma: no cover 25 | 26 | if "options" not in config: 27 | return None 28 | 29 | dependency_lines = config["options"].get("install_requires", "").split("\n") 30 | python_requires = config["options"].get("python_requires", "") 31 | 32 | return PackageStore( 33 | type=self.file_type, 34 | file=file, 35 | dependencies=set(line for line in dependency_lines if line), 36 | py_versions=[python_requires] if python_requires else [], 37 | ) 38 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_safe-lxml-parser-defaults.md: -------------------------------------------------------------------------------- 1 | This codemod configures safe parameter values when initializing `lxml.etree.XMLParser`, `lxml.etree.ETCompatXMLParser`, `lxml.etree.XMLTreeBuilder`, or `lxml.etree.XMLPullParser`. If parameters `resolve_entities`, `no_network`, and `dtd_validation` are not set to safe values, your code may be vulnerable to entity expansion attacks and external entity (XXE) attacks. 2 | 3 | Parameters `no_network` and `dtd_validation` have safe default values of `True` and `False`, respectively, so this codemod will set each to the default safe value if your code has assigned either to an unsafe value. 4 | 5 | Parameter `resolve_entities` has an unsafe default value of `True`. This codemod will set `resolve_entities=False` if set to `True` or omitted. 
6 | 7 | The changes look as follows: 8 | 9 | ```diff 10 | import lxml.etree 11 | 12 | - parser = lxml.etree.XMLParser() 13 | - parser = lxml.etree.XMLParser(resolve_entities=True) 14 | - parser = lxml.etree.XMLParser(resolve_entities=True, no_network=False, dtd_validation=True) 15 | + parser = lxml.etree.XMLParser(resolve_entities=False) 16 | + parser = lxml.etree.XMLParser(resolve_entities=False) 17 | + parser = lxml.etree.XMLParser(resolve_entities=False, no_network=True, dtd_validation=False) 18 | ``` 19 | -------------------------------------------------------------------------------- /tests/codemods/sonar/test_sonar_literal_or_new_object_identity.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from codemodder.codemods.test import BaseSASTCodemodTest 4 | from core_codemods.sonar.sonar_literal_or_new_object_identity import ( 5 | SonarLiteralOrNewObjectIdentity, 6 | ) 7 | 8 | 9 | class TestSonarLiteralOrNewObjectIdentity(BaseSASTCodemodTest): 10 | codemod = SonarLiteralOrNewObjectIdentity 11 | tool = "sonar" 12 | 13 | def test_name(self): 14 | assert self.codemod.name == "literal-or-new-object-identity" 15 | 16 | def test_list(self, tmpdir): 17 | input_code = """ 18 | l is [1,2,3] 19 | """ 20 | expected = """ 21 | l == [1,2,3] 22 | """ 23 | issues = { 24 | "issues": [ 25 | { 26 | "rule": "python:S5796", 27 | "status": "OPEN", 28 | "component": "code.py", 29 | "textRange": { 30 | "startLine": 2, 31 | "endLine": 2, 32 | "startOffset": 2, 33 | "endOffset": 4, 34 | }, 35 | } 36 | ] 37 | } 38 | self.run_and_assert(tmpdir, input_code, expected, results=json.dumps(issues)) 39 | -------------------------------------------------------------------------------- /src/core_codemods/remove_module_global.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import libcst as cst 4 | from libcst.metadata import GlobalScope, ScopeProvider 5 | 6 | from 
codemodder.codemods.utils_mixin import NameResolutionMixin 7 | from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod 8 | 9 | 10 | class RemoveModuleGlobal(SimpleCodemod, NameResolutionMixin): 11 | metadata = Metadata( 12 | name="remove-module-global", 13 | summary="Remove `global` Usage at Module Level", 14 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 15 | references=[], 16 | ) 17 | change_description = "Remove `global` usage at module level." 18 | 19 | def leave_Global( 20 | self, 21 | original_node: cst.Global, 22 | updated_node: cst.Global, 23 | ) -> Union[ 24 | cst.Global, 25 | cst.RemovalSentinel, 26 | ]: 27 | if not self.filter_by_path_includes_or_excludes( 28 | self.node_position(original_node) 29 | ): 30 | return updated_node 31 | scope = self.get_metadata(ScopeProvider, original_node) 32 | if isinstance(scope, GlobalScope): 33 | self.report_change(original_node) 34 | return cst.RemovalSentinel.REMOVE 35 | return original_node 36 | -------------------------------------------------------------------------------- /integration_tests/sonar/test_sonar_flask_json_response_type.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import SonarIntegrationTest 2 | from core_codemods.flask_json_response_type import FlaskJsonResponseTypeTransformer 3 | from core_codemods.sonar.sonar_flask_json_response_type import ( 4 | SonarFlaskJsonResponseType, 5 | ) 6 | 7 | 8 | class TestSonarFlaskJsonResponseType(SonarIntegrationTest): 9 | codemod = SonarFlaskJsonResponseType 10 | code_path = "tests/samples/flask_json_response_type.py" 11 | replacement_lines = [ 12 | ( 13 | 9, 14 | """ return make_response(json_response, {'Content-Type': 'application/json'})\n""", 15 | ), 16 | ] 17 | 18 | # fmt: off 19 | expected_diff = ( 20 | """--- \n""" 21 | """+++ \n""" 22 | """@@ -6,4 +6,4 @@\n""" 23 | """ @app.route("/test")\n""" 24 | """ def foo(request):\n""" 25 | """ json_response = json.dumps({ 
"user_input": request.GET.get("input") })\n""" 26 | """- return make_response(json_response)\n""" 27 | """+ return make_response(json_response, {'Content-Type': 'application/json'})\n""" 28 | ) 29 | # fmt: on 30 | 31 | expected_line_change = "9" 32 | change_description = FlaskJsonResponseTypeTransformer.change_description 33 | num_changed_files = 1 34 | -------------------------------------------------------------------------------- /src/codemodder/utils/update_finding_metadata.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | 3 | import typing 4 | 5 | if typing.TYPE_CHECKING: 6 | from codemodder.codemods.base_codemod import ToolRule 7 | 8 | from codemodder.codetf import Change, ChangeSet 9 | 10 | 11 | def update_finding_metadata( 12 | tool_rules: list[ToolRule], 13 | changesets: list[ChangeSet], 14 | ) -> list[ChangeSet]: 15 | if not (tool_rule_map := {rule.id: (rule.name, rule.url) for rule in tool_rules}): 16 | return changesets 17 | 18 | new_changesets: list[ChangeSet] = [] 19 | for changeset in changesets: 20 | new_changes: list[Change] = [] 21 | for change in changeset.changes: 22 | new_changes.append( 23 | change.with_findings( 24 | [ 25 | ( 26 | finding.with_rule(*tool_rule_map[finding.rule.id]) 27 | if finding.rule.id in tool_rule_map 28 | else finding 29 | ) 30 | for finding in change.fixedFindings or [] 31 | ] 32 | or None 33 | ) 34 | ) 35 | new_changesets.append(changeset.with_changes(new_changes)) 36 | 37 | return new_changesets 38 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_harden-pickle-load.md: -------------------------------------------------------------------------------- 1 | Python's `pickle` module is notoriously insecure. While it is very useful for serializing and deserializing Python objects, it is not safe to use `pickle` to load data from untrusted sources. 
This is because `pickle` can execute arbitrary code when loading data. This can be exploited by an attacker to execute arbitrary code on your system. Unlike `yaml` there is no concept of a "safe" loader in `pickle`. Therefore, it is recommended to avoid `pickle` and to use a different serialization format such as `json` or `yaml` when working with untrusted data. 2 | 3 | However, if you must use `pickle` to load data from an untrusted source, we recommend using the open-source `fickling` library. `fickling` is a drop-in replacement for `pickle` that validates the data before loading it and checks for the possibility of code execution. This makes it much safer (although still not entirely safe) to use `pickle` to load data from untrusted sources. 4 | 5 | This codemod replaces calls to `pickle.load` with `fickling.load` in Python code. It also adds an import statement for `fickling` if it is not already present. 6 | 7 | The changes look like the following: 8 | ```diff 9 | - import pickle 10 | + import fickling 11 | 12 | - data = pickle.load(file) 13 | + data = fickling.load(file) 14 | ``` 15 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_harden-pyyaml.md: -------------------------------------------------------------------------------- 1 | The default loaders in PyYAML are not safe to use with untrusted data. They potentially make your application vulnerable to arbitrary code execution attacks. If you open a YAML file from an untrusted source, and the file is loaded with the default loader, an attacker could execute arbitrary code on your machine. 2 | 3 | This codemod hardens all [`yaml.load()`](https://pyyaml.org/wiki/PyYAMLDocumentation) calls against such attacks by replacing the default loader with `yaml.SafeLoader`. This is the recommended loader for loading untrusted data. For most use cases it functions as a drop-in replacement for the default loader. 
4 | 5 | Calling `yaml.load()` without an explicit loader argument is equivalent to calling it with `Loader=yaml.Loader`, which is unsafe. This usage [has been deprecated](https://github.com/yaml/pyyaml/wiki/PyYAML-yaml.load(input\)-Deprecation) since PyYAML 5.1. This codemod will add an explicit `SafeLoader` argument to all `yaml.load()` calls that don't use an explicit loader. 6 | 7 | The changes from this codemod look like the following: 8 | ```diff 9 | import yaml 10 | data = b'!!python/object/apply:subprocess.Popen \\n- ls' 11 | - deserialized_data = yaml.load(data, yaml.Loader) 12 | + deserialized_data = yaml.load(data, Loader=yaml.SafeLoader) 13 | ``` 14 | -------------------------------------------------------------------------------- /tests/codemods/sonar/test_sonar_numpy_nan_equality.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from codemodder.codemods.test import BaseSASTCodemodTest 4 | from core_codemods.sonar.sonar_numpy_nan_equality import SonarNumpyNanEquality 5 | 6 | 7 | class TestSonarNumpyNanEquality(BaseSASTCodemodTest): 8 | codemod = SonarNumpyNanEquality 9 | tool = "sonar" 10 | 11 | def test_name(self): 12 | assert self.codemod.name == "numpy-nan-equality" 13 | 14 | def test_simple(self, tmpdir): 15 | input_code = """ 16 | import numpy 17 | if a == numpy.nan: 18 | pass 19 | """ 20 | expected = """ 21 | import numpy 22 | if numpy.isnan(a): 23 | pass 24 | """ 25 | issues = { 26 | "issues": [ 27 | { 28 | "rule": "python:S6725", 29 | "status": "OPEN", 30 | "component": "code.py", 31 | "textRange": { 32 | "startLine": 3, 33 | "endLine": 3, 34 | "startOffset": 3, 35 | "endOffset": 17, 36 | }, 37 | } 38 | ] 39 | } 40 | self.run_and_assert(tmpdir, input_code, expected, results=json.dumps(issues)) 41 | -------------------------------------------------------------------------------- /tests/codemods/sonar/test_sonar_break_or_continue_out_of_loop.py: 
-------------------------------------------------------------------------------- 1 | import json 2 | 3 | from codemodder.codemods.test import BaseSASTCodemodTest 4 | from core_codemods.sonar.sonar_break_or_continue_out_of_loop import ( 5 | SonarBreakOrContinueOutOfLoop, 6 | ) 7 | 8 | 9 | class TestSonarSQLParameterization(BaseSASTCodemodTest): 10 | codemod = SonarBreakOrContinueOutOfLoop 11 | tool = "sonar" 12 | 13 | def test_name(self): 14 | assert self.codemod.name == "break-or-continue-out-of-loop" 15 | 16 | def test_simple(self, tmpdir): 17 | input_code = """\ 18 | def f(): 19 | continue 20 | """ 21 | expected = """\ 22 | def f(): 23 | pass 24 | """ 25 | issues = { 26 | "issues": [ 27 | { 28 | "rule": "python:S1716", 29 | "status": "OPEN", 30 | "component": "code.py", 31 | "textRange": { 32 | "startLine": 2, 33 | "endLine": 2, 34 | "startOffset": 4, 35 | "endOffset": 12, 36 | }, 37 | } 38 | ] 39 | } 40 | self.run_and_assert(tmpdir, input_code, expected, results=json.dumps(issues)) 41 | -------------------------------------------------------------------------------- /integration_tests/test_remove_assertion_in_pytest_raises.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.remove_assertion_in_pytest_raises import ( 3 | RemoveAssertionInPytestRaises, 4 | RemoveAssertionInPytestRaisesTransformer, 5 | ) 6 | 7 | 8 | class TestRemoveAssertionInPytestRaises(BaseIntegrationTest): 9 | codemod = RemoveAssertionInPytestRaises 10 | original_code = """ 11 | import pytest 12 | 13 | def test_foo(): 14 | with pytest.raises(ZeroDivisionError): 15 | error = 1/0 16 | assert 1 17 | assert 2 18 | """ 19 | replacement_lines = [ 20 | (6, """ assert 1\n"""), 21 | (7, """ assert 2\n"""), 22 | ] 23 | 24 | # fmt: off 25 | expected_diff = ( 26 | """--- \n""" 27 | """+++ \n""" 28 | """@@ -3,5 +3,5 @@\n""" 29 | """ def test_foo():\n""" 30 | """ with 
pytest.raises(ZeroDivisionError):\n""" 31 | """ error = 1/0\n""" 32 | """- assert 1\n""" 33 | """- assert 2\n""" 34 | """+ assert 1\n""" 35 | """+ assert 2\n""" 36 | ) 37 | # fmt: on 38 | 39 | expected_line_change = "4" 40 | change_description = RemoveAssertionInPytestRaisesTransformer.change_description 41 | num_changed_files = 1 42 | num_changes = 1 43 | -------------------------------------------------------------------------------- /src/codemodder/codemods/test/validations.py: -------------------------------------------------------------------------------- 1 | import importlib.util 2 | import tempfile 3 | from types import ModuleType 4 | from typing import Optional 5 | 6 | 7 | def execute_code(*, path=None, code=None, allowed_exceptions=None): 8 | """ 9 | Ensure that code written in `path` or in `code` str is executable. 10 | """ 11 | assert (path is None) != ( 12 | code is None 13 | ), "Must pass either path to code or code as a str." 14 | 15 | if path: 16 | return _run_code(path, allowed_exceptions) 17 | with tempfile.NamedTemporaryFile(suffix=".py", mode="w+t") as temp: 18 | temp.write(code) 19 | return _run_code(temp.name, allowed_exceptions) 20 | 21 | 22 | def _run_code(path, allowed_exceptions=None) -> Optional[ModuleType]: 23 | """ 24 | Execute the code in `path` in its own namespace. 25 | Return loaded module for any additional testing later on. 
26 | """ 27 | allowed_exceptions = allowed_exceptions or () 28 | 29 | if not (spec := importlib.util.spec_from_file_location("output_code", path)): 30 | return None 31 | 32 | module = importlib.util.module_from_spec(spec) 33 | if not spec.loader: 34 | return None 35 | try: 36 | spec.loader.exec_module(module) 37 | except allowed_exceptions: 38 | pass 39 | 40 | return module 41 | -------------------------------------------------------------------------------- /integration_tests/test_file_resource_leak.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.file_resource_leak import ( 3 | FileResourceLeak, 4 | FileResourceLeakTransformer, 5 | ) 6 | 7 | 8 | class TestFileResourceLeak(BaseIntegrationTest): 9 | codemod = FileResourceLeak 10 | original_code = """ 11 | import tempfile 12 | path = tempfile.NamedTemporaryFile().name 13 | file = open(path, 'w', encoding='utf-8') 14 | pass 15 | file.write('Hello World') 16 | """ 17 | replacement_lines = [ 18 | (3, """with open(path, 'w', encoding='utf-8') as file:\n"""), 19 | (4, """ pass\n"""), 20 | (5, """ file.write('Hello World')\n"""), 21 | ] 22 | # fmt: off 23 | expected_diff = ( 24 | """--- \n""" 25 | """+++ \n""" 26 | """@@ -1,5 +1,5 @@\n""" 27 | """ import tempfile\n""" 28 | """ path = tempfile.NamedTemporaryFile().name\n""" 29 | """-file = open(path, 'w', encoding='utf-8')\n""" 30 | """-pass\n""" 31 | """-file.write('Hello World')\n""" 32 | """+with open(path, 'w', encoding='utf-8') as file:\n""" 33 | """+ pass\n""" 34 | """+ file.write('Hello World')\n""") 35 | # fmt: on 36 | 37 | expected_line_change = "3" 38 | change_description = FileResourceLeakTransformer.change_description 39 | num_changed_files = 1 40 | -------------------------------------------------------------------------------- /src/codemodder/project_analysis/file_parsers/base_parser.py: 
-------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from pathlib import Path 3 | from typing import List 4 | 5 | from codemodder.logging import logger 6 | 7 | from .package_store import FileType, PackageStore 8 | 9 | 10 | class BaseParser(ABC): 11 | parent_directory: Path 12 | 13 | def __init__(self, parent_directory: Path): 14 | self.parent_directory = parent_directory 15 | 16 | @property 17 | @abstractmethod 18 | def file_type(self) -> FileType: 19 | pass 20 | 21 | @abstractmethod 22 | def _parse_file(self, file: Path) -> PackageStore | None: 23 | pass 24 | 25 | def find_file_locations(self) -> List[Path]: 26 | return list(Path(self.parent_directory).rglob(self.file_type.value)) 27 | 28 | def parse(self) -> list[PackageStore]: 29 | """ 30 | Find 0 or more project config or dependency files within a project repo. 31 | """ 32 | stores = [] 33 | req_files = self.find_file_locations() 34 | for file in req_files: 35 | try: 36 | store = self._parse_file(file) 37 | except Exception as e: 38 | logger.debug("Error parsing file: %s", file, exc_info=e) 39 | continue 40 | 41 | if store: 42 | stores.append(store) 43 | return stores 44 | -------------------------------------------------------------------------------- /integration_tests/test_use_walrus_if.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.use_walrus_if import UseWalrusIf 3 | 4 | 5 | class TestUseWalrusIf(BaseIntegrationTest): 6 | codemod = UseWalrusIf 7 | 8 | original_code = """ 9 | x = sum([1, 2]) 10 | if x is not None: 11 | print(x) 12 | 13 | y = max([1, 2]) 14 | if y: 15 | print(y) 16 | 17 | z = min([1, 2]) 18 | print(z) 19 | 20 | 21 | def whatever(): 22 | b = int("2") 23 | if b == 10: 24 | print(b) 25 | """ 26 | expected_new_code = """ 27 | if (x := sum([1, 2])) is not None: 28 | print(x) 29 | 30 | if y := max([1, 2]): 
31 | print(y) 32 | 33 | z = min([1, 2]) 34 | print(z) 35 | 36 | 37 | def whatever(): 38 | if (b := int("2")) == 10: 39 | print(b) 40 | """ 41 | 42 | expected_diff = '--- \n+++ \n@@ -1,9 +1,7 @@\n-x = sum([1, 2])\n-if x is not None:\n+if (x := sum([1, 2])) is not None:\n print(x)\n \n-y = max([1, 2])\n-if y:\n+if y := max([1, 2]):\n print(y)\n \n z = min([1, 2])\n@@ -11,6 +9,5 @@\n \n \n def whatever():\n- b = int("2")\n- if b == 10:\n+ if (b := int("2")) == 10:\n print(b)' 43 | num_changes = 3 44 | expected_line_change = 1 45 | change_description = UseWalrusIf.change_description 46 | -------------------------------------------------------------------------------- /integration_tests/test_django_json_response_type.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.django_json_response_type import ( 3 | DjangoJsonResponseType, 4 | DjangoJsonResponseTypeTransformer, 5 | ) 6 | 7 | 8 | class TestDjangoJsonResponseType(BaseIntegrationTest): 9 | codemod = DjangoJsonResponseType 10 | original_code = """ 11 | from django.http import HttpResponse 12 | import json 13 | 14 | def foo(request): 15 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 16 | return HttpResponse(json_response) 17 | """ 18 | replacement_lines = [ 19 | ( 20 | 6, 21 | """ return HttpResponse(json_response, content_type="application/json")\n""", 22 | ), 23 | ] 24 | 25 | # fmt: off 26 | expected_diff = ( 27 | """--- \n""" 28 | """+++ \n""" 29 | """@@ -3,4 +3,4 @@\n""" 30 | """ \n""" 31 | """ def foo(request):\n""" 32 | """ json_response = json.dumps({ "user_input": request.GET.get("input") })\n""" 33 | """- return HttpResponse(json_response)\n""" 34 | """+ return HttpResponse(json_response, content_type="application/json")\n""" 35 | ) 36 | # fmt: on 37 | 38 | expected_line_change = "6" 39 | change_description = DjangoJsonResponseTypeTransformer.change_description 40 | 
num_changed_files = 1 41 | -------------------------------------------------------------------------------- /tests/codemods/test_use_set_literal.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseCodemodTest 2 | from core_codemods.use_set_literal import UseSetLiteral 3 | 4 | 5 | class TestUseSetLiteral(BaseCodemodTest): 6 | codemod = UseSetLiteral 7 | 8 | def test_simple(self, tmpdir): 9 | original_code = """ 10 | x = set([1, 2, 3]) 11 | """ 12 | expected_code = """ 13 | x = {1, 2, 3} 14 | """ 15 | self.run_and_assert(tmpdir, original_code, expected_code) 16 | 17 | def test_empty_list(self, tmpdir): 18 | original_code = """ 19 | x = set([]) 20 | """ 21 | expected_code = """ 22 | x = set() 23 | """ 24 | self.run_and_assert(tmpdir, original_code, expected_code) 25 | 26 | def test_already_empty(self, tmpdir): 27 | original_code = """ 28 | x = set() 29 | """ 30 | self.run_and_assert(tmpdir, original_code, original_code) 31 | 32 | def test_not_builtin(self, tmpdir): 33 | original_code = """ 34 | from whatever import set 35 | x = set([1, 2, 3]) 36 | """ 37 | self.run_and_assert(tmpdir, original_code, original_code) 38 | 39 | def test_not_list_literal(self, tmpdir): 40 | original_code = """ 41 | x = set(some_previously_defined_list) 42 | """ 43 | self.run_and_assert(tmpdir, original_code, original_code) 44 | -------------------------------------------------------------------------------- /integration_tests/test_django_receiver_on_top.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.django_receiver_on_top import ( 3 | DjangoReceiverOnTop, 4 | DjangoReceiverOnTopTransformer, 5 | ) 6 | 7 | 8 | class TestDjangoReceiverOnTop(BaseIntegrationTest): 9 | codemod = DjangoReceiverOnTop 10 | original_code = """ 11 | from django.dispatch import receiver 12 | from 
django.views.decorators.csrf import csrf_exempt 13 | from django.core.signals import request_finished 14 | 15 | @csrf_exempt 16 | @receiver(request_finished) 17 | def foo(): 18 | pass 19 | """ 20 | replacement_lines = [ 21 | (5, """@receiver(request_finished)\n"""), 22 | (6, """@csrf_exempt\n"""), 23 | ] 24 | 25 | # fmt: off 26 | expected_diff = ( 27 | """--- \n""" 28 | """+++ \n""" 29 | """@@ -2,7 +2,7 @@\n""" 30 | """ from django.views.decorators.csrf import csrf_exempt\n""" 31 | """ from django.core.signals import request_finished\n""" 32 | """ \n""" 33 | """+@receiver(request_finished)\n""" 34 | """ @csrf_exempt\n""" 35 | """-@receiver(request_finished)\n""" 36 | """ def foo():\n""" 37 | """ pass\n""" 38 | ) 39 | # fmt: on 40 | 41 | expected_line_change = "6" 42 | change_description = DjangoReceiverOnTopTransformer.change_description 43 | num_changed_files = 1 44 | num_changes = 2 45 | -------------------------------------------------------------------------------- /integration_tests/test_fix_task_instantiation.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_async_task_instantiation import FixAsyncTaskInstantiation 3 | 4 | 5 | class TestFixAsyncTaskInstantiation(BaseIntegrationTest): 6 | codemod = FixAsyncTaskInstantiation 7 | original_code = """ 8 | import asyncio 9 | 10 | async def my_coroutine(): 11 | await asyncio.sleep(1) 12 | print("Task completed") 13 | 14 | async def main(): 15 | task = asyncio.Task(my_coroutine(), name="my task") 16 | await task 17 | 18 | asyncio.run(main()) 19 | """ 20 | replacement_lines = [ 21 | ( 22 | 8, 23 | """ task = asyncio.create_task(my_coroutine(), name="my task")\n""", 24 | ), 25 | ] 26 | # fmt: off 27 | expected_diff = ( 28 | """--- \n""" 29 | """+++ \n""" 30 | """@@ -5,7 +5,7 @@\n""" 31 | """ print("Task completed")\n""" 32 | """ \n""" 33 | """ async def main():\n""" 34 | """- task = 
asyncio.Task(my_coroutine(), name="my task")\n""" 35 | """+ task = asyncio.create_task(my_coroutine(), name="my task")\n""" 36 | """ await task\n""" 37 | """ \n""" 38 | """ asyncio.run(main())\n""" 39 | ) 40 | # fmt: on 41 | 42 | expected_line_change = "8" 43 | change_description = FixAsyncTaskInstantiation.change_description 44 | num_changed_files = 1 45 | -------------------------------------------------------------------------------- /tests/test_semgrep.py: -------------------------------------------------------------------------------- 1 | from pathlib import Path 2 | 3 | import pytest 4 | from sarif_pydantic import Sarif 5 | 6 | from codemodder.codemods.semgrep import SemgrepSarifFileDetector 7 | from codemodder.context import CodemodExecutionContext 8 | from codemodder.semgrep import SemgrepResultSet, SemgrepSarifToolDetector 9 | 10 | SAMPLE_DATA_PATH = Path(__file__).parent / "samples" 11 | 12 | 13 | @pytest.mark.parametrize( 14 | "filename, expected", 15 | [ 16 | ("pygoat.semgrep.sarif.json", True), 17 | ("webgoat_v8.2.0_codeql.sarif", False), 18 | ], 19 | ) 20 | def test_semgrep_sarif_tool_detector(filename, expected): 21 | detector = SemgrepSarifToolDetector() 22 | sarif_path = SAMPLE_DATA_PATH / filename 23 | data = Sarif.model_validate_json(sarif_path.read_text()) 24 | assert detector.detect(data.runs[0]) is expected 25 | 26 | 27 | def test_semgrep_sarif_codemode_detector(mocker): 28 | detector = SemgrepSarifFileDetector() 29 | 30 | context = mocker.MagicMock(spec=CodemodExecutionContext) 31 | context.tool_result_files_map = { 32 | "semgrep": [SAMPLE_DATA_PATH / "pygoat.semgrep.sarif.json"] 33 | } 34 | results = detector.apply(codemod_id="foo", context=context) 35 | assert isinstance(results, SemgrepResultSet) 36 | assert len(results) == 25 37 | assert ( 38 | "python.django.security.audit.secure-cookies.django-secure-set-cookie" 39 | in results 40 | ) 41 | -------------------------------------------------------------------------------- 
/tests/test_registry.py: -------------------------------------------------------------------------------- 1 | from codemodder.registry import ( 2 | CodemodCollection, 3 | CodemodRegistry, 4 | load_registered_codemods, 5 | ) 6 | 7 | 8 | def test_default_extensions(mocker): 9 | registry = CodemodRegistry() 10 | assert registry.default_include_paths == [] 11 | 12 | CodemodA = mocker.MagicMock(default_extensions=[".py"]) 13 | CodemodB = mocker.MagicMock(default_extensions=[".py", ".txt"]) 14 | 15 | registry.add_codemod_collection( 16 | CodemodCollection(origin="origin", codemods=[CodemodA, CodemodB]) 17 | ) 18 | 19 | assert sorted(registry.default_include_paths) == [ 20 | "**/*.py", 21 | "**/*.txt", 22 | "*.py", 23 | "*.txt", 24 | ] 25 | 26 | 27 | def test_codemods_by_tool(mocker): 28 | registry = CodemodRegistry() 29 | assert not registry._codemods_by_tool 30 | 31 | CodemodA = mocker.MagicMock() 32 | CodemodB = mocker.MagicMock() 33 | 34 | registry.add_codemod_collection( 35 | CodemodCollection(origin="origin", codemods=[CodemodA, CodemodB]) 36 | ) 37 | 38 | assert len(registry.codemods_by_tool("origin")) == 2 39 | 40 | 41 | def test_current_codemods_by_tool(): 42 | codemod_registry = load_registered_codemods() 43 | assert len(codemod_registry.codemods_by_tool("sonar")) > 0 44 | assert len(codemod_registry.codemods_by_tool("semgrep")) > 0 45 | assert len(codemod_registry.codemods_by_tool("pixee")) > 0 46 | -------------------------------------------------------------------------------- /src/core_codemods/docs/pixee_python_disable-graphql-introspection.md: -------------------------------------------------------------------------------- 1 | Introspection allows a client to query the schema of a GraphQL API. Allowing introspection in production code may allow a malicious user to gather information about data types and operations for a potential attack. 2 | 3 | Introspection is often enabled by default in GraphQL without authentication. 
This codemod disables introspection altogether at the view level by introducing a validation rule. The required rules may be dependent on the framework that you are using. Please check your framework documentation for specific rules for disabling introspection. 4 | 5 | Our changes look something like this: 6 | ```diff 7 | from graphql_server.flask.views import GraphQLView 8 | from flask import Flask 9 | from graphql import ( 10 | GraphQLSchema, GraphQLObjectType, GraphQLField, GraphQLString) 11 | +from graphql.validation import NoSchemaIntrospectionCustomRule 12 | 13 | schema = GraphQLSchema( 14 | query=GraphQLObjectType( 15 | name='RootQueryType', 16 | fields={ 17 | 'hello': GraphQLField( 18 | GraphQLString, 19 | resolve=lambda obj, info: 'world') 20 | })) 21 | 22 | app = Flask(__name__) 23 | 24 | app.add_url_rule("/api", 25 | view_func=GraphQLView.as_view( # Noncompliant 26 | name="api", 27 | schema=schema, 28 | + validation_rules = [NoSchemaIntrospectionCustomRule] 29 | ), 30 | ) 31 | ``` 32 | -------------------------------------------------------------------------------- /tests/codemods/sonar/test_sonar_fix_float_equality.py: -------------------------------------------------------------------------------- 1 | import json 2 | 3 | from codemodder.codemods.test import BaseSASTCodemodTest 4 | from core_codemods.sonar.sonar_fix_float_equality import SonarFixFloatEquality 5 | 6 | 7 | class TestSonarFixFloatEquality(BaseSASTCodemodTest): 8 | codemod = SonarFixFloatEquality 9 | tool = "sonar" 10 | 11 | def test_name(self): 12 | assert self.codemod.name == "fix-float-equality" 13 | 14 | def test_simple(self, tmpdir): 15 | input_code = """ 16 | def foo(a, b): 17 | return a == b - 0.1 18 | """ 19 | expected_output = """ 20 | import math 21 | 22 | def foo(a, b): 23 | return math.isclose(a, b - 0.1, rel_tol=1e-09, abs_tol=0.0) 24 | """ 25 | issues = { 26 | "issues": [ 27 | { 28 | "rule": "python:S1244", 29 | "status": "OPEN", 30 | "component": "code.py", 31 | "textRange": 
{ 32 | "startLine": 3, 33 | "endLine": 3, 34 | "startOffset": 11, 35 | "endOffset": 23, 36 | }, 37 | } 38 | ] 39 | } 40 | self.run_and_assert( 41 | tmpdir, 42 | input_code, 43 | expected_output, 44 | results=json.dumps(issues), 45 | ) 46 | -------------------------------------------------------------------------------- /src/core_codemods/limit_readline.py: -------------------------------------------------------------------------------- 1 | import libcst as cst 2 | 3 | from core_codemods.api import Metadata, Reference, ReviewGuidance, SimpleCodemod 4 | 5 | default_limit = "5_000_000" 6 | 7 | 8 | class LimitReadline(SimpleCodemod): 9 | metadata = Metadata( 10 | name="limit-readline", 11 | summary="Limit readline()", 12 | review_guidance=ReviewGuidance.MERGE_AFTER_CURSORY_REVIEW, 13 | references=[ 14 | Reference(url="https://cwe.mitre.org/data/definitions/400"), 15 | ], 16 | ) 17 | change_description = "Adds a size limit argument to readline() calls." 18 | detector_pattern = """ 19 | rules: 20 | - id: limit-readline 21 | mode: taint 22 | pattern-sources: 23 | - pattern-either: 24 | - patterns: 25 | - pattern: io.StringIO(...) 26 | - pattern-inside: | 27 | import io 28 | ... 29 | - patterns: 30 | - pattern: io.BytesIO(...) 31 | - pattern-inside: | 32 | import io 33 | ... 34 | - pattern: open(...) 
35 | pattern-sinks: 36 | - pattern: $SINK.readline() 37 | """ 38 | 39 | def on_result_found(self, _, updated_node): 40 | return self.update_arg_target(updated_node, [cst.Integer(default_limit)]) 41 | -------------------------------------------------------------------------------- /integration_tests/test_request_verify.py: -------------------------------------------------------------------------------- 1 | from requests import exceptions 2 | 3 | from codemodder.codemods.test.integration_utils import BaseRemediationIntegrationTest 4 | from core_codemods.requests_verify import RequestsVerify 5 | 6 | 7 | class TestRequestsVerify(BaseRemediationIntegrationTest): 8 | codemod = RequestsVerify 9 | original_code = """ 10 | import requests 11 | 12 | requests.get("https://www.google.com", verify=False) 13 | requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=False) 14 | var = "hello" 15 | """ 16 | 17 | expected_diff_per_change = [ 18 | '--- \n+++ \n@@ -1,5 +1,5 @@\n import requests\n \n-requests.get("https://www.google.com", verify=False)\n+requests.get("https://www.google.com", verify=True)\n requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=False)\n var = "hello"', 19 | '--- \n+++ \n@@ -1,5 +1,5 @@\n import requests\n \n requests.get("https://www.google.com", verify=False)\n-requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=False)\n+requests.post("https://some-api/", json={"id": 1234, "price": 18}, verify=True)\n var = "hello"', 20 | ] 21 | 22 | expected_lines_changed = [3, 4] 23 | num_changes = 2 24 | change_description = RequestsVerify.change_description 25 | # expected because when executing the output code it will make a request which fails, which is OK. 
26 | allowed_exceptions = (exceptions.ConnectionError,) 27 | -------------------------------------------------------------------------------- /src/core_codemods/use_set_literal.py: -------------------------------------------------------------------------------- 1 | import libcst as cst 2 | 3 | from codemodder.codemods.utils_mixin import NameResolutionMixin 4 | from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod 5 | 6 | 7 | class UseSetLiteral(SimpleCodemod, NameResolutionMixin): 8 | metadata = Metadata( 9 | name="use-set-literal", 10 | summary="Use Set Literals Instead of Sets from Lists", 11 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 12 | references=[], 13 | ) 14 | change_description = "Replace sets from lists with set literals" 15 | 16 | def leave_Call(self, original_node: cst.Call, updated_node: cst.Call): 17 | if not self.filter_by_path_includes_or_excludes( 18 | self.node_position(original_node) 19 | ): 20 | return updated_node 21 | 22 | match original_node.func: 23 | case cst.Name("set"): 24 | if self.is_builtin_function(original_node): 25 | match original_node.args: 26 | case [cst.Arg(value=cst.List(elements=elements))]: 27 | self.report_change(original_node) 28 | 29 | # Can't use set literal for empty set 30 | if len(elements) == 0: 31 | return updated_node.with_changes(args=[]) 32 | 33 | return cst.Set(elements=elements) 34 | 35 | return updated_node 36 | -------------------------------------------------------------------------------- /integration_tests/test_fix_missing_self_or_cls.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_missing_self_or_cls import ( 3 | FixMissingSelfOrCls, 4 | FixMissingSelfOrClsTransformer, 5 | ) 6 | 7 | 8 | class TestFixMissingSelfOrCls(BaseIntegrationTest): 9 | codemod = FixMissingSelfOrCls 10 | original_code = """ 11 | class MyClass: 12 | def instance_method(): 13 | 
print("instance_method") 14 | 15 | @classmethod 16 | def class_method(): 17 | print("class_method") 18 | """ 19 | replacement_lines = [ 20 | ( 21 | 2, 22 | """ def instance_method(self):\n""", 23 | ), 24 | ( 25 | 6, 26 | """ def class_method(cls):\n""", 27 | ), 28 | ] 29 | # fmt: off 30 | expected_diff = ( 31 | """--- \n""" 32 | """+++ \n""" 33 | """@@ -1,7 +1,7 @@\n""" 34 | """ class MyClass:\n""" 35 | """- def instance_method():\n""" 36 | """+ def instance_method(self):\n""" 37 | """ print("instance_method")\n""" 38 | """ \n""" 39 | """ @classmethod\n""" 40 | """- def class_method():\n""" 41 | """+ def class_method(cls):\n""" 42 | """ print("class_method")\n""" 43 | ) 44 | # fmt: on 45 | 46 | expected_line_change = "2" 47 | change_description = FixMissingSelfOrClsTransformer.change_description 48 | num_changes = 2 49 | -------------------------------------------------------------------------------- /integration_tests/test_flask_json_response_type.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.flask_json_response_type import ( 3 | FlaskJsonResponseType, 4 | FlaskJsonResponseTypeTransformer, 5 | ) 6 | 7 | 8 | class TestFlaskJsonResponseType(BaseIntegrationTest): 9 | codemod = FlaskJsonResponseType 10 | original_code = """ 11 | from flask import make_response, Flask 12 | import json 13 | 14 | app = Flask(__name__) 15 | 16 | @app.route("/test") 17 | def foo(request): 18 | json_response = json.dumps({ "user_input": request.GET.get("input") }) 19 | return make_response(json_response) 20 | """ 21 | replacement_lines = [ 22 | ( 23 | 9, 24 | """ return make_response(json_response, {'Content-Type': 'application/json'})\n""", 25 | ), 26 | ] 27 | # fmt: off 28 | expected_diff = ( 29 | """--- \n""" 30 | """+++ \n""" 31 | """@@ -6,4 +6,4 @@\n""" 32 | """ @app.route("/test")\n""" 33 | """ def foo(request):\n""" 34 | """ json_response = json.dumps({ 
"user_input": request.GET.get("input") })\n""" 35 | """- return make_response(json_response)\n""" 36 | """+ return make_response(json_response, {'Content-Type': 'application/json'})\n""" 37 | ) 38 | # fmt: on 39 | 40 | expected_line_change = "9" 41 | change_description = FlaskJsonResponseTypeTransformer.change_description 42 | num_changed_files = 1 43 | -------------------------------------------------------------------------------- /src/core_codemods/combine_startswith_endswith.py: -------------------------------------------------------------------------------- 1 | import libcst as cst 2 | from libcst import matchers as m 3 | 4 | from core_codemods.api import Metadata, ReviewGuidance 5 | 6 | from .combine_calls_base import CombineCallsBaseCodemod 7 | 8 | 9 | class CombineStartswithEndswith(CombineCallsBaseCodemod): 10 | metadata = Metadata( 11 | name="combine-startswith-endswith", 12 | summary="Simplify Boolean Expressions Using `startswith` and `endswith`", 13 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 14 | references=[], 15 | ) 16 | change_description = "Use tuple of matches instead of boolean expression" 17 | 18 | combinable_funcs = ["startswith", "endswith"] 19 | dedupilcation_attr = "evaluated_value" 20 | args_to_combine = [0] 21 | args_to_keep_as_is = [] 22 | 23 | def make_call_matcher(self, func_name: str) -> m.Call: 24 | return m.Call( 25 | func=m.Attribute(value=m.Name(), attr=m.Name(func_name)), 26 | args=[ 27 | m.Arg( 28 | value=m.Tuple() 29 | | m.SimpleString() 30 | | m.ConcatenatedString() 31 | | m.FormattedString() 32 | | m.Name() 33 | ) 34 | ], 35 | ) 36 | 37 | def check_calls_same_instance( 38 | self, left_call: cst.Call, right_call: cst.Call 39 | ) -> bool: 40 | return left_call.func.value.value == right_call.func.value.value 41 | -------------------------------------------------------------------------------- /integration_tests/test_fix_mutable_params.py: -------------------------------------------------------------------------------- 
1 | from codemodder.codemods.test import BaseIntegrationTest 2 | from core_codemods.fix_mutable_params import ( 3 | FixMutableParams, 4 | FixMutableParamsTransformer, 5 | ) 6 | 7 | 8 | class TestFixMutableParams(BaseIntegrationTest): 9 | codemod = FixMutableParams 10 | original_code = """ 11 | def foo(x, y=[]): 12 | y.append(x) 13 | print(y) 14 | 15 | 16 | def bar(x="hello"): 17 | print(x) 18 | 19 | 20 | def baz(x={"foo": 42}, y=set()): 21 | print(x) 22 | print(y) 23 | """ 24 | expected_new_code = """ 25 | def foo(x, y=None): 26 | y = [] if y is None else y 27 | y.append(x) 28 | print(y) 29 | 30 | 31 | def bar(x="hello"): 32 | print(x) 33 | 34 | 35 | def baz(x=None, y=None): 36 | x = {"foo": 42} if x is None else x 37 | y = set() if y is None else y 38 | print(x) 39 | print(y) 40 | """ 41 | 42 | expected_diff = '--- \n+++ \n@@ -1,4 +1,5 @@\n-def foo(x, y=[]):\n+def foo(x, y=None):\n+ y = [] if y is None else y\n y.append(x)\n print(y)\n \n@@ -7,6 +8,8 @@\n print(x)\n \n \n-def baz(x={"foo": 42}, y=set()):\n+def baz(x=None, y=None):\n+ x = {"foo": 42} if x is None else x\n+ y = set() if y is None else y\n print(x)\n print(y)' 43 | expected_line_change = 1 44 | num_changes = 2 45 | change_description = FixMutableParamsTransformer.change_description 46 | -------------------------------------------------------------------------------- /tests/codemods/test_django_debug_flag_on.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseDjangoCodemodTest 2 | from core_codemods.django_debug_flag_on import DjangoDebugFlagOn 3 | 4 | 5 | class TestDjangoDebugFlagOn(BaseDjangoCodemodTest): 6 | codemod = DjangoDebugFlagOn 7 | 8 | def test_name(self): 9 | assert self.codemod.name == "django-debug-flag-on" 10 | 11 | def test_settings_dot_py(self, tmpdir): 12 | django_root, settings_folder = self.create_dir_structure(tmpdir) 13 | (django_root / "manage.py").touch() 14 | file_path = settings_folder / 
"settings.py" 15 | input_code = """DEBUG = True""" 16 | expected = """DEBUG = False""" 17 | self.run_and_assert_filepath(django_root, file_path, input_code, expected) 18 | 19 | def test_not_settings_dot_py(self, tmpdir): 20 | django_root, settings_folder = self.create_dir_structure(tmpdir) 21 | (django_root / "manage.py").touch() 22 | file_path = settings_folder / "code.py" 23 | input_code = """DEBUG = True""" 24 | expected = input_code 25 | self.run_and_assert_filepath(django_root, file_path, input_code, expected) 26 | 27 | def test_no_manage_dot_py(self, tmpdir): 28 | django_root, settings_folder = self.create_dir_structure(tmpdir) 29 | file_path = settings_folder / "settings.py" 30 | input_code = """DEBUG = True""" 31 | expected = input_code 32 | self.run_and_assert_filepath(django_root, file_path, input_code, expected) 33 | -------------------------------------------------------------------------------- /tests/codemods/test_remove_unnecessary_f_str.py: -------------------------------------------------------------------------------- 1 | from codemodder.codemods.test import BaseCodemodTest 2 | from core_codemods.remove_unnecessary_f_str import RemoveUnnecessaryFStr 3 | 4 | 5 | class TestFStr(BaseCodemodTest): 6 | codemod = RemoveUnnecessaryFStr 7 | 8 | def test_no_change(self, tmpdir): 9 | before = r""" 10 | good: str = "good" 11 | good: str = f"with_arg{arg}" 12 | good = "good{arg1}".format(1234) 13 | good = "good".format() 14 | good = "good" % {} 15 | good = "good" % () 16 | good = rf"good\d+{bar}" 17 | good = f"wow i don't have args but don't mess my braces {{ up }}" 18 | """ 19 | self.run_and_assert(tmpdir, before, before) 20 | 21 | def test_change(self, tmpdir): 22 | before = r""" 23 | bad: str = f"bad" + "bad" 24 | bad: str = f'bad' 25 | bad: str = rf'bad\d+' 26 | """ 27 | after = r""" 28 | bad: str = "bad" + "bad" 29 | bad: str = 'bad' 30 | bad: str = r'bad\d+' 31 | """ 32 | self.run_and_assert(tmpdir, before, after, num_changes=3) 33 | 34 | def 
class TestJwtDecodeVerify(SonarRemediationIntegrationTest):
    """Integration test data for the Sonar JWT-decode-verify remediation.

    The sample file contains two unsafe ``jwt.decode`` calls; each one is
    expected to produce its own change, so one unified diff is listed per
    change, in the same order as ``expected_lines_changed``.
    """

    codemod = SonarJwtDecodeVerify
    code_path = "tests/samples/jwt_decode_verify.py"

    # Each diff is written one literal per diff line; adjacent literals are
    # concatenated by the parser, so the runtime value is a single string.
    expected_diff_per_change = [
        # Change 1: `verify=False` keyword argument becomes `verify=True`.
        (
            "--- \n"
            "+++ \n"
            "@@ -8,7 +8,7 @@\n"
            " \n"
            ' encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n'
            " \n"
            '-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n'
            '+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=True)\n'
            ' decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n'
            " \n"
            ' var = "something"\n'
        ),
        # Change 2: `options={"verify_signature": False}` becomes `True`.
        (
            "--- \n"
            "+++ \n"
            "@@ -9,6 +9,6 @@\n"
            ' encoded_jwt = jwt.encode(payload, SECRET_KEY, algorithm="HS256")\n'
            " \n"
            ' decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], verify=False)\n'
            '-decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": False})\n'
            '+decoded_payload = jwt.decode(encoded_jwt, SECRET_KEY, algorithms=["HS256"], options={"verify_signature": True})\n'
            " \n"
            ' var = "something"\n'
        ),
    ]

    expected_lines_changed = [11, 12]
    num_changes = 2
    change_description = JwtDecodeVerifySASTTransformer.change_description
class TestSonarFixMathIsClose(BaseSASTCodemodTest):
    """Unit tests for the Sonar-driven ``fix-math-isclose`` codemod."""

    codemod = SonarFixMathIsClose
    tool = "sonar"

    def test_name(self):
        """The codemod is published under its expected name."""
        expected_name = "fix-math-isclose"
        assert self.codemod.name == expected_name

    def test_simple(self, tmpdir):
        """A flagged ``math.isclose(a, 0)`` call gains ``abs_tol=1e-09``.

        The Sonar issue points at line 5, columns 11-23 of the input, i.e.
        the ``math.isclose`` name only, not the whole call expression.
        """
        # Source snippets are spelled line by line; the leading "\n" keeps the
        # flagged statement on line 5, matching the textRange below.
        input_code = (
            "\n"
            "import math\n"
            "\n"
            "def foo(a):\n"
            "    return math.isclose(a, 0)\n"
        )
        expected_output = (
            "\n"
            "import math\n"
            "\n"
            "def foo(a):\n"
            "    return math.isclose(a, 0, abs_tol=1e-09)\n"
        )

        # Key order is kept as in the original payload so the serialized
        # JSON text is unchanged.
        text_range = {
            "startLine": 5,
            "endLine": 5,
            "startOffset": 11,
            "endOffset": 23,
        }
        issue = {
            "rule": "python:S6727",
            "status": "OPEN",
            "component": "code.py",
            "textRange": text_range,
        }
        sonar_results = json.dumps({"issues": [issue]})

        self.run_and_assert(
            tmpdir,
            input_code,
            expected_output,
            results=sonar_results,
            num_changes=1,
        )
class FixMathIsCloseSonarTransformer(FixMathIsCloseTransformer):
    """``FixMathIsCloseTransformer`` with Sonar-specific result matching."""

    def filter_by_result(self, node) -> bool:
        """
        Decide whether ``node`` corresponds to one of the reported results.

        Result matching is special-cased for this rule because the sonar
        results cover only the `math.isclose` part of the call, without
        `(...args...)`, so the reported range never equals the call's range.
        Only ``cst.Call`` nodes can match; anything else is rejected.
        """
        if not isinstance(node, cst.Call):
            return False

        call_position = self.node_position(node)
        # `self.results` may be None; treat that as "nothing reported".
        for candidate in self.results or []:
            if self.match_location(call_position, candidate):
                return True
        return False

    def match_location(self, pos, result):
        """
        Return True when any location of ``result`` is on the same line as
        ``pos`` and its columns fuzzily match those of ``pos``.
        """
        for location in result.locations:
            if same_line(pos, location) and fuzzy_column_match(pos, location):
                return True
        return False
name="fix-math-isclose", 33 | other=FixMathIsClose, 34 | rule_id="python:S6727", 35 | rule_name="The abs_tol parameter should be provided when using math.isclose to compare values to 0", 36 | transformer=LibcstTransformerPipeline(FixMathIsCloseSonarTransformer), 37 | ) 38 | -------------------------------------------------------------------------------- /src/core_codemods/remove_debug_breakpoint.py: -------------------------------------------------------------------------------- 1 | from typing import Union 2 | 3 | import libcst as cst 4 | 5 | from codemodder.codemods.utils_mixin import AncestorPatternsMixin, NameResolutionMixin 6 | from core_codemods.api import Metadata, ReviewGuidance, SimpleCodemod 7 | 8 | 9 | class RemoveDebugBreakpoint(SimpleCodemod, NameResolutionMixin, AncestorPatternsMixin): 10 | metadata = Metadata( 11 | name="remove-debug-breakpoint", 12 | summary="Remove Calls to `builtin` `breakpoint` and `pdb.set_trace", 13 | review_guidance=ReviewGuidance.MERGE_WITHOUT_REVIEW, 14 | references=[], 15 | ) 16 | change_description = "Remove breakpoint call" 17 | 18 | def leave_Expr( 19 | self, 20 | original_node: cst.Expr, 21 | updated_node: cst.Expr, 22 | ) -> Union[cst.Expr, cst.RemovalSentinel]: 23 | if not self.filter_by_path_includes_or_excludes( 24 | self.node_position(original_node) 25 | ): 26 | return updated_node 27 | 28 | match call_node := original_node.value: 29 | case cst.Call(): 30 | if self.find_base_name(call_node) == "builtins.breakpoint": 31 | self.report_change(original_node) 32 | return cst.RemovalSentinel.REMOVE 33 | if self.find_base_name(call_node) == "pdb.set_trace": 34 | self.remove_unused_import(call_node) 35 | self.report_change(original_node) 36 | return cst.RemovalSentinel.REMOVE 37 | 38 | return updated_node 39 | -------------------------------------------------------------------------------- /tests/codemods/test_limit_readline.py: -------------------------------------------------------------------------------- 1 | from 
class TestLimitReadline(BaseCodemodTest):
    """Unit tests for the ``limit-readline`` codemod.

    Every transformation scenario feeds an unbounded ``readline()`` call and
    expects it to be rewritten as ``readline(5_000_000)``.
    """

    codemod = LimitReadline

    def test_name(self):
        """The codemod is published under its expected name."""
        assert self.codemod.name == "limit-readline"

    def test_file_readline(self, tmpdir):
        """``readline()`` on an object returned by ``open(...)`` gets a limit."""
        input_code = """file = open('some_file.txt')
file.readline()
"""
        expected = """file = open('some_file.txt')
file.readline(5_000_000)
"""
        self.run_and_assert(tmpdir, input_code, expected)

    def test_StringIO_readline(self, tmpdir):
        """``readline()`` called directly on ``io.StringIO(...)`` gets a limit."""
        input_code = """import io
io.StringIO('some_string').readline()
"""

        expected = """import io
io.StringIO('some_string').readline(5_000_000)
"""

        self.run_and_assert(tmpdir, input_code, expected)

    def test_BytesIO_readline(self, tmpdir):
        """``readline()`` called directly on ``io.BytesIO(...)`` gets a limit."""
        input_code = """import io
io.BytesIO(b'some_string').readline()
"""

        expected = """import io
io.BytesIO(b'some_string').readline(5_000_000)
"""

        self.run_and_assert(tmpdir, input_code, expected)

    def test_taint_tracking(self, tmpdir):
        """``readline()`` reached through an alias of the file object gets a limit.

        The input must contain the *unbounded* call. Previously the input
        already had ``readline(5_000_000)``, making input and expected output
        identical, so the test passed even if the alias was never followed.
        """
        input_code = """file = open('some_file.txt')
arg = file
arg.readline()
"""

        expected = """file = open('some_file.txt')
arg = file
arg.readline(5_000_000)
"""

        self.run_and_assert(tmpdir, input_code, expected)