├── VERSION ├── tests ├── __init__.py ├── test_file.py ├── test_file_transformed.py ├── test_cli.py ├── test_main.py ├── test_helpers.py ├── test_observable_transformations.py ├── test_pyggester.py ├── test_command_handlers.py ├── test_module_importer.py ├── test_wrappers.py └── test_observables.py ├── pyggester ├── __init__.py ├── data │ └── help_files │ │ ├── __init__.py │ │ ├── static_helper.md │ │ └── dynamic_helper.md ├── observable_collector.py ├── text_formatters.py ├── main.py ├── message_handler.py ├── helpers.py ├── cli.py ├── command_handlers.py ├── module_importer.py ├── observable_transformations.py ├── pyggester.py ├── wrappers.py └── observables.py ├── pytest.ini ├── pyggester_logo.png ├── pyggester-abstract-execution-flow.png ├── requirements.txt ├── setup.py ├── LICENSE ├── .gitignore ├── contributing.md └── README.md /VERSION: -------------------------------------------------------------------------------- 1 | 1.0.1 -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyggester/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyggester/data/help_files/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pyggester/data/help_files/static_helper.md: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /pytest.ini: -------------------------------------------------------------------------------- 1 | [pytest] 2 | testpaths=tests 3 | 
-------------------------------------------------------------------------------- /tests/test_file.py: -------------------------------------------------------------------------------- 1 | def func1(): 2 | pass 3 | -------------------------------------------------------------------------------- /pyggester/observable_collector.py: -------------------------------------------------------------------------------- 1 | OBSERVABLE_COLLECTOR = [] 2 | -------------------------------------------------------------------------------- /pyggester_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ValdonVitija/pyggester/HEAD/pyggester_logo.png -------------------------------------------------------------------------------- /pyggester-abstract-execution-flow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/ValdonVitija/pyggester/HEAD/pyggester-abstract-execution-flow.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | astor==0.8.1 2 | click==8.1.7 3 | markdown-it-py==3.0.0 4 | mdurl==0.1.2 5 | numpy==1.26.2 6 | pandas==2.1.4 7 | Pygments==2.17.2 8 | python-dateutil==2.8.2 9 | pytz==2023.3.post1 10 | rich==13.7.0 11 | scipy==1.11.4 12 | six==1.16.0 13 | typer==0.9.0 14 | typing_extensions==4.9.0 15 | tzdata==2023.3 -------------------------------------------------------------------------------- /tests/test_file_transformed.py: -------------------------------------------------------------------------------- 1 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 2 | from pyggester.observables import ObservableList, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple, ObservableNumpyArray, ObservablePandasDataFrame 3 | 4 | 5 | def func1(): 6 | pass 7 | 8 | 9 | for observable in 
def custom_print(
    message: str = "",
    style: str = "bold",
    border_style: str = "",
    title: str = "",
):
    """
    Print ``message`` inside a rich panel on the console.

    Args:
        message: Text to display. An empty message prints nothing at all.
        style: Rich style applied to the panel.
        border_style: Rich style applied to the panel border.
        title: Title rendered on the panel's top border.
    """
    # Guard clause: nothing to show, so no panel and no console are created.
    if not message:
        return

    # The message is always rendered with the "[bold yellow]" markup prefix.
    boxed_message = Panel(
        f"[bold yellow]{message}",
        style=style,
        border_style=border_style,
        title=title,
    )
    Console().print(boxed_message)
#!/usr/bin/env python3
"""Packaging script for the pyggester distribution."""

import pathlib

import setuptools

# Resolve every data file relative to this script so the build does not
# depend on the directory the command happens to be launched from.
PROJECT_ROOT = pathlib.Path(__file__).resolve().parent


def _read_text(file_name: str) -> str:
    """Return the UTF-8 decoded content of a file that lives next to setup.py."""
    return (PROJECT_ROOT / file_name).read_text(encoding="UTF-8")


def _read_requirements(file_name: str = "requirements.txt") -> list[str]:
    """Return the requirement specifiers, skipping blank lines and comments."""
    stripped_lines = (line.strip() for line in _read_text(file_name).splitlines())
    return [line for line in stripped_lines if line and not line.startswith("#")]


# Kept as a module-level name, as before; entries no longer carry the
# trailing newline of each requirements.txt line.
install_requires = _read_requirements()


setuptools.setup(
    name="pyggester",
    version=_read_text("VERSION").strip(),
    packages=setuptools.find_packages(include=["pyggester", "pyggester.*"]),
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    author="Valdon Vitija",
    author_email="valdonvitijaa@gmail.com",
    license="MIT",
    install_requires=install_requires,
    entry_points={
        "console_scripts": [
            "pyggest=pyggester.main:main",
        ],
    },
    package_data={"pyggester": ["data/*", "data/help_files/*"]},
)
class MessageHandler:
    """
    Collect suggestion messages for one source location and print them.

    Instances only carry the three slots below:
        messages: Suggestion texts, appended by the caller after construction.
        line_nr: Line number the suggestions refer to.
        file_path: Path of the file the suggestions refer to.
    """

    __slots__: Tuple[str] = ("messages", "line_nr", "file_path")

    def __init__(self, line_nr, file_path) -> None:
        self.file_path: str = file_path
        self.line_nr: int = line_nr
        self.messages: List[str] = []

    def print_messages(self) -> None:
        """
        Hand the collected suggestions to ``custom_print`` as one block.

        The block is a header line followed by one bullet per message. With no
        messages the joined text is empty, and it is still passed on.
        """
        header = (
            [f"{self.line_nr} | Suggestions({self.file_path}):"]
            if self.messages
            else []
        )
        bullets = [f" [*] {message}" for message in self.messages]
        custom_print("\n".join(header + bullets), border_style="green")
@pytest.fixture
def get_single_file_abs_path():
    """
    Return the absolute path of the ``test_file.py`` sample module.

    The path is derived from this test module's own location rather than a
    hard-coded ``/root/pyggester/...`` prefix, so the suite works from any
    checkout directory.

    Returns:
        str: Absolute path to ``test_file.py`` in this tests directory.
    """
    # Function-scope import keeps the fixture independent of the module's
    # import block.
    import pathlib

    return str(pathlib.Path(__file__).resolve().parent / "test_file.py")
-------------------------------------------------------------------------------- 1 | import ast 2 | import astor 3 | from pyggester.observable_transformations import ( 4 | ObservableCollectorAppender, 5 | ObservableRunner, 6 | apply_observable_collector_transformations, 7 | ) 8 | 9 | 10 | def test_observable_collector_appender(): 11 | source_code = "list_ = ObservableList([1,2,3])" 12 | tree = ast.parse(source_code) 13 | transformer = ObservableCollectorAppender() 14 | transformed_tree = transformer.visit(tree) 15 | 16 | transformed_code = astor.to_source(transformed_tree) 17 | assert "OBSERVABLE_COLLECTOR.append(list_)" in transformed_code 18 | 19 | 20 | def test_observable_runner(): 21 | tree = ast.parse("import module1\nimport module2") 22 | transformer = ObservableRunner() 23 | transformed_tree = transformer.visit(tree) 24 | 25 | transformed_code = astor.to_source(transformed_tree) 26 | 27 | assert "for observable in OBSERVABLE_COLLECTOR:" in transformed_code 28 | assert "observable.run()" in transformed_code 29 | 30 | 31 | def test_apply_observable_collector_transformations(): 32 | source_code = "import module1\nimport module2" 33 | tree = ast.parse(source_code) 34 | transformed_code = apply_observable_collector_transformations( 35 | tree, run_observables=True 36 | ) 37 | 38 | assert ( 39 | "from pyggester.observables import" in transformed_code 40 | or "import pyggester.observables" in transformed_code 41 | ) 42 | -------------------------------------------------------------------------------- /tests/test_pyggester.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | import tempfile 3 | import pathlib 4 | from unittest.mock import patch 5 | from pyggester.pyggester import ( 6 | PyggesterDynamic, 7 | ) 8 | 9 | 10 | @pytest.fixture 11 | def temp_dir(): 12 | with tempfile.TemporaryDirectory() as tmpdirname: 13 | yield pathlib.Path(tmpdirname) 14 | 15 | 16 | @pytest.fixture 17 | def temp_file(temp_dir): 18 | 
class TestPyggestTransform(unittest.TestCase):
    """Behavioural tests for ``PyggestTransform.process``."""

    @patch("pyggester.pyggester.PyggesterDynamic.run")
    def test_process_with_help(self, mock_run):
        """With ``help_=True`` the handler exits before any transformation."""
        pyggest_transform = PyggestTransform(path_="your_path", help_=True)
        with self.assertRaises(typer.Exit):
            pyggest_transform.process()

        # The help branch raises before PyggesterDynamic.run is reached.
        mock_run.assert_not_called()

    @patch("pyggester.pyggester.PyggesterDynamic.run")
    def test_process_without_help(self, mock_run):
        """Without help the transformation runs exactly once."""
        pyggest_transform = PyggestTransform(path_="your_path", help_=False)
        pyggest_transform.process()
        mock_run.assert_called_once()

    @patch(
        "pyggester.pyggester.PyggesterDynamic.run",
        side_effect=typer.Exit(code=3),
    )
    def test_process_exception_handling(self, mock_run):
        """A ``typer.Exit`` raised by ``run`` is re-raised by ``process``."""
        # help_ must be False here: with help_=True the help branch exits
        # first and the mocked side effect is never triggered.
        pyggest_transform = PyggestTransform(path_="your_path", help_=False)
        with self.assertRaises(typer.Exit) as context:
            pyggest_transform.process()

        mock_run.assert_called_once()
        # The exit raised by run() must propagate unchanged.
        self.assertEqual(context.exception.exit_code, 3)
def not_implemented(func):
    """
    Decorator to flag a function as not yet implemented.

    Calling the decorated function always raises ``NotImplementedError``; the
    original body is never executed. The wrapper keeps the decorated
    function's metadata (``__name__``, ``__doc__``, ``__wrapped__``) so
    introspection and help output still describe the original function.

    Args:
        func (callable): The function to be decorated.

    Returns:
        callable: A wrapper function that raises a NotImplementedError.

    Raises:
        NotImplementedError: On every call of the returned wrapper.
    """
    # Imported locally so this decorator does not depend on the module's
    # import block being changed.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        raise NotImplementedError(f"{func.__name__} is not yet implemented")

    return wrapper
# pylint: disable=W0613
@app.command(no_args_is_help=False, name="static")
def static_analysis(
    path_: Annotated[str, typer.Option("--path", help="path to file/files")] = None,
    lists_: Annotated[
        bool,
        typer.Option(
            "--lists",
            help="Use this option to include lists in analysis",
        ),
    ] = False,
    dicts_: Annotated[
        bool,
        typer.Option(
            "--dicts",
            help="Use this option to include dicts in analysis",
        ),
    ] = False,
    sets_: Annotated[
        bool,
        typer.Option(
            "--sets",
            help="Use this option to include sets in analysis",
        ),
    ] = False,
    tuples_: Annotated[
        bool,
        typer.Option(
            "--tuples",
            help="Use this option to include tuples in analysis",
        ),
    ] = False,
    all_: Annotated[
        bool,
        typer.Option(
            "--all",
            help="If you want pyggester to use all its capabilities use this option",
        ),
    ] = False,
    help_: Annotated[
        bool, typer.Option("--help", help="Get full documentation")
    ] = False,
):
    """
    Perform static analysis using PyggestStatic.

    This command allows you to perform static analysis using PyggestStatic, a tool for
    analyzing Python code. You can specify various options to customize the analysis.

    """
    # The docstring above is left as it was: typer shows it as the command's
    # help text, so it is user-facing output.
    # All options are accepted but currently unused (hence the W0613 waiver).
    # Previously a typer.Exit was constructed with the message as its exit
    # code and never raised, so the command silently did nothing. Tell the
    # user and terminate cleanly with exit code 0.
    typer.echo("Not implemented currently.")
    raise typer.Exit(code=0)
30 | 31 | def handle_help_(self) -> Union[None, typer.Exit]: 32 | """ 33 | Handle the --HELP option by displaying the README file. 34 | 35 | If the --HELP option is specified, this function reads and displays the README file 36 | using the Rich library's Console and Markdown features. It then raises a Typer Exit 37 | to terminate the program, because if the --HELP option gets used no other operation 38 | should take place 39 | 40 | Returns: 41 | Union[None, Exit]: None if the function doesn't return anything, or a Typer Exit object. 42 | """ 43 | # pylint: disable=E1101 44 | if self.help_: 45 | console = Console() 46 | with open(os.path.join(README_FILES_DIR, self.README)) as readme: 47 | markdown = Markdown(readme.read()) 48 | console.print(markdown) 49 | raise typer.Exit() 50 | 51 | def handle_no_valid_combination(self) -> Union[None, typer.Exit]: 52 | """ 53 | Handle the case when there is no valid combination/usage of options. 54 | 55 | This function displays an error message using the custom_print function and raises 56 | a Typer Exit to terminate the program. 57 | """ 58 | custom_print( 59 | "No valid combination/usage of options! 
class PyggestTransform(CommandHandler):
    """
    Command handler behind the ``transform`` sub-command.

    Stores the target path and the help flag, and either shows the help
    document or runs ``PyggesterDynamic`` on the path.
    """

    __slots__: ClassVar[tuple[str]] = ("path_", "help_")

    def __init__(self, path_, help_) -> None:
        self.help_ = help_
        self.path_ = path_
        # Help document name; handle_help_ joins it onto the help files dir.
        self.README = pathlib.Path("dynamic_helper.md")
        super().__init__()

    def process(self) -> None:
        """
        Show the help text when requested, otherwise run the transformation.

        A ``typer.Exit`` is propagated to the caller; any other exception is
        printed and swallowed.
        """
        try:
            if self.help_:
                self.handle_help_()
            PyggesterDynamic(self.path_).run()
        except typer.Exit:
            # Deliberate termination requests must reach typer untouched.
            raise
        except Exception as error:
            print(error)
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
160 | #.idea/ 161 | .vscode/* 162 | output/* 163 | *.build/* 164 | *.dist/* 165 | venv*/* 166 | scripts/* 167 | -------------------------------------------------------------------------------- /tests/test_module_importer.py: -------------------------------------------------------------------------------- 1 | import ast 2 | import unittest 3 | import pytest 4 | from pyggester.module_importer import ( 5 | ImportsVisitor, 6 | ImportModuleTransformer, 7 | add_imports, 8 | ) 9 | from pyggester.wrappers import get_wrappers_as_strings 10 | 11 | 12 | class TestImportsVisitor(unittest.TestCase): 13 | def test_import_detection(self): 14 | code_import = "import module_name" 15 | self.assertTrue(self._check_import(code_import, "module_name")) 16 | 17 | code_import_alias = "import module_name as alias_name" 18 | self.assertTrue(self._check_import(code_import_alias, "module_name")) 19 | 20 | code_from_import = "from module_name import name1, name2" 21 | self.assertTrue(self._check_import(code_from_import, "module_name")) 22 | 23 | code_from_import_alias = ( 24 | "from module_name import name1 as alias_name1, name2 as alias_name2" 25 | ) 26 | self.assertTrue(self._check_import(code_from_import_alias, "module_name")) 27 | 28 | code_non_matching_import = "import other_module" 29 | self.assertFalse(self._check_import(code_non_matching_import, "module_name")) 30 | 31 | code_non_matching_from_import = "from other_module import name" 32 | self.assertFalse( 33 | self._check_import(code_non_matching_from_import, "module_name") 34 | ) 35 | 36 | def _check_import(self, code, module_name): 37 | tree = ast.parse(code) 38 | visitor = ImportsVisitor(module_name, set()) 39 | visitor.visit(tree) 40 | return visitor.imported 41 | 42 | 43 | @pytest.mark.parametrize( 44 | "wrapper_name", 45 | [ 46 | "ObservableListWrapper", 47 | "ObservableDictWrapper", 48 | "ObservableTupleWrapper", 49 | "ObservableSetWrapper", 50 | "ObservableNamedTupleWrapper", 51 | "ObservableNumpyArrayWrapper", 52 | 
"ObservablePandasDataFrameWrapper", 53 | ], 54 | ) 55 | def test_import_addition(wrapper_name): 56 | code = "print('Hello, world!')" 57 | transformer = ImportModuleTransformer( 58 | ast.parse(code), "pyggester.wrappers", {wrapper_name} 59 | ) 60 | transformed_code = _apply_transformer(transformer) 61 | 62 | assert wrapper_name in transformed_code 63 | assert "print('Hello, world!')" in transformed_code 64 | 65 | 66 | @pytest.mark.parametrize( 67 | "wrapper_name", 68 | [ 69 | "ObservableListWrapper", 70 | "ObservableDictWrapper", 71 | "ObservableTupleWrapper", 72 | "ObservableSetWrapper", 73 | "ObservableNamedTupleWrapper", 74 | "ObservableNumpyArrayWrapper", 75 | "ObservablePandasDataFrameWrapper", 76 | ], 77 | ) 78 | def test_no_import_change(wrapper_name): 79 | code = f"from pyggester.wrappers import {wrapper_name}\nprint('Hello, world!')" 80 | transformer = ImportModuleTransformer( 81 | ast.parse(code), "pyggester.wrappers", {wrapper_name} 82 | ) 83 | transformed_code = _apply_transformer(transformer) 84 | assert code in transformed_code 85 | 86 | 87 | def _apply_transformer(transformer): 88 | transformed_tree = transformer.visit(transformer.tree_) 89 | return ast.unparse(transformed_tree) 90 | 91 | 92 | @pytest.mark.parametrize( 93 | "wrapper_cls", 94 | [ 95 | "ObservableListWrapper", 96 | "ObservableDictWrapper", 97 | "ObservableTupleWrapper", 98 | "ObservableSetWrapper", 99 | "ObservableNamedTupleWrapper", 100 | "ObservableNumpyArrayWrapper", 101 | "ObservablePandasDataFrameWrapper", 102 | ], 103 | ) 104 | def test_add_imports(wrapper_cls): 105 | code = "print('Hello, world!')" 106 | tree = ast.parse(code) 107 | expected_code = f"from pyggester.observables import {wrapper_cls}\n{code}" 108 | transformed_tree = add_imports(tree, "pyggester.observables", [wrapper_cls]) 109 | assert ast.unparse(transformed_tree) == expected_code 110 | -------------------------------------------------------------------------------- /pyggester/module_importer.py: 
class ImportsVisitor(ast.NodeVisitor):
    """
    AST visitor that records whether a specific module is imported in the code.

    Both ``import <module>`` and ``from <module> import ...`` are detected, at any
    nesting depth reached by the visitor (module level, functions, classes, ...).
    """

    __slots__: Tuple[str, ...] = ("module_name", "imported", "names")

    def __init__(self, module_name: str, names: Set[str]) -> None:
        """
        Args:
            module_name (str): The name of the module to check for.
            names (Set[str]): Names requested from the module. Only stored;
                detection is based on the module name alone.
        """
        self.module_name = module_name
        # Flipped to True by the visit_* methods as soon as a match is seen.
        self.imported = False
        self.names = names

    def visit_Import(self, node: ast.Import) -> Any:
        """
        Visit an Import node and flag ``import module_name`` (aliased or not).

        Args:
            node (ast.Import): The Import node to visit.
        """
        if any(alias.name == self.module_name for alias in node.names):
            self.imported = True

    def visit_ImportFrom(self, node: ast.ImportFrom) -> Any:
        """
        Visit an ImportFrom node and flag ``from module_name import ...``.

        Args:
            node (ast.ImportFrom): The ImportFrom node to visit.
        """
        if node.module == self.module_name:
            self.imported = True


class ImportModuleTransformer(ast.NodeTransformer):
    """
    AST transformer to add or update an import statement for a specific module.

    Existing module-level imports of ``module_name`` are replaced by a single
    ``from module_name import <names>`` statement. The new statement is placed
    after the module docstring and any ``from __future__`` imports so that the
    transformed module stays valid Python.
    """

    __slots__: Tuple[str, ...] = ("module_name", "names", "tree_", "imports_visitor")

    def __init__(
        self, tree_: ast.AST, module_name: str, names: Set[str] = None
    ) -> None:
        """
        Args:
            tree_ (ast.AST): Abstract syntax tree of the module.
            module_name (str): Module whose import statement is added/updated.
            names (Set[str]): Names to import from ``module_name``. When empty
                or None no import statement is added.
        """
        self.module_name = module_name
        self.names = names
        self.tree_ = tree_
        self.imports_visitor = ImportsVisitor(module_name, names)

    def visit_Module(self, node: ast.Module) -> Any:
        """
        Visit a Module node.

        Replace any existing module-level import of ``module_name`` with one
        ``from module_name import <names>`` statement.

        Args:
            node (ast.Module): The Module node to visit.

        Returns:
            ast.Module: The transformed Module node.
        """
        self.imports_visitor.visit(self.tree_)
        if self.imports_visitor.imported:
            # Only module-level statements are rewritten. Imports nested in
            # functions/classes are left alone; trying to remove them from
            # ``node.body`` used to raise ValueError.
            node.body = self._without_module_imports(node.body)

        if self.names:
            import_stmt = ast.ImportFrom(
                module=self.module_name,
                names=[ast.alias(name=name, asname=None) for name in self.names],
                level=0,
            )
            node.body.insert(self._insertion_index(node.body), import_stmt)
            # Give the synthetic node line/column info so the tree can also be
            # passed to compile(), not only to a source generator.
            ast.fix_missing_locations(node)

        return node

    def _without_module_imports(self, body: list) -> list:
        """Return ``body`` without module-level imports of ``self.module_name``."""
        kept = []
        for stmt in body:
            if isinstance(stmt, ast.ImportFrom) and stmt.module == self.module_name:
                continue
            if isinstance(stmt, ast.Import):
                remaining = [
                    alias for alias in stmt.names if alias.name != self.module_name
                ]
                if not remaining:
                    continue
                # ``import os, target`` keeps ``import os`` instead of
                # dropping the whole statement.
                stmt.names = remaining
            kept.append(stmt)
        return kept

    @staticmethod
    def _insertion_index(body: list) -> int:
        """Return the first index after the docstring and ``__future__`` imports."""
        index = 0
        if (
            body
            and isinstance(body[0], ast.Expr)
            and isinstance(body[0].value, ast.Constant)
            and isinstance(body[0].value.value, str)
        ):
            index = 1
        while (
            index < len(body)
            and isinstance(body[index], ast.ImportFrom)
            and body[index].module == "__future__"
        ):
            index += 1
        return index


def add_imports(tree: ast.AST, module_, wrappers) -> ast.AST:
    """
    Adds Wrapper imports to each module being transformed. This is meant to be ran
    for each module/file in the process of transformation.

    Args:
        tree (ast.AST): Parsed module to modify (it is modified in place).
        module_: Dotted name of the module to import from.
        wrappers: Iterable of names to import from ``module_``.

    Returns:
        ast.AST: The same tree with the import statement added/updated.
    """
    transformer = ImportModuleTransformer(tree, module_, wrappers)
    return transformer.visit(tree)
class ObservableCollectorAppender(ast.NodeTransformer):
    """
    * Collects each observable instance by appending it into the
    OBSERVABLE_COLLECTOR
    ----------------------------------
    import module1
    import module2
    ...(other import stmts)

    OBSERVABLE_COLLECTOR = []
    ...(other stmts)

    list_ = ObservableList([1,2,3])
    OBSERVABLE_COLLECTOR.append(list_)
    ---------------------------------
    """

    __slots__: Tuple[str] = ()

    def visit_Assign(self, node: ast.Assign) -> Any:
        """
        Visit each Assign node to find and collect instances of observable types,
        indicated by 'Observable' being part of the called function's name.

        Args:
            node (ast.Assign): The assignment being visited.

        Returns:
            The unchanged node, or ``[node, append_stmt]`` when the assigned
            value is a call to an observable and the first target can be
            referenced again (a name, attribute or subscript).
        """
        if not isinstance(node.value, ast.Call):
            return node

        func_node = node.value.func
        if isinstance(func_node, ast.Name):
            func_name = func_node.id
        elif isinstance(func_node, ast.Attribute):
            func_name = func_node.attr
        else:
            func_name = ""

        if "Observable" not in func_name:
            return node

        target = node.targets[0]
        # Tuple/list unpacking targets have no single expression that refers
        # to the observable, so they are left uncollected instead of crashing.
        if not isinstance(target, (ast.Name, ast.Attribute, ast.Subscript)):
            return node

        # ast.unparse (Python 3.9+) renders attribute and subscript targets
        # such as ``self.items`` or ``cache['k']`` as well as plain names.
        append_to_list_code = f"OBSERVABLE_COLLECTOR.append({ast.unparse(target)})"
        # ast.parse returns a Module; only its single statement belongs in the
        # enclosing statement list.
        append_stmt = ast.parse(append_to_list_code).body[0]
        return [node, append_stmt]


class ObservableRunner(ast.NodeTransformer):
    """
    * This transformer inserts the code that runs every observable.
    Observables don't explicitly run themselves to print the collected suggestions,
    because they might still be in use elsewhere.
    For example, they could have been passed as function parameters.
    However, by running the observables in the global scope after everything in the module,
    we ensure that collections declared in that scope have been fully processed,
    even if they were given or injected into other modules, classes, or functions.
    -----------------------------------
    import module1
    import module2
    ...
    (functions, classes and every possible python construct)
    ...
    for observable in OBSERVABLE_COLLECTOR:
        observable.run()
    -----------------------------------
    """

    __slots__: Tuple[str] = ()

    def visit_Module(self, node: ast.Module) -> Any:
        """
        Append the observable-running loop as the last module-level statement.

        Args:
            node (ast.Module): The module being transformed.

        Returns:
            ast.Module: The same module with the loop appended.
        """
        observable_runner_code = (
            """for observable in OBSERVABLE_COLLECTOR: observable.run()"""
        )
        # Splice in the parsed statement itself rather than the wrapping
        # Module node, so the resulting tree is a valid module.
        node.body.extend(ast.parse(observable_runner_code).body)
        return node
def apply_observable_collector_transformations(
    tree: ast.AST, run_observables=False
) -> str:
    """
    Run the whole pyggester instrumentation pipeline on a parsed module and
    return the resulting source code. The caller is expected to write that
    source to a new file that mirrors the original one.

    Steps, in order: add the observables import, add the OBSERVABLE_COLLECTOR
    import, wrap collections, then apply the collector modifications.
    """
    import_requests = (
        ("pyggester.observables", get_wrappers_as_strings()),
        ("pyggester.observable_collector", ["OBSERVABLE_COLLECTOR"]),
    )
    for module_name, imported_names in import_requests:
        tree = add_imports(tree, module_name, imported_names)

    wrapped_tree = apply_wrappers(tree)
    instrumented_tree = apply_observable_collector_modifications(
        wrapped_tree, run_observables
    )
    return astor.to_source(instrumented_tree)


def apply_observable_collector_modifications(tree: ast.AST, run_observables) -> ast.AST:
    """
    Apply the observable-collector modifications to a module's AST.

    1. Append every observable assignment to the observable collector.
    2. When ``run_observables`` is truthy, also add the code that runs the
       collected observables.

    This is applied per module, so suggestions are produced module by module
    as each transformed file is executed.
    """
    collected_tree = ObservableCollectorAppender().visit(tree)
    if not run_observables:
        return collected_tree
    return ObservableRunner().visit(collected_tree)
class PyggesterDynamic:
    """
    A class for dynamically transforming files / directories
    This is the main 'engine' that glues everything together for pyggester to work under 'pyggester transform'

    Args:
        path_ (str): The path to the file or directory to be transformed.

    Attributes:
        path_ (pathlib.Path): The absolute path to the file or directory.

    Methods:
        run(): Runs the transformation process based on the type of path provided.
        _transform_file(file_path, run_observable, output_path): Transforms a single file.
        _transform_directory(): Transforms all files in a directory.
    """

    __slots__ = ("path_",)

    def __init__(self, path_: str) -> None:
        self.path_ = pathlib.Path(path_).absolute()

    def run(self):
        """
        Runs the transformation process based on the type of path provided.

        Raises:
            FileNotFoundError: If ``self.path_`` does not exist.
        """
        if not self.path_.exists():
            raise FileNotFoundError(f"The path '{self.path_}' does not exist.")

        if self.path_.is_file():
            self._transform_file(self.path_, run_observable=True)
            custom_print("File transformed successfully!", border_style="green")
        elif self.path_.is_dir():
            self._transform_directory()
            custom_print("Directory transformed successfully!", border_style="green")

    def _transform_file(
        self,
        file_path: pathlib.Path,
        run_observable: bool,
        output_path: pathlib.Path = None,
    ) -> None:
        """
        Transforms a single file by applying observable collector transformations.

        The file is read, parsed, passed through
        ``apply_observable_collector_transformations`` and the resulting source
        is written out.

        Args:
            file_path (pathlib.Path): The path to the file to be transformed.
            run_observable (bool): Indicates whether to run observables in the file.
            output_path (pathlib.Path): Where to write the transformed code.
                Defaults to ``<stem>_transformed<suffix>`` next to ``file_path``.

        Returns:
            None
        """
        # Read and write as UTF-8 explicitly instead of relying on the
        # platform's locale encoding.
        code = file_path.read_text(encoding="utf-8")
        transformed_code = apply_observable_collector_transformations(
            ast.parse(code), run_observables=run_observable
        )
        if output_path is None:
            output_path = (
                file_path.parent / f"{file_path.stem}_transformed{file_path.suffix}"
            )
        output_path.write_text(transformed_code, encoding="utf-8")

    def _transform_directory(self) -> None:
        """
        Transforms all files in a directory.

        Prompts for the name of the main file, then mirrors the directory
        ``self.path_`` into a sibling directory named
        "{self.path_.name}_transformed". Python files are transformed (only the
        main file gets the code that runs the observables); every other file is
        copied unchanged. The directory structure is preserved.

        Transformed files are written straight into the mirrored tree, so no
        temporary ``*_transformed.py`` files are created inside the source
        directory and existing files with such names are never overwritten.

        Raises:
            FileNotFoundError: If the given main file does not exist.

        Returns:
            None
        """
        main_file_name = input("Enter the name of the main file: ")
        main_file_path = self.path_ / main_file_name

        if not main_file_path.exists():
            raise FileNotFoundError(f"The main file '{main_file_path}' does not exist.")

        transformed_dir_path = self.path_.parent / f"{self.path_.name}_transformed"
        os.makedirs(transformed_dir_path, exist_ok=True)

        excluded_dirs = {"__pycache__", ".git", ".venv"}

        for root, dirs, files in os.walk(self.path_):
            # Prune in place so os.walk does not descend into excluded dirs.
            dirs[:] = [d for d in dirs if d not in excluded_dirs]

            root_path = pathlib.Path(root)
            # Mirror directories at their relative location, not at the top
            # level of the transformed tree.
            target_root = transformed_dir_path / root_path.relative_to(self.path_)
            target_root.mkdir(parents=True, exist_ok=True)
            for dir_name in dirs:
                (target_root / dir_name).mkdir(exist_ok=True)

            for file_name in files:
                source_path = root_path / file_name
                destination_path = target_root / file_name
                if file_name.endswith(".py"):
                    self._transform_file(
                        source_path,
                        run_observable=source_path == main_file_path,
                        output_path=destination_path,
                    )
                else:
                    shutil.copy(source_path, destination_path)
def _first_expression(source):
    """Parse ``source`` and return the expression node of its only statement."""
    (statement,) = ast.parse(source).body
    return statement.value


def _transform_with(wrapper_cls, code):
    """Parse ``code``, run a tree-aware wrapper over it and return the new source."""
    module = ast.parse(code)
    rewritten = wrapper_cls(module).visit(module)
    return ast.unparse(rewritten)


@pytest.fixture
def example_list_node():
    return _first_expression("[1, 2, 3]")


@pytest.fixture
def example_dict_node():
    return _first_expression("{1: 'one', 2: 'two'}")


@pytest.fixture
def example_tuple_node():
    return _first_expression("(1, 2, 3)")


@pytest.fixture
def example_set_node():
    return _first_expression("{1, 2, 3}")


def test_observable_list_wrapper(example_list_node):
    """A list literal is wrapped in an ObservableList call."""
    wrapped = ObservableListWrapper().visit(example_list_node)
    assert isinstance(wrapped, ast.Call)
    assert ast.unparse(wrapped) == "ObservableList([1, 2, 3])"


def test_observable_dict_wrapper(example_dict_node):
    """A dict literal is wrapped in an ObservableDict call."""
    wrapped = ObservableDictWrapper().visit(example_dict_node)
    assert isinstance(wrapped, ast.Call)
    assert ast.unparse(wrapped) == "ObservableDict({1: 'one', 2: 'two'})"


def test_observable_tuple_wrapper(example_tuple_node):
    """A tuple literal is wrapped in an ObservableTuple call."""
    wrapped = ObservableTupleWrapper().visit(example_tuple_node)
    assert isinstance(wrapped, ast.Call)
    assert ast.unparse(wrapped) == "ObservableTuple((1, 2, 3))"


def test_observable_set_wrapper(example_set_node):
    """A set literal is wrapped in an ObservableSet call."""
    wrapped = ObservableSetWrapper().visit(example_set_node)
    assert isinstance(wrapped, ast.Call)
    assert ast.unparse(wrapped) == "ObservableSet({1, 2, 3})"


class TestObservableNamedTupleWrapper:
    """namedtuple instances get a companion ``<name>_wrapper`` observable."""

    @staticmethod
    def transform_and_get_code(code):
        return _transform_with(ObservableNamedTupleWrapper, code)

    @staticmethod
    def assert_transformed_code_equals(code, expected_result):
        actual = TestObservableNamedTupleWrapper.transform_and_get_code(code)
        assert actual.strip() == expected_result.strip()

    def test_simple_namedtuple(self):
        code = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
"""
        expected_result = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
p_wrapper = ObservableNamedTuple(*p)
"""
        self.assert_transformed_code_equals(code, expected_result)

    def test_nested_namedtuple(self):
        code = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
Circle = namedtuple('Circle', ['center', 'radius'])
c = Circle(Point(0, 0), 5)
"""
        expected_result = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
Circle = namedtuple('Circle', ['center', 'radius'])
c = Circle(Point(0, 0), 5)
c_wrapper = ObservableNamedTuple(*c)
"""
        self.assert_transformed_code_equals(code, expected_result)


def test_wrap_numpy_array():
    code = """
import numpy as np
arr = np.array([1, 2, 3])
"""
    expected_result = """
import numpy as np
arr = np.array([1, 2, 3])
arr_numpy_wrapper = ObservableNumpyArray(arr)
"""
    assert transform_code_numpy_array(code).strip() == expected_result.strip()


def test_wrap_nested_numpy_array():
    code = """
from numpy import array as arr
nested_arr = arr([arr([1, 2]), arr([3, 4])])
"""
    expected_result = """
from numpy import array as arr
nested_arr = arr([arr([1, 2]), arr([3, 4])])
nested_arr_numpy_wrapper = ObservableNumpyArray(nested_arr)
"""
    assert transform_code_numpy_array(code).strip() == expected_result.strip()


def transform_code_numpy_array(code):
    """Return ``code`` after the NumPy array wrapper has been applied."""
    return _transform_with(ObservableNumpyArrayWrapper, code)


def test_wrap_pandas_dataframe():
    code = """
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
"""
    expected_result = """
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df_pandas_wrapper = ObservablePandasDataFrame(df)
"""
    assert transform_code_pandas_data_frame(code).strip() == expected_result.strip()


def test_wrap_nested_pandas_dataframe():
    code = """
from pandas import DataFrame as df
nested_df = df({'A': df([1, 2]), 'B': df([3, 4])})
"""
    expected_result = """
from pandas import DataFrame as df
nested_df = df({'A': df([1, 2]), 'B': df([3, 4])})
nested_df_pandas_wrapper = ObservablePandasDataFrame(nested_df)
"""
    assert transform_code_pandas_data_frame(code).strip() == expected_result.strip()


def transform_code_pandas_data_frame(code):
    """Return ``code`` after the pandas DataFrame wrapper has been applied."""
    return _transform_with(ObservablePandasDataFrameWrapper, code)
transformed_tree = transformer.visit(tree) 185 | transformed_code = ast.unparse(transformed_tree) 186 | return transformed_code 187 | -------------------------------------------------------------------------------- /pyggester/data/help_files/dynamic_helper.md: -------------------------------------------------------------------------------- 1 | # Usage (Step-by-Step) 2 | 3 | ## Single File Usage 4 | 5 | 6 | Lets suppose you have a single python file that you want to dynamically analyze(run-time analysis) 7 | 8 | ### 1. Preparation 9 | 10 | Before code transformation with pyggester: 11 | ```bash 12 | (venv) root@user:~/my_app> ls 13 | app.py 14 | ``` 15 | 16 | Content of app.py: 17 | 18 | ```python 19 | def sum_of_integers(integer_list): 20 | total = sum(integer_list) 21 | return total 22 | 23 | my_list = [1, 2, 3, 4, 5] 24 | print(sum_of_integers(my_list)) 25 | 26 | ``` 27 | ### 2. Transformation 28 | 29 | > [!IMPORTANT] 30 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.** 31 | 32 | ```bash 33 | (venv) root@devs04:~/my_app> pyggest transform app.py 34 | ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 35 | │ File transformed successfully! │ 36 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 37 | ``` 38 | ### 3. 
Post-Transformation 39 | 40 | ```bash 41 | (venv) root@devs04:~/my_app> ls 42 | app.py app_transformed.py 43 | ``` 44 | 45 | Content of app_transformed.py: 46 | 47 | ```python 48 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 49 | from pyggester.observables import ObservableNumpyArray, ObservableNamedTuple, ObservableSet, ObservablePandasDataFrame, ObservableList, ObservableDict, ObservableTuple 50 | 51 | 52 | def sum_of_integers(integer_list): 53 | total = sum(integer_list) 54 | return total 55 | 56 | 57 | my_list = ObservableList([1, 2, 3, 4, 5]) 58 | OBSERVABLE_COLLECTOR.append(my_list) 59 | print(sum_of_integers(my_list)) 60 | 61 | for observable in OBSERVABLE_COLLECTOR: 62 | observable.run() 63 | 64 | ``` 65 | 66 | > [!IMPORTANT] 67 | > We now have a new file, automatically created, that mirrors the original file. This new file includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app_transformed.py'. Rest assured, everything from 'app.py' is retained in 'app_transformed.py'. 68 | 69 | ### 4. Running the Transformed Code 70 | 71 | ```bash 72 | (venv) root@devs04:~/my_app> python3 app_transformed.py 73 | 15 74 | ╭────────────────────────────────────────────────────────────────────────────╮ 75 | │ 10 | Suggestions(/root/my_app/app_transformed.py): │ 76 | │ [*] Consider using an array.array instead of a list, for optimal │ 77 | │ memory consumption │ 78 | │ [*] Consider using a set instead of a list, because of unique elements │ 79 | ╰────────────────────────────────────────────────────────────────────────────╯ 80 | ``` 81 | 82 | ## Directory Usage 83 | 84 | Lets suppose you have a python project(directory/repo) that you want to dynamically analyze(run-time analysis) 85 | 86 | ### 1. 
Preparation 87 | 88 | Before code transformation with pyggester: 89 | ```bash 90 | (venv) root@devs04:~/python_demo/app_dir> ls 91 | __pycache__ app.py temperature.py weather.py 92 | ``` 93 | 94 | Content of app.py: 95 | 96 | ```python 97 | import weather 98 | import temperature 99 | 100 | 101 | def main(): 102 | city = input('Enter a city name: ') 103 | weather_condition = weather.get_weather(city) 104 | avg_temp = temperature.get_average_temperature() 105 | print(f'Weather in {city}: {weather_condition}') 106 | print(f'Average temperature: {avg_temp} degrees Celsius') 107 | 108 | 109 | main() 110 | ``` 111 | 112 | Content of temperature.py: 113 | ```python 114 | temperatures = list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 20]) 115 | 116 | 117 | def get_average_temperature(): 118 | return sum(temperatures) / len(temperatures) 119 | 120 | ``` 121 | 122 | Content of weather.py: 123 | ```python 124 | weather_conditions = ['Sunny', 'Rainy', 'Cloudy', 'Windy', 'Sunny', 'Cloudy'] 125 | 126 | def get_weather(city): 127 | return weather_conditions.pop() 128 | ``` 129 | 130 | ### 2. Transformation 131 | 132 | > [!IMPORTANT] 133 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.** 134 | 135 | ```bash 136 | (venv) root@devs04:~/python_demo> pyggest transform app_dir/ 137 | Enter the name of the main file: app.py 138 | ╭──────────────────────────────────────────────────────────────────────────╮ 139 | │ Directory transformed successfully! │ 140 | ╰──────────────────────────────────────────────────────────────────────────╯ 141 | ``` 142 | > [!IMPORTANT] 143 | > When a directory or project is specified as an argument, pyggester prompts us to specify the main file of our project. This file should be the entry point of your project, indicated by its file name. 144 | 145 | ### 3. 
Post-Transformation 146 | 147 | ```bash 148 | (venv) root@devs04:~/python_demo> ls 149 | app_dir app_dir_transformed 150 | ``` 151 | 152 | Content of app_dir_transformed/: 153 | 154 | ```python 155 | (venv) root@devs04:~/python_demo/app_dir_transformed> ls 156 | app.py temperature.py weather.py 157 | ``` 158 | 159 | Content of app.py: 160 | ```python 161 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 162 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 163 | import weather 164 | import temperature 165 | 166 | 167 | def main(): 168 | city = input('Enter a city name: ') 169 | weather_condition = weather.get_weather(city) 170 | avg_temp = temperature.get_average_temperature() 171 | print(f'Weather in {city}: {weather_condition}') 172 | print(f'Average temperature: {avg_temp} degrees Celsius') 173 | 174 | 175 | main() 176 | for observable in OBSERVABLE_COLLECTOR: 177 | observable.run() 178 | 179 | ``` 180 | 181 | Content of temperature.py: 182 | ```python 183 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 184 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 185 | temperatures = ObservableList(list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 186 | 20])) 187 | OBSERVABLE_COLLECTOR.append(temperatures) 188 | 189 | 190 | def get_average_temperature(): 191 | return sum(temperatures) / len(temperatures) 192 | 193 | ``` 194 | 195 | Content of weather.py: 196 | 197 | ```python 198 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 199 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 200 | weather_conditions = ObservableList(['Sunny', 'Rainy', 'Cloudy', 'Windy', 201 | 
'Sunny', 'Cloudy']) 202 | OBSERVABLE_COLLECTOR.append(weather_conditions) 203 | 204 | 205 | def get_weather(city): 206 | return weather_conditions.pop() 207 | 208 | ``` 209 | 210 | > [!IMPORTANT] 211 | > We now have a new directory, automatically created, that mirrors the original directory. This new directory includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app.py' that resides inside 'app_dir_transformed/'. Rest assured, everything from 'app_dir' is retained in 'app_dir_transformed/'. 212 | 213 | ### 4. Running the Transformed Code 214 | 215 | ```bash 216 | (venv) root@devs04:~/python_demo/app_dir_transformed> python3 app.py 217 | Enter a city name: Pristina 218 | Weather in Pristina: Cloudy 219 | Average temperature: 19.545454545454547 degrees Celsius 220 | ╭─────────────────────────────────────────────────────────────────────────────────────╮ 221 | │ 3 | Suggestions(/root/python_demo/app_dir_transformed/temperature.py): │ 222 | │ [*] Consider using an array.array instead of a list, for optimal memory │ 223 | │ consumption │ 224 | ╰─────────────────────────────────────────────────────────────────────────────────────╯ 225 | ``` -------------------------------------------------------------------------------- /contributing.md: -------------------------------------------------------------------------------- 1 | # Welcome to pyggester! 2 | 3 | Thank you for your interest in contributing to pyggester. Whether you're a developer, designer, tester, or someone with great ideas, your contributions are valuable. 4 | 5 | ## Getting Started 6 | 7 | 1. **Fork the Repository:** Start by forking the [Project Repository](https://github.com/ValdonVitija/pyggester) on GitHub. This creates a copy of the project under your GitHub account. 8 | 9 | ```bash 10 | git clone https://github.com/ValdonVitija/pyggester.git 11 | ``` 12 | 13 | 2. 
**Create a Branch:** Move into the project's directory and create a new branch for your contribution: 14 | 15 | ```bash 16 | cd pyggester 17 | git checkout -b your-branch-name 18 | ``` 19 | 20 | ## Making Changes 21 | 22 | Changes can encompass various aspects, provided they are reasonable. We welcome modifications to overall logic, naming conventions, hierarchy, and directory structure (with meticulous attention, especially for alterations to the project directory). 23 | 24 | # Wrappers 25 | 26 | Includes classes designed to encapsulate collections within observables. Every wrapper extends ast.NodeTransformer, enabling the classes to effectively wrap individual data structures. Each specific wrapper is tailored to implement only the visitor method relevant to the data structure it encapsulates. 27 | 28 | Built-in wrappers are all already done, because all we need to do is wrap the original 29 | data structure declarations with observables. 30 | 31 | Example (ObservableListWrapper): 32 | ```python 33 | class ObservableListWrapper(ast.NodeTransformer): 34 | """AST transformer to wrap lists with ObservableList.""" 35 | 36 | __slots__: Tuple[str] = () 37 | 38 | def visit_List(self, node: ast.List) -> Union[ast.Call, ast.AST]: 39 | """ 40 | Transform a List node to an ObservableList node. 41 | 42 | Args: 43 | node (ast.List): The original List node. 44 | 45 | Returns: 46 | Union[ast.Call, ast.AST]: The transformed node. 47 | """ 48 | return ast.Call( 49 | func=ast.Name(id="ObservableList", ctx=ast.Load()), args=[node], keywords=[] 50 | ) 51 | ``` 52 | 53 | Specialized collections from the collections library and third-party collections in Python are a bit different. We cannot directly 'derive' from them, but we can pass the declared data structure objects by reference to our custom Observables.
54 | 55 | Such Wrappers are: 56 | - ObservableNumpyArrayWrapper 57 | - ObservableNamedTupleWrapper 58 | - ObservablePandasDataFrameWrapper 59 | 60 | Example (ObservableNumpyArrayWrapper): 61 | 62 | ```python 63 | class ObservableNumpyArrayWrapper(ast.NodeTransformer): 64 | """AST transformer to wrap NumPy array instances with ObservableNumpyArray.""" 65 | 66 | class NumpyImportsVisitor(ast.NodeVisitor): 67 | def __init__(self): 68 | self.alias_name = None 69 | self.alias_asname = None 70 | 71 | def visit_Import(self, node): 72 | """ 73 | Check numpy imports, because we need to determine how to 74 | wrap the initiated array instances 75 | 76 | [*] import numpy 77 | [*] import numpy as np 78 | [*] import numpy as 'alias' 79 | """ 80 | for name in node.names: 81 | if name.name == "numpy": 82 | self.alias_name = name.name 83 | if name.name == "numpy" and getattr(name, "asname"): 84 | self.alias_asname = name.asname 85 | 86 | def visit_ImportFrom(self, node): 87 | """ 88 | Check 'from' numpy imports, because we need to determine how to wrao 89 | the initiated array instances 90 | 91 | [*] from numpy import array 92 | [*] from numpy import array as arr 93 | [*] from numpy import ones 94 | ... 95 | """ 96 | if node.module == "numpy": 97 | for name in node.names: 98 | if name.name in ["array", "zeros", "ones", "empty"]: 99 | self.alias_name = name.name 100 | if getattr(name, "asname"): 101 | self.alias_asname = name.asname 102 | 103 | def __init__(self, tree) -> None: 104 | self.imports_visitor = self.NumpyImportsVisitor() 105 | self.imports_visitor.visit(tree) 106 | 107 | def visit_Assign(self, node: ast.Assign) -> ast.AST: 108 | """ 109 | Now visit each Assign node and check if that node is a numpy array instance. If thats the case, wrap each instance into an ObservableNumpyArray, 110 | so that we can analyze its internal structure for potential suggestions. 
111 | """ 112 | if getattr(node, "value") and isinstance(node.value, ast.Call): 113 | if getattr(node.value, "func"): 114 | if isinstance(node.value.func, ast.Name): 115 | id_ = self.get_alias_name() 116 | if node.value.func.id == id_: 117 | return self.wrap_numpy_array(node) 118 | 119 | elif isinstance(node.value.func, ast.Attribute): 120 | id_ = self.get_alias_name() 121 | if node.value.func.value.id == id_: 122 | return self.wrap_numpy_array(node) 123 | 124 | return node 125 | 126 | def get_alias_name(self): 127 | return self.imports_visitor.alias_asname or self.imports_visitor.alias_name 128 | 129 | def wrap_numpy_array(self, node): 130 | wrapper_code = f"{node.targets[0].id}_numpy_wrapper = ObservableNumpyArray({node.targets[0].id})" 131 | wrapper_node = ast.parse(wrapper_code).body[0] 132 | return [node, wrapper_node] 133 | 134 | ``` 135 | 136 | >[!NOTE] 137 | > Not every data structure from the collections library has a Wrapper and an Observable version right now. I expect potential contributors to work on them. 138 | 139 | 140 | # 👀 Observables 141 | 142 | The core functionality of pyggester revolves around observables, particularly enhanced versions of python data structures/collections that fully preserve the original functionality offered by these python data structures. These observables attempt to suggest alternative data structures if any issues are detected. 143 | 144 | Standard built-in collections/data structures: 145 | - list 146 | - tuple 147 | - set 148 | - dict 149 | 150 | > [!NOTE] 151 | > Python's built-in collections can be customized by adding your own methods and variables. This lets you analyze the collection more effectively without changing its basic features. 
152 | 153 | Specialized collections(part of the collections library): 154 | - ChainMap 155 | - Counter 156 | - OrderedDict 157 | - UserDict 158 | - UserList 159 | - UserString 160 | - defaultdict 161 | - deque 162 | - namedtuple 163 | 164 | Third-Party popular collections: 165 | - Numpy Arrays 166 | - Pandas DataFrame 167 | - Pandas Series 168 | 169 | 170 | Abstract Observable Representation (e.g : list): 171 | ```Python 172 | class ObservableList(list): 173 | """ 174 | The ObservableList is an enhanced version of a list that 175 | preserves the full original functionality of a list, but 176 | adds more features to it so that we keep track of anything that 177 | potentially happens in order to do dynamic analysis to each declared 178 | list. 179 | """ 180 | __slots__: Tuple[str] = ( 181 | "appended", 182 | "extended", 183 | "inserted", 184 | "removed", 185 | "count_", 186 | "in_operator_used", 187 | "message_handler", 188 | ) 189 | 190 | def __init__(self, *args, **kwargs) -> None: 191 | ... 192 | def append(self, item) -> None: 193 | super().append(item) 194 | self.appended = True 195 | 196 | def extend(self, iterable) -> None: ... 197 | def insert(self, index, item) -> None: ... 198 | def remove(self, item) -> None: ... 199 | def count(self, __value: Any) -> int: ... 200 | def __contains__(self, __key: object) -> bool: ... 201 | def get_list_dimension(self, lst): ... 202 | def check_numpy_array_instead_of_list(self): ... 203 | def check_array_instead_of_list(self): ... 204 | def can_list_be_converted_to_array(self): ... 205 | def check_list_to_set_conversion(self): ... 206 | def check_set_instead_of_list(self): ... 207 | def check_Counter_insteaf_of_list(self): ... 208 | def check_tuple_instead_of_list(self): ... 209 | def run(self): 210 | """ 211 | Only run checkers so that we offer a better running interface 212 | for each observable. 
213 | """ 214 | self.check_array_instead_of_list() 215 | self.check_numpy_array_instead_of_list() 216 | self.check_set_instead_of_list() 217 | self.check_Counter_insteaf_of_list() 218 | self.message_handler.print_messages() 219 | 220 | ``` 221 | 222 | If you make sure to preserve the original functionality of built in collections, the folowing statements are exactly the same: 223 | ```Python 224 | #List declarations 225 | list_ = [1,2,3] 226 | list_ = ObservableList([1,2,3]) 227 | list_ = ObservableList(list([1,2,3])) 228 | 229 | #Dict declarations 230 | dict_ = {"key":"value"} 231 | dict_ = ObservableDict({"key":"value"}) 232 | dict_ = ObservableDict(dict({"key":"value"})) 233 | 234 | #Tuple declarations 235 | tuple_ = (1,2,3) 236 | tuple_ = ObservableTuple([1,2,3]) 237 | tuple_ = ObservableTuple(tuple([1,2,3])) 238 | 239 | #Set declarations 240 | set_ = {1,2,3} 241 | set_ = ObservableSet({1,2,3}) 242 | set_ = ObservableSet(set({1,2,3})) 243 | ``` 244 | 245 | Currently, the supported observables are: 246 | 247 | - ObservableList 248 | - ObservableSet 249 | - ObservableTuple 250 | - ObservableDict 251 | - ObservableNumpyArray 252 | - ObservablePandasDataFrame 253 | - ObservableNamedTuple 254 | 255 | 256 | >[!IMPORTANT] 257 | > Other modules in pyggester are more specific and typically remain unchanged unless you're modifying the analysis approach. However, if you discover an improved method for analyzing or observing collections, or for the execution process post-code transformations, you're encouraged to submit a Pull Request (PR) with an explanation of your ideas. Please note that proposals involving substantial changes must be thoroughly documented, and test cases should be provided to demonstrate the advantages of your approach. 258 | 259 | 260 | >[!NOTE] 261 | >Additional examples can be found by reviewing the codebase directly, where docstrings provide a comprehensive understanding of Pyggester's architecture. 
262 | 263 | 264 | 265 | 266 | ## Submitting Changes 267 | 268 | 1. **Commit Changes:** Commit your changes with a clear and concise commit message: 269 | 270 | ```bash 271 | git add . 272 | git commit -m "Brief description of your changes" 273 | ``` 274 | 275 | 2. **Push Changes:** Push your changes to your forked repository: 276 | 277 | ```bash 278 | git push origin your-branch-name 279 | ``` 280 | 3. **Open a Pull Request:** On GitHub, open a pull request from your branch to the main project repository. Provide a detailed description of your changes and any relevant information. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 |

7 | Pyggester Logo 8 |

9 | 10 |

Pyggester - dynamic/static python analysis

11 | 12 | 13 | # 📘 About 14 | 15 | pyggester - (python + suggester) functions as both a dynamic and static analyzer. Its primary purpose lies in offering suggestions to enhance the efficiency of Python code by addressing suboptimal usage of data structures. 16 | 17 | # ⭐ Features 18 | 19 | Pyggester offers a pretty decent cli interface for its functionalities. The cli is built on top of [typer](https://github.com/tiangolo/typer) 20 | 21 | `Execution command`: 22 | ```bash 23 | pyggest 24 | ``` 25 | `output`: 26 | ``` 27 | _____ 28 | _____________ ________ _______ ______________ /_____________ 29 | ___ __ \_ / / /_ __ `/_ __ `/ _ \_ ___/ __/ _ \_ ___/ 30 | __ /_/ / /_/ /_ /_/ /_ /_/ // __/(__ )/ /_ / __/ / 31 | _ .___/_\__, / _\__, / _\__, / \___//____/ \__/ \___//_/ 32 | /_/ /____/ /____/ /____/ 33 | 34 | 35 | Usage: pyggest [OPTIONS] COMMAND [ARGS]... 36 | 37 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 38 | │ --install-completion Install completion for the current shell. │ 39 | │ --show-completion Show completion for the current shell, to copy it or customize the installation. │ 40 | │ --help Show this message and exit. │ 41 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 42 | ╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 43 | │ static Perform static analysis using PyggestStatic. │ 44 | │ transform Perform dynamic transformation using PyggesterDynamic. 
│ 45 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 46 | ``` 47 | 48 | The pyggester CLI presents two distinct features: 49 | - Static Analysis: This feature comprehensively examines your code without executing it, providing insightful insights into its structure and potential improvements. 50 | 51 | `Execution command` 52 | 53 | > [!NOTE] 54 | > The 'static' subcommand exists, but has no functionalities implemented, because we already have good static analyzers(pylint, ruff, flake8). In future iterations, should we identify suggestions that can be established through static analysis, we will incorporate them into this feature. 55 | 56 | ```bash 57 | pyggest static 58 | ``` 59 | 60 | - Dynamic/Automatic Transformation: This feature adds extra code to your python files to analyze your data structures at runtime. Your original code stays the same; it won't be changed. A new file is created that's just like the original but with additional code. This works for both single files and whole directories(full project structures). 61 | 62 | `Execution command` 63 | 64 | ``` bash 65 | pyggest transform 66 | ``` 67 | 68 | > [!INFO] 69 | > pyggester offers built-in documentation for detailed usage 70 | 71 | ```bash 72 | pyggest transform --help 73 | pyggest static --help #NOT IMPLEMENTED 74 | ``` 75 | 76 | 77 | 78 | # 🔧 Installation 79 | 80 | ### Using Pip 81 | You can easily install the Python library using pip. Open your terminal and run the following command: 82 | ```bash 83 | pip install pyggester 84 | ``` 85 | 86 | 87 | ### Cloning the GitHub Repository 88 | 89 | 1. **Clone the Repository:** Open your terminal and run the following command to clone the GitHub repository to your local machine: 90 | 91 | ```bash 92 | git clone git@github.com:ValdonVitija/pyggester.git 93 | ``` 94 | 2. 
**Navigate to the Repository:** Change your working directory to the cloned repository: 95 | 96 | ```bash 97 | cd pyggester 98 | ``` 99 | 3. **Install pyggester as a pacakge locally:** 100 | > [!IMPORTANT] 101 | > Consider doing this within a virtual environment (venv) if possible. 102 | 103 | ```bash 104 | pip install . 105 | ``` 106 | 107 | # Usage (Step-by-Step) 108 | 109 | ## Single File Usage 110 | 111 | 112 | Lets suppose you have a single python file that you want to dynamically analyze(run-time analysis) 113 | 114 | ### 1. Preparation 115 | 116 | Before code transformation with pyggester: 117 | ```bash 118 | (venv) root@user:~/my_app> ls 119 | app.py 120 | ``` 121 | 122 | Content of app.py: 123 | 124 | ```python 125 | def sum_of_integers(integer_list): 126 | total = sum(integer_list) 127 | return total 128 | 129 | my_list = [1, 2, 3, 4, 5] 130 | print(sum_of_integers(my_list)) 131 | 132 | ``` 133 | ### 2. Transformation 134 | 135 | > [!IMPORTANT] 136 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.** 137 | 138 | ```bash 139 | (venv) root@devs04:~/my_app> pyggest transform app.py 140 | ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮ 141 | │ File transformed successfully! │ 142 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯ 143 | ``` 144 | ### 3. 
Post-Transformation 145 | 146 | ```bash 147 | (venv) root@devs04:~/my_app> ls 148 | app.py app_transformed.py 149 | ``` 150 | 151 | Content of app_transformed.py: 152 | 153 | ```python 154 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 155 | from pyggester.observables import ObservableNumpyArray, ObservableNamedTuple, ObservableSet, ObservablePandasDataFrame, ObservableList, ObservableDict, ObservableTuple 156 | 157 | 158 | def sum_of_integers(integer_list): 159 | total = sum(integer_list) 160 | return total 161 | 162 | 163 | my_list = ObservableList([1, 2, 3, 4, 5]) 164 | OBSERVABLE_COLLECTOR.append(my_list) 165 | print(sum_of_integers(my_list)) 166 | 167 | for observable in OBSERVABLE_COLLECTOR: 168 | observable.run() 169 | 170 | ``` 171 | 172 | > [!IMPORTANT] 173 | > We now have a new file, automatically created, that mirrors the original file. This new file includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app_transformed.py'. Rest assured, everything from 'app.py' is retained in 'app_transformed.py'. 174 | 175 | ### 4. Running the Transformed Code 176 | 177 | ```bash 178 | (venv) root@devs04:~/my_app> python3 app_transformed.py 179 | 15 180 | ╭────────────────────────────────────────────────────────────────────────────╮ 181 | │ 10 | Suggestions(/root/my_app/app_transformed.py): │ 182 | │ [*] Consider using an array.array instead of a list, for optimal │ 183 | │ memory consumption │ 184 | │ [*] Consider using a set instead of a list, because of unique elements │ 185 | ╰────────────────────────────────────────────────────────────────────────────╯ 186 | ``` 187 | 188 | ## Directory Usage 189 | 190 | Lets suppose you have a python project(directory/repo) that you want to dynamically analyze(run-time analysis) 191 | 192 | ### 1. 
Preparation 193 | 194 | Before code transformation with pyggester: 195 | ```bash 196 | (venv) root@devs04:~/python_demo/app_dir> ls 197 | __pycache__ app.py temperature.py weather.py 198 | ``` 199 | 200 | Content of app.py: 201 | 202 | ```python 203 | import weather 204 | import temperature 205 | 206 | 207 | def main(): 208 | city = input('Enter a city name: ') 209 | weather_condition = weather.get_weather(city) 210 | avg_temp = temperature.get_average_temperature() 211 | print(f'Weather in {city}: {weather_condition}') 212 | print(f'Average temperature: {avg_temp} degrees Celsius') 213 | 214 | 215 | main() 216 | ``` 217 | 218 | Content of temperature.py: 219 | ```python 220 | temperatures = list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 20]) 221 | 222 | 223 | def get_average_temperature(): 224 | return sum(temperatures) / len(temperatures) 225 | 226 | ``` 227 | 228 | Content of weather.py: 229 | ```python 230 | weather_conditions = ['Sunny', 'Rainy', 'Cloudy', 'Windy', 'Sunny', 'Cloudy'] 231 | 232 | def get_weather(city): 233 | return weather_conditions.pop() 234 | ``` 235 | 236 | ### 2. Transformation 237 | 238 | > [!IMPORTANT] 239 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.** 240 | 241 | ```bash 242 | (venv) root@devs04:~/python_demo> pyggest transform app_dir/ 243 | Enter the name of the main file: app.py 244 | ╭──────────────────────────────────────────────────────────────────────────╮ 245 | │ Directory transformed successfully! │ 246 | ╰──────────────────────────────────────────────────────────────────────────╯ 247 | ``` 248 | > [!IMPORTANT] 249 | > When a directory or project is specified as an argument, pyggester prompts us to specify the main file of our project. This file should be the entry point of your project, indicated by its file name. 250 | 251 | ### 3. 
Post-Transformation 252 | 253 | ```bash 254 | (venv) root@devs04:~/python_demo> ls 255 | app_dir app_dir_transformed 256 | ``` 257 | 258 | Content of app_dir_transformed/: 259 | 260 | ```python 261 | (venv) root@devs04:~/python_demo/app_dir_transformed> ls 262 | app.py temperature.py weather.py 263 | ``` 264 | 265 | Content of app.py: 266 | ```python 267 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 268 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 269 | import weather 270 | import temperature 271 | 272 | 273 | def main(): 274 | city = input('Enter a city name: ') 275 | weather_condition = weather.get_weather(city) 276 | avg_temp = temperature.get_average_temperature() 277 | print(f'Weather in {city}: {weather_condition}') 278 | print(f'Average temperature: {avg_temp} degrees Celsius') 279 | 280 | 281 | main() 282 | for observable in OBSERVABLE_COLLECTOR: 283 | observable.run() 284 | 285 | ``` 286 | 287 | Content of temperature.py: 288 | ```python 289 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 290 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 291 | temperatures = ObservableList(list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 292 | 20])) 293 | OBSERVABLE_COLLECTOR.append(temperatures) 294 | 295 | 296 | def get_average_temperature(): 297 | return sum(temperatures) / len(temperatures) 298 | 299 | ``` 300 | 301 | Content of weather.py: 302 | 303 | ```python 304 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR 305 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple 306 | weather_conditions = ObservableList(['Sunny', 'Rainy', 'Cloudy', 'Windy', 307 | 
'Sunny', 'Cloudy']) 308 | OBSERVABLE_COLLECTOR.append(weather_conditions) 309 | 310 | 311 | def get_weather(city): 312 | return weather_conditions.pop() 313 | 314 | ``` 315 | 316 | > [!IMPORTANT] 317 | > We now have a new directory, automatically created, that mirrors the original directory. This new directory includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app.py' that resides inside 'app_dir_transformed/'. Rest assured, everything from 'app_dir' is retained in 'app_dir_transformed/'. 318 | 319 | ### 4. Running the Transformed Code 320 | 321 | ```bash 322 | (venv) root@devs04:~/python_demo/app_dir_transformed> python3 app.py 323 | Enter a city name: Pristina 324 | Weather in Pristina: Cloudy 325 | Average temperature: 19.545454545454547 degrees Celsius 326 | ╭─────────────────────────────────────────────────────────────────────────────────────╮ 327 | │ 3 | Suggestions(/root/python_demo/app_dir_transformed/temperature.py): │ 328 | │ [*] Consider using an array.array instead of a list, for optimal memory │ 329 | │ consumption │ 330 | ╰─────────────────────────────────────────────────────────────────────────────────────╯ 331 | ``` 332 | 333 | # 📁 Directory Structure 334 | ```bash 335 | . 336 | ├── LICENSE 337 | ├── README.md #main readme file. The one you are currently reading. 338 | ├── VERSION #version of pyggester 339 | ├── contributing.md 340 | ├── pyggester # directory containing the full source code of pyggester 341 | │   ├── __init__.py 342 | │   ├── cli.py #defines the typer cli structure(command & options) 343 | │   ├── command_handlers.py #Handles subcommands and every option variation per subcommand. 344 | │   ├── data #data/config files related to pyggester. 
345 | │   │   └── help_files #build in help files for the pyggester cli 346 | │   │   ├── __init__.py 347 | │   │   ├── transform_helper.md #detailed built-in documentation for the transform subcommand of pyggest 348 | │   │   └── static_helper.md #detailed built-in documentation for the static subcommand of pyggest 349 | │   ├── helpers.py #helper functions to be used by other modules 350 | │   ├── main.py #The entry point of pyggest execution. Initializes the typer cli app and prints the ascii logo of pyggester 351 | │   ├── message_handler.py #Manages how the collected messages will be printed to the user. 352 | │   ├── module_importer.py #Contains the mechanism to automatically import observables 353 | │   ├── observable_collector.py #Contains the list that will be used to collect all observables. 354 | │   ├── observable_transformations.py #Contains the mechanism that will automatically add code that collects observables and glues together all ast modules 355 | │   ├── observables.py #Contains all the defined observables(enhanced version of python collections) 356 | │   ├── pyggester.py #The 'engine' of pyggester. This module glues everything together 357 | │   ├── text_formatters.py #Contains text formatters, to beautify text in stdout. 358 | │   └── wrappers.py #Contains the mechanism that wrap each observable. 
359 | ├── pyggester-abstract-execution-flow.png 360 | ├── pyggester_logo.png 361 | ├── pytest.ini #pytest config file 362 | ├── requirements.txt #Every pyggester dependency resides here 363 | ├── setup.py #Creates the pyggester package and defines pyggest as the entry point command to execute pyggester 364 | └── tests 365 | ├── __init__.py 366 | ├── test_cli.py 367 | ├── test_command_handlers.py 368 | ├── test_file.py 369 | ├── test_file_transformed.py 370 | ├── test_helpers.py 371 | ├── test_main.py 372 | ├── test_message_handler.py 373 | ├── test_module_importer.py 374 | ├── test_observable_transformations.py 375 | ├── test_observables.py 376 | ├── test_pyggester.py 377 | └── test_wrappers.py 378 | ``` 379 | # Abstract Execution Flow 380 | 381 | The following flow diagram illustrates key components of Pyggester and provides a comprehensive overview of the execution sequence. 382 | 383 | ![Pyggester abstract execution flow](pyggester-abstract-execution-flow.png) 384 | 385 | 386 | # 👥 Contribution 387 | 388 | To contribute to this project, please refer to the comprehensive [contribution guide](contributing.md) for detailed instructions and best practices. 389 | 390 | # ©️ License 391 | 392 | MIT License 393 | 394 | Copyright (c) 2023 ValdonVitijaa 395 | 396 | Permission is hereby granted, free of charge, to any person obtaining a copy 397 | of this software and associated documentation files (the "Software"), to deal 398 | in the Software without restriction, including without limitation the rights 399 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 400 | copies of the Software, and to permit persons to whom the Software is 401 | furnished to do so, subject to the following conditions: 402 | 403 | The above copyright notice and this permission notice shall be included in all 404 | copies or substantial portions of the Software.
405 | 406 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 407 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 408 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 409 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 410 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 411 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 412 | SOFTWARE. 413 | 414 | 415 | -------------------------------------------------------------------------------- /pyggester/wrappers.py: -------------------------------------------------------------------------------- 1 | from _ast import AST, Assert, Assign, ClassDef, Expr, Module, Tuple 2 | import ast 3 | import inspect 4 | from astor import to_source 5 | from typing import Any, ClassVar, Tuple, Union, Set 6 | import pathlib 7 | from pyggester.helpers import source_code_to_str 8 | from pyggester.module_importer import add_imports 9 | 10 | 11 | # ---------------------------------------------------------- 12 | 13 | # The following wrappers are used for built-in standard python data structures. 14 | # List of standard python data structures: 15 | 16 | # list -> [] or list() 17 | # dict -> {} or dict() 18 | # set -> {} or set() 19 | # tuple -> () or tuple() 20 | 21 | # These datastructures can be directly derived to create a single wrappers that 22 | # can wrap the original datastructure declarations without changing their core 23 | # behaviour 24 | 25 | 26 | # ---------------------------------------------------------- 27 | 28 | 29 | class ObservableListWrapper(ast.NodeTransformer): 30 | """AST transformer to wrap lists with ObservableList.""" 31 | 32 | __slots__: Tuple[str] = () 33 | 34 | def visit_List(self, node: ast.List) -> Union[ast.Call, ast.AST]: 35 | """ 36 | Transform a List node to an ObservableList node. 37 | 38 | Args: 39 | node (ast.List): The original List node. 
40 | 41 | Returns: 42 | Union[ast.Call, ast.AST]: The transformed node. 43 | """ 44 | return ast.Call( 45 | func=ast.Name(id="ObservableList", ctx=ast.Load()), args=[node], keywords=[] 46 | ) 47 | 48 | def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]: 49 | if isinstance(node.func, ast.Name) and node.func.id == "list": 50 | return ast.Call( 51 | func=ast.Name(id="ObservableList", ctx=ast.Load()), 52 | args=[node], 53 | keywords=[], 54 | ) 55 | return node 56 | 57 | 58 | class ObservableDictWrapper(ast.NodeTransformer): 59 | """AST transformer to wrap dicts with ObservableDict.""" 60 | 61 | __slots__: Tuple[str] = () 62 | 63 | def visit_Dict(self, node: ast.Dict) -> Union[ast.Call, ast.AST]: 64 | """ 65 | Transform a Dict node to an ObservableDict node. 66 | 67 | Args: 68 | node (ast.Dict): The original Dict node. 69 | 70 | Returns: 71 | Union[ast.Call, ast.AST]: The transformed node. 72 | """ 73 | return ast.Call( 74 | func=ast.Name(id="ObservableDict", ctx=ast.Load()), args=[node], keywords=[] 75 | ) 76 | 77 | def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]: 78 | if isinstance(node.func, ast.Name) and node.func.id == "dict": 79 | return ast.Call( 80 | func=ast.Name(id="ObservableDict", ctx=ast.Load()), 81 | args=[node], 82 | keywords=[], 83 | ) 84 | return node 85 | 86 | 87 | class ObservableTupleWrapper(ast.NodeTransformer): 88 | """AST transformer to wrap tuples with ObservableTuple.""" 89 | 90 | __slots__: Tuple[str] = () 91 | 92 | def visit_Tuple(self, node: ast.Tuple) -> Union[ast.Call, ast.AST]: 93 | """ 94 | Transform a Tuple node to an ObservableTuple node. 95 | 96 | Args: 97 | node (ast.Tuple): The original Tuple node. 98 | 99 | Returns: 100 | Union[ast.Call, ast.AST]: The transformed node. 
101 | """ 102 | return ast.Call( 103 | func=ast.Name(id="ObservableTuple", ctx=ast.Load()), 104 | args=[node], 105 | keywords=[], 106 | ) 107 | 108 | def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]: 109 | if isinstance(node.func, ast.Name) and node.func.id == "tuple": 110 | return ast.Call( 111 | func=ast.Name(id="ObservableTuple", ctx=ast.Load()), 112 | args=[node], 113 | keywords=[], 114 | ) 115 | return node 116 | 117 | 118 | class ObservableSetWrapper(ast.NodeTransformer): 119 | """AST transformer to wrap tuples with ObservableTuple.""" 120 | 121 | __slots__: Tuple[str] = () 122 | 123 | def visit_Set(self, node: ast.Set) -> Union[ast.Call, ast.AST]: 124 | """ 125 | Transform a Set node to an ObservableSet node. 126 | 127 | Args: 128 | node (ast.Set): The original Tuple node. 129 | 130 | Returns: 131 | Union[ast.Call, ast.AST]: The transformed node. 132 | """ 133 | return ast.Call( 134 | func=ast.Name(id="ObservableSet", ctx=ast.Load()), 135 | args=[node], 136 | keywords=[], 137 | ) 138 | 139 | def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]: 140 | if isinstance(node.func, ast.Name) and node.func.id == "set": 141 | return ast.Call( 142 | func=ast.Name(id="ObservableSet", ctx=ast.Load()), 143 | args=[node], 144 | keywords=[], 145 | ) 146 | return node 147 | 148 | 149 | # ---------------------------------------------------------- 150 | 151 | # The following wrappers are part of the collections built-in python module. 
152 | # List of all container datatypes: 153 | 154 | # ChainMap 155 | # Counter 156 | # OrderedDict 157 | # UserDict 158 | # UserList 159 | # UserString 160 | # defaultdict 161 | # deque 162 | # namedtuple 163 | 164 | # These collections cannot be directly derived to create a single wrapper that 165 | # can wrap the original datastructure declarations without changing its core 166 | # behaviour 167 | 168 | # ---------------------------------------------------------- 169 | 170 | 171 | class ObservableNamedTupleWrapper(ast.NodeTransformer): 172 | """AST transformer to wrap namedtuples with ObservableNamedTuple.""" 173 | 174 | class NamedTupleVisitor(ast.NodeVisitor): 175 | """ 176 | NamedTuple visitor to be used internally only by the outer-class. 177 | The purpose of this class is specifically to get all namedtuple instances 178 | in the current module being analyzed 179 | """ 180 | 181 | def __init__(self) -> None: 182 | self.namedtuple_instances = set() 183 | 184 | def visit_Assign(self, node: ast.Assign) -> Any: 185 | """ 186 | Visit each Assign node, because namedtuple declaration are all 187 | Assign nodes in the python's ast. 188 | """ 189 | if getattr(node, "value") and isinstance(node.value, ast.Call): 190 | if getattr(node.value, "func"): 191 | if isinstance(node.value.func, ast.Name): 192 | if node.value.func.id == "namedtuple": 193 | for target in node.targets: 194 | if isinstance(target, ast.Name): 195 | self.namedtuple_instances.add(target.id) 196 | 197 | def __init__(self, tree) -> None: 198 | """ 199 | Immediatly initialize the tuple visitor and collect all namedtuple constructor declarations. 200 | """ 201 | self.namedtuple_visitor = self.NamedTupleVisitor() 202 | self.namedtuple_visitor.visit(tree) 203 | self.modified_nodes = [] 204 | 205 | def visit_Assign(self, node: ast.Assign) -> Any: 206 | """ 207 | Now visit each Assign node and check if that node is a namedtuple instance of a collected 208 | type by NamedTupleVisitor. 
If thats the case, wrap each instance into an ObservableNamedTupleWrapper, 209 | so that we can analyze its internal structure for potential suggestions. 210 | """ 211 | if getattr(node, "value") and isinstance(node.value, ast.Call): 212 | if getattr(node.value, "func"): 213 | if isinstance(node.value.func, ast.Name): 214 | if ( 215 | node.value.func.id 216 | in self.namedtuple_visitor.namedtuple_instances 217 | ): 218 | for target in node.targets: 219 | if isinstance(target, ast.Name): 220 | wrapper_code = f"{target.id}_wrapper = ObservableNamedTuple(*{target.id})" 221 | wrapper_node = ast.parse(wrapper_code).body[0] 222 | return [node, wrapper_node] 223 | return node 224 | 225 | 226 | # ---------------------------------------------------------- 227 | 228 | # The following wrappers are third party libraries. 229 | # List of all supported third party datatypes: 230 | 231 | # NumPy Arrays 232 | # Pandas 233 | # Polars(soon to be supported) 234 | # More to be added 235 | 236 | # ---------------------------------------------------------- 237 | 238 | # TODO MIGHT MERGE THE FOLLOWING TWO CLASSES TOGETHER, BECAUSE OF A LOT OF 239 | # CODE REPETITIONS, OR MAYBE AN ABSTRACT CLASS THAT WILL REQUIRE BOTH OF THEM 240 | # TO IMPLEMENT SOME SPECIFIC, WHILE OFFERING PRE-IMPLEMENTED FEATURES 241 | 242 | 243 | class ObservableNumpyArrayWrapper(ast.NodeTransformer): 244 | """AST transformer to wrap NumPy array instances with ObservableNumpyArray.""" 245 | 246 | class NumpyImportsVisitor(ast.NodeVisitor): 247 | def __init__(self): 248 | self.alias_name = None 249 | self.alias_asname = None 250 | 251 | def visit_Import(self, node): 252 | """ 253 | Check numpy imports, because we need to determine how to 254 | wrap the initiated array instances 255 | 256 | [*] import numpy 257 | [*] import numpy as np 258 | [*] import numpy as 'alias' 259 | """ 260 | for name in node.names: 261 | if name.name == "numpy": 262 | self.alias_name = name.name 263 | if name.name == "numpy" and getattr(name, 
"asname"): 264 | self.alias_asname = name.asname 265 | 266 | def visit_ImportFrom(self, node): 267 | """ 268 | Check 'from' numpy imports, because we need to determine how to wrao 269 | the initiated array instances 270 | 271 | [*] from numpy import array 272 | [*] from numpy import array as arr 273 | [*] from numpy import ones 274 | ... 275 | """ 276 | if node.module == "numpy": 277 | for name in node.names: 278 | if name.name in ["array", "zeros", "ones", "empty"]: 279 | self.alias_name = name.name 280 | if getattr(name, "asname"): 281 | self.alias_asname = name.asname 282 | 283 | def __init__(self, tree) -> None: 284 | self.imports_visitor = self.NumpyImportsVisitor() 285 | self.imports_visitor.visit(tree) 286 | 287 | def visit_Assign(self, node: ast.Assign) -> ast.AST: 288 | """ 289 | Now visit each Assign node and check if that node is a numpy array instance. If thats the case, wrap each instance into an ObservableNumpyArray, 290 | so that we can analyze its internal structure for potential suggestions. 
class ObservablePandasDataFrameWrapper(ast.NodeTransformer):
    """AST transformer to wrap Pandas DataFrame instances with ObservablePandasDataFrame

    The constructor scans the tree once for pandas imports. ``visit_Assign``
    then appends ``<target>_pandas_wrapper = ObservablePandasDataFrame(<target>)``
    after every assignment whose value is a call made through the recorded
    import name.
    """

    class PandasImportsVisitor(ast.NodeVisitor):
        """Record the name under which pandas (or DataFrame) is imported."""

        def __init__(self):
            # Imported name ("pandas" or "DataFrame") and its optional "as" alias.
            self.alias_name = None
            self.alias_asname = None

        def visit_Import(self, node):
            """
            Check pandas imports, because we need to determine how to
            wrap the initiated DataFrame instances

            [*] import pandas
            [*] import pandas as pd
            [*] import pandas as 'alias'
            """
            for name in node.names:
                if name.name != "pandas":
                    continue
                self.alias_name = name.name
                if name.asname:
                    self.alias_asname = name.asname

        def visit_ImportFrom(self, node):
            """
            Check 'from' pandas imports, because we need to determine how to wrap
            the initiated DataFrame instances

            [*] from pandas import DataFrame
            ...
            """
            if node.module != "pandas":
                return
            for name in node.names:
                # Using a list, because other constructs might be added later.
                if name.name in ["DataFrame"]:
                    self.alias_name = name.name
                    if name.asname:
                        self.alias_asname = name.asname

    def __init__(self, tree) -> None:
        self.imports_visitor = self.PandasImportsVisitor()
        self.imports_visitor.visit(tree)

    def visit_Assign(self, node: ast.Assign) -> ast.AST:
        """
        Wrap the assigned variable when the value is a call made through the
        recorded pandas name.

        Calls whose callee is neither ``name(...)`` nor ``name.attr(...)``
        (for example ``a.b.c()``) are left untouched.
        """
        call = node.value
        if not isinstance(call, ast.Call):
            return node

        func = call.func
        if isinstance(func, ast.Name):
            called_through = func.id
        elif isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            # Deeper attribute chains have no ``.id`` and used to raise
            # AttributeError here.
            called_through = func.value.id
        else:
            return node

        alias = self.get_alias_name()
        if alias is not None and called_through == alias:
            return self.wrap_numpy_array(node)
        return node

    def get_alias_name(self):
        """Return the "as" alias if one was recorded, else the imported name."""
        return self.imports_visitor.alias_asname or self.imports_visitor.alias_name

    def wrap_numpy_array(self, node):
        """
        Return ``[node, wrapper_assignment]`` for the first assignment target.

        Despite its name, this method emits an ObservablePandasDataFrame
        wrapper. When the first target is not a plain name (attribute,
        subscript, tuple unpacking) the node is returned unchanged.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return node
        wrapper_code = (
            f"{target.id}_pandas_wrapper = ObservablePandasDataFrame({target.id})"
        )
        wrapper_node = ast.parse(wrapper_code).body[0]
        return [node, wrapper_node]
class WrapperCollector(ast.NodeVisitor):
    """
    AST visitor that gathers observable names from class definitions.

    Each visited class contributes the part of its name that precedes the
    first "Wrapper"; the collector's own class is ignored.
    """

    __slots__: Tuple[str] = ("observables",)

    def __init__(self) -> None:
        self.observables: Set[str] = set()

    def visit_ClassDef(self, node: ast.ClassDef) -> Any:
        """
        Record the observable name derived from a class definition.

        The class body is not traversed, so nested classes are not recorded.

        Args:
            node (ast.ClassDef): The ClassDef node to visit.
        """
        own_name = self.__class__.__name__
        if node.name == own_name:
            # The collector itself is not an observable wrapper.
            return
        observable_name = node.name.split("Wrapper")[0]
        self.observables.add(observable_name)
def get_wrappers_as_strings() -> Set[str]:
    """
    Return the observable names derived from the classes in this module.

    The module's own source is parsed and handed to WrapperCollector. The
    module importer uses the result to import the observables into each
    module selected for transformation.
    """
    this_module = pathlib.Path(__file__)
    module_tree = ast.parse(source_code_to_str(path=this_module))
    collector = WrapperCollector()
    collector.visit(module_tree)
    return collector.observables


# Registry of wrapper transformers, grouped by how they are constructed in
# apply_wrappers: "standard_containers" take no arguments, the other two
# groups receive the tree.
WRAPPERS = {
    "standard_containers": {
        "list": ObservableListWrapper,
        "dict": ObservableDictWrapper,
        "set": ObservableSetWrapper,
        "tuple": ObservableTupleWrapper,
    },
    "collector_containers": {"namedtuple": ObservableNamedTupleWrapper},
    "third_party": {
        "numpy_array": ObservableNumpyArrayWrapper,
        "pandas_dataframe": ObservablePandasDataFrameWrapper,
        # "pandas_series": ObservablePandasSeriesWrapper,
    },
}


def apply_wrappers(tree: ast.AST) -> ast.AST:
    """
    Run every registered wrapper transformer over the given AST.

    Transformers are applied group by group in registry order: standard
    containers first, then collector containers, then third-party types.
    Each transformer receives the tree produced by the previous one.

    Args:
        tree (ast.AST): The parsed module to transform.

    Returns:
        ast.AST: The tree returned by the last transformer.
    """
    for wrapper_cls in WRAPPERS["standard_containers"].values():
        tree = wrapper_cls().visit(tree)

    # These transformers inspect the tree at construction time.
    for group in ("collector_containers", "third_party"):
        for wrapper_cls in WRAPPERS[group].values():
            tree = wrapper_cls(tree).visit(tree)

    return tree
from collections import namedtuple
import numpy
import pandas as pd
from pyggester.observables import (
    ObservableList,
    ObservableDict,
    ObservableNamedTuple,
    ObservableNumpyArray,
    ObservablePandasDataFrame,
    ObservableSet,
    ObservableTuple,
)


def test_different_ways_of_list_initialization():
    """An ObservableList built from a literal or from list() is still a list."""
    for source in ([1, 2, 3], list([1, 2, 3])):
        assert isinstance(ObservableList(source), list)


def test_original_list_behavior():
    """ObservableList mirrors a plain list through reads and mutations."""
    observed = ObservableList([1, 2, 3])
    plain = [1, 2, 3]

    # Read-only operations.
    assert observed == plain
    assert len(observed) == len(plain)
    assert observed[1] == plain[1]
    assert observed[1:3] == plain[1:3]
    assert list(observed) == plain
    assert 2 in observed
    assert 4 not in observed
    assert observed.copy() == plain.copy()

    # Mutations are applied to both lists in lockstep.
    for lst in (observed, plain):
        lst.clear()
    assert len(observed) == 0
    assert len(plain) == 0

    for lst in (observed, plain):
        lst.extend([4, 5, 6])
    assert observed == plain

    assert observed.pop() == plain.pop()
    assert observed == plain

    for lst in (observed, plain):
        lst.remove(4)
    assert observed == plain

    for lst in (observed, plain):
        lst.reverse()
    assert observed == plain

    for lst in (observed, plain):
        lst.sort()
    assert observed == plain
def test_check_observable_list_additional_attributes():
    """ObservableList exposes its tracking flags and its message handler.

    Named with the ``test_`` prefix so that pytest collects it.
    """
    obs_list = ObservableList([1, 2, 3])
    for attribute in (
        "appended",
        "extended",
        "inserted",
        "removed",
        "count_",
        "in_operator_used",
        "message_handler",
    ):
        assert hasattr(obs_list, attribute)


# Previous name, kept so existing references keep working. It does not match
# pytest's default ``test`` prefix, so it is not collected a second time.
check_observable_list_additional_attributes_test = (
    test_check_observable_list_additional_attributes
)


def test_check_numpy_array_instead_of_list():
    """The numpy hint is emitted for a nested list but not for a flat one."""
    obs_list = ObservableList([[1, 2], [3, 4]])
    obs_list.check_numpy_array_instead_of_list()

    assert (
        "Consider using a numpy array instead of a list, for faster computations and optimized memory utilization"
        in obs_list.message_handler.messages
    )

    obs_list = ObservableList([1, 2, 3])
    obs_list.check_numpy_array_instead_of_list()
    assert (
        "Consider using a numpy array instead of a list, for faster computations and optimized memory utilization"
        not in obs_list.message_handler.messages
    )


def test_check_array_instead_of_list():
    """The array.array hint is emitted for all-int and all-single-char lists."""
    obs_list = ObservableList([1, 2, 3])
    obs_list.check_array_instead_of_list()
    assert (
        "Consider using an array.array instead of a list, for optimal memory consumption"
        in obs_list.message_handler.messages
    )

    obs_list = ObservableList(["a", "b", "c"])
    obs_list.check_array_instead_of_list()
    assert (
        "Consider using an array.array instead of a list, for optimal memory consumption"
        in obs_list.message_handler.messages
    )


def test_check_list_to_set_conversion():
    """Conversion is rejected with duplicates and accepted without them."""
    obs_list = ObservableList([1, 2, 2, 3])
    result = obs_list.check_list_to_set_conversion()
    assert not result

    obs_list = ObservableList([1, 2, 3])
    result = obs_list.check_list_to_set_conversion()
    assert result
def test_check_set_instead_of_list():
    """The set hint wording depends on whether the ``in`` operator was used."""
    cases = (
        (
            True,
            "Consider using a set instead of a list, because of unique elements and element existence checking",
        ),
        (
            False,
            "Consider using a set instead of a list, because of unique elements",
        ),
    )
    for in_operator_used, expected in cases:
        obs_list = ObservableList([1, 2, 3])
        obs_list.in_operator_used = in_operator_used
        obs_list.check_set_instead_of_list()
        assert expected in obs_list.message_handler.messages


def test_check_Counter_insteaf_of_list():
    """The Counter hint appears only when ``count_`` is set."""
    expected = "Consider using a collections.Counter, to count occurences of elements"

    counted = ObservableList([1, 2, 2, 3])
    counted.count_ = True
    counted.check_Counter_insteaf_of_list()
    assert expected in counted.message_handler.messages

    not_counted = ObservableList([1, 2, 3])
    not_counted.count_ = False
    not_counted.check_Counter_insteaf_of_list()
    assert expected not in not_counted.message_handler.messages


def test_check_tuple_instead_of_list():
    """The tuple hint needs constant-looking strings and no recorded mutation."""
    expected = "Consider using a tuple since all elements seem to be constants, because the list was never modified"
    mutation_flags = ("appended", "extended", "removed", "inserted")

    untouched = ObservableList(["A", "B", "C"])
    for flag in mutation_flags:
        setattr(untouched, flag, False)
    untouched.check_tuple_instead_of_list()
    assert expected in untouched.message_handler.messages

    mutated = ObservableList(["a", "b", "c"])
    for flag in mutation_flags:
        setattr(mutated, flag, True)
    mutated.check_tuple_instead_of_list()
    assert expected not in mutated.message_handler.messages
def test_different_ways_of_set_initialization():
    """An ObservableSet built from a literal or from set() is still a set."""
    assert isinstance(ObservableSet({1, 2, 3}), set)
    assert isinstance(ObservableSet(set({1, 2, 3})), set)


def test_original_set_behavior():
    """ObservableSet mirrors a plain set through reads, mutations and algebra."""
    obs_set = ObservableSet({1, 2, 3})
    regular_set = {1, 2, 3}
    assert obs_set == regular_set
    assert len(obs_set) == len(regular_set)
    assert set(obs_set) == regular_set
    assert 2 in obs_set
    assert 4 not in obs_set
    obs_set.add(4)
    regular_set.add(4)
    assert obs_set == regular_set
    obs_set.discard(3)
    regular_set.discard(3)
    assert obs_set == regular_set
    obs_set.clear()
    regular_set.clear()
    assert len(obs_set) == 0
    assert len(regular_set) == 0
    obs_set.update({4, 5, 6})
    regular_set.update({4, 5, 6})
    assert obs_set == regular_set
    obs_set.remove(4)
    regular_set.remove(4)
    assert obs_set == regular_set
    popped_obs = obs_set.pop()
    popped_regular = regular_set.pop()
    assert popped_obs == popped_regular
    diff_obs = obs_set.difference({5, 6})
    diff_regular = regular_set.difference({5, 6})
    assert diff_obs == diff_regular
    union_obs = obs_set.union({6, 7})
    union_regular = regular_set.union({6, 7})
    assert union_obs == union_regular
    intersection_obs = obs_set.intersection({5, 6, 7})
    intersection_regular = regular_set.intersection({5, 6, 7})
    assert intersection_obs == intersection_regular
    sym_diff_obs = obs_set.symmetric_difference({6, 7, 8})
    sym_diff_regular = regular_set.symmetric_difference({6, 7, 8})
    assert sym_diff_obs == sym_diff_regular


def test_check_observable_set_additional_attributes():
    """ObservableSet exposes its tracking flags and its message handler.

    Named with the ``test_`` prefix so that pytest collects it.
    """
    obs_set = ObservableSet({1, 2, 3})
    for attribute in (
        "poped",
        "removed",
        "added",
        "updated",
        "if_it_was_a_list",
        "message_handler",
    ):
        assert hasattr(obs_set, attribute)


# Previous name, kept so existing references keep working. It does not match
# pytest's default ``test`` prefix, so it is not collected a second time.
check_observable_set_additional_attributes_test = (
    test_check_observable_set_additional_attributes
)
def test_check_frozenset_instead_of_set():
    """A set that was never modified gets the frozenset hint."""
    obs_set = ObservableSet({1, 2, 3})
    obs_set.check_frozenset_instead_of_set()
    assert (
        "Consider using a frozenset, because no modification operation has been used on set."
        in obs_set.message_handler.messages
    )


def test_check_list_instead_of_set():
    """Adding the same element repeatedly triggers the list hint."""
    obs_set = ObservableSet({})
    obs_set.add(1)
    obs_set.add(1)
    obs_set.add(1)
    obs_set.check_list_instead_of_set()
    assert (
        "If you inteded to keep duplicates use a list instead, because we noticed a lot of duplicates entered the set"
        in obs_set.message_handler.messages
    )


def test_different_ways_of_tuple_initialization():
    """An ObservableTuple built from a literal or from tuple() is still a tuple."""
    assert isinstance(ObservableTuple((1, 2, 3)), tuple)
    assert isinstance(ObservableTuple(tuple([1, 2, 3])), tuple)


def test_original_tuple_behavior():
    """ObservableTuple mirrors a plain tuple for reads, concatenation and repeat."""
    obs_tuple = ObservableTuple((1, 2, 3))
    regular_tuple = (1, 2, 3)
    assert obs_tuple == regular_tuple
    assert len(obs_tuple) == len(regular_tuple)
    assert obs_tuple[1] == regular_tuple[1]
    assert obs_tuple[1:3] == regular_tuple[1:3]
    assert tuple(obs_tuple) == regular_tuple
    assert 2 in obs_tuple
    assert 4 not in obs_tuple
    concat_obs = obs_tuple + (4, 5)
    concat_regular = regular_tuple + (4, 5)
    assert concat_obs == concat_regular
    repeat_obs = obs_tuple * 2
    repeat_regular = regular_tuple * 2
    assert repeat_obs == repeat_regular


def test_check_observable_tuple_additional_attributes():
    """ObservableTuple exposes ``mul_`` and its message handler.

    Named with the ``test_`` prefix so that pytest collects it.
    """
    obs_tuple = ObservableTuple((1, 2, 3))
    assert hasattr(obs_tuple, "mul_")
    assert hasattr(obs_tuple, "message_handler")


# Previous name, kept so existing references keep working. It does not match
# pytest's default ``test`` prefix, so it is not collected a second time.
check_observable_tuple_additional_attributes_test = (
    test_check_observable_tuple_additional_attributes
)


def test_check_mutable_inside_tuple():
    """A list inside the tuple triggers the mutable-element hint."""
    obs_tuple = ObservableTuple((1, [2, 3], 4))
    obs_tuple.check_mutable_inside_tuple()
    assert (
        "Mutable element inside of a tuple. Consider using only immutables for optimal performance"
        in obs_tuple.message_handler.messages
    )
def test_check_set_instead_of_tuple():
    """A tuple of unique elements gets the set hint."""
    obs_tuple = ObservableTuple((1, 2, 3))
    obs_tuple.check_set_instead_of_tuple()
    assert (
        "Consider using a set since elements are all unique"
        in obs_tuple.message_handler.messages
    )


def test_check_tuple_multiplication():
    """Multiplying the tuple by a scalar triggers the numpy hint."""
    obs_tuple = ObservableTuple((1, 2, 3))
    _ = obs_tuple * 2
    obs_tuple.check_tuple_multiplication()
    assert (
        "You multipled the tuple with a scalar value. If you inteded to multiply each element by that value, use a numpy array instead of a tuple."
        in obs_tuple.message_handler.messages
    )


def test_different_ways_of_dict_initialization():
    """An ObservableDict built from a literal or from dict() is still a dict."""
    assert isinstance(ObservableDict({"a": 1, "b": 2, "c": 3}), dict)
    assert isinstance(ObservableDict(dict({"a": 1, "b": 2, "c": 3})), dict)


def test_original_dict_behavior():
    """ObservableDict mirrors a plain dict through reads and mutations."""
    obs_dict = ObservableDict({"a": 1, "b": 2, "c": 3})
    regular_dict = {"a": 1, "b": 2, "c": 3}
    assert obs_dict == regular_dict
    assert len(obs_dict) == len(regular_dict)
    assert obs_dict["a"] == regular_dict["a"]
    obs_dict["d"] = 4
    assert obs_dict == {"a": 1, "b": 2, "c": 3, "d": 4}
    del obs_dict["a"]
    assert obs_dict == {"b": 2, "c": 3, "d": 4}
    regular_dict["d"] = 4
    assert regular_dict == {"a": 1, "b": 2, "c": 3, "d": 4}
    del regular_dict["a"]
    assert regular_dict == {"b": 2, "c": 3, "d": 4}
    popped_item_obs = obs_dict.popitem()
    popped_item_reg = regular_dict.popitem()
    assert popped_item_obs == popped_item_reg
    popped_obs = obs_dict.pop("b")
    popped_reg = regular_dict.pop("b")
    assert popped_obs == popped_reg
    obs_dict.update({"e": 5})
    # The debugging print that used to sit here was removed; the assertion
    # below already reports the dict contents on failure.
    assert obs_dict == {"c": 3, "e": 5}
    regular_dict.update({"e": 5})
    assert regular_dict == {"c": 3, "e": 5}
    copy_obs = obs_dict.copy()
    assert copy_obs == obs_dict
    assert set(obs_dict.keys()) == set(regular_dict.keys())
    assert set(obs_dict.values()) == set(regular_dict.values())
    assert set(obs_dict.items()) == set(regular_dict.items())
def test_check_observable_dict_additional_attributes():
    """ObservableDict exposes its tracking flags and its message handler.

    Named with the ``test_`` prefix so that pytest collects it.
    NOTE(review): this test never ran under its old name; confirm that
    ObservableDict defines every attribute listed here.
    """
    obs_dict = ObservableDict({"a": 1, "b": 2, "c": 3})
    for attribute in (
        "keys_",
        "update_",
        "setitem_",
        "delitem_",
        "getitem_",
        "pop_",
        "items_",
        "clear_",
        "values_",
        "message_handler",
    ):
        assert hasattr(obs_dict, attribute)


# Previous name, kept so existing references keep working. It does not match
# pytest's default ``test`` prefix, so it is not collected a second time.
check_observable_dict_additional_attributes_test = (
    test_check_observable_dict_additional_attributes
)


def test_check_Counter_instead_of_dict():
    """A dict with integer values gets the Counter hint."""
    obs_dict = ObservableDict(a=1, b=2, c=3)
    obs_dict.check_Counter_instead_of_dict()
    assert (
        "If you are using this dict to store occurences of elements, consider using a collections.Counter"
        in obs_dict.message_handler.messages
    )


def test_check_dict_get_method():
    """Subscript access triggers the ``.get`` hint."""
    obs_dict = ObservableDict(a=1, b=2, c=3)
    _ = obs_dict["a"]
    obs_dict.check_dict_get_method()
    assert (
        "For dict key retreval, always consider using 'your_dict'.get('key') instead of 'your_dict'['key']"
        in obs_dict.message_handler.messages
    )


def test_check_list_instead_of_dict():
    """Using only ``values()`` triggers the list/array hint."""
    obs_dict = ObservableDict(a=1, b=2, c=3)
    _ = obs_dict.values()
    obs_dict.check_list_instead_of_dict()
    assert (
        "It seems like you never used this dict for anything otherthan somehow using the values, use a list/array"
        in obs_dict.message_handler.messages
    )
def test_check_array_data_type():
    """An int64 array holding small values gets the uint8 hint."""
    expected = "Array was initiated with int64 integers, but values do not exceed 3. Consider using uint8 for optimization."
    data = numpy.array([1, 2, 3], dtype=numpy.int64)
    observed = ObservableNumpyArray(data)
    observed.check_array_data_type()
    assert expected in observed.message_handler.messages


def test_check_array_sparsity():
    """An array with nine zeros out of ten is reported as 90.00% sparse."""
    expected = "The array is highly sparse (sparsity: 90.00%). Consider using a sparse array representation for memory efficiency."
    data = numpy.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
    observed = ObservableNumpyArray(data)
    observed.check_array_sparsity()
    assert expected in observed.message_handler.messages


def test_check_for_nan_values():
    """An array containing NaN gets the NaN-handling hint."""
    expected = "The array contains NaN values. Consider using masked arrays or handling NaN values appropriately."
    data = numpy.array([1, numpy.nan, 3])
    observed = ObservableNumpyArray(data)
    observed.check_for_nan_values()
    assert expected in observed.message_handler.messages


def test_check_for_monotonicity():
    """A strictly increasing array gets the monotonicity hint."""
    expected = "The array is monotonic. Consider using specialized algorithms or data structures for monotonic arrays."
    data = numpy.array([1, 2, 3, 4, 5])
    observed = ObservableNumpyArray(data)
    observed.check_for_monotonicity()
    assert expected in observed.message_handler.messages


def test_check_for_categorical_data():
    """A string array with three distinct labels gets the categorical hint."""
    expected = "The array contains categorical data with 3 unique values. Consider using categorical data types for efficiency, like pd.Categorical()"
    labels = ["dog", "cat", "dog", "bird", "dog", "cat", "bird", "bird", "cat"]
    data = numpy.array(labels)
    observed = ObservableNumpyArray(data)
    observed.check_for_categorical_data()
    assert expected in observed.message_handler.messages
def test_check_for_symmetry():
    """A symmetric 2x2 array gets the symmetry hint."""
    expected = "The array is symmetric. Consider using specialized algorithms to operate on symmetric arrays, for example functions from scipy"
    data = numpy.array([[1, 2], [2, 1]])
    observed = ObservableNumpyArray(data)
    observed.check_for_symmetry()
    assert expected in observed.message_handler.messages


def test_check_for_constant_values():
    """An array of identical values gets the constant-values hint."""
    expected = "All elements in the array are the same. Consider using a single value, a constant or collections.Counter for memory efficiency."
    data = numpy.array([1, 1, 1, 1])
    observed = ObservableNumpyArray(data)
    observed.check_for_constant_values()
    assert expected in observed.message_handler.messages


def test_check_for_missing_values():
    """A DataFrame containing None gets the missing-values hint."""
    expected = "The DataFrame contains missing values. Consider handling missing values."
    frame = pd.DataFrame({"A": [1, 2, None], "B": [4, 5, 6]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_missing_values()
    assert expected in observed.message_handler.messages


def test_check_for_constant_columns():
    """A DataFrame whose column A is constant names that column in the hint."""
    expected = "The DataFrame contains constant columns (['A']). Consider dropping them for memory efficiency."
    frame = pd.DataFrame({"A": [1, 1, 1], "B": [4, 5, 6]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_constant_columns()
    assert expected in observed.message_handler.messages


def test_check_for_duplicate_rows():
    """A DataFrame with a repeated row gets the duplicate-rows hint."""
    expected = "The DataFrame contains duplicate rows. Consider handling duplicate rows appropriately."
    frame = pd.DataFrame({"A": [1, 2, 2], "B": [4, 5, 5]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_duplicate_rows()
    assert expected in observed.message_handler.messages
def test_check_series_instead_of_dataframe():
    """A single-column DataFrame gets the Series hint."""
    expected = "Consider using a Series instead of a DataFrame when you have only one column of data."
    frame = pd.DataFrame({"A": [1, 2, 3]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_series_insteafd_of_dataframe()
    assert expected in observed.message_handler.messages


def test_check_numpy_instead_of_dataframe():
    """A 15000-row, two-column DataFrame gets the NumPy hint."""
    expected = "Consider using a NumPy array or a specialized data structure if you have a large number of rows and a small number of columns."
    frame = pd.DataFrame({"A": range(15000), "B": range(15000)})
    observed = ObservablePandasDataFrame(frame)
    observed.check_numpy_instead_of_dataframe()
    assert expected in observed.message_handler.messages


def test_check_for_excessive_nesting():
    """A namedtuple holding another namedtuple gets the nesting hint."""
    expected = "Avoid excessive nesting of namedtuples to keep the structure simple and readable. Consider usina a class instead"
    InnerTuple = namedtuple("InnerTuple", "field1 field2")
    OuterTuple = namedtuple("OuterTuple", "inner")
    nested = OuterTuple(InnerTuple(1, 2))
    observed = ObservableNamedTuple(nested)
    observed.check_for_excessive_nesting()
    assert expected in observed.message_handler.messages


def test_check_for_ignoring_type_annotations():
    """A namedtuple created without annotations gets the annotation hint."""
    expected = "Consider using type annotations for field in namedtuples for better documentation."
    MyTuple = namedtuple("MyTuple", "field1 field2")
    instance = MyTuple(1, 2)
    observed = ObservableNamedTuple(instance)
    observed.check_for_ignoring_type_annotations()
    assert expected in observed.message_handler.messages
def test_check_for_ignoring_namedtuple_advantages():
    """A namedtuple with eleven fields gets the fewer-fields hint."""
    expected = "Consider using namedtuples for simpler data structures with fewer fields for better readability."
    field_names = " ".join(f"field{i}" for i in range(11))
    ManyFields = namedtuple("ManyFields", field_names)
    instance = ManyFields(*(range(11)))
    observed = ObservableNamedTuple(instance)
    observed.check_for_ignoring_namedtuple_advantages()
    assert expected in observed.message_handler.messages
class ObservableList(list):
    """
    The ObservableList is an enhanced version of a list that
    preserves the full original functionality of a list, but
    adds more features to it so that we keep track of anything that
    potentially happens in order to do dynamic analysis to each declared
    list.

    Tracked operations are append, extend, insert, remove, count and the
    ``in`` operator. The ``check_*`` methods turn those observations into
    suggestions stored on ``message_handler.messages``.
    """

    __slots__: Tuple[str, ...] = (
        "appended",
        "extended",
        "inserted",
        "removed",
        "count_",
        "in_operator_used",
        "message_handler",
    )

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        # The following flags keep track of base list methods.
        # False if not used (ever), True if used.
        self.appended: bool = False
        self.extended: bool = False
        self.inserted: bool = False
        self.removed: bool = False
        self.count_: bool = False
        self.in_operator_used: bool = False

        # Get the context of the current list being analyzed: the line and
        # file of the code that instantiated it.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        # Callers without a ``__file__`` global (REPL, exec'd code) used to
        # raise KeyError here.
        file_path: str = caller_frame.f_globals.get("__file__", "<unknown>")

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def append(self, item) -> None:
        super().append(item)
        self.appended = True

    def extend(self, iterable) -> None:
        super().extend(iterable)
        self.extended = True

    def insert(self, index, item) -> None:
        super().insert(index, item)
        self.inserted = True

    def remove(self, item) -> None:
        super().remove(item)
        self.removed = True

    def count(self, __value: Any) -> int:
        self.count_ = True
        return super().count(__value)

    def __contains__(self, __key: object) -> bool:
        self.in_operator_used = True
        return super().__contains__(__key)

    def get_list_dimension(self, lst):
        """Return the nesting depth of ``lst``: 0 for a non-list, 1 for a flat list."""
        if not isinstance(lst, list):
            return 0
        inner_dimensions = [self.get_list_dimension(item) for item in lst]
        return 1 + max(inner_dimensions, default=0)

    def check_numpy_array_instead_of_list(self):
        """Suggest a numpy array when the list is nested and numpy accepts it."""
        try:
            if self.get_list_dimension(self) >= 2:
                numpy.array(self)
                self.message_handler.messages.append(
                    "Consider using a numpy array instead of a list, for faster computations and optimized memory utilization"
                )
        except Exception:
            # Best effort: any failure to build the array simply means no
            # suggestion is made.
            pass

    def check_array_instead_of_list(self):
        """Suggest array.array when every element fits a single array type."""
        if self.can_list_be_converted_to_array():
            self.message_handler.messages.append(
                "Consider using an array.array instead of a list, for optimal memory consumption"
            )

    def can_list_be_converted_to_array(self):
        """
        Check if the list can be converted to an array.

        Returns:
            bool: True if the list can be converted, False otherwise.
        """
        if all(isinstance(item, int) for item in self):
            return True
        if all(isinstance(item, float) for item in self):
            return True
        if all(isinstance(item, str) and len(item) == 1 for item in self):
            try:
                array.array("u", self)
                return True
            except ValueError:
                return False
        return False

    def check_list_to_set_conversion(self):
        """
        Check if the list can be converted to a set.

        Returns:
            bool: True if the list is flat and holds only unique, hashable
            elements, False otherwise.
        """
        if self.get_list_dimension(self) != 1:
            return False
        try:
            unique_count = len(set(self))
        except TypeError:
            # Unhashable elements (dicts, sets, ...) cannot live in a set.
            return False
        return unique_count == len(self)

    def check_set_instead_of_list(self):
        """Suggest a set when elements are unique, noting ``in`` usage if seen."""
        if self.check_list_to_set_conversion():
            if self.in_operator_used:
                self.message_handler.messages.append(
                    "Consider using a set instead of a list, because of unique elements and element existence checking"
                )
            else:
                self.message_handler.messages.append(
                    "Consider using a set instead of a list, because of unique elements"
                )

    def check_Counter_insteaf_of_list(self):
        """Suggest collections.Counter when ``count`` was called on the list."""
        if self.count_:
            self.message_handler.messages.append(
                "Consider using a collections.Counter, to count occurences of elements"
            )

    def check_tuple_instead_of_list(self):
        """
        Suggest a tuple when every element is a string that is upper-case or
        starts with an upper-case letter, and none of append, extend, remove
        or insert was recorded.
        """
        # ``x[:1]`` instead of ``x[0]`` so that an empty string counts as
        # "not constant-like" rather than raising IndexError.
        constant_like = [
            x
            for x in self
            if isinstance(x, str) and (x.isupper() or x[:1].isupper())
        ]
        modified = any([self.appended, self.extended, self.removed, self.inserted])

        if len(constant_like) == len(self) and not modified:
            self.message_handler.messages.append(
                "Consider using a tuple since all elements seem to be constants, because the list was never modified"
            )

    def run(self):
        """
        Only run checkers so that we offer a better running interface
        for each observable.

        Added checkers should be called here in sequence.
        Might need to refactor this to add priority levels and maybe
        only give a single suggestion, but that needs way more specific analysis.
        Note that check_tuple_instead_of_list is not part of this sequence.
        """
        self.check_array_instead_of_list()
        self.check_numpy_array_instead_of_list()
        self.check_set_instead_of_list()
        self.check_Counter_insteaf_of_list()
        self.message_handler.print_messages()
class ObservableSet(set):
    """
    The ObservableSet is an enhanced version of a set that
    preserves the full original functionality of a set, but
    adds more features to it so that we keep track of anything that
    potentially happens in order to do dynamic analysis to each declared
    set.

    ``if_it_was_a_list`` records every element handed to ``add`` or
    ``update``, duplicates included, so that it can be compared against the
    actual size of the set.
    """

    __slots__: Tuple[str, ...] = (
        "poped",
        "removed",
        "added",
        "updated",
        "message_handler",
        "if_it_was_a_list",
    )

    def __init__(self, iterable=None) -> None:
        # ``set.__init__(None)`` raises TypeError, so the default has to be
        # translated into an empty iterable.
        super().__init__(() if iterable is None else iterable)
        self.poped: bool = False
        self.removed: bool = False
        self.added: bool = False
        self.updated: bool = False
        self.if_it_was_a_list: List[Any] = []

        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        # Callers without a ``__file__`` global (REPL, exec'd code) used to
        # raise KeyError here.
        file_path: str = caller_frame.f_globals.get("__file__", "<unknown>")

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def add(self, element: Any) -> None:
        super().add(element)
        self.added = True
        self.if_it_was_a_list.append(element)

    def pop(self) -> Any:
        self.poped = True
        return super().pop()

    def remove(self, element: Any) -> None:
        super().remove(element)
        self.removed = True

    def discard(self, element: Any) -> None:
        # Counted as a removal so the frozenset suggestion is not made for a
        # set that was modified through discard.
        super().discard(element)
        self.removed = True

    def clear(self) -> None:
        # Counted as a removal for the same reason as discard.
        super().clear()
        self.removed = True

    def update(self, *others: Iterable) -> None:
        # Materialize first: one-shot iterators would otherwise be exhausted
        # by set.update before their elements could be recorded.
        batches = [list(other) for other in others]
        super().update(*batches)
        self.updated = True
        for batch in batches:
            # Record the individual elements, not the iterable itself.
            self.if_it_was_a_list.extend(batch)

    def check_frozenset_instead_of_set(self):
        """Suggest a frozenset when no tracked modification was recorded."""
        if not any([self.added, self.removed, self.updated, self.poped]):
            self.message_handler.messages.append(
                "Consider using a frozenset, because no modification operation has been used on set."
            )

    def check_list_instead_of_set(self):
        """
        Suggest a list when noticeably more elements were inserted than the
        set ended up holding (more than 1.2 times its size).

        The suggestion here is quite subjective.
        NOTE: Might need to refactor this one
        """
        if len(self.if_it_was_a_list) > 1.2 * len(self) and any(
            [self.added, self.removed, self.updated, self.poped]
        ):
            self.message_handler.messages.append(
                "If you inteded to keep duplicates use a list instead, because we noticed a lot of duplicates entered the set"
            )

    def run(self):
        """Run every checker in sequence and print the collected messages."""
        self.check_frozenset_instead_of_set()
        self.check_list_instead_of_set()
        self.message_handler.print_messages()
227 | super().update(*others) 228 | self.updated = True 229 | for elem_ in others: 230 | self.if_it_was_a_list.append(elem_) 231 | 232 | def check_frozenset_instead_of_set(self): 233 | if not any([self.added, self.removed, self.updated, self.poped]): 234 | self.message_handler.messages.append( 235 | "Consider using a frozenset, because no modification operation has been used on set." 236 | ) 237 | 238 | def check_list_instead_of_set(self): 239 | """ 240 | The suggestion here is quite subjective. 241 | NOTE: Might need to refactor this one 242 | """ 243 | if len(self.if_it_was_a_list) > 1.2 * len(self) and any( 244 | [self.added, self.removed, self.updated, self.poped] 245 | ): 246 | self.message_handler.messages.append( 247 | "If you inteded to keep duplicates use a list instead, because we noticed a lot of duplicates entered the set" 248 | ) 249 | 250 | def run(self): 251 | self.check_frozenset_instead_of_set() 252 | self.check_list_instead_of_set() 253 | self.message_handler.print_messages() 254 | 255 | 256 | class ObservableTuple(tuple): 257 | """ 258 | The ObservableTuple is an enhanced version of a tuple that 259 | preserves the full original functionality of a tuple, but 260 | adds more features to it so that we keep track of anything that 261 | potentially happens in order to do dynamic analysis to each declared 262 | tuple. 
263 | """ 264 | 265 | def __new__(cls, *args, **kwargs): 266 | return super().__new__(cls, *args) 267 | 268 | def __init__(self, *args: Any, **kwargs) -> None: 269 | super().__init__() 270 | self.mul_: bool = False 271 | 272 | caller_frame = inspect.currentframe().f_back 273 | line_number: int = caller_frame.f_lineno 274 | file_path: str = caller_frame.f_globals["__file__"] 275 | 276 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path) 277 | 278 | def __mul__(self, n: int) -> "ObservableTuple": 279 | self.mul_ = True 280 | result = super().__mul__(n) 281 | return result 282 | 283 | def check_mutable_inside_tuple(self) -> None: 284 | for elem_ in self: 285 | if isinstance(elem_, (list, dict, set)): 286 | self.message_handler.messages.append( 287 | "Mutable element inside of a tuple. Consider using only immutables for optimal performance" 288 | ) 289 | 290 | def check_set_instead_of_tuple(self) -> None: 291 | try: 292 | if len(set(tuple(self))) == len(self): 293 | self.message_handler.messages.append( 294 | "Consider using a set since elements are all unique" 295 | ) 296 | except Exception: 297 | pass 298 | 299 | def check_tuple_multiplication(self) -> None: 300 | if self.mul_: 301 | self.message_handler.messages.append( 302 | "You multipled the tuple with a scalar value. If you inteded to multiply each element by that value, use a numpy array instead of a tuple." 303 | ) 304 | 305 | def run(self) -> None: 306 | self.check_mutable_inside_tuple() 307 | self.check_tuple_multiplication() 308 | self.check_set_instead_of_tuple() 309 | self.message_handler.print_messages() 310 | 311 | 312 | class ObservableDict(dict): 313 | """ 314 | The ObservableDict is an enhanced version of a dict that 315 | preserves the full original functionality of a dict, but 316 | adds more features to it so that we keep track of anything that 317 | potentially happens in order to do dynamic analysis to each declared 318 | dict. 
319 | """ 320 | 321 | __slots__: Tuple[str] = ( 322 | "keys_", 323 | "update_", 324 | "setitem_", 325 | "delitem_", 326 | "getitem_", 327 | "pop_", 328 | "items_", 329 | "clear_", 330 | "values_", 331 | "message_handler", 332 | ) 333 | 334 | def __init__(self, *args, **kwargs) -> None: 335 | super().__init__(*args, **kwargs) 336 | self.keys_: bool = False 337 | self.update_: bool = False 338 | self.setitem_: bool = False 339 | self.delitem_: bool = False 340 | self.getitem_: bool = False 341 | self.pop_: bool = False 342 | self.items_: bool = False 343 | self.clear_: bool = False 344 | self.values_: bool = False 345 | 346 | caller_frame = inspect.currentframe().f_back 347 | line_number: int = caller_frame.f_lineno 348 | file_path: str = caller_frame.f_globals["__file__"] 349 | 350 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path) 351 | 352 | def __setitem__(self, key, value) -> None: 353 | super().__setitem__(key, value) 354 | self.setitem_ = True 355 | 356 | def __delitem__(self, key) -> None: 357 | super().__delitem__(key) 358 | self.delitem_ = True 359 | 360 | def __getitem__(self, __key: Any) -> Any: 361 | self.getitem_ = True 362 | return super().__getitem__(__key) 363 | 364 | def clear(self) -> None: 365 | super().clear() 366 | self.clear_ = True 367 | 368 | def pop(self, key, default=None) -> "ObservableDict": 369 | result = super().pop(key, default) 370 | return result 371 | 372 | def popitem(self) -> "ObservableDict": 373 | result = super().popitem() 374 | return result 375 | 376 | def update(self, *args, **kwargs) -> None: 377 | super().update(*args, **kwargs) 378 | self.update_ = True 379 | 380 | def setdefault(self, key, default=None) -> "ObservableDict": 381 | result = super().setdefault(key, default) 382 | return result 383 | 384 | def copy(self) -> "ObservableDict": 385 | result = super().copy() 386 | return result 387 | 388 | def keys(self) -> dict_keys: 389 | self.keys_ = True 390 | return super().keys() 391 | 392 | 
def values(self) -> dict_values: 393 | self.values_ = True 394 | return super().values() 395 | 396 | def items(self) -> dict_items: 397 | self.items_ = True 398 | return super().items() 399 | 400 | def check_Counter_instead_of_dict(self) -> None: 401 | if all([True for value in self.values() if isinstance(value, int)]): 402 | self.message_handler.messages.append( 403 | "If you are using this dict to store occurences of elements, consider using a collections.Counter" 404 | ) 405 | 406 | def check_dict_get_method(self) -> None: 407 | if self.getitem_: 408 | self.message_handler.messages.append( 409 | "For dict key retreval, always consider using 'your_dict'.get('key') instead of 'your_dict'['key']" 410 | ) 411 | 412 | def check_list_instead_of_dict(self) -> None: 413 | """ 414 | Suggest to use a list when a dict seems to not be used optimally 415 | """ 416 | if (not any([self.getitem_, self.keys_, self.items_]) and self.values_) or ( 417 | not any([self.getitem_, self.items_, self.values_]) and self.keys_ 418 | ): 419 | self.message_handler.messages.append( 420 | "It seems like you never used this dict for anything otherthan somehow using the values, use a list/array" 421 | ) 422 | 423 | def run(self) -> None: 424 | self.check_Counter_instead_of_dict() 425 | self.check_dict_get_method() 426 | self.check_list_instead_of_dict() 427 | self.message_handler.print_messages() 428 | 429 | 430 | class ObservableNumpyArray: 431 | """ 432 | The ObservableNumpyArray is a numpy analyzer that takes the declared numpy array 433 | and does internal attribute and value checkings for potential improvement suggestions. 
434 | """ 435 | 436 | __slots__: Tuple[str] = ("arr__", "message_handler") 437 | 438 | def __init__(self, arr__) -> None: 439 | self.arr__ = arr__ 440 | 441 | caller_frame = inspect.currentframe().f_back 442 | line_number: int = caller_frame.f_lineno 443 | file_path: str = caller_frame.f_globals["__file__"] 444 | 445 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path) 446 | 447 | def check_array_data_type(self) -> None: 448 | """ """ 449 | current_dtype = self.arr__.dtype 450 | min_dtype = numpy.min_scalar_type(numpy.max(self.arr__)) 451 | max_number = numpy.max(self.arr__) 452 | if current_dtype != min_dtype: 453 | self.message_handler.messages.append( 454 | f"Array was initiated with {current_dtype} integers, but values do not exceed {max_number}. Consider using {min_dtype} for optimization." 455 | ) 456 | 457 | def check_array_sparsity(self, threshold: float = 0.8) -> None: 458 | """Suggests using sparse arrays for highly sparse data to save memory.""" 459 | 460 | sparsity = 1.0 - numpy.count_nonzero(self.arr__) / float(self.arr__.size) 461 | if sparsity > threshold: 462 | try: 463 | _ = sp.csr_matrix(self.arr__) 464 | self.message_handler.messages.append( 465 | f"The array is highly sparse (sparsity: {sparsity:.2%}). Consider using a sparse array representation for memory efficiency." 466 | ) 467 | except Exception: 468 | pass 469 | 470 | def check_for_nan_values(self) -> None: 471 | """Suggests using masked arrays or handling NaN values.""" 472 | 473 | if numpy.isnan(self.arr__).any(): 474 | try: 475 | _ = numpy.ma.masked_array(self.arr__, mask=numpy.isnan(self.arr__)) 476 | self.message_handler.messages.append( 477 | "The array contains NaN values. Consider using masked arrays or handling NaN values appropriately." 
478 | ) 479 | except Exception: 480 | pass 481 | 482 | def check_for_monotonicity(self) -> None: 483 | """Suggests using specialized algorithms or data structures for monotonic arrays.""" 484 | 485 | if numpy.all(numpy.diff(self.arr__) >= 0) or numpy.all( 486 | numpy.diff(self.arr__) <= 0 487 | ): 488 | self.message_handler.messages.append( 489 | "The array is monotonic. Consider using specialized algorithms or data structures for monotonic arrays." 490 | ) 491 | 492 | def check_for_categorical_data(self) -> None: 493 | """Suggests using categorical data types for arrays with a small number of unique values.""" 494 | 495 | unique_values_count = len(numpy.unique(self.arr__)) 496 | if unique_values_count < len(self.arr__) / 2: 497 | self.message_handler.messages.append( 498 | f"The array contains categorical data with {unique_values_count} unique values. Consider using categorical data types for efficiency, like pd.Categorical()" 499 | ) 500 | 501 | def check_for_symmetry(self) -> None: 502 | """Suggests using specialized algorithms or data structures for symmetric arrays.""" 503 | if numpy.array_equal(self.arr__, self.arr__.T): 504 | self.message_handler.messages.append( 505 | "The array is symmetric. Consider using specialized algorithms to operate on symmetric arrays, for example functions from scipy" 506 | ) 507 | 508 | def check_for_constant_values(self) -> None: 509 | """Suggests using a single value or a constant data type if all elements are the same.""" 510 | if numpy.all(self.arr__ == self.arr__[0]): 511 | self.message_handler.messages.append( 512 | "All elements in the array are the same. Consider using a single value, a constant or collections.Counter for memory efficiency." 
513 | ) 514 | 515 | def run(self) -> None: 516 | self.check_array_data_type() 517 | self.check_array_sparsity() 518 | self.check_for_categorical_data() 519 | self.check_for_constant_values() 520 | self.check_for_nan_values() 521 | self.check_for_monotonicity() 522 | self.check_for_symmetry() 523 | self.message_handler.print_messages() 524 | 525 | 526 | class ObservablePandasDataFrame: 527 | """ 528 | The ObservablePandasDataFrame is a Pandas DataFrame analyzer that takes the declared DataFrame 529 | and does internal attribute and value checkings for potential improvement suggestions. 530 | """ 531 | 532 | __slots__ = ("df__", "message_handler") 533 | 534 | def __init__(self, df__) -> None: 535 | self.df__ = df__ 536 | 537 | caller_frame = inspect.currentframe().f_back 538 | line_number: int = caller_frame.f_lineno 539 | file_path: str = caller_frame.f_globals["__file__"] 540 | 541 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path) 542 | 543 | def check_for_missing_values(self) -> None: 544 | """Suggests handling missing values appropriately.""" 545 | 546 | if self.df__.isnull().any().any(): 547 | self.message_handler.messages.append( 548 | "The DataFrame contains missing values. Consider handling missing values." 549 | ) 550 | 551 | def check_for_constant_columns(self) -> None: 552 | """Suggests dropping constant columns for memory efficiency.""" 553 | 554 | constant_columns = self.df__.columns[self.df__.nunique() == 1] 555 | if constant_columns.any(): 556 | self.message_handler.messages.append( 557 | f"The DataFrame contains constant columns ({constant_columns.tolist()}). Consider dropping them for memory efficiency." 558 | ) 559 | 560 | def check_for_duplicate_rows(self) -> None: 561 | """Suggests handling duplicate rows appropriately.""" 562 | 563 | if self.df__.duplicated().any(): 564 | self.message_handler.messages.append( 565 | "The DataFrame contains duplicate rows. Consider handling duplicate rows appropriately." 
566 | ) 567 | 568 | def check_series_insteafd_of_dataframe(self) -> None: 569 | """Suggests using alternative data structures for specific scenarios.""" 570 | if len(self.df__.columns) == 1: 571 | self.message_handler.messages.append( 572 | "Consider using a Series instead of a DataFrame when you have only one column of data." 573 | ) 574 | 575 | def check_numpy_instead_of_dataframe(self) -> None: 576 | """""" 577 | if len(self.df__.index) > 10000 and len(self.df__.columns) < 5: 578 | self.message_handler.messages.append( 579 | "Consider using a NumPy array or a specialized data structure if you have a large number of rows and a small number of columns." 580 | ) 581 | 582 | def run(self) -> None: 583 | self.check_for_constant_columns() 584 | self.check_for_duplicate_rows() 585 | self.check_for_missing_values() 586 | self.check_numpy_instead_of_dataframe() 587 | self.check_series_insteafd_of_dataframe() 588 | self.message_handler.print_messages() 589 | 590 | 591 | class ObservableNamedTuple: 592 | """ 593 | The ObservableNamedTuple is an enhanced version of a namedtuple that 594 | preserves the full original functionality of a namedtuple, but 595 | adds more features to it so that we keep track of anything that 596 | potentially happens in order to do dynamic analysis to each declared 597 | namedtuple. 
598 | """ 599 | 600 | __slots__: Tuple[set] = ("namedtuple__", "message_handler") 601 | 602 | def __init__(self, namedtuple__) -> None: 603 | self.namedtuple__ = namedtuple__ 604 | 605 | caller_frame = inspect.currentframe().f_back 606 | line_number: int = caller_frame.f_lineno 607 | file_path: str = caller_frame.f_globals["__file__"] 608 | 609 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path) 610 | 611 | def check_for_excessive_nesting(self) -> None: 612 | """Suggests avoiding excessive nesting of namedtuples.""" 613 | 614 | for field_name in self.namedtuple__._fields: 615 | if isinstance(getattr(self.namedtuple__, field_name), tuple): 616 | self.message_handler.messages.append( 617 | "Avoid excessive nesting of namedtuples to keep the structure simple and readable. Consider usina a class instead" 618 | ) 619 | break 620 | 621 | def check_for_ignoring_type_annotations(self) -> None: 622 | """Suggests using type annotations to document the expected types of each field.""" 623 | class_annotations = getattr(self.namedtuple__, "__annotations__", {}) 624 | if not class_annotations: 625 | self.message_handler.messages.append( 626 | "Consider using type annotations for field in namedtuples for better documentation." 627 | ) 628 | 629 | def check_for_ignoring_namedtuple_advantages(self) -> None: 630 | """Suggests taking advantage of the simplicity of namedtuples.""" 631 | 632 | if len(self.namedtuple__._fields) > 10: 633 | self.message_handler.messages.append( 634 | "Consider using namedtuples for simpler data structures with fewer fields for better readability." 635 | ) 636 | 637 | def run(self): 638 | self.check_for_ignoring_type_annotations() 639 | self.check_for_ignoring_namedtuple_advantages() 640 | self.check_for_excessive_nesting() 641 | self.message_handler.print_messages() 642 | --------------------------------------------------------------------------------