├── VERSION
├── tests
├── __init__.py
├── test_file.py
├── test_file_transformed.py
├── test_cli.py
├── test_main.py
├── test_helpers.py
├── test_observable_transformations.py
├── test_pyggester.py
├── test_command_handlers.py
├── test_module_importer.py
├── test_wrappers.py
└── test_observables.py
├── pyggester
├── __init__.py
├── data
│ └── help_files
│ │ ├── __init__.py
│ │ ├── static_helper.md
│ │ └── dynamic_helper.md
├── observable_collector.py
├── text_formatters.py
├── main.py
├── message_handler.py
├── helpers.py
├── cli.py
├── command_handlers.py
├── module_importer.py
├── observable_transformations.py
├── pyggester.py
├── wrappers.py
└── observables.py
├── pytest.ini
├── pyggester_logo.png
├── pyggester-abstract-execution-flow.png
├── requirements.txt
├── setup.py
├── LICENSE
├── .gitignore
├── contributing.md
└── README.md
/VERSION:
--------------------------------------------------------------------------------
1 | 1.0.1
--------------------------------------------------------------------------------
/tests/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pyggester/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pyggester/data/help_files/__init__.py:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pyggester/data/help_files/static_helper.md:
--------------------------------------------------------------------------------
1 |
--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | testpaths=tests
3 |
--------------------------------------------------------------------------------
/tests/test_file.py:
--------------------------------------------------------------------------------
1 | def func1():
2 | pass
3 |
--------------------------------------------------------------------------------
/pyggester/observable_collector.py:
--------------------------------------------------------------------------------
# Shared module-level list. Transformed code elsewhere in this repository
# appends observables to it and later iterates over it calling ``run()``.
OBSERVABLE_COLLECTOR = []
2 |
--------------------------------------------------------------------------------
/pyggester_logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ValdonVitija/pyggester/HEAD/pyggester_logo.png
--------------------------------------------------------------------------------
/pyggester-abstract-execution-flow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/ValdonVitija/pyggester/HEAD/pyggester-abstract-execution-flow.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | astor==0.8.1
2 | click==8.1.7
3 | markdown-it-py==3.0.0
4 | mdurl==0.1.2
5 | numpy==1.26.2
6 | pandas==2.1.4
7 | Pygments==2.17.2
8 | python-dateutil==2.8.2
9 | pytz==2023.3.post1
10 | rich==13.7.0
11 | scipy==1.11.4
12 | six==1.16.0
13 | typer==0.9.0
14 | typing_extensions==4.9.0
15 | tzdata==2023.3
--------------------------------------------------------------------------------
/tests/test_file_transformed.py:
--------------------------------------------------------------------------------
1 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
2 | from pyggester.observables import ObservableList, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple, ObservableNumpyArray, ObservablePandasDataFrame
3 |
4 |
5 | def func1():
6 | pass
7 |
8 |
9 | for observable in OBSERVABLE_COLLECTOR:
10 | observable.run()
11 |
--------------------------------------------------------------------------------
/pyggester/text_formatters.py:
--------------------------------------------------------------------------------
1 | from rich.console import Console
2 | from rich.panel import Panel
3 |
4 |
def custom_print(
    message: str = "",
    style: str = "bold",
    border_style: str = "",
    title: str = "",
):
    """Print ``message`` inside a rich panel.

    Args:
        message: Text to show; an empty message prints nothing.
        style: Style passed to the panel.
        border_style: Style passed to the panel border.
        title: Title passed to the panel.
    """
    if not message:
        return

    framed = Panel(
        f"[bold yellow]{message}",
        style=style,
        border_style=border_style,
        title=title,
    )
    console = Console()
    console.print(framed)
19 |
--------------------------------------------------------------------------------
/tests/test_cli.py:
--------------------------------------------------------------------------------
1 | from typer.testing import CliRunner
2 | from pyggester.cli import app
3 |
4 | runner = CliRunner()
5 |
6 |
def test_static_analysis():
    """The ``static`` subcommand exits with status 0 when given ``--path``."""
    cli_args = ["static", "--path", "test_file.py"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 0
10 |
11 |
def test_dynamic_transformation():
    """The ``transform`` subcommand exits with status 0 for the sample file."""
    cli_args = ["transform", "tests/test_file.py"]
    outcome = runner.invoke(app, cli_args)
    assert outcome.exit_code == 0
15 |
16 |
def test_help():
    """The top-level ``--help`` flag exits with status 0."""
    outcome = runner.invoke(app, ["--help"])
    assert outcome.exit_code == 0
20 |
--------------------------------------------------------------------------------
/pyggester/main.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from pyggester.cli import get_app
3 |
4 |
# ASCII-art banner. A raw string keeps the backslashes in the art literal
# instead of having them parsed as (invalid) escape sequences.
PYGGESTER_LOGO = r"""
_____
_____________ ________ _______ ______________ /_____________
___ __ \_ / / /_ __ `/_ __ `/ _ \_ ___/ __/ _ \_ ___/
__ /_/ / /_/ /_ /_/ /_ /_/ // __/(__ )/ /_ / __/ /
_ .___/_\__, / _\__, / _\__, / \___//____/ \__/ \___//_/
/_/ /____/ /____/ /____/
"""
13 |
14 |
def main():
    """Console entry point: optionally print the banner, then run the CLI.

    The banner is printed only for a bare invocation or when the single
    argument is exactly ``--help``.
    """
    cli_args = sys.argv[1:]
    # Compare whole tokens: a substring test on the joined arguments would
    # also match values that merely contain "--help".
    if cli_args in ([], ["--help"]):
        print(PYGGESTER_LOGO)
    get_app()
20 |
21 |
22 | if __name__ == "__main__":
23 | main()
24 |
--------------------------------------------------------------------------------
/tests/test_main.py:
--------------------------------------------------------------------------------
1 | import sys
2 | import pytest
3 | from unittest.mock import patch
4 | from io import StringIO
5 | from pyggester.main import main, PYGGESTER_LOGO
6 |
7 |
def test_main_with_help():
    """A bare ``pyggest`` invocation prints the logo and exits with code 0."""
    argv_patch = patch.object(sys, "argv", ["pyggest"])
    stdout_patch = patch("sys.stdout", new_callable=StringIO)
    with argv_patch, stdout_patch as captured:
        with pytest.raises(SystemExit) as exit_info:
            main()

    assert exit_info.value.code == 0
    printed = captured.getvalue()
    assert PYGGESTER_LOGO in printed
17 |
18 |
def test_main_without_help():
    """``main`` calls ``get_app`` exactly once."""
    argv_patch = patch.object(sys, "argv", ["pyggest"])
    app_patch = patch("pyggester.main.get_app")
    with argv_patch, app_patch as fake_get_app:
        main()

    fake_get_app.assert_called_once()
25 |
--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 |
3 | import setuptools
4 |
def _read_text(path):
    """Return the whole content of ``path`` decoded as UTF-8."""
    with open(path, "r", encoding="UTF-8") as f_stream:
        return f_stream.read()


# Strip line endings and drop blank/comment lines so every entry handed to
# setuptools is a clean requirement specifier.
install_requires = [
    line.strip()
    for line in _read_text("requirements.txt").splitlines()
    if line.strip() and not line.lstrip().startswith("#")
]


setuptools.setup(
    name="pyggester",
    version=_read_text("VERSION").strip(),
    packages=setuptools.find_packages(include=["pyggester", "pyggester.*"]),
    long_description=_read_text("README.md"),
    long_description_content_type="text/markdown",
    author="Valdon Vitija",
    author_email="valdonvitijaa@gmail.com",
    license="MIT",
    install_requires=install_requires,
    entry_points={
        "console_scripts": [
            "pyggest=pyggester.main:main",
        ],
    },
    package_data={"pyggester": ["data/*", "data/help_files/*"]},
)
28 |
--------------------------------------------------------------------------------
/pyggester/message_handler.py:
--------------------------------------------------------------------------------
1 | """
2 | Message Handler by default should stream messages into the standard console, but it would be
3 | better if we add the capability of streaming the messages/suggestions into files with different formats
4 | """
5 | from typing import List, Tuple
6 | from pyggester.text_formatters import custom_print
7 |
8 |
class MessageHandler:
    """Collect suggestion messages for one source location and print them.

    Instances only carry the attributes named in ``__slots__``.
    """

    __slots__: Tuple[str, ...] = ("messages", "line_nr", "file_path")

    def __init__(self, line_nr, file_path) -> None:
        """
        Args:
            line_nr: Line number shown in the printed header.
            file_path: File path shown in the printed header.
        """
        self.messages: List[str] = []
        self.line_nr: int = line_nr
        self.file_path: str = file_path

    def print_messages(self) -> None:
        """Print the header and one bullet per message; do nothing when empty."""
        if not self.messages:
            return

        header = f"{self.line_nr} | Suggestions({self.file_path}):"
        bullets = [f" [*] {message}" for message in self.messages]
        custom_print("\n".join([header, *bullets]), border_style="green")
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 ValdonVitijaa
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/tests/test_helpers.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | from pyggester.helpers import (
3 | source_code_to_str,
4 | PathMissingSourceCodeConversionError,
5 | not_implemented,
6 | )
7 |
8 |
@pytest.fixture
def get_single_file_abs_path():
    """Absolute path (as ``str``) of the ``test_file.py`` next to this module."""
    # Imported locally so this fixture does not rely on the module's import list.
    import pathlib

    # Resolve relative to this file instead of a hard-coded checkout location.
    return str(pathlib.Path(__file__).resolve().parent / "test_file.py")
12 |
13 |
@pytest.fixture
def get_code_as_str():
    """Source text the sample module is expected to contain."""
    return "def func1():\n    pass\n"
20 |
21 |
@pytest.fixture
def get_code_from_file(get_single_file_abs_path):
    """Text of the sample module as read from disk (UTF-8)."""
    with open(get_single_file_abs_path, "r", encoding="UTF-8") as source_file:
        contents = source_file.read()
    return contents
26 |
27 |
def test_source_code_to_str_with_path(
    get_code_as_str, get_code_from_file, get_single_file_abs_path
):
    """``source_code_to_str`` returns the sample file's text unchanged."""
    # Sanity-check the fixture data first.
    assert get_code_from_file == get_code_as_str
    # Exercise the helper itself, which the fixture comparison alone never does.
    assert source_code_to_str(get_single_file_abs_path) == get_code_as_str
30 |
31 |
def test_source_code_to_str_without_path():
    """Calling the helper with no path raises the dedicated error."""
    expected_error = PathMissingSourceCodeConversionError
    with pytest.raises(expected_error):
        source_code_to_str()
35 |
36 |
@not_implemented
def example_function():
    """Placeholder used to exercise the ``not_implemented`` decorator."""
40 |
41 |
def test_not_implemented_decorator():
    """Calling a decorated function raises ``NotImplementedError`` naming it."""
    expected_message = "example_function is not yet implemented"
    with pytest.raises(NotImplementedError, match=expected_message):
        example_function()
47 |
--------------------------------------------------------------------------------
/tests/test_observable_transformations.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import astor
3 | from pyggester.observable_transformations import (
4 | ObservableCollectorAppender,
5 | ObservableRunner,
6 | apply_observable_collector_transformations,
7 | )
8 |
9 |
def test_observable_collector_appender():
    """The appender adds an ``OBSERVABLE_COLLECTOR.append`` call for the assignment."""
    module = ast.parse("list_ = ObservableList([1,2,3])")
    rewritten = ObservableCollectorAppender().visit(module)

    rendered = astor.to_source(rewritten)
    assert "OBSERVABLE_COLLECTOR.append(list_)" in rendered
18 |
19 |
def test_observable_runner():
    """The runner emits a loop that calls ``run()`` on each collected observable."""
    module = ast.parse("import module1\nimport module2")
    rewritten = ObservableRunner().visit(module)
    rendered = astor.to_source(rewritten)

    expected_fragments = (
        "for observable in OBSERVABLE_COLLECTOR:",
        "observable.run()",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
29 |
30 |
def test_apply_observable_collector_transformations():
    """The combined transformation output imports ``pyggester.observables``."""
    module = ast.parse("import module1\nimport module2")
    rendered = apply_observable_collector_transformations(
        module, run_observables=True
    )

    accepted_imports = (
        "from pyggester.observables import",
        "import pyggester.observables",
    )
    assert any(statement in rendered for statement in accepted_imports)
42 |
--------------------------------------------------------------------------------
/tests/test_pyggester.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import tempfile
3 | import pathlib
4 | from unittest.mock import patch
5 | from pyggester.pyggester import (
6 | PyggesterDynamic,
7 | )
8 |
9 |
@pytest.fixture
def temp_dir():
    """Yield a fresh temporary directory as a ``pathlib.Path``."""
    with tempfile.TemporaryDirectory() as raw_path:
        scratch = pathlib.Path(raw_path)
        yield scratch
14 |
15 |
@pytest.fixture
def temp_file(temp_dir):
    """Create ``test_file.py`` with a one-line script inside ``temp_dir``."""
    sample = temp_dir.joinpath("test_file.py")
    sample.write_text("print('Hello, World!')", encoding="UTF-8")
    return sample
21 |
22 |
def test_initialization():
    """``path_`` holds the absolute form of the path given to the constructor."""
    raw_path = "/path/to/directory"
    expected = pathlib.Path(raw_path).absolute()
    assert PyggesterDynamic(raw_path).path_ == expected
27 |
28 |
def test_existence_check():
    """Running against a missing path raises ``FileNotFoundError``."""
    missing_path = "/non/existent/path"
    with pytest.raises(FileNotFoundError):
        PyggesterDynamic(missing_path).run()
33 |
34 |
def test_file_transformation(temp_file):
    """Transforming a single file writes ``<stem>_transformed<suffix>`` beside it."""
    PyggesterDynamic(str(temp_file)).run()

    expected_name = f"{temp_file.stem}_transformed{temp_file.suffix}"
    assert temp_file.with_name(expected_name).exists()
42 |
43 |
def test_directory_transformation(temp_dir, temp_file):
    """Transforming a directory creates a sibling ``<name>_transformed`` directory."""
    # Imported locally so this test does not rely on the module's import list.
    import shutil

    transformed_dir = temp_dir.parent / f"{temp_dir.name}_transformed"
    try:
        with patch("builtins.input", return_value="test_file.py"):
            PyggesterDynamic(str(temp_dir)).run()
        assert transformed_dir.exists() and transformed_dir.is_dir()
    finally:
        # The expected output sits next to (not inside) the temporary
        # directory, so the temp_dir fixture never removes it.
        shutil.rmtree(transformed_dir, ignore_errors=True)
50 |
--------------------------------------------------------------------------------
/tests/test_command_handlers.py:
--------------------------------------------------------------------------------
1 | import pytest
2 | import typer
3 | import unittest
4 | from unittest.mock import patch, Mock
5 | from pyggester.command_handlers import PyggestTransform
6 | from collections import namedtuple
7 |
8 |
@pytest.fixture
def pyggest_transform_instance():
    """A ``PyggestTransform`` built with placeholder path and help values."""
    init_kwargs = {"path_": "test_path", "help_": "test_help"}
    return PyggestTransform(**init_kwargs)
12 |
13 |
def test_pyggest_transform_initialization(pyggest_transform_instance):
    """Constructor arguments are stored unchanged on the instance."""
    handler = pyggest_transform_instance
    stored = (handler.path_, handler.help_)
    assert stored == ("test_path", "test_help")
17 |
18 |
class TestPyggestTransform(unittest.TestCase):
    """``PyggestTransform.process`` with ``PyggesterDynamic.run`` patched out."""

    @patch("pyggester.pyggester.PyggesterDynamic.run")
    def test_process_with_help(self, mock_run):
        """With ``help_`` set, ``process`` ends by raising ``typer.Exit``."""
        handler = PyggestTransform(path_="your_path", help_=True)
        with self.assertRaises(typer.Exit) as caught:
            handler.process()

        assert type(caught.exception) == typer.Exit

    @patch("pyggester.pyggester.PyggesterDynamic.run")
    def test_process_without_help(self, mock_run):
        """Without ``help_``, ``process`` calls ``run`` exactly once."""
        handler = PyggestTransform(path_="your_path", help_=False)
        handler.process()
        mock_run.assert_called_once()

    @patch(
        "pyggester.pyggester.PyggesterDynamic.run",
        side_effect=typer.Exit("Test Exception"),
    )
    def test_process_exception_handling(self, mock_run):
        """A ``typer.Exit`` escaping ``process`` has an empty string form."""
        handler = PyggestTransform(path_="your_path", help_=True)
        with self.assertRaises(typer.Exit) as caught:
            handler.process()

        rendered = str(caught.exception)
        self.assertEqual(rendered, "")
44 |
--------------------------------------------------------------------------------
/pyggester/helpers.py:
--------------------------------------------------------------------------------
1 | import pathlib
2 | import os
3 | from functools import lru_cache
4 |
5 |
@lru_cache
def get_help_files_dir() -> pathlib.Path:
    """
    Return the directory that holds the help files.

    The location is ``data/help_files`` underneath the directory containing
    this module. The result is cached after the first call.

    Returns:
        pathlib.Path: The directory path for help files.
    """
    package_root = pathlib.Path(__file__).parent
    return package_root / "data" / "help_files"
25 |
26 |
class PathMissingSourceCodeConversionError(Exception):
    """
    Raised when no path is supplied for source-code-to-string conversion.

    Construction is inherited unchanged from ``Exception``: any positional
    arguments are stored in ``args``.
    """
34 |
35 |
def source_code_to_str(path=None) -> str:
    """
    Read the file at ``path`` and return its content as a single string.

    Args:
        path: Location of the source file. A value is required; the ``None``
            default only exists so that a missing argument produces the
            dedicated error below.

    Returns:
        str: The file's text, decoded as UTF-8.

    Raises:
        PathMissingSourceCodeConversionError: If ``path`` is missing or empty.
    """
    if not path:
        raise PathMissingSourceCodeConversionError(
            "A path is required to convert source code to a string."
        )

    with open(path, "r", encoding="UTF-8") as f_stream:
        return f_stream.read()
46 |
47 |
def not_implemented(func):
    """
    Decorator to flag a function as not yet implemented.

    Calling the decorated function raises a NotImplementedError whose message
    names the original function. The wrapper keeps the original function's
    metadata (name, docstring, ``__wrapped__``).

    Args:
        func (callable): The function to be decorated.

    Returns:
        callable: A wrapper function that raises a NotImplementedError.
    """
    # Imported locally so this decorator does not rely on the module's import list.
    from functools import wraps

    @wraps(func)
    def wrapper(*args, **kwargs):
        raise NotImplementedError(f"{func.__name__} is not yet implemented")

    return wrapper
66 |
--------------------------------------------------------------------------------
/pyggester/cli.py:
--------------------------------------------------------------------------------
1 | """
2 | The structure of this CLI app based on typer:
3 | app (typer) - pyggest:
4 | static - subcommand:
5 | Options: ...
6 | transform - subcommand
7 | Options: ...
8 | """
9 |
10 | from functools import lru_cache
11 | from typing import List
12 | import typer
13 | from typing_extensions import Annotated
14 | from pyggester.command_handlers import PyggestTransform
15 | from pyggester.helpers import not_implemented
16 |
17 | __all__: List[str] = ["get_app"]
18 |
19 | app = typer.Typer(no_args_is_help=True)
20 |
21 |
22 | # pylint: disable=W0613
@app.command(no_args_is_help=False, name="static")
def static_analysis(
    path_: Annotated[str, typer.Option("--path", help="path to file/files")] = None,
    lists_: Annotated[
        bool,
        typer.Option(
            "--lists",
            help="Use this option to include lists in analysis",
        ),
    ] = False,
    dicts_: Annotated[
        bool,
        typer.Option(
            "--dicts",
            help="Use this option to include dicts in analysis",
        ),
    ] = False,
    sets_: Annotated[
        bool,
        typer.Option(
            "--sets",
            help="Use this option to include sets in analysis",
        ),
    ] = False,
    tuples_: Annotated[
        bool,
        typer.Option(
            "--tuples",
            help="Use this option to include tuples in analysis",
        ),
    ] = False,
    all_: Annotated[
        bool,
        typer.Option(
            "--all",
            help="If you want pyggester to use all its capabilities use this option",
        ),
    ] = False,
    help_: Annotated[
        bool, typer.Option("--help", help="Get full documentation")
    ] = False,
):
    """
    Perform static analysis using PyggestStatic.

    This command allows you to perform static analysis using PyggestStatic, a tool for
    analyzing Python code. You can specify various options to customize the analysis.

    """
    # The options above are accepted but not acted on yet. Tell the user, then
    # stop with the default (success) exit status. An exit exception only has
    # an effect when it is raised, and it takes an exit code, not a message.
    typer.echo("Not implemented currently.")
    raise typer.Exit()
73 |
74 |
@app.command(no_args_is_help=True, name="transform")
def dynamic_transformation(
    path_: Annotated[str, typer.Argument(help="path to file/files")] = ".",
    help_: Annotated[
        bool, typer.Option("--help", help="Get full documentation")
    ] = False,
):
    """
    Perform dynamic transformation using PyggesterDynamic.
    """
    # Hand both CLI values to the handler and let it drive the whole command.
    PyggestTransform(path_=path_, help_=help_).process()
87 |
88 |
@lru_cache
def get_app():
    """
    Run the main typer cli app and return the result of that call.

    NOTE(review): despite the name, this invokes ``app()`` rather than
    returning the ``app`` object itself.
    """
    return app()
95 |
--------------------------------------------------------------------------------
/pyggester/command_handlers.py:
--------------------------------------------------------------------------------
1 | import abc
2 | import os
3 | import pathlib
4 | import typer
5 | from typing import Dict, List, ClassVar, Union, Tuple
6 | from rich.console import Console
7 | from rich.markdown import Markdown
8 | from enum import Enum, auto
9 | from pyggester.text_formatters import custom_print
10 | from pyggester.helpers import get_help_files_dir
11 | from pyggester.pyggester import PyggesterDynamic
12 |
13 | __all__: List[str] = ["PyggestTransform"]
14 |
15 | README_FILES_DIR: pathlib.Path = get_help_files_dir()
16 |
17 |
class CommandHandler(abc.ABC):
    """
    Template command handler.
    Add as many methods in the classes that derive from this base handler as you need.
    If each command only needed a single function to process the logic this design pattern
    wouldn't be necessary. The main reason why each handler is a class is because classes
    can act like namespaces, so we can have the same function names and variable names
    under a different namespace (class).

    Subclasses are expected to provide ``help_`` and ``README`` attributes,
    which ``handle_help_`` reads.
    """

    @abc.abstractmethod
    def process(self) -> None:
        ...

    def handle_help_(self) -> None:
        """
        Handle the --help option by displaying the command's help file.

        When ``self.help_`` is truthy, the file named by ``self.README`` inside
        the help files directory is rendered as Markdown on the console and the
        program is terminated, because no other operation should take place
        once help was requested. Otherwise nothing happens.

        Raises:
            typer.Exit: After the help text has been printed.
        """
        # pylint: disable=E1101
        if self.help_:
            console = Console()
            # Read with the same explicit encoding the rest of the project uses.
            with open(
                os.path.join(README_FILES_DIR, self.README), encoding="UTF-8"
            ) as readme:
                markdown = Markdown(readme.read())
            console.print(markdown)
            raise typer.Exit()

    def handle_no_valid_combination(self) -> None:
        """
        Handle the case when there is no valid combination/usage of options.

        Prints an error panel through custom_print and then terminates the program.

        Raises:
            typer.Exit: Always, after the message has been printed.
        """
        custom_print(
            "No valid combination/usage of options! Try --help or --HELP",
            border_style="red",
            title="EXIT INFO",
        )
        raise typer.Exit()
64 |
65 |
class PyggestTransform(CommandHandler):
    """
    This class handles the variations of options supported under:
    pyggest transform
    """

    # Every attribute assigned in __init__ is declared here, README included.
    __slots__: ClassVar[Tuple[str, ...]] = ("path_", "help_", "README")

    def __init__(self, path_, help_) -> None:
        """
        Args:
            path_: Path handed to PyggesterDynamic.
            help_: When truthy, the help file is shown instead of running.
        """
        self.README = pathlib.Path("dynamic_helper.md")
        self.path_ = path_
        self.help_ = help_

        super().__init__()

    def process(self) -> None:
        """Show help if requested, otherwise run PyggesterDynamic on ``path_``.

        Raises:
            typer.Exit: Propagated unchanged, e.g. after help was displayed.
        """
        try:
            if self.help_:
                self.handle_help_()
            pyggester = PyggesterDynamic(self.path_)
            pyggester.run()
        except typer.Exit:
            # Deliberate termination must reach typer untouched.
            raise
        except Exception as ex:
            # Best-effort CLI: report any other failure instead of crashing.
            print(ex)
92 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
161 | .vscode/*
162 | output/*
163 | *.build/*
164 | *.dist/*
165 | venv*/*
166 | scripts/*
167 |
--------------------------------------------------------------------------------
/tests/test_module_importer.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import unittest
3 | import pytest
4 | from pyggester.module_importer import (
5 | ImportsVisitor,
6 | ImportModuleTransformer,
7 | add_imports,
8 | )
9 | from pyggester.wrappers import get_wrappers_as_strings
10 |
11 |
class TestImportsVisitor(unittest.TestCase):
    """``ImportsVisitor`` flags imports of the requested module only."""

    def test_import_detection(self):
        """Plain, aliased and from-imports match; other modules do not."""
        matching_snippets = [
            "import module_name",
            "import module_name as alias_name",
            "from module_name import name1, name2",
            "from module_name import name1 as alias_name1, name2 as alias_name2",
        ]
        for snippet in matching_snippets:
            self.assertTrue(self._check_import(snippet, "module_name"))

        other_snippets = [
            "import other_module",
            "from other_module import name",
        ]
        for snippet in other_snippets:
            self.assertFalse(self._check_import(snippet, "module_name"))

    def _check_import(self, code, module_name):
        """Parse ``code``, visit it, and return the visitor's ``imported`` flag."""
        visitor = ImportsVisitor(module_name, set())
        visitor.visit(ast.parse(code))
        return visitor.imported
41 |
42 |
@pytest.mark.parametrize(
    "wrapper_name",
    [
        "ObservableListWrapper",
        "ObservableDictWrapper",
        "ObservableTupleWrapper",
        "ObservableSetWrapper",
        "ObservableNamedTupleWrapper",
        "ObservableNumpyArrayWrapper",
        "ObservablePandasDataFrameWrapper",
    ],
)
def test_import_addition(wrapper_name):
    """The wrapper name shows up in the output and the original code is kept."""
    source = "print('Hello, world!')"
    transformer = ImportModuleTransformer(
        ast.parse(source), "pyggester.wrappers", {wrapper_name}
    )
    rendered = _apply_transformer(transformer)

    for fragment in (wrapper_name, "print('Hello, world!')"):
        assert fragment in rendered
64 |
65 |
@pytest.mark.parametrize(
    "wrapper_name",
    [
        "ObservableListWrapper",
        "ObservableDictWrapper",
        "ObservableTupleWrapper",
        "ObservableSetWrapper",
        "ObservableNamedTupleWrapper",
        "ObservableNumpyArrayWrapper",
        "ObservablePandasDataFrameWrapper",
    ],
)
def test_no_import_change(wrapper_name):
    """Source that already has the import still appears verbatim in the output."""
    original = f"from pyggester.wrappers import {wrapper_name}\nprint('Hello, world!')"
    transformer = ImportModuleTransformer(
        ast.parse(original), "pyggester.wrappers", {wrapper_name}
    )
    assert original in _apply_transformer(transformer)
85 |
86 |
87 | def _apply_transformer(transformer):
88 | transformed_tree = transformer.visit(transformer.tree_)
89 | return ast.unparse(transformed_tree)
90 |
91 |
@pytest.mark.parametrize(
    "wrapper_cls",
    [
        "ObservableListWrapper",
        "ObservableDictWrapper",
        "ObservableTupleWrapper",
        "ObservableSetWrapper",
        "ObservableNamedTupleWrapper",
        "ObservableNumpyArrayWrapper",
        "ObservablePandasDataFrameWrapper",
    ],
)
def test_add_imports(wrapper_cls):
    """``add_imports`` puts the from-import line ahead of the existing code."""
    source = "print('Hello, world!')"
    result_tree = add_imports(ast.parse(source), "pyggester.observables", [wrapper_cls])

    import_line = f"from pyggester.observables import {wrapper_cls}"
    assert ast.unparse(result_tree) == "\n".join((import_line, source))
110 |
--------------------------------------------------------------------------------
/pyggester/module_importer.py:
--------------------------------------------------------------------------------
1 | import ast
2 | from typing import Any, Tuple, Set
3 |
4 |
class ImportsVisitor(ast.NodeVisitor):
    """
    AST visitor that records whether a specific module is imported in the code.

    Both ``import <module>`` and ``from <module> import ...`` statements are
    recognised. After ``visit()`` has been called on a tree, the outcome is
    available in the ``imported`` attribute.
    """

    __slots__: Tuple[str, ...] = ("module_name", "imported", "names")

    def __init__(self, module_name: str, names: Set[str]) -> None:
        """
        Args:
            module_name (str): The name of the module to check for.
            names (Set[str]): Names of interest for 'from import' statements. They are
                stored on the visitor but are not consulted when matching.
        """
        self.module_name = module_name
        # Result flag: set to True by the visit_* methods and never reset.
        self.imported = False
        self.names = names

    def visit_Import(self, node: ast.Import) -> Any:
        """
        Visit an Import node.

        Mark the module as imported when any alias of the statement names it exactly.

        Args:
            node (ast.Import): The Import node to visit.
        """
        if any(alias.name == self.module_name for alias in node.names):
            self.imported = True

    def visit_ImportFrom(self, node: ast.ImportFrom) -> Any:
        """
        Visit an ImportFrom node.

        Mark the module as imported when the statement imports from it, regardless
        of which names are imported.

        Args:
            node (ast.ImportFrom): The ImportFrom node to visit.
        """
        if node.module == self.module_name:
            self.imported = True
48 |
class ImportModuleTransformer(ast.NodeTransformer):
    """AST transformer to add or update an import statement for a specific module."""

    __slots__: Tuple[str, ...] = ("module_name", "names", "tree_", "imports_visitor")

    def __init__(
        self, tree_: ast.AST, module_name: str, names: Set[str] = None
    ) -> None:
        """
        Args:
            tree_ (ast.AST): Abstract syntax tree of the module.
            module_name (str): Module the 'from ... import ...' statement refers to.
            names (Set[str]): Names to import from ``module_name``. When empty or None,
                no import statement is added.
        """
        self.module_name = module_name
        self.names = names
        self.tree_ = tree_
        # Information fetcher that tells us whether the module is already imported.
        self.imports_visitor = ImportsVisitor(module_name, names)

    def visit_Module(self, node: ast.Module) -> Any:
        """
        Visit a Module node.

        Top-level imports of ``module_name`` are dropped and, when ``names`` is
        non-empty, a single ``from module_name import <names>`` statement is
        inserted in their place. Imports nested inside functions or classes are
        left untouched.

        Args:
            node (ast.Module): The Module node to visit.

        Returns:
            ast.Module: The transformed Module node.
        """
        self.imports_visitor.visit(self.tree_)
        if self.imports_visitor.imported:
            # Slice-assign so the body list object itself stays the same.
            node.body[:] = self._without_module_imports(node.body)

        if self.names:
            import_stmt = ast.ImportFrom(
                module=self.module_name,
                names=[ast.alias(name=name, asname=None) for name in self.names],
                level=0,
            )
            node.body.insert(self._insertion_index(node), import_stmt)

        return node

    def _without_module_imports(self, body):
        """Return the statements of ``body`` minus top-level imports of the module."""
        kept = []
        for stmt in body:
            if isinstance(stmt, ast.ImportFrom) and stmt.module == self.module_name:
                continue
            if isinstance(stmt, ast.Import):
                remaining = [
                    alias for alias in stmt.names if alias.name != self.module_name
                ]
                if not remaining:
                    continue
                # Keep unrelated modules that share the statement (import a, b).
                stmt.names = remaining
            kept.append(stmt)
        return kept

    @staticmethod
    def _insertion_index(node: ast.Module) -> int:
        """Return the first body index after the docstring and __future__ imports."""
        index = 1 if ast.get_docstring(node, clean=False) is not None else 0
        # 'from __future__ import ...' must stay ahead of every other statement,
        # otherwise the generated module does not compile.
        while (
            index < len(node.body)
            and isinstance(node.body[index], ast.ImportFrom)
            and node.body[index].module == "__future__"
        ):
            index += 1
        return index
114 |
115 |
def add_imports(tree: ast.AST, module_, wrappers) -> ast.AST:
    """
    Adds Wrapper imports to each module being transformed. This is meant to be ran
    for each module/file in the process of transformation.

    Args:
        tree (ast.AST): Parsed module to add the import to.
        module_: Name of the module to import from.
        wrappers: Names to import from ``module_``.

    Returns:
        ast.AST: The tree returned by ImportModuleTransformer.visit().
    """
    return ImportModuleTransformer(tree, module_, wrappers).visit(tree)
125 |
--------------------------------------------------------------------------------
/pyggester/observable_transformations.py:
--------------------------------------------------------------------------------
1 | from _ast import Assign, Module
2 | import ast
3 | import astor
4 | from typing import Any, Tuple
5 | from pyggester.module_importer import add_imports
6 | from pyggester.wrappers import apply_wrappers, get_wrappers_as_strings
7 |
8 |
class ObservableCollectorAppender(ast.NodeTransformer):
    """
    * Collects each observable instance by appending it into the
    OBSERVABLE_COLLECTOR
    ----------------------------------
    import module1
    import module2
    ...(other import stmts)

    OBSERVABLE_COLLECTOR = []
    ...(other stmts)

    list_ = ObservableList([1,2,3])
    OBSERVABLE_COLLECTOR.append(list_)
    ---------------------------------
    """

    __slots__: Tuple[str, ...] = ()

    def visit_Assign(self, node: ast.Assign) -> Any:
        """
        Visit each Assign node to find and collect instances of observable types,
        indicated by 'Observable' being part of the function name.

        Only the first assignment target is collected, and only when it is a plain
        name or an attribute (e.g. ``self.items``). Other target shapes, such as
        tuple unpacking or subscripts, are left untouched.

        Returns:
            The node itself, or ``[node, append_stmt]`` when an append statement
            is added right after the assignment.
        """
        call = node.value
        if not isinstance(call, ast.Call):
            return node

        func_node = call.func
        if isinstance(func_node, ast.Name):
            func_name = func_node.id
        elif isinstance(func_node, ast.Attribute):
            func_name = func_node.attr
        else:
            func_name = ""

        if "Observable" not in func_name:
            return node

        reference = self._as_load_expr(node.targets[0])
        if reference is None:
            return node

        # Build the statement node directly: ast.parse() would hand back a whole
        # Module, which is not valid inside another statement list.
        append_stmt = ast.Expr(
            value=ast.Call(
                func=ast.Attribute(
                    value=ast.Name(id="OBSERVABLE_COLLECTOR", ctx=ast.Load()),
                    attr="append",
                    ctx=ast.Load(),
                ),
                args=[reference],
                keywords=[],
            )
        )
        ast.copy_location(append_stmt, node)
        ast.fix_missing_locations(append_stmt)
        return [node, append_stmt]

    @staticmethod
    def _as_load_expr(target: ast.AST) -> Any:
        """Return a Load-context copy of a Name/Attribute target, or None otherwise."""
        if isinstance(target, ast.Name):
            return ast.Name(id=target.id, ctx=ast.Load())
        if isinstance(target, ast.Attribute):
            return ast.Attribute(value=target.value, attr=target.attr, ctx=ast.Load())
        return None
49 |
50 |
class ObservableRunner(ast.NodeTransformer):
    """
    * This transformer inserts the code that runs every observable.
    Observables don't explicitly run themselves to print the collected suggestions,
    because they might still be in use elsewhere.
    For example, they could have been passed as function parameters.
    However, by running the observables in the global scope after everything in the module,
    we ensure that collections declared in that scope have been fully processed,
    even if they were given or injected into other modules, classes, or functions.
    -----------------------------------
    import module1
    import module2
    ...
    (functions, classes and every possible python construct)
    ...
    for observable in OBSERVABLE_COLLECTOR:
        observable.run()
    -----------------------------------
    """

    __slots__: Tuple[str, ...] = ()

    def visit_Module(self, node: ast.Module) -> Any:
        """
        Append the observable-running loop to the end of the module body.

        Args:
            node (ast.Module): The Module node to extend.

        Returns:
            ast.Module: The same node, with the loop as its last statement.
        """
        observable_runner_code = (
            """for observable in OBSERVABLE_COLLECTOR: observable.run()"""
        )
        # ast.parse() returns a Module; only its statements belong in another
        # module's body, so splice those in rather than the Module itself.
        node.body.extend(ast.parse(observable_runner_code).body)
        return node
83 |
84 |
def apply_observable_collector_transformations(
    tree: ast.AST, run_observables=False
) -> str:
    """
    Run the full transformation pipeline on a parsed module and return its source.

    In order: add the 'pyggester.observables' import, add the
    'pyggester.observable_collector' import, apply the wrappers, apply the
    observable collector modifications, and render the result with astor.
    The returned code is meant to be written to a new file that replicates the
    original one.
    """
    with_observables = add_imports(
        tree, "pyggester.observables", get_wrappers_as_strings()
    )
    with_collector = add_imports(
        with_observables, "pyggester.observable_collector", ["OBSERVABLE_COLLECTOR"]
    )
    wrapped_tree = apply_wrappers(with_collector)
    final_tree = apply_observable_collector_modifications(wrapped_tree, run_observables)

    return astor.to_source(final_tree)
99 |
100 |
def apply_observable_collector_modifications(tree: ast.AST, run_observables) -> ast.AST:
    """
    Apply the observable collector related modifications to a module's AST.

    1. Append each observable into the observable collector
       (ObservableCollectorAppender).
    2. When ``run_observables`` is truthy, add the code that runs the collected
       observables (ObservableRunner).

    This procedure is run per module, so suggestions are produced module by
    module: once one module/file has been handled, analysis moves on to the
    next one, if any.
    """
    collected_tree = ObservableCollectorAppender().visit(tree)
    if not run_observables:
        return collected_tree

    return ObservableRunner().visit(collected_tree)
121 |
--------------------------------------------------------------------------------
/pyggester/pyggester.py:
--------------------------------------------------------------------------------
1 | import ast
2 | import os
3 | import shutil
4 | from typing import List, Tuple
5 | import pathlib
6 | from pyggester.observable_transformations import (
7 | apply_observable_collector_transformations,
8 | )
9 | from pyggester.text_formatters import custom_print
10 |
11 |
class PyggesterDynamic:
    """
    A class for dynamically transforming files / directories
    This is the main 'engine' that glues everything together for pyggester to work under 'pyggester transform'

    Args:
        path_ (str): The path to the file or directory to be transformed.

    Attributes:
        path_ (pathlib.Path): The absolute path to the file or directory.

    Methods:
        run(): Runs the transformation process based on the type of path provided.
        _transform_file(file_path, run_observable, output_path): Transforms a single file.
        _transform_directory(): Transforms all files in a directory.
    """

    __slots__ = ("path_",)

    def __init__(self, path_: str) -> None:
        self.path_ = pathlib.Path(path_).absolute()

    def run(self):
        """
        Runs the transformation process based on the type of path provided.

        Raises:
            FileNotFoundError: If ``self.path_`` does not exist.
        """
        if not self.path_.exists():
            raise FileNotFoundError(f"The path '{self.path_}' does not exist.")

        if self.path_.is_file():
            self._transform_file(self.path_, run_observable=True)
            custom_print("File transformed successfully!", border_style="green")
        elif self.path_.is_dir():
            self._transform_directory()
            custom_print("Directory transformed successfully!", border_style="green")

    def _transform_file(
        self,
        file_path: pathlib.Path,
        run_observable: bool,
        output_path: pathlib.Path = None,
    ) -> None:
        """
        Transforms a single file by applying observable collector transformations.

        The file is read, parsed into an AST, passed through
        ``apply_observable_collector_transformations`` and the resulting code is
        written out.

        Args:
            file_path (pathlib.Path): The path to the file to be transformed.
            run_observable (bool): Indicates whether to run observables in the file.
            output_path (pathlib.Path): Where to write the transformed code. Defaults
                to "<stem>_transformed<suffix>" next to ``file_path``.

        Returns:
            None
        """
        # Python source defaults to UTF-8, so do not depend on the locale encoding.
        code = file_path.read_text(encoding="utf-8")
        transformed_code = apply_observable_collector_transformations(
            ast.parse(code), run_observables=run_observable
        )
        if output_path is None:
            output_path = (
                file_path.parent / f"{file_path.stem}_transformed{file_path.suffix}"
            )
        output_path.write_text(transformed_code, encoding="utf-8")

    def _transform_directory(self) -> None:
        """
        Transforms all files in a directory.

        Prompts for the name of the main file, then mirrors the directory at
        `self.path_` into "{self.path_.name}_transformed" in the parent directory
        of `self.path_`, preserving the directory structure.

        Every ".py" file is transformed; the `run_observable` flag is True only for
        the main file. All other files are copied unchanged. Directories named
        "__pycache__", ".git" and ".venv" are skipped. Transformed files are written
        straight to their destination, so nothing is created inside the source
        directory.

        Raises:
            FileNotFoundError: If the main file does not exist inside `self.path_`.

        Returns:
            None
        """
        main_file_name = input("Enter the name of the main file: ")
        main_file_path = self.path_ / main_file_name

        if not main_file_path.exists():
            raise FileNotFoundError(f"The main file '{main_file_path}' does not exist.")

        transformed_dir_path = self.path_.parent / f"{self.path_.name}_transformed"
        os.makedirs(transformed_dir_path, exist_ok=True)

        excluded_dirs = {"__pycache__", ".git", ".venv"}

        for root, dirs, files in os.walk(self.path_):
            # Prune in place so os.walk does not descend into excluded directories.
            dirs[:] = [d for d in dirs if d not in excluded_dirs]

            source_root = pathlib.Path(root)
            target_root = transformed_dir_path / source_root.relative_to(self.path_)
            target_root.mkdir(parents=True, exist_ok=True)

            # Mirror sub-directories (including empty ones) at their nested location.
            for dir_name in dirs:
                (target_root / dir_name).mkdir(exist_ok=True)

            for file_name in files:
                source_file = source_root / file_name
                target_file = target_root / file_name
                if file_name.endswith(".py"):
                    self._transform_file(
                        source_file,
                        run_observable=source_file == main_file_path,
                        output_path=target_file,
                    )
                else:
                    shutil.copy(source_file, target_file)
135 |
--------------------------------------------------------------------------------
/tests/test_wrappers.py:
--------------------------------------------------------------------------------
1 | # pylint: disable=W0611
2 | import ast
3 | import pytest
4 | from pyggester.wrappers import (
5 | ObservableListWrapper,
6 | ObservableDictWrapper,
7 | ObservableTupleWrapper,
8 | ObservableSetWrapper,
9 | ObservableNamedTupleWrapper,
10 | ObservableNumpyArrayWrapper, # noqa: F401
11 | ObservablePandasDataFrameWrapper, # noqa: F401
12 | )
13 |
14 | from pyggester.observables import (
15 | ObservableDict, # noqa: F401
16 | ObservableList, # noqa: F401
17 | ObservableNamedTuple, # noqa: F401
18 | ObservableNumpyArray, # noqa: F401
19 | ObservablePandasDataFrame, # noqa: F401
20 | ObservableTuple, # noqa: F401
21 | ObservableSet, # noqa: F401
22 | )
23 |
24 |
@pytest.fixture
def example_list_node():
    """Return the expression node of the list literal ``[1, 2, 3]``."""
    module = ast.parse("[1, 2, 3]")
    (statement,) = module.body
    return statement.value
28 |
29 |
@pytest.fixture
def example_dict_node():
    """Return the expression node of the dict literal ``{1: 'one', 2: 'two'}``."""
    module = ast.parse("{1: 'one', 2: 'two'}")
    (statement,) = module.body
    return statement.value
33 |
34 |
@pytest.fixture
def example_tuple_node():
    """Return the expression node of the tuple literal ``(1, 2, 3)``."""
    module = ast.parse("(1, 2, 3)")
    (statement,) = module.body
    return statement.value
38 |
39 |
@pytest.fixture
def example_set_node():
    """Return the expression node of the set literal ``{1, 2, 3}``."""
    module = ast.parse("{1, 2, 3}")
    (statement,) = module.body
    return statement.value
43 |
44 |
def test_observable_list_wrapper(example_list_node):
    """A list literal is rewritten into an ``ObservableList(...)`` call."""
    wrapped = ObservableListWrapper().visit(example_list_node)
    assert isinstance(wrapped, ast.Call)
    rendered = ast.unparse(wrapped)
    assert rendered == "ObservableList([1, 2, 3])"
50 |
51 |
def test_observable_dict_wrapper(example_dict_node):
    """A dict literal is rewritten into an ``ObservableDict(...)`` call."""
    wrapped = ObservableDictWrapper().visit(example_dict_node)
    assert isinstance(wrapped, ast.Call)
    rendered = ast.unparse(wrapped)
    assert rendered == "ObservableDict({1: 'one', 2: 'two'})"
57 |
58 |
def test_observable_tuple_wrapper(example_tuple_node):
    """A tuple literal is rewritten into an ``ObservableTuple(...)`` call."""
    wrapped = ObservableTupleWrapper().visit(example_tuple_node)
    assert isinstance(wrapped, ast.Call)
    rendered = ast.unparse(wrapped)
    assert rendered == "ObservableTuple((1, 2, 3))"
64 |
65 |
def test_observable_set_wrapper(example_set_node):
    """A set literal is rewritten into an ``ObservableSet(...)`` call."""
    wrapped = ObservableSetWrapper().visit(example_set_node)
    assert isinstance(wrapped, ast.Call)
    rendered = ast.unparse(wrapped)
    assert rendered == "ObservableSet({1, 2, 3})"
71 |
72 |
class TestObservableNamedTupleWrapper:
    """Checks that namedtuple instances get an ``ObservableNamedTuple`` companion."""

    @staticmethod
    def transform_and_get_code(code):
        """Run ObservableNamedTupleWrapper over ``code`` and return the unparsed result."""
        module = ast.parse(code)
        wrapper = ObservableNamedTupleWrapper(module)
        return ast.unparse(wrapper.visit(module))

    @staticmethod
    def assert_transformed_code_equals(code, expected_result):
        """Assert the transformed ``code`` equals ``expected_result``, ignoring outer whitespace."""
        actual = TestObservableNamedTupleWrapper.transform_and_get_code(code).strip()
        assert actual == expected_result.strip()

    def test_simple_namedtuple(self):
        expected_result = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
p_wrapper = ObservableNamedTuple(*p)
"""
        code = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
p = Point(1, 2)
"""
        self.assert_transformed_code_equals(code, expected_result)

    def test_nested_namedtuple(self):
        expected_result = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
Circle = namedtuple('Circle', ['center', 'radius'])
c = Circle(Point(0, 0), 5)
c_wrapper = ObservableNamedTuple(*c)
"""
        code = """
from collections import namedtuple
Point = namedtuple('Point', ['x', 'y'])
Circle = namedtuple('Circle', ['center', 'radius'])
c = Circle(Point(0, 0), 5)
"""
        self.assert_transformed_code_equals(code, expected_result)
115 |
116 |
def test_wrap_numpy_array():
    """An array created via ``np.array`` gets an ObservableNumpyArray wrapper line."""
    expected_result = """
import numpy as np
arr = np.array([1, 2, 3])
arr_numpy_wrapper = ObservableNumpyArray(arr)
"""
    code = """
import numpy as np
arr = np.array([1, 2, 3])
"""
    actual = transform_code_numpy_array(code).strip()
    assert actual == expected_result.strip()
129 |
130 |
def test_wrap_nested_numpy_array():
    """Only the outer assignment of a nested aliased ``array`` call is wrapped."""
    expected_result = """
from numpy import array as arr
nested_arr = arr([arr([1, 2]), arr([3, 4])])
nested_arr_numpy_wrapper = ObservableNumpyArray(nested_arr)
"""
    code = """
from numpy import array as arr
nested_arr = arr([arr([1, 2]), arr([3, 4])])
"""
    actual = transform_code_numpy_array(code).strip()
    assert actual == expected_result.strip()
143 |
144 |
def transform_code_numpy_array(code):
    """Run ObservableNumpyArrayWrapper over ``code`` and return the unparsed result."""
    module = ast.parse(code)
    wrapper = ObservableNumpyArrayWrapper(module)
    return ast.unparse(wrapper.visit(module))
151 |
152 |
def test_wrap_pandas_dataframe():
    """A frame created via ``pd.DataFrame`` gets an ObservablePandasDataFrame wrapper line."""
    expected_result = """
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
df_pandas_wrapper = ObservablePandasDataFrame(df)
"""
    code = """
import pandas as pd
df = pd.DataFrame({'A': [1, 2], 'B': [3, 4]})
"""
    actual = transform_code_pandas_data_frame(code).strip()
    assert actual == expected_result.strip()
165 |
166 |
def test_wrap_nested_pandas_dataframe():
    """Only the outer assignment of a nested aliased ``DataFrame`` call is wrapped."""
    expected_result = """
from pandas import DataFrame as df
nested_df = df({'A': df([1, 2]), 'B': df([3, 4])})
nested_df_pandas_wrapper = ObservablePandasDataFrame(nested_df)
"""
    code = """
from pandas import DataFrame as df
nested_df = df({'A': df([1, 2]), 'B': df([3, 4])})
"""
    actual = transform_code_pandas_data_frame(code).strip()
    assert actual == expected_result.strip()
179 |
180 |
def transform_code_pandas_data_frame(code):
    """Run ObservablePandasDataFrameWrapper over ``code`` and return the unparsed result."""
    module = ast.parse(code)
    wrapper = ObservablePandasDataFrameWrapper(module)
    return ast.unparse(wrapper.visit(module))
187 |
--------------------------------------------------------------------------------
/pyggester/data/help_files/dynamic_helper.md:
--------------------------------------------------------------------------------
1 | # Usage (Step-by-Step)
2 |
3 | ## Single File Usage
4 |
5 |
6 | Let's suppose you have a single Python file that you want to analyze dynamically (run-time analysis).
7 |
8 | ### 1. Preparation
9 |
10 | Before code transformation with pyggester:
11 | ```bash
12 | (venv) root@user:~/my_app> ls
13 | app.py
14 | ```
15 |
16 | Content of app.py:
17 |
18 | ```python
19 | def sum_of_integers(integer_list):
20 | total = sum(integer_list)
21 | return total
22 |
23 | my_list = [1, 2, 3, 4, 5]
24 | print(sum_of_integers(my_list))
25 |
26 | ```
27 | ### 2. Transformation
28 |
29 | > [!IMPORTANT]
30 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.**
31 |
32 | ```bash
33 | (venv) root@devs04:~/my_app> pyggest transform app.py
34 | ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
35 | │ File transformed successfully! │
36 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
37 | ```
38 | ### 3. Post-Transformation
39 |
40 | ```bash
41 | (venv) root@devs04:~/my_app> ls
42 | app.py app_transformed.py
43 | ```
44 |
45 | Content of app_transformed.py:
46 |
47 | ```python
48 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
49 | from pyggester.observables import ObservableNumpyArray, ObservableNamedTuple, ObservableSet, ObservablePandasDataFrame, ObservableList, ObservableDict, ObservableTuple
50 |
51 |
52 | def sum_of_integers(integer_list):
53 | total = sum(integer_list)
54 | return total
55 |
56 |
57 | my_list = ObservableList([1, 2, 3, 4, 5])
58 | OBSERVABLE_COLLECTOR.append(my_list)
59 | print(sum_of_integers(my_list))
60 |
61 | for observable in OBSERVABLE_COLLECTOR:
62 | observable.run()
63 |
64 | ```
65 |
66 | > [!IMPORTANT]
67 | > We now have a new file, automatically created, that mirrors the original file. This new file includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app_transformed.py'. Rest assured, everything from 'app.py' is retained in 'app_transformed.py'.
68 |
69 | ### 4. Running the Transformed Code
70 |
71 | ```bash
72 | (venv) root@devs04:~/my_app> python3 app_transformed.py
73 | 15
74 | ╭────────────────────────────────────────────────────────────────────────────╮
75 | │ 10 | Suggestions(/root/my_app/app_transformed.py): │
76 | │ [*] Consider using an array.array instead of a list, for optimal │
77 | │ memory consumption │
78 | │ [*] Consider using a set instead of a list, because of unique elements │
79 | ╰────────────────────────────────────────────────────────────────────────────╯
80 | ```
81 |
82 | ## Directory Usage
83 |
84 | Let's suppose you have a Python project (directory/repo) that you want to analyze dynamically (run-time analysis).
85 |
86 | ### 1. Preparation
87 |
88 | Before code transformation with pyggester:
89 | ```bash
90 | (venv) root@devs04:~/python_demo/app_dir> ls
91 | __pycache__ app.py temperature.py weather.py
92 | ```
93 |
94 | Content of app.py:
95 |
96 | ```python
97 | import weather
98 | import temperature
99 |
100 |
101 | def main():
102 | city = input('Enter a city name: ')
103 | weather_condition = weather.get_weather(city)
104 | avg_temp = temperature.get_average_temperature()
105 | print(f'Weather in {city}: {weather_condition}')
106 | print(f'Average temperature: {avg_temp} degrees Celsius')
107 |
108 |
109 | main()
110 | ```
111 |
112 | Content of temperature.py:
113 | ```python
114 | temperatures = list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 20])
115 |
116 |
117 | def get_average_temperature():
118 | return sum(temperatures) / len(temperatures)
119 |
120 | ```
121 |
122 | Content of weather.py:
123 | ```python
124 | weather_conditions = ['Sunny', 'Rainy', 'Cloudy', 'Windy', 'Sunny', 'Cloudy']
125 |
126 | def get_weather(city):
127 | return weather_conditions.pop()
128 | ```
129 |
130 | ### 2. Transformation
131 |
132 | > [!IMPORTANT]
133 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.**
134 |
135 | ```bash
136 | (venv) root@devs04:~/python_demo> pyggest transform app_dir/
137 | Enter the name of the main file: app.py
138 | ╭──────────────────────────────────────────────────────────────────────────╮
139 | │ Directory transformed successfully! │
140 | ╰──────────────────────────────────────────────────────────────────────────╯
141 | ```
142 | > [!IMPORTANT]
143 | > When a directory or project is specified as an argument, pyggester prompts us to specify the main file of our project. This file should be the entry point of your project, indicated by its file name.
144 |
145 | ### 3. Post-Transformation
146 |
147 | ```bash
148 | (venv) root@devs04:~/python_demo> ls
149 | app_dir app_dir_transformed
150 | ```
151 |
152 | Content of app_dir_transformed/:
153 |
154 | ```bash
155 | (venv) root@devs04:~/python_demo/app_dir_transformed> ls
156 | app.py temperature.py weather.py
157 | ```
158 |
159 | Content of app.py:
160 | ```python
161 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
162 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
163 | import weather
164 | import temperature
165 |
166 |
167 | def main():
168 | city = input('Enter a city name: ')
169 | weather_condition = weather.get_weather(city)
170 | avg_temp = temperature.get_average_temperature()
171 | print(f'Weather in {city}: {weather_condition}')
172 | print(f'Average temperature: {avg_temp} degrees Celsius')
173 |
174 |
175 | main()
176 | for observable in OBSERVABLE_COLLECTOR:
177 | observable.run()
178 |
179 | ```
180 |
181 | Content of temperature.py:
182 | ```python
183 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
184 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
185 | temperatures = ObservableList(list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17,
186 | 20]))
187 | OBSERVABLE_COLLECTOR.append(temperatures)
188 |
189 |
190 | def get_average_temperature():
191 | return sum(temperatures) / len(temperatures)
192 |
193 | ```
194 |
195 | Content of weather.py:
196 |
197 | ```python
198 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
199 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
200 | weather_conditions = ObservableList(['Sunny', 'Rainy', 'Cloudy', 'Windy',
201 | 'Sunny', 'Cloudy'])
202 | OBSERVABLE_COLLECTOR.append(weather_conditions)
203 |
204 |
205 | def get_weather(city):
206 | return weather_conditions.pop()
207 |
208 | ```
209 |
210 | > [!IMPORTANT]
211 | > We now have a new directory, automatically created, that mirrors the original directory. This new directory includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app.py' that resides inside 'app_dir_transformed/'. Rest assured, everything from 'app_dir' is retained in 'app_dir_transformed/'.
212 |
213 | ### 4. Running the Transformed Code
214 |
215 | ```bash
216 | (venv) root@devs04:~/python_demo/app_dir_transformed> python3 app.py
217 | Enter a city name: Pristina
218 | Weather in Pristina: Cloudy
219 | Average temperature: 19.545454545454547 degrees Celsius
220 | ╭─────────────────────────────────────────────────────────────────────────────────────╮
221 | │ 3 | Suggestions(/root/python_demo/app_dir_transformed/temperature.py): │
222 | │ [*] Consider using an array.array instead of a list, for optimal memory │
223 | │ consumption │
224 | ╰─────────────────────────────────────────────────────────────────────────────────────╯
225 | ```
--------------------------------------------------------------------------------
/contributing.md:
--------------------------------------------------------------------------------
1 | # Welcome to pyggester!
2 |
3 | Thank you for your interest in contributing to pyggester. Whether you're a developer, designer, tester, or someone with great ideas, your contributions are valuable.
4 |
5 | ## Getting Started
6 |
7 | 1. **Fork the Repository:** Start by forking the [Project Repository](https://github.com/ValdonVitija/pyggester) on GitHub. This creates a copy of the project under your GitHub account.
8 |
9 | ```bash
10 | git clone https://github.com/ValdonVitija/pyggester.git
11 | ```
12 |
13 | 2. **Create a Branch:** Move into the project's directory and create a new branch for your contribution:
14 |
15 | ```bash
16 | cd pyggester
17 | git checkout -b your-branch-name
18 | ```
19 |
20 | ## Making Changes
21 |
22 | Changes can encompass various aspects, provided they are reasonable. We welcome modifications to overall logic, naming conventions, hierarchy, and directory structure (with meticulous attention, especially for alterations to the project directory).
23 |
24 | # Wrappers
25 |
26 | Includes classes designed to encapsulate collections within observables. Every wrapper extends ast.NodeTransformer, enabling the classes to effectively wrap individual data structures. Each specific wrapper is tailored to implement only the visitor method relevant to the data structure it encapsulates.
27 |
28 | Built-in wrappers are all already done, because all we need to do is wrap the original
29 | data structure declarations with observables.
30 |
31 | Example (ObservableListWrapper):
32 | ```python
class ObservableListWrapper(ast.NodeTransformer):
    """AST transformer that rewrites list literals into ObservableList calls."""

    __slots__: Tuple[str] = ()

    def visit_List(self, node: ast.List) -> Union[ast.Call, ast.AST]:
        """
        Wrap a List node in a call to ObservableList.

        Args:
            node (ast.List): The original List node.

        Returns:
            Union[ast.Call, ast.AST]: A Call node with the original list as its only argument.
        """
        observable_ctor = ast.Name(id="ObservableList", ctx=ast.Load())
        return ast.Call(func=observable_ctor, args=[node], keywords=[])
51 | ```
52 |
53 | Specialized collections from the collections library in Python are a bit different. We cannot directly 'derive' from them, but we can pass the declared data structure objects by reference to our custom Observables.
54 |
55 | Such Wrappers are:
56 | - ObservableNumpyArrayWrapper
57 | - ObservableNamedTupleWrapper
58 | - ObservablePandasDataFrameWrapper
59 |
60 | Example (ObservableNumpyArrayWrapper):
61 |
62 | ```python
63 | class ObservableNumpyArrayWrapper(ast.NodeTransformer):
64 | """AST transformer to wrap NumPy array instances with ObservableNumpyArray."""
65 |
66 | class NumpyImportsVisitor(ast.NodeVisitor):
67 | def __init__(self):
68 | self.alias_name = None
69 | self.alias_asname = None
70 |
71 | def visit_Import(self, node):
72 | """
73 | Check numpy imports, because we need to determine how to
74 | wrap the initiated array instances
75 |
76 | [*] import numpy
77 | [*] import numpy as np
78 | [*] import numpy as 'alias'
79 | """
80 | for name in node.names:
81 | if name.name == "numpy":
82 | self.alias_name = name.name
83 | if name.name == "numpy" and getattr(name, "asname"):
84 | self.alias_asname = name.asname
85 |
86 | def visit_ImportFrom(self, node):
87 | """
88 | Check 'from' numpy imports, because we need to determine how to wrao
89 | the initiated array instances
90 |
91 | [*] from numpy import array
92 | [*] from numpy import array as arr
93 | [*] from numpy import ones
94 | ...
95 | """
96 | if node.module == "numpy":
97 | for name in node.names:
98 | if name.name in ["array", "zeros", "ones", "empty"]:
99 | self.alias_name = name.name
100 | if getattr(name, "asname"):
101 | self.alias_asname = name.asname
102 |
103 | def __init__(self, tree) -> None:
104 | self.imports_visitor = self.NumpyImportsVisitor()
105 | self.imports_visitor.visit(tree)
106 |
107 | def visit_Assign(self, node: ast.Assign) -> ast.AST:
108 | """
109 | Now visit each Assign node and check if that node is a numpy array instance. If thats the case, wrap each instance into an ObservableNumpyArray,
110 | so that we can analyze its internal structure for potential suggestions.
111 | """
112 | if getattr(node, "value") and isinstance(node.value, ast.Call):
113 | if getattr(node.value, "func"):
114 | if isinstance(node.value.func, ast.Name):
115 | id_ = self.get_alias_name()
116 | if node.value.func.id == id_:
117 | return self.wrap_numpy_array(node)
118 |
119 | elif isinstance(node.value.func, ast.Attribute):
120 | id_ = self.get_alias_name()
121 | if node.value.func.value.id == id_:
122 | return self.wrap_numpy_array(node)
123 |
124 | return node
125 |
126 | def get_alias_name(self):
127 | return self.imports_visitor.alias_asname or self.imports_visitor.alias_name
128 |
129 | def wrap_numpy_array(self, node):
130 | wrapper_code = f"{node.targets[0].id}_numpy_wrapper = ObservableNumpyArray({node.targets[0].id})"
131 | wrapper_node = ast.parse(wrapper_code).body[0]
132 | return [node, wrapper_node]
133 |
134 | ```
135 |
136 | >[!NOTE]
137 | > Not every data structure from the collections library has a Wrapper and an Observable version right now. I expect potential contributors to work on them.
138 |
139 |
140 | # 👀 Observables
141 |
142 | The core functionality of pyggester revolves around observables, particularly enhanced versions of python data structures/collections that fully preserve the original functionality offered by these python data structures. These observables attempt to suggest alternative data structures if any issues are detected.
143 |
144 | Standard built-in collections/data structures:
145 | - list
146 | - tuple
147 | - set
148 | - dict
149 |
150 | > [!NOTE]
151 | > Python's built-in collections can be customized by adding your own methods and variables. This lets you analyze the collection more effectively without changing its basic features.
152 |
153 | Specialized collections(part of the collections library):
154 | - ChainMap
155 | - Counter
156 | - OrderedDict
157 | - UserDict
158 | - UserList
159 | - UserString
160 | - defaultdict
161 | - deque
162 | - namedtuple
163 |
164 | Third-Party popular collections:
165 | - Numpy Arrays
166 | - Pandas DataFrame
167 | - Pandas Series
168 |
169 |
170 | Abstract Observable Representation (e.g : list):
171 | ```Python
172 | class ObservableList(list):
173 | """
174 | The ObservableList is an enhanced version of a list that
175 | preserves the full original functionality of a list, but
176 | adds more features to it so that we keep track of anything that
177 | potentially happens in order to do dynamic analysis to each declared
178 | list.
179 | """
180 | __slots__: Tuple[str] = (
181 | "appended",
182 | "extended",
183 | "inserted",
184 | "removed",
185 | "count_",
186 | "in_operator_used",
187 | "message_handler",
188 | )
189 |
190 | def __init__(self, *args, **kwargs) -> None:
191 | ...
192 | def append(self, item) -> None:
193 | super().append(item)
194 | self.appended = True
195 |
196 | def extend(self, iterable) -> None: ...
197 | def insert(self, index, item) -> None: ...
198 | def remove(self, item) -> None: ...
199 | def count(self, __value: Any) -> int: ...
200 | def __contains__(self, __key: object) -> bool: ...
201 | def get_list_dimension(self, lst): ...
202 | def check_numpy_array_instead_of_list(self): ...
203 | def check_array_instead_of_list(self): ...
204 | def can_list_be_converted_to_array(self): ...
205 | def check_list_to_set_conversion(self): ...
206 | def check_set_instead_of_list(self): ...
207 | def check_Counter_insteaf_of_list(self): ...
208 | def check_tuple_instead_of_list(self): ...
209 | def run(self):
210 | """
211 | Only run checkers so that we offer a better running interface
212 | for each observable.
213 | """
214 | self.check_array_instead_of_list()
215 | self.check_numpy_array_instead_of_list()
216 | self.check_set_instead_of_list()
217 | self.check_Counter_insteaf_of_list()
218 | self.message_handler.print_messages()
219 |
220 | ```
221 |
222 | If you make sure to preserve the original functionality of built-in collections, the following statements are exactly the same:
223 | ```Python
224 | #List declarations
225 | list_ = [1,2,3]
226 | list_ = ObservableList([1,2,3])
227 | list_ = ObservableList(list([1,2,3]))
228 |
229 | #Dict declarations
230 | dict_ = {"key":"value"}
231 | dict_ = ObservableDict({"key":"value"})
232 | dict_ = ObservableDict(dict({"key":"value"}))
233 |
234 | #Tuple declarations
235 | tuple_ = (1,2,3)
236 | tuple_ = ObservableTuple([1,2,3])
237 | tuple_ = ObservableTuple(tuple([1,2,3]))
238 |
239 | #Set declarations
240 | set_ = {1,2,3}
241 | set_ = ObservableSet({1,2,3})
242 | set_ = ObservableSet(set({1,2,3}))
243 | ```
244 |
245 | Currently, the supported observables are:
246 |
247 | - ObservableList
248 | - ObservableSet
249 | - ObservableTuple
250 | - ObservableDict
251 | - ObservableNumpyArray
252 | - ObservablePandasDataFrame
253 | - ObservableNamedTuple
254 |
255 |
256 | >[!IMPORTANT]
257 | > Other modules in pyggester are more specific and typically remain unchanged unless you're modifying the analysis approach. However, if you discover an improved method for analyzing or observing collections, or for the execution process post-code transformations, you're encouraged to submit a Pull Request (PR) with an explanation of your ideas. Please note that proposals involving substantial changes must be thoroughly documented, and test cases should be provided to demonstrate the advantages of your approach.
258 |
259 |
260 | >[!NOTE]
261 | >Additional examples can be found by reviewing the codebase directly, where docstrings provide a comprehensive understanding of Pyggester's architecture.
262 |
263 |
264 |
265 |
266 | ## Submitting Changes
267 |
268 | 1. **Commit Changes:** Commit your changes with a clear and concise commit message:
269 |
270 | ```bash
271 | git add .
272 | git commit -m "Brief description of your changes"
273 | ```
274 |
275 | 2. **Push Changes:** Push your changes to your forked repository:
276 |
277 | ```bash
278 | git push origin your-branch-name
279 | ```
280 | 3. **Open a Pull Request:** On GitHub, open a pull request from your branch to the main project repository. Provide a detailed description of your changes and any relevant information.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 | Pyggester - dynamic/static python analysis
11 |
12 |
13 | # 📘 About
14 |
15 | pyggester - (python + suggester) functions as both a dynamic and static analyzer. Its primary purpose lies in offering suggestions to enhance the efficiency of Python code by addressing suboptimal usage of data structures.
16 |
17 | # ⭐ Features
18 |
19 | Pyggester offers a pretty decent cli interface for its functionalities. The cli is built on top of [typer](https://github.com/tiangolo/typer)
20 |
21 | `Execution command`:
22 | ```bash
23 | pyggest
24 | ```
25 | `output`:
26 | ```
27 | _____
28 | _____________ ________ _______ ______________ /_____________
29 | ___ __ \_ / / /_ __ `/_ __ `/ _ \_ ___/ __/ _ \_ ___/
30 | __ /_/ / /_/ /_ /_/ /_ /_/ // __/(__ )/ /_ / __/ /
31 | _ .___/_\__, / _\__, / _\__, / \___//____/ \__/ \___//_/
32 | /_/ /____/ /____/ /____/
33 |
34 |
35 | Usage: pyggest [OPTIONS] COMMAND [ARGS]...
36 |
37 | ╭─ Options ────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
38 | │ --install-completion Install completion for the current shell. │
39 | │ --show-completion Show completion for the current shell, to copy it or customize the installation. │
40 | │ --help Show this message and exit. │
41 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
42 | ╭─ Commands ───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
43 | │ static Perform static analysis using PyggestStatic. │
44 | │ transform Perform dynamic transformation using PyggesterDynamic. │
45 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
46 | ```
47 |
48 | The pyggester CLI presents two distinct features:
49 | - Static Analysis: This feature comprehensively examines your code without executing it, providing useful insights into its structure and potential improvements.
50 |
51 | `Execution command`
52 |
53 | > [!NOTE]
54 | > The 'static' subcommand exists, but has no functionalities implemented, because we already have good static analyzers(pylint, ruff, flake8). In future iterations, should we identify suggestions that can be established through static analysis, we will incorporate them into this feature.
55 |
56 | ```bash
57 | pyggest static
58 | ```
59 |
60 | - Dynamic/Automatic Transformation: This feature adds extra code to your python files to analyze your data structures at runtime. Your original code stays the same; it won't be changed. A new file is created that's just like the original but with additional code. This works for both single files and whole directories(full project structures).
61 |
62 | `Execution command`
63 |
64 | ``` bash
65 | pyggest transform
66 | ```
67 |
68 | > [!NOTE]
69 | > pyggester offers built-in documentation for detailed usage
70 |
71 | ```bash
72 | pyggest transform --help
73 | pyggest static --help #NOT IMPLEMENTED
74 | ```
75 |
76 |
77 |
78 | # 🔧 Installation
79 |
80 | ### Using Pip
81 | You can easily install the Python library using pip. Open your terminal and run the following command:
82 | ```bash
83 | pip install pyggester
84 | ```
85 |
86 |
87 | ### Cloning the GitHub Repository
88 |
89 | 1. **Clone the Repository:** Open your terminal and run the following command to clone the GitHub repository to your local machine:
90 |
91 | ```bash
92 | git clone git@github.com:ValdonVitija/pyggester.git
93 | ```
94 | 2. **Navigate to the Repository:** Change your working directory to the cloned repository:
95 |
96 | ```bash
97 | cd pyggester
98 | ```
99 | 3. **Install pyggester as a package locally:**
100 | > [!IMPORTANT]
101 | > Consider doing this within a virtual environment (venv) if possible.
102 |
103 | ```bash
104 | pip install .
105 | ```
106 |
107 | # Usage (Step-by-Step)
108 |
109 | ## Single File Usage
110 |
111 |
112 | Let's suppose you have a single python file that you want to dynamically analyze (run-time analysis)
113 |
114 | ### 1. Preparation
115 |
116 | Before code transformation with pyggester:
117 | ```bash
118 | (venv) root@user:~/my_app> ls
119 | app.py
120 | ```
121 |
122 | Content of app.py:
123 |
124 | ```python
125 | def sum_of_integers(integer_list):
126 | total = sum(integer_list)
127 | return total
128 |
129 | my_list = [1, 2, 3, 4, 5]
130 | print(sum_of_integers(my_list))
131 |
132 | ```
133 | ### 2. Transformation
134 |
135 | > [!IMPORTANT]
136 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.**
137 |
138 | ```bash
139 | (venv) root@devs04:~/my_app> pyggest transform app.py
140 | ╭──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╮
141 | │ File transformed successfully! │
142 | ╰──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────╯
143 | ```
144 | ### 3. Post-Transformation
145 |
146 | ```bash
147 | (venv) root@devs04:~/my_app> ls
148 | app.py app_transformed.py
149 | ```
150 |
151 | Content of app_transformed.py:
152 |
153 | ```python
154 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
155 | from pyggester.observables import ObservableNumpyArray, ObservableNamedTuple, ObservableSet, ObservablePandasDataFrame, ObservableList, ObservableDict, ObservableTuple
156 |
157 |
158 | def sum_of_integers(integer_list):
159 | total = sum(integer_list)
160 | return total
161 |
162 |
163 | my_list = ObservableList([1, 2, 3, 4, 5])
164 | OBSERVABLE_COLLECTOR.append(my_list)
165 | print(sum_of_integers(my_list))
166 |
167 | for observable in OBSERVABLE_COLLECTOR:
168 | observable.run()
169 |
170 | ```
171 |
172 | > [!IMPORTANT]
173 | > We now have a new file, automatically created, that mirrors the original file. This new file includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app_transformed.py'. Rest assured, everything from 'app.py' is retained in 'app_transformed.py'.
174 |
175 | ### 4. Running the Transformed Code
176 |
177 | ```bash
178 | (venv) root@devs04:~/my_app> python3 app_transformed.py
179 | 15
180 | ╭────────────────────────────────────────────────────────────────────────────╮
181 | │ 10 | Suggestions(/root/my_app/app_transformed.py): │
182 | │ [*] Consider using an array.array instead of a list, for optimal │
183 | │ memory consumption │
184 | │ [*] Consider using a set instead of a list, because of unique elements │
185 | ╰────────────────────────────────────────────────────────────────────────────╯
186 | ```
187 |
188 | ## Directory Usage
189 |
190 | Let's suppose you have a python project (directory/repo) that you want to dynamically analyze (run-time analysis)
191 |
192 | ### 1. Preparation
193 |
194 | Before code transformation with pyggester:
195 | ```bash
196 | (venv) root@devs04:~/python_demo/app_dir> ls
197 | __pycache__ app.py temperature.py weather.py
198 | ```
199 |
200 | Content of app.py:
201 |
202 | ```python
203 | import weather
204 | import temperature
205 |
206 |
207 | def main():
208 | city = input('Enter a city name: ')
209 | weather_condition = weather.get_weather(city)
210 | avg_temp = temperature.get_average_temperature()
211 | print(f'Weather in {city}: {weather_condition}')
212 | print(f'Average temperature: {avg_temp} degrees Celsius')
213 |
214 |
215 | main()
216 | ```
217 |
218 | Content of temperature.py:
219 | ```python
220 | temperatures = list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17, 20])
221 |
222 |
223 | def get_average_temperature():
224 | return sum(temperatures) / len(temperatures)
225 |
226 | ```
227 |
228 | Content of weather.py:
229 | ```python
230 | weather_conditions = ['Sunny', 'Rainy', 'Cloudy', 'Windy', 'Sunny', 'Cloudy']
231 |
232 | def get_weather(city):
233 | return weather_conditions.pop()
234 | ```
235 |
236 | ### 2. Transformation
237 |
238 | > [!IMPORTANT]
239 | > **Make sure you're in a virtual environment with pyggester installed before going to the next step.**
240 |
241 | ```bash
242 | (venv) root@devs04:~/python_demo> pyggest transform app_dir/
243 | Enter the name of the main file: app.py
244 | ╭──────────────────────────────────────────────────────────────────────────╮
245 | │ Directory transformed successfully! │
246 | ╰──────────────────────────────────────────────────────────────────────────╯
247 | ```
248 | > [!IMPORTANT]
249 | > When a directory or project is specified as an argument, pyggester prompts us to specify the main file of our project. This file should be the entry point of your project, indicated by its file name.
250 |
251 | ### 3. Post-Transformation
252 |
253 | ```bash
254 | (venv) root@devs04:~/python_demo> ls
255 | app_dir app_dir_transformed
256 | ```
257 |
258 | Content of app_dir_transformed/:
259 |
260 | ```bash
261 | (venv) root@devs04:~/python_demo/app_dir_transformed> ls
262 | app.py temperature.py weather.py
263 | ```
264 |
265 | Content of app.py:
266 | ```python
267 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
268 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
269 | import weather
270 | import temperature
271 |
272 |
273 | def main():
274 | city = input('Enter a city name: ')
275 | weather_condition = weather.get_weather(city)
276 | avg_temp = temperature.get_average_temperature()
277 | print(f'Weather in {city}: {weather_condition}')
278 | print(f'Average temperature: {avg_temp} degrees Celsius')
279 |
280 |
281 | main()
282 | for observable in OBSERVABLE_COLLECTOR:
283 | observable.run()
284 |
285 | ```
286 |
287 | Content of temperature.py:
288 | ```python
289 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
290 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
291 | temperatures = ObservableList(list([20, 22, 15, 18, 20, 21, 22, 22, 18, 17,
292 | 20]))
293 | OBSERVABLE_COLLECTOR.append(temperatures)
294 |
295 |
296 | def get_average_temperature():
297 | return sum(temperatures) / len(temperatures)
298 |
299 | ```
300 |
301 | Content of weather.py:
302 |
303 | ```python
304 | from pyggester.observable_collector import OBSERVABLE_COLLECTOR
305 | from pyggester.observables import ObservableNumpyArray, ObservableList, ObservablePandasDataFrame, ObservableNamedTuple, ObservableSet, ObservableDict, ObservableTuple
306 | weather_conditions = ObservableList(['Sunny', 'Rainy', 'Cloudy', 'Windy',
307 | 'Sunny', 'Cloudy'])
308 | OBSERVABLE_COLLECTOR.append(weather_conditions)
309 |
310 |
311 | def get_weather(city):
312 | return weather_conditions.pop()
313 |
314 | ```
315 |
316 | > [!IMPORTANT]
317 | > We now have a new directory, automatically created, that mirrors the original directory. This new directory includes all the contents of the original, plus extra code for analyzing your code during runtime. Instead of running the original 'app.py', you should now run 'app.py' that resides inside 'app_dir_transformed/'. Rest assured, everything from 'app_dir' is retained in 'app_dir_transformed/'.
318 |
319 | ### 4. Running the Transformed Code
320 |
321 | ```bash
322 | (venv) root@devs04:~/python_demo/app_dir_transformed> python3 app.py
323 | Enter a city name: Pristina
324 | Weather in Pristina: Cloudy
325 | Average temperature: 19.545454545454547 degrees Celsius
326 | ╭─────────────────────────────────────────────────────────────────────────────────────╮
327 | │ 3 | Suggestions(/root/python_demo/app_dir_transformed/temperature.py): │
328 | │ [*] Consider using an array.array instead of a list, for optimal memory │
329 | │ consumption │
330 | ╰─────────────────────────────────────────────────────────────────────────────────────╯
331 | ```
332 |
333 | # 📁 Directory Structure
334 | ```bash
335 | .
336 | ├── LICENSE
337 | ├── README.md #main readme file. The one you are currently reading.
338 | ├── VERSION #version of pyggester
339 | ├── contributing.md
340 | ├── pyggester # directory containing the full source code of pyggester
341 | │ ├── __init__.py
342 | │ ├── cli.py #defines the typer cli structure(command & options)
343 | │ ├── command_handlers.py #Handles subcommands and every option variation per subcommand.
344 | │ ├── data #data/config files related to pyggester.
345 | │ │ └── help_files #build in help files for the pyggester cli
346 | │ │ ├── __init__.py
347 | │ │ ├── transform_helper.md #detailed built-in documentation for the transform subcommand of pyggest
348 | │ │ └── static_helper.md #detailed built-in documentation for the static subcommand of pyggest
349 | │ ├── helpers.py #helper functions to be used by other modules
350 | │ ├── main.py #The entry point of pyggest execution. Initializes the typer cli app and prints the ascii logo of pyggester
351 | │ ├── message_handler.py #Manages how the collected messages will be printed to the user.
352 | │ ├── module_importer.py #Contains the mechanism to automatically import observables
353 | │ ├── observable_collector.py #Contains the list that will be used to collect all observables.
354 | │ ├── observable_transformations.py #Contains the mechanism that will automatically add code that collects observables and glues together all ast modules
355 | │ ├── observables.py #Contains all the defined observables(enhanced version of python collections)
356 | │ ├── pyggester.py #The 'engine' of pyggester. This module glues everything together
357 | │ ├── text_formatters.py #Contains text formatters, to beautify text in stdout.
358 | │ └── wrappers.py #Contains the mechanism that wrap each observable.
359 | ├── pyggester-abstract-execution-flow.png
360 | ├── pyggester_logo.png
361 | ├── pytest.ini #pytest config file
362 | ├── requirements.txt #Every pyggester dependency resides here
363 | ├── setup.py #Creates the pyggester package and defines pyggest as the entry point command to execute pyggester
364 | └── tests
365 | ├── __init__.py
366 | ├── test_cli.py
367 | ├── test_command_handlers.py
368 | ├── test_file.py
369 | ├── test_file_transformed.py
370 | ├── test_helpers.py
371 | ├── test_main.py
372 | ├── test_message_handler.py
373 | ├── test_module_importer.py
374 | ├── test_observable_transformations.py
375 | ├── test_observables.py
376 | ├── test_pyggester.py
377 | └── test_wrappers.py
378 | ```
379 | # Abstract Execution Flow
380 |
381 | The following flow diagram illustrates key components of Pyggester and provides a comprehensive overview of the execution sequence.
382 |
383 | 
384 |
385 |
386 | # 👥 Contribution
387 |
388 | To contribute to this project, please refer to the comprehensive [contribution guide](contributing.md) for detailed instructions and best practices.
389 |
390 | # ©️ License
391 |
392 | MIT License
393 |
394 | Copyright (c) 2023 ValdonVitijaa
395 |
396 | Permission is hereby granted, free of charge, to any person obtaining a copy
397 | of this software and associated documentation files (the "Software"), to deal
398 | in the Software without restriction, including without limitation the rights
399 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
400 | copies of the Software, and to permit persons to whom the Software is
401 | furnished to do so, subject to the following conditions:
402 |
403 | The above copyright notice and this permission notice shall be included in all
404 | copies or substantial portions of the Software.
405 |
406 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
407 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
408 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
409 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
410 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
411 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
412 | SOFTWARE.
413 |
414 |
415 |
--------------------------------------------------------------------------------
/pyggester/wrappers.py:
--------------------------------------------------------------------------------
1 | from _ast import AST, Assert, Assign, ClassDef, Expr, Module, Tuple
2 | import ast
3 | import inspect
4 | from astor import to_source
5 | from typing import Any, ClassVar, Tuple, Union, Set
6 | import pathlib
7 | from pyggester.helpers import source_code_to_str
8 | from pyggester.module_importer import add_imports
9 |
10 |
11 | # ----------------------------------------------------------
12 |
13 | # The following wrappers are used for built-in standard python data structures.
14 | # List of standard python data structures:
15 |
16 | # list -> [] or list()
17 | # dict -> {} or dict()
18 | # set -> {1, 2} or set()   ('{}' alone is an empty dict)
19 | # tuple -> () or tuple()
20 |
21 | # These data structures can be subclassed directly, so a single wrapper per
22 | # type can wrap the original declarations without changing their core
23 | # behaviour
24 |
25 |
26 | # ----------------------------------------------------------
27 |
28 |
class ObservableListWrapper(ast.NodeTransformer):
    """AST transformer that wraps list declarations with ObservableList.

    Two kinds of nodes are rewritten into ``ObservableList(<original>)``:

    * list displays such as ``[1, 2, 3]`` (``ast.List`` in load context)
    * direct calls to the name ``list``, e.g. ``list(iterable)``

    Neither visitor descends into the children of the node it handles, so
    lists nested inside a wrapped list, or passed as arguments to any call,
    are left as they are.
    """

    __slots__: Tuple[str] = ()

    def visit_List(self, node: ast.List) -> Union[ast.Call, ast.AST]:
        """
        Transform a List node to an ObservableList node.

        Args:
            node (ast.List): The original List node.

        Returns:
            Union[ast.Call, ast.AST]: ``ObservableList(node)`` for a list that
            is being read, or the untouched node when the list is an
            assignment/deletion target.
        """
        # ``[a, b] = value`` and ``del [a, b]`` also produce ast.List nodes
        # (Store/Del context). Wrapping those would emit a call expression as
        # a target, which is not valid Python, so they are left untouched.
        # Nodes without a ``ctx`` attribute keep the historical behaviour and
        # are wrapped.
        if isinstance(getattr(node, "ctx", None), (ast.Store, ast.Del)):
            return node
        return ast.Call(
            func=ast.Name(id="ObservableList", ctx=ast.Load()), args=[node], keywords=[]
        )

    def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]:
        """
        Wrap direct ``list(...)`` calls into ``ObservableList(list(...))``.

        Args:
            node (ast.Call): The original Call node.

        Returns:
            Union[ast.Call, ast.AST]: The wrapping call when the callee is the
            bare name ``list``; otherwise the node unchanged.
        """
        if isinstance(node.func, ast.Name) and node.func.id == "list":
            return ast.Call(
                func=ast.Name(id="ObservableList", ctx=ast.Load()),
                args=[node],
                keywords=[],
            )
        return node
56 |
57 |
class ObservableDictWrapper(ast.NodeTransformer):
    """AST transformer that wraps dict declarations with ObservableDict.

    Dict displays (``{"k": "v"}``) and direct calls to the name ``dict`` are
    rewritten into ``ObservableDict(<original>)``. Children of the handled
    nodes are not visited.
    """

    __slots__: Tuple[str] = ()

    def _observe(self, wrapped: ast.AST) -> ast.Call:
        """Build the ``ObservableDict(wrapped)`` call node."""
        observable_ref = ast.Name(id="ObservableDict", ctx=ast.Load())
        return ast.Call(func=observable_ref, args=[wrapped], keywords=[])

    def visit_Dict(self, node: ast.Dict) -> Union[ast.Call, ast.AST]:
        """
        Transform a Dict node to an ObservableDict node.

        Args:
            node (ast.Dict): The original Dict node.

        Returns:
            Union[ast.Call, ast.AST]: The transformed node.
        """
        return self._observe(node)

    def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]:
        """
        Wrap direct ``dict(...)`` calls into ``ObservableDict(dict(...))``.

        Args:
            node (ast.Call): The original Call node.

        Returns:
            Union[ast.Call, ast.AST]: The wrapping call when the callee is the
            bare name ``dict``; otherwise the node unchanged.
        """
        callee = node.func
        # Anything that is not a plain ``dict(...)`` call passes through.
        if not isinstance(callee, ast.Name) or callee.id != "dict":
            return node
        return self._observe(node)
85 |
86 |
class ObservableTupleWrapper(ast.NodeTransformer):
    """AST transformer that wraps tuple declarations with ObservableTuple.

    Two kinds of nodes are rewritten into ``ObservableTuple(<original>)``:

    * tuple displays such as ``(1, 2, 3)`` (``ast.Tuple`` in load context)
    * direct calls to the name ``tuple``, e.g. ``tuple(iterable)``

    Neither visitor descends into the children of the node it handles, so
    tuples nested inside a wrapped tuple, or passed as arguments to any call,
    are left as they are.
    """

    __slots__: Tuple[str] = ()

    def visit_Tuple(self, node: ast.Tuple) -> Union[ast.Call, ast.AST]:
        """
        Transform a Tuple node to an ObservableTuple node.

        Args:
            node (ast.Tuple): The original Tuple node.

        Returns:
            Union[ast.Call, ast.AST]: ``ObservableTuple(node)`` for a tuple
            that is being read, or the untouched node when the tuple is an
            unpacking/deletion target.
        """
        # Unpacking targets (``a, b = ...``, ``for i, x in ...``,
        # ``with ... as (a, b)``) and ``del a, b`` are ast.Tuple nodes in
        # Store/Del context. Wrapping them would emit a call expression as a
        # target, which is not valid Python, so they are left untouched.
        # Nodes without a ``ctx`` attribute keep the historical behaviour and
        # are wrapped.
        if isinstance(getattr(node, "ctx", None), (ast.Store, ast.Del)):
            return node
        return ast.Call(
            func=ast.Name(id="ObservableTuple", ctx=ast.Load()),
            args=[node],
            keywords=[],
        )

    def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]:
        """
        Wrap direct ``tuple(...)`` calls into ``ObservableTuple(tuple(...))``.

        Args:
            node (ast.Call): The original Call node.

        Returns:
            Union[ast.Call, ast.AST]: The wrapping call when the callee is the
            bare name ``tuple``; otherwise the node unchanged.
        """
        if isinstance(node.func, ast.Name) and node.func.id == "tuple":
            return ast.Call(
                func=ast.Name(id="ObservableTuple", ctx=ast.Load()),
                args=[node],
                keywords=[],
            )
        return node
116 |
117 |
class ObservableSetWrapper(ast.NodeTransformer):
    """AST transformer that wraps set declarations with ObservableSet.

    Set displays (``{1, 2, 3}``) and direct calls to the name ``set`` are
    rewritten into ``ObservableSet(<original>)``. Children of the handled
    nodes are not visited.
    """

    __slots__: Tuple[str] = ()

    def _observe(self, wrapped: ast.AST) -> ast.Call:
        """Build the ``ObservableSet(wrapped)`` call node."""
        observable_ref = ast.Name(id="ObservableSet", ctx=ast.Load())
        return ast.Call(func=observable_ref, args=[wrapped], keywords=[])

    def visit_Set(self, node: ast.Set) -> Union[ast.Call, ast.AST]:
        """
        Transform a Set node to an ObservableSet node.

        Args:
            node (ast.Set): The original Set node.

        Returns:
            Union[ast.Call, ast.AST]: The transformed node.
        """
        return self._observe(node)

    def visit_Call(self, node: ast.Call) -> Union[ast.Call, ast.AST]:
        """
        Wrap direct ``set(...)`` calls into ``ObservableSet(set(...))``.

        Args:
            node (ast.Call): The original Call node.

        Returns:
            Union[ast.Call, ast.AST]: The wrapping call when the callee is the
            bare name ``set``; otherwise the node unchanged.
        """
        callee = node.func
        # Anything that is not a plain ``set(...)`` call passes through.
        if not isinstance(callee, ast.Name) or callee.id != "set":
            return node
        return self._observe(node)
147 |
148 |
149 | # ----------------------------------------------------------
150 |
151 | # The following wrappers are part of the collections built-in python module.
152 | # List of all container datatypes:
153 |
154 | # ChainMap
155 | # Counter
156 | # OrderedDict
157 | # UserDict
158 | # UserList
159 | # UserString
160 | # defaultdict
161 | # deque
162 | # namedtuple
163 |
164 | # These collections cannot be directly derived to create a single wrapper that
165 | # can wrap the original datastructure declarations without changing its core
166 | # behaviour
167 |
168 | # ----------------------------------------------------------
169 |
170 |
class ObservableNamedTupleWrapper(ast.NodeTransformer):
    """AST transformer to wrap namedtuple instances with ObservableNamedTuple.

    Works in two passes: on construction, an inner visitor records every name
    that is assigned the result of a ``namedtuple(...)`` call; during the
    transform, each assignment whose value is a call to one of those recorded
    names is followed by a generated ``<target>_wrapper = ...`` statement.
    """

    class NamedTupleVisitor(ast.NodeVisitor):
        """
        Internal helper of the outer class.

        Collects the names bound to ``namedtuple(...)`` calls in the module
        being analyzed, i.e. the namedtuple types declared there.
        """

        def __init__(self) -> None:
            # Names of the namedtuple types found so far.
            self.namedtuple_instances = set()

        def visit_Assign(self, node: ast.Assign) -> Any:
            """
            Record the plain-name targets of ``X = namedtuple(...)``.

            Only calls through the bare name ``namedtuple`` are recognised.
            """
            call = node.value
            if not (isinstance(call, ast.Call) and isinstance(call.func, ast.Name)):
                return
            if call.func.id != "namedtuple":
                return
            self.namedtuple_instances.update(
                target.id for target in node.targets if isinstance(target, ast.Name)
            )

    def __init__(self, tree) -> None:
        """
        Run the namedtuple visitor over ``tree`` right away so that the
        declared namedtuple type names are known before transforming.
        """
        self.namedtuple_visitor = self.NamedTupleVisitor()
        self.namedtuple_visitor.visit(tree)
        self.modified_nodes = []

    def visit_Assign(self, node: ast.Assign) -> Any:
        """
        Append a wrapper statement after assignments that instantiate a
        collected namedtuple type.

        Returns:
            ``[node, wrapper_node]`` when the value is a call to a collected
            type and at least one target is a plain name (the first such
            target is used); otherwise ``node`` unchanged.
        """
        call = node.value
        if not isinstance(call, ast.Call) or not isinstance(call.func, ast.Name):
            return node
        if call.func.id not in self.namedtuple_visitor.namedtuple_instances:
            return node

        # Only the first plain-name target gets a wrapper statement.
        target = next((t for t in node.targets if isinstance(t, ast.Name)), None)
        if target is None:
            return node

        wrapper_code = f"{target.id}_wrapper = ObservableNamedTuple(*{target.id})"
        return [node, ast.parse(wrapper_code).body[0]]
224 |
225 |
226 | # ----------------------------------------------------------
227 |
228 | # The following wrappers are third party libraries.
229 | # List of all supported third party datatypes:
230 |
231 | # NumPy Arrays
232 | # Pandas
233 | # Polars(soon to be supported)
234 | # More to be added
235 |
236 | # ----------------------------------------------------------
237 |
238 | # TODO MIGHT MERGE THE FOLLOWING TWO CLASSES TOGETHER, BECAUSE OF A LOT OF
239 | # CODE REPETITIONS, OR MAYBE AN ABSTRACT CLASS THAT WILL REQUIRE BOTH OF THEM
240 | # TO IMPLEMENT SOME SPECIFIC, WHILE OFFERING PRE-IMPLEMENTED FEATURES
241 |
242 |
class ObservableNumpyArrayWrapper(ast.NodeTransformer):
    """AST transformer to wrap NumPy array instances with ObservableNumpyArray.

    On construction the tree is scanned for numpy imports. While visiting,
    every assignment whose value is a call made through the recorded numpy
    name is followed by `<target>_numpy_wrapper = ObservableNumpyArray(<target>)`.
    """

    class NumpyImportsVisitor(ast.NodeVisitor):
        """Record the name under which numpy (or one of its constructors) is imported."""

        def __init__(self):
            # Imported name ("numpy", "array", ...) and its optional `as` alias.
            # Only the most recently visited matching import is kept.
            self.alias_name = None
            self.alias_asname = None

        def visit_Import(self, node):
            """
            Check numpy imports, because we need to determine how to
            wrap the initiated array instances

            [*] import numpy
            [*] import numpy as np
            [*] import numpy as 'alias'
            """
            for name in node.names:
                if name.name != "numpy":
                    continue
                self.alias_name = name.name
                if getattr(name, "asname", None):
                    self.alias_asname = name.asname

        def visit_ImportFrom(self, node):
            """
            Check 'from' numpy imports, because we need to determine how to wrap
            the initiated array instances

            [*] from numpy import array
            [*] from numpy import array as arr
            [*] from numpy import ones
            ...
            """
            if node.module != "numpy":
                return
            for name in node.names:
                if name.name in ("array", "zeros", "ones", "empty"):
                    self.alias_name = name.name
                    # An alias only matters for a tracked constructor.
                    if getattr(name, "asname", None):
                        self.alias_asname = name.asname

    def __init__(self, tree) -> None:
        """Scan `tree` for numpy imports before any node is transformed."""
        self.imports_visitor = self.NumpyImportsVisitor()
        self.imports_visitor.visit(tree)

    def visit_Assign(self, node: ast.Assign) -> ast.AST:
        """
        Wrap assignments whose value is a call made through the numpy import name.

        Handles both `name(...)` (e.g. `array(...)` after a from-import) and
        `name.attr(...)` (e.g. `np.array(...)`). Calls whose receiver is not a
        plain name, such as `a.b.c()` or `f().g()`, are left untouched instead
        of raising AttributeError.
        """
        call = getattr(node, "value", None)
        if not isinstance(call, ast.Call):
            return node

        func = call.func
        if isinstance(func, ast.Name):
            callee = func.id
        elif isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            callee = func.value.id
        else:
            return node

        if callee == self.get_alias_name():
            return self.wrap_numpy_array(node)
        return node

    def get_alias_name(self):
        """Return the `as` alias when one was recorded, otherwise the imported name (or None)."""
        return self.imports_visitor.alias_asname or self.imports_visitor.alias_name

    def wrap_numpy_array(self, node):
        """
        Return `[node, wrapper_assignment]` for a plain-name first target.

        Targets that are not plain names (attributes, subscripts, tuples)
        cannot be turned into a `<name>_numpy_wrapper` identifier, so the
        node is returned unchanged in that case.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return node
        wrapper_code = f"{target.id}_numpy_wrapper = ObservableNumpyArray({target.id})"
        wrapper_node = ast.parse(wrapper_code).body[0]
        return [node, wrapper_node]
313 |
314 |
class ObservablePandasDataFrameWrapper(ast.NodeTransformer):
    """AST transformer to wrap Pandas DataFrame instances with ObservablePandasDataFrame

    On construction the tree is scanned for pandas imports. While visiting,
    every assignment whose value is a call made through the recorded pandas
    name is followed by
    `<target>_pandas_wrapper = ObservablePandasDataFrame(<target>)`.
    """

    class PandasImportsVisitor(ast.NodeVisitor):
        """Record the name under which pandas (or DataFrame) is imported."""

        def __init__(self):
            # Imported name ("pandas" or "DataFrame") and its optional `as` alias.
            # Only the most recently visited matching import is kept.
            self.alias_name = None
            self.alias_asname = None

        def visit_Import(self, node):
            """
            Check pandas imports, because we need to determine how to
            wrap the initiated DataFrame instances

            [*] import pandas
            [*] import pandas as pd
            [*] import pandas as 'alias'
            """
            for name in node.names:
                if name.name != "pandas":
                    continue
                self.alias_name = name.name
                if getattr(name, "asname", None):
                    self.alias_asname = name.asname

        def visit_ImportFrom(self, node):
            """
            Check 'from' pandas imports, because we need to determine how to wrap
            the initiated DataFrame instances

            [*] from pandas import DataFrame
            ...
            """
            if node.module != "pandas":
                return
            for name in node.names:
                # A tuple of names, so further constructors can be added later.
                if name.name in ("DataFrame",):
                    self.alias_name = name.name
                    # An alias only matters for a tracked constructor.
                    if getattr(name, "asname", None):
                        self.alias_asname = name.asname

    def __init__(self, tree) -> None:
        """Scan `tree` for pandas imports before any node is transformed."""
        self.imports_visitor = self.PandasImportsVisitor()
        self.imports_visitor.visit(tree)

    def visit_Assign(self, node: ast.Assign) -> ast.AST:
        """
        Wrap assignments whose value is a call made through the pandas import name.

        Handles both `name(...)` (e.g. `DataFrame(...)` after a from-import)
        and `name.attr(...)` (e.g. `pd.DataFrame(...)`). Calls whose receiver
        is not a plain name, such as `a.b.c()` or `f().g()`, are left
        untouched instead of raising AttributeError.
        """
        call = getattr(node, "value", None)
        if not isinstance(call, ast.Call):
            return node

        func = call.func
        if isinstance(func, ast.Name):
            callee = func.id
        elif isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name):
            callee = func.value.id
        else:
            return node

        if callee == self.get_alias_name():
            return self.wrap_numpy_array(node)
        return node

    def get_alias_name(self):
        """Return the `as` alias when one was recorded, otherwise the imported name (or None)."""
        return self.imports_visitor.alias_asname or self.imports_visitor.alias_name

    def wrap_numpy_array(self, node):
        """
        Return `[node, wrapper_assignment]` for a plain-name first target.

        Despite its name (kept for compatibility), this builds the
        ObservablePandasDataFrame wrapper. Targets that are not plain names
        (attributes, subscripts, tuples) are returned unchanged.
        """
        target = node.targets[0]
        if not isinstance(target, ast.Name):
            return node
        wrapper_code = f"{target.id}_pandas_wrapper = ObservablePandasDataFrame({target.id})"
        wrapper_node = ast.parse(wrapper_code).body[0]
        return [node, wrapper_node]
380 |
381 |
class WrapperCollector(ast.NodeVisitor):
    """
    AST visitor that gathers class names with the "Wrapper" suffix removed.
    """

    __slots__: Tuple[str] = ("observables",)

    def __init__(self) -> None:
        self.observables: Set[str] = set()

    def visit_ClassDef(self, node: ast.ClassDef) -> Any:
        """
        Record the observable name derived from a class definition.

        Every class except the one named like this collector contributes the
        part of its name that precedes the first "Wrapper". The class body is
        not traversed, so nested classes are not recorded.

        Args:
            node (ast.ClassDef): The ClassDef node to visit.
        """
        own_name = type(self).__name__
        if node.name == own_name:
            return
        observable_name, _, _ = node.name.partition("Wrapper")
        self.observables.add(observable_name)
405 |
406 |
def get_wrappers_as_strings() -> Set[str]:
    """
    Return the observable names derived from the classes defined in this module.

    The module's own source file is read and parsed, and a WrapperCollector
    gathers the names. The module importer uses the result to import the
    observables into each module selected for transformation.
    """
    this_module = pathlib.Path(__file__)
    module_tree = ast.parse(source_code_to_str(path=this_module))
    collector = WrapperCollector()
    collector.visit(module_tree)
    return collector.observables
417 |
418 |
# Registry of AST transformer classes, grouped by category.
WRAPPERS = {
    # Built-in containers.
    "standard_containers": {
        "list": ObservableListWrapper,
        "dict": ObservableDictWrapper,
        "set": ObservableSetWrapper,
        "tuple": ObservableTupleWrapper,
    },
    # Containers whose declarations must be collected from the tree first.
    "collector_containers": {
        "namedtuple": ObservableNamedTupleWrapper,
    },
    # Datatypes provided by third-party libraries.
    "third_party": {
        "numpy_array": ObservableNumpyArrayWrapper,
        "pandas_dataframe": ObservablePandasDataFrameWrapper,
        # "pandas_series": ObservablePandasSeriesWrapper,
    },
}
433 |
434 |
def apply_wrappers(tree: ast.AST) -> ast.AST:
    """
    Run every registered wrapper transformer over `tree` and return the result.

    The transformers in WRAPPERS are applied category by category, each one
    receiving the tree produced by the previous one:

    * "standard_containers" transformers are instantiated without arguments;
    * "collector_containers" and "third_party" transformers are instantiated
      with the current tree before visiting it.

    Args:
        tree (ast.AST): Parsed module to transform.

    Returns:
        ast.AST: The tree after all transformers have visited it.
    """
    for wrapper in WRAPPERS["standard_containers"].values():
        tree = wrapper().visit(tree)

    # These transformers take the tree at construction time.
    for category in ("collector_containers", "third_party"):
        for wrapper in WRAPPERS[category].values():
            tree = wrapper(tree).visit(tree)

    return tree
450 |
--------------------------------------------------------------------------------
/tests/test_observables.py:
--------------------------------------------------------------------------------
1 | from collections import namedtuple
2 | import numpy
3 | import pandas as pd
4 | from pyggester.observables import (
5 | ObservableList,
6 | ObservableDict,
7 | ObservableNamedTuple,
8 | ObservableNumpyArray,
9 | ObservablePandasDataFrame,
10 | ObservableSet,
11 | ObservableTuple,
12 | )
13 |
14 |
def test_different_ways_of_list_initialization():
    """An ObservableList built from a literal or from list(...) is still a list."""
    for source in ([1, 2, 3], list([1, 2, 3])):
        assert isinstance(ObservableList(source), list)
18 |
19 |
def test_original_list_behavior():
    """ObservableList mirrors a plain list for reads, mutation and ordering."""
    observed = ObservableList([1, 2, 3])
    plain = [1, 2, 3]

    # Read-only behaviour.
    assert observed == plain
    assert len(observed) == len(plain)
    assert observed[1] == plain[1]
    assert observed[1:3] == plain[1:3]
    assert list(observed) == plain
    assert 2 in observed
    assert 4 not in observed
    assert observed.copy() == plain.copy()

    # Emptying.
    observed.clear()
    plain.clear()
    assert len(observed) == 0
    assert len(plain) == 0

    # Growing and shrinking.
    observed.extend([4, 5, 6])
    plain.extend([4, 5, 6])
    assert observed == plain
    assert observed.pop() == plain.pop()
    assert observed == plain

    observed.remove(4)
    plain.remove(4)
    assert observed == plain

    # Reordering.
    observed.reverse()
    plain.reverse()
    assert observed == plain

    observed.sort()
    plain.sort()
    assert observed == plain
50 |
51 |
def test_observable_list_additional_attributes():
    """ObservableList exposes its tracking flags and a message handler."""
    obs_list = ObservableList([1, 2, 3])
    for attribute in (
        "appended",
        "extended",
        "inserted",
        "removed",
        "count_",
        "in_operator_used",
        "message_handler",
    ):
        assert hasattr(obs_list, attribute)


# The original name did not match pytest's default `test_*` pattern, so the
# test was never collected. It is kept as an alias for backward compatibility.
check_observable_list_additional_attributes_test = (
    test_observable_list_additional_attributes
)
61 |
62 |
def test_check_numpy_array_instead_of_list():
    """A nested list gets the numpy suggestion; a flat list does not."""
    suggestion = "Consider using a numpy array instead of a list, for faster computations and optimized memory utilization"

    nested = ObservableList([[1, 2], [3, 4]])
    nested.check_numpy_array_instead_of_list()
    assert suggestion in nested.message_handler.messages

    flat = ObservableList([1, 2, 3])
    flat.check_numpy_array_instead_of_list()
    assert suggestion not in flat.message_handler.messages
78 |
79 |
def test_check_array_instead_of_list():
    """Lists of ints and lists of single characters both get the array.array suggestion."""
    suggestion = "Consider using an array.array instead of a list, for optimal memory consumption"

    for elements in ([1, 2, 3], ["a", "b", "c"]):
        candidate = ObservableList(elements)
        candidate.check_array_instead_of_list()
        assert suggestion in candidate.message_handler.messages
94 |
95 |
def test_check_list_to_set_conversion():
    """Only a list without duplicates is reported as convertible to a set."""
    with_duplicates = ObservableList([1, 2, 2, 3])
    assert not with_duplicates.check_list_to_set_conversion()

    all_unique = ObservableList([1, 2, 3])
    assert all_unique.check_list_to_set_conversion()
104 |
105 |
def test_check_set_instead_of_list():
    """The set suggestion wording depends on whether the `in` operator was used."""
    membership_used = ObservableList([1, 2, 3])
    membership_used.in_operator_used = True
    membership_used.check_set_instead_of_list()
    with_membership = "Consider using a set instead of a list, because of unique elements and element existence checking"
    assert with_membership in membership_used.message_handler.messages

    membership_unused = ObservableList([1, 2, 3])
    membership_unused.in_operator_used = False
    membership_unused.check_set_instead_of_list()
    without_membership = "Consider using a set instead of a list, because of unique elements"
    assert without_membership in membership_unused.message_handler.messages
122 |
123 |
def test_check_Counter_insteaf_of_list():
    """The Counter suggestion is emitted only when the count flag is set."""
    suggestion = "Consider using a collections.Counter, to count occurences of elements"

    counted = ObservableList([1, 2, 2, 3])
    counted.count_ = True
    counted.check_Counter_insteaf_of_list()
    assert suggestion in counted.message_handler.messages

    not_counted = ObservableList([1, 2, 3])
    not_counted.count_ = False
    not_counted.check_Counter_insteaf_of_list()
    assert suggestion not in not_counted.message_handler.messages
140 |
141 |
def test_check_tuple_instead_of_list():
    """The tuple hint appears for upper-case strings in an unmodified list only."""
    hint = "Consider using a tuple since all elements seem to be constants, because the list was never modified"
    flags = ("appended", "extended", "removed", "inserted")

    untouched = ObservableList(["A", "B", "C"])
    for flag in flags:
        setattr(untouched, flag, False)
    untouched.check_tuple_instead_of_list()
    assert hint in untouched.message_handler.messages

    modified = ObservableList(["a", "b", "c"])
    for flag in flags:
        setattr(modified, flag, True)
    modified.check_tuple_instead_of_list()
    assert hint not in modified.message_handler.messages
164 |
165 |
def test_different_ways_of_set_initialization():
    """An ObservableSet built from a literal or from set(...) is still a set."""
    for source in ({1, 2, 3}, set({1, 2, 3})):
        assert isinstance(ObservableSet(source), set)
169 |
170 |
def test_original_set_behavior():
    """ObservableSet mirrors a plain set for reads, mutation and set algebra."""
    observed = ObservableSet({1, 2, 3})
    plain = {1, 2, 3}

    # Read-only behaviour.
    assert observed == plain
    assert len(observed) == len(plain)
    assert set(observed) == plain
    assert 2 in observed
    assert 4 not in observed

    # Mutation.
    observed.add(4)
    plain.add(4)
    assert observed == plain

    observed.discard(3)
    plain.discard(3)
    assert observed == plain

    observed.clear()
    plain.clear()
    assert len(observed) == 0
    assert len(plain) == 0

    observed.update({4, 5, 6})
    plain.update({4, 5, 6})
    assert observed == plain

    observed.remove(4)
    plain.remove(4)
    assert observed == plain

    # Both sets are expected to pop the same element here.
    observed_popped = observed.pop()
    plain_popped = plain.pop()
    assert observed_popped == plain_popped

    # Set algebra.
    assert observed.difference({5, 6}) == plain.difference({5, 6})
    assert observed.union({6, 7}) == plain.union({6, 7})
    assert observed.intersection({5, 6, 7}) == plain.intersection({5, 6, 7})
    assert observed.symmetric_difference({6, 7, 8}) == plain.symmetric_difference(
        {6, 7, 8}
    )
210 |
211 |
def test_observable_set_additional_attributes():
    """ObservableSet exposes its tracking attributes and a message handler."""
    obs_set = ObservableSet({1, 2, 3})
    for attribute in (
        "poped",
        "removed",
        "added",
        "updated",
        "if_it_was_a_list",
        "message_handler",
    ):
        assert hasattr(obs_set, attribute)


# The original name did not match pytest's default `test_*` pattern, so the
# test was never collected. It is kept as an alias for backward compatibility.
check_observable_set_additional_attributes_test = (
    test_observable_set_additional_attributes
)
220 |
221 |
def test_check_frozenset_instead_of_set():
    """A set that was never modified gets the frozenset suggestion."""
    untouched = ObservableSet({1, 2, 3})
    untouched.check_frozenset_instead_of_set()
    suggestion = "Consider using a frozenset, because no modification operation has been used on set."
    assert suggestion in untouched.message_handler.messages
229 |
230 |
def test_check_list_instead_of_set():
    """Adding the same element repeatedly triggers the list suggestion."""
    # `{}` is an empty dict literal; it is used here as an empty iterable.
    repeated = ObservableSet({})
    for _ in range(3):
        repeated.add(1)
    repeated.check_list_instead_of_set()
    suggestion = "If you inteded to keep duplicates use a list instead, because we noticed a lot of duplicates entered the set"
    assert suggestion in repeated.message_handler.messages
241 |
242 |
def test_different_ways_of_tuple_initialization():
    """An ObservableTuple built from a literal or from tuple(...) is still a tuple."""
    for source in ((1, 2, 3), tuple([1, 2, 3])):
        assert isinstance(ObservableTuple(source), tuple)
246 |
247 |
def test_original_tuple_behavior():
    """ObservableTuple mirrors a plain tuple for reads, concatenation and repetition."""
    observed = ObservableTuple((1, 2, 3))
    plain = (1, 2, 3)

    # Read-only behaviour.
    assert observed == plain
    assert len(observed) == len(plain)
    assert observed[1] == plain[1]
    assert observed[1:3] == plain[1:3]
    assert tuple(observed) == plain
    assert 2 in observed
    assert 4 not in observed

    # Concatenation, then repetition.
    observed_concat = observed + (4, 5)
    plain_concat = plain + (4, 5)
    assert observed_concat == plain_concat

    observed_repeat = observed * 2
    plain_repeat = plain * 2
    assert observed_repeat == plain_repeat
264 |
265 |
def check_observable_tuple_additional_attributes_test():
    """ObservableTuple is expected to expose `mul_` and `message_handler`."""
    # NOTE(review): this name does not match pytest's default `test_*`
    # pattern, so the function is presumably never collected - confirm and
    # rename once the assertions are verified against ObservableTuple.
    observed = ObservableTuple((1, 2, 3))
    for attribute in ("mul_", "message_handler"):
        assert hasattr(observed, attribute)
270 |
271 |
def test_check_mutable_inside_tuple():
    """A list nested in a tuple is reported as a mutable element."""
    with_list = ObservableTuple((1, [2, 3], 4))
    with_list.check_mutable_inside_tuple()
    warning = "Mutable element inside of a tuple. Consider using only immutables for optimal performance"
    assert warning in with_list.message_handler.messages
279 |
280 |
def test_check_set_instead_of_tuple():
    """A tuple of unique elements gets the set suggestion."""
    unique_items = ObservableTuple((1, 2, 3))
    unique_items.check_set_instead_of_tuple()
    suggestion = "Consider using a set since elements are all unique"
    assert suggestion in unique_items.message_handler.messages
288 |
289 |
def test_check_tuple_multiplication():
    """Multiplying the tuple by a scalar triggers the numpy-array hint."""
    observed = ObservableTuple((1, 2, 3))
    _ = observed * 2  # the multiplication itself is what the check reacts to
    observed.check_tuple_multiplication()
    hint = "You multipled the tuple with a scalar value. If you inteded to multiply each element by that value, use a numpy array instead of a tuple."
    assert hint in observed.message_handler.messages
298 |
299 |
def test_different_ways_of_dict_initialization():
    """An ObservableDict built from a literal or from dict(...) is still a dict."""
    for source in ({"a": 1, "b": 2, "c": 3}, dict({"a": 1, "b": 2, "c": 3})):
        assert isinstance(ObservableDict(source), dict)
303 |
304 |
def test_original_dict_behavior():
    """ObservableDict mirrors a plain dict for reads, mutation and views."""
    obs_dict = ObservableDict({"a": 1, "b": 2, "c": 3})
    regular_dict = {"a": 1, "b": 2, "c": 3}

    # Read-only behaviour.
    assert obs_dict == regular_dict
    assert len(obs_dict) == len(regular_dict)
    assert obs_dict["a"] == regular_dict["a"]

    # Item assignment and deletion on the observable dict ...
    obs_dict["d"] = 4
    assert obs_dict == {"a": 1, "b": 2, "c": 3, "d": 4}
    del obs_dict["a"]
    assert obs_dict == {"b": 2, "c": 3, "d": 4}

    # ... and the same steps on the plain dict, to keep both in sync.
    regular_dict["d"] = 4
    assert regular_dict == {"a": 1, "b": 2, "c": 3, "d": 4}
    del regular_dict["a"]
    assert regular_dict == {"b": 2, "c": 3, "d": 4}

    popped_item_obs = obs_dict.popitem()
    popped_item_reg = regular_dict.popitem()
    assert popped_item_obs == popped_item_reg

    popped_obs = obs_dict.pop("b")
    popped_reg = regular_dict.pop("b")
    assert popped_obs == popped_reg

    # The leftover debug print of obs_dict was removed from this step.
    obs_dict.update({"e": 5})
    assert obs_dict == {"c": 3, "e": 5}
    regular_dict.update({"e": 5})
    assert regular_dict == {"c": 3, "e": 5}

    copy_obs = obs_dict.copy()
    assert copy_obs == obs_dict

    # Views.
    assert set(obs_dict.keys()) == set(regular_dict.keys())
    assert set(obs_dict.values()) == set(regular_dict.values())
    assert set(obs_dict.items()) == set(regular_dict.items())
335 |
336 |
def check_observable_dict_additional_attributes_test():
    """ObservableDict is expected to expose its usage flags and a message handler."""
    # NOTE(review): this name does not match pytest's default `test_*`
    # pattern, so the function is presumably never collected - confirm and
    # rename once the assertions are verified against ObservableDict.
    observed = ObservableDict({"a": 1, "b": 2, "c": 3})
    for attribute in (
        "keys_",
        "update_",
        "setitem_",
        "delitem_",
        "getitem_",
        "pop_",
        "items_",
        "clear_",
        "values_",
        "message_handler",
    ):
        assert hasattr(observed, attribute)
349 |
350 |
def test_check_Counter_instead_of_dict():
    """A dict with integer values gets the collections.Counter suggestion."""
    counts = ObservableDict(a=1, b=2, c=3)
    counts.check_Counter_instead_of_dict()
    suggestion = "If you are using this dict to store occurences of elements, consider using a collections.Counter"
    assert suggestion in counts.message_handler.messages
358 |
359 |
def test_check_dict_get_method():
    """Subscript access triggers the suggestion to use dict.get."""
    observed = ObservableDict(a=1, b=2, c=3)
    _ = observed["a"]  # the subscript access is what the check reacts to
    observed.check_dict_get_method()
    suggestion = "For dict key retreval, always consider using 'your_dict'.get('key') instead of 'your_dict'['key']"
    assert suggestion in observed.message_handler.messages
368 |
369 |
def test_check_list_instead_of_dict():
    """A dict used only through values() gets the list/array suggestion."""
    observed = ObservableDict(a=1, b=2, c=3)
    _ = observed.values()  # the only operation performed on the dict
    observed.check_list_instead_of_dict()
    suggestion = "It seems like you never used this dict for anything otherthan somehow using the values, use a list/array"
    assert suggestion in observed.message_handler.messages
378 |
379 |
def test_check_array_data_type():
    """An int64 array holding only small values gets the uint8 suggestion."""
    small_values = numpy.array([1, 2, 3], dtype=numpy.int64)
    observed = ObservableNumpyArray(small_values)
    observed.check_array_data_type()
    suggestion = "Array was initiated with int64 integers, but values do not exceed 3. Consider using uint8 for optimization."
    assert suggestion in observed.message_handler.messages
388 |
389 |
def test_check_array_sparsity():
    """An array with nine zeros out of ten elements is reported as 90% sparse."""
    mostly_zeros = numpy.array([0, 0, 1, 0, 0, 0, 0, 0, 0, 0])
    observed = ObservableNumpyArray(mostly_zeros)
    observed.check_array_sparsity()
    suggestion = "The array is highly sparse (sparsity: 90.00%). Consider using a sparse array representation for memory efficiency."
    assert suggestion in observed.message_handler.messages
398 |
399 |
def test_check_for_nan_values():
    """An array containing NaN gets the NaN-handling suggestion."""
    with_nan = numpy.array([1, numpy.nan, 3])
    observed = ObservableNumpyArray(with_nan)
    observed.check_for_nan_values()
    suggestion = "The array contains NaN values. Consider using masked arrays or handling NaN values appropriately."
    assert suggestion in observed.message_handler.messages
408 |
409 |
def test_check_for_monotonicity():
    """A strictly increasing array is reported as monotonic."""
    increasing = numpy.array([1, 2, 3, 4, 5])
    observed = ObservableNumpyArray(increasing)
    observed.check_for_monotonicity()
    suggestion = "The array is monotonic. Consider using specialized algorithms or data structures for monotonic arrays."
    assert suggestion in observed.message_handler.messages
418 |
419 |
def test_check_for_categorical_data():
    """A string array with three distinct labels gets the categorical suggestion."""
    labels = ["dog", "cat", "dog", "bird", "dog", "cat", "bird", "bird", "cat"]
    observed = ObservableNumpyArray(numpy.array(labels))
    observed.check_for_categorical_data()
    suggestion = "The array contains categorical data with 3 unique values. Consider using categorical data types for efficiency, like pd.Categorical()"
    assert suggestion in observed.message_handler.messages
431 |
432 |
def test_check_for_symmetry():
    """A 2x2 array equal to its transpose is reported as symmetric."""
    symmetric = numpy.array([[1, 2], [2, 1]])
    observed = ObservableNumpyArray(symmetric)
    observed.check_for_symmetry()
    suggestion = "The array is symmetric. Consider using specialized algorithms to operate on symmetric arrays, for example functions from scipy"
    assert suggestion in observed.message_handler.messages
441 |
442 |
def test_check_for_constant_values():
    """An array whose elements are all equal gets the constant-value suggestion."""
    all_ones = numpy.array([1, 1, 1, 1])
    observed = ObservableNumpyArray(all_ones)
    observed.check_for_constant_values()
    suggestion = "All elements in the array are the same. Consider using a single value, a constant or collections.Counter for memory efficiency."
    assert suggestion in observed.message_handler.messages
451 |
452 |
def test_check_for_missing_values():
    """A DataFrame containing None gets the missing-values suggestion."""
    frame = pd.DataFrame({"A": [1, 2, None], "B": [4, 5, 6]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_missing_values()
    suggestion = "The DataFrame contains missing values. Consider handling missing values."
    assert suggestion in observed.message_handler.messages
461 |
462 |
def test_check_for_constant_columns():
    """A column holding a single repeated value is reported by name."""
    frame = pd.DataFrame({"A": [1, 1, 1], "B": [4, 5, 6]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_constant_columns()
    suggestion = "The DataFrame contains constant columns (['A']). Consider dropping them for memory efficiency."
    assert suggestion in observed.message_handler.messages
471 |
472 |
def test_check_for_duplicate_rows():
    """A DataFrame with two identical rows gets the duplicate-rows suggestion."""
    frame = pd.DataFrame({"A": [1, 2, 2], "B": [4, 5, 5]})
    observed = ObservablePandasDataFrame(frame)
    observed.check_for_duplicate_rows()
    suggestion = "The DataFrame contains duplicate rows. Consider handling duplicate rows appropriately."
    assert suggestion in observed.message_handler.messages
481 |
482 |
def test_check_series_instead_of_dataframe():
    """A single-column DataFrame gets the Series suggestion."""
    single_column = pd.DataFrame({"A": [1, 2, 3]})
    observed = ObservablePandasDataFrame(single_column)
    # The method name is spelled exactly as the test originally calls it.
    observed.check_series_insteafd_of_dataframe()
    suggestion = "Consider using a Series instead of a DataFrame when you have only one column of data."
    assert suggestion in observed.message_handler.messages
491 |
492 |
def test_check_numpy_instead_of_dataframe():
    """A 15000-row, two-column DataFrame gets the NumPy suggestion."""
    tall_frame = pd.DataFrame({"A": range(15000), "B": range(15000)})
    observed = ObservablePandasDataFrame(tall_frame)
    observed.check_numpy_instead_of_dataframe()
    suggestion = "Consider using a NumPy array or a specialized data structure if you have a large number of rows and a small number of columns."
    assert suggestion in observed.message_handler.messages
501 |
502 |
def test_check_for_excessive_nesting():
    """A namedtuple holding another namedtuple gets the nesting warning."""
    InnerTuple = namedtuple("InnerTuple", "field1 field2")
    OuterTuple = namedtuple("OuterTuple", "inner")
    nested = OuterTuple(InnerTuple(1, 2))

    observed = ObservableNamedTuple(nested)
    observed.check_for_excessive_nesting()
    warning = "Avoid excessive nesting of namedtuples to keep the structure simple and readable. Consider usina a class instead"
    assert warning in observed.message_handler.messages
513 |
514 |
def test_check_for_ignoring_type_annotations():
    """A collections.namedtuple instance gets the type-annotation suggestion."""
    MyTuple = namedtuple("MyTuple", "field1 field2")
    instance = MyTuple(1, 2)

    observed = ObservableNamedTuple(instance)
    observed.check_for_ignoring_type_annotations()
    suggestion = "Consider using type annotations for field in namedtuples for better documentation."
    assert suggestion in observed.message_handler.messages
524 |
525 |
def test_check_for_ignoring_namedtuple_advantages():
    """A namedtuple with eleven fields gets the fewer-fields suggestion."""
    field_names = " ".join(f"field{i}" for i in range(11))
    ManyFields = namedtuple("ManyFields", field_names)
    instance = ManyFields(*(range(11)))

    observed = ObservableNamedTuple(instance)
    observed.check_for_ignoring_namedtuple_advantages()
    suggestion = "Consider using namedtuples for simpler data structures with fewer fields for better readability."
    assert suggestion in observed.message_handler.messages
535 |
--------------------------------------------------------------------------------
/pyggester/observables.py:
--------------------------------------------------------------------------------
1 | from _collections_abc import dict_items, dict_keys, dict_values
2 | from typing import List, Tuple, Dict, Any, Iterable
3 | from collections import namedtuple
4 | import numpy
5 | from pyggester.message_handler import MessageHandler
6 | import array
7 | import scipy.sparse as sp
8 | import inspect
9 | from typing import List, Dict, Any, Tuple, Set, NamedTuple
10 |
11 | # TODO MIGHT CONSIDER CREATING AN OBSERVABLE ABSTRACT BASE CLASS,
12 | # TO MAKE EACH OBSERVABLE FOLLOW A SPECIFIC CONTRACT
13 |
14 |
15 | class ObservableList(list):
16 | """
17 | The ObservableList is an enhanced version of a list that
18 | preserves the full original functionality of a list, but
19 | adds more features to it so that we keep track of anything that
20 | potentially happens in order to do dynamic analysis to each declared
21 | list.
22 | """
23 |
24 | __slots__: Tuple[str] = (
25 | "appended",
26 | "extended",
27 | "inserted",
28 | "removed",
29 | "count_",
30 | "in_operator_used",
31 | "message_handler",
32 | )
33 |
34 | def __init__(self, *args, **kwargs) -> None:
35 | super().__init__(*args, **kwargs)
36 | # The following methods keep track of base list methods.
37 | # False if not used(ever), True if used
38 | self.appended: bool = False
39 | self.extended: bool = False
40 | self.inserted: bool = False
41 | self.removed: bool = False
42 | self.count_: bool = False
43 | self.in_operator_used: bool = False
44 | """
45 | Get the context of the current list being analyzed
46 | """
47 | caller_frame = inspect.currentframe().f_back
48 | line_number: int = caller_frame.f_lineno
49 | file_path: str = caller_frame.f_globals["__file__"]
50 |
51 | self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)
52 |
53 | def append(self, item) -> None:
54 | super().append(item)
55 | self.appended = True
56 |
57 | def extend(self, iterable) -> None:
58 | super().extend(iterable)
59 | self.extended = True
60 |
61 | def insert(self, index, item) -> None:
62 | super().insert(index, item)
63 | self.inserted = True
64 |
65 | def remove(self, item) -> None:
66 | super().remove(item)
67 | self.removed = True
68 |
69 | def count(self, __value: Any) -> int:
70 | self.count_ = True
71 | return super().count(__value)
72 |
73 | def __contains__(self, __key: object) -> bool:
74 | self.in_operator_used = True
75 | return super().__contains__(__key)
76 |
77 | def get_list_dimension(self, lst):
78 | """ """
79 | if not isinstance(lst, list):
80 | return 0
81 | else:
82 | inner_dimensions = [self.get_list_dimension(item) for item in lst]
83 | return 1 + max(inner_dimensions, default=0)
84 |
85 | def check_numpy_array_instead_of_list(self):
86 | """ """
87 | try:
88 | if self.get_list_dimension(self) >= 2:
89 | numpy.array(self)
90 | self.message_handler.messages.append(
91 | "Consider using a numpy array instead of a list, for faster computations and optimized memory utilization"
92 | )
93 | except Exception:
94 | pass
95 |
96 | def check_array_instead_of_list(self):
97 | if self.can_list_be_converted_to_array():
98 | self.message_handler.messages.append(
99 | "Consider using an array.array instead of a list, for optimal memory consumption"
100 | )
101 |
102 | def can_list_be_converted_to_array(self):
103 | """
104 | Check if the list can be converted to an array.
105 |
106 | Returns:
107 | bool: True if the list can be converted, False otherwise.
108 | """
109 | if all(isinstance(item, int) for item in self):
110 | return True
111 | elif all(isinstance(item, float) for item in self):
112 | return True
113 | elif all(isinstance(item, str) and len(item) == 1 for item in self):
114 | try:
115 | array.array("u", self)
116 | return True
117 | except ValueError:
118 | return False
119 | else:
120 | return False
121 |
122 | def check_list_to_set_conversion(self):
123 | """
124 | Check if the list can be converted to a set.
125 |
126 | Returns:
127 | bool: True if the list can be converted, False otherwise.
128 | """
129 | if self.get_list_dimension(self) == 1:
130 | if len(self) == len(set(list(self))):
131 | return True
132 | return False
133 |
134 | def check_set_instead_of_list(self):
135 | if self.check_list_to_set_conversion():
136 | if self.in_operator_used:
137 | self.message_handler.messages.append(
138 | "Consider using a set instead of a list, because of unique elements and element existence checking"
139 | )
140 | else:
141 | self.message_handler.messages.append(
142 | "Consider using a set instead of a list, because of unique elements"
143 | )
144 |
def check_Counter_insteaf_of_list(self):
    """Suggest collections.Counter when the list's count_ flag is truthy."""
    if not self.count_:
        return
    suggestion = "Consider using a collections.Counter, to count occurences of elements"
    self.message_handler.messages.append(suggestion)
150 |
def check_tuple_instead_of_list(self):
    """
    Suggest a tuple when the list looks like a collection of constants.

    Every element must be a string that is upper-case or starts with an
    upper-case character, and none of the appended / extended / removed /
    inserted flags may be set.
    """
    looks_constant = all(
        # x[:1] rather than x[0]: an empty string must not raise IndexError.
        isinstance(x, str) and (x.isupper() or x[:1].isupper())
        for x in self
    )
    was_modified = any(
        [self.appended, self.extended, self.removed, self.inserted]
    )
    if looks_constant and not was_modified:
        self.message_handler.messages.append(
            "Consider using a tuple since all elements seem to be constants, because the list was never modified"
        )
164 |
def run(self):
    """
    Execute every list checker in a fixed order, then print the
    collected messages.

    New checkers should be added to the sequence below. A future
    refactor might introduce priority levels, or emit only a single
    suggestion, but that needs far more specific analysis.
    """
    checkers = (
        self.check_array_instead_of_list,
        self.check_numpy_array_instead_of_list,
        self.check_set_instead_of_list,
        self.check_Counter_insteaf_of_list,
    )
    for checker in checkers:
        checker()
    self.message_handler.print_messages()
179 |
180 |
class ObservableSet(set):
    """
    The ObservableSet is an enhanced version of a set that
    preserves the full original functionality of a set, but
    adds more features to it so that we keep track of anything that
    potentially happens in order to do dynamic analysis to each declared
    set.

    Besides the mutation flags, it keeps ``if_it_was_a_list``: every element
    passed to add()/update(), duplicates included, i.e. what a list would
    have accumulated from the same calls.
    """

    __slots__: Tuple[str, ...] = (
        "poped",
        "removed",
        "added",
        "updated",
        "message_handler",
        "if_it_was_a_list",
    )

    def __init__(self, iterable=None) -> None:
        # set.__init__ rejects None, so "no iterable" must become an empty one;
        # otherwise ObservableSet() raises TypeError.
        super().__init__(() if iterable is None else iterable)
        self.poped: bool = False
        self.removed: bool = False
        self.added: bool = False
        self.updated: bool = False
        self.if_it_was_a_list: List[Any] = []

        # The direct caller's frame is where the set was declared.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        file_path: str = caller_frame.f_globals["__file__"]

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def add(self, element: Any) -> None:
        """Add ``element`` and record it in the shadow list."""
        super().add(element)
        self.added = True
        self.if_it_was_a_list.append(element)

    def pop(self) -> Any:
        """Remove and return an arbitrary element; raises KeyError if empty."""
        result = super().pop()
        self.poped = True
        return result

    def remove(self, element: Any) -> None:
        """Remove ``element``; raises KeyError if it is missing."""
        super().remove(element)
        self.removed = True

    def discard(self, element: Any) -> None:
        """Remove ``element`` if present and record the removal attempt."""
        super().discard(element)
        self.removed = True

    def clear(self) -> None:
        """Remove every element and record it as a removal."""
        super().clear()
        self.removed = True

    def update(self, *others: Iterable) -> None:
        """Add the elements of every iterable in ``others``."""
        # Materialize each argument once so one-shot iterators can feed both
        # the real set and the shadow list.
        batches = [list(other) for other in others]
        super().update(*batches)
        self.updated = True
        for batch in batches:
            # Record the individual elements (duplicates included), not the
            # iterable object itself.
            self.if_it_was_a_list.extend(batch)

    def check_frozenset_instead_of_set(self):
        """Suggest a frozenset when no tracked mutation ever happened."""
        if not any([self.added, self.removed, self.updated, self.poped]):
            self.message_handler.messages.append(
                "Consider using a frozenset, because no modification operation has been used on set."
            )

    def check_list_instead_of_set(self):
        """
        Suggest a list when noticeably more elements were inserted than the
        set ended up holding (more than 1.2x its final size).

        The suggestion here is quite subjective.
        NOTE: Might need to refactor this one
        """
        if len(self.if_it_was_a_list) > 1.2 * len(self) and any(
            [self.added, self.removed, self.updated, self.poped]
        ):
            self.message_handler.messages.append(
                "If you inteded to keep duplicates use a list instead, because we noticed a lot of duplicates entered the set"
            )

    def run(self):
        """Run every set checker, then print the collected messages."""
        self.check_frozenset_instead_of_set()
        self.check_list_instead_of_set()
        self.message_handler.print_messages()
254 |
255 |
class ObservableTuple(tuple):
    """
    The ObservableTuple is an enhanced version of a tuple that
    preserves the full original functionality of a tuple, but
    adds more features to it so that we keep track of anything that
    potentially happens in order to do dynamic analysis to each declared
    tuple.
    """

    def __new__(cls, *args, **kwargs):
        # tuple is immutable: its content must be supplied to __new__.
        return super().__new__(cls, *args)

    def __init__(self, *args: Any, **kwargs) -> None:
        super().__init__()
        self.mul_: bool = False

        # The direct caller's frame is where the tuple was declared.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        file_path: str = caller_frame.f_globals["__file__"]

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def __mul__(self, n: int) -> tuple:
        """Repeat the tuple (``t * n``) and remember that it was multiplied."""
        self.mul_ = True
        return super().__mul__(n)

    def __rmul__(self, n: int) -> tuple:
        """Repeat the tuple (``n * t``) and remember that it was multiplied."""
        self.mul_ = True
        return super().__rmul__(n)

    def check_mutable_inside_tuple(self) -> None:
        """Warn, once, when any element is a list, dict or set."""
        # any() stops at the first hit, so several mutable elements still
        # produce a single message instead of one per element.
        if any(isinstance(elem_, (list, dict, set)) for elem_ in self):
            self.message_handler.messages.append(
                "Mutable element inside of a tuple. Consider using only immutables for optimal performance"
            )

    def check_set_instead_of_tuple(self) -> None:
        """Suggest a set when all elements are hashable and distinct."""
        try:
            all_unique = len(set(self)) == len(self)
        except TypeError:
            # Unhashable elements cannot live in a set; nothing to suggest.
            return
        if all_unique:
            self.message_handler.messages.append(
                "Consider using a set since elements are all unique"
            )

    def check_tuple_multiplication(self) -> None:
        """Point at numpy when the tuple was multiplied by a scalar."""
        if self.mul_:
            self.message_handler.messages.append(
                "You multipled the tuple with a scalar value. If you inteded to multiply each element by that value, use a numpy array instead of a tuple."
            )

    def run(self) -> None:
        """Run every tuple checker, then print the collected messages."""
        self.check_mutable_inside_tuple()
        self.check_tuple_multiplication()
        self.check_set_instead_of_tuple()
        self.message_handler.print_messages()
310 |
311 |
class ObservableDict(dict):
    """
    The ObservableDict is an enhanced version of a dict that
    preserves the full original functionality of a dict, but
    adds more features to it so that we keep track of anything that
    potentially happens in order to do dynamic analysis to each declared
    dict.
    """

    __slots__: Tuple[str, ...] = (
        "keys_",
        "update_",
        "setitem_",
        "delitem_",
        "getitem_",
        "pop_",
        "items_",
        "clear_",
        "values_",
        "message_handler",
    )

    # Sentinel distinguishing "no default given" from an explicit None in pop().
    _MISSING = object()

    def __init__(self, *args, **kwargs) -> None:
        super().__init__(*args, **kwargs)
        self.keys_: bool = False
        self.update_: bool = False
        self.setitem_: bool = False
        self.delitem_: bool = False
        self.getitem_: bool = False
        self.pop_: bool = False
        self.items_: bool = False
        self.clear_: bool = False
        self.values_: bool = False

        # The direct caller's frame is where the dict was declared.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        file_path: str = caller_frame.f_globals["__file__"]

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def __setitem__(self, key, value) -> None:
        super().__setitem__(key, value)
        self.setitem_ = True

    def __delitem__(self, key) -> None:
        super().__delitem__(key)
        self.delitem_ = True

    def __getitem__(self, __key: Any) -> Any:
        self.getitem_ = True
        return super().__getitem__(__key)

    def clear(self) -> None:
        super().clear()
        self.clear_ = True

    def pop(self, key, default=_MISSING) -> Any:
        """
        Remove ``key`` and return its value.

        Like dict.pop, raises KeyError when the key is missing and no
        default was supplied; an explicit default (None included) is returned
        instead.
        """
        if default is self._MISSING:
            result = super().pop(key)
        else:
            result = super().pop(key, default)
        self.pop_ = True
        return result

    def popitem(self) -> Tuple[Any, Any]:
        """Remove and return a (key, value) pair; raises KeyError if empty."""
        result = super().popitem()
        self.pop_ = True
        return result

    def update(self, *args, **kwargs) -> None:
        super().update(*args, **kwargs)
        self.update_ = True

    def setdefault(self, key, default=None) -> Any:
        result = super().setdefault(key, default)
        return result

    def copy(self) -> dict:
        """Return a shallow copy as a plain dict (not an ObservableDict)."""
        result = super().copy()
        return result

    def keys(self) -> dict_keys:
        self.keys_ = True
        return super().keys()

    def values(self) -> dict_values:
        self.values_ = True
        return super().values()

    def items(self) -> dict_items:
        self.items_ = True
        return super().items()

    def check_Counter_instead_of_dict(self) -> None:
        """Suggest collections.Counter when every stored value is an int."""
        # Read through dict.values() directly: going through self.values()
        # would flip values_ and distort check_list_instead_of_dict().
        stored = super().values()
        if len(self) > 0 and all(isinstance(value, int) for value in stored):
            self.message_handler.messages.append(
                "If you are using this dict to store occurences of elements, consider using a collections.Counter"
            )

    def check_dict_get_method(self) -> None:
        """Recommend dict.get() when subscript access was used."""
        if self.getitem_:
            self.message_handler.messages.append(
                "For dict key retreval, always consider using 'your_dict'.get('key') instead of 'your_dict'['key']"
            )

    def check_list_instead_of_dict(self) -> None:
        """
        Suggest to use a list when a dict seems to not be used optimally,
        i.e. only values() or only keys() was ever called on it.
        """
        if (not any([self.getitem_, self.keys_, self.items_]) and self.values_) or (
            not any([self.getitem_, self.items_, self.values_]) and self.keys_
        ):
            self.message_handler.messages.append(
                "It seems like you never used this dict for anything otherthan somehow using the values, use a list/array"
            )

    def run(self) -> None:
        """Run every dict checker, then print the collected messages."""
        self.check_Counter_instead_of_dict()
        self.check_dict_get_method()
        self.check_list_instead_of_dict()
        self.message_handler.print_messages()
428 |
429 |
class ObservableNumpyArray:
    """
    The ObservableNumpyArray is a numpy analyzer that takes the declared numpy array
    and does internal attribute and value checkings for potential improvement suggestions.
    """

    __slots__: Tuple[str, ...] = ("arr__", "message_handler")

    def __init__(self, arr__) -> None:
        self.arr__ = arr__

        # The direct caller's frame is where the array was declared.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        file_path: str = caller_frame.f_globals["__file__"]

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def check_array_data_type(self) -> None:
        """Suggest the narrowest integer dtype that holds every value."""
        arr = self.arr__
        # Only integer arrays are analysed: narrowing floats loses precision,
        # and an empty array has no min/max.
        if arr.size == 0 or not numpy.issubdtype(arr.dtype, numpy.integer):
            return
        current_dtype = arr.dtype
        min_number, max_number = int(arr.min()), int(arr.max())
        if min_number >= 0:
            min_dtype = numpy.min_scalar_type(max_number)
        else:
            # With negative values the type must be signed and cover both
            # ends of the range, not just the maximum.
            min_dtype = next(
                numpy.dtype(candidate)
                for candidate in (numpy.int8, numpy.int16, numpy.int32, numpy.int64)
                if numpy.iinfo(candidate).min <= min_number
                and max_number <= numpy.iinfo(candidate).max
            )
        if current_dtype != min_dtype:
            self.message_handler.messages.append(
                f"Array was initiated with {current_dtype} integers, but values do not exceed {max_number}. Consider using {min_dtype} for optimization."
            )

    def check_array_sparsity(self, threshold: float = 0.8) -> None:
        """Suggests using sparse arrays for highly sparse data to save memory."""
        if self.arr__.size == 0:
            # Sparsity is undefined for an empty array (division by zero).
            return
        sparsity = 1.0 - numpy.count_nonzero(self.arr__) / float(self.arr__.size)
        if sparsity > threshold:
            try:
                # Best-effort probe: only suggest when scipy can represent it.
                _ = sp.csr_matrix(self.arr__)
                self.message_handler.messages.append(
                    f"The array is highly sparse (sparsity: {sparsity:.2%}). Consider using a sparse array representation for memory efficiency."
                )
            except Exception:
                pass

    def check_for_nan_values(self) -> None:
        """Suggests using masked arrays or handling NaN values."""
        # numpy.isnan is only defined for float/complex dtypes; other dtypes
        # either cannot hold NaN or make isnan raise TypeError.
        if not numpy.issubdtype(self.arr__.dtype, numpy.inexact):
            return
        nan_mask = numpy.isnan(self.arr__)
        if nan_mask.any():
            try:
                _ = numpy.ma.masked_array(self.arr__, mask=nan_mask)
                self.message_handler.messages.append(
                    "The array contains NaN values. Consider using masked arrays or handling NaN values appropriately."
                )
            except Exception:
                pass

    def check_for_monotonicity(self) -> None:
        """Suggests using specialized algorithms or data structures for monotonic arrays."""
        if self.arr__.size < 2:
            # Fewer than two elements: monotonicity carries no information.
            return
        try:
            steps = numpy.diff(self.arr__)
        except TypeError:
            # dtype without subtraction (e.g. strings): nothing to analyse.
            return
        if numpy.all(steps >= 0) or numpy.all(steps <= 0):
            self.message_handler.messages.append(
                "The array is monotonic. Consider using specialized algorithms or data structures for monotonic arrays."
            )

    def check_for_categorical_data(self) -> None:
        """Suggests using categorical data types for arrays with a small number of unique values."""
        unique_values_count = len(numpy.unique(self.arr__))
        # Compare against the total element count; len() would only be the
        # first dimension and fails on 0-d arrays.
        if unique_values_count < self.arr__.size / 2:
            self.message_handler.messages.append(
                f"The array contains categorical data with {unique_values_count} unique values. Consider using categorical data types for efficiency, like pd.Categorical()"
            )

    def check_for_symmetry(self) -> None:
        """Suggests using specialized algorithms or data structures for symmetric arrays."""
        arr = self.arr__
        # Only a square 2-D array can be symmetric; a 1-D array trivially
        # equals its own transpose and must not be reported.
        if arr.ndim != 2 or arr.shape[0] != arr.shape[1]:
            return
        if numpy.array_equal(arr, arr.T):
            self.message_handler.messages.append(
                "The array is symmetric. Consider using specialized algorithms to operate on symmetric arrays, for example functions from scipy"
            )

    def check_for_constant_values(self) -> None:
        """Suggests using a single value or a constant data type if all elements are the same."""
        arr = self.arr__
        if arr.size == 0:
            return
        # Compare against the first scalar element; arr[0] would be a whole
        # row for multi-dimensional arrays.
        if numpy.all(arr == arr.flat[0]):
            self.message_handler.messages.append(
                "All elements in the array are the same. Consider using a single value, a constant or collections.Counter for memory efficiency."
            )

    def run(self) -> None:
        """Run every array checker, then print the collected messages."""
        self.check_array_data_type()
        self.check_array_sparsity()
        self.check_for_categorical_data()
        self.check_for_constant_values()
        self.check_for_nan_values()
        self.check_for_monotonicity()
        self.check_for_symmetry()
        self.message_handler.print_messages()
524 |
525 |
class ObservablePandasDataFrame:
    """
    The ObservablePandasDataFrame is a Pandas DataFrame analyzer that takes the declared DataFrame
    and does internal attribute and value checkings for potential improvement suggestions.
    """

    __slots__ = ("df__", "message_handler")

    def __init__(self, df__) -> None:
        self.df__ = df__

        # The direct caller's frame is where the DataFrame was declared.
        caller_frame = inspect.currentframe().f_back
        line_number: int = caller_frame.f_lineno
        file_path: str = caller_frame.f_globals["__file__"]

        self.message_handler = MessageHandler(line_nr=line_number, file_path=file_path)

    def check_for_missing_values(self) -> None:
        """Suggests handling missing values appropriately."""
        if self.df__.isnull().any().any():
            self.message_handler.messages.append(
                "The DataFrame contains missing values. Consider handling missing values."
            )

    def check_for_constant_columns(self) -> None:
        """Suggests dropping constant columns for memory efficiency."""
        try:
            constant_columns = self.df__.columns[self.df__.nunique() == 1]
        except TypeError:
            # Unhashable cell values (e.g. lists) cannot be counted.
            return
        # Test the number of matches, not the truthiness of the labels:
        # Index.any() is False for a column named 0 or "".
        if len(constant_columns) > 0:
            self.message_handler.messages.append(
                f"The DataFrame contains constant columns ({constant_columns.tolist()}). Consider dropping them for memory efficiency."
            )

    def check_for_duplicate_rows(self) -> None:
        """Suggests handling duplicate rows appropriately."""
        try:
            has_duplicates = self.df__.duplicated().any()
        except TypeError:
            # Unhashable cell values (e.g. lists) cannot be compared by hash.
            return
        if has_duplicates:
            self.message_handler.messages.append(
                "The DataFrame contains duplicate rows. Consider handling duplicate rows appropriately."
            )

    def check_series_insteafd_of_dataframe(self) -> None:
        """Suggests a Series when the DataFrame has exactly one column."""
        if len(self.df__.columns) == 1:
            self.message_handler.messages.append(
                "Consider using a Series instead of a DataFrame when you have only one column of data."
            )

    def check_numpy_instead_of_dataframe(self) -> None:
        """Suggests NumPy for frames with over 10000 rows and under 5 columns."""
        if len(self.df__.index) > 10000 and len(self.df__.columns) < 5:
            self.message_handler.messages.append(
                "Consider using a NumPy array or a specialized data structure if you have a large number of rows and a small number of columns."
            )

    def run(self) -> None:
        """Run every DataFrame checker, then print the collected messages."""
        self.check_for_constant_columns()
        self.check_for_duplicate_rows()
        self.check_for_missing_values()
        self.check_numpy_instead_of_dataframe()
        self.check_series_insteafd_of_dataframe()
        self.message_handler.print_messages()
589 |
590 |
class ObservableNamedTuple:
    """
    Analyzer wrapped around a declared namedtuple instance.

    It stores the namedtuple untouched and inspects its fields and
    annotations in order to collect improvement suggestions in a
    message handler bound to the declaring file and line.
    """

    __slots__: Tuple[set] = ("namedtuple__", "message_handler")

    def __init__(self, namedtuple__) -> None:
        self.namedtuple__ = namedtuple__

        # Resolve the declaration site from the frame that called us.
        declaring_frame = inspect.currentframe().f_back
        self.message_handler = MessageHandler(
            line_nr=declaring_frame.f_lineno,
            file_path=declaring_frame.f_globals["__file__"],
        )

    def check_for_excessive_nesting(self) -> None:
        """Emit one warning when at least one field value is itself a tuple."""
        wrapped = self.namedtuple__
        holds_tuple = any(
            isinstance(getattr(wrapped, name), tuple) for name in wrapped._fields
        )
        if holds_tuple:
            self.message_handler.messages.append(
                "Avoid excessive nesting of namedtuples to keep the structure simple and readable. Consider usina a class instead"
            )

    def check_for_ignoring_type_annotations(self) -> None:
        """Recommend type annotations when __annotations__ is missing or empty."""
        if getattr(self.namedtuple__, "__annotations__", {}):
            return
        self.message_handler.messages.append(
            "Consider using type annotations for field in namedtuples for better documentation."
        )

    def check_for_ignoring_namedtuple_advantages(self) -> None:
        """Flag namedtuples that declare more than ten fields."""
        field_count = len(self.namedtuple__._fields)
        if field_count > 10:
            self.message_handler.messages.append(
                "Consider using namedtuples for simpler data structures with fewer fields for better readability."
            )

    def run(self):
        """Run the checkers in their fixed order and print the messages."""
        for checker in (
            self.check_for_ignoring_type_annotations,
            self.check_for_ignoring_namedtuple_advantages,
            self.check_for_excessive_nesting,
        ):
            checker()
        self.message_handler.print_messages()
642 |
--------------------------------------------------------------------------------