├── src ├── ploosh │ ├── version.py │ ├── __main__.py │ ├── engines │ │ ├── compare_engine.py │ │ ├── load_engine.py │ │ ├── load_engine_spark.py │ │ └── load_engine_native.py │ ├── exporters │ │ ├── exporter.py │ │ ├── __init__.py │ │ ├── exporter_csv.py │ │ ├── exporter_json.py │ │ └── exporter_trx.py │ ├── connectors │ │ ├── connector.py │ │ ├── connector_empty.py │ │ ├── connector_sql_spark.py │ │ ├── connector_delta_spark.py │ │ ├── connector_parquet_spark.py │ │ ├── connector_excel.py │ │ ├── connector_empty_spark.py │ │ ├── connector_json.py │ │ ├── connector_json_spark.py │ │ ├── __init__.py │ │ ├── connector_databricks.py │ │ ├── connector_parquet.py │ │ ├── connector_bigquery.py │ │ ├── connector_csv_spark.py │ │ ├── connector_snowflake.py │ │ ├── connector_postgresql.py │ │ ├── connector_mysql.py │ │ ├── connector_mssql.py │ │ ├── connector_odbc.py │ │ ├── connector_csv.py │ │ ├── connector_semantic_model_xmla.py │ │ └── connector_analysis_services.py │ ├── __init__.py │ ├── parameters.py │ ├── execute.py │ ├── logs.py │ └── case.py ├── setup-core.py ├── requirements.txt ├── setup-full.py └── setup.py ├── tests ├── .env │ ├── excel │ │ └── sales.xlsx │ ├── parquet │ │ └── sales.parquet │ ├── delta │ │ └── sales │ │ │ ├── part-00001-4cd9f0b3-de7c-470d-83e2-ede8afcbf39a-c000.snappy.parquet │ │ │ └── _delta_log │ │ │ └── 00000000000000000000.json │ ├── spark │ │ └── setup.sql │ └── csv │ │ ├── sales_with_cr.csv │ │ ├── sales_with_tab.csv │ │ ├── sales_with_comma.csv │ │ └── sales_with_iso_8859_1.csv ├── .data │ ├── sales_by_seller.csv │ └── sales.csv ├── connectors │ ├── test_excel.py │ ├── test_csv.py │ ├── test_parquet_spark.py │ ├── test_delta_spark.py │ ├── test_sql_spark.py │ ├── test_mysql.py │ ├── test_postgresql.py │ ├── test_mssql.py │ ├── test_json.py │ ├── test_parquet.py │ └── test_csv_spark.py └── load_engine │ ├── test_native.py │ └── test_spark.py ├── pyproject.toml ├── .vscode ├── settings.json └── launch.json ├── .gitignore ├── docs ├── connectors │ ├── native │ │ ├── empty.md │ │ ├── excel.md │ │ ├── parquet.md │ │ ├── big_query.md │ │ ├── odbc.md │ │ ├── databricks.md │ │ ├── snowflake.md │ │ ├── csv.md │ │ ├── postgresql.md │ │ ├── mysql.md │ │ └── sqlserver.md │ └── spark │ │ ├── empty.md │ │ ├── sql.md │ │ ├── delta.md │ │ └── csv.md ├── configuration │ ├── custom_parameters.md │ ├── command_line.md │ ├── spark.md │ └── options.md ├── exporters │ ├── trx.md │ ├── csv.md │ └── json.md ├── pipelines │ └── azure_devops.md └── home.md ├── .github └── workflows │ ├── linter.yml │ ├── release.yml │ └── unit_tests.yml ├── readme.md └── debug └── setup.sh /src/ploosh/version.py: -------------------------------------------------------------------------------- 1 | """Current version of ploosh""" 2 | 3 | PLOOSH_VERSION = "0.3.8" -------------------------------------------------------------------------------- /tests/.env/excel/sales.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSharplie/ploosh/HEAD/tests/.env/excel/sales.xlsx -------------------------------------------------------------------------------- /tests/.env/parquet/sales.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSharplie/ploosh/HEAD/tests/.env/parquet/sales.parquet -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | 
[tool.pytest.ini_options] 2 | addopts = [ 3 | "--import-mode=importlib", 4 | ] 5 | pythonpath = [ 6 | "src", "tests" 7 | ] -------------------------------------------------------------------------------- /src/setup-core.py: -------------------------------------------------------------------------------- 1 | """Setup script for ploosh light package""" 2 | 3 | from setup import setup_ploosh 4 | 5 | setup_ploosh("ploosh-core", []) 6 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "python.testing.pytestArgs": [ 3 | "tests" 4 | ], 5 | "python.testing.unittestEnabled": false, 6 | "python.testing.pytestEnabled": true 7 | } -------------------------------------------------------------------------------- /tests/.env/delta/sales/part-00001-4cd9f0b3-de7c-470d-83e2-ede8afcbf39a-c000.snappy.parquet: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/CSharplie/ploosh/HEAD/tests/.env/delta/sales/part-00001-4cd9f0b3-de7c-470d-83e2-ede8afcbf39a-c000.snappy.parquet -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .venv 2 | .pytest_cache 3 | __pycache__ 4 | /src/build 5 | /src/dist 6 | /src/ploosh.egg-info 7 | /src/ploosh_core.egg-info 8 | /src/ploosh/output 9 | /src/ploosh/logs 10 | /tests/connectors/connections/ 11 | /data 12 | /logs 13 | /test_cases 14 | /output 15 | /metastore_db 16 | /connections.yml 17 | spark_setup_file_tmp 18 | *.log 19 | -------------------------------------------------------------------------------- /src/ploosh/__main__.py: -------------------------------------------------------------------------------- 1 | """Automatized Testing Framework""" 2 | 3 | from execute import execute 4 | 5 | 6 | def main(): 7 | """Entry point for conda execution""" 8 | # Call the main execution function 9 | execute() 10 | 11 | 12 | # Check if the script is being run directly 13 | if __name__ == "__main__": 14 | # Call the main execution function 15 | main() 16 | -------------------------------------------------------------------------------- /tests/.data/sales_by_seller.csv: -------------------------------------------------------------------------------- 1 | seller_name,total_sales 2 | Jane Smith,615.00 3 | Emma Green,500.00 4 | Lucas Harris,500.00 5 | Henry King,492.00 6 | Harper Lewis,440.00 7 | Ethan White,415.00 8 | Alex Johnson,390.00 9 | Chris Brown,360.00 10 | John Doe,333.00 11 | Amelia Clark,330.00 12 | Sophia Wilson,320.00 13 | Mason Martinez,310.00 14 | Liam Brown,245.00 15 | Sarah White,222.00 16 | Isabella Moore,220.00 17 | Olivia Taylor,165.00 18 | Evelyn Walker,160.00 -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | colorama==0.4.6 2 | PyYAML==6.0.1 3 | Pyjeb==0.2.1 4 | pytest==8.3.3 5 | numpy==1.26.3 6 | pandas==2.1.4 7 | openpyxl==3.1.2 8 | sqlalchemy==1.4.51 9 | pyspark==3.5.4 10 | pyodbc==5.0.1 11 | pymysql==1.1.0 12 | pg8000==1.30.3 13 | snowflake-sqlalchemy==1.5.1 14 | databricks-sql-connector==2.9.3 15 | sqlalchemy-bigquery==1.9.0 16 | google-cloud-bigquery-storage==2.24.0 17 | pandas-gbq==0.23.0 18 | pydata-google-auth==1.8.2 19 | azure-identity==1.19.0 20 | delta-spark==3.3.0 21 | deltalake==0.23.2 22 | 
pyadomd==0.1.1 -------------------------------------------------------------------------------- /tests/.env/spark/setup.sql: -------------------------------------------------------------------------------- 1 | DROP TABLE IF EXISTS sales; 2 | 3 | CREATE EXTERNAL TABLE IF NOT EXISTS sales ( 4 | sale_id INT, 5 | seller_name STRING, 6 | card_name STRING, 7 | card_rarity STRING, 8 | card_condition STRING, 9 | price DOUBLE, 10 | quantity INT, 11 | sale_date DATE, 12 | card_set STRING, 13 | buyer_name STRING, 14 | transaction_status STRING 15 | ) 16 | USING csv 17 | OPTIONS ( 18 | path '{{pwd}}/tests/.data/sales.csv', 19 | header 'true', 20 | inferSchema 'true' 21 | ); -------------------------------------------------------------------------------- /src/ploosh/engines/compare_engine.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903,W0613 2 | """ Base class for the comparison engines """ 3 | 4 | class CompareEngine: 5 | """Base class for the comparison engines""" 6 | 7 | success_rate = 1 8 | error_type = None 9 | error_message = None 10 | df_compare_gap = None 11 | df_source = None 12 | df_expected = None 13 | options = None 14 | mode = None 15 | 16 | def compare(self) -> bool: 17 | """Compare the source and expected datasets""" 18 | return False 19 | -------------------------------------------------------------------------------- /src/setup-full.py: -------------------------------------------------------------------------------- 1 | """Setup script for ploosh package""" 2 | 3 | from setup import setup_ploosh 4 | 5 | install_requires = [ 6 | "pyodbc==5.0.1", 7 | "pymysql==1.1.0", 8 | "pg8000==1.30.3", 9 | "snowflake-sqlalchemy==1.5.1", 10 | "databricks-sql-connector==2.9.3", 11 | "sqlalchemy-bigquery==1.9.0", 12 | "google-cloud-bigquery-storage==2.24.0", 13 | "pandas-gbq==0.23.0", 14 | "pydata-google-auth==1.8.2", 15 | "azure-identity==1.19.0", 16 | "pyadomd==0.1.1" 17 | ] 18 | 19 | setup_ploosh("ploosh", install_requires) 20 | -------------------------------------------------------------------------------- /docs/connectors/native/empty.md: -------------------------------------------------------------------------------- 1 | This connector is used to return an empty dataframe with 0 rows and 0 columns 2 | 3 | # Connection configuration 4 | No connection is required by this connector 5 | 6 | # Test case configuration 7 | ## Test case configuration 8 | Test case configuration parameter is required by this connector 9 | 10 | ## Example 11 | ``` yaml 12 | Example Empty: 13 | source: 14 | connection: mysql_example 15 | type: mysql 16 | query: | 17 | select * 18 | from employees 19 | where hire_date < "2000-01-01" 20 | expected: 21 | type: empty 22 | ``` -------------------------------------------------------------------------------- /docs/configuration/custom_parameters.md: -------------------------------------------------------------------------------- 1 | # Custom parameters usefulness 2 | It is possible to add a custom parameter in the connection configuration file to avoid hardcoding sensitive information like passwords or credentials. 3 | 4 | To use a custom parameter, you need to add a parameter in the connection configuration file and use it in the connection configuration. The parameter value can be passed as an environment variable or as a command line argument. 5 | 6 | # Syntax and usage 7 | The syntax of for the parameter is `$var.` in the connection configuration file. 
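A minimal sketch of the pattern (the connection name, parameter name and values are illustrative; the `$var.` prefix follows the convention used in the connector examples elsewhere in these docs):

``` yaml
# connections.yml
mysql_example:
  type: mysql
  hostname: localhost
  username: ploosh
  password: $var.mysql_password   # resolved at runtime instead of being stored in the file
  database: ploosh
```

The value would then be supplied when running ploosh, for example through the `--p_mysql_password` command line argument or through an environment variable.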
8 | 9 | The parameter value can be passed as a command line argument using the `--p_` option. -------------------------------------------------------------------------------- /src/ploosh/engines/load_engine.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903,W0613 2 | """Base class for all load engines""" 3 | 4 | class LoadEngine: 5 | """Base class for all load engines""" 6 | 7 | count = None 8 | configuration = None 9 | options = None 10 | connection = None 11 | df_data = None 12 | 13 | def get_insensitive_item(self, name: str, items: list) -> str: 14 | """Get item from list case-insensitively""" 15 | for item in items: 16 | if name.upper().strip() == item.upper().strip(): 17 | return item 18 | return name 19 | 20 | def execute(self): 21 | """Execute the load engine""" 22 | return None 23 | -------------------------------------------------------------------------------- /src/ploosh/exporters/exporter.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903,W0613 2 | """Test result exporter""" 3 | from datetime import datetime 4 | 5 | 6 | class Exporter: 7 | """Test result exporter""" 8 | name = None # Name of the exporter 9 | output_path = None # Output path for the exported results 10 | 11 | @staticmethod 12 | def date_to_string(data): 13 | """Convert datetime to string in ISO 8601 format""" 14 | if not isinstance(data, datetime): 15 | return None 16 | 17 | return data.strftime("%Y-%m-%dT%H:%M:%SZ") 18 | 19 | def export(self, cases: dict): 20 | """Export test case results to the destination""" 21 | return None 22 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903,W0613 2 | """Connector to access to remote data""" 3 | 4 | 5 | class Connector: 6 | """Connector to access to remote data""" 7 | name = None # Name of the connector 8 | connection_definition = None # Definition of the connection parameters 9 | configuration_definition = None # Definition of the configuration parameters 10 | is_spark = False # Flag to indicate if the connector uses Spark 11 | spark = None # Spark session object 12 | 13 | def get_data(self, configuration: dict, connection: dict): 14 | """Get data from connector""" 15 | return None # This method should be overridden by subclasses to fetch data 16 | -------------------------------------------------------------------------------- /docs/connectors/native/excel.md: -------------------------------------------------------------------------------- 1 | This connector is used to read Excel files from local file system. 
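As a quick orientation, here is a minimal test case sketch. The parameter names (`path`, `sheet_name` and the optional `skiprows`) are the ones declared by the connector implementation in `connector_excel.py`; the file paths are placeholders.

``` yaml
Example Excel:
  source:
    type: excel
    path: data/employees.xlsx
    sheet_name: employees
    skiprows: 0
  expected:
    type: csv
    path: data/employees.csv
```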
2 | 3 | # Connection configuration 4 | No connection is required by this connector 5 | 6 | # Test case configuration 7 | # Definition 8 | 9 | | Name | Mandatory | Default | Description | 10 | |-----------|:---------:|:-------:|-------------| 11 | | path | yes | | The path to the Excel file to read | 12 | | sheet | yes | | The sheet name or index to read from the Excel file 13 | 14 | # Example 15 | 16 | ``` yaml 17 | Example Excel: 18 | source: 19 | type: mysql 20 | 21 | expected: 22 | type: excel 23 | path: data/employees.xlsx 24 | sheet: employees 25 | 26 | -------------------------------------------------------------------------------- /docs/configuration/command_line.md: -------------------------------------------------------------------------------- 1 | # Mandatory arguments 2 | - `--connection `: The connection file name to use 3 | - `--cases `: The test case folder where the test cases yaml files are stored 4 | 5 | # Optional arguments 6 | - `--export `: The export format to use. Can be `JSON`, `CSV` or `TRX`. Default is `JSON` 7 | - `--filter `: A wildcard to filter the test cases to execute. The default value is `*.yml` which means all the test cases will be executed 8 | - `--p_ `: The parameter value to use. The parameter name is the name of the parameter in the connection file. The parameter value can be passed as an environment variable or as a command line argument -------------------------------------------------------------------------------- /.github/workflows/linter.yml: -------------------------------------------------------------------------------- 1 | name: 'Linter' 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - develop 8 | paths: 9 | - 'src/**' 10 | - 'tests/**' 11 | jobs: 12 | lint: 13 | name: 'Lint code' 14 | runs-on: ubuntu-22.04 15 | defaults: 16 | run: 17 | shell: bash 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v3 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.12.8" 25 | - name: Install requirements 26 | run: | 27 | pip install -r src/requirements.txt 28 | pip install pylint==3.3.3 29 | - name: Lint code 30 | run: | 31 | pylint --fail-under=9.3 src/ 32 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_empty.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to return empty""" 3 | 4 | import pandas as pd 5 | from connectors.connector import Connector 6 | 7 | 8 | class ConnectorEmpty(Connector): 9 | """Connector to return empty""" 10 | 11 | def __init__(self): 12 | # Initialize the connector with its name and empty definitions 13 | self.name = "EMPTY" 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [] # No specific configuration parameters required 16 | 17 | def get_data(self, configuration: dict, connection: dict): 18 | """Return empty value""" 19 | # Create an empty pandas DataFrame 20 | df = pd.DataFrame() 21 | return df 22 | -------------------------------------------------------------------------------- /docs/exporters/trx.md: -------------------------------------------------------------------------------- 1 | # Structure 2 | ``` 3 | output/ 4 | ├─ trx/ 5 | │ ├─ test_results.xml 6 | │ ├─ test_results/ 7 | │ │ ├─ execution ID (guid)/ 8 | │ │ │ ├─ test case 1.xlsx 9 | │ │ ├─ execution ID (guid)/ 10 | │ │ │ ├─ test case 2.xlsx 11 | │ │ └─ ... 
12 | ``` 13 | 14 | # test_results.xml 15 | The `test_results.xml` file use the TRX format (Visual Studio Test Results File). It will contain the details of the test cases results in XML format. 16 | 17 | This file can be opened with Visual Studio or any other tool that support the TRX format. 18 | 19 | It can be used with Azure DevOps to publish the test results. 20 | 21 | # test_results folder 22 | The `test_results` folder will contain one xlsx file per test case. Each file will contain a sheet with the gap between the source and the expected dataset -------------------------------------------------------------------------------- /docs/connectors/spark/empty.md: -------------------------------------------------------------------------------- 1 | This connector is used to return an empty dataframe with 0 rows and 0 columns 2 | 3 | ⚠️ A spark connector can be use only with another spark connector. It is not possible to use a spark connector with a non spark connector. 4 | 5 | See [Spark documentation](/docs/configuration-spark-mode/) for more information. 6 | 7 | # Connection configuration 8 | No connection is required by this connector 9 | 10 | # Test case configuration 11 | ## Test case configuration 12 | Test case configuration parameter is required by this connector 13 | 14 | ## Example 15 | ``` yaml 16 | Example Empty Spark: 17 | source: 18 | type: sql_spark 19 | query: | 20 | select * 21 | from employees 22 | where hire_date < "2000-01-01" 23 | expected: 24 | type: empty_spark 25 | ``` -------------------------------------------------------------------------------- /tests/connectors/test_excel.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | from ploosh.connectors.connector_excel import ConnectorExcel 6 | 7 | @pytest.fixture 8 | def connector(): 9 | return ConnectorExcel() 10 | 11 | @pytest.fixture 12 | def df_sales(): 13 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 14 | 15 | def test_get_data(connector, df_sales): 16 | configuration = { 17 | "path": "./tests/.env/excel/sales.xlsx", 18 | "sheet_name": "sales" 19 | } 20 | 21 | configuration = control_and_setup(configuration, connector.configuration_definition) 22 | 23 | df_test = connector.get_data(configuration, None) 24 | 25 | assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_sql_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read SQL file""" 3 | 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorSQLSpark(Connector): 8 | """Connector to execute SQL query over Spark""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and indicate it uses Spark 12 | self.name = "SQL_SPARK" 13 | self.is_spark = True 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [ 16 | {"name": "query"} # SQL query to execute 17 | ] 18 | 19 | def get_data(self, configuration: dict, connection: dict): 20 | """Get data from source""" 21 | 22 | # Execute the SQL query using Spark and return the resulting DataFrame 23 | df = self.spark.sql(configuration["query"]) 24 | 25 | return df 26 | 
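A minimal usage sketch for this connector, modelled on `tests/connectors/test_sql_spark.py` (the import path assumes `src` is on the Python path as configured in `pyproject.toml`; the local Spark master and the `sales` table are assumptions, and in normal runs the framework injects the Spark session itself rather than the caller):

``` python
from pyspark.sql import SparkSession
from ploosh.connectors.connector_sql_spark import ConnectorSQLSpark

# Build a Spark session; ploosh normally does this and assigns it to every
# Spark-based connector through its `spark` attribute.
spark = SparkSession.builder \
    .appName("ploosh") \
    .master("local[*]") \
    .getOrCreate()

connector = ConnectorSQLSpark()
connector.spark = spark

# "query" is the only configuration parameter declared by this connector;
# no connection parameters are required, so an empty dict is passed.
df = connector.get_data({"query": "select * from sales"}, {})
df.show()
```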
-------------------------------------------------------------------------------- /docs/connectors/spark/sql.md: -------------------------------------------------------------------------------- 1 | This connector is used to execute spark SQL. 2 | 3 | ⚠️ A spark connector can be use only with another spark connector. It is not possible to use a spark connector with a non spark connector. 4 | 5 | See [Spark documentation](/docs/configuration-spark-mode/) for more information. 6 | 7 | # Connection configuration 8 | No connection is required by this connector 9 | 10 | # Configuration 11 | ## Test case configuration 12 | | Name | Mandatory | Default | Description | 13 | |-------------------|:---------:|:-----------------------------:|-------------| 14 | | query | yes | | The query to execute to the database 15 | 16 | ## Example 17 | ``` yaml 18 | Example Empty Spark: 19 | source: 20 | type: sql_spark 21 | query: | 22 | select * 23 | from employees 24 | where hire_date < "2000-01-01" 25 | expected: 26 | type: empty_spark 27 | ``` -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_delta_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Delta table""" 3 | 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorDeltaSpark(Connector): 8 | """Connector to read Delta table with Spark""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and configuration definitions 12 | self.name = "DELTA_SPARK" 13 | self.is_spark = True # Indicates that this connector uses Spark 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [ 16 | {"name": "path"}, # Path to the Delta table 17 | ] 18 | 19 | def get_data(self, configuration: dict, connection: dict): 20 | """Get data from source""" 21 | 22 | # Read the Delta table using Spark with the specified path 23 | df = self.spark.read.format("delta").load(configuration["path"]) 24 | 25 | return df 26 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_parquet_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Parquet file""" 3 | 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorParquetSpark(Connector): 8 | """Connector to read Parquet file""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and configuration definitions 12 | self.name = "PARQUET_SPARK" 13 | self.is_spark = True 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [ 16 | {"name": "path"} # Path to the Parquet file 17 | ] 18 | 19 | def get_data(self, configuration: dict, connection: dict): 20 | """Get data from source""" 21 | 22 | # Extract the path and configuration parameters 23 | path = configuration["path"] 24 | 25 | # Read the Parquet file using pandas 26 | df = self.spark.read.parquet(path) 27 | 28 | return df 29 | -------------------------------------------------------------------------------- /src/ploosh/engines/load_engine_spark.py: -------------------------------------------------------------------------------- 1 | from engines.load_engine import LoadEngine 2 | 3 | class LoadEngineSpark(LoadEngine): 4 | """Load engine for Spark""" 5 | 6 | def __init__(self, 
configuration, options, connection): 7 | """Initialize the LoadEngineSpark class""" 8 | 9 | self.configuration = configuration 10 | self.options = options 11 | self.connection = connection 12 | 13 | def execute(self, df_data): 14 | """Execute the load engine""" 15 | self.count = df_data.count() 16 | 17 | # Cast columns to specified types 18 | for column in self.options["cast"]: 19 | column_name = self.get_insensitive_item(column["name"], df_data.columns) 20 | column_type = column["type"] 21 | if column_type == "datetime": 22 | column_type = "timestamp" 23 | 24 | df_data = df_data.withColumn(column_name, df_data[column_name].cast(column_type)) 25 | 26 | return df_data 27 | -------------------------------------------------------------------------------- /docs/connectors/spark/delta.md: -------------------------------------------------------------------------------- 1 | This connector is used to read Detla table files using Spark. 2 | 3 | ⚠️ A spark connector can be use only with another spark connector. It is not possible to use a spark connector with a non spark connector. 4 | 5 | See [Spark documentation](/docs/configuration-spark-mode/) for more information. 6 | 7 | # Connection configuration 8 | No connection is required by this connector 9 | 10 | # Configuration 11 | ## Test case configuration 12 | | Name | Mandatory | Default | Description | 13 | |-------------------|:---------:|:-----------------------------:|-------------| 14 | | path | yes | | Path to the Delta table 15 | 16 | ## Example 17 | ``` yaml 18 | Example Delta Spark: 19 | source: 20 | type: delta_spark 21 | path: data/employees 22 | expected: 23 | type: sql_spark 24 | query: | 25 | select * 26 | from employees 27 | where hire_date < "2000-01-01" 28 | ``` 29 | -------------------------------------------------------------------------------- /tests/connectors/test_csv.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | from ploosh.connectors.connector_csv import ConnectorCSV 6 | 7 | @pytest.fixture 8 | def connector(): 9 | return ConnectorCSV() 10 | 11 | @pytest.fixture 12 | def df_sales(): 13 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",") 14 | 15 | def test_connection_with_tabulation(connector, df_sales): 16 | configuration = { 17 | "path": "./tests/.env/csv/sales_with_tab.csv", 18 | "delimiter": "\t" 19 | } 20 | 21 | configuration = control_and_setup(configuration, connector.configuration_definition) 22 | 23 | df_test = connector.get_data(configuration, None) 24 | 25 | assert len(df_test.compare(df_sales)) == 0 26 | 27 | def test_connection_with_default(connector, df_sales): 28 | configuration = { 29 | "path": "./tests/.env/csv/sales_with_comma.csv" 30 | } 31 | 32 | configuration = control_and_setup(configuration, connector.configuration_definition) 33 | 34 | df_test = connector.get_data(configuration, None) 35 | 36 | assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_excel.py: -------------------------------------------------------------------------------- 1 | """Connector to read Excel file""" 2 | 3 | import pandas as pd 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorExcel(Connector): 8 | """Connector to read Excel file""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and configuration definitions 12 | self.name = 
"EXCEL" 13 | self.connection_definition = [] # No specific connection parameters required 14 | self.configuration_definition = [ 15 | {"name": "path"}, # Path to the Excel file 16 | {"name": "sheet_name"}, # Sheet name 17 | {"name": "skiprows", "type": "integer", "default": 0}, # Number of rows to skip 18 | ] 19 | 20 | def get_data(self, configuration: dict, connection: dict): 21 | """Get data from source""" 22 | # Read the Excel file using pandas with the specified configuration options 23 | df = pd.read_excel( 24 | configuration["path"], 25 | sheet_name=configuration["sheet_name"], 26 | skiprows=configuration["skiprows"], 27 | ) 28 | return df 29 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_empty_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to return empty""" 3 | 4 | from pyspark.sql.types import StructType 5 | from connectors.connector import Connector 6 | 7 | 8 | class ConnectorEmptySpark(Connector): 9 | """Connector to return empty""" 10 | 11 | def __init__(self): 12 | # Initialize the connector with its name and indicate it uses Spark 13 | self.name = "EMPTY_SPARK" 14 | self.is_spark = True 15 | self.connection_definition = [] # No specific connection parameters required 16 | self.configuration_definition = [] # No specific configuration parameters required 17 | 18 | def get_data(self, configuration: dict, connection: dict): 19 | """Return empty value""" 20 | 21 | # Create an empty RDD (Resilient Distributed Dataset) 22 | empty_rdd = self.spark.sparkContext.emptyRDD() 23 | 24 | # Define an empty schema (no columns) 25 | columns = StructType([]) 26 | 27 | # Create an empty DataFrame using the empty RDD and schema 28 | df = self.spark.createDataFrame(data = empty_rdd, schema = columns) 29 | 30 | return df 31 | -------------------------------------------------------------------------------- /src/ploosh/exporters/__init__.py: -------------------------------------------------------------------------------- 1 | """Result exporter""" 2 | from importlib import import_module 3 | import os 4 | import inspect 5 | 6 | 7 | def get_exporters(): 8 | """Get all existing exporters""" 9 | connectors = {} 10 | 11 | # List all Python files in the current directory that start with "exporter_" 12 | files = [ 13 | name 14 | for name in os.listdir(os.path.dirname(__file__)) 15 | if name.endswith(".py") and name.startswith("exporter_") 16 | ] 17 | 18 | for file in files: 19 | module_name = file[:-3] # Remove the ".py" extension to get the module name 20 | 21 | # Import the module dynamically 22 | module = import_module(f"exporters.{module_name}") 23 | 24 | # Inspect the module to find classes that start with "Exporter" 25 | for name, obj in inspect.getmembers(module): 26 | if inspect.isclass(obj) and name.startswith("Exporter"): 27 | current_connector = obj() # Instantiate the exporter class 28 | connectors[ 29 | current_connector.name 30 | ] = current_connector # Add the exporter to the connectors dictionary 31 | 32 | return connectors 33 | -------------------------------------------------------------------------------- /tests/connectors/test_parquet_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pyspark.sql import SparkSession 4 | import pytest 5 | from pyjeb import control_and_setup 6 | from ploosh.engines.load_engine_spark import LoadEngineSpark 7 | from 
ploosh.configuration import Configuration 8 | from ploosh.connectors.connector_parquet_spark import ConnectorParquetSpark 9 | 10 | @pytest.fixture 11 | def connector(): 12 | spark = SparkSession.builder \ 13 | .appName("ploosh") \ 14 | .master("spark://localhost:7077") \ 15 | .config("spark.executor.memory", "1g") \ 16 | .config("spark.driver.memory", "1g") \ 17 | .getOrCreate() 18 | 19 | connector = ConnectorParquetSpark() 20 | connector.spark = spark 21 | 22 | return connector 23 | 24 | @pytest.fixture 25 | def df_sales(): 26 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",") 27 | 28 | def test_default(connector, df_sales): 29 | configuration = { 30 | "path": "./tests/.env/parquet/sales.parquet", 31 | } 32 | 33 | configuration = control_and_setup(configuration, connector.configuration_definition) 34 | df_test = connector.get_data(configuration, {}).toPandas() 35 | 36 | assert len(df_test.compare(df_sales)) == 0 37 | -------------------------------------------------------------------------------- /docs/connectors/native/parquet.md: -------------------------------------------------------------------------------- 1 | This connector is used to read Parquet files from local file system. 2 | 3 | # Connection configuration 4 | No connection is required by this connector 5 | 6 | # Test case configuration 7 | ## Test case configuration 8 | | Name | Mandatory | Default | Description | 9 | |-------------------|:---------:|:-----------------------------:|-------------| 10 | | path | yes | | Path to the Parquet file 11 | | columns | no | None | Subset of columns to load 12 | | engine | no | "auto" | Parquet engine to use ('auto', 'pyarrow', 'fastparquet') 13 | | filters | no | None | Row group filters to apply (for 'pyarrow') 14 | 15 | 16 | ## Example 17 | ``` yaml 18 | Example PARQUET: 19 | source: 20 | type: parquet 21 | path: ../data/parquet/source/example.parquet 22 | columns: ["id", "name"] 23 | filters: 24 | - column: "id" 25 | operator: "!=" 26 | value: 2 27 | 28 | 29 | expected: 30 | type: csv 31 | infer: True 32 | delimiter: ";" 33 | encoding: "utf-8" 34 | engine: "python" 35 | path: data/example.csv 36 | ``` -------------------------------------------------------------------------------- /tests/connectors/test_delta_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pyspark.sql import SparkSession 4 | import pytest 5 | from pyjeb import control_and_setup 6 | from delta import configure_spark_with_delta_pip 7 | from ploosh.connectors.connector_delta_spark import ConnectorDeltaSpark 8 | 9 | @pytest.fixture 10 | def connector(): 11 | spark = SparkSession.builder \ 12 | .appName("ploosh") \ 13 | .master("spark://localhost:7077") \ 14 | .config("spark.sql.extensions", "io.delta.sql.DeltaSparkSessionExtension") \ 15 | .config("spark.sql.catalog.spark_catalog", "org.apache.spark.sql.delta.catalog.DeltaCatalog") \ 16 | .config("spark.jars.packages", "io.delta:delta-spark_2.12:3.2.0") \ 17 | .getOrCreate() 18 | 19 | connector = ConnectorDeltaSpark() 20 | connector.spark = spark 21 | 22 | return connector 23 | 24 | @pytest.fixture 25 | def df_sales(): 26 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",") 27 | 28 | #def test_load_data(connector, df_sales): 29 | # configuration = { 30 | # "path": "./tests/.env/delta/sales" 31 | # } 32 | # 33 | # configuration = control_and_setup(configuration, connector.configuration_definition) 34 | # 35 | # df_test = connector.get_data(configuration, 
None).toPandas() 36 | # 37 | # assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /.vscode/launch.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "0.2.0", 3 | "configurations": [ 4 | { 5 | "name": "Debug Native", 6 | "type": "python", 7 | "request": "launch", 8 | "program": "__main__.py", 9 | "cwd": "${workspaceFolder}/src/ploosh", 10 | "console": "integratedTerminal", 11 | "args": ["--cases", "../../test_cases", "--filter", "*.yaml"], 12 | "justMyCode": true 13 | }, 14 | { 15 | "name": "Debug Native with connection file", 16 | "type": "python", 17 | "request": "launch", 18 | "program": "__main__.py", 19 | "cwd": "${workspaceFolder}/src/ploosh", 20 | "console": "integratedTerminal", 21 | "args": ["--cases", "../../test_cases", "--filter", "*.yaml", "--connections", "connections.yml"], 22 | "justMyCode": true 23 | }, 24 | { 25 | "name": "Debug Spark", 26 | "type": "python", 27 | "request": "launch", 28 | "program": "__main__.py", 29 | "cwd": "${workspaceFolder}/src/ploosh", 30 | "console": "integratedTerminal", 31 | "args": ["--cases", "../../test_cases", "--filter", "*.yaml", "--spark", "True"], 32 | "justMyCode": true 33 | } 34 | ] 35 | } -------------------------------------------------------------------------------- /tests/connectors/test_sql_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pyspark.sql import SparkSession 4 | import pytest 5 | from pyjeb import control_and_setup 6 | from ploosh.engines.load_engine_spark import LoadEngineSpark 7 | from ploosh.configuration import Configuration 8 | from ploosh.connectors.connector_sql_spark import ConnectorSQLSpark 9 | 10 | @pytest.fixture 11 | def connector(): 12 | # connection with hive metastore 13 | spark = SparkSession.builder \ 14 | .appName("ploosh") \ 15 | .master("spark://localhost:7077") \ 16 | .config("spark.executor.memory", "1g") \ 17 | .config("spark.driver.memory", "1g") \ 18 | .config("spark.sql.warehouse.dir", f"{os.getcwd()}/spark-warehouse") \ 19 | .enableHiveSupport() \ 20 | .getOrCreate() 21 | 22 | connector = ConnectorSQLSpark() 23 | connector.spark = spark 24 | 25 | return connector 26 | 27 | @pytest.fixture 28 | def df_sales(): 29 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 30 | 31 | #def test_get_data(connector, df_sales): 32 | # configuration = { 33 | # "query": "select * from sales;" 34 | # } 35 | # 36 | # connection = {} 37 | # 38 | # df_test = connector.get_data(configuration, connection).toPandas() 39 | # 40 | # assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /src/ploosh/__init__.py: -------------------------------------------------------------------------------- 1 | """Initialization for command line""" 2 | 3 | import os 4 | import sys 5 | 6 | # Add the current directory to the system path 7 | sys.path.append(os.path.dirname(__file__)) 8 | 9 | from execute import execute 10 | 11 | 12 | def execute_cases( 13 | cases=None, 14 | connections=None, 15 | spark=None, 16 | spark_session=None, 17 | filter=None, 18 | path_output=None, 19 | ): 20 | """Execute test cases with the given parameters""" 21 | args = ["ploosh"] 22 | 23 | # Add cases parameter to arguments if provided 24 | if cases is not None: 25 | args.append("--cases") 26 | args.append(cases) 
27 | 28 | # Add connections parameter to arguments if provided 29 | if connections is not None: 30 | args.append("--connections") 31 | args.append(connections) 32 | 33 | # Add spark parameter to arguments if provided 34 | if spark is not None: 35 | args.append("--spark") 36 | args.append(spark) 37 | 38 | # Add filter parameter to arguments if provided 39 | if filter is not None: 40 | args.append("--filter") 41 | args.append(filter) 42 | 43 | # Add output path parameter to arguments if provided 44 | if path_output is not None: 45 | args.append("--output") 46 | args.append(path_output) 47 | 48 | # Execute the test cases with the constructed arguments 49 | execute(args, spark_session) 50 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_json.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read JSON file""" 3 | 4 | import pandas as pd 5 | from connectors.connector import Connector 6 | 7 | 8 | class ConnectorJSON(Connector): 9 | """Connector to read JSON file""" 10 | 11 | def __init__(self): 12 | # Initialize the connector with its name and configuration definitions 13 | self.name = "JSON" 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [ 16 | {"name": "path"}, # Path to the JSON file 17 | {"name": "encoding", "type": "string", "default": "utf-8"}, # Encoding to use when reading the JSON file. 18 | {"name": "lines", "type": "boolean", "default": False}, # Whether to treat the file as line-delimited JSON (one JSON object per line). 19 | {"name": "nrows", "type": "integer", "default": None} # Number of lines to read from a line-delimited JSON file. 20 | ] 21 | 22 | def get_data(self, configuration: dict, connection: dict): 23 | """Get data from source""" 24 | 25 | # Read the JSON file using pandas with the specified delimiter 26 | df = pd.read_json(configuration["path"], 27 | encoding = configuration["encoding"], 28 | lines = configuration["lines"], 29 | nrows = configuration["nrows"] 30 | ) 31 | return df 32 | -------------------------------------------------------------------------------- /docs/connectors/native/big_query.md: -------------------------------------------------------------------------------- 1 | This connector allows you to connect to a bigquery instance and execute SQL queries. 
2 | 3 | # Connection configuration 4 | ## Definition 5 | | Name | Mandatory | Default | Description | 6 | |--------------------------|:---------:|:----------:|-------------| 7 | | credentials | yes | | The authentication use a [google keyfile](https://googleapis.dev/python/google-api-core/latest/auth.html) encoded in base 64 8 | 9 | ⚠️ it's highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the credentials value 10 | 11 | ## Example 12 | ``` yaml 13 | bigquery_example: 14 | type: bigquery 15 | credentials: $var.gbq_sample_token 16 | ``` 17 | 18 | # Test case configuration 19 | ## Definition 20 | | Name | Mandatory | Default | Description | 21 | |-------------------|:---------:|:-----------------------------:|-------------| 22 | | connection | yes | | The connection to use 23 | | query | yes | | The query to execute to the database 24 | 25 | ## Example 26 | ``` yaml 27 | Example BigQuery: 28 | source: 29 | connection: bigquery_example 30 | type: bigquery 31 | query: | 32 | select * 33 | from `rh.employees` 34 | where hire_date < "2000-01-01" 35 | expected: 36 | type: csv 37 | path: data/employees_before_2000.csv 38 | ``` -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_json_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read json file""" 3 | 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorJSONSpark(Connector): 8 | """Connector to read json file with Spark""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and configuration definitions 12 | self.name = "JSON_SPARK" 13 | self.is_spark = True 14 | self.connection_definition = [] 15 | self.configuration_definition = [ 16 | {"name": "path", "type": "string"}, # Path to the JSON file 17 | {"name": "multiline", "type": "boolean", "default": True}, # Handles multi-line JSON files 18 | {"name": "encoding", "type": "string", "default": "UTF-8"}, # Character encoding format used in the JSON file 19 | {"name": "lineSep", "type": "string", "default": "\n"} # Character used to denote a line break 20 | ] 21 | 22 | def get_data(self, configuration: dict, connection: dict): 23 | """Get data from source""" 24 | 25 | # Read the JSON file using Spark with the specified configuration options 26 | df = self.spark.read.option("multiline", configuration["multiline"]) \ 27 | .option("encoding", configuration["encoding"]) \ 28 | .option("lineSep", configuration["lineSep"]) \ 29 | .json(configuration["path"]) 30 | 31 | return df 32 | -------------------------------------------------------------------------------- /src/ploosh/connectors/__init__.py: -------------------------------------------------------------------------------- 1 | """Data connectors""" 2 | from importlib import import_module 3 | from logs import Log 4 | import inspect 5 | import os 6 | 7 | 8 | def get_connectors(spark_session): 9 | """Get all existing connectors""" 10 | 11 | connectors = {} 12 | 13 | # List all Python files in the current directory that start with "connector_" 14 | files = [ 15 | name 16 | for name in os.listdir(os.path.dirname(__file__)) 17 | if name.endswith(".py") and name.startswith("connector_") 18 | ] 19 | 20 | for file in files: 21 | module_name = file[:-3] # Remove the ".py" extension to get the module name 22 | 23 | try: 24 | # Import the module dynamically 25 | for name, obj in 
inspect.getmembers(import_module(f"connectors.{module_name}")): 26 | if inspect.isclass(obj) and name.startswith("Connector"): 27 | current_connector = obj() # Instantiate the connector class 28 | 29 | # If a Spark session is provided and the connector is Spark-based, set the Spark session 30 | if spark_session is not None and current_connector.is_spark: 31 | current_connector.spark = spark_session 32 | 33 | # Add the connector to the connectors dictionary 34 | connectors[current_connector.name] = current_connector 35 | except Exception as e: 36 | Log.print_warning(f"Could not load connector {module_name}") 37 | Log.print_warning(str(e)) 38 | 39 | 40 | return connectors 41 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: 'Release on PyPi' 2 | 3 | on: 4 | push: 5 | branches: 6 | - develop 7 | tags: 8 | - '*' 9 | paths: 10 | - 'src/**' 11 | - 'tests/**' 12 | workflow_dispatch: 13 | jobs: 14 | publish: 15 | name: 'Publish on PyPi' 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | shell: bash 20 | working-directory: src/ 21 | steps: 22 | - name: Checkout 23 | uses: actions/checkout@v3 24 | - name: Set up Python 25 | uses: actions/setup-python@v5 26 | with: 27 | python-version: "3.12.8" 28 | - name: Install requirements 29 | run: | 30 | pip install -r requirements.txt 31 | pip install wheel==0.44.0 32 | pip install twine==6.0.1 33 | pip install setuptools==75.1.0 34 | - name: Build package (full) 35 | run: python setup-full.py sdist bdist_wheel 36 | - name: Build package (core) 37 | run: python setup-core.py sdist bdist_wheel 38 | - name: Check package 39 | run: twine check dist/* 40 | - name: Publish 41 | run: | 42 | if [[ $GITHUB_REF == refs/tags/* ]]; then 43 | echo "Deploying to production environment" 44 | twine upload --repository-url https://upload.pypi.org/legacy/ dist/* -u ${{ secrets.PYPI_USER }} -p '${{ secrets.PYPI_PASSWORD }}' --verbose 45 | 46 | else 47 | echo "Deploying to test environment" 48 | twine upload --repository-url https://test.pypi.org/legacy/ dist/* -u ${{ secrets.PYPI_TEST_USER }} -p '${{ secrets.PYPI_TEST_PASSWORD }}' --verbose 49 | fi -------------------------------------------------------------------------------- /src/ploosh/engines/load_engine_native.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pandas as pd 3 | 4 | from engines.load_engine import LoadEngine 5 | 6 | class LoadEngineNative(LoadEngine): 7 | """Load engine for native Pandas""" 8 | def __init__(self, configuration, options, connection): 9 | """Initialize the LoadEngineNative class""" 10 | 11 | self.configuration = configuration 12 | self.options = options 13 | self.connection = connection 14 | 15 | def execute(self, df_data): 16 | """Execute the load engine""" 17 | 18 | self.count = len(df_data) 19 | 20 | # Cast columns to specified types 21 | for column in self.options["cast"]: 22 | column_name = self.get_insensitive_item(column["name"], df_data.columns) 23 | column_type = column["type"] 24 | if column_type == "datetime": 25 | column_type = "datetime64[ns]" 26 | df_data[column_name] = df_data[column_name].astype(column_type, errors="ignore") 27 | 28 | # Remap bad columns type 29 | for column in df_data.select_dtypes(include=["object"]).columns: 30 | if len(df_data) == 0: 31 | continue 32 | 33 | if type(df_data[column][0]).__name__ == "Decimal": 34 | df_data[column] 
= df_data[column].astype(float, errors="ignore") 35 | 36 | # Remove time zones 37 | date_columns = df_data.select_dtypes(include=["datetime64[ns, UTC]"]).columns 38 | for date_column in date_columns: 39 | df_data[date_column] = df_data[date_column].dt.tz_localize(None) 40 | self.count = len(df_data) 41 | 42 | return df_data 43 | -------------------------------------------------------------------------------- /tests/connectors/test_mysql.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | import urllib 6 | from ploosh.connectors.connector_mysql import ConnectorMYSQL 7 | 8 | @pytest.fixture 9 | def connector(): 10 | return ConnectorMYSQL() 11 | 12 | @pytest.fixture 13 | def df_sales(): 14 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 15 | 16 | def test_connection_with_password(connector, df_sales): 17 | configuration = { 18 | "query": "select * from sales;", 19 | "connection": "debug" 20 | } 21 | 22 | connection = { 23 | "hostname": "localhost", 24 | "username": "ploosh", 25 | "password": os.environ.get("TEST_DB_PASSWORD"), 26 | "database": "ploosh" 27 | } 28 | connection = control_and_setup(connection, connector.connection_definition) 29 | 30 | df_test = connector.get_data(configuration, connection) 31 | 32 | assert len(df_test.compare(df_sales)) == 0 33 | 34 | def test_connection_with_connection_string(connector, df_sales): 35 | configuration = { 36 | "query": "select * from sales;", 37 | "connection": "debug" 38 | } 39 | 40 | password = urllib.parse.quote_plus(os.environ.get('TEST_DB_PASSWORD')) 41 | connection = { 42 | "mode": "connection_string", 43 | "connection_string": f"mysql+pymysql://ploosh:{password}@localhost/ploosh" 44 | } 45 | 46 | connection = control_and_setup(connection, connector.connection_definition) 47 | 48 | df_test = connector.get_data(configuration, connection) 49 | 50 | assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /tests/connectors/test_postgresql.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | import urllib 6 | from ploosh.connectors.connector_postgresql import ConnectorPostgreSQL 7 | 8 | @pytest.fixture 9 | def connector(): 10 | return ConnectorPostgreSQL() 11 | 12 | @pytest.fixture 13 | def df_sales(): 14 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 15 | 16 | 17 | def test_connection_with_password(connector, df_sales): 18 | configuration = { 19 | "query": "select * from sales;", 20 | "connection": "debug" 21 | } 22 | 23 | connection = { 24 | "hostname": "localhost", 25 | "username": "ploosh", 26 | "password": os.environ.get("TEST_DB_PASSWORD"), 27 | "database": "ploosh" 28 | } 29 | connection = control_and_setup(connection, connector.connection_definition) 30 | 31 | df_test = connector.get_data(configuration, connection) 32 | 33 | assert len(df_test.compare(df_sales)) == 0 34 | 35 | def test_connection_with_connection_string(connector, df_sales): 36 | configuration = { 37 | "query": "select * from sales;", 38 | "connection": "debug" 39 | } 40 | 41 | password = urllib.parse.quote_plus(os.environ.get('TEST_DB_PASSWORD')) 42 | connection = { 43 | "mode": "connection_string", 44 | 
"connection_string": f"postgresql+pg8000://ploosh:{password}@localhost/ploosh" 45 | } 46 | 47 | connection = control_and_setup(connection, connector.connection_definition) 48 | 49 | df_test = connector.get_data(configuration, connection) 50 | 51 | assert len(df_test.compare(df_sales)) == 0 52 | 53 | 54 | -------------------------------------------------------------------------------- /docs/connectors/native/odbc.md: -------------------------------------------------------------------------------- 1 | This connector allows to connect to a ODBC datasource and execute SQL queries. 2 | 3 | # Connection configuration 4 | ## Definition 5 | | Name | Mandatory | Default | Description | 6 | |--------------------------|:---------:|:----------:|-------------| 7 | | dsn | yes | | Data Source Name 8 | | auto_commit | no | true | Autocommit mode 9 | | username | no | null | User name 10 | | password | no | null | User password 11 | | driver | no | null | ODBC driver name 12 | | encoding | no | UTF-8 | Encoding to use for the connection 13 | 14 | ⚠️ it's highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the password value 15 | 16 | ## Example 17 | ``` yaml 18 | odbc_example: 19 | type: odbc 20 | dsn: my_dsn 21 | username: pixel 22 | password: $var.odbc_password 23 | ``` 24 | 25 | # Test case configuration 26 | ## Definition 27 | | Name | Mandatory | Default | Description | 28 | |-------------------|:---------:|:-----------------------------:|-------------| 29 | | connection | yes | | The connection to use 30 | | query | yes | | The query to execute to the database 31 | 32 | ## Example 33 | ``` yaml 34 | Example ODBC: 35 | source: 36 | connection: odbc_example 37 | type: odbc 38 | query: | 39 | select * 40 | from employees 41 | where hire_date < '2000-01-01' 42 | type: csv 43 | path: data/employees_before_2000.csv 44 | ``` -------------------------------------------------------------------------------- /docs/connectors/spark/csv.md: -------------------------------------------------------------------------------- 1 | This connector is used to read CSV files using Spark. 2 | 3 | ⚠️ A spark connector can be use only with another spark connector. It is not possible to use a spark connector with a non spark connector. 4 | 5 | See [Spark documentation](/docs/configuration-spark-mode/) for more information. 
6 | 7 | # Connection configuration 8 | No connection is required by this connector 9 | 10 | # Configuration 11 | ## Test case configuration 12 | | Name | Mandatory | Default | Description | 13 | |-------------------|:---------:|:-----------------------------:|-------------| 14 | | path | yes | | Path to the CSV 15 | | delimiter | no | , | Column delimiter 16 | | header | no | true | Use the first row as header 17 | | inferSchema | no | False | Infers the input schema automatically from data 18 | | multiline | no | False | Parse one record, which may span multiple lines, per file 19 | | quote | no | '"' | Character used to denote the start and end of a quoted item 20 | | encoding | no | "UTF-8" | Column delimiter 21 | | lineSep | no | "\n" | Column delimiter 22 | 23 | 24 | ## Example 25 | ``` yaml 26 | Example CSV Spark: 27 | source: 28 | type: csv_spark 29 | path: data/employees/*.csv 30 | multiline: False 31 | inferSchema: False 32 | encoding: "UTF-8" 33 | expected: 34 | type: sql_spark 35 | query: | 36 | select * 37 | from employees 38 | where hire_date < "2000-01-01" 39 | ``` -------------------------------------------------------------------------------- /tests/connectors/test_mssql.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | import urllib 5 | from pyjeb import control_and_setup 6 | from ploosh.connectors.connector_mssql import ConnectorMSSQL 7 | 8 | @pytest.fixture 9 | def connector(): 10 | return ConnectorMSSQL() 11 | 12 | @pytest.fixture 13 | def df_sales(): 14 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 15 | 16 | def test_connection_with_password(connector, df_sales): 17 | configuration = { 18 | "query": "select * from sales;", 19 | "connection": "debug" 20 | } 21 | 22 | connection = { 23 | "hostname": "localhost", 24 | "username": "sa", 25 | "password": os.environ.get("TEST_DB_PASSWORD"), 26 | "database": "ploosh", 27 | "driver": "ODBC Driver 17 for SQL Server", 28 | "encrypt": False, 29 | } 30 | connection = control_and_setup(connection, connector.connection_definition) 31 | 32 | df_test = connector.get_data(configuration, connection) 33 | 34 | assert len(df_test.compare(df_sales)) == 0 35 | 36 | 37 | def test_connection_with_connection_string(connector, df_sales): 38 | configuration = { 39 | "query": "select * from sales;", 40 | "connection": "debug" 41 | } 42 | 43 | password = urllib.parse.quote_plus(os.environ.get('TEST_DB_PASSWORD')) 44 | connection = { 45 | "mode": "connection_string", 46 | "connection_string": f"mssql+pyodbc://sa:{password}@localhost/ploosh?driver=ODBC+Driver+17+for+SQL+Server" 47 | } 48 | 49 | connection = control_and_setup(connection, connector.connection_definition) 50 | 51 | df_test = connector.get_data(configuration, connection) 52 | 53 | assert len(df_test.compare(df_sales)) == 0 -------------------------------------------------------------------------------- /docs/connectors/native/databricks.md: -------------------------------------------------------------------------------- 1 | This connector allows you to connect to a Databricks instance and execute SQL queries. 2 | 3 | # Connection configuration 4 | ## Definition 5 | | Name | Mandatory | Default | Description | 6 | |--------------------------|:---------:|:----------:|-------------| 7 | | token | yes | | a token generated from databricks. 
See the [documentation](https://docs.databricks.com/en/dev-tools/auth/pat.html) 8 | | hostname | yes | | url to databricks 9 | | database | yes | | name of the database 10 | | http_path | yes | | the value is available on [JDBC/ODBC](https://docs.databricks.com/en/integrations/compute-details.html) settings 11 | 12 | ## Example 13 | ``` yaml 14 | databricks_example: 15 | type: databricks 16 | hostname: adb-myproject.8.azuredatabricks.net 17 | database: default 18 | token: $var.databricks_token 19 | http_path: /sql/1.0/warehouses/da000000000000000 20 | ``` 21 | 22 | # Test case configuration 23 | ## Definition 24 | | Name | Mandatory | Default | Description | 25 | |-------------------|:---------:|:-----------------------------:|-------------| 26 | | connection | yes | | The connection to use 27 | | query | yes | | The query to execute to the database 28 | 29 | 30 | ## Example 31 | ``` yaml 32 | Example Databricks: 33 | source: 34 | connection: databricks_example 35 | type: databricks 36 | query: | 37 | select * 38 | from `rh.employees` 39 | where hire_date < "2000-01-01" 40 | expected: 41 | type: csv 42 | path: data/employees_before_2000.csv 43 | ``` -------------------------------------------------------------------------------- /docs/connectors/native/snowflake.md: -------------------------------------------------------------------------------- 1 | This connector allows to connect to a Snowflake instance and execute SQL queries. 2 | 3 | # Connection configuration 4 | ## Definition 5 | | Name | Mandatory | Default | Description | 6 | |--------------------------|:---------:|:----------:|-------------| 7 | | account_identifier | yes | | Account identifier of snowflake instance 8 | | username | yes | | User name 9 | | password | yes | | User password 10 | | database | no | null | Target database name 11 | | schema | no | null | Target schema name 12 | | warehouse | no | null | Target warehouse name 13 | | role | no | null | Target role name 14 | 15 | ⚠️ it's highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the password value 16 | 17 | ## Example 18 | ``` yaml 19 | snowflake_example: 20 | type: snowflake 21 | account_identifier: bjpwtqg-kt67582 22 | schema: PUBLIC 23 | warehouse: SF_TUTS_WH 24 | database: SF_TUTS 25 | username: pixel 26 | password: $var.snowflake_password_db 27 | ``` 28 | 29 | # Test case configuration 30 | ## Definition 31 | | Name | Mandatory | Default | Description | 32 | |-------------------|:---------:|:-----------------------------:|-------------| 33 | | connection | yes | | The connection to use 34 | | query | yes | | The query to execute to the database 35 | 36 | ## Example 37 | ``` yaml 38 | Example Snowflake: 39 | source: 40 | connection: snowflake_example 41 | type: snowflake 42 | query: | 43 | select * 44 | from RH.employees 45 | where hire_date < '2000-01-01' 46 | expected: 47 | type: csv 48 | path: data/employees_before_2000.csv 49 | ``` -------------------------------------------------------------------------------- /src/setup.py: -------------------------------------------------------------------------------- 1 | """Setup PyPi module""" 2 | # pylint: disable=C0103 3 | 4 | from setuptools import setup 5 | from ploosh.version import PLOOSH_VERSION 6 | 7 | def setup_ploosh(name, install_requires): 8 | """Setup Ploosh module""" 9 | 10 | with open("../readme.md", encoding="UTF-8") as f: 11 | long_description = "".join(f.readlines()) 12 | 13 | # replace relative link by absolute github link 14 | long_description = 
long_description.replace("(/", "(https://github.com/CSharplie/ploosh/blob/main/") 15 | 16 | install_requires = install_requires + [ 17 | "colorama==0.4.6", 18 | "PyYAML==6.0.1", 19 | "Pyjeb==0.2.1", 20 | "numpy==1.26.3", 21 | "pandas==2.1.4", 22 | "openpyxl==3.1.2", 23 | "sqlalchemy==1.4.51", 24 | "pyspark==3.5.4", 25 | "deltalake==0.23.2", 26 | "delta-spark==3.3.0", 27 | ] 28 | 29 | setup ( 30 | name = name, 31 | version = PLOOSH_VERSION, 32 | description="A framework to automatize your tests for data projects", 33 | long_description=long_description, 34 | long_description_content_type="text/markdown", 35 | url="https://github.com/CSharplie/ploosh/", 36 | project_urls={ 37 | "Say Thanks!": "https://ploosh.io", 38 | "Bug Tracker": "https://github.com/CSharplie/ploosh/issues", 39 | "CI": "https://github.com/CSharplie/ploosh/actions", 40 | "Documentation": "https://ploosh.io/docs/ploosh/", 41 | "Source Code": "https://github.com/CSharplie/ploosh", 42 | }, 43 | download_url="https://pypi.org/project/ploosh/", 44 | platforms="Any", 45 | python_requires=">=3.6", 46 | license= "Apache License 2.0", 47 | entry_points = { 48 | "console_scripts": [ 49 | "ploosh = ploosh.__main__:main" 50 | ] 51 | }, 52 | install_requires=install_requires, 53 | ) 54 | -------------------------------------------------------------------------------- /docs/exporters/csv.md: -------------------------------------------------------------------------------- 1 | # Structure 2 | ``` 3 | output/ 4 | ├─ csv/ 5 | │ ├─ test_results.csv 6 | │ ├─ test_results/ 7 | │ │ ├─ test case 1.xlsx 8 | │ │ ├─ test case 2.xlsx 9 | │ │ └─ ... 10 | ``` 11 | 12 | The csv extractor will generate a `test_results.csv` file and a `test_results` folder containing the details of the test cases results in xlsx format. 13 | 14 | # test_results.csv 15 | The `test_results.csv` file will contain the following columns: 16 | - `test_case`: the name of the test case 17 | - `status`: the status of the test case. Can be `success`, `failure` or `error` 18 | - `source_start`: the start time of the source extraction 19 | - `source_end`: the end time of the source extraction 20 | - `source_duration`: the duration of the source extraction 21 | - `source_count`: the count of the source dataset 22 | - `expected_start`: the start time of the expected extraction 23 | - `expected_end`: the end time of the expected extraction 24 | - `expected_duration`: the duration of the expected extraction 25 | - `expected_count`: the count of the expected dataset 26 | - `success_rate`: the success rate of the test case 27 | - `error_type`: the type of the error if the test case failed or raised an error 28 | - `error_message`: the error message if the test case failed or raised an error 29 | - 30 | # test_results folder 31 | The `test_results` folder will contain one xlsx file per test case. 
Each file will contain a sheet with the gap between the source and the expected dataset 32 | 33 | # Example 34 | ``` csv 35 | test_case,status,source_start,source_end,source_duration,source_count,expected_start,expected_end,expected_duration,expected_count,success_rate,error_type,error_message 36 | test 1,passed,2024-02-05T17:08:36Z,2024-02-05T17:08:36Z,0.0032982,100,2024-02-05T17:08:36Z,2024-02-05T17:08:36Z,6.0933333333333335e-05,100,1.0,,, 37 | test 2,failed,2024-02-05T17:08:36Z,2024-02-05T17:08:36Z,0.0032982,100,2024-02-05T17:08:36Z,2024-02-05T17:08:36Z,6.0933333333333335e-05,100,0.95,Data,Some rows are not equals between source dataset and expected dataset 38 | ``` -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | # Ploosh 2 | 3 | Ploosh is yaml based framework used to automatized the testing process in data projects. 4 | 5 | # Get started 6 | Go to the [ploosh documentation](https://ploosh.io/docs/ploosh/) to find the get started tutorial. 7 | 8 | ## Steps 9 | 1. Install ploosh package 10 | 2. Run tests 11 | 3. Analyse results 12 | 13 | ## Install Ploosh 14 | 15 | Install from [PyPi](https://pypi.org/project/ploosh/) package manager: 16 | ``` shell 17 | pip install ploosh 18 | ``` 19 | 20 | ## Run tests 21 | ``` shell 22 | ploosh --connections "connections.yml" --cases "test_cases" --export "JSON" --p_my_sql_server_password "mypassword" 23 | ``` 24 | 25 | ![Execution result](http://ploosh.io/wp-content/uploads/2024/09/image.png) 26 | 27 | ## Test results 28 | ``` json 29 | [ 30 | { 31 | "name": "Test aggregated data", 32 | "state": "passed", 33 | "source": { 34 | "start": "2024-02-05T17:08:36Z", 35 | "end": "2024-02-05T17:08:36Z", 36 | "duration": 0.0032982 37 | }, 38 | "expected": { 39 | "start": "2024-02-05T17:08:36Z", 40 | "end": "2024-02-05T17:08:36Z", 41 | "duration": 6.0933333333333335e-05 42 | }, 43 | "compare": { 44 | "start": "2024-02-05T17:08:36Z", 45 | "end": "2024-02-05T17:08:36Z", 46 | "duration": 0.00046468333333333334 47 | } 48 | }, 49 | { 50 | "name": "Test unvalid data", 51 | "state": "failed", 52 | "source": { 53 | "start": "2024-02-05T17:08:36Z", 54 | "end": "2024-02-05T17:08:36Z", 55 | "duration": 0.00178865 56 | }, 57 | "expected": { 58 | "start": "2024-02-05T17:08:36Z", 59 | "end": "2024-02-05T17:08:36Z", 60 | "duration": 1.49e-05 61 | }, 62 | "compare": { 63 | "start": "2024-02-05T17:08:36Z", 64 | "end": "2024-02-05T17:08:36Z", 65 | "duration": 1.8333333333333333e-07 66 | }, 67 | "error": { 68 | "type": "count", 69 | "message": "The count in source dataset (55) is differant than the count the in expected dataset (0)" 70 | } 71 | } 72 | ] 73 | ``` 74 | -------------------------------------------------------------------------------- /docs/connectors/native/csv.md: -------------------------------------------------------------------------------- 1 | This connector is used to read CSV files from local file system. 
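The options of this connector mirror the arguments of `pandas.read_csv`. As an illustration only (the path and option values are placeholders), the example shown further below roughly corresponds to this pandas call:

``` python
# Illustrative sketch: how the CSV connector options map onto pandas.read_csv.
# The path and option values are placeholders.
import pandas as pd

df = pd.read_csv(
    "data/employees_before_2000.csv",
    delimiter=";",      # column delimiter
    encoding="utf-8",   # file encoding
    engine="python",    # parser engine
)
print(df.head())
```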
2 | 3 | # Connection configuration 4 | No connection is required by this connector 5 | 6 | # Test case configuration 7 | ## Definition 8 | | Name | Mandatory | Default | Description | 9 | |-------------------|:---------:|:-----------------------------:|-------------| 10 | | path | yes | | Path to the CSV file 11 | | delimiter | no | , | Column delimiter 12 | | infer | no | True | Infer the column names 13 | | names | no | None | Sequence of column labels to apply 14 | | usecols | no | None | Subset of columns to select 15 | | skiprows | no | None | Line numbers to skip or number of lines to skip (int) at the start of the file 16 | | skipfooter | no | 0 | Number of lines at bottom of file to skip (Unsupported with engine='c') 17 | | nrows | no | None | Number of rows of file to read. Useful for reading pieces of large files 18 | | lineterminator | no | None | Character used to denote a line break 19 | | quotechar | no | '"' | Character used to denote the start and end of a quoted item 20 | | encoding | no | "utf-8" | Encoding to use when reading the file 21 | | engine | no | None | Parser engine to use 22 | 23 | ## Example 24 | ``` yaml 25 | Example CSV: 26 | source: 27 | connection: mysql_example 28 | type: mysql 29 | query: | 30 | select * 31 | from employees 32 | where hire_date < "2000-01-01" 33 | expected: 34 | type: csv 35 | infer: True 36 | delimiter: ";" 37 | encoding: "utf-8" 38 | engine: "python" 39 | path: data/employees_before_2000.csv 40 | ``` -------------------------------------------------------------------------------- /docs/configuration/spark.md: -------------------------------------------------------------------------------- 1 | Ploosh can be executed on Spark (in Databricks, Microsoft Fabric, or locally) by using the Spark connectors and calling it from Python code. The common pattern is sketched below; full examples for each environment follow.
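Whatever the environment, the pattern is the same: obtain a `SparkSession` and pass it to `execute_cases`. A minimal sketch, assuming a cases folder, a connections file, and an output folder at the placeholder paths below:

``` python
# Minimal sketch of the common pattern; the paths are placeholders.
from pyspark.sql import SparkSession
from ploosh import execute_cases

spark = SparkSession.builder.appName("Ploosh").getOrCreate()

execute_cases(
    cases="test_cases",             # folder containing the YAML test cases
    connections="connections.yml",  # connection definitions
    path_output="output",           # where exporters write their results
    spark_session=spark,            # existing Spark session to reuse
)
```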
2 | 3 | # Examples 4 | 5 | ### Microsoft Fabric 6 | 7 | __Cell 1__ : Install Ploosh package from PyPi package manager 8 | ``` shell 9 | pip install ploosh 10 | ``` 11 | 12 | __Cell 2__ : Mount the lakehouse to acces the case and connection files 13 | ``` python 14 | mount_point = "/ploosh_config" 15 | workspace_name = "ploosh" 16 | lakehouse_name = "data" 17 | 18 | if(mssparkutils.fs.mount(f"abfss://{workspace_name}@onelake.dfs.fabric.microsoft.com/{lakehouse_name}.Lakehouse/", mount_point)): 19 | ploosh_config_path = mssparkutils.fs.getMountPath(mountPoint = mount_point) 20 | ``` 21 | 22 | __Cell 3__ : Execute ploosh framework 23 | ``` python 24 | from ploosh import execute_cases 25 | 26 | connections_file_path = f"{ploosh_config_path}/Files/connections.yaml" 27 | cases_folder_path = f"{ploosh_config_path}/Files/cases" 28 | 29 | execute_cases(cases = cases_folder_path, connections = connections_file_path, spark_session = spark) 30 | ``` 31 | 32 | ## Databricks 33 | 34 | __Cell 1__ : Install Ploosh package from PyPi package manager 35 | ``` shell 36 | %pip install ploosh 37 | ``` 38 | 39 | __Cell 2__ : Restart python to make the package available 40 | ``` python 41 | dbutils.library.restartPython() 42 | ``` 43 | 44 | __Cell 3__ : Execute ploosh framework 45 | ``` python 46 | from ploosh import execute_cases 47 | 48 | root_folder = "/Workspace/Shared" 49 | 50 | execute_cases(cases=f"{root_folder}/cases", path_output=f"{root_folder}/output", spark_session=spark) 51 | ``` 52 | 53 | ## Local 54 | 55 | __Step 1__ : Install Ploosh package from PyPi package manager 56 | ``` shell 57 | pip install ploosh 58 | ``` 59 | 60 | __Step 2__ : Initialize the spark session 61 | ``` python 62 | from pyspark.sql import SparkSession 63 | 64 | spark = SparkSession.builder.appName("Ploosh").getOrCreate() 65 | ``` 66 | 67 | __Step 3__ : Execute ploosh framework 68 | ``` python 69 | from ploosh import execute_cases 70 | 71 | execute_cases(cases = "test_cases", connections = "connections.yml", spark_session = spark) 72 | ``` 73 | 74 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_databricks.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Databricks database""" 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | from connectors.connector import Connector 7 | 8 | 9 | class ConnectorDatabricks(Connector): 10 | """Connector to read Databricks database""" 11 | 12 | def __init__(self): 13 | # Initialize the connector with its name and connection definitions 14 | self.name = "DATABRICKS" 15 | self.connection_definition = [ 16 | { 17 | "name": "token", # Token for authentication 18 | }, 19 | { 20 | "name": "hostname", # Hostname of the Databricks instance 21 | }, 22 | { 23 | "name": "database", # Database name 24 | }, 25 | { 26 | "name": "http_path", # HTTP path for the Databricks cluster 27 | }, 28 | { 29 | "name": "port", # Port number (default is 443) 30 | "default": 443, 31 | "type": "integer", 32 | }, 33 | ] 34 | self.configuration_definition = [ 35 | {"name": "query"}, # SQL query to execute 36 | {"name": "connection"}, # Connection name 37 | ] 38 | 39 | def get_data(self, configuration: dict, connection: dict): 40 | """Get data from source""" 41 | 42 | # Extract connection parameters 43 | token = connection["token"] 44 | hostname = connection["hostname"] 45 | database = connection["database"] 46 | port = connection["port"] 47 | http_path = 
connection["http_path"] 48 | 49 | # Create the connection string for Databricks 50 | connection_string = ( 51 | f"databricks://token:{token}@{hostname}:{port}/{database}?http_path={http_path}" 52 | ) 53 | 54 | # Create a SQLAlchemy engine using the connection string 55 | sql_connection = create_engine(connection_string, echo=False) 56 | 57 | # Execute the SQL query and read the data into a pandas DataFrame 58 | df = pd.read_sql(configuration["query"], sql_connection) 59 | 60 | return df 61 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_parquet.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Parquet file""" 3 | 4 | import pandas as pd 5 | from connectors.connector import Connector 6 | 7 | 8 | class ConnectorParquet(Connector): 9 | """Connector to read Parquet file""" 10 | 11 | def __init__(self): 12 | # Initialize the connector with its name and configuration definitions 13 | self.name = "PARQUET" 14 | self.connection_definition = [] # No specific connection parameters required 15 | self.configuration_definition = [ 16 | {"name": "path"}, # Path to the Parquet file 17 | {"name": "columns", "type": "list", "default": None}, # Subset of columns to load 18 | {"name": "engine", "type": "string", "validset": ["auto", "pyarrow", "fastparquet"], "default": "auto"}, # Parquet engine to use ('auto', 'pyarrow', 'fastparquet') 19 | {"name": "filters", "type": "list", "default": None}, # Row group filters to apply (for 'pyarrow') 20 | {"name": "filters.column", "type": "string"}, # The name of the column to filter 21 | {"name": "filters.operator", "type": "string", "validset": ["==", "=", ">", ">=", "<", "<=", "!="]}, # The operator to be used 22 | {"name": "filters.value", "type": "integer"}, # The value to be used to filter the column 23 | ] 24 | 25 | def get_data(self, configuration: dict, connection: dict): 26 | """Get data from source""" 27 | 28 | # Extract the path and configuration parameters 29 | path = configuration["path"] 30 | columns = configuration["columns"] 31 | engine = configuration["engine"] 32 | filters = configuration["filters"] 33 | list_filters = None 34 | if filters is not None: 35 | list_filters = ( 36 | [(filter_spec["column"], filter_spec["operator"], filter_spec["value"]) for filter_spec in filters] 37 | if filters else None 38 | ) 39 | 40 | # Read the Parquet file using pandas 41 | df = pd.read_parquet(path, 42 | columns=columns, 43 | engine=engine, 44 | filters=list_filters) 45 | return df 46 | -------------------------------------------------------------------------------- /tests/connectors/test_json.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | from ploosh.connectors.connector_json import ConnectorJSON 6 | 7 | @pytest.fixture 8 | def connector(): 9 | return ConnectorJSON() 10 | 11 | @pytest.fixture 12 | def df_sales(): 13 | return pd.read_csv(f"{os.getcwd()}/tests/.data/sales.csv", delimiter=",") 14 | 15 | 16 | @pytest.fixture 17 | def df_sales_with_two_rows(): 18 | df = pd.read_csv(f"{os.getcwd()}/tests/.data/sales.csv", delimiter=",") 19 | df_first_2_rows = df.head(2) 20 | return df_first_2_rows 21 | 22 | 23 | def test_json_default(connector, df_sales): 24 | configuration = { 25 | "path": f"{os.getcwd()}/tests/.env/json/sales.json" 26 | } 27 | 28 | configuration = 
control_and_setup(configuration, connector.configuration_definition) 29 | 30 | df_test = connector.get_data(configuration, None) 31 | 32 | assert len(df_test.compare(df_sales)) == 0 33 | 34 | 35 | def test_json_with_lines_true(connector, df_sales): 36 | configuration = { 37 | "path": f"{os.getcwd()}/tests/.env/json/sales_lines_true.json", 38 | "lines": True 39 | } 40 | 41 | configuration = control_and_setup(configuration, connector.configuration_definition) 42 | 43 | df_test = connector.get_data(configuration, None) 44 | 45 | assert len(df_test.compare(df_sales)) == 0 46 | 47 | 48 | def test_json_with_two_rows(connector, df_sales_with_two_rows): 49 | configuration = { 50 | "path": f"{os.getcwd()}/tests/.env/json/sales_lines_true.json", 51 | "lines": True, 52 | "nrows": 2 53 | } 54 | 55 | configuration = control_and_setup(configuration, connector.configuration_definition) 56 | 57 | df_test = connector.get_data(configuration, None) 58 | 59 | assert len(df_test.compare(df_sales_with_two_rows)) == 0 60 | 61 | 62 | def test_json_with_specific_encoding(connector, df_sales): 63 | configuration = { 64 | "path": f"{os.getcwd()}/tests/.env/json/sales-ISO-8859-1.json" 65 | } 66 | 67 | configuration = control_and_setup(configuration, connector.configuration_definition) 68 | 69 | df_test = connector.get_data(configuration, None) 70 | 71 | assert len(df_test.compare(df_sales)) == 0 72 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_bigquery.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read BigQuery database""" 3 | 4 | import pandas as pd 5 | import pandas_gbq 6 | from sqlalchemy import create_engine 7 | from connectors.connector import Connector 8 | 9 | 10 | class ConnectorBigQuery(Connector): 11 | """Connector to read BigQuery database""" 12 | 13 | def __init__(self): 14 | # Initialize the connector with its name and connection definitions 15 | self.name = "BIGQUERY" 16 | self.connection_definition = [ 17 | { 18 | "name": "credentials", # Credentials for authentication 19 | "default": None, 20 | }, 21 | { 22 | "name": "credentials_type", # Type of credentials (service account or current user) 23 | "validset": ["service_account", "current_user"], 24 | "default": "service_account", 25 | }, 26 | { 27 | "name": "project_id", # Project ID for BigQuery 28 | "default": None, 29 | }, 30 | ] 31 | self.configuration_definition = [ 32 | {"name": "query"}, # SQL query to execute 33 | {"name": "connection"}, # Connection name 34 | ] 35 | 36 | def get_data(self, configuration: dict, connection: dict): 37 | """Get data from source""" 38 | # Extract credentials and credentials type from the connection 39 | credentials = connection["credentials"] 40 | credentials_type = connection["credentials_type"] 41 | 42 | # If using service account credentials, create a connection string and use SQLAlchemy 43 | if credentials_type == "service_account": 44 | connection_string = f"bigquery://?credentials_base64={credentials}" 45 | sql_connection = create_engine(connection_string, echo=False) 46 | df = pd.read_sql(configuration["query"], sql_connection) 47 | # If using current user credentials, use pandas_gbq to read the data 48 | elif credentials_type == "current_user": 49 | df = pandas_gbq.read_gbq( 50 | configuration["query"], connection["project_id"], progress_bar_type=None 51 | ) 52 | 53 | return df 54 | 
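A hypothetical usage sketch, written in the same style as the connector tests in this repository; the environment variable name, credentials, and query are placeholders, and base64-encoded service-account credentials with access to the target project are assumed:

``` python
# Hypothetical sketch (not an actual test in this repository).
# TEST_BQ_CREDENTIALS_BASE64 and the query are placeholders.
import os
from pyjeb import control_and_setup
from ploosh.connectors.connector_bigquery import ConnectorBigQuery

connector = ConnectorBigQuery()

configuration = {
    "query": "select * from sales",
    "connection": "debug",
}

connection = {
    "credentials": os.environ.get("TEST_BQ_CREDENTIALS_BASE64"),  # base64-encoded service account JSON
    "credentials_type": "service_account",
}
connection = control_and_setup(connection, connector.connection_definition)

df = connector.get_data(configuration, connection)
print(len(df))
```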
-------------------------------------------------------------------------------- /docs/pipelines/azure_devops.md: -------------------------------------------------------------------------------- 1 | Ploosh is easy to use and can be integrated with any CI/CD pipeline. 2 | The following steps are required to run Ploosh tests in Azure DevOps and publish the results into Azure DevOps Test Plans. 3 | 4 | # Exemple of pipeline 5 | 6 | 1. Install ODBC driver for SQL Server if SQL Server connector is used 7 | 2. Install Ploosh package from PyPi 8 | 3. Execute Ploosh 9 | 1. Provide the connections file 10 | 2. Provide the test cases folder 11 | 3. Provide the export format (TRX for Azure DevOps Test Plans) 12 | 4. Disable the failure flag to avoid the pipeline to fail if a test fails 13 | 5. Provide the passwords as parameters from the variables group 14 | 4. Publish test results 15 | 16 | ```yaml 17 | trigger: 18 | - main 19 | 20 | variables: 21 | - group: demo 22 | stages: 23 | - stage: 24 | displayName: Build 25 | jobs: 26 | - job: 27 | steps: 28 | - checkout: self 29 | - task: CmdLine@2 30 | displayName: Install ODBC driver for SQL Server 31 | inputs: 32 | script: | 33 | curl https://packages.microsoft.com/keys/microsoft.asc | sudo tee /etc/apt/trusted.gpg.d/microsoft.asc 34 | curl https://packages.microsoft.com/config/ubuntu/$(lsb_release -rs)/prod.list | sudo tee /etc/apt/sources.list.d/mssql-release.list 35 | sudo apt-get update 36 | sudo ACCEPT_EULA=Y apt-get install -y msodbcsql18 37 | - task: CmdLine@2 38 | displayName: Install ploosh 39 | inputs: 40 | script: | 41 | pip install ploosh 42 | - task: CmdLine@2 43 | displayName: Execute ploosh 44 | inputs: 45 | script: ploosh --connections "connections.yml" --cases "test_cases" --export "TRX" --failure False --p_mysql_password_db "$(mysql_password)" --p_mssql_password_db "$(mssql_password)" --p_postgresql_password_db "$(postgresql_password)" 46 | - task: PublishTestResults@2 47 | inputs: 48 | testResultsFormat: 'VSTest' 49 | testResultsFiles: '*.xml' 50 | searchFolder: 'output/trx/' 51 | mergeTestResults: true 52 | testRunTitle: '$(Build.DefinitionName)' 53 | ``` -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_csv_spark.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read CSV file""" 3 | 4 | from connectors.connector import Connector 5 | 6 | 7 | class ConnectorCSVSpark(Connector): 8 | """Connector to read CSV file with Spark""" 9 | 10 | def __init__(self): 11 | # Initialize the connector with its name and configuration definitions 12 | self.name = "CSV_SPARK" 13 | self.is_spark = True 14 | self.connection_definition = [] 15 | self.configuration_definition = [ 16 | {"name": "path", "type": "string"}, # Path to the CSV file 17 | {"name": "delimiter", "type": "string", "default": ","}, # Delimiter used in the CSV file 18 | {"name": "header", "type": "boolean", "default": True}, # Whether the CSV file has a header row 19 | {"name": "inferSchema", "type": "boolean", "default": False}, # Infers the input schema automatically from data 20 | {"name": "multiline", "type": "boolean", "default": False}, # Parse one record, which may span multiple lines, per file 21 | {"name": "quote", "type": "string", "default": '"'}, # Character used to denote the start and end of a quoted item 22 | {"name": "encoding", "type": "string", "default": 'UTF-8'}, # Encoding to use for UTF when reading/writing 23 | {"name": 
"lineSep", "type": "string", "default": "\n"}, # Character used to denote a line break 24 | ] 25 | 26 | def get_data(self, configuration: dict, connection: dict): 27 | """Get data from source""" 28 | 29 | # Read the CSV file using Spark with the specified configuration options 30 | df = self.spark.read.option("delimiter", configuration["delimiter"]) \ 31 | .option("header", configuration["header"]) \ 32 | .option("inferSchema", configuration["inferSchema"])\ 33 | .option("multiline", configuration["multiline"]) \ 34 | .option("quote", configuration["quote"]) \ 35 | .option("encoding", configuration["encoding"]) \ 36 | .option("lineSep", configuration["lineSep"]) \ 37 | .csv(configuration["path"]) 38 | 39 | return df 40 | -------------------------------------------------------------------------------- /tests/connectors/test_parquet.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | import pytest 4 | from pyjeb import control_and_setup 5 | from ploosh.connectors.connector_parquet import ConnectorParquet 6 | 7 | @pytest.fixture 8 | def connector(): 9 | return ConnectorParquet() 10 | 11 | 12 | @pytest.fixture 13 | def df_sales(): 14 | return pd.read_csv(f"{os.getcwd()}/tests/.data/sales.csv", delimiter=",") 15 | 16 | @pytest.fixture 17 | def df_sales_with_specific_columns(): 18 | df = pd.read_csv(f"{os.getcwd()}/tests/.data/sales.csv", delimiter=",") 19 | df_selected_columns = df[["sale_id", "seller_name", "card_name", "quantity"]] 20 | return df_selected_columns 21 | 22 | @pytest.fixture 23 | def df_sales_with_filters(): 24 | df = pd.read_csv(f"{os.getcwd()}/tests/.data/sales.csv", delimiter=",") 25 | df_filtered = df[(df["sale_id"] > 10) & (df['quantity'] == 1)] 26 | df_filtered = df_filtered.reset_index(drop=True) 27 | return df_filtered 28 | 29 | 30 | def test_default(connector, df_sales): 31 | configuration = { 32 | "path": "./tests/.env/parquet/sales.parquet", 33 | } 34 | 35 | configuration = control_and_setup(configuration, connector.configuration_definition) 36 | df_test = connector.get_data(configuration, {}) 37 | 38 | assert len(df_test.compare(df_sales)) == 0 39 | 40 | 41 | def test_with_specific_columns(connector, df_sales_with_specific_columns): 42 | configuration = { 43 | "path": "./tests/.env/parquet/sales.parquet", 44 | "columns" : ["sale_id", "seller_name", "card_name", "quantity"] 45 | } 46 | 47 | configuration = control_and_setup(configuration, connector.configuration_definition) 48 | df_test = connector.get_data(configuration, {}) 49 | 50 | assert len(df_test.compare(df_sales_with_specific_columns)) == 0 51 | 52 | 53 | def test_with_filters(connector, df_sales_with_filters): 54 | configuration = { 55 | "path": "./tests/.env/parquet/sales.parquet", 56 | "filters" : [{"column": "sale_id", "operator": ">", "value": 10}, 57 | {"column": "quantity", "operator": "==", "value": 1}] 58 | } 59 | 60 | configuration = control_and_setup(configuration, connector.configuration_definition) 61 | df_test = connector.get_data(configuration, {}) 62 | 63 | assert len(df_test.compare(df_sales_with_filters)) == 0 64 | -------------------------------------------------------------------------------- /debug/setup.sh: -------------------------------------------------------------------------------- 1 | # Configuration 2 | db_password=ThePasswordIs9293709B13? 
3 | 4 | # Setup dev envrionnement 5 | conda create -n ".ploosh" python=3.12.8 ipython 6 | conda activate .ploosh 7 | 8 | pip install -r ./src/requirements.txt 9 | 10 | # install connectors clients 11 | sudo apt-get update 12 | 13 | sudo apt-get install -y postgresql-client 14 | sudo apt-get install -y mysql-client 15 | sudo ACCEPT_EULA=Y apt-get install -y mssql-tools unixodbc-dev 16 | 17 | # install connectors servers 18 | docker run --name ploosh-mysql \ 19 | -e MYSQL_ROOT_PASSWORD=$db_password \ 20 | -e MYSQL_PASSWORD=$db_password \ 21 | -e MYSQL_DATABASE=ploosh \ 22 | -e MYSQL_USER=ploosh \ 23 | -p 3306:3306 \ 24 | -d mysql 25 | 26 | docker run --name ploosh-postgresql \ 27 | -e POSTGRES_USER=ploosh \ 28 | -e POSTGRES_PASSWORD=$db_password \ 29 | -e POSTGRES_DB=ploosh \ 30 | -p 5432:5432 \ 31 | -d postgres 32 | 33 | docker run --name ploosh-mssql \ 34 | -e "ACCEPT_EULA=Y" \ 35 | -e "MSSQL_SA_PASSWORD=$db_password" \ 36 | --hostname ploosh \ 37 | -p 1433:1433 \ 38 | -d \ 39 | mcr.microsoft.com/mssql/server:2022-latest 40 | 41 | docker run -d --name ploosh-spark-master \ 42 | -e SPARK_MODE=master \ 43 | -e SPARK_MASTER_HOST=ploosh-spark-master \ 44 | -p 7077:7077 -p 8081:8080 \ 45 | -v $(pwd)/tests/.data:$(pwd)/tests/.data \ 46 | -v $(pwd)/tests/.env:$(pwd)/tests/.env \ 47 | --hostname ploosh-spark-master \ 48 | bitnami/spark 49 | 50 | docker run -d --name ploosh-spark-worker \ 51 | -e SPARK_MODE=worker \ 52 | -e SPARK_MASTER_URL=spark://ploosh-spark-master:7077 \ 53 | -v $(pwd)/tests/.data:$(pwd)/tests/.data \ 54 | -v $(pwd)/tests/.env:$(pwd)/tests/.env \ 55 | --link ploosh-spark-master:ploosh-spark-master \ 56 | bitnami/spark 57 | 58 | 59 | docker exec -it ploosh-spark-master pip install delta-spark==3.3.0 60 | docker exec -it ploosh-spark-worker pip install delta-spark==3.3.0 61 | 62 | mysql -h 127.0.0.1 -u ploosh -p$db_password < tests/.env/mysql/setup.sql 63 | 64 | export PGPASSWORD=$db_password; 65 | psql -h 127.0.0.1 -U ploosh -d ploosh -f tests/.env/postgresql/setup.sql 66 | 67 | /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P $db_password -i tests/.env/mssql/setup.sql 68 | 69 | spark_setup_file=$(pwd)/tests/.env/spark/setup.sql 70 | spark_setup_file_tmp=$(pwd)/tests/.env/spark/setup_tmp.sql 71 | sed "s|{{pwd}}|$(pwd)|g" $spark_setup_file > $spark_setup_file_tmp 72 | spark-sql -f$spark_setup_file_tmp -------------------------------------------------------------------------------- /tests/.env/delta/sales/_delta_log/00000000000000000000.json: -------------------------------------------------------------------------------- 1 | {"protocol":{"minReaderVersion":1,"minWriterVersion":2}} 2 | 
{"metaData":{"id":"1f95c651-89a8-471e-b4cf-5350af0b517a","name":null,"description":null,"format":{"provider":"parquet","options":{}},"schemaString":"{\"type\":\"struct\",\"fields\":[{\"name\":\"sale_id\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"seller_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"card_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"card_rarity\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"card_condition\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"price\",\"type\":\"double\",\"nullable\":true,\"metadata\":{}},{\"name\":\"quantity\",\"type\":\"long\",\"nullable\":true,\"metadata\":{}},{\"name\":\"sale_date\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"card_set\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"buyer_name\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}},{\"name\":\"transaction_status\",\"type\":\"string\",\"nullable\":true,\"metadata\":{}}]}","partitionColumns":[],"createdTime":1736434780759,"configuration":{}}} 3 | {"add":{"path":"part-00001-4cd9f0b3-de7c-470d-83e2-ede8afcbf39a-c000.snappy.parquet","partitionValues":{},"size":6198,"modificationTime":1736434780761,"dataChange":true,"stats":"{\"numRecords\":74,\"minValues\":{\"card_rarity\":\"Common\",\"card_condition\":\"Excellent\",\"price\":5.0,\"seller_name\":\"Alex Johnson\",\"card_name\":\"Aerodactyl\",\"transaction_status\":\"Cancelled\",\"card_set\":\"Base Set\",\"buyer_name\":\"Alex Johnson\",\"sale_date\":\"2024-11-01\",\"quantity\":1,\"sale_id\":1},\"maxValues\":{\"buyer_name\":\"Sophia Wilson\",\"price\":320.0,\"sale_date\":\"2025-01-14\",\"transaction_status\":\"Pending\",\"sale_id\":74,\"card_name\":\"Zapdos\",\"card_rarity\":\"Ultra Rare\",\"card_condition\":\"Near Mint\",\"seller_name\":\"Sophia Wilson\",\"card_set\":\"Jungle\",\"quantity\":20},\"nullCount\":{\"card_name\":0,\"card_rarity\":0,\"sale_id\":0,\"sale_date\":0,\"price\":0,\"seller_name\":0,\"transaction_status\":0,\"card_set\":0,\"quantity\":0,\"buyer_name\":0,\"card_condition\":0}}","tags":null,"deletionVector":null,"baseRowId":null,"defaultRowCommitVersion":null,"clusteringProvider":null}} 4 | {"commitInfo":{"timestamp":1736434780761,"operation":"WRITE","operationParameters":{"mode":"Overwrite"},"operationMetrics":{"execution_time_ms":2,"num_added_files":1,"num_added_rows":74,"num_partitions":0,"num_removed_files":0},"clientVersion":"delta-rs.0.23.0"}} -------------------------------------------------------------------------------- /tests/load_engine/test_native.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | import pytest 3 | from pyjeb import control_and_setup 4 | from ploosh.engines.load_engine_native import LoadEngineNative 5 | from ploosh.configuration import Configuration 6 | 7 | @pytest.fixture 8 | def controls(): 9 | controls = Configuration.case_definition 10 | controls = [control for control in controls if control["name"].startswith("options")] 11 | return controls 12 | 13 | def test_count(controls): 14 | df_data = pd.DataFrame({"A": [1, 2, 3], "B": [4, 5, 6]}) 15 | parameters = {} 16 | options = control_and_setup(parameters, controls)["options"] 17 | 18 | load_engine = LoadEngineNative(None, options, None) 19 | df_data = load_engine.execute(df_data) 20 | assert load_engine.count == 3 21 | 22 | def test_cast_datetime(controls): 23 | df_data = pd.DataFrame({"A": [1, 2, 3], "B": 
["2021-01-01", "2021-01-02", "2021-01-03"]}) 24 | parameters = { 25 | "options": { 26 | "cast": [ 27 | { 28 | "name": "B", 29 | "type": "datetime" 30 | } 31 | ] 32 | } 33 | } 34 | options = control_and_setup(parameters, controls)["options"] 35 | 36 | load_engine = LoadEngineNative(None, options, None) 37 | df_data = load_engine.execute(df_data) 38 | assert df_data["B"].dtype == "datetime64[ns]" 39 | assert df_data["A"].dtype == "int64" 40 | 41 | def test_cast_int(controls): 42 | df_data = pd.DataFrame({"A": [1, 2, 3], "B": ["4", "5", "6"]}) 43 | parameters = { 44 | "options": { 45 | "cast": [ 46 | { 47 | "name": "B", 48 | "type": "int" 49 | } 50 | ] 51 | } 52 | } 53 | options = control_and_setup(parameters, controls)["options"] 54 | 55 | load_engine = LoadEngineNative(None, options, None) 56 | df_data = load_engine.execute(df_data) 57 | assert df_data["B"].dtype == "int64" 58 | assert df_data["A"].dtype == "int64" 59 | 60 | def test_cast_float(controls): 61 | df_data = pd.DataFrame({"A": [1, 2, 3], "B": ["4.0", "5.0", "6.0"]}) 62 | parameters = { 63 | "options": { 64 | "cast": [ 65 | { 66 | "name": "B", 67 | "type": "float" 68 | } 69 | ] 70 | } 71 | } 72 | options = control_and_setup(parameters, controls)["options"] 73 | 74 | load_engine = LoadEngineNative(None, options, None) 75 | df_data = load_engine.execute(df_data) 76 | assert df_data["B"].dtype == "float64" 77 | assert df_data["A"].dtype == "int64" -------------------------------------------------------------------------------- /src/ploosh/parameters.py: -------------------------------------------------------------------------------- 1 | """Module for parsing input parameters""" 2 | 3 | 4 | class Parameters: 5 | """Parse input parameters""" 6 | # Initialize class variables 7 | args = {} 8 | path_connection = None 9 | path_cases = None 10 | path_cases_filter = None 11 | path_output = None 12 | export = None 13 | failure_on_error = None 14 | variables = {} 15 | 16 | def __init__(self, argv: list): 17 | """Initialize Parameters with command-line arguments""" 18 | # Set arguments and variables from the command-line input 19 | self.set_args(argv[1:]) 20 | self.set_variables() 21 | 22 | # Set paths and other parameters from the arguments 23 | self.path_connection = self.get_value("connections", None) 24 | self.path_cases = self.get_value("cases", "./cases") 25 | self.path_cases_filter = self.get_value("filter", "*.yml") 26 | self.path_output = self.get_value("output", "./output") 27 | self.export = self.get_value("export", "JSON").upper() 28 | self.failure_on_error = self.get_value("failure", True) 29 | self.spark_mode = self.get_value("spark", False) 30 | 31 | def set_args(self, args): 32 | """Set dictionary of args with cleaned name""" 33 | for i, name in enumerate(args): 34 | if not name.startswith("-"): 35 | continue 36 | 37 | # Determine the value associated with the argument 38 | value = False 39 | if i != len(args) - 1: 40 | value = args[i + 1] 41 | if value.startswith("-"): 42 | value = True 43 | else: 44 | value = value.replace("'", "").replace("\"", "") 45 | 46 | # Clean the argument name and store it in the dictionary 47 | name = name.replace("-", "") 48 | self.args[name] = value 49 | 50 | def get_value(self, long_name: str, default): 51 | """Get value or default value from args""" 52 | if long_name in self.args: 53 | value = self.args[long_name] 54 | if str(value).upper() == "TRUE": 55 | return True 56 | if str(value).upper() == "FALSE": 57 | return False 58 | return value 59 | 60 | return default 61 | 62 | def 
set_variables(self): 63 | """Set variable list from args""" 64 | for name, value in self.args.items(): 65 | if not name.startswith("p_"): 66 | continue 67 | 68 | # Clean the variable name and store it in the dictionary 69 | name = name.replace("p_", "") 70 | self.variables[name] = value 71 | -------------------------------------------------------------------------------- /docs/connectors/native/postgresql.md: -------------------------------------------------------------------------------- 1 | This connector allows you to connect to a PostgreSQL database and execute SQL queries. 2 | 3 | # Connection configuration 4 | ## Password mode 5 | ### Definition 6 | | Name | Mandatory | Default | Description | 7 | |---------------|:---------:|:----------:|-------------| 8 | | mode | no | password | Change the connection mode. Can be "password" or "connection_string". The "connection_string" mode allows the use of a custom connection string. 9 | | hostname | yes | | Target host name 10 | | database | yes | | Target database name 11 | | username | yes | | User name 12 | | password | yes | | User password 13 | | port | no | 5432 | Port used by the connection 14 | | ssl_context | No | False | Set to True if the server requires a secure transport 15 | 16 | ⚠️ It is highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the password value 17 | 18 | ### Example 19 | ``` yaml 20 | postgresql_example: 21 | type: postgresql 22 | hostname: ploosh.postgresql.database.azure.com 23 | database: SampleDB 24 | username: sa_ploosh 25 | password: $var.sa_ploosh_password 26 | ssl_context: true 27 | ``` 28 | 29 | ## Connection string mode 30 | ### Definition 31 | | Name | Mandatory | Default | Description | 32 | |-------------------|:---------:|:-----------------------------:|-------------| 33 | | mode | no | password | Set to "connection_string" to use a custom connection string 34 | | connection_string | yes | | Connection string used to access the database. Refer to the [SQLAlchemy documentation](https://docs.sqlalchemy.org/en/20/dialects/postgresql.html) for the accepted format 35 | 36 | ### Example 37 | ``` yaml 38 | postgresql_example: 39 | type: postgresql 40 | mode: connection_string 41 | connection_string: "postgresql+pg8000://sa_ploosh:$var.sa_ploosh_password@ploosh.postgresql.database.azure.com/SampleDB" 42 | ``` 43 | 44 | # Test case configuration 45 | ## Definition 46 | | Name | Mandatory | Default | Description | 47 | |-------------------|:---------:|:-----------------------------:|-------------| 48 | | connection | yes | | The connection to use 49 | | query | yes | | The query to execute against the database 50 | 51 | ## Example 52 | ``` yaml 53 | Example PostgreSQL: 54 | source: 55 | connection: postgresql_example 56 | type: postgresql 57 | query: | 58 | select * 59 | from employees 60 | where hire_date < '2000-01-01' 61 | expected: 62 | type: csv 63 | path: data/employees_before_2000.csv 64 | ``` -------------------------------------------------------------------------------- /docs/connectors/native/mysql.md: -------------------------------------------------------------------------------- 1 | This connector allows you to connect to a MySQL database and execute SQL queries. 2 | 3 | # Connection configuration 4 | ## Password mode 5 | ### Definition 6 | | Name | Mandatory | Default | Description | 7 | |--------------------------|:---------:|:----------:|-------------| 8 | | mode | no | password | Change the connection mode. Can be "password" or "connection_string". 
"connection_string" mode allow to use a custom connection string. 9 | | hostname | yes | | Target host name 10 | | database | yes | | Target database name 11 | | username | yes | | User name 12 | | password | yes | | User password 13 | | port | no | 3306 | Port to use by the connection 14 | | require_secure_transport | No | False | Set True if the server require a secure transport 15 | 16 | ⚠️ it's highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the password value 17 | 18 | ### Example 19 | ``` yaml 20 | mysql_example: 21 | type: mysql 22 | hostname: ploosh.mysql.database.azure.com 23 | database: SampleDB 24 | username: sa_ploosh 25 | password: $var.sa_ploosh_password 26 | require_secure_transport: true 27 | ``` 28 | 29 | ### Definition 30 | ## Connection string mode 31 | | Name | Mandatory | Default | Description | 32 | |-------------------|:---------:|:-----------------------------:|-------------| 33 | | mode | no | password | Use "connection_string" value to use custom connection_string 34 | | connection_string | yes | | Connection string use to access in the database. Refer to [SQLAlchemy documentation](https://docs.sqlalchemy.org/en/20/dialects/mysql.html) to get the accepted format 35 | 36 | ### Example 37 | ``` yaml 38 | mysql_example: 39 | type: mysql 40 | mode: connection_string 41 | connection_string: "mysql+mysqldb://sa_ploosh:$var.sa_ploosh_password@ploosh.mysql.database.azure.com/SampleDB" 42 | ``` 43 | 44 | # Test case configuration 45 | ## Definition 46 | | Name | Mandatory | Default | Description | 47 | |-------------------|:---------:|:-----------------------------:|-------------| 48 | | connection | yes | | The connection to use 49 | | query | yes | | The query to execute to the database 50 | ## Example 51 | 52 | ``` yaml 53 | Example MySQL: 54 | source: 55 | connection: mysql_example 56 | type: mysql 57 | query: | 58 | select * 59 | from employees 60 | where hire_date < "2000-01-01" 61 | expected: 62 | type: csv 63 | path: data/employees_before_2000.csv 64 | ``` -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_snowflake.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Snowflake database""" 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | from connectors.connector import Connector 7 | 8 | 9 | class ConnectorSnowflake(Connector): 10 | """Connector to read Snowflake database""" 11 | 12 | def __init__(self): 13 | # Initialize the connector with its name and connection definitions 14 | self.name = "SNOWFLAKE" 15 | self.connection_definition = [ 16 | { 17 | "name": "account_identifier", # Snowflake account identifier 18 | }, 19 | { 20 | "name": "username", # Username for authentication 21 | }, 22 | { 23 | "name": "password", # Password for authentication 24 | }, 25 | { 26 | "name": "database", # Database name (optional) 27 | "default": None, 28 | }, 29 | { 30 | "name": "schema", # Schema name (optional) 31 | "default": None, 32 | }, 33 | { 34 | "name": "warehouse", # Warehouse name (optional) 35 | "default": None, 36 | }, 37 | { 38 | "name": "role", # Role name (optional) 39 | "default": None, 40 | }, 41 | ] 42 | self.configuration_definition = [{"name": "query"}, {"name": "connection"}] 43 | 44 | def get_data(self, configuration: dict, connection: dict): 45 | """Get data from source""" 46 | 47 | # Extract connection parameters 48 | account_identifier = 
connection["account_identifier"] 49 | username = connection["username"] 50 | password = connection["password"] 51 | 52 | # Create the base connection string for Snowflake 53 | connection_string = f"snowflake://{username}:{password}@{account_identifier}/" 54 | 55 | # Append database and schema to the connection string if provided 56 | if connection["database"] is not None: 57 | connection_string += f"{connection['database']}/" 58 | if connection["schema"] is not None: 59 | connection_string += f"{connection['schema']}" 60 | 61 | # Add query parameters to the connection string 62 | connection_string += "?1=1" 63 | if connection["warehouse"] is not None: 64 | connection_string += f"&warehouse={connection['warehouse']}" 65 | if connection["role"] is not None: 66 | connection_string += f"&role={connection['role']}" 67 | 68 | # Create a SQLAlchemy engine using the connection string 69 | sql_connection = create_engine(connection_string, echo=False) 70 | 71 | # Execute the SQL query and read the data into a pandas DataFrame 72 | df = pd.read_sql(configuration["query"], sql_connection) 73 | 74 | return df 75 | -------------------------------------------------------------------------------- /docs/exporters/json.md: -------------------------------------------------------------------------------- 1 | # Structure 2 | ``` 3 | output/ 4 | ├─ json/ 5 | │ ├─ test_results.json 6 | │ ├─ test_results/ 7 | │ │ ├─ test case 1.xlsx 8 | │ │ ├─ test case 2.xlsx 9 | │ │ └─ ... 10 | ``` 11 | 12 | The json extractor will generate a `test_results.json` file and a `test_results` folder containing the details of the test cases results in xlsx format. 13 | 14 | # test_results.json 15 | The `test_results.json` file will contain the following properties: 16 | - `test_case`: the name of the test case 17 | - `status`: the status of the test case. Can be `success`, `failure` or `error` 18 | - `error.type`: the type of the error if the test case failed or raised an error 19 | - `error.message`: the error message if the test case failed or raised an error 20 | - `source.start`: the start time of the source extraction 21 | - `source.end`: the end time of the source extraction 22 | - `source.duration`: the duration of the source extraction 23 | - `source.count`: the count of the source dataset 24 | - `expected.start`: the start time of the expected extraction 25 | - `expected.end`: the end time of the expected extraction 26 | - `expected.duration`: the duration of the expected extraction 27 | - `expected.count`: the count of the expected dataset 28 | - `compare.start`: the start time of the comparison 29 | - `compare.end`: the end time of the comparison 30 | - `compare.duration`: the duration of the comparison 31 | - `compare.success_rate`: the success rate of the test case 32 | 33 | # test_results folder 34 | The `test_results` folder will contain one xlsx file per test case. 
Each file will contain a sheet with the gap between the source and the expected dataset 35 | 36 | # Example 37 | ``` json 38 | { 39 | "test_case": "test 1", 40 | "status": "passed", 41 | "source": { 42 | "start": "2024-02-05T17:08:36Z", 43 | "end": "2024-02-05T17:08:36Z", 44 | "duration": 0.0032982, 45 | "count": 100 46 | }, 47 | "expected": { 48 | "start": "2024-02-05T17:08:36Z", 49 | "end": "2024-02-05T17:08:36Z", 50 | "duration": 6.0933333333333335e-05, 51 | "count": 100 52 | }, 53 | "compare": { 54 | "start": "2024-02-05T17:08:36Z", 55 | "end": "2024-02-05T17:08:36Z", 56 | "duration": 0.0032982, 57 | "success_rate": 1.0 58 | } 59 | }, 60 | { 61 | "test_case": "test 2", 62 | "status": "failed", 63 | "source": { 64 | "start": "2024-02-05T17:08:36Z", 65 | "end": "2024-02-05T17:08:36Z", 66 | "duration": 0.0032982, 67 | "count": 100 68 | }, 69 | "expected": { 70 | "start": "2024-02-05T17:08:36Z", 71 | "end": "2024-02-05T17:08:36Z", 72 | "duration": 6.0933333333333335e-05, 73 | "count": 100 74 | }, 75 | "compare": { 76 | "start": "2024-02-05T17:08:36Z", 77 | "end": "2024-02-05T17:08:36Z", 78 | "duration": 0.0032982, 79 | "success_rate": 0.95 80 | }, 81 | "error": { 82 | "type": "Data", 83 | "message": "Some rows are not equals between source dataset and expected dataset" 84 | } 85 | } 86 | ``` -------------------------------------------------------------------------------- /docs/configuration/options.md: -------------------------------------------------------------------------------- 1 | Test case allow to define options for the test case execution. The options are defined in the `options` section of the test case configuration. 2 | 3 | # Ignore 4 | The `ignore` option allow to ignore specifics columns in the comparison. The `ignore` option is a list of columns to ignore in the comparison. The columns are defined by their name. 5 | 6 | ## Example 7 | ``` yaml 8 | Example: 9 | options: 10 | ignore: 11 | - column_to_ignore_1 12 | - column_to_ignore_2 13 | source: 14 | connection: my_connection 15 | query: select * from my_table 16 | expected: 17 | connection: my_connection 18 | query: select * from my_table 19 | ``` 20 | 21 | # Sort 22 | The `sort` option allow to sort the dataset before the comparison. The `sort` option is a list of columns to sort the dataset. The columns are defined by their name. 23 | 24 | ## Example 25 | ``` yaml 26 | Example: 27 | options: 28 | sort: 29 | - column_to_sort_1 30 | - column_to_sort_2 31 | source: 32 | connection: my_connection 33 | query: select * from my_table 34 | expected: 35 | connection: my_connection 36 | query: select * from my_table 37 | ``` 38 | 39 | ⚠️ The best practice is to sort the dataset in the source and the expected query to ensure the comparison is done on the same order and provide a better performance. 40 | 41 | # Cast 42 | The `cast` option allow to cast the column type before the comparison. The `cast` option is a list of name and type to cast the column. The column name is defined by their name and the type. 43 | 44 | The allowed types are: 45 | - `int` 46 | - `float` 47 | - `string` 48 | - `datetime` 49 | 50 | ## Example 51 | ``` yaml 52 | Example: 53 | options: 54 | cast: 55 | - name: column_to_cast_1 56 | type: int 57 | - name: column_to_cast_2 58 | type: float 59 | source: 60 | connection: my_connection 61 | query: select * from my_table 62 | expected: 63 | connection: my_connection 64 | query: select * from my_table 65 | ``` 66 | 67 | # Pass rate 68 | The `pass_rate` option allow to define the pass rate of the test case. 
The pass rate is a float between 0 and 1. The pass rate is the percentage of the rows that need to be the same to pass the test case. 69 | 70 | ## Example 71 | ``` yaml 72 | Example: 73 | options: 74 | pass_rate: 0.95 75 | source: 76 | connection: my_connection 77 | query: select * from my_table 78 | expected: 79 | connection: my_connection 80 | query: select * from my_table 81 | ``` 82 | 83 | # Trim 84 | The `trim` option allow to trim the string columns before the comparison. The `trim` option is a list of columns to trim. The columns are defined by their name. 85 | 86 | ## Example 87 | ``` yaml 88 | Example: 89 | options: 90 | trim: 91 | - column_to_trim_1 92 | - column_to_trim_2 93 | source: 94 | connection: my_connection 95 | query: select * from my_table 96 | expected: 97 | connection: my_connection 98 | query: select * from my_table 99 | ``` 100 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_postgresql.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read PostgreSQL database""" 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | import urllib 7 | from connectors.connector import Connector 8 | 9 | 10 | class ConnectorPostgreSQL(Connector): 11 | """Connector to read PostgreSQL database""" 12 | 13 | def __init__(self): 14 | # Initialize the connector with its name and connection definitions 15 | self.name = "POSTGRESQL" 16 | self.connection_definition = [ 17 | { 18 | "name": "mode", 19 | "default": "password", 20 | "validset": ["password", "connection_string"], 21 | }, 22 | { 23 | "name": "hostname", 24 | "default": None, 25 | }, 26 | { 27 | "name": "database", 28 | "default": None, 29 | }, 30 | { 31 | "name": "username", 32 | "default": None, 33 | }, 34 | { 35 | "name": "password", 36 | "default": None, 37 | }, 38 | { 39 | "name": "port", 40 | "default": 5432, 41 | "type": "integer", 42 | }, 43 | { 44 | "name": "ssl_context", 45 | "default": False, 46 | "type": "boolean", 47 | }, 48 | { 49 | "name": "connection_string", 50 | "default": None, 51 | }, 52 | ] 53 | self.configuration_definition = [{"name": "query"}, {"name": "connection"}] 54 | 55 | def get_data(self, configuration: dict, connection: dict): 56 | """Get data from source""" 57 | 58 | # Use the provided connection string if mode is "connection_string" 59 | connection_string = connection["connection_string"] 60 | if connection["mode"] == "password": 61 | # Extract connection parameters 62 | port = connection["port"] 63 | hostname = connection["hostname"] 64 | username = connection["username"] 65 | password = connection["password"] 66 | database = connection["database"] 67 | # Create the connection string for PostgreSQL 68 | password = urllib.parse.quote_plus(password) 69 | connection_string = ( 70 | f"postgresql+pg8000://{username}:{password}@{hostname}:{port}/{database}" 71 | ) 72 | 73 | # Additional connection arguments 74 | connect_args = {} 75 | if connection["ssl_context"]: 76 | connect_args = {"ssl_context": True} 77 | 78 | # Create a SQLAlchemy engine using the connection string and additional arguments 79 | sql_connection = create_engine( 80 | connection_string, echo=False, connect_args=connect_args 81 | ) 82 | 83 | # Execute the SQL query and read the data into a pandas DataFrame 84 | df = pd.read_sql(configuration["query"], sql_connection) 85 | 86 | return df 87 | -------------------------------------------------------------------------------- 
/src/ploosh/exporters/exporter_csv.py: -------------------------------------------------------------------------------- 1 | """Export test case result to CSV format""" 2 | import csv 3 | import os 4 | from exporters.exporter import Exporter 5 | 6 | class ExporterCSV(Exporter): 7 | """Export test case result to CSV format""" 8 | 9 | def __init__(self): 10 | # Set the name of the exporter 11 | self.name = "CSV" 12 | 13 | def export(self, cases: dict): 14 | """Export test case results to a CSV file""" 15 | 16 | # Define the output file path 17 | output_file = f"{self.output_path}/csv/test_results.csv" 18 | 19 | # Initialize the data list with headers 20 | data = [[ 21 | "name", 22 | "state", 23 | "source_start", 24 | "source_end", 25 | "source_duration", 26 | "source_count", 27 | "expected_start", 28 | "expected_end", 29 | "expected_duration", 30 | "expected_count", 31 | "compare_start", 32 | "compare_end", 33 | "compare_duration", 34 | "success_rate", 35 | "error_type", 36 | "error_message", 37 | ]] 38 | 39 | # Iterate over each test case and collect data 40 | for name in cases: 41 | case = cases[name] 42 | 43 | # Collect data for the current test case 44 | case_data = [ 45 | name, 46 | case.state, 47 | Exporter.date_to_string(case.source.duration.start), 48 | Exporter.date_to_string(case.source.duration.end), 49 | case.source.duration.duration, 50 | case.source.count, 51 | Exporter.date_to_string(case.expected.duration.start), 52 | Exporter.date_to_string(case.expected.duration.end), 53 | case.expected.duration.duration, 54 | case.expected.count, 55 | Exporter.date_to_string(case.compare_duration.start), 56 | Exporter.date_to_string(case.compare_duration.end), 57 | case.compare_duration.duration, 58 | case.success_rate, 59 | case.error_type, 60 | case.error_message, 61 | ] 62 | 63 | # Append the collected data to the data list 64 | data.append(case_data) 65 | 66 | # If there is a comparison gap, export it to an Excel file 67 | if case.df_compare_gap is not None: 68 | detail_file_path = f"{self.output_path}/json/test_results/{name}.xlsx" 69 | 70 | # Create directories if they do not exist 71 | os.makedirs(os.path.dirname(detail_file_path), exist_ok=True) 72 | case.df_compare_gap.to_excel(detail_file_path) 73 | 74 | # Create directories if they do not exist 75 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 76 | 77 | # Write the collected data to the CSV file 78 | with open(output_file, "w", encoding="UTF-8") as f: 79 | writer = csv.writer(f, lineterminator="\n") 80 | writer.writerows(data) 81 | f.close() 82 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_mysql.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read MYSQL database""" 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | import urllib 7 | from connectors.connector import Connector 8 | 9 | 10 | class ConnectorMYSQL(Connector): 11 | """Connector to read MYSQL database""" 12 | 13 | def __init__(self): 14 | # Initialize the connector with its name and connection definitions 15 | self.name = "MYSQL" 16 | self.connection_definition = [ 17 | { 18 | "name": "mode", 19 | "default": "password", 20 | "validset": ["password", "connection_string"], 21 | }, 22 | { 23 | "name": "hostname", 24 | "default": None, 25 | }, 26 | { 27 | "name": "database", 28 | "default": None, 29 | }, 30 | { 31 | "name": "username", 32 | "default": None, 33 | }, 34 | { 35 | "name": 
"password", 36 | "default": None, 37 | }, 38 | { 39 | "name": "port", 40 | "default": 3306, 41 | "type": "integer", 42 | }, 43 | { 44 | "name": "require_secure_transport", 45 | "default": False, 46 | "type": "boolean", 47 | }, 48 | { 49 | "name": "connection_string", 50 | "default": None, 51 | }, 52 | ] 53 | self.configuration_definition = [{"name": "query"}, {"name": "connection"}] 54 | 55 | def get_data(self, configuration: dict, connection: dict): 56 | """Get data from source""" 57 | 58 | # Use the provided connection string if mode is "connection_string" 59 | connection_string = connection["connection_string"] 60 | if connection["mode"] == "password": 61 | # Extract connection parameters 62 | port = connection["port"] 63 | hostname = connection["hostname"] 64 | username = connection["username"] 65 | password = connection["password"] 66 | database = connection["database"] 67 | # Create the connection string for MySQL 68 | 69 | password = urllib.parse.quote_plus(password) 70 | connection_string = ( 71 | f"mysql+pymysql://{username}:{password}@{hostname}:{port}/{database}" 72 | ) 73 | 74 | # Additional connection arguments 75 | connect_args = {} 76 | if connection["require_secure_transport"]: 77 | connect_args = {"ssl": {"require_secure_transport": True}} 78 | 79 | # Create a SQLAlchemy engine using the connection string and additional arguments 80 | sql_connection = create_engine( 81 | connection_string, echo = False, connect_args = connect_args 82 | ) 83 | 84 | # Execute the SQL query and read the data into a pandas DataFrame 85 | df = pd.read_sql(configuration["query"], sql_connection) 86 | 87 | return df 88 | -------------------------------------------------------------------------------- /src/ploosh/exporters/exporter_json.py: -------------------------------------------------------------------------------- 1 | """Export test case result to JSON format""" 2 | 3 | import json 4 | import os 5 | from exporters.exporter import Exporter 6 | 7 | 8 | class ExporterJSON(Exporter): 9 | """Export test case result to JSON format""" 10 | 11 | def __init__(self): 12 | # Set the name of the exporter 13 | self.name = "JSON" 14 | 15 | def export(self, cases: dict): 16 | """Export test case results to a JSON file""" 17 | 18 | # Define the output file path 19 | output_file = f"{self.output_path}/json/test_results.json" 20 | 21 | data = [] 22 | # Iterate over each test case and collect data 23 | for name in cases: 24 | case = cases[name] 25 | 26 | # Collect basic data for the current test case 27 | case_data = { 28 | "name": name, 29 | "state": case.state, 30 | } 31 | 32 | # Collect source data if available 33 | if case.source.duration.start is not None: 34 | case_data["source"] = { 35 | "start": Exporter.date_to_string(case.source.duration.start), 36 | "end": Exporter.date_to_string(case.source.duration.end), 37 | "duration": case.source.duration.duration, 38 | "count": case.source.count, 39 | } 40 | 41 | # Collect expected data if available 42 | if case.expected.duration.start is not None: 43 | case_data["expected"] = { 44 | "start": Exporter.date_to_string(case.expected.duration.start), 45 | "end": Exporter.date_to_string(case.expected.duration.end), 46 | "duration": case.expected.duration.duration, 47 | "count": case.expected.count, 48 | } 49 | 50 | # Collect comparison data if available 51 | if case.compare_duration.start is not None: 52 | case_data["compare"] = { 53 | "start": Exporter.date_to_string(case.compare_duration.start), 54 | "end": Exporter.date_to_string(case.compare_duration.end), 55 | 
"duration": case.compare_duration.duration, 56 | "success_rate": case.success_rate, 57 | } 58 | 59 | # Collect error data if the test case failed or encountered an error 60 | if case.state in ["error", "failed"]: 61 | case_data["error"] = { 62 | "type": case.error_type, 63 | "message": case.error_message, 64 | } 65 | 66 | # Append the collected data to the data list 67 | data.append(case_data) 68 | 69 | # If there is a comparison gap, export it to an Excel file 70 | if case.df_compare_gap is not None: 71 | detail_file_path = f"{self.output_path}/json/test_results/{name}.xlsx" 72 | 73 | # Create directories if they do not exist 74 | os.makedirs(os.path.dirname(detail_file_path), exist_ok=True) 75 | case.df_compare_gap.to_excel(detail_file_path) 76 | 77 | # Create directories if they do not exist 78 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 79 | 80 | # Write the collected data to the JSON file 81 | with open(output_file, "w", encoding="UTF-8") as f: 82 | f.write(json.dumps(data, indent=2)) 83 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_mssql.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read MSSQL database""" 3 | 4 | import pandas as pd 5 | from sqlalchemy import create_engine 6 | import urllib 7 | from connectors.connector import Connector 8 | 9 | 10 | class ConnectorMSSQL(Connector): 11 | """Connector to read MSSQL database""" 12 | 13 | def __init__(self): 14 | # Initialize the connector with its name and connection definitions 15 | self.name = "MSSQL" 16 | self.connection_definition = [ 17 | { 18 | "name": "mode", 19 | "default": "password", 20 | "validset": ["password", "connection_string"], 21 | }, 22 | { 23 | "name": "hostname", 24 | "default": None, 25 | }, 26 | { 27 | "name": "database", 28 | "default": None, 29 | }, 30 | { 31 | "name": "username", 32 | "default": None, 33 | }, 34 | { 35 | "name": "password", 36 | "default": None, 37 | }, 38 | { 39 | "name": "port", 40 | "default": 1433, 41 | "type": "integer", 42 | }, 43 | { 44 | "name": "encrypt", 45 | "default": True, 46 | "type": "boolean", 47 | }, 48 | { 49 | "name": "trust_server_certificate", 50 | "default": False, 51 | "type": "boolean", 52 | }, 53 | { 54 | "name": "driver", 55 | "default": "ODBC Driver 18 for SQL Server", 56 | }, 57 | { 58 | "name": "connection_string", 59 | "default": None, 60 | }, 61 | ] 62 | self.configuration_definition = [{"name": "query"}, {"name": "connection"}] 63 | 64 | def get_data(self, configuration: dict, connection: dict): 65 | """Get data from source""" 66 | 67 | # Use the provided connection string if mode is "connection_string" 68 | connection_string = connection["connection_string"] 69 | if connection["mode"] == "password": 70 | # Extract connection parameters 71 | driver = connection["driver"] 72 | port = connection["port"] 73 | hostname = connection["hostname"] 74 | username = connection["username"] 75 | password = connection["password"] 76 | database = connection["database"] 77 | trust_server_certificate = ( 78 | "yes" if connection["trust_server_certificate"] else "no" 79 | ) 80 | encrypt = "yes" if connection["encrypt"] else "no" 81 | 82 | # Create the ODBC connection string 83 | password = urllib.parse.quote_plus(password) 84 | odbc_connect = f"Driver={driver};Server={hostname};Database={database};Uid={username};Pwd={password};Encrypt={encrypt};TrustServerCertificate={trust_server_certificate};" 85 | 
connection_string = f"mssql+pyodbc:///?odbc_connect={odbc_connect}" 86 | 87 | # Create a SQLAlchemy engine using the connection string 88 | sql_connection = create_engine(connection_string, echo=False) 89 | 90 | # Execute the SQL query and read the data into a pandas DataFrame 91 | df = pd.read_sql(configuration["query"], sql_connection) 92 | 93 | return df 94 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_odbc.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read ODBC connection""" 3 | 4 | import warnings 5 | import pandas as pd 6 | import pyodbc 7 | from connectors.connector import Connector 8 | 9 | 10 | class ConnectorODCB(Connector): 11 | """Connector to read ODBC connection""" 12 | 13 | def __init__(self): 14 | # Initialize the connector with its name and connection definitions 15 | self.name = "ODBC" 16 | self.connection_definition = [ 17 | { 18 | "name": "mode", 19 | "default": "DSN", 20 | "validset": ["DSN", "connection_string"], 21 | }, 22 | { 23 | "name": "DSN", # Data Source Name for the ODBC connection 24 | "default": None 25 | }, 26 | { 27 | "name": "connection_string", 28 | "default": None 29 | }, 30 | { 31 | "name": "auto_commit", 32 | "type": "boolean", 33 | "default": True, # Whether to enable auto-commit 34 | }, 35 | { 36 | "name": "use_credentials", 37 | "type": "boolean", 38 | "default": False, # Whether to use credentials for the connection 39 | }, 40 | { 41 | "name": "user", 42 | "default": None, # Username for the connection 43 | }, 44 | { 45 | "name": "password", 46 | "default": None, # Password for the connection 47 | }, 48 | { 49 | "name": "encoding", 50 | "default": "UTF-8", # Encoding to use for the connection 51 | }, 52 | ] 53 | self.configuration_definition = [{"name": "query"}, {"name": "connection"}] 54 | 55 | def get_data(self, configuration: dict, connection: dict): 56 | """Get data from source""" 57 | 58 | if connection["mode"] == "DSN": 59 | # Establish the ODBC connection using the provided DSN and optional credentials 60 | if connection["use_credentials"]: 61 | odbc_connection = pyodbc.connect( 62 | f"DSN={connection['DSN']}", 63 | user=connection["user"], 64 | password=connection["password"], 65 | autocommit=connection["auto_commit"], 66 | ) 67 | else: 68 | odbc_connection = pyodbc.connect( 69 | f"DSN={connection['DSN']};", autocommit=connection["auto_commit"] 70 | ) 71 | else: 72 | odbc_connection = pyodbc.connect( 73 | connection["connection_string"], autocommit=connection["auto_commit"] 74 | ) 75 | 76 | # Suppress warnings related to encoding settings 77 | with warnings.catch_warnings(): 78 | warnings.simplefilter("ignore", UserWarning) 79 | 80 | # Set the encoding for the ODBC connection 81 | odbc_connection.setdecoding( 82 | pyodbc.SQL_CHAR, encoding=connection["encoding"] 83 | ) 84 | odbc_connection.setdecoding( 85 | pyodbc.SQL_WCHAR, encoding=connection["encoding"] 86 | ) 87 | odbc_connection.setencoding(encoding=connection["encoding"]) 88 | 89 | # Execute the SQL query and read the data into a pandas DataFrame 90 | df = pd.read_sql(configuration["query"], odbc_connection) 91 | 92 | return df 93 | -------------------------------------------------------------------------------- /tests/connectors/test_csv_spark.py: -------------------------------------------------------------------------------- 1 | import os 2 | import pandas as pd 3 | from pyspark.sql import SparkSession 4 | import pytest 5 | 
from pyjeb import control_and_setup 6 | from ploosh.engines.load_engine_spark import LoadEngineSpark 7 | from ploosh.configuration import Configuration 8 | from ploosh.connectors.connector_csv_spark import ConnectorCSVSpark 9 | 10 | @pytest.fixture 11 | def connector(): 12 | spark = SparkSession.builder \ 13 | .appName("ploosh") \ 14 | .master("spark://localhost:7077") \ 15 | .config("spark.executor.memory", "1g") \ 16 | .config("spark.driver.memory", "1g") \ 17 | .getOrCreate() 18 | 19 | connector = ConnectorCSVSpark() 20 | connector.spark = spark 21 | 22 | return connector 23 | 24 | @pytest.fixture 25 | def df_sales(): 26 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", dtype=object, date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 27 | 28 | @pytest.fixture 29 | def df_sales_with_types(): 30 | return pd.read_csv("./tests/.data/sales.csv", delimiter=",", dtype={"sale_id": "int64", "product_id": "int64", "sale_amount": "float64"}, date_format = "%Y-%m-%d", parse_dates=["sale_date"]) 31 | 32 | def test_default(connector, df_sales): 33 | configuration = { 34 | "path": f"{os.getcwd()}/tests/.data/sales.csv", 35 | } 36 | 37 | configuration = control_and_setup(configuration, connector.configuration_definition) 38 | 39 | df_test = connector.get_data(configuration, {}).toPandas() 40 | 41 | assert len(df_test.compare(df_sales)) == 0 42 | 43 | def test_delimiter(connector, df_sales): 44 | configuration = { 45 | "path": f"{os.getcwd()}/tests/.env/csv/sales_with_tab.csv", 46 | "delimiter": "\t" 47 | } 48 | 49 | configuration = control_and_setup(configuration, connector.configuration_definition) 50 | 51 | df_test = connector.get_data(configuration, {}).toPandas() 52 | 53 | assert len(df_test.compare(df_sales)) == 0 54 | 55 | def test_infer_schema(connector, df_sales_with_types): 56 | configuration = { 57 | "path": f"{os.getcwd()}/tests/.data/sales.csv", 58 | "inferSchema": True 59 | } 60 | 61 | configuration = control_and_setup(configuration, connector.configuration_definition) 62 | 63 | df_test = connector.get_data(configuration, {}).toPandas() 64 | 65 | assert len(df_test.compare(df_sales_with_types)) == 0 66 | 67 | def test_quote(connector, df_sales): 68 | configuration = { 69 | "path": f"{os.getcwd()}/tests/.env/csv/sales_with_single_quote.csv", 70 | "quote": "'" 71 | } 72 | 73 | configuration = control_and_setup(configuration, connector.configuration_definition) 74 | 75 | df_test = connector.get_data(configuration, {}).toPandas() 76 | 77 | assert len(df_test.compare(df_sales)) == 0 78 | 79 | def test_encoding(connector, df_sales): 80 | configuration = { 81 | "path": f"{os.getcwd()}/tests/.env/csv/sales_with_iso_8859_1.csv", 82 | "encoding": "ISO-8859-1" 83 | } 84 | 85 | configuration = control_and_setup(configuration, connector.configuration_definition) 86 | 87 | df_test = connector.get_data(configuration, {}).toPandas() 88 | 89 | assert len(df_test.compare(df_sales)) == 0 90 | 91 | def test_line_sep(connector, df_sales): 92 | configuration = { 93 | "path": f"{os.getcwd()}/tests/.env/csv/sales_with_cr.csv", 94 | "lineSep": "\r" 95 | } 96 | 97 | configuration = control_and_setup(configuration, connector.configuration_definition) 98 | 99 | df_test = connector.get_data(configuration, {}).toPandas() 100 | 101 | assert len(df_test.compare(df_sales)) == 0 102 | -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_csv.py: -------------------------------------------------------------------------------- 1 | # pylint: 
disable=R0903 2 | """Connector to read CSV file""" 3 | 4 | import json 5 | import pandas as pd 6 | from connectors.connector import Connector 7 | 8 | 9 | class ConnectorCSV(Connector): 10 | """Connector to read CSV file""" 11 | 12 | def __init__(self): 13 | # Initialize the connector with its name and configuration definitions 14 | self.name = "CSV" 15 | self.connection_definition = [] # No specific connection parameters required 16 | self.configuration_definition = [ 17 | {"name": "path"}, # Path to the CSV file 18 | {"name": "delimiter", "default": ","}, # Delimiter used in the CSV file 19 | {"name": "infer", "type": "boolean", "default": True}, # Infer the column names 20 | {"name": "names", "type": "list", "default": None}, # Sequence of column labels to apply 21 | {"name": "usecols", "type": "list", "default": None}, # Subset of columns to select 22 | {"name": "skiprows", "type": "string", "default": None}, # Line numbers to skip (0-indexed) or number of lines to skip (int) at the start of the file 23 | {"name": "skipfooter", "type": "integer", "default": 0}, # Number of lines at bottom of file to skip (Unsupported with engine='c') 24 | {"name": "nrows", "type": "integer", "default": None}, # Number of rows of file to read. Useful for reading pieces of large files. 25 | {"name": "lineterminator", "type": "string", "default": None}, # Character used to denote a line break. 26 | {"name": "quotechar", "type": "string", "default": '"'}, # Character used to denote the start and end of a quoted item. 27 | {"name": "encoding", "type": "string", "default": "utf-8"}, # Encoding to use for UTF when reading/writing. 28 | {"name": "engine", "type": "string", "default": None}, # Parser engine to use. 29 | ] 30 | 31 | def get_data(self, configuration: dict, connection: dict): 32 | """Get data from source""" 33 | 34 | # Extract the path and delimiter from the configuration 35 | path = configuration["path"] 36 | delimiter = configuration["delimiter"] 37 | header = None if configuration["infer"] is False else "infer" 38 | names = configuration["names"] 39 | usecols = configuration["usecols"] 40 | skiprows = None 41 | skipfooter = configuration["skipfooter"] 42 | nrows = configuration["nrows"] 43 | lineterminator = configuration["lineterminator"] 44 | quotechar = configuration["quotechar"] 45 | encoding = configuration["encoding"] 46 | engine = configuration["engine"] 47 | 48 | if configuration["skiprows"] is not None: 49 | try: 50 | skiprows = json.loads(configuration["skiprows"]) 51 | except json.JSONDecodeError: 52 | raise ValueError("The variable is neither a list nor an integer.") 53 | 54 | if skiprows is not None and not isinstance(skiprows, (list, int)): 55 | raise ValueError("The variable is neither a list nor an integer.") 56 | 57 | # Read the CSV file using pandas with the specified delimiter 58 | df = pd.read_csv(path, 59 | delimiter=delimiter, 60 | header=header, 61 | names=names, 62 | usecols=usecols, 63 | skiprows=skiprows, 64 | skipfooter=skipfooter, 65 | nrows=nrows, 66 | lineterminator=lineterminator, 67 | quotechar=quotechar, 68 | engine=engine, 69 | encoding=encoding) 70 | return df -------------------------------------------------------------------------------- /tests/load_engine/test_spark.py: -------------------------------------------------------------------------------- 1 | from pyspark.sql import SparkSession 2 | from pyspark.sql.types import StructType, StructField, IntegerType, StringType, TimestampType, FloatType 3 | import pytest 4 | from pyjeb import control_and_setup 5 | 
from ploosh.engines.load_engine_spark import LoadEngineSpark 6 | from ploosh.configuration import Configuration 7 | 8 | @pytest.fixture 9 | def controls(): 10 | controls = Configuration.case_definition 11 | controls = [control for control in controls if control["name"].startswith("options")] 12 | return controls 13 | 14 | 15 | @pytest.fixture 16 | def spark(): 17 | return SparkSession.builder \ 18 | .appName("ploosh") \ 19 | .master("spark://localhost:7077") \ 20 | .config("spark.executor.memory", "1g") \ 21 | .config("spark.driver.memory", "1g") \ 22 | .getOrCreate() 23 | 24 | def test_count(spark, controls): 25 | schema = StructType([ 26 | StructField("A", IntegerType(), True), 27 | StructField("B", IntegerType(), True) 28 | ]) 29 | df_data = spark.createDataFrame([(1, 4), (2, 5), (3, 6)], schema) 30 | parameters = {} 31 | options = control_and_setup(parameters, controls)["options"] 32 | 33 | load_engine = LoadEngineSpark(None, options, None) 34 | df_data = load_engine.execute(df_data) 35 | assert load_engine.count == 3 36 | 37 | def test_cast_datetime(spark, controls): 38 | schema = StructType([ 39 | StructField("A", IntegerType(), True), 40 | StructField("B", StringType(), True) 41 | ]) 42 | df_data = spark.createDataFrame([(1, "2021-01-01"), (2, "2021-01-01"), (3, "2021-01-01")], schema) 43 | parameters = { 44 | "options": { 45 | "cast": [ 46 | { 47 | "name": "B", 48 | "type": "datetime" 49 | } 50 | ] 51 | } 52 | } 53 | options = control_and_setup(parameters, controls)["options"] 54 | 55 | load_engine = LoadEngineSpark(None, options, None) 56 | df_data = load_engine.execute(df_data) 57 | 58 | assert df_data.schema["B"].dataType == TimestampType() 59 | assert df_data.schema["A"].dataType == IntegerType() 60 | 61 | def test_cast_int(spark, controls): 62 | schema = StructType([ 63 | StructField("A", IntegerType(), True), 64 | StructField("B", StringType(), True) 65 | ]) 66 | df_data = spark.createDataFrame([(1, "4"), (2, "5"), (3, "6")], schema) 67 | parameters = { 68 | "options": { 69 | "cast": [ 70 | { 71 | "name": "B", 72 | "type": "int" 73 | } 74 | ] 75 | } 76 | } 77 | options = control_and_setup(parameters, controls)["options"] 78 | 79 | load_engine = LoadEngineSpark(None, options, None) 80 | df_data = load_engine.execute(df_data) 81 | 82 | assert df_data.schema["B"].dataType == IntegerType() 83 | assert df_data.schema["A"].dataType == IntegerType() 84 | 85 | def test_cast_float(spark, controls): 86 | schema = StructType([ 87 | StructField("A", IntegerType(), True), 88 | StructField("B", StringType(), True) 89 | ]) 90 | df_data = spark.createDataFrame([(1, "4.0"), (2, "5.0"), (3, "6.0")], schema) 91 | parameters = { 92 | "options": { 93 | "cast": [ 94 | { 95 | "name": "B", 96 | "type": "float" 97 | } 98 | ] 99 | } 100 | } 101 | options = control_and_setup(parameters, controls)["options"] 102 | 103 | load_engine = LoadEngineSpark(None, options, None) 104 | df_data = load_engine.execute(df_data) 105 | 106 | assert df_data.schema["B"].dataType == FloatType() 107 | assert df_data.schema["A"].dataType == IntegerType() -------------------------------------------------------------------------------- /docs/connectors/native/sqlserver.md: -------------------------------------------------------------------------------- 1 | This connector allows to connect to a SQL Server database and execute SQL queries. 2 | 3 | # Requirements 4 | ODBC Driver 18 must be installed on the executing computer. 
5 | 6 | * For Linux, follow the instructions from [Microsoft documentation](https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/installing-the-microsoft-odbc-driver-for-sql-server?view=sql-server-ver15&tabs=ubuntu18-install%2Calpine17-install%2Cdebian8-install%2Credhat7-13-install%2Crhel7-offline#18) 7 | * For Windows, follow the instructions from [Microsoft documentation](https://learn.microsoft.com/en-us/sql/connect/odbc/download-odbc-driver-for-sql-server?view=sql-server-ver15) 8 | * For macOS, follow the instructions from [Microsoft documentation](https://learn.microsoft.com/en-us/sql/connect/odbc/linux-mac/install-microsoft-odbc-driver-sql-server-macos?view=sql-server-ver15) 9 | 10 | # Connection configuration 11 | ## Password mode 12 | ### Definition 13 | | Name | Mandatory | Default | Description | 14 | |----------------------------|:---------:|:-----------------------------:|-------------| 15 | | mode | no | password | Change the connection mode. Can be "password" or "connection_string". The "connection_string" mode allows using a custom connection string. 16 | | hostname | yes | | Target host name 17 | | database | yes | | Target database name 18 | | username | yes | | SQL user name 19 | | password | yes | | SQL user password 20 | | port | no | 1433 | Port used by the connection 21 | | trust_server_certificate | no | false | Trust the server SSL certificate 22 | | encrypt | no | yes | Encrypt the connection 23 | | driver | no | ODBC Driver 18 for SQL Server | Driver used by the connection 24 | 25 | ⚠️ It's highly recommended to use a [parameter](/docs/configuration-custom-parameters/) to pass the password value 26 | 27 | ### Example 28 | ``` yaml 29 | mssql_example: 30 | type: mssql 31 | hostname: ploosh.database.windows.net 32 | database: SampleDB 33 | username: sa_ploosh 34 | password: $var.sa_ploosh_password 35 | ``` 36 | 37 | ## Connection string mode 38 | ### Definition 39 | | Name | Mandatory | Default | Description | 40 | |-------------------|:---------:|:-----------------------------:|-------------| 41 | | mode | no | password | Set to "connection_string" to use a custom connection string 42 | | connection_string | yes | | Connection string used to access the database.
Refer to [SQLAlchemy documentation](https://docs.sqlalchemy.org/en/20/dialects/mssql.html) to get the accepted format 43 | 44 | ### Example 45 | ``` yaml 46 | mssql_example: 47 | type: mssql 48 | mode: connection_string 49 | connection_string: "mssql+pyodbc://ploosh01:1433/SampleDB?driver=ODBC+Driver+18+for+SQL+Server&TrustServerCertificate=yes&authentication=ActiveDirectoryIntegrated" 50 | ``` 51 | 52 | # Test case configuration 53 | ## Definition 54 | | Name | Mandatory | Default | Description | 55 | |-------------------|:---------:|:-----------------------------:|-------------| 56 | | connection | yes | | The connection to use 57 | | query | yes | | The query to execute to the database 58 | 59 | ## Example 60 | ``` yaml 61 | Example SQL Server: 62 | source: 63 | connection: mssql_example 64 | type: mssql 65 | query: | 66 | select * 67 | from [rh].[employees] 68 | where [hire_date] < '2000-01-01' 69 | expected: 70 | type: csv 71 | path: data/employees_before_2000.csv 72 | ``` -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_semantic_model_xmla.py: -------------------------------------------------------------------------------- 1 | # pylint: disable=R0903 2 | """Connector to read Semantic Model from Fabric XMLA endpoint""" 3 | 4 | import pandas as pd 5 | import requests 6 | from azure.identity import ClientSecretCredential, InteractiveBrowserCredential, UsernamePasswordCredential 7 | from connectors.connector import Connector 8 | import json 9 | 10 | class ConnectorSemanticModel(Connector): 11 | """Connector to read Semantic Model using Fabric XMLA endpoint""" 12 | 13 | def __init__(self): 14 | self.name = "SEMANTIC_MODEL" 15 | self.connection_definition = [ 16 | { 17 | "name": "mode", 18 | "default": "oauth", 19 | "validset": ["oauth"] # , "token", "spn"] To add once tested 20 | }, 21 | { 22 | "name": "token", 23 | "default": None 24 | }, 25 | { 26 | "name": "tenant_id", 27 | "default": None 28 | }, 29 | { 30 | "name": "client_id", 31 | "default": None 32 | }, 33 | { 34 | "name": "client_secret", 35 | "default": None 36 | }, 37 | { 38 | "name": "dataset_id" 39 | } 40 | ] 41 | self.configuration_definition = [ 42 | { 43 | "name": "query" 44 | }, 45 | { 46 | "name": "body", 47 | "default": None 48 | } 49 | ] 50 | 51 | def get_data(self, configuration: dict, connection: dict): 52 | """Get data from source""" 53 | 54 | mode = connection["mode"] 55 | dataset_id = connection["dataset_id"] 56 | query = configuration["query"] 57 | 58 | if mode == "oauth": 59 | try: 60 | interactive_browser_credential_class = InteractiveBrowserCredential() 61 | scope = 'https://analysis.windows.net/powerbi/api/.default' 62 | access_token_class = interactive_browser_credential_class.get_token(scope) 63 | token_string = access_token_class.token 64 | except Exception as connection_error: 65 | raise ValueError(connection_error) 66 | 67 | # uses the token provided in the connection_definition 68 | elif mode == "token": 69 | token_string = connection["token"] 70 | 71 | # get a token from a registered azure app 72 | elif mode == "spn": 73 | scope = 'https://analysis.windows.net/powerbi/api/.default' 74 | tenant_id = connection["tenant_id"] 75 | client_id = connection["client_id"] 76 | client_secret = connection["client_secret"] 77 | authority = f'https://login.microsoftonline.com/' 78 | credential = ClientSecretCredential(tenant_id, client_id, client_secret, authority=authority) 79 | token = credential.get_token(scope) 80 | token_string = token.token # 
need to define header 81 | 82 | # Initialize query 83 | post_query = f'https://api.powerbi.com/v1.0/myorg/datasets/{dataset_id}/executeQueries' 84 | header = {'Authorization': f'Bearer {token_string}', 'Content-Type': 'application/json'} 85 | body = '''{ 86 | "queries": [ 87 | { 88 | "query": "%s" 89 | } 90 | ], 91 | "serializerSettings": { 92 | "includeNulls": "true" 93 | } 94 | }''' % (query) 95 | 96 | post_r = requests.post(url=post_query, data=body, headers=header) 97 | 98 | if post_r.status_code == 200: 99 | output = post_r.json() 100 | df_results = pd.DataFrame(output) 101 | df_tables = pd.DataFrame(df_results["results"][0]) 102 | df_rows = pd.DataFrame(df_tables["tables"][0]) 103 | flatten_data = df_rows.values.flatten() 104 | df = pd.json_normalize(flatten_data) # type: ignore 105 | 106 | return df 107 | 108 | elif post_r.status_code == 400: 109 | response = json.loads(post_r.text) 110 | error_code = response['error']['code'] 111 | error_message = response['error']['pbi.error']['details'][0]['detail']['value'] 112 | raise ValueError(f"DAX Execution Error : {error_code}\n{error_message}") 113 | 114 | elif post_r.status_code == 404: 115 | raise ValueError("Connection issue: PowerBIEntityNotFound") 116 | 117 | else: 118 | raise ValueError("Execution Error") 119 | -------------------------------------------------------------------------------- /src/ploosh/execute.py: -------------------------------------------------------------------------------- 1 | """Automatized Testing Framework""" 2 | 3 | import sys 4 | from colorama import Fore 5 | from pyspark.sql import SparkSession 6 | from case import StateStatistics 7 | from connectors import get_connectors 8 | from exporters import get_exporters 9 | from parameters import Parameters 10 | from configuration import Configuration 11 | from logs import Log, print_compare_state, print_summary 12 | 13 | 14 | def load_data(current_case, process_type, statistics): 15 | """Load data from source or expected""" 16 | try: 17 | # Attempt to load data for the current case 18 | current_case.load_data(process_type) 19 | return True 20 | except Exception as e: 21 | # Handle any errors that occur during data loading 22 | current_case.load_data_error(process_type, str(e)) 23 | current_case.calculate_durations() 24 | statistics.add_state(current_case.state) 25 | Log.print_error(str(e)) 26 | return False 27 | 28 | 29 | def compare_data(current_case, statistics, spark_session): 30 | """Compare data between source and expected""" 31 | try: 32 | # Compare data using Spark if both connectors are Spark-based 33 | if current_case.source.connector.is_spark and current_case.expected.connector.is_spark: 34 | current_case.compare_dataframes_with_spark(spark_session) 35 | else: 36 | # Otherwise, use a standard comparison 37 | current_case.compare_dataframes() 38 | statistics.add_state(current_case.state) 39 | return True 40 | except Exception as e: 41 | # Handle any errors that occur during data comparison 42 | current_case.compare_dataframes_error(str(e)) 43 | current_case.calculate_durations() 44 | statistics.add_state(current_case.state) 45 | Log.print_error(str(e)) 46 | return False 47 | 48 | 49 | def execute(args=None, spark_session=None): 50 | """Main function to execute test cases""" 51 | Log.init() 52 | Log.print_logo() 53 | 54 | statistics = StateStatistics() 55 | 56 | Log.print(f"{Fore.CYAN}Initialization[...]") 57 | try: 58 | # Parse command-line arguments 59 | if args is None: 60 | parameters = Parameters(sys.argv) 61 | else: 62 | parameters = Parameters(args) 
63 | 64 | # Initialize Spark session if needed 65 | if parameters.spark_mode is True and spark_session is None: 66 | Log.print("Start spark session") 67 | spark_session = SparkSession.builder \ 68 | .master("local") \ 69 | .appName("ploosh") \ 70 | .getOrCreate() 71 | 72 | # Load connectors and exporters 73 | Log.print("Load connectors") 74 | connectors = get_connectors(spark_session) 75 | Log.print("Load exporters") 76 | exporters = get_exporters() 77 | 78 | # Load configuration and test cases 79 | Log.print("Load configuration") 80 | configuration = Configuration(parameters, connectors, exporters) 81 | cases = configuration.get_cases() 82 | except Exception as e: 83 | # Handle any errors that occur during initialization 84 | Log.print_error(str(e)) 85 | sys.exit(1) 86 | 87 | Log.print(f"{Fore.CYAN}Start processing tests cases[...]") 88 | for i, case_name in enumerate(cases): 89 | current_case = cases[case_name] 90 | 91 | # Skip disabled test cases 92 | if current_case.disabled: 93 | Log.print(f"{Fore.MAGENTA}{case_name} [...] ({i + 1}/{len(cases)}) - Skipped") 94 | statistics.add_state(current_case.state) 95 | continue 96 | 97 | Log.print(f"{Fore.MAGENTA}{case_name} [...] ({i + 1}/{len(cases)}) - Started") 98 | 99 | # Load source data 100 | Log.print("Load source data") 101 | if not load_data(current_case, "source", statistics): 102 | continue 103 | 104 | # Load expected data 105 | Log.print("Load expected data") 106 | if not load_data(current_case, "expected", statistics): 107 | continue 108 | 109 | # Compare source and expected data 110 | Log.print("Compare source and expected data") 111 | if not compare_data(current_case, statistics, spark_session): 112 | continue 113 | 114 | # Print comparison state and calculate durations 115 | print_compare_state(current_case) 116 | current_case.calculate_durations() 117 | 118 | Log.print(f"{Fore.CYAN}Export results[...]") 119 | # Export test results 120 | configuration.exporter.export(cases) 121 | Log.print(f"{Fore.CYAN}Summary[...]") 122 | # Print summary of test results 123 | print_summary(cases, statistics) 124 | 125 | # Exit with error code if there were errors and failure_on_error is set 126 | if statistics.error > 0 and parameters.failure_on_error: 127 | sys.exit(1) 128 | -------------------------------------------------------------------------------- /.github/workflows/unit_tests.yml: -------------------------------------------------------------------------------- 1 | name: 'Unit tests' 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - develop 8 | paths: 9 | - 'src/**' 10 | - 'tests/**' 11 | jobs: 12 | tests: 13 | name: 'Execute unit tests' 14 | runs-on: ubuntu-22.04 15 | defaults: 16 | run: 17 | shell: bash 18 | steps: 19 | - name: Checkout 20 | uses: actions/checkout@v3 21 | - name: Set up Python 22 | uses: actions/setup-python@v5 23 | with: 24 | python-version: "3.12.8" 25 | - name: Setup Java 26 | uses: actions/setup-java@v2 27 | with: 28 | distribution: 'microsoft' 29 | java-version: '17.0.1' 30 | - name: Install requirements 31 | run: | 32 | pip install -r src/requirements.txt 33 | pip install pytest==8.3.3 34 | pip install pytest-timeout==2.3.1 35 | 36 | # Fix host file to avoid issues with Spark 37 | echo "127.0.0.1 localhost" | sudo tee /etc/hosts 38 | echo "::1 localhost ip6-localhost ip6-loopback" | sudo tee -a /etc/hosts 39 | echo "fe00::0 ip6-localnet" | sudo tee -a /etc/hosts 40 | echo "ff00::0 ip6-mcastprefix" | sudo tee -a /etc/hosts 41 | echo "ff02::1 ip6-allnodes" | sudo tee -a /etc/hosts 42 | echo "ff02::2 
ip6-allrouters" | sudo tee -a /etc/hosts 43 | echo "ff02::3 ip6-allhosts" | sudo tee -a /etc/hosts 44 | echo "127.0.0.1 $(hostname)" | sudo tee -a /etc/hosts 45 | - name: Run MySQL container 46 | run: | 47 | docker run --name ploosh_mysql \ 48 | -e MYSQL_ROOT_PASSWORD="${{ secrets.TEST_LOCAL_DB_PASSWORD }}" \ 49 | -e MYSQL_PASSWORD="${{ secrets.TEST_LOCAL_DB_PASSWORD }}" \ 50 | -e MYSQL_DATABASE=ploosh \ 51 | -e MYSQL_USER=ploosh \ 52 | -p 3306:3306 \ 53 | -d mysql 54 | - name: Run PostgreSQL container 55 | run: | 56 | docker run --name ploosh_postgresql \ 57 | -e POSTGRES_USER=ploosh \ 58 | -e POSTGRES_PASSWORD="${{ secrets.TEST_LOCAL_DB_PASSWORD }}" \ 59 | -e POSTGRES_DB=ploosh \ 60 | -p 5432:5432 \ 61 | -d postgres 62 | - name: Run SQL Server container 63 | run: | 64 | docker run --name ploosh_mssql \ 65 | -e ACCEPT_EULA="Y" \ 66 | -e MSSQL_SA_PASSWORD="${{ secrets.TEST_LOCAL_DB_PASSWORD }}" \ 67 | --hostname ploosh \ 68 | -p 1433:1433 \ 69 | -d \ 70 | mcr.microsoft.com/mssql/server:2022-latest 71 | 72 | - name: Run Spark master container 73 | run: | 74 | docker run -d --name ploosh-spark-master \ 75 | -e SPARK_MODE=master \ 76 | -e SPARK_MASTER_HOST=ploosh-spark-master \ 77 | -p 7077:7077 -p 8081:8080 \ 78 | -v $(pwd)/tests/.data:$(pwd)/tests/.data \ 79 | -v $(pwd)/tests/.env:$(pwd)/tests/.env \ 80 | --hostname ploosh-spark-master \ 81 | bitnami/spark:3.5.4 82 | 83 | docker exec ploosh-spark-master pip install delta-spark==3.3.0 84 | - name: Run Spark worker container 85 | run: | 86 | docker run -d --name ploosh-spark-worker \ 87 | -e SPARK_MODE=worker \ 88 | -e SPARK_MASTER_URL=spark://ploosh-spark-master:7077 \ 89 | -e SPARK_WORKER_MEMORY=2g \ 90 | -e SPARK_WORKER_CORES=1 \ 91 | -v $(pwd)/tests/.data:$(pwd)/tests/.data \ 92 | -v $(pwd)/tests/.env:$(pwd)/tests/.env \ 93 | --link ploosh-spark-master:ploosh-spark-master \ 94 | bitnami/spark:3.5.4 95 | 96 | docker exec ploosh-spark-worker pip install delta-spark==3.3.0 97 | - name: Feed databases 98 | run: | 99 | sleep 30 # wait until all services are up 100 | 101 | mysql -h 127.0.0.1 -u ploosh -p'${{ secrets.TEST_LOCAL_DB_PASSWORD }}' < tests/.env/mysql/setup.sql 102 | 103 | export PGPASSWORD='${{ secrets.TEST_LOCAL_DB_PASSWORD }}'; 104 | psql -h 127.0.0.1 -U ploosh -d ploosh -f tests/.env/postgresql/setup.sql 105 | 106 | /opt/mssql-tools/bin/sqlcmd -S localhost -U sa -P "${{ secrets.TEST_LOCAL_DB_PASSWORD }}" -i tests/.env/mssql/setup.sql 107 | 108 | spark_setup_file=$(pwd)/tests/.env/spark/setup.sql 109 | spark_setup_file_tmp=$(pwd)/tests/.env/spark/setup_tmp.sql 110 | sed "s|{{pwd}}|$(pwd)|g" $spark_setup_file > $spark_setup_file_tmp 111 | spark-sql -f$spark_setup_file_tmp 112 | - name: Execute tests 113 | run: | 114 | export TEST_DB_PASSWORD="${{ secrets.TEST_LOCAL_DB_PASSWORD }}" 115 | pytest -rA ./tests 116 | 117 | -------------------------------------------------------------------------------- /docs/home.md: -------------------------------------------------------------------------------- 1 | # What is Ploosh? 2 | 3 | Ploosh is yaml based framework used to automatize the testing process in data projects. It is designed to be simple to use and to be easily integrated in any CI/CD pipelines and it is also designed to be easily extended to support new data connectors. 
4 | 5 | ## Connectors 6 | | Type | Native connectors | Spark connectors 7 | |-----------|:----------|:----------| 8 | | Databases | [![Big Query](https://ploosh.io/wp-content/uploads/2025/01/bigquery.png)](/docs/docs/connectors-native-big-query/) [![Databricks](https://ploosh.io/wp-content/uploads/2025/01/databricks.png)](/docs/connectors-native-databricks) [![Snowflake](https://ploosh.io/wp-content/uploads/2025/01/snowflake.png)](/docs/connectors-native-snowflake) [![Sql Server](http://ploosh.io/wp-content/uploads/2025/01/mssql.png)](SQL-Server) [![PostgreSQL](https://ploosh.io/wp-content/uploads/2025/01/postgresql.png)](/docs/connectors-native-postgreSQL) [![MySQL](https://ploosh.io/wp-content/uploads/2025/01/mysql.png)](/docs/connectors-native-mysql) | [![SQL](https://ploosh.io/wp-content/uploads/2025/01/sql.png)](/docs/connectors-spark-sql) 9 | | Files | [![CSV](http://ploosh.io/wp-content/uploads/2025/01/csv.png)](/docs/connectors-native-csv) [![Excel](http://ploosh.io/wp-content/uploads/2025/01/excel.png)](/docs/connectors-native-excel) [![Parquet](http://ploosh.io/wp-content/uploads/2025/01/parquet.png)](/docs/connectors-native-parquet) | [![Delta](http://ploosh.io/wp-content/uploads/2025/01/delta.png)](/docs/connectors-spark-delta) [![CSV](http://ploosh.io/wp-content/uploads/2025/01/csv.png)](/docs/connectors-spark-csv) 10 | | Others | [![CSV](http://ploosh.io/wp-content/uploads/2025/01/empty.png)](/docs/connectors-native-empty) | [![Empty](http://ploosh.io/wp-content/uploads/2025/01/empty.png)](/docs/connectors-spark-empty) 11 | | Not yet but soon | ![JSON](http://ploosh.io/wp-content/uploads/2025/01/json.png) ![Oracle](http://ploosh.io/wp-content/uploads/2025/01/oracle.png) | ![Parquet](http://ploosh.io/wp-content/uploads/2025/01/parquet.png) 12 | 13 | # Get started 14 | 15 | ## Steps 16 | 1. Install Ploosh package 17 | 2. Setup connection file 18 | 3. Setup test cases 19 | 4. Run tests 20 | 4. Get results 21 | 22 | ## Install Ploosh package 23 | Install from [PyPi](https://pypi.org/project/ploosh/) package manager: 24 | ``` shell 25 | pip install ploosh 26 | ``` 27 | 28 | ## Setup connection file 29 | Add a yaml file with name "connections.yml" and following content: 30 | ``` yaml 31 | mssql_getstarted: 32 | type: mysql 33 | hostname: my_server_name.database.windows.net 34 | database: my_database_name 35 | username: my_user_name 36 | // using a parameter is highly recommended 37 | password: $var.my_sql_server_password 38 | ``` 39 | 40 | ## Setup test cases 41 | Add a folder "test_cases" with a yaml file with any name. In this example "example.yaml". 
Add the following content: 42 | 43 | ``` yaml 44 | Test aggregated data: 45 | options: 46 | sort: 47 | - gender 48 | - domain 49 | source: 50 | connection: mysql_demo 51 | type: mysql 52 | query: | 53 | select gender, right(email, length(email) - position("@" in email)) as domain, count(*) as count 54 | from users 55 | group by gender, domain 56 | expected: 57 | type: csv 58 | path: ./data/test_target_agg.csv 59 | 60 | Test invalid data: 61 | source: 62 | connection: mysql_demo 63 | type: mysql 64 | query: | 65 | select id, first_name, last_name, email, gender, ip_address 66 | from users 67 | where email like "%%.gov" 68 | expected: 69 | type: empty 70 | ``` 71 | 72 | ## Run tests 73 | ``` shell 74 | ploosh --connections "connections.yml" --cases "test_cases" --export "JSON" --p_my_sql_server_password "mypassword" 75 | ``` 76 | 77 | ![Execution result](http://ploosh.io/wp-content/uploads/2024/09/image.png) 78 | 79 | ## Test results 80 | 81 | ``` json 82 | [ 83 | { 84 | "name": "Test aggregated data", 85 | "state": "passed", 86 | "source": { 87 | "start": "2024-02-05T17:08:36Z", 88 | "end": "2024-02-05T17:08:36Z", 89 | "duration": 0.0032982 90 | }, 91 | "expected": { 92 | "start": "2024-02-05T17:08:36Z", 93 | "end": "2024-02-05T17:08:36Z", 94 | "duration": 6.0933333333333335e-05 95 | }, 96 | "compare": { 97 | "start": "2024-02-05T17:08:36Z", 98 | "end": "2024-02-05T17:08:36Z", 99 | "duration": 0.00046468333333333334 100 | } 101 | }, 102 | { 103 | "name": "Test invalid data", 104 | "state": "failed", 105 | "source": { 106 | "start": "2024-02-05T17:08:36Z", 107 | "end": "2024-02-05T17:08:36Z", 108 | "duration": 0.00178865 109 | }, 110 | "expected": { 111 | "start": "2024-02-05T17:08:36Z", 112 | "end": "2024-02-05T17:08:36Z", 113 | "duration": 1.49e-05 114 | }, 115 | "compare": { 116 | "start": "2024-02-05T17:08:36Z", 117 | "end": "2024-02-05T17:08:36Z", 118 | "duration": 1.8333333333333333e-07 119 | }, 120 | "error": { 121 | "type": "count", 122 | "message": "The count in source dataset (55) is different than the count in the expected dataset (0)" 123 | } 124 | } 125 | ] 126 | ``` 127 | 128 | # Run with spark 129 | It's possible to run the tests with spark. To do that, you need to install the spark package or use a platform that already has it installed like Databricks or Microsoft Fabric. 130 | 131 | See the [Spark connector](/docs/configuration-spark-mode/) for more information. -------------------------------------------------------------------------------- /src/ploosh/connectors/connector_analysis_services.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from connectors.connector import Connector 3 | from azure.identity import ClientSecretCredential 4 | from sys import path 5 | from pathlib import Path 6 | 7 | class ConnectorAnalysisServices(Connector): 8 | """Connector to read Analysis Services Model using ADOMD""" 9 | 10 | def __init__(self): 11 | ## ADOMD.dll ## 12 | # Using dll file put into src\ploosh\connectors\modules 13 | # The file need to be packaged to work here 14 | # need to use the absPath (!!! check if it works on linux !!!) 15 | root = Path(r"\Program Files\Microsoft.NET\ADOMD.NET\\") 16 | adomd_path = str(max((root).iterdir())) 17 | path.append(adomd_path) 18 | #absPath = os.path.dirname(__file__) 19 | #path.append(absPath + '\\modules') 20 | 21 | # NEED to pip install pythonnet to make pyadomd work !! 
22 | global Pyadomd 23 | from pyadomd import Pyadomd 24 | ## ADOMD.dll -- END ## 25 | 26 | self.name = "ANALYSIS_SERVICES" 27 | self.connection_definition = [ 28 | { 29 | "name": "mode", 30 | "default": "oauth", 31 | "validset": ["oauth", "pbix"] # , "token", "credentials", "spn"] 32 | }, 33 | { 34 | "name": "token", 35 | "default": None 36 | }, 37 | { 38 | "name": "username", 39 | "default": None 40 | }, 41 | { 42 | "name": "password", 43 | "default": None 44 | }, 45 | { 46 | "name": "tenant_id", 47 | "default": None 48 | }, 49 | { 50 | "name": "client_id", 51 | "default": None 52 | }, 53 | { 54 | "name": "client_secret", 55 | "default": None 56 | }, 57 | { 58 | "name": "scope", 59 | "default": 'https://analysis.windows.net/powerbi/api/.default' 60 | }, 61 | { 62 | "name": "dataset_id" 63 | }, 64 | { 65 | "name": "server" 66 | } 67 | ] 68 | self.configuration_definition = [ 69 | { 70 | "name": "query" 71 | } 72 | ] 73 | 74 | def get_data(self, configuration: dict, connection: dict): 75 | """Get data from source""" 76 | 77 | mode = connection["mode"] 78 | 79 | server = connection["server"] 80 | dataset_id = connection["dataset_id"] # For local .pbix --> Dataset ID: in DAX Studio, right click to model name and choose "copy Database ID" 81 | 82 | query = configuration["query"] # DAX Query 83 | 84 | # will open a login page in browser (if local AS instance, will connect automatically) 85 | if mode == "oauth": 86 | connection_string = f'Provider=MSOLAP;Data Source={server};Catalog={dataset_id};' 87 | 88 | # will open a login page in browser (if local AS instance, will connect automatically) 89 | elif mode == "pbix": 90 | connection_string = f'Provider=MSOLAP;Data Source={server};Catalog={dataset_id};' 91 | 92 | # uses the token provided in the connection_definition 93 | elif mode == "token": 94 | token = connection["token"] 95 | connection_string = f'Provider=MSOLAP;Data Source={server};Catalog={dataset_id};User Id=;Password={token};Impersonation Level=Impersonate;' 96 | 97 | # get a token from a registered azure app 98 | elif mode == "spn": 99 | scope = connection["scope"] 100 | tenant_id = connection["tenant_id"] 101 | client_id = connection["client_id"] 102 | client_secret = connection["client_secret"] 103 | authority = f'https://login.microsoftonline.com/' 104 | credential = ClientSecretCredential(tenant_id, client_id, client_secret) # , authority=authority) 105 | token = credential.get_token(scope) 106 | token_string = token.token 107 | connection_string = f'Provider=MSOLAP;Data Source={server};Catalog={dataset_id};User Id=;Password={token_string};Impersonation Level=Impersonate;' 108 | 109 | # uses username and password 110 | elif mode == "credentials": 111 | username = connection["username"] 112 | password = connection["password"] 113 | connection_string = f'Provider=MSOLAP;Data Source={server};Catalog={dataset_id};User Id={username};Password={password};' 114 | 115 | # Create and open connection to AS instance 116 | con = Pyadomd(connection_string) 117 | try: 118 | con.open() # Open the connection 119 | except: 120 | raise ValueError("Can't connect to the AS Instance") 121 | 122 | # execute DAX query 123 | with con.cursor() as cur: 124 | try: 125 | cur.execute(query) 126 | result = cur.fetchone() 127 | column_name = [i.name for i in cur.description] 128 | df = pd.DataFrame(result, columns=column_name) 129 | 130 | # Proactively close connection to AS instance 131 | con.close() 132 | 133 | return df 134 | except Exception as query_error: 135 | error_message = str(query_error) 136 | # Keep 
only error message without Technical Details 137 | error_summary = error_message.split("Technical Details")[0].strip().split("\r\n at")[0].strip() 138 | raise Exception(f"Erreur lors de l'exécution de la requête :\n{str(error_summary)}") 139 | -------------------------------------------------------------------------------- /tests/.env/csv/sales_with_cr.csv: -------------------------------------------------------------------------------- 1 | sale_id,seller_name,card_name,card_rarity,card_condition,price,quantity,sale_date,card_set,buyer_name,transaction_status 1,John Doe,Charizard,Rare,Mint,250.00,1,2024-11-01,Base Set,Jane Smith,Completed 2,Jane Smith,Blastoise,Holo Rare,Excellent,180.00,1,2024-11-02,Base Set,Alex Johnson,Completed 3,Alex Johnson,Pikachu,Common,Near Mint,15.00,4,2024-11-03,Jungle,Chris Brown,Completed 4,Chris Brown,Dragonite,Ultra Rare,Good,300.00,1,2024-11-04,Fossil,Emma Green,Pending 5,Emma Green,Zapdos,Holo Rare,Mint,150.00,1,2024-11-05,Fossil,Sarah White,Completed 6,Sarah White,Venusaur,Rare,Good,120.00,2,2024-11-06,Base Set,John Doe,Completed 7,Liam Brown,Moltres,Rare,Near Mint,140.00,1,2024-11-07,Fossil,Jane Smith,Completed 8,Olivia Taylor,Articuno,Rare,Excellent,100.00,1,2024-11-08,Fossil,Alex Johnson,Cancelled 9,Sophia Wilson,Eevee,Common,Mint,20.00,3,2024-11-09,Jungle,Chris Brown,Completed 10,Mason Martinez,Jolteon,Rare,Near Mint,80.00,1,2024-11-10,Jungle,Emma Green,Completed 11,Ethan White,Flareon,Rare,Excellent,85.00,1,2024-11-11,Jungle,Sarah White,Completed 12,Lucas Harris,Vaporeon,Rare,Mint,100.00,1,2024-11-12,Jungle,Liam Brown,Completed 13,Amelia Clark,Machamp,Holo Rare,Mint,75.00,1,2024-11-13,Base Set,Olivia Taylor,Completed 14,Harper Lewis,Gengar,Rare,Mint,80.00,1,2024-11-14,Fossil,Sophia Wilson,Completed 15,Evelyn Walker,Snorlax,Rare,Mint,90.00,1,2024-11-15,Jungle,Mason Martinez,Completed 16,Henry King,Charizard,Rare,Excellent,250.00,1,2024-11-16,Base Set,Ethan White,Completed 17,Isabella Moore,Mewtwo,Holo Rare,Near Mint,220.00,1,2024-11-17,Fossil,Lucas Harris,Completed 18,Sophia Wilson,Articuno,Rare,Mint,120.00,1,2024-11-18,Fossil,Amelia Clark,Completed 19,Liam Brown,Pikachu,Common,Good,10.00,5,2024-11-19,Jungle,Harper Lewis,Pending 20,Emma Green,Moltres,Rare,Excellent,140.00,1,2024-11-20,Fossil,Evelyn Walker,Completed 21,Chris Brown,Blastoise,Holo Rare,Mint,180.00,1,2024-11-21,Base Set,Henry King,Completed 22,Alex Johnson,Eevee,Common,Mint,25.00,2,2024-11-22,Jungle,Isabella Moore,Completed 23,John Doe,Dragonite,Ultra Rare,Near Mint,320.00,1,2024-11-23,Base Set,Jane Smith,Pending 24,Jane Smith,Machamp,Holo Rare,Good,70.00,1,2024-11-24,Base Set,Alex Johnson,Completed 25,Sarah White,Vaporeon,Rare,Excellent,100.00,1,2024-11-25,Jungle,Chris Brown,Cancelled 26,Olivia Taylor,Jolteon,Rare,Mint,85.00,1,2024-11-26,Jungle,Emma Green,Completed 27,Henry King,Zapdos,Holo Rare,Good,140.00,1,2024-11-27,Fossil,Sophia Wilson,Completed 28,Ethan White,Gengar,Rare,Excellent,75.00,1,2024-11-28,Fossil,Mason Martinez,Completed 29,Amelia Clark,Mewtwo,Holo Rare,Mint,230.00,1,2024-11-29,Fossil,Ethan White,Completed 30,Lucas Harris,Charizard,Rare,Near Mint,250.00,1,2024-11-30,Base Set,Lucas Harris,Completed 31,Harper Lewis,Snorlax,Rare,Excellent,90.00,1,2024-12-01,Jungle,Liam Brown,Completed 32,Sophia Wilson,Flareon,Rare,Good,85.00,1,2024-12-02,Jungle,Isabella Moore,Pending 33,Mason Martinez,Articuno,Rare,Mint,120.00,1,2024-12-03,Fossil,Harper Lewis,Completed 34,Emma Green,Moltres,Holo Rare,Mint,140.00,1,2024-12-04,Fossil,Henry King,Completed 35,John 
Doe,Pikachu,Common,Mint,15.00,3,2024-12-05,Jungle,Chris Brown,Completed 36,Jane Smith,Dragonite,Ultra Rare,Excellent,300.00,1,2024-12-06,Base Set,Sophia Wilson,Completed 37,Alex Johnson,Machamp,Holo Rare,Mint,75.00,1,2024-12-07,Base Set,Emma Green,Completed 38,Chris Brown,Vaporeon,Rare,Good,90.00,1,2024-12-08,Jungle,John Doe,Completed 39,Olivia Taylor,Jolteon,Rare,Near Mint,80.00,1,2024-12-09,Jungle,Jane Smith,Pending 40,Ethan White,Gengar,Rare,Mint,85.00,1,2024-12-10,Fossil,Liam Brown,Completed 41,Amelia Clark,Eevee,Common,Excellent,25.00,3,2024-12-11,Jungle,Olivia Taylor,Completed 42,Sophia Wilson,Charizard,Rare,Good,220.00,1,2024-12-12,Base Set,Alex Johnson,Cancelled 43,Lucas Harris,Zapdos,Holo Rare,Mint,150.00,1,2024-12-13,Fossil,Emma Green,Completed 44,Harper Lewis,Mewtwo,Ultra Rare,Near Mint,200.00,1,2024-12-14,Fossil,Sarah White,Completed 45,Henry King,Lapras,Rare,Mint,95.00,1,2024-12-16,Fossil,Sophia Wilson,Completed 46,Ethan White,Ditto,Rare,Excellent,85.00,1,2024-12-17,Fossil,Amelia Clark,Completed 47,Sarah White,Bulbasaur,Common,Near Mint,12.00,5,2024-12-18,Base Set,Lucas Harris,Completed 48,Emma Green,Charmander,Common,Mint,15.00,4,2024-12-19,Base Set,Chris Brown,Pending 49,Jane Smith,Squirtle,Common,Good,10.00,6,2024-12-20,Base Set,Mason Martinez,Completed 50,John Doe,Jigglypuff,Common,Excellent,8.00,10,2024-12-21,Jungle,Liam Brown,Completed 51,Olivia Taylor,Clefairy,Rare,Mint,50.00,1,2024-12-22,Base Set,Ethan White,Completed 52,Lucas Harris,Nidoking,Holo Rare,Good,125.00,1,2024-12-23,Base Set,John Doe,Cancelled 53,Alex Johnson,Hitmonchan,Holo Rare,Near Mint,100.00,1,2024-12-24,Base Set,Jane Smith,Completed 54,Sophia Wilson,Kangaskhan,Rare,Excellent,80.00,1,2024-12-25,Jungle,Henry King,Completed 55,Chris Brown,Scyther,Rare,Mint,85.00,1,2024-12-26,Jungle,Emma Green,Completed 56,Harper Lewis,Pinsir,Rare,Near Mint,70.00,1,2024-12-27,Jungle,Olivia Taylor,Completed 57,Mason Martinez,Aerodactyl,Rare,Good,100.00,1,2024-12-28,Fossil,Sarah White,Completed 58,Liam Brown,Kabutops,Rare,Mint,105.00,1,2024-12-29,Fossil,Alex Johnson,Completed 59,Evelyn Walker,Magikarp,Common,Excellent,5.00,20,2024-12-30,Base Set,Lucas Harris,Completed 60,Amelia Clark,Gyarados,Holo Rare,Near Mint,150.00,1,2024-12-31,Base Set,Sophia Wilson,Pending 61,Sarah White,Ditto,Rare,Mint,90.00,1,2025-01-01,Fossil,Henry King,Completed 62,Emma Green,Pidgeot,Rare,Good,70.00,1,2025-01-02,Jungle,Chris Brown,Completed 63,John Doe,Electabuzz,Rare,Excellent,60.00,2,2025-01-03,Base Set,Liam Brown,Completed 64,Jane Smith,Magmar,Rare,Mint,55.00,1,2025-01-04,Fossil,Mason Martinez,Completed 65,Olivia Taylor,Jynx,Common,Excellent,30.00,3,2025-01-05,Base Set,Ethan White,Completed 66,Alex Johnson,Alakazam,Holo Rare,Mint,175.00,1,2025-01-06,Base Set,Jane Smith,Completed 67,Sophia Wilson,Chansey,Holo Rare,Good,100.00,1,2025-01-07,Base Set,Olivia Taylor,Completed 68,Chris Brown,Geodude,Common,Near Mint,5.00,12,2025-01-08,Base Set,John Doe,Completed 69,Henry King,Grimer,Common,Excellent,7.00,8,2025-01-09,Fossil,Emma Green,Completed 70,Ethan White,Muk,Rare,Mint,85.00,1,2025-01-10,Fossil,Sophia Wilson,Completed 71,Harper Lewis,Rhydon,Rare,Good,75.00,1,2025-01-11,Jungle,Chris Brown,Cancelled 72,Mason Martinez,Tauros,Common,Near Mint,10.00,10,2025-01-12,Jungle,Alex Johnson,Completed 73,Evelyn Walker,Exeggutor,Rare,Mint,65.00,1,2025-01-13,Jungle,Sarah White,Completed 74,Lucas Harris,Venonat,Common,Excellent,5.00,15,2025-01-14,Jungle,Harper Lewis,Pending -------------------------------------------------------------------------------- 
/src/ploosh/logs.py: -------------------------------------------------------------------------------- 1 | """Module for log functions""" 2 | 3 | import math 4 | import os 5 | import re 6 | import shutil 7 | from datetime import datetime 8 | from colorama import Fore, Style 9 | from version import PLOOSH_VERSION 10 | 11 | class Log: 12 | """Log class contain all functions to log""" 13 | 14 | @staticmethod 15 | def init(): 16 | """Initialize log settings and create log directory""" 17 | Log.LEVELS_PRINT = { 18 | "INFO": Fore.GREEN, 19 | "WARN": Fore.YELLOW, 20 | "ERRO": Fore.RED, 21 | } 22 | 23 | # Get terminal size and set console log space 24 | Log.CONSOLE_WIDTH = shutil.get_terminal_size(fallback=(120, 50)).columns 25 | Log.CONSOLE_WIDTH_GAP = 29 26 | Log.CONSOLE_LOG_SPACE = Log.CONSOLE_WIDTH - Log.CONSOLE_WIDTH_GAP 27 | 28 | # Set log folder and log file path 29 | Log.LOGS_FOLDER = "./logs" 30 | Log.LOGS_PATH = f"{Log.LOGS_FOLDER}/ploosh_{datetime.now().strftime('%Y%m%d%H%M%S')}.log" 31 | 32 | # Create log folder if it doesn't exist 33 | os.makedirs(Log.LOGS_FOLDER, exist_ok=True) 34 | 35 | @staticmethod 36 | def print(message: str, level: str = "INFO", filler: str = "."): 37 | """Print a message with all metadata informations""" 38 | date_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S") 39 | 40 | # Determine the number of filler characters needed 41 | count_filler = 1 if message.count("[...]") == 0 else message.count("[...]") 42 | 43 | # Remove ANSI escape sequences from the message 44 | raw_message = re.sub(r"[^\w ]*[\d]+m", "", message) 45 | print_length = len(raw_message) 46 | feed_characters = filler * math.trunc( 47 | (Log.CONSOLE_LOG_SPACE - print_length + (5 * count_filler)) / count_filler 48 | ) 49 | message = message.replace("[...]", feed_characters) 50 | 51 | rows_to_print = [message] 52 | # Disable coloration for multi-line messages 53 | if print_length > Log.CONSOLE_LOG_SPACE or "\n" in message: 54 | rows_to_print = [] 55 | message_rows = raw_message.split("\n") 56 | for row in message_rows: 57 | rows_count = math.ceil(len(row) / Log.CONSOLE_LOG_SPACE) 58 | for i in range(0, rows_count): 59 | start = i * Log.CONSOLE_LOG_SPACE 60 | end = (i + 1) * Log.CONSOLE_LOG_SPACE 61 | rows_to_print.append(row[start:end]) 62 | 63 | # Format each row with date, time, and log level 64 | rows_to_print = [ 65 | f"{Fore.CYAN}[{date_time}] {Log.LEVELS_PRINT[level]}[{level}]{Style.RESET_ALL} {row}{Style.RESET_ALL}" 66 | for row in rows_to_print 67 | ] 68 | 69 | # Print each row to the console 70 | for row in rows_to_print: 71 | print(row) 72 | 73 | # Write the log to the log file 74 | with open(Log.LOGS_PATH, "a", encoding="UTF-8") as f: 75 | log_text = "\r\n".join(rows_to_print) + "\r\n" 76 | 77 | # Remove color codes from the log text 78 | for key in Fore.__dict__: 79 | log_text = log_text.replace(Fore.__dict__[key], "") 80 | 81 | for key in Style.__dict__: 82 | log_text = log_text.replace(Style.__dict__[key], "") 83 | 84 | f.write(log_text) 85 | 86 | @staticmethod 87 | def print_error(message: str): 88 | """Print an error message with all metadata informations""" 89 | Log.print(message, "ERRO") 90 | 91 | @staticmethod 92 | def print_warning(message: str): 93 | """Print a warning message with all metadata informations""" 94 | Log.print(message, "WARN") 95 | 96 | @staticmethod 97 | def print_logo(): 98 | """Print the ATF logo""" 99 | Log.print(r"[...]", filler="~") 100 | Log.print(r"[...] 
.__ .__ [...]", filler=" ") 101 | Log.print(r"[...]______ | | ____ ____ _____| |__ [...]", filler=" ") 102 | Log.print(r"[...]\____ \| | / _ \ / _ \/ ___| | \ [...]", filler=" ") 103 | Log.print(r"[...]| |_> | |_( <_> ( <_> \___ \| Y \[...]", filler=" ") 104 | Log.print(r"[...]| __/|____/\____/ \____/____ |___| /[...]", filler=" ") 105 | Log.print(r"[...]|__| \/ \/ [...]", filler=" ") 106 | Log.print(f"[...]Automatized Testing Framework (v {PLOOSH_VERSION})[...]", filler=" ") 107 | Log.print(r"[...]", filler=" ") 108 | Log.print(r"[...]https://github.com/CSharplie/ploosh", filler=" ") 109 | Log.print(r"[...]", filler="~") 110 | 111 | 112 | def print_compare_state(current_case): 113 | """Print the comparison state of a test case""" 114 | 115 | state = current_case.state.upper() 116 | state_matrix = { 117 | "FAILED": {"color": Fore.YELLOW, "function": Log.print_warning}, 118 | "ERROR": {"color": Fore.RED, "function": Log.print_error}, 119 | "PASSED": {"color": Fore.GREEN, "function": Log.print}, 120 | } 121 | state_item = state_matrix[state] 122 | state_item["function"](f"Compare state: {state_item['color']}{state}") 123 | 124 | if state != "PASSED": 125 | state_item["function"](f"Error type : {state_item['color']}{current_case.error_type.upper()}") 126 | state_item["function"](f"Error message: {state_item['color']}{current_case.error_message}") 127 | 128 | 129 | def print_summary(cases, statistics): 130 | """Print a summary of test case results""" 131 | for case_name in cases: 132 | state = cases[case_name].state 133 | color = Fore.CYAN 134 | 135 | if state == "error": 136 | color = Fore.RED 137 | if state == "passed": 138 | color = Fore.GREEN 139 | if state == "failed": 140 | color = Fore.YELLOW 141 | 142 | if state == "notExecuted": 143 | state = "skipped" 144 | 145 | Log.print(f"{case_name} [...] 
{color}{state.upper()}") 146 | 147 | # Print overall statistics 148 | message = f"passed: {Fore.GREEN}{statistics.passed}{Style.RESET_ALL}, " 149 | message += f"failed: {Fore.YELLOW}{statistics.failed}{Style.RESET_ALL}, " 150 | message += f"error: {Fore.RED}{statistics.error}{Style.RESET_ALL}, " 151 | message += f"skipped: {Fore.CYAN}{statistics.not_executed}{Style.RESET_ALL}" 152 | 153 | Log.print(message) 154 | -------------------------------------------------------------------------------- /tests/.data/sales.csv: -------------------------------------------------------------------------------- 1 | sale_id,seller_name,card_name,card_rarity,card_condition,price,quantity,sale_date,card_set,buyer_name,transaction_status 2 | 1,John Doe,Charizard,Rare,Mint,250.00,1,2024-11-01,Base Set,Jane Smith,Completed 3 | 2,Jane Smith,Blastoise,Holo Rare,Excellent,180.00,1,2024-11-02,Base Set,Alex Johnson,Completed 4 | 3,Alex Johnson,Pikachu,Common,Near Mint,15.00,4,2024-11-03,Jungle,Chris Brown,Completed 5 | 4,Chris Brown,Dragonite,Ultra Rare,Good,300.00,1,2024-11-04,Fossil,Emma Green,Pending 6 | 5,Emma Green,Zapdos,Holo Rare,Mint,150.00,1,2024-11-05,Fossil,Sarah White,Completed 7 | 6,Sarah White,Venusaur,Rare,Good,120.00,2,2024-11-06,Base Set,John Doe,Completed 8 | 7,Liam Brown,Moltres,Rare,Near Mint,140.00,1,2024-11-07,Fossil,Jane Smith,Completed 9 | 8,Olivia Taylor,Articuno,Rare,Excellent,100.00,1,2024-11-08,Fossil,Alex Johnson,Cancelled 10 | 9,Sophia Wilson,Eevee,Common,Mint,20.00,3,2024-11-09,Jungle,Chris Brown,Completed 11 | 10,Mason Martinez,Jolteon,Rare,Near Mint,80.00,1,2024-11-10,Jungle,Emma Green,Completed 12 | 11,Ethan White,Flareon,Rare,Excellent,85.00,1,2024-11-11,Jungle,Sarah White,Completed 13 | 12,Lucas Harris,Vaporeon,Rare,Mint,100.00,1,2024-11-12,Jungle,Liam Brown,Completed 14 | 13,Amelia Clark,Machamp,Holo Rare,Mint,75.00,1,2024-11-13,Base Set,Olivia Taylor,Completed 15 | 14,Harper Lewis,Gengar,Rare,Mint,80.00,1,2024-11-14,Fossil,Sophia Wilson,Completed 16 | 15,Evelyn Walker,Snorlax,Rare,Mint,90.00,1,2024-11-15,Jungle,Mason Martinez,Completed 17 | 16,Henry King,Charizard,Rare,Excellent,250.00,1,2024-11-16,Base Set,Ethan White,Completed 18 | 17,Isabella Moore,Mewtwo,Holo Rare,Near Mint,220.00,1,2024-11-17,Fossil,Lucas Harris,Completed 19 | 18,Sophia Wilson,Articuno,Rare,Mint,120.00,1,2024-11-18,Fossil,Amelia Clark,Completed 20 | 19,Liam Brown,Pikachu,Common,Good,10.00,5,2024-11-19,Jungle,Harper Lewis,Pending 21 | 20,Emma Green,Moltres,Rare,Excellent,140.00,1,2024-11-20,Fossil,Evelyn Walker,Completed 22 | 21,Chris Brown,Blastoise,Holo Rare,Mint,180.00,1,2024-11-21,Base Set,Henry King,Completed 23 | 22,Alex Johnson,Eevee,Common,Mint,25.00,2,2024-11-22,Jungle,Isabella Moore,Completed 24 | 23,John Doe,Dragonite,Ultra Rare,Near Mint,320.00,1,2024-11-23,Base Set,Jane Smith,Pending 25 | 24,Jane Smith,Machamp,Holo Rare,Good,70.00,1,2024-11-24,Base Set,Alex Johnson,Completed 26 | 25,Sarah White,Vaporeon,Rare,Excellent,100.00,1,2024-11-25,Jungle,Chris Brown,Cancelled 27 | 26,Olivia Taylor,Jolteon,Rare,Mint,85.00,1,2024-11-26,Jungle,Emma Green,Completed 28 | 27,Henry King,Zapdos,Holo Rare,Good,140.00,1,2024-11-27,Fossil,Sophia Wilson,Completed 29 | 28,Ethan White,Gengar,Rare,Excellent,75.00,1,2024-11-28,Fossil,Mason Martinez,Completed 30 | 29,Amelia Clark,Mewtwo,Holo Rare,Mint,230.00,1,2024-11-29,Fossil,Ethan White,Completed 31 | 30,Lucas Harris,Charizard,Rare,Near Mint,250.00,1,2024-11-30,Base Set,Lucas Harris,Completed 32 | 31,Harper Lewis,Snorlax,Rare,Excellent,90.00,1,2024-12-01,Jungle,Liam 
Brown,Completed 33 | 32,Sophia Wilson,Flareon,Rare,Good,85.00,1,2024-12-02,Jungle,Isabella Moore,Pending 34 | 33,Mason Martinez,Articuno,Rare,Mint,120.00,1,2024-12-03,Fossil,Harper Lewis,Completed 35 | 34,Emma Green,Moltres,Holo Rare,Mint,140.00,1,2024-12-04,Fossil,Henry King,Completed 36 | 35,John Doe,Pikachu,Common,Mint,15.00,3,2024-12-05,Jungle,Chris Brown,Completed 37 | 36,Jane Smith,Dragonite,Ultra Rare,Excellent,300.00,1,2024-12-06,Base Set,Sophia Wilson,Completed 38 | 37,Alex Johnson,Machamp,Holo Rare,Mint,75.00,1,2024-12-07,Base Set,Emma Green,Completed 39 | 38,Chris Brown,Vaporeon,Rare,Good,90.00,1,2024-12-08,Jungle,John Doe,Completed 40 | 39,Olivia Taylor,Jolteon,Rare,Near Mint,80.00,1,2024-12-09,Jungle,Jane Smith,Pending 41 | 40,Ethan White,Gengar,Rare,Mint,85.00,1,2024-12-10,Fossil,Liam Brown,Completed 42 | 41,Amelia Clark,Eevee,Common,Excellent,25.00,3,2024-12-11,Jungle,Olivia Taylor,Completed 43 | 42,Sophia Wilson,Charizard,Rare,Good,220.00,1,2024-12-12,Base Set,Alex Johnson,Cancelled 44 | 43,Lucas Harris,Zapdos,Holo Rare,Mint,150.00,1,2024-12-13,Fossil,Emma Green,Completed 45 | 44,Harper Lewis,Mewtwo,Ultra Rare,Near Mint,200.00,1,2024-12-14,Fossil,Sarah White,Completed 46 | 45,Henry King,Lapras,Rare,Mint,95.00,1,2024-12-16,Fossil,Sophia Wilson,Completed 47 | 46,Ethan White,Ditto,Rare,Excellent,85.00,1,2024-12-17,Fossil,Amelia Clark,Completed 48 | 47,Sarah White,Bulbasaur,Common,Near Mint,12.00,5,2024-12-18,Base Set,Lucas Harris,Completed 49 | 48,Emma Green,Charmander,Common,Mint,15.00,4,2024-12-19,Base Set,Chris Brown,Pending 50 | 49,Jane Smith,Squirtle,Common,Good,10.00,6,2024-12-20,Base Set,Mason Martinez,Completed 51 | 50,John Doe,Jigglypuff,Common,Excellent,8.00,10,2024-12-21,Jungle,Liam Brown,Completed 52 | 51,Olivia Taylor,Clefairy,Rare,Mint,50.00,1,2024-12-22,Base Set,Ethan White,Completed 53 | 52,Lucas Harris,Nidoking,Holo Rare,Good,125.00,1,2024-12-23,Base Set,John Doe,Cancelled 54 | 53,Alex Johnson,Hitmonchan,Holo Rare,Near Mint,100.00,1,2024-12-24,Base Set,Jane Smith,Completed 55 | 54,Sophia Wilson,Kangaskhan,Rare,Excellent,80.00,1,2024-12-25,Jungle,Henry King,Completed 56 | 55,Chris Brown,Scyther,Rare,Mint,85.00,1,2024-12-26,Jungle,Emma Green,Completed 57 | 56,Harper Lewis,Pinsir,Rare,Near Mint,70.00,1,2024-12-27,Jungle,Olivia Taylor,Completed 58 | 57,Mason Martinez,Aerodactyl,Rare,Good,100.00,1,2024-12-28,Fossil,Sarah White,Completed 59 | 58,Liam Brown,Kabutops,Rare,Mint,105.00,1,2024-12-29,Fossil,Alex Johnson,Completed 60 | 59,Evelyn Walker,Magikarp,Common,Excellent,5.00,20,2024-12-30,Base Set,Lucas Harris,Completed 61 | 60,Amelia Clark,Gyarados,Holo Rare,Near Mint,150.00,1,2024-12-31,Base Set,Sophia Wilson,Pending 62 | 61,Sarah White,Ditto,Rare,Mint,90.00,1,2025-01-01,Fossil,Henry King,Completed 63 | 62,Emma Green,Pidgeot,Rare,Good,70.00,1,2025-01-02,Jungle,Chris Brown,Completed 64 | 63,John Doe,Electabuzz,Rare,Excellent,60.00,2,2025-01-03,Base Set,Liam Brown,Completed 65 | 64,Jane Smith,Magmar,Rare,Mint,55.00,1,2025-01-04,Fossil,Mason Martinez,Completed 66 | 65,Olivia Taylor,Jynx,Common,Excellent,30.00,3,2025-01-05,Base Set,Ethan White,Completed 67 | 66,Alex Johnson,Alakazam,Holo Rare,Mint,175.00,1,2025-01-06,Base Set,Jane Smith,Completed 68 | 67,Sophia Wilson,Chansey,Holo Rare,Good,100.00,1,2025-01-07,Base Set,Olivia Taylor,Completed 69 | 68,Chris Brown,Geodude,Common,Near Mint,5.00,12,2025-01-08,Base Set,John Doe,Completed 70 | 69,Henry King,Grimer,Common,Excellent,7.00,8,2025-01-09,Fossil,Emma Green,Completed 71 | 70,Ethan 
White,Muk,Rare,Mint,85.00,1,2025-01-10,Fossil,Sophia Wilson,Completed 72 | 71,Harper Lewis,Rhydon,Rare,Good,75.00,1,2025-01-11,Jungle,Chris Brown,Cancelled 73 | 72,Mason Martinez,Tauros,Common,Near Mint,10.00,10,2025-01-12,Jungle,Alex Johnson,Completed 74 | 73,Evelyn Walker,Exeggutor,Rare,Mint,65.00,1,2025-01-13,Jungle,Sarah White,Completed 75 | 74,Lucas Harris,Venonat,Common,Excellent,5.00,15,2025-01-14,Jungle,Harper Lewis,Pending -------------------------------------------------------------------------------- /tests/.env/csv/sales_with_tab.csv: -------------------------------------------------------------------------------- 1 | sale_id seller_name card_name card_rarity card_condition price quantity sale_date card_set buyer_name transaction_status 2 | 1 John Doe Charizard Rare Mint 250.00 1 2024-11-01 Base Set Jane Smith Completed 3 | 2 Jane Smith Blastoise Holo Rare Excellent 180.00 1 2024-11-02 Base Set Alex Johnson Completed 4 | 3 Alex Johnson Pikachu Common Near Mint 15.00 4 2024-11-03 Jungle Chris Brown Completed 5 | 4 Chris Brown Dragonite Ultra Rare Good 300.00 1 2024-11-04 Fossil Emma Green Pending 6 | 5 Emma Green Zapdos Holo Rare Mint 150.00 1 2024-11-05 Fossil Sarah White Completed 7 | 6 Sarah White Venusaur Rare Good 120.00 2 2024-11-06 Base Set John Doe Completed 8 | 7 Liam Brown Moltres Rare Near Mint 140.00 1 2024-11-07 Fossil Jane Smith Completed 9 | 8 Olivia Taylor Articuno Rare Excellent 100.00 1 2024-11-08 Fossil Alex Johnson Cancelled 10 | 9 Sophia Wilson Eevee Common Mint 20.00 3 2024-11-09 Jungle Chris Brown Completed 11 | 10 Mason Martinez Jolteon Rare Near Mint 80.00 1 2024-11-10 Jungle Emma Green Completed 12 | 11 Ethan White Flareon Rare Excellent 85.00 1 2024-11-11 Jungle Sarah White Completed 13 | 12 Lucas Harris Vaporeon Rare Mint 100.00 1 2024-11-12 Jungle Liam Brown Completed 14 | 13 Amelia Clark Machamp Holo Rare Mint 75.00 1 2024-11-13 Base Set Olivia Taylor Completed 15 | 14 Harper Lewis Gengar Rare Mint 80.00 1 2024-11-14 Fossil Sophia Wilson Completed 16 | 15 Evelyn Walker Snorlax Rare Mint 90.00 1 2024-11-15 Jungle Mason Martinez Completed 17 | 16 Henry King Charizard Rare Excellent 250.00 1 2024-11-16 Base Set Ethan White Completed 18 | 17 Isabella Moore Mewtwo Holo Rare Near Mint 220.00 1 2024-11-17 Fossil Lucas Harris Completed 19 | 18 Sophia Wilson Articuno Rare Mint 120.00 1 2024-11-18 Fossil Amelia Clark Completed 20 | 19 Liam Brown Pikachu Common Good 10.00 5 2024-11-19 Jungle Harper Lewis Pending 21 | 20 Emma Green Moltres Rare Excellent 140.00 1 2024-11-20 Fossil Evelyn Walker Completed 22 | 21 Chris Brown Blastoise Holo Rare Mint 180.00 1 2024-11-21 Base Set Henry King Completed 23 | 22 Alex Johnson Eevee Common Mint 25.00 2 2024-11-22 Jungle Isabella Moore Completed 24 | 23 John Doe Dragonite Ultra Rare Near Mint 320.00 1 2024-11-23 Base Set Jane Smith Pending 25 | 24 Jane Smith Machamp Holo Rare Good 70.00 1 2024-11-24 Base Set Alex Johnson Completed 26 | 25 Sarah White Vaporeon Rare Excellent 100.00 1 2024-11-25 Jungle Chris Brown Cancelled 27 | 26 Olivia Taylor Jolteon Rare Mint 85.00 1 2024-11-26 Jungle Emma Green Completed 28 | 27 Henry King Zapdos Holo Rare Good 140.00 1 2024-11-27 Fossil Sophia Wilson Completed 29 | 28 Ethan White Gengar Rare Excellent 75.00 1 2024-11-28 Fossil Mason Martinez Completed 30 | 29 Amelia Clark Mewtwo Holo Rare Mint 230.00 1 2024-11-29 Fossil Ethan White Completed 31 | 30 Lucas Harris Charizard Rare Near Mint 250.00 1 2024-11-30 Base Set Lucas Harris Completed 32 | 31 Harper Lewis Snorlax Rare Excellent 
90.00 1 2024-12-01 Jungle Liam Brown Completed 33 | 32 Sophia Wilson Flareon Rare Good 85.00 1 2024-12-02 Jungle Isabella Moore Pending 34 | 33 Mason Martinez Articuno Rare Mint 120.00 1 2024-12-03 Fossil Harper Lewis Completed 35 | 34 Emma Green Moltres Holo Rare Mint 140.00 1 2024-12-04 Fossil Henry King Completed 36 | 35 John Doe Pikachu Common Mint 15.00 3 2024-12-05 Jungle Chris Brown Completed 37 | 36 Jane Smith Dragonite Ultra Rare Excellent 300.00 1 2024-12-06 Base Set Sophia Wilson Completed 38 | 37 Alex Johnson Machamp Holo Rare Mint 75.00 1 2024-12-07 Base Set Emma Green Completed 39 | 38 Chris Brown Vaporeon Rare Good 90.00 1 2024-12-08 Jungle John Doe Completed 40 | 39 Olivia Taylor Jolteon Rare Near Mint 80.00 1 2024-12-09 Jungle Jane Smith Pending 41 | 40 Ethan White Gengar Rare Mint 85.00 1 2024-12-10 Fossil Liam Brown Completed 42 | 41 Amelia Clark Eevee Common Excellent 25.00 3 2024-12-11 Jungle Olivia Taylor Completed 43 | 42 Sophia Wilson Charizard Rare Good 220.00 1 2024-12-12 Base Set Alex Johnson Cancelled 44 | 43 Lucas Harris Zapdos Holo Rare Mint 150.00 1 2024-12-13 Fossil Emma Green Completed 45 | 44 Harper Lewis Mewtwo Ultra Rare Near Mint 200.00 1 2024-12-14 Fossil Sarah White Completed 46 | 45 Henry King Lapras Rare Mint 95.00 1 2024-12-16 Fossil Sophia Wilson Completed 47 | 46 Ethan White Ditto Rare Excellent 85.00 1 2024-12-17 Fossil Amelia Clark Completed 48 | 47 Sarah White Bulbasaur Common Near Mint 12.00 5 2024-12-18 Base Set Lucas Harris Completed 49 | 48 Emma Green Charmander Common Mint 15.00 4 2024-12-19 Base Set Chris Brown Pending 50 | 49 Jane Smith Squirtle Common Good 10.00 6 2024-12-20 Base Set Mason Martinez Completed 51 | 50 John Doe Jigglypuff Common Excellent 8.00 10 2024-12-21 Jungle Liam Brown Completed 52 | 51 Olivia Taylor Clefairy Rare Mint 50.00 1 2024-12-22 Base Set Ethan White Completed 53 | 52 Lucas Harris Nidoking Holo Rare Good 125.00 1 2024-12-23 Base Set John Doe Cancelled 54 | 53 Alex Johnson Hitmonchan Holo Rare Near Mint 100.00 1 2024-12-24 Base Set Jane Smith Completed 55 | 54 Sophia Wilson Kangaskhan Rare Excellent 80.00 1 2024-12-25 Jungle Henry King Completed 56 | 55 Chris Brown Scyther Rare Mint 85.00 1 2024-12-26 Jungle Emma Green Completed 57 | 56 Harper Lewis Pinsir Rare Near Mint 70.00 1 2024-12-27 Jungle Olivia Taylor Completed 58 | 57 Mason Martinez Aerodactyl Rare Good 100.00 1 2024-12-28 Fossil Sarah White Completed 59 | 58 Liam Brown Kabutops Rare Mint 105.00 1 2024-12-29 Fossil Alex Johnson Completed 60 | 59 Evelyn Walker Magikarp Common Excellent 5.00 20 2024-12-30 Base Set Lucas Harris Completed 61 | 60 Amelia Clark Gyarados Holo Rare Near Mint 150.00 1 2024-12-31 Base Set Sophia Wilson Pending 62 | 61 Sarah White Ditto Rare Mint 90.00 1 2025-01-01 Fossil Henry King Completed 63 | 62 Emma Green Pidgeot Rare Good 70.00 1 2025-01-02 Jungle Chris Brown Completed 64 | 63 John Doe Electabuzz Rare Excellent 60.00 2 2025-01-03 Base Set Liam Brown Completed 65 | 64 Jane Smith Magmar Rare Mint 55.00 1 2025-01-04 Fossil Mason Martinez Completed 66 | 65 Olivia Taylor Jynx Common Excellent 30.00 3 2025-01-05 Base Set Ethan White Completed 67 | 66 Alex Johnson Alakazam Holo Rare Mint 175.00 1 2025-01-06 Base Set Jane Smith Completed 68 | 67 Sophia Wilson Chansey Holo Rare Good 100.00 1 2025-01-07 Base Set Olivia Taylor Completed 69 | 68 Chris Brown Geodude Common Near Mint 5.00 12 2025-01-08 Base Set John Doe Completed 70 | 69 Henry King Grimer Common Excellent 7.00 8 2025-01-09 Fossil Emma Green Completed 71 | 70 Ethan 
White Muk Rare Mint 85.00 1 2025-01-10 Fossil Sophia Wilson Completed 72 | 71 Harper Lewis Rhydon Rare Good 75.00 1 2025-01-11 Jungle Chris Brown Cancelled 73 | 72 Mason Martinez Tauros Common Near Mint 10.00 10 2025-01-12 Jungle Alex Johnson Completed 74 | 73 Evelyn Walker Exeggutor Rare Mint 65.00 1 2025-01-13 Jungle Sarah White Completed 75 | 74 Lucas Harris Venonat Common Excellent 5.00 15 2025-01-14 Jungle Harper Lewis Pending -------------------------------------------------------------------------------- /tests/.env/csv/sales_with_comma.csv: -------------------------------------------------------------------------------- 1 | sale_id,seller_name,card_name,card_rarity,card_condition,price,quantity,sale_date,card_set,buyer_name,transaction_status 2 | 1,John Doe,Charizard,Rare,Mint,250.00,1,2024-11-01,Base Set,Jane Smith,Completed 3 | 2,Jane Smith,Blastoise,Holo Rare,Excellent,180.00,1,2024-11-02,Base Set,Alex Johnson,Completed 4 | 3,Alex Johnson,Pikachu,Common,Near Mint,15.00,4,2024-11-03,Jungle,Chris Brown,Completed 5 | 4,Chris Brown,Dragonite,Ultra Rare,Good,300.00,1,2024-11-04,Fossil,Emma Green,Pending 6 | 5,Emma Green,Zapdos,Holo Rare,Mint,150.00,1,2024-11-05,Fossil,Sarah White,Completed 7 | 6,Sarah White,Venusaur,Rare,Good,120.00,2,2024-11-06,Base Set,John Doe,Completed 8 | 7,Liam Brown,Moltres,Rare,Near Mint,140.00,1,2024-11-07,Fossil,Jane Smith,Completed 9 | 8,Olivia Taylor,Articuno,Rare,Excellent,100.00,1,2024-11-08,Fossil,Alex Johnson,Cancelled 10 | 9,Sophia Wilson,Eevee,Common,Mint,20.00,3,2024-11-09,Jungle,Chris Brown,Completed 11 | 10,Mason Martinez,Jolteon,Rare,Near Mint,80.00,1,2024-11-10,Jungle,Emma Green,Completed 12 | 11,Ethan White,Flareon,Rare,Excellent,85.00,1,2024-11-11,Jungle,Sarah White,Completed 13 | 12,Lucas Harris,Vaporeon,Rare,Mint,100.00,1,2024-11-12,Jungle,Liam Brown,Completed 14 | 13,Amelia Clark,Machamp,Holo Rare,Mint,75.00,1,2024-11-13,Base Set,Olivia Taylor,Completed 15 | 14,Harper Lewis,Gengar,Rare,Mint,80.00,1,2024-11-14,Fossil,Sophia Wilson,Completed 16 | 15,Evelyn Walker,Snorlax,Rare,Mint,90.00,1,2024-11-15,Jungle,Mason Martinez,Completed 17 | 16,Henry King,Charizard,Rare,Excellent,250.00,1,2024-11-16,Base Set,Ethan White,Completed 18 | 17,Isabella Moore,Mewtwo,Holo Rare,Near Mint,220.00,1,2024-11-17,Fossil,Lucas Harris,Completed 19 | 18,Sophia Wilson,Articuno,Rare,Mint,120.00,1,2024-11-18,Fossil,Amelia Clark,Completed 20 | 19,Liam Brown,Pikachu,Common,Good,10.00,5,2024-11-19,Jungle,Harper Lewis,Pending 21 | 20,Emma Green,Moltres,Rare,Excellent,140.00,1,2024-11-20,Fossil,Evelyn Walker,Completed 22 | 21,Chris Brown,Blastoise,Holo Rare,Mint,180.00,1,2024-11-21,Base Set,Henry King,Completed 23 | 22,Alex Johnson,Eevee,Common,Mint,25.00,2,2024-11-22,Jungle,Isabella Moore,Completed 24 | 23,John Doe,Dragonite,Ultra Rare,Near Mint,320.00,1,2024-11-23,Base Set,Jane Smith,Pending 25 | 24,Jane Smith,Machamp,Holo Rare,Good,70.00,1,2024-11-24,Base Set,Alex Johnson,Completed 26 | 25,Sarah White,Vaporeon,Rare,Excellent,100.00,1,2024-11-25,Jungle,Chris Brown,Cancelled 27 | 26,Olivia Taylor,Jolteon,Rare,Mint,85.00,1,2024-11-26,Jungle,Emma Green,Completed 28 | 27,Henry King,Zapdos,Holo Rare,Good,140.00,1,2024-11-27,Fossil,Sophia Wilson,Completed 29 | 28,Ethan White,Gengar,Rare,Excellent,75.00,1,2024-11-28,Fossil,Mason Martinez,Completed 30 | 29,Amelia Clark,Mewtwo,Holo Rare,Mint,230.00,1,2024-11-29,Fossil,Ethan White,Completed 31 | 30,Lucas Harris,Charizard,Rare,Near Mint,250.00,1,2024-11-30,Base Set,Lucas Harris,Completed 32 | 31,Harper 
Lewis,Snorlax,Rare,Excellent,90.00,1,2024-12-01,Jungle,Liam Brown,Completed 33 | 32,Sophia Wilson,Flareon,Rare,Good,85.00,1,2024-12-02,Jungle,Isabella Moore,Pending 34 | 33,Mason Martinez,Articuno,Rare,Mint,120.00,1,2024-12-03,Fossil,Harper Lewis,Completed 35 | 34,Emma Green,Moltres,Holo Rare,Mint,140.00,1,2024-12-04,Fossil,Henry King,Completed 36 | 35,John Doe,Pikachu,Common,Mint,15.00,3,2024-12-05,Jungle,Chris Brown,Completed 37 | 36,Jane Smith,Dragonite,Ultra Rare,Excellent,300.00,1,2024-12-06,Base Set,Sophia Wilson,Completed 38 | 37,Alex Johnson,Machamp,Holo Rare,Mint,75.00,1,2024-12-07,Base Set,Emma Green,Completed 39 | 38,Chris Brown,Vaporeon,Rare,Good,90.00,1,2024-12-08,Jungle,John Doe,Completed 40 | 39,Olivia Taylor,Jolteon,Rare,Near Mint,80.00,1,2024-12-09,Jungle,Jane Smith,Pending 41 | 40,Ethan White,Gengar,Rare,Mint,85.00,1,2024-12-10,Fossil,Liam Brown,Completed 42 | 41,Amelia Clark,Eevee,Common,Excellent,25.00,3,2024-12-11,Jungle,Olivia Taylor,Completed 43 | 42,Sophia Wilson,Charizard,Rare,Good,220.00,1,2024-12-12,Base Set,Alex Johnson,Cancelled 44 | 43,Lucas Harris,Zapdos,Holo Rare,Mint,150.00,1,2024-12-13,Fossil,Emma Green,Completed 45 | 44,Harper Lewis,Mewtwo,Ultra Rare,Near Mint,200.00,1,2024-12-14,Fossil,Sarah White,Completed 46 | 45,Henry King,Lapras,Rare,Mint,95.00,1,2024-12-16,Fossil,Sophia Wilson,Completed 47 | 46,Ethan White,Ditto,Rare,Excellent,85.00,1,2024-12-17,Fossil,Amelia Clark,Completed 48 | 47,Sarah White,Bulbasaur,Common,Near Mint,12.00,5,2024-12-18,Base Set,Lucas Harris,Completed 49 | 48,Emma Green,Charmander,Common,Mint,15.00,4,2024-12-19,Base Set,Chris Brown,Pending 50 | 49,Jane Smith,Squirtle,Common,Good,10.00,6,2024-12-20,Base Set,Mason Martinez,Completed 51 | 50,John Doe,Jigglypuff,Common,Excellent,8.00,10,2024-12-21,Jungle,Liam Brown,Completed 52 | 51,Olivia Taylor,Clefairy,Rare,Mint,50.00,1,2024-12-22,Base Set,Ethan White,Completed 53 | 52,Lucas Harris,Nidoking,Holo Rare,Good,125.00,1,2024-12-23,Base Set,John Doe,Cancelled 54 | 53,Alex Johnson,Hitmonchan,Holo Rare,Near Mint,100.00,1,2024-12-24,Base Set,Jane Smith,Completed 55 | 54,Sophia Wilson,Kangaskhan,Rare,Excellent,80.00,1,2024-12-25,Jungle,Henry King,Completed 56 | 55,Chris Brown,Scyther,Rare,Mint,85.00,1,2024-12-26,Jungle,Emma Green,Completed 57 | 56,Harper Lewis,Pinsir,Rare,Near Mint,70.00,1,2024-12-27,Jungle,Olivia Taylor,Completed 58 | 57,Mason Martinez,Aerodactyl,Rare,Good,100.00,1,2024-12-28,Fossil,Sarah White,Completed 59 | 58,Liam Brown,Kabutops,Rare,Mint,105.00,1,2024-12-29,Fossil,Alex Johnson,Completed 60 | 59,Evelyn Walker,Magikarp,Common,Excellent,5.00,20,2024-12-30,Base Set,Lucas Harris,Completed 61 | 60,Amelia Clark,Gyarados,Holo Rare,Near Mint,150.00,1,2024-12-31,Base Set,Sophia Wilson,Pending 62 | 61,Sarah White,Ditto,Rare,Mint,90.00,1,2025-01-01,Fossil,Henry King,Completed 63 | 62,Emma Green,Pidgeot,Rare,Good,70.00,1,2025-01-02,Jungle,Chris Brown,Completed 64 | 63,John Doe,Electabuzz,Rare,Excellent,60.00,2,2025-01-03,Base Set,Liam Brown,Completed 65 | 64,Jane Smith,Magmar,Rare,Mint,55.00,1,2025-01-04,Fossil,Mason Martinez,Completed 66 | 65,Olivia Taylor,Jynx,Common,Excellent,30.00,3,2025-01-05,Base Set,Ethan White,Completed 67 | 66,Alex Johnson,Alakazam,Holo Rare,Mint,175.00,1,2025-01-06,Base Set,Jane Smith,Completed 68 | 67,Sophia Wilson,Chansey,Holo Rare,Good,100.00,1,2025-01-07,Base Set,Olivia Taylor,Completed 69 | 68,Chris Brown,Geodude,Common,Near Mint,5.00,12,2025-01-08,Base Set,John Doe,Completed 70 | 69,Henry King,Grimer,Common,Excellent,7.00,8,2025-01-09,Fossil,Emma 
Green,Completed 71 | 70,Ethan White,Muk,Rare,Mint,85.00,1,2025-01-10,Fossil,Sophia Wilson,Completed 72 | 71,Harper Lewis,Rhydon,Rare,Good,75.00,1,2025-01-11,Jungle,Chris Brown,Cancelled 73 | 72,Mason Martinez,Tauros,Common,Near Mint,10.00,10,2025-01-12,Jungle,Alex Johnson,Completed 74 | 73,Evelyn Walker,Exeggutor,Rare,Mint,65.00,1,2025-01-13,Jungle,Sarah White,Completed 75 | 74,Lucas Harris,Venonat,Common,Excellent,5.00,15,2025-01-14,Jungle,Harper Lewis,Pending -------------------------------------------------------------------------------- /tests/.env/csv/sales_with_iso_8859_1.csv: -------------------------------------------------------------------------------- 1 | sale_id,seller_name,card_name,card_rarity,card_condition,price,quantity,sale_date,card_set,buyer_name,transaction_status 2 | 1,John Doe,Charizard,Rare,Mint,250.00,1,2024-11-01,Base Set,Jane Smith,Completed 3 | 2,Jane Smith,Blastoise,Holo Rare,Excellent,180.00,1,2024-11-02,Base Set,Alex Johnson,Completed 4 | 3,Alex Johnson,Pikachu,Common,Near Mint,15.00,4,2024-11-03,Jungle,Chris Brown,Completed 5 | 4,Chris Brown,Dragonite,Ultra Rare,Good,300.00,1,2024-11-04,Fossil,Emma Green,Pending 6 | 5,Emma Green,Zapdos,Holo Rare,Mint,150.00,1,2024-11-05,Fossil,Sarah White,Completed 7 | 6,Sarah White,Venusaur,Rare,Good,120.00,2,2024-11-06,Base Set,John Doe,Completed 8 | 7,Liam Brown,Moltres,Rare,Near Mint,140.00,1,2024-11-07,Fossil,Jane Smith,Completed 9 | 8,Olivia Taylor,Articuno,Rare,Excellent,100.00,1,2024-11-08,Fossil,Alex Johnson,Cancelled 10 | 9,Sophia Wilson,Eevee,Common,Mint,20.00,3,2024-11-09,Jungle,Chris Brown,Completed 11 | 10,Mason Martinez,Jolteon,Rare,Near Mint,80.00,1,2024-11-10,Jungle,Emma Green,Completed 12 | 11,Ethan White,Flareon,Rare,Excellent,85.00,1,2024-11-11,Jungle,Sarah White,Completed 13 | 12,Lucas Harris,Vaporeon,Rare,Mint,100.00,1,2024-11-12,Jungle,Liam Brown,Completed 14 | 13,Amelia Clark,Machamp,Holo Rare,Mint,75.00,1,2024-11-13,Base Set,Olivia Taylor,Completed 15 | 14,Harper Lewis,Gengar,Rare,Mint,80.00,1,2024-11-14,Fossil,Sophia Wilson,Completed 16 | 15,Evelyn Walker,Snorlax,Rare,Mint,90.00,1,2024-11-15,Jungle,Mason Martinez,Completed 17 | 16,Henry King,Charizard,Rare,Excellent,250.00,1,2024-11-16,Base Set,Ethan White,Completed 18 | 17,Isabella Moore,Mewtwo,Holo Rare,Near Mint,220.00,1,2024-11-17,Fossil,Lucas Harris,Completed 19 | 18,Sophia Wilson,Articuno,Rare,Mint,120.00,1,2024-11-18,Fossil,Amelia Clark,Completed 20 | 19,Liam Brown,Pikachu,Common,Good,10.00,5,2024-11-19,Jungle,Harper Lewis,Pending 21 | 20,Emma Green,Moltres,Rare,Excellent,140.00,1,2024-11-20,Fossil,Evelyn Walker,Completed 22 | 21,Chris Brown,Blastoise,Holo Rare,Mint,180.00,1,2024-11-21,Base Set,Henry King,Completed 23 | 22,Alex Johnson,Eevee,Common,Mint,25.00,2,2024-11-22,Jungle,Isabella Moore,Completed 24 | 23,John Doe,Dragonite,Ultra Rare,Near Mint,320.00,1,2024-11-23,Base Set,Jane Smith,Pending 25 | 24,Jane Smith,Machamp,Holo Rare,Good,70.00,1,2024-11-24,Base Set,Alex Johnson,Completed 26 | 25,Sarah White,Vaporeon,Rare,Excellent,100.00,1,2024-11-25,Jungle,Chris Brown,Cancelled 27 | 26,Olivia Taylor,Jolteon,Rare,Mint,85.00,1,2024-11-26,Jungle,Emma Green,Completed 28 | 27,Henry King,Zapdos,Holo Rare,Good,140.00,1,2024-11-27,Fossil,Sophia Wilson,Completed 29 | 28,Ethan White,Gengar,Rare,Excellent,75.00,1,2024-11-28,Fossil,Mason Martinez,Completed 30 | 29,Amelia Clark,Mewtwo,Holo Rare,Mint,230.00,1,2024-11-29,Fossil,Ethan White,Completed 31 | 30,Lucas Harris,Charizard,Rare,Near Mint,250.00,1,2024-11-30,Base Set,Lucas Harris,Completed 32 | 
31,Harper Lewis,Snorlax,Rare,Excellent,90.00,1,2024-12-01,Jungle,Liam Brown,Completed 33 | 32,Sophia Wilson,Flareon,Rare,Good,85.00,1,2024-12-02,Jungle,Isabella Moore,Pending 34 | 33,Mason Martinez,Articuno,Rare,Mint,120.00,1,2024-12-03,Fossil,Harper Lewis,Completed 35 | 34,Emma Green,Moltres,Holo Rare,Mint,140.00,1,2024-12-04,Fossil,Henry King,Completed 36 | 35,John Doe,Pikachu,Common,Mint,15.00,3,2024-12-05,Jungle,Chris Brown,Completed 37 | 36,Jane Smith,Dragonite,Ultra Rare,Excellent,300.00,1,2024-12-06,Base Set,Sophia Wilson,Completed 38 | 37,Alex Johnson,Machamp,Holo Rare,Mint,75.00,1,2024-12-07,Base Set,Emma Green,Completed 39 | 38,Chris Brown,Vaporeon,Rare,Good,90.00,1,2024-12-08,Jungle,John Doe,Completed 40 | 39,Olivia Taylor,Jolteon,Rare,Near Mint,80.00,1,2024-12-09,Jungle,Jane Smith,Pending 41 | 40,Ethan White,Gengar,Rare,Mint,85.00,1,2024-12-10,Fossil,Liam Brown,Completed 42 | 41,Amelia Clark,Eevee,Common,Excellent,25.00,3,2024-12-11,Jungle,Olivia Taylor,Completed 43 | 42,Sophia Wilson,Charizard,Rare,Good,220.00,1,2024-12-12,Base Set,Alex Johnson,Cancelled 44 | 43,Lucas Harris,Zapdos,Holo Rare,Mint,150.00,1,2024-12-13,Fossil,Emma Green,Completed 45 | 44,Harper Lewis,Mewtwo,Ultra Rare,Near Mint,200.00,1,2024-12-14,Fossil,Sarah White,Completed 46 | 45,Henry King,Lapras,Rare,Mint,95.00,1,2024-12-16,Fossil,Sophia Wilson,Completed 47 | 46,Ethan White,Ditto,Rare,Excellent,85.00,1,2024-12-17,Fossil,Amelia Clark,Completed 48 | 47,Sarah White,Bulbasaur,Common,Near Mint,12.00,5,2024-12-18,Base Set,Lucas Harris,Completed 49 | 48,Emma Green,Charmander,Common,Mint,15.00,4,2024-12-19,Base Set,Chris Brown,Pending 50 | 49,Jane Smith,Squirtle,Common,Good,10.00,6,2024-12-20,Base Set,Mason Martinez,Completed 51 | 50,John Doe,Jigglypuff,Common,Excellent,8.00,10,2024-12-21,Jungle,Liam Brown,Completed 52 | 51,Olivia Taylor,Clefairy,Rare,Mint,50.00,1,2024-12-22,Base Set,Ethan White,Completed 53 | 52,Lucas Harris,Nidoking,Holo Rare,Good,125.00,1,2024-12-23,Base Set,John Doe,Cancelled 54 | 53,Alex Johnson,Hitmonchan,Holo Rare,Near Mint,100.00,1,2024-12-24,Base Set,Jane Smith,Completed 55 | 54,Sophia Wilson,Kangaskhan,Rare,Excellent,80.00,1,2024-12-25,Jungle,Henry King,Completed 56 | 55,Chris Brown,Scyther,Rare,Mint,85.00,1,2024-12-26,Jungle,Emma Green,Completed 57 | 56,Harper Lewis,Pinsir,Rare,Near Mint,70.00,1,2024-12-27,Jungle,Olivia Taylor,Completed 58 | 57,Mason Martinez,Aerodactyl,Rare,Good,100.00,1,2024-12-28,Fossil,Sarah White,Completed 59 | 58,Liam Brown,Kabutops,Rare,Mint,105.00,1,2024-12-29,Fossil,Alex Johnson,Completed 60 | 59,Evelyn Walker,Magikarp,Common,Excellent,5.00,20,2024-12-30,Base Set,Lucas Harris,Completed 61 | 60,Amelia Clark,Gyarados,Holo Rare,Near Mint,150.00,1,2024-12-31,Base Set,Sophia Wilson,Pending 62 | 61,Sarah White,Ditto,Rare,Mint,90.00,1,2025-01-01,Fossil,Henry King,Completed 63 | 62,Emma Green,Pidgeot,Rare,Good,70.00,1,2025-01-02,Jungle,Chris Brown,Completed 64 | 63,John Doe,Electabuzz,Rare,Excellent,60.00,2,2025-01-03,Base Set,Liam Brown,Completed 65 | 64,Jane Smith,Magmar,Rare,Mint,55.00,1,2025-01-04,Fossil,Mason Martinez,Completed 66 | 65,Olivia Taylor,Jynx,Common,Excellent,30.00,3,2025-01-05,Base Set,Ethan White,Completed 67 | 66,Alex Johnson,Alakazam,Holo Rare,Mint,175.00,1,2025-01-06,Base Set,Jane Smith,Completed 68 | 67,Sophia Wilson,Chansey,Holo Rare,Good,100.00,1,2025-01-07,Base Set,Olivia Taylor,Completed 69 | 68,Chris Brown,Geodude,Common,Near Mint,5.00,12,2025-01-08,Base Set,John Doe,Completed 70 | 69,Henry 
King,Grimer,Common,Excellent,7.00,8,2025-01-09,Fossil,Emma Green,Completed 71 | 70,Ethan White,Muk,Rare,Mint,85.00,1,2025-01-10,Fossil,Sophia Wilson,Completed 72 | 71,Harper Lewis,Rhydon,Rare,Good,75.00,1,2025-01-11,Jungle,Chris Brown,Cancelled 73 | 72,Mason Martinez,Tauros,Common,Near Mint,10.00,10,2025-01-12,Jungle,Alex Johnson,Completed 74 | 73,Evelyn Walker,Exeggutor,Rare,Mint,65.00,1,2025-01-13,Jungle,Sarah White,Completed 75 | 74,Lucas Harris,Venonat,Common,Excellent,5.00,15,2025-01-14,Jungle,Harper Lewis,Pending -------------------------------------------------------------------------------- /src/ploosh/exporters/exporter_trx.py: -------------------------------------------------------------------------------- 1 | """Export test case result to TRX format""" 2 | 3 | import html 4 | import os 5 | import uuid 6 | import xml.dom.minidom 7 | import numpy as np 8 | from exporters.exporter import Exporter 9 | from case import StateStatistics 10 | 11 | 12 | class ExporterTRX(Exporter): 13 | """Export test case result to TRX format""" 14 | 15 | def __init__(self): 16 | # Set the name of the exporter 17 | self.name = "TRX" 18 | 19 | def get_failed_blocks(self, case_name, current_case, execution_id, output_folder): 20 | """Get XML code for failed cases""" 21 | # Escape the error message to be XML-safe 22 | error_message = html.escape(current_case.error_message, quote=False) 23 | 24 | # Create the XML block for the error message 25 | output_message_xml = f"{error_message}" 26 | result_files_xml = "" 27 | 28 | # If there is a comparison gap, export it to an Excel file 29 | if current_case.df_compare_gap is not None: 30 | detail_file_path = f"{output_folder}/test_results/In/{execution_id}/{case_name}.xlsx" 31 | result_files_xml = f"" 32 | 33 | # Create directories if they do not exist 34 | os.makedirs(os.path.dirname(detail_file_path), exist_ok=True) 35 | current_case.df_compare_gap.to_excel(detail_file_path) 36 | 37 | return output_message_xml, result_files_xml 38 | 39 | def export(self, cases: dict): 40 | """Export test case results to a TRX file""" 41 | 42 | # Generate a unique ID for the TRX file 43 | trx_id = str(uuid.uuid4()) 44 | 45 | # Define the output folder and file path 46 | output_folder = f"{self.output_path}/trx" 47 | output_file = f"{output_folder}/test_results.xml" 48 | 49 | # Generate a unique ID for the test list 50 | test_list_id = str(uuid.uuid4()) 51 | 52 | # Initialize lists to store execution and test IDs 53 | execution_id_list = [] 54 | test_id_list = [] 55 | 56 | # Generate unique IDs for each test case 57 | for _ in list(range(0, len(cases))): 58 | execution_id_list.append(str(uuid.uuid4())) 59 | test_id_list.append(str(uuid.uuid4())) 60 | 61 | # Initialize XML blocks for unit test results, test definitions, and test entries 62 | xml_unit_test_result = "" 63 | xml_test_definitions = "" 64 | xml_test_entry = "" 65 | 66 | # Initialize state statistics 67 | state_statistics = StateStatistics() 68 | 69 | # Initialize lists to store start and end times 70 | start_times = [] 71 | end_times = [] 72 | 73 | # Iterate over each test case and collect data 74 | for i, case_name in enumerate(cases): 75 | current_case = cases[case_name] 76 | 77 | # Collect start and end times for the test case 78 | if current_case.global_duration.start is not None: 79 | start_times.append(current_case.global_duration.start) 80 | end_times.append(current_case.global_duration.end) 81 | 82 | execution_id = execution_id_list[i] 83 | test_id = test_id_list[i] 84 | 85 | # Update state statistics 86 | 
state_statistics.add_state(current_case.state) 87 | 88 | output_message_xml = "" 89 | result_files_xml = "" 90 | 91 | # If the test case failed, get the XML blocks for the error message and result files 92 | if current_case.state != "passed" and current_case.error_message is not None: 93 | output_message_xml, result_files_xml = self.get_failed_blocks( 94 | case_name, current_case, execution_id_list[i], output_folder 95 | ) 96 | 97 | outcome = current_case.state 98 | if outcome == "error": 99 | outcome = "failed" 100 | 101 | # Create the XML block for the unit test result 102 | xml_unit_test_result += f"""{output_message_xml}{result_files_xml}""" 111 | 112 | # Create the XML block for the test definition 113 | xml_test_definitions += f"" 114 | 115 | # Create the XML block for the test entry 116 | xml_test_entry += f"" 117 | 118 | # Get the global start and end times 119 | global_start_date = Exporter.date_to_string(np.min(np.array(start_times))) 120 | global_end_date = Exporter.date_to_string(np.max(np.array(end_times))) 121 | 122 | # Create the final XML string for the TRX file 123 | xml_string = f""" 124 | 125 | 126 | 127 | {xml_unit_test_result} 128 | {xml_test_definitions} 129 | {xml_test_entry} 130 | 131 | 132 | 139 | 140 | 141 | """ 142 | 143 | # Create directories if they do not exist 144 | os.makedirs(os.path.dirname(output_file), exist_ok=True) 145 | 146 | # Write the XML string to the TRX file 147 | with open(output_file, "w", encoding="UTF-8") as file: 148 | dom_string = xml.dom.minidom.parseString(xml_string).toprettyxml() 149 | dom_string = os.linesep.join([s for s in dom_string.splitlines() if s.strip()]) 150 | file.write(dom_string) 151 | -------------------------------------------------------------------------------- /src/ploosh/case.py: -------------------------------------------------------------------------------- 1 | """Module to manage test case""" 2 | from dataclasses import dataclass 3 | from datetime import datetime 4 | import numpy as np 5 | from engines.compare_engine_native import CompareEngineNative 6 | from engines.compare_engine_spark import CompareEngineSpark 7 | from engines.load_engine_native import LoadEngineNative 8 | from engines.load_engine_spark import LoadEngineSpark 9 | 10 | @dataclass 11 | class StateStatistics: 12 | """Statistics of test case executions""" 13 | not_executed = 0 14 | executed = 0 15 | passed = 0 16 | failed = 0 17 | error = 0 18 | total = 0 19 | 20 | def add_state(self, state): 21 | """Add new state to statistics""" 22 | if state == "passed": 23 | self.passed += 1 24 | if state == "failed": 25 | self.failed += 1 26 | if state == "error": 27 | self.error += 1 28 | if state == "notExecuted": 29 | self.not_executed += 1 30 | 31 | if state != "notExecuted": 32 | self.executed += 1 33 | 34 | self.total += 1 35 | 36 | 37 | @dataclass 38 | class ConnectionDescription: 39 | """Tuple of connection and connector""" 40 | connector = None 41 | connection = None 42 | 43 | def __init__(self, connector, connection): 44 | self.connector = connector 45 | self.connection = connection 46 | 47 | 48 | @dataclass 49 | class Duration: 50 | """Structure of duration""" 51 | start = None 52 | end = None 53 | duration = None 54 | 55 | def calculate_duration(self): 56 | """Calculate the duration between start and end date""" 57 | if self.end is not None: 58 | duration = self.end - self.start 59 | self.duration = duration.seconds + (duration.microseconds / 1000000) 60 | 61 | 62 | @dataclass 63 | class CaseItem: 64 | """Structure of case item (source or 
expected)""" 65 | connector = None 66 | connection = None 67 | configuration = None 68 | duration = None 69 | df_data = None 70 | count = 0 71 | 72 | def __init__(self, configuration, connector, connection): 73 | self.duration = Duration() 74 | self.connector = connector 75 | self.connection = connection 76 | self.configuration = configuration 77 | 78 | 79 | class Case: 80 | """Test case item""" 81 | options = None 82 | source = None 83 | expected = None 84 | global_duration = None 85 | compare_duration = None 86 | state = "notExecuted" 87 | error_type = None 88 | error_message = None 89 | df_compare_gap = None 90 | disabled = None 91 | success_rate = 1 92 | 93 | def __init__(self, configuration, source, expected, options, disabled): 94 | self.source = CaseItem(configuration["source"], source.connector, source.connection) 95 | self.expected = CaseItem(configuration["expected"], expected.connector, expected.connection) 96 | self.options = options 97 | self.disabled = disabled 98 | self.global_duration = Duration() 99 | self.compare_duration = Duration() 100 | 101 | def get_insensitive_item(self, name: str, items: list) -> str: 102 | """Get item from list case-insensitively""" 103 | for item in items: 104 | if name.upper().strip() == item.upper().strip(): 105 | return item 106 | return name 107 | 108 | def load_data(self, obj_type: str): 109 | """Load data from connector""" 110 | if obj_type == "source": 111 | obj = self.source 112 | else: 113 | obj = self.expected 114 | 115 | obj.duration.start = datetime.now() 116 | 117 | if not self.source.connector.is_spark: 118 | load_engine = LoadEngineNative(obj.configuration, self.options, obj.connection) 119 | else: 120 | load_engine = LoadEngineSpark(obj.configuration, self.options, obj.connection) 121 | 122 | # Load data from connector 123 | obj.df_data = obj.connector.get_data(obj.configuration, obj.connection) 124 | 125 | # Execute load engine 126 | obj.df_data = load_engine.execute(obj.df_data) 127 | obj.count = load_engine.count 128 | 129 | obj.duration.end = datetime.now() 130 | 131 | def load_data_error(self, obj_type: str, message: str): 132 | """Setup error message for data loading""" 133 | if obj_type == "source": 134 | obj = self.source 135 | else: 136 | obj = self.expected 137 | 138 | self.state = "error" 139 | self.error_type = "data" 140 | self.error_message = message 141 | obj.duration.end = datetime.now() 142 | 143 | def compare_dataframes(self): 144 | """Compare source and expected dataframe""" 145 | self.compare_duration.start = datetime.now() 146 | 147 | compare_engine = CompareEngineNative(self.source.df_data, self.expected.df_data, self.options) 148 | compare_state = compare_engine.compare() 149 | 150 | self.error_message = compare_engine.error_message 151 | self.error_type = compare_engine.error_type 152 | self.df_compare_gap = compare_engine.df_compare_gap 153 | self.success_rate = compare_engine.success_rate 154 | 155 | self.compare_duration.end = datetime.now() 156 | 157 | if compare_state: 158 | self.state = "passed" 159 | else: 160 | self.state = "failed" 161 | 162 | def compare_dataframes_with_spark(self, spark_session): 163 | """Compare source and expected dataframe using Spark""" 164 | self.compare_duration.start = datetime.now() 165 | 166 | compare_engine = CompareEngineSpark(self.source.df_data, self.expected.df_data, self.options) 167 | compare_state = compare_engine.compare() 168 | 169 | self.error_message = compare_engine.error_message 170 | self.error_type = compare_engine.error_type 171 | self.df_compare_gap = 
compare_engine.df_compare_gap 172 | self.success_rate = compare_engine.success_rate 173 | 174 | self.compare_duration.end = datetime.now() 175 | 176 | if compare_state: 177 | self.state = "passed" 178 | else: 179 | self.state = "failed" 180 | 181 | def compare_dataframes_error(self, message): 182 | """Setup error message for compare engine""" 183 | self.state = "error" 184 | self.error_type = "compare" 185 | self.error_message = message 186 | self.compare_duration.end = datetime.now() 187 | 188 | def calculate_durations(self): 189 | """Calculate durations""" 190 | self.source.duration.calculate_duration() 191 | self.expected.duration.calculate_duration() 192 | self.compare_duration.calculate_duration() 193 | 194 | ends = [] 195 | if self.source.duration.end is not None: 196 | ends.append(self.source.duration.end) 197 | if self.expected.duration.end is not None: 198 | ends.append(self.expected.duration.end) 199 | if self.compare_duration.end is not None: 200 | ends.append(self.compare_duration.end) 201 | 202 | if len(ends) == 0: 203 | self.global_duration.duration = 0 204 | else: 205 | self.global_duration.start = self.source.duration.start 206 | self.global_duration.end = np.max(np.array(ends)) 207 | self.global_duration.calculate_duration() 208 | --------------------------------------------------------------------------------
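As background for the ExporterTRX exporter above: a TRX report uses the Visual Studio TestRun XML format, with a Results block of UnitTestResult elements, a TestDefinitions block of UnitTest elements, a TestEntries block, and a ResultSummary with Counters, which matches the pieces the exporter accumulates (xml_unit_test_result, xml_test_definitions, xml_test_entry, StateStatistics). The sketch below builds a minimal skeleton of that shape with the standard library and pretty-prints it the same way the exporter does; the element and attribute names follow the public TRX schema and are not necessarily the exact strings ploosh emits.

import uuid
import xml.dom.minidom

# Illustrative IDs; in the exporter these come from execution_id_list and test_id_list.
test_id = str(uuid.uuid4())
execution_id = str(uuid.uuid4())
test_list_id = str(uuid.uuid4())

# A minimal sketch of the general TRX (Visual Studio TestRun) structure.
# Attribute choices here are illustrative, not ploosh's exact output.
xml_string = f"""<TestRun id="{uuid.uuid4()}" name="ploosh" xmlns="http://microsoft.com/schemas/VisualStudio/TeamTest/2010">
  <Results>
    <UnitTestResult executionId="{execution_id}" testId="{test_id}" testName="my_case"
                    outcome="Passed" testListId="{test_list_id}" />
  </Results>
  <TestDefinitions>
    <UnitTest id="{test_id}" name="my_case">
      <Execution id="{execution_id}" />
    </UnitTest>
  </TestDefinitions>
  <TestEntries>
    <TestEntry testId="{test_id}" executionId="{execution_id}" testListId="{test_list_id}" />
  </TestEntries>
  <TestLists>
    <TestList id="{test_list_id}" name="Results" />
  </TestLists>
  <ResultSummary outcome="Completed">
    <Counters total="1" executed="1" passed="1" failed="0" error="0" notExecuted="0" />
  </ResultSummary>
</TestRun>
"""

# Pretty-print the skeleton, mirroring the exporter's use of xml.dom.minidom.
print(xml.dom.minidom.parseString(xml_string).toprettyxml())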