├── src
    └── sequor
    │   ├── __init__.py
    │   ├── common
    │       ├── __init__.py
    │       ├── common.py
    │       ├── telemetry.py
    │       ├── data_loader.py
    │       └── executor_utils.py
    │   ├── core
    │       ├── __init__.py
    │       ├── user_error.py
    │       ├── execution_stack_entry.py
    │       ├── flow_log_entry.py
    │       ├── variable_bindings.py
    │       ├── environment.py
    │       ├── instance.py
    │       ├── registry.py
    │       ├── context.py
    │       ├── flow.py
    │       ├── job.py
    │       └── op.py
    │   ├── source
    │       ├── __init__.py
    │       ├── column.py
    │       ├── column_schema.py
    │       ├── sources
    │       │   ├── http_source.py
    │       │   ├── duckdb_source.py
    │       │   ├── sql_source.py
    │       │   ├── duckdb_connection.py
    │       │   └── sql_connection.py
    │       ├── table_address.py
    │       ├── data_type.py
    │       ├── model.py
    │       ├── source.py
    │       ├── connection.py
    │       └── row.py
    │   ├── _version.py
    │   ├── project
    │       ├── __init__.py
    │       ├── specification.py
    │       └── project.py
    │   ├── operations
    │       ├── __init__.py
    │       ├── print.py
    │       ├── registry.pybackup
    │       ├── block.py
    │       ├── transform.py
    │       ├── set_variable.py
    │       ├── run_flow.py
    │       ├── for_each.py
    │       ├── execute.py
    │       ├── if_op.py
    │       ├── migrate_schema.py
    │       └── http_request.py
    │   └── cli.py
├── release.sh
├── CODE_OF_CONDUCT.md
├── pyproject.toml
├── README.md
├── .gitignore
└── LICENSE


/src/sequor/__init__.py:
--------------------------------------------------------------------------------
1 | from ._version import __version__
2 | 


--------------------------------------------------------------------------------
/src/sequor/common/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Common utilities and shared functionality
3 | """ 


--------------------------------------------------------------------------------
/src/sequor/core/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Flow package for managing operation flows and execution
3 | """ 


--------------------------------------------------------------------------------
/src/sequor/source/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Source package for managing data sources and connections
3 | """ 


--------------------------------------------------------------------------------
/src/sequor/_version.py:
--------------------------------------------------------------------------------
1 | # This is the single source of truth for the version number
2 | __version__ = "1.2.0"


--------------------------------------------------------------------------------
/src/sequor/project/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Project package for managing project configuration and resources
3 | """ 


--------------------------------------------------------------------------------
/src/sequor/operations/__init__.py:
--------------------------------------------------------------------------------
1 | """
2 | Operations package containing all available operation implementations
3 | """ 


--------------------------------------------------------------------------------
/src/sequor/source/column.py:
--------------------------------------------------------------------------------
1 | from typing import Any
2 | 
3 | 
class Column:
    """A single named value: a column name paired with its value."""

    def __init__(self, name: str, value: Any):
        # Both arguments are stored as given; nothing is validated or copied.
        self.name, self.value = name, value
8 | 
9 | 


--------------------------------------------------------------------------------
/src/sequor/core/user_error.py:
--------------------------------------------------------------------------------
class UserError(Exception):
    """Exception that carries a human-readable message.

    Args:
        message: Text returned by ``str()`` on the exception and stored on
            the ``message`` attribute.
    """

    def __init__(self, message: str):
        # Hand the message to Exception as well, so that ``args``, ``repr()``
        # and copy/pickle reconstruction all carry it.
        super().__init__(message)
        self.message = message

    def __str__(self):
        return self.message
7 | 


--------------------------------------------------------------------------------
/release.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | set -e  # Exit on error
 3 | 
 4 | # Clean build artifacts
 5 | rm -rf dist/
 6 | 
 7 | # Build package
 8 | python -m build
 9 | 
10 | # Upload to PyPI
11 | python -m twine upload dist/*
12 | 
13 | echo "Release completed successfully."


--------------------------------------------------------------------------------
/src/sequor/project/specification.py:
--------------------------------------------------------------------------------
1 | from typing import Any, Dict
2 | 
3 | 
class Specification:
    """Holds a specification's name, its type label, and its raw definition."""

    def __init__(self, name: str, type: str, spec_def: Dict[str, Any]):
        # `type` shadows the builtin only inside this constructor; the
        # parameter name is kept because callers may pass it by keyword.
        self.spec_def = spec_def
        self.type = type
        self.name = name
9 | 


--------------------------------------------------------------------------------
/src/sequor/source/column_schema.py:
--------------------------------------------------------------------------------
 1 | from sequor.source.data_type import DataType
 2 | 
 3 | 
 4 | class ColumnSchema:
 5 |     # do we need to add position: int
 6 |     def __init__(self, name: str, type: DataType):
 7 |         self.name = name
 8 |         self.type = type
 9 | 
10 |     
11 |     
12 | 
13 | 
14 | 


--------------------------------------------------------------------------------
/CODE_OF_CONDUCT.md:
--------------------------------------------------------------------------------
1 | # Code of Conduct
2 | 
3 | This project adopts the [Contributor Covenant](https://www.contributor-covenant.org), version 2.1, available at [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html](https://www.contributor-covenant.org/version/2/1/code_of_conduct.html).
4 | 
5 | For questions or to report violations, please contact [maxim@paloaltodatabases.com](mailto:maxim@paloaltodatabases.com).
6 | 


--------------------------------------------------------------------------------
/src/sequor/source/sources/http_source.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | from sequor.source.source import Source
 3 | from sqlalchemy import create_engine, text
 4 | 
 5 | from sequor.source.sources.sql_connection import SQLConnection
 6 | from sequor.source.table_address import TableAddress
 7 | 
 8 | class HTTPSource(Source):
 9 |     def __init__(self, context: 'Context', name: str,  source_def: Dict[str, Any]):
10 |         super().__init__(context, name, source_def)
11 |     


--------------------------------------------------------------------------------
/src/sequor/core/execution_stack_entry.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | import time
 3 | 
 4 | class ExecutionStackEntry:
 5 |     def __init__(self, op_title: str, flow_type_name: str, flow_name: str, flow_step_index: int, flow_step_index_name: str, parent: 'ExecutionStackEntry'):
 6 |         self.op_title = op_title
 7 |         self.flow_type_name = flow_type_name
 8 |         self.flow_name = flow_name
 9 |         self.flow_step_index = flow_step_index
10 |         self.flow_step_index_name = flow_step_index_name
11 |         self.parent = parent
12 |         self.start_time = datetime.now()
13 | 


--------------------------------------------------------------------------------
/src/sequor/core/flow_log_entry.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | import logging
 3 | 
 4 | 
 5 | logger = logging.getLogger("sequor.job")
 6 | 
 7 | 
 8 | class FlowLogEntry:
 9 |     def __init__(self, message: str, start_time: datetime, end_time: datetime):
10 |         self.message = message
11 |         self.start_time = start_time
12 |         self.end_time = end_time
13 | 
14 | 
15 |     def to_dict(self) -> dict:
16 |         d = {}
17 |         d['message'] = self.message
18 |         d['start_time'] = self.start_time
19 |         d['end_time'] = self.end_time
20 |         return d
21 | 
22 |  
23 | 


--------------------------------------------------------------------------------
/src/sequor/common/common.py:
--------------------------------------------------------------------------------
 1 | import traceback
 2 | import sys
 3 | 
 4 | class Common:
 5 |     def __init__(self):
 6 |         pass
 7 | 
 8 |     @staticmethod
 9 |     def get_line_number(node, key_name):
10 |         key_lc = node.lc.key(key_name) if key_name in node and hasattr(node, 'lc') else None
11 |         return key_lc[0] + 1 if key_lc else None
12 |     
13 |     @staticmethod
14 |     def get_exception_traceback():
15 |         """Get the exception's traceback as a string."""
16 |         exc_type, exc_value, exc_traceback = sys.exc_info()
17 |         # Format the traceback exactly like Python's default exception handler
18 |         trace_lines = traceback.format_exception(exc_type, exc_value, exc_traceback)
19 |         return ''.join(trace_lines)
20 | 


--------------------------------------------------------------------------------
/src/sequor/core/variable_bindings.py:
--------------------------------------------------------------------------------
 1 | from collections import namedtuple
 2 | 
 3 | VariableEntry = namedtuple("VariableEntry", ["type", "value"])
 4 | 
 5 | class VariableBindings:
 6 |     def __init__(self):
 7 |         self._bindings = {}
 8 | 
 9 |     def set(self, name, value, var_type="text"):
10 |         self._bindings[name] = VariableEntry(type=var_type, value=value)
11 | 
12 |     def get(self, name):
13 |         binding = self._bindings.get(name)
14 |         if binding is None:
15 |             return None
16 |         else:
17 |             return binding.value
18 | 
19 |     def get_type(self, name):
20 |         binding = self._bindings.get(name)
21 |         if binding is None:
22 |             return None
23 |         else:
24 |             return binding.type
25 | 


--------------------------------------------------------------------------------
/src/sequor/source/table_address.py:
--------------------------------------------------------------------------------
 1 | from typing import Any
 2 | 
 3 | 
 4 | class TableAddress:
 5 |     def __init__(self, source_name, database_name, namespace_name, table_name, model_def: Any = None, data: list = None, write_mode: str = None):
 6 |         self.source_name = source_name
 7 |         self.database_name = database_name
 8 |         self.namespace_name = namespace_name
 9 |         self.table_name = table_name
10 |         self.model_def = model_def
11 |         self.data = data
12 |         self.write_mode = write_mode
13 |     
14 |     def clone(self):
15 |         return TableAddress(
16 |             source_name=self.source_name,
17 |             database_name=self.database_name,
18 |             namespace_name=self.namespace_name, 
19 |             table_name=self.table_name,
20 |             model_def=self.model_def,
21 |             data=self.data,
22 |             write_mode=self.write_mode
23 |         )
24 | 


--------------------------------------------------------------------------------
/src/sequor/source/data_type.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Union
 2 | 
 3 | 
 4 | class DataType:
 5 |     def __init__(self, name: str, precision: Union[int, None] = None, scale: Union[int, None] = None):
 6 |         self.name = name
 7 |         self.precision = precision
 8 |         self.scale = scale
 9 |     
10 |     @classmethod
11 |     def from_column_def(cls, col_def: Dict[str, Any]):
12 |         type_def = col_def.get("type")
13 |         if isinstance(type_def, str):
14 |             dt = {
15 |                 "name": type_def,
16 |                 "precision": 0,
17 |                 "scale": 0
18 |             }
19 |         else:
20 |             dt = {
21 |                 "name": type_def.get("name"),
22 |                 "precision": type_def.get("precision", 0), 
23 |                 "scale": type_def.get("scale", 0)
24 |             }
25 |         return cls(dt["name"], dt["precision"], dt["scale"])
26 | 
27 |     def __str__(self): 
28 |         return f"{self.name}"


--------------------------------------------------------------------------------
/src/sequor/operations/print.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.op import Op
 6 | 
 7 | 
 8 | # @Op.register('print')
 9 | class PrintOp(Op):
10 |     def __init__(self, proj, op_def: Dict[str, Any]):
11 |         super().__init__(proj, op_def)
12 | 
13 |     def get_title(self) -> str:
14 |         op_title = self.op_def.get('title')
15 |         if (op_title is not None):
16 |             title = self.name + ": " + op_title
17 |         else:
18 |             message = self.op_def.get('message')
19 |             cut_off = 50
20 |             if message is not None and len(message) > cut_off:
21 |                 message = message[:cut_off] + "..."
22 |             title = self.name + ": " + message if message else "unknown"
23 |         return title
24 | 
25 |     def run(self, context, op_options: Dict[str, Any]):
26 |         logger = logging.getLogger("sequor.ops.print")
27 |         self.op_def = render_jinja(context, self.op_def)
28 |         message = self.op_def.get('message')
29 |         logger.info(f"Message: {message}")
30 |         context.add_to_log_op_finished(logger, f"Finished")


--------------------------------------------------------------------------------
/src/sequor/operations/registry.pybackup:
--------------------------------------------------------------------------------
 1 | # import importlib
 2 | # import logging
 3 | # import os
 4 | # import pkgutil
 5 | # from pathlib import Path
 6 | # from typing import Optional
 7 | 
 8 | # logger = logging.getLogger("sequor.operations.registry")
 9 | 
10 | # def register_all_operations() -> None:
11 | #     """Automatically import all operations from the operations package"""
12 | #     operations_pkg = 'operations'
13 | #     # Get the directory containing the operations
14 | #     operations_dir = Path(__file__).parent
15 |     
16 | #     # Iterate through all .py files in the operations directory
17 | #     for module_info in pkgutil.iter_modules([str(operations_dir)]):
18 | #         # Skip __init__.py and registry.py
19 | #         if module_info.name in ['__init__', 'registry']:
20 | #             continue
21 |             
22 | #         try:
23 | #             # Import each module
24 | #             importlib.import_module(f'{operations_pkg}.{module_info.name}')
25 | #             logger.debug(f"Successfully imported operation module: {module_info.name}")
26 | #         except Exception as e:
27 | #             logger.error(f"Failed to import operation module {module_info.name}: {str(e)}")
28 | #             # Don't raise the exception - we want to continue loading other operations 


--------------------------------------------------------------------------------
/src/sequor/source/sources/duckdb_source.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | from sequor.source.source import Source
 3 | from sqlalchemy import create_engine, text
 4 | 
 5 | from sequor.source.sources.duckdb_connection import DuckDBConnection
 6 | from sequor.source.sources.sql_connection import SQLConnection
 7 | from sequor.source.sources.sql_source import SQLSource
 8 | from sequor.source.table_address import TableAddress
 9 | 
class DuckDBSource(SQLSource):
    """SQL source that connects through DuckDBConnection and defaults to the "main" namespace."""

    def __init__(self, context: 'Context', name: str,  source_def: Dict[str, Any]):
        super().__init__(context, name, source_def)
        # Connection string taken from the rendered source definition.
        rendered = self.get_rendered_def()
        self.connStr = rendered.get('conn_str')

    def connect(self):
        """Return a new DuckDBConnection bound to this source."""
        return DuckDBConnection(self)

    def get_default_namespace_name(self):
        return "main"

    def get_qualified_name(self, table_addr: TableAddress):
        """Return "namespace.table", or just the table name when no namespace is set."""
        if table_addr.namespace_name:
            return f"{table_addr.namespace_name}.{table_addr.table_name}"
        return table_addr.table_name

    def get_create_table_sql(self, query: str, table_addr: TableAddress) -> str:
        """Wrap query in a CREATE TABLE ... AS statement for table_addr."""
        qualified_target = self.get_qualified_name(table_addr)
        return f"CREATE TABLE {qualified_target} AS {query}"


--------------------------------------------------------------------------------
/src/sequor/core/environment.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | 
 3 | from pathlib import Path
 4 | 
 5 | import yaml
 6 | 
 7 | 
 8 | # from sequor.core.instance import Instance
 9 | from sequor.core.user_error import UserError
10 | 
11 | 
12 | 
class Environment:
    """Named set of variables read from ``<home_dir>/envs/<env_name>.yaml``.

    Attributes:
        env_name: Name of the environment (None for an empty environment).
        home_dir: Directory that contains the ``envs`` folder.
        env_vars: Variables of the environment; empty until ``load()`` runs.
    """

    def __init__(self, env_name: str, home_dir):  # instance: Instance,
        self.env_name = env_name
        self.home_dir = home_dir
        # Start with no variables so get_variable_value() is usable even
        # before load() has been called.
        self.env_vars = {}
        # self.instance = instance

    @classmethod
    def create_empty(cls) -> 'Environment': # instance: Instance,
        """Return an environment with no name, no home dir and no variables."""
        # __init__ is bypassed on purpose: an empty environment has no name.
        env = cls.__new__(cls)
        env.env_name = None
        env.home_dir = None
        # env.instance = instance
        env.env_vars = {}
        return env

    def load(self):
        """Read the environment file and store its ``variables`` mapping.

        Raises:
            UserError: If the file does not exist, cannot be parsed, or its
                top level is not a mapping.
        """
        # Path() makes a plain-string home_dir work as well as a Path.
        env_file = Path(self.home_dir) / "envs" / (self.env_name + ".yaml")
        if not env_file.exists():
            raise UserError(f"Environment does not exist: file {env_file.resolve()} not found.")
        with open(env_file, 'r') as f:
            try:
                # An empty file parses to None; treat it as an empty mapping.
                env_file_data = yaml.safe_load(f) or {}
            except Exception as e:
                raise UserError(f"Error parsing environment file {env_file.resolve()}: {e}") from e
        if not isinstance(env_file_data, dict):
            raise UserError(f"Error parsing environment file {env_file.resolve()}: top-level content must be a mapping.")
        # "variables:" with no value parses to None; fall back to empty.
        self.env_vars = env_file_data.get("variables") or {}

    def get_variable_value(self, var_name: str):
        """Return the value of var_name, or None if it is not defined."""
        return self.env_vars.get(var_name)
43 | 


--------------------------------------------------------------------------------
/src/sequor/source/model.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, List
 2 | from sequor.source.column_schema import ColumnSchema
 3 | from sequor.source.data_type import DataType
 4 | 
 5 | 
 6 | class Model:
 7 |     def __init__(self):
 8 |         self.columns = []
 9 | 
10 |     @classmethod
11 |     def from_columns(cls, columns: List[ColumnSchema]):
12 |         model = cls()
13 |         model.columns = columns
14 |         return model
15 | 
16 |     @classmethod
17 |     def from_model_def(cls, model_def: Dict[str, Any]):
18 |         columns_def = model_def.get("columns", [])
19 | 
20 |         columns_def_list = None
21 |         if isinstance(columns_def, dict):
22 |             # Convert compact object notation
23 |             columns_def_list = [
24 |                 {"name": name, "type": type_def}
25 |                 for name, type_def in columns_def.items()
26 |             ]
27 |         else:
28 |             columns_def_list = columns_def
29 | 
30 |         # load columns
31 |         columns = []
32 |         for col_def in columns_def_list:
33 |             name = col_def.get("name")
34 |             type = DataType.from_column_def(col_def)
35 |             columns.append(ColumnSchema(name, type))
36 | 
37 |         return Model.from_columns(columns)
38 |     
39 |     def get_column(self, name: str) -> ColumnSchema:
40 |         for column in self.columns:
41 |             if column.name == name:
42 |                 return column
43 |         return None


--------------------------------------------------------------------------------
/src/sequor/source/sources/sql_source.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | from sequor.source.source import Source
 3 | from sqlalchemy import create_engine, text
 4 | 
 5 | from sequor.source.sources.sql_connection import SQLConnection
 6 | from sequor.source.table_address import TableAddress
 7 | 
 8 | class SQLSource(Source):
 9 |     """Class representing a SQL data source"""
10 |     def __init__(self, context: 'Context', name: str,  source_def: Dict[str, Any]):
11 |         super().__init__(context, name, source_def)
12 |         source_rendered_def = self.get_rendered_def()
13 |         self.username = source_rendered_def.get('username')
14 |         self.password = source_rendered_def.get('password')
15 |         self.connStr = source_rendered_def.get('conn_str')
16 |     
17 |     def connect(self):
18 |         return SQLConnection(self)
19 | 
20 |     def get_default_namespace_name(self):
21 |         return "public"
22 | 
23 |     def get_qualified_name(self, table_addr: TableAddress):
24 |         return f"{table_addr.namespace_name}.{table_addr.table_name}" if table_addr.namespace_name else table_addr.table_name
25 |     
26 |     def quote_name(self, name: str):
27 |         return f'"{name}"'
28 | 
29 |     def get_create_table_sql(self, query: str, table_addr: TableAddress) -> str:
30 |         target_table_qualified = self.get_qualified_name(table_addr)
31 |         query = f"CREATE TABLE {target_table_qualified} AS {query}"
32 |         return query
33 |     


--------------------------------------------------------------------------------
/src/sequor/operations/block.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.context import Context
 6 | from sequor.core.flow import Flow
 7 | from sequor.core.op import Op
 8 | from sequor.source.table_address import TableAddress
 9 | 
10 | 
11 | # @Op.register('block')
class BlockOp(Op):
    """Operation that runs the `steps` of its definition as a nested flow."""

    def __init__(self, proj, op_def: Dict[str, Any]):
        super().__init__(proj, op_def)

    def get_title(self) -> str:
        """Return the op name (or its alias), followed by ": <title>" when a title is set."""
        heading = self.op_def.get('title')
        alias = self.op_def.get('op_name_alias')
        label = alias if alias is not None else self.name
        return label if heading is None else label + ": " + heading

    def run(self, context: Context, op_options: Dict[str, Any]):
        """Build a flow from the block's steps and run it."""
        log = logging.getLogger("sequor.ops.block")
        # The definition is deliberately not Jinja-rendered here.
        log.info("Starting")
        nested_flow = context.project.build_flow_from_block_def(self.op_def.get('steps'))
        alias = self.op_def.get('op_name_alias')
        if alias is not None:
            # Run the nested flow under the alias as its type name.
            nested_flow.type_name = alias
        # The nested flow runs in the caller's context; no clone is made.
        nested_flow.run(context)
        log.info("Finished")


--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
 1 | [build-system]
 2 | requires = ["setuptools>=42", "wheel"]
 3 | build-backend = "setuptools.build_meta"
 4 | 
 5 | [project]
 6 | name = "sequor"
 7 | dynamic = ["version"]
 8 | description = "SQL-centric API integration platform"
 9 | readme = "README.md"
10 | license = {text = "Apache-2.0"}
11 | classifiers = [
12 |     "Programming Language :: Python :: 3",
13 |     "License :: OSI Approved :: Apache Software License",
14 |     "Operating System :: OS Independent"
15 | ]
16 | requires-python = ">=3.9"
17 | # psycopg2-binary 2.9.10 cannot be installed with python 3.9
18 | dependencies = [
19 |     "pyyaml>=6.0.2",
20 |     "jinja2>=3.1.6",
21 |     "ruamel.yaml>=0.18.10",
22 |     "sqlalchemy>=2.0.40",
23 |     "typer>=0.15.2",
24 |     "requests>=2.32.3",
25 |     "requests-toolbelt>=1.0.0",
26 |     "authlib>=1.5.2",
27 |     "psycopg2-binary>=2.9.0,<2.9.10", 
28 |     "duckdb-engine>=0.17.0",
29 |     "posthog>=4.0.0"
30 | ]
31 | 
32 | [tool.setuptools.dynamic]
33 | version = {attr = "sequor._version.__version__"}  # Get version from _version.py
34 | 
35 | [tool.autopep8]
36 | max_line_length = 1000
37 | 
38 | [project.urls]
39 | "Homepage" = "https://sequor.dev/"
40 | "GitHub" = "https://github.com/paloaltodatabases/sequor"
41 | "Documentation" = "https://docs.sequor.dev/"
42 | "Examples" = "https://github.com/paloaltodatabases/sequor-integrations"
43 | "Newsletter" = "https://buttondown.com/sequor"
44 | 
45 | [project.scripts]
46 | sequor = "sequor.cli:main"
47 | 
48 | [project.optional-dependencies]
49 | dev = [
50 |     "pytest>=7.3.1",
51 |     "pytest-cov>=4.1.0",
52 |     "responses>=0.23.1",
53 |     "black>=23.3.0",
54 |     "build>=0.10.0",
55 |     "twine>=4.0.2"
56 | ]


--------------------------------------------------------------------------------
/src/sequor/core/instance.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | from sequor.common import telemetry
 4 | from pathlib import Path
 5 | 
 6 | import yaml
 7 | 
 8 | from sequor.core.user_error import UserError
 9 | 
10 | 
class Instance:
    """A Sequor installation rooted at a home directory.

    Construction creates the home and log directories if needed, configures
    root logging to the console and to ``logs/sequor.log``, and configures
    telemetry.
    """

    def __init__(self, home_dir_cli: str):
        # Home dir of the Sequor installation: the CLI value if given,
        # otherwise the default location. "~" is expanded in both cases.
        chosen_home = home_dir_cli if home_dir_cli else '~/.sequor'
        home_dir = Path(os.path.expanduser(chosen_home))

        self.home_dir = home_dir
        # Create home directory if it does not exist
        home_dir.mkdir(parents=True, exist_ok=True)

        # Logging: one stream handler (console) and one file handler.
        log_dir = self.home_dir / "logs"
        log_dir.mkdir(parents=True, exist_ok=True)
        log_path = log_dir / "sequor.log"
        console_handler = logging.StreamHandler()
        file_handler = logging.FileHandler(log_path)
        logging.basicConfig(
            level=logging.INFO,
            format="%(asctime)s %(levelname)s [%(name)s]: %(message)s",
            handlers=[console_handler, file_handler],
        )

        self.project_state_dir = self.home_dir / "project_state"

        # Telemetry: the user id file lives inside the home directory.
        user_id_path = self.home_dir / ".sequor_user_id"
        telemetry.basicConfig(
            api_key="phc_XBYG9x8aUaBlQGhNhRwEwJbQ9xCzWs05Cy671pzjxvs",
            host="https://us.i.posthog.com",
            user_id_file=user_id_path,
        )

    def get_home_dir(self) -> Path:
        """Return the home directory of this installation."""
        return self.home_dir

    def get_project_state_dir(self) -> Path:
        """Return the ``project_state`` directory under the home directory."""
        return self.home_dir / "project_state"
52 | 


--------------------------------------------------------------------------------
/src/sequor/source/source.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | from sequor.common.executor_utils import render_jinja
 4 | from sequor.core.context import Context
 5 | from sequor.source.table_address import TableAddress
 6 | 
 7 | class Source:
 8 |     def __init__(self, context: 'Context', name: str, source_def: Dict[str, Any]):
 9 |         self.context = context
10 |         self.name = name 
11 |         self.source_def = source_def
12 |         self.source_rendered_def = None
13 | 
14 |     def get_rendered_def(self):
15 |         if self.source_rendered_def is None:
16 |             # if context is None:
17 |             #     context = Context.from_project(self.project)
18 |             self.source_rendered_def = render_jinja(self.context, self.source_def)
19 |         return self.source_rendered_def
20 | 
21 |     def connect(self):
22 |         raise NotImplementedError("Subclasses must implement connect()")
23 | 
24 |     def get_qualified_name(self, table_addr: TableAddress):
25 |         raise NotImplementedError("Subclasses must implement get_qualified_name()")
26 | 
27 |     def get_default_namespace_name(self):
28 |         raise NotImplementedError("Subclasses must implement get_default_namespace_name()")
29 | 
30 |     def quote_name(self, name: str):
31 |         raise NotImplementedError("Subclasses must implement quote_name()")
32 | 
33 |     @staticmethod
34 |     def get_parameter(context, source_def: Dict[str, Any], name: str, is_required: bool = False, render: bool = False) -> Any:
35 |         param_value = source_def.get(name)
36 |         if render:
37 |             param_value = render_jinja(context, param_value)
38 |         result_value = None
39 |         if param_value:
40 |             result_value = param_value
41 |         else:
42 |             if is_required:
43 |                 raise Exception(f"{name} must be specified in source definition.")
44 |         return result_value


--------------------------------------------------------------------------------
/src/sequor/operations/transform.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.op import Op
 6 | from sequor.source.table_address import TableAddress
 7 | 
 8 | 
 9 | class TransformOp(Op):
10 |     def __init__(self, proj, op_def: Dict[str, Any]):
11 |         self.name = op_def.get('op')
12 |         self.proj = proj
13 |         self.op_def = op_def
14 | 
15 |     def get_title(self) -> str:
16 |         title = self.name
17 |         op_id = self.op_def.get('id')
18 |         if (op_id is not None):
19 |             title = self.name + ": " + op_id
20 |         elif self.op_def.get('target_table') is not None:
21 |             title = self.name + ": " + self.op_def.get('target_table')
22 |         return title
23 | 
24 |     def run(self, context, op_options: Dict[str, Any]):
25 |         logger = logging.getLogger("sequor.ops.transform")
26 |         self.op_def = render_jinja(context, self.op_def)
27 |         logger.info(f"Starting \"{self.get_title()}\"")
28 |         source_name = self.op_def.get('source')
29 |         query = self.op_def.get('query')
30 |         target_database = self.op_def.get('target_database')
31 |         target_namespace = self.op_def.get('target_namespace')
32 |         target_table = self.op_def.get('target_table')
33 |         
34 |         # Create TableAddress object from target_table string
35 |         target_table_addr = TableAddress(source_name, target_database, target_namespace, target_table)
36 | 
37 |         source = self.proj.get_source(context,source_name)
38 |         with source.connect() as conn:
39 |             conn.drop_table(target_table_addr)
40 |             createTableSql = source.get_create_table_sql(query, target_table_addr)
41 |             print(f"Executing: {createTableSql}")
42 |             conn.execute_update(createTableSql)
43 |     
44 |         # logger.info(f"Finished \"{self.get_title()}\"")
45 |         context.add_to_log_op_finished(
46 |             logger, f"Finished \"" + self.get_title() + "\"")
47 | 


--------------------------------------------------------------------------------
/src/sequor/common/telemetry.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import uuid
 4 | import posthog
 5 | from typing import Dict
 6 | 
 7 | # # Set your PostHog project API key and host (or use env vars)
 8 | # POSTHOG_API_KEY = os.getenv("SEQUOR_TELEMETRY_KEY", "<your-posthog-key>")
 9 | # POSTHOG_HOST = os.getenv("SEQUOR_TELEMETRY_HOST", "https://app.posthog.com")
10 | 
11 | # Global state
12 | _logger_registry: Dict[str, "TelemetryLogger"] = {}
13 | _user_id = None
14 | _enabled = True
15 | 
class TelemetryLogger:
    """Named emitter of telemetry events, sent through the posthog client."""

    def __init__(self, name: str):
        # Reported as the "component" field of every event this logger sends.
        self.name = name

    def event(self, name: str, **props):
        """Send the event ``name`` with ``props`` as extra payload fields."""
        self._send(name, props)

    def _send(self, event_type: str, props: dict):
        """Capture one event via posthog unless telemetry is disabled.

        Any exception raised while building or sending the event is logged
        and suppressed, so telemetry never interrupts the caller.
        """
        log = logging.getLogger("sequor.telemetry")
        if not _enabled:
            return
        try:
            # Caller-supplied props are applied last and may override the
            # two standard fields.
            payload = {"component": self.name, "event_type": event_type}
            payload.update(props or {})
            log.info(f"Before: {event_type}")
            posthog.capture(_user_id, event_type, payload)
            log.info(f"After: {event_type}")
        except Exception:
            log.info(f"Event sending failed: {event_type}")
39 | 
40 | def _load_or_create_user_id(path: str):
41 |     if os.path.exists(path):
42 |         return open(path).read().strip()
43 |     uid = str(uuid.uuid4())
44 |     with open(path, 'w') as f:
45 |         f.write(uid)
46 |     return uid
47 | 
def basicConfig(api_key: str, host: str, user_id_file: str, enabled: bool = True):
    """Configure module-wide telemetry state.

    Args:
        api_key: PostHog project API key assigned to the posthog client.
        host: PostHog host assigned to the posthog client.
        user_id_file: Path of the file the user id is loaded from; the file
            is created with a fresh id when it does not exist.
        enabled: When False, loggers skip sending events.
    """
    global _user_id, _enabled
    posthog.project_api_key = api_key
    posthog.host = host
    _enabled = enabled
    _user_id = _load_or_create_user_id(user_id_file)
54 | 
def getLogger(name: str) -> TelemetryLogger:
    """Return the telemetry logger registered under ``name``, creating it on first use."""
    try:
        return _logger_registry[name]
    except KeyError:
        # First request for this name: build the logger and remember it so
        # later calls return the same instance.
        created = TelemetryLogger(name)
        _logger_registry[name] = created
        return created


--------------------------------------------------------------------------------
/src/sequor/operations/set_variable.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja, set_variable, set_variable_from_def
 5 | from sequor.core.op import Op
 6 | 
 7 | 
 8 | # @Op.register('set_variable')
 9 | class SetVariableOp(Op):
10 |     def __init__(self, proj, op_def: Dict[str, Any]):
11 |         self.name = op_def.get('op')
12 |         self.proj = proj
13 |         self.op_def = op_def
14 | 
15 |     def get_title(self) -> str:
16 |         op_id = self.op_def.get('id')
17 |         if (op_id is not None):
18 |             title = self.name + ": " + op_id
19 |         else:
20 |             title = self.name
21 |         return title
22 | 
23 |     def run(self, context, op_options: Dict[str, Any]):
24 |         logger = logging.getLogger("sequor.ops.set_variable")
25 |         self.op_def = render_jinja(context, self.op_def)
26 |         # var_name = Op.get_parameter(context, self.op_def, 'name', is_required=True)
27 |         # logger.info(f"Setting variable: {var_name}")
28 |         # var_value = Op.get_parameter(context, self.op_def, 'value', is_required=True)
29 |         # var_scope = Op.get_parameter(context, self.op_def, 'scope', is_required=False)
30 |         # if var_scope is None:
31 |         #     var_scope = "project"
32 |         # set_variable(context, var_name, var_value, var_scope)
33 |         # msg = f"Finished. Variable \"{var_name}\" set in scope \"{var_scope}\" to: {var_value}"
34 | 
35 |         set_def = Op.get_parameter(context, self.op_def, 'set', is_required=True, render=0, location_desc="set_variable")
36 |         vars_set = []
37 |         for var_name, var_value in set_def.items():
38 |             var_value_set, var_scope_set = set_variable_from_def(context, var_name, var_value)
39 |             var_set_str = f"\"{var_name}\" to \"{var_value_set}\" in {var_scope_set} scope"
40 |             logger.info(f"Setting variable: " + var_set_str)
41 |             vars_set.append(var_set_str)
42 |         # msg = f"Finished. Variables set: " + ", ".join(vars_set)
43 |         # context.add_to_log_op_finished(logger, msg)
44 |         context.add_to_log_op_finished(
45 |             logger, f"Finished \"" + self.get_title() + "\": variables set: " + ", ".join(vars_set))
46 | 


--------------------------------------------------------------------------------
/src/sequor/operations/run_flow.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from sequor.core.context import Context
 3 | from typing import Any, Dict
 4 | 
 5 | from sequor.common.executor_utils import render_jinja
 6 | from sequor.core.flow import Flow
 7 | from sequor.core.op import Op
 8 | from sequor.core.variable_bindings import VariableBindings
 9 | from sequor.source.table_address import TableAddress
10 | 
11 | 
12 | # @Op.register('run_flow')
class RunFlowOp(Op):
    """Operation that runs another flow of the project with its own parameters."""

    def __init__(self, proj, op_def: Dict[str, Any]):
        self.name = op_def.get('op')
        self.proj = proj
        self.op_def = op_def

    def get_title(self) -> str:
        """Return a display title built from ``title`` or else from ``flow``.

        When neither a title nor a flow name is defined the result is the
        bare string ``"unknown"`` (without the op-name prefix).
        """
        op_title = self.op_def.get('title')
        if op_title is not None:
            return f"{self.name}: {op_title}"
        flow_name = self.op_def.get('flow')
        # Spelled out as separate branches: the previous one-line conditional
        # expression applied its "else" to the whole concatenation, which is
        # the behaviour preserved here.
        if not flow_name:
            return "unknown"
        return f"{self.name}: {flow_name}"

    def run(self, context: Context, op_options: Dict[str, Any]):
        """Run the configured flow in a cloned context.

        Raises:
            ValueError: if ``start_step`` is not a non-negative integer.
        """
        logger = logging.getLogger("sequor.ops.run_flow")
        self.op_def = render_jinja(context, self.op_def)
        flow_name = self.op_def.get('flow')
        logger.info(f"Starting flow: {flow_name}")

        start_step = self.op_def.get('start_step')
        # A missing start_step means "start from the first step".
        try:
            start_step_int = int(start_step) if start_step is not None else 0
            if start_step_int < 0:
                raise ValueError(f"start_step cannot be negative: {start_step}")
        except (TypeError, ValueError) as e:
            # Chain the original error so the root cause stays in the traceback.
            raise ValueError(
                f"Invalid start_step value '{start_step}'. Must be a non-negative integer: {str(e)}"
            ) from e

        # "or {}" also covers a "parameters" key that is present but empty
        # (None), which would otherwise fail on .items().
        parameters_def = self.op_def.get('parameters') or {}
        # Clone the context to avoid mutating the original context
        new_context = context.clone()
        # The called flow sees only its declared parameters: local variables
        # of the current context are deliberately not passed along.
        flow_parameters_bindings = VariableBindings()
        for param_name, param_value in parameters_def.items():
            flow_parameters_bindings.set(param_name, param_value)
        new_context.set_variables(flow_parameters_bindings)

        flow = self.proj.get_flow(flow_name)
        flow.run(new_context, start_step_int)

        logger.info(f"Finished flow: {flow_name}")


--------------------------------------------------------------------------------
/src/sequor/source/connection.py:
--------------------------------------------------------------------------------
 1 | from typing import Union
 2 | from sequor.source.data_type import DataType
 3 | from sequor.source.model import Model
 4 | from sequor.source.row import Row
 5 | from sequor.source.source import Source
 6 | from sequor.source.table_address import TableAddress
 7 | 
 8 | class Connection:
 9 |     """Class representing a source connection"""
10 |     def __init__(self, source: Source):
11 |         self.source = source 
12 |         self.model: Model | None = None # used by DataLoader to store schema as it opens conn per table
13 | 
14 |     def get_model(self, table_addr: TableAddress):
15 |         raise NotImplementedError("Subclasses must implement get_model()")
16 | 
17 |     def drop_table(self, table_addr: TableAddress, only_if_exists: bool = True):
18 |         raise NotImplementedError("Subclasses must implement drop_table_if_exists()")
19 |     def create_table(self, table_addr: TableAddress):
20 |         raise NotImplementedError("Subclasses must implement create_table()")
21 |     def add_column(self, table_addr: TableAddress, column_name: str, column_type: DataType):
22 |         raise NotImplementedError("Subclasses must implement add_columns()")
23 |     def drop_column(self, table_addr: TableAddress, column_name: str):
24 |         raise NotImplementedError("Subclasses must implement drop_columns()")
25 |     
26 |     def execute_update(self, query: str):
27 |         raise NotImplementedError("Subclasses must implement execute_update()")
28 |     
29 |     def open_table_for_insert(self, table_addr: TableAddress, model: Union[Model, None] = None):
30 |         raise NotImplementedError("Subclasses must implement open_table_for_insert()")
31 |     def insert_row(self, row: Row):
32 |         raise NotImplementedError("Subclasses must implement insert_record()")
33 |     def close_table_for_insert(self):
34 |         raise NotImplementedError("Subclasses must implement close_table_for_insert()")
35 |     
36 |     def open_table_for_read(self, table_addr: TableAddress):
37 |         raise NotImplementedError("Subclasses must implement open_table_for_read()")
38 |     def open_query(self, table_addr: TableAddress, query: str):
39 |         raise NotImplementedError("Subclasses must implement open_query()")
40 |     def next_row(self):
41 |         raise NotImplementedError("Subclasses must implement next_row()")
42 |     def close_query(self):
43 |         raise NotImplementedError("Subclasses must implement close_query()")    
44 |     def close_table_for_read(self):
45 |         raise NotImplementedError("Subclasses must implement close_table_for_read()")
46 | 


--------------------------------------------------------------------------------
/src/sequor/operations/for_each.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.core.context import Context
 5 | from sequor.core.flow import Flow
 6 | from sequor.core.op import Op
 7 | from sequor.core.registry import create_op
 8 | from sequor.source.table_address import TableAddress
 9 | 
10 | 
11 | # @Op.register('for_each')
class ForEachOp(Op):
    """Control op that runs its ``steps`` once for every row of a table."""

    def __init__(self, proj, op_def: Dict[str, Any]):
        super().__init__(proj, op_def)

    def get_title(self) -> str:
        """Return the op name, followed by ``": <title>"`` when a title is defined."""
        op_title = self.op_def.get('title')
        return self.name if op_title is None else self.name + ": " + op_title

    def run(self, context: Context, op_options: Dict[str, Any]):
        """Read the configured table row by row and run the nested steps per row."""
        logger = logging.getLogger("sequor.ops.for_each")
        # A control op cannot render its whole definition up front: the nested
        # steps need a context that does not exist yet, so each parameter is
        # rendered on its own.
        logger.info("Starting")

        def read_param(key: str, required: bool):
            return Op.get_parameter(context, self.op_def, key, is_required=required, render=3)

        source_name = read_param('source', True)
        database_name = read_param('database', False)
        namespace_name = read_param('namespace', False)
        table_name = read_param('table', True)
        table_address = TableAddress(source_name, database_name, namespace_name, table_name)
        var_name = read_param('as', True)

        # The nested steps are wrapped in a single block op that is re-run
        # for every row.
        body_op = create_op(context.project, {
            "op": "block",
            "op_name_alias": "for_each_block",
            "steps": self.op_def.get('steps'),
        })
        loop_context = context.clone()
        loop_context.set_flow_info("for_each", None)
        loop_context.set_flow_step_info(None)

        processed = 0
        self.source = self.proj.get_source(context, source_name)
        with self.source.connect() as conn:
            conn.open_table_for_read(table_address)
            while True:
                current_row = conn.next_row()
                if current_row is None:
                    break
                processed += 1
                # Expose the current row to the nested steps under the "as" name
                loop_context.set_variable(var_name, current_row)
                context.job.run_op(loop_context, body_op, None)

        logger.info(f"Finished. Processed {processed} rows")


--------------------------------------------------------------------------------
/src/sequor/operations/execute.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.op import Op
 6 | from sequor.core.user_error import UserError
 7 | from sequor.source.table_address import TableAddress
 8 | 
 9 | 
10 | # @Op.register('execute')
class ExecuteOp(Op):
    """Operation that executes a script of statements against a source.

    The script is given in the ``statement`` parameter. Statements are
    separated by a line containing only ``go`` (case-insensitive), and the
    last statement must be followed by such a line as well.
    """

    def __init__(self, proj, op_def: Dict[str, Any]):
        self.name = op_def.get('op')
        self.proj = proj
        self.op_def = op_def

    def get_title(self) -> str:
        """Return ``"<op name>: <id>"``, or ``"unknown"`` when no id is defined."""
        op_id = self.op_def.get('id')
        if op_id is None:
            return "unknown"
        # f-string formatting also copes with a non-string id
        return f"{self.name}: {op_id}"

    def run(self, context, op_options: Dict[str, Any]):
        """Split the script on ``go`` lines and execute each statement in order.

        Raises:
            UserError: if ``statement`` is missing or blank, or if the script
                does not end with a ``go`` line.
        """
        # Use this op's own logger name (it was previously the transform op's).
        logger = logging.getLogger("sequor.ops.execute")
        self.op_def = render_jinja(context, self.op_def)
        logger.info(f"Starting \"{self.get_title()}\"")
        source_name = Op.get_parameter(context, self.op_def, 'source', is_required=True)

        script = self.op_def.get('statement')
        # A whitespace-only script is as unusable as a missing one; reject it
        # here instead of failing later with an IndexError.
        if not script or not script.strip():
            raise UserError("The 'statement' parameter is required and cannot be empty.")

        lines = script.strip().splitlines()
        # Validate the terminator before splitting so nothing is executed
        # for a malformed script.
        if lines[-1].strip().lower() != "go":
            raise UserError(
                "Missing 'go' command after the last statement. Each statement must be followed by a 'go' command on its own line."
            )

        # Collect the lines between consecutive 'go' separators.
        statements = []
        current_statement = []
        for line in lines:
            if line.strip().lower() == "go":
                stmt = "\n".join(current_statement).strip()
                # Skip empty statements produced by multiple 'go' commands in a row
                if stmt:
                    statements.append(stmt)
                current_statement = []
            else:
                current_statement.append(line)

        source = self.proj.get_source(context, source_name)
        with source.connect() as conn:
            for stmt in statements:
                logger.info(f"Executing statement: {stmt}")
                conn.execute_update(stmt)

        logger.info(f"Finished \"{self.get_title()}\"")


--------------------------------------------------------------------------------
/src/sequor/core/registry.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict
 2 | 
 3 | from sequor.core.op import Op
 4 | from sequor.core.user_error import UserError
 5 | 
 6 | from sequor.source.source import Source
 7 | from sequor.source.sources.duckdb_source import DuckDBSource
 8 | from sequor.source.sources.http_source import HTTPSource
 9 | from sequor.source.sources.sql_source import SQLSource
10 | 
def create_source(context: 'Context', source_name: str, source_def: Dict[str, Any]) -> Any:
    """Build the source object matching the ``type`` field of ``source_def``.

    Supported types are ``http``, ``postgres`` and ``duckdb``.

    Raises:
        ValueError: if the type is missing or not one of the supported ones.
    """
    kind = source_def.get('type')
    if kind == 'http':
        return HTTPSource(context, source_name, source_def)
    if kind == 'postgres':
        return SQLSource(context, source_name, source_def)
    if kind == 'duckdb':
        return DuckDBSource(context, source_name, source_def)
    raise ValueError(f"Unknown source type: {kind}")
23 | 
24 | # @classmethod
25 | # def create(cls, proj, op_def: Dict[str, Any]) -> 'Op':
26 | 
27 | 
def create_op(proj, op_def: Dict[str, Any]) -> 'Op':
    """Factory that instantiates the operation named by ``op_def['op']``.

    Each operation class is imported only when it is requested
    (presumably to avoid circular imports, since op modules such as
    for_each import this module -- TODO confirm).

    Raises:
        UserError: if the operation type is not recognised.
    """
    kind = op_def.get('op')
    if kind == "http_request":
        from sequor.operations.http_request import HTTPRequestOp
        return HTTPRequestOp(proj, op_def)
    if kind == "transform":
        from sequor.operations.transform import TransformOp
        return TransformOp(proj, op_def)
    if kind == "execute":
        from sequor.operations.execute import ExecuteOp
        return ExecuteOp(proj, op_def)
    if kind == "run_flow":
        from sequor.operations.run_flow import RunFlowOp
        return RunFlowOp(proj, op_def)
    if kind == "set_variable":
        from sequor.operations.set_variable import SetVariableOp
        return SetVariableOp(proj, op_def)
    if kind == "print":
        from sequor.operations.print import PrintOp
        return PrintOp(proj, op_def)
    if kind == "if":
        from sequor.operations.if_op import IfOp
        return IfOp(proj, op_def)
    if kind == "for_each":
        from sequor.operations.for_each import ForEachOp
        return ForEachOp(proj, op_def)
    if kind == "block":
        from sequor.operations.block import BlockOp
        return BlockOp(proj, op_def)
    if kind == "migrate_schema":
        from sequor.operations.migrate_schema import MigrateSchemaOp
        return MigrateSchemaOp(proj, op_def)
    raise UserError(f"Unknown operation: {kind}")
68 | 
69 | 
70 | 
71 | 
72 | 
73 | 
74 | 
75 | 
76 | 


--------------------------------------------------------------------------------
/src/sequor/core/context.py:
--------------------------------------------------------------------------------
 1 | from datetime import datetime
 2 | import logging
 3 | from typing import Any, TYPE_CHECKING
 4 | from sequor.core.flow_log_entry import FlowLogEntry
 5 | from sequor.core.variable_bindings import VariableBindings
 6 | 
 7 | # if TYPE_CHECKING:
 8 | #     from sequor.core.job import Job
 9 | #     from sequor.project.project import Project
10 | 
11 | 
class Context:
    """Execution context handed to operations.

    Bundles the environment, project and job with the current variable
    bindings, the flow log and information about the flow step that is
    currently executing.
    """

    def __init__(self, env: 'Environment', project: 'Project', job: 'Job'):
        self.env = env
        self.project = project
        self.cur_execution_stack_entry = None
        self.job = job
        self.variables = VariableBindings()
        self.flow_log = []

        # flow that is currently executing:
        self.flow_type_name = None  # can be: flow, if (for IfOp), None (for ForEachOp, WhileOp)
        self.flow_name = None
        self.flow_step_index = None
        self.flow_step_index_name = None

    # Used only for accessing project and env variables - not for execution
    @classmethod
    def from_project(cls, project: 'Project'):
        """Build a job-less context around ``project``.

        The constructor takes (env, project, job); the previous call passed
        only two arguments and therefore always raised TypeError.
        """
        # NOTE(review): assumes the project may expose its environment as
        # ``project.env``; when it does not, env stays None and only context
        # and project variables can be resolved -- confirm against Project.
        return cls(getattr(project, "env", None), project, None)

    def clone(self):
        """Return a new context sharing this one's state.

        The variable bindings and the flow log are shared by reference, not
        copied, so changes made through the clone are visible here too.
        """
        new_context = Context(self.env, self.project, self.job)
        new_context.variables = self.variables
        new_context.cur_execution_stack_entry = self.cur_execution_stack_entry
        new_context.flow_type_name = self.flow_type_name
        new_context.flow_name = self.flow_name
        new_context.flow_step_index = self.flow_step_index
        new_context.flow_step_index_name = self.flow_step_index_name
        new_context.flow_log = self.flow_log
        return new_context

    def set_variables(self, variables: VariableBindings):
        """Replace the whole set of variable bindings of this context."""
        self.variables = variables

    def set_variable(self, name: str, value: Any):
        """Bind ``name`` to ``value`` in this context's variables."""
        self.variables.set(name, value)

    def get_variable_value(self, name: str):
        """Look ``name`` up in context, then project, then environment variables.

        A level is consulted only when the previous one yielded None.
        """
        value = self.variables.get(name)
        if value is None:
            value = self.project.get_variable_value(name)
        # env can be None for a context built by from_project()
        if value is None and self.env is not None:
            value = self.env.get_variable_value(name)
        return value

    def set_flow_info(self, flow_type_name: str, flow_name: str):
        """Record the type and name of the flow that is executing."""
        self.flow_type_name = flow_type_name
        self.flow_name = flow_name

    def set_flow_step_info(self, index: int, index_name: str = None):
        """Record the index (and optional index label) of the current step."""
        self.flow_step_index = index
        self.flow_step_index_name = index_name

    def add_to_log_op_finished(self, logger: logging.Logger, message: str):
        """Append a finished-op entry to the flow log and log it with its duration.

        The start time is taken from the current execution stack entry.
        """
        start_time = self.cur_execution_stack_entry.start_time
        end_time = datetime.now()
        duration = end_time - start_time
        self.flow_log.append(FlowLogEntry(message, start_time, end_time))
        logger.info(f"{message} {duration}")
73 | 
74 |     
75 | 


--------------------------------------------------------------------------------
/src/sequor/source/row.py:
--------------------------------------------------------------------------------
 1 | from typing import List
 2 | 
 3 | from sequor.core.user_error import UserError
 4 | from .column import Column
 5 | 
 6 | 
 7 | class Row:
 8 |     def __init__(self):
 9 |         self.columns: list[Column] = []
10 | 
11 |     @staticmethod
12 |     def from_dict(data: dict) -> 'Row':
13 |         row = Row()
14 |         for name, value in data.items():
15 |             row.add_column(Column(name, value))
16 |         return row
17 |     
18 |     def to_dict(self) -> dict:
19 |         return {column.name: column.value for column in self.columns}
20 | 
21 |     def add_column(self, column: Column):
22 |         self.columns.append(column)
23 | 
24 |     def get_column(self, name: str) -> Column:
25 |         for column in self.columns:
26 |             if column.name == name:
27 |                 return column
28 |         raise UserError(f"Column '{name}' does not exist")
29 | 
30 |     def remove_column(self, name: str) -> bool:
31 |         for i, column in enumerate(self.columns):
32 |             if column.name == name:
33 |                 self.columns.pop(i)
34 |                 return True
35 |         return False
36 | 
37 |     # ------------ dict-style access method: beginning ------------
38 |     def __getitem__(self, key: str):
39 |         """Access column value by name (str) or index (int)"""
40 |         if isinstance(key, str):
41 |             column = self.get_column(key)
42 |             if column is None:
43 |                 raise KeyError(f"Column '{key}' does not exist")
44 |             return column.value
45 |         elif isinstance(key, int):
46 |             return self.columns[key].value
47 |         else:
48 |             raise UserError(f"Key must be string or integer, not {type(key).__name__}")
49 | 
50 |     def get(self, key: str, default=None):
51 |         column = self.get_column(key)
52 |         return column.value if column.value is not None else default
53 | 
54 | 
55 |     def __setitem__(self, key: str, value):
56 |         column = self.get_column(key)
57 |         if column is None:
58 |             self.add_column(Column(key, value))
59 |         else:
60 |             column.value = value
61 |     def __iter__(self):
62 |         """Make Row iterable (iterates through column names)"""
63 |         return (col.name for col in self.columns)
64 |     
65 |     def __len__(self):
66 |         """Return number of columns"""
67 |         return len(self.columns)
68 |     
69 |     def keys(self):
70 |         """Return column names"""
71 |         return (col.name for col in self.columns)
72 |     
73 |     def values(self):
74 |         """Return column values"""
75 |         return (col.value for col in self.columns)
76 |     
77 |     def items(self):
78 |         """Return (name, value) pairs"""
79 |         return ((col.name, col.value) for col in self.columns)
80 |     
81 |     def __contains__(self, key):
82 |         """Support for 'in' operator"""
83 |         if isinstance(key, str):
84 |             return any(col.name == key for col in self.columns)
85 |         return False
86 |     # ------------ dict-style access method: end ------------
87 | 
88 | 


--------------------------------------------------------------------------------
/src/sequor/operations/if_op.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.context import Context
 6 | from sequor.core.flow import Flow
 7 | from sequor.core.op import Op
 8 | from sequor.core.registry import create_op
 9 | 
10 | 
11 | 
12 | # @Op.register('if')
class IfOp(Op):
    """Control op that runs the steps of the first condition evaluating to true.

    When no condition matches, the steps under ``else`` are run instead.
    """

    def __init__(self, proj, op_def: Dict[str, Any]):
        super().__init__(proj, op_def)

    def get_title(self) -> str:
        """Return the op name, followed by ``": <id>"`` when an id is defined."""
        op_id = self.op_def.get('id')
        if op_id is None:
            return self.name
        return self.name + ": " + op_id

    def run(self, context: Context, op_options: Dict[str, Any]):
        """Evaluate the conditions in order and run the selected block of steps."""
        logger = logging.getLogger("sequor.ops.if")
        # A control op cannot render its whole definition up front: the nested
        # steps need a context that does not exist yet, so each parameter is
        # rendered on its own.
        logger.info(f"Starting \"{self.get_title()}\"")

        block_op = None
        matched_index = None
        for index, branch_def in enumerate(self.op_def.get('conditions')):
            # The unrendered condition text becomes the title of the block
            raw_condition = branch_def.get("condition")
            condition = Op.get_parameter(context, branch_def, 'condition', is_required=True, render=3)
            condition = Op.eval_parameter(context, condition, "condition", render=0, location_desc=None, extra_params=[])
            if str(condition).strip().lower() != "true":
                continue
            # First matching branch wins; later conditions are not evaluated.
            block_op = create_op(context.project, {
                "op": "block",
                "op_name_alias": "condition_block",
                "title": f"{raw_condition}",
                "steps": branch_def.get('then'),
            })
            matched_index = index
            break

        if matched_index is None:
            block_op = create_op(context.project, {
                "op": "block",
                "op_name_alias": "else_block",
                "steps": self.op_def.get('else'),
            })

        branch_context = context.clone()
        branch_context.set_flow_info("if", None)
        if matched_index is not None:
            branch_context.set_flow_step_info(matched_index, "condition")
        else:
            branch_context.set_flow_step_info(None)
        context.job.run_op(branch_context, block_op, None)

        context.add_to_log_op_finished(
            logger, "Finished \"" + self.get_title() + "\"")


--------------------------------------------------------------------------------
/src/sequor/operations/migrate_schema.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict
 3 | 
 4 | from sequor.common.executor_utils import render_jinja
 5 | from sequor.core.op import Op
 6 | from sequor.source.column_schema import ColumnSchema
 7 | from sequor.source.data_type import DataType
 8 | from sequor.source.model import Model
 9 | from sequor.source.table_address import TableAddress
10 | 
11 | 
12 | class MigrateSchemaOp(Op):
13 |     def __init__(self, proj, op_def: Dict[str, Any]):
14 |         self.name = op_def.get('op')
15 |         self.proj = proj
16 |         self.op_def = op_def
17 | 
18 |     def get_title(self) -> str:
19 |         title = self.name
20 |         op_id = self.op_def.get('id')
21 |         if (op_id is not None):
22 |             title = self.name + ": " + op_id
23 |         elif self.op_def.get('target_table') is not None:
24 |             title = self.name + ": " + self.op_def.get('target_table')
25 |         return title
26 | 
27 |     def run(self, context, op_options: Dict[str, Any]):
28 |         logger = logging.getLogger("sequor.ops.migrate_schema")
29 |         self.op_def = render_jinja(context, self.op_def)
30 |         logger.info(f"Starting \"{self.get_title()}\"")
31 |         target_source_name = self.op_def.get('target_source')
32 |         target_database_name = self.op_def.get('target_database')
33 |         target_namespace_name = self.op_def.get('target_namespace')
34 |         target_table_name = self.op_def.get('target_table')
35 |         columns_source_name = self.op_def.get('columns_source')
36 |         columns_database_name = self.op_def.get('columns_database')
37 |         columns_namespace_name = self.op_def.get('columns_namespace')
38 |         columns_table_name = self.op_def.get('columns_table')
39 | 
40 |         target_table_addr = TableAddress(target_source_name, target_database_name, target_namespace_name, target_table_name)
41 |         columns_table_addr = TableAddress(columns_source_name, columns_database_name, columns_namespace_name, columns_table_name)
42 | 
43 |         columns_model = None
44 |         columns_source = self.proj.get_source(context, columns_source_name)
45 |         with columns_source.connect() as columns_conn:
46 |             columns_conn.open_table_for_read(columns_table_addr)
47 |             columns_table_count = 0
48 |             column_row = columns_conn.next_row()
49 |             column_schemas = []
50 |             while column_row is not None:
51 |                 columns_table_count += 1
52 |                 column_schema = ColumnSchema(column_row.get('name'), DataType(column_row.get('type')))
53 |                 column_schemas.append(column_schema)
54 |                 column_row=columns_conn.next_row()
55 |             columns_model = Model.from_columns(column_schemas)
56 | 
57 |         target_source = self.proj.get_source(context, target_source_name)
58 |         with target_source.connect() as target_conn:
59 |             target_model = target_conn.get_model(target_table_addr)
60 |             # add new columns
61 |             for columns_column in columns_model.columns:
62 |                 target_column = target_model.get_column(columns_column.name)
63 |                 if target_column is None:
64 |                     target_conn.add_column(target_table_addr, columns_column.name, columns_column.type)
65 |             # drop removed columns
66 |             for target_column in target_model.columns:
67 |                 columns_column = columns_model.get_column(target_column.name)
68 |                 if columns_column is None:
69 |                     target_conn.drop_column(target_table_addr, target_column.name)
70 | 
71 |         context.add_to_log_op_finished(
72 |             logger, f"Finished \"" + self.get_title() + "\"")
73 | 


--------------------------------------------------------------------------------
/src/sequor/core/flow.py:
--------------------------------------------------------------------------------
 1 | from sequor.core.context import Context
 2 | from typing import Any, Dict, List, Optional, Union
 3 | from sequor.core.op import Op
 4 | from sequor.core.user_error import UserError
 5 | 
 6 | 
 7 | class Flow:
 8 |     """A flow containing a sequence of operations or nested flows"""
 9 |     def __init__(self, type_name: str, name: str = None, description: Optional[str] = None):
10 |         self.type_name = type_name
11 |         self.name = name
12 |         self.description = description
13 |         self.steps: List[Op] = []
14 | 
15 |     def add_step(self, step: Op) -> None:
16 |         """Add an operation or a nested flow to this flow"""
17 |         self.steps.append(step)
18 |     
19 |     def run(self, context: Context, start_step: int = 0, op_options: Dict[str, Any] = {}):
20 |         """Execute all steps in the flow sequentially"""
21 |         context.set_flow_info(self.type_name, self.name)
22 |         for op_index, op in enumerate(self.steps[start_step:], start=start_step):
23 |             # op.run(context)
24 |             context.set_flow_step_info(op_index)
25 |             context.job.run_op(context, op, op_options)
26 |     
27 |     # def get_child_blocks(self) -> List[Dict[str, List['Op']]]:
28 |     #     for step in self.steps:
29 |     #         op = Op.create(self.proj, step)
30 | 
31 |     def get_op_by_id(self, op_id: str) -> List[tuple['Op', List[tuple[str, int]]]]:
32 |         start = [{'root': self.steps}]
33 |         ops_found = self.find_op_by_id_helper(start, op_id)
34 |         if len(ops_found) == 0:
35 |             raise UserError(f"Operation with ID {op_id} not found in flow {self.name}")
36 |         elif len(ops_found) > 1:
37 |             # Build a string of all paths where the operation was found
38 |             paths = []
39 |             for op, path in ops_found:
40 |                 path_str = " -> ".join([f"{block_name}[{index}]" for block_name, index in path])
41 |                 paths.append(path_str)
42 |             
43 |             # Join all paths with commas for the error message
44 |             paths_str = ", ".join(paths)
45 |             raise UserError(f"Multiple operations with ID {op_id} found in flow {self.name}: {paths_str}")
46 |         return ops_found[0][0]
47 | 
48 |     def find_op_by_id_helper(self,blocks: List[Dict[str, List['Op']]], op_id: str) -> List[tuple['Op', List[tuple[str, int]]]]:
49 |         """Recursively find all operations with the given ID and their paths.
50 |         
51 |         Args:
52 |             blocks: List of dictionaries where each dictionary contains a block name and list of operations
53 |             op_id: The ID to search for
54 |             
55 |         Returns:
56 |             List of tuples containing (operation, path) where path is a list of (block_name, index) tuples
57 |         """
58 |         results = []
59 |         
60 |         for block in blocks:
61 |             for block_name, ops in block.items():
62 |                 for index, op in enumerate(ops):
63 |                     # Check if current op matches the ID
64 |                     if op.get_id() == op_id:
65 |                         results.append((op, [(block_name, index)]))
66 |                     
67 |                     # Recursively search in child blocks
68 |                     child_blocks = op.get_child_blocks()
69 |                     if child_blocks:
70 |                         child_results = self.find_op_by_id_helper(child_blocks, op_id)
71 |                         for child_op, child_path in child_results:
72 |                             # Prepend current block and index to the path
73 |                             full_path = [(block_name, index)] + child_path
74 |                             results.append((child_op, full_path))
75 |         
76 |         return results
77 |         
78 |                 


--------------------------------------------------------------------------------
/src/sequor/core/job.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from typing import Any, Dict, List
 3 | from sequor.common.common import Common
 4 | from sequor.core.context import Context
 5 | from sequor.core.environment import Environment
 6 | from sequor.core.execution_stack_entry import ExecutionStackEntry
 7 | from sequor.core.op import Op
 8 | from sequor.core.user_error import UserError
 9 | from sequor.project.project import Project
10 | import uuid
11 | 
12 | logger = logging.getLogger("sequor.job")
13 | 
14 | 
15 | class Job:
16 |     def __init__(self, env: Environment, project: Project, op: Op, options: dict):
17 |         self.env = env
18 |         self.project = project
19 |         self.op = op
20 |         self.execution_stack = []
21 |         self.options = options
22 | 
23 | 
24 |     def get_cur_stack_entry(self) -> ExecutionStackEntry:
25 |         if len(self.execution_stack) == 0:
26 |             return None
27 |         return self.execution_stack[-1]
28 | 
29 |     # logger: logging.Logger,
30 |     def run(self, op_options: Dict[str, Any]):
31 |         context = Context(self.env, self.project, self)
32 |         try:
33 |             self.run_op(context, self.op, op_options)
34 |         except Exception as e:
35 |             cur_stack_entry = self.get_cur_stack_entry()
36 | 
37 |             # Build job stacktrace lines
38 |             job_stacktrace_lines = []
39 |             for i, entry in enumerate(self.execution_stack):
40 |                 # Generate indentation based on stack depth
41 |                 indent = " " * (i * 2)
42 |                 location = None
43 |                 # flow_type_name is None in ops with single block such as ForEachOp
44 |                 # flow_name is None in the initial op of a job
45 |                 if entry.flow_type_name is None:
46 |                     location = ""
47 |                 else:
48 |                     if entry.flow_name is None:
49 |                         flow_name_str = ""
50 |                     else:
51 |                         flow_name_str = f" \"{entry.flow_name}\""
52 |                     if entry.flow_step_index is None:
53 |                         index_str = ""
54 |                     else:
55 |                         index_name = "step" if entry.flow_step_index_name is None else entry.flow_step_index_name
56 |                         index_str = f"{index_name} {entry.flow_step_index + 1} "
57 |                     location = f" [{index_str}in {entry.flow_type_name}{flow_name_str}]"
58 |                 log_str = f"{indent}{'-> ' if i > 0 else ''}\"{entry.op_title}\"{location}"
59 |                 job_stacktrace_lines.append(log_str)
60 | 
61 |             job_stacktrace = Common.get_exception_traceback()
62 |             if self.options.get("show_stacktrace"):
63 |                 logger.error("Python stacktrace:\n" + job_stacktrace)
64 |             # cur_stack_entry can be None if the error happens in the initial op of a job: e.g. in get_title() of an op during stack_entry creation
65 |             if cur_stack_entry is not None:
66 |                 error_msg = f"Error in \"{cur_stack_entry.op_title}\": {str(e)}"
67 |             else:
68 |                 error_msg = f"Error: {str(e)}"
69 |             if self.options.get("disable_flow_stacktrace") is not None and not self.options["disable_flow_stacktrace"]:
70 |                 error_msg = error_msg + "\nStacktrace (most recent op last):\n" + "\n".join(job_stacktrace_lines)
71 |             logger.error(error_msg)
72 |         flow_log_dict = [entry.to_dict() for entry in context.flow_log]
73 |         return {"flow_log": flow_log_dict}
74 | 
75 | 
76 | 
77 | 
78 |     def run_op(self, context: Context, op: Op, op_options: Dict[str, Any]):
79 |         prev_execution_stack_entry = context.cur_execution_stack_entry
80 |         stack_entry = ExecutionStackEntry(op.get_title(), context.flow_type_name, context.flow_name, context.flow_step_index, context.flow_step_index_name, prev_execution_stack_entry)
81 |         self.execution_stack.append(stack_entry)
82 |         context.cur_execution_stack_entry = stack_entry
83 |         op.run(context, op_options)
84 |         context.cur_execution_stack_entry = prev_execution_stack_entry
85 |         self.execution_stack.pop()
86 | 
87 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | # Sequor
  2 | Instead of using rigid SaaS connectors with growing costs, build complete API workflows that connect your database to any API using familiar YAML and SQL. Your requirements are fully met, with zero per-row costs.
  3 | 
  4 | Sequor fuses API execution with your database, enabling bidirectional data flow between APIs and database tables. By storing intermediate data in your database, you can leverage the full power of SQL for transformations, analytics, and business logic. This unified execution model eliminates the traditional boundary between iPaaS-style app integration and ETL-style data pipelines.
  5 | 
  6 | With Sequor's code-first approach (YAML for flows, Jinja or Python for dynamic parameters, and SQL for logic), you can apply software engineering best practices to integrations: version control, collaboration, CI/CD, and local development.
  7 | 
  8 | # Core capabilities
  9 | * **YAML for clear workflow structure**: Define integration flows in readable YAML. Version control friendly, no vendor lock-in. Built-in reliability: validation, retries, task-level observability.
 10 | 
 11 | * **Dynamic expressions for flexible logic**: Use Jinja templates `{{ var() }}` for environment variables. Add `_expression` suffix to any YAML property to compute it with Python. Infinite customization within YAML structure.
 12 | 
 13 | * **Built-in database integration**: Iterate over input tables to make API calls per record. Every HTTP response maps to database tables. Seamless data flow without custom glue code.
 14 | 
 15 | # Example: Pull Shopify orders → Compute customer metrics in Snowflake → Update Mailchimp
 16 | Create a Sequor project with the following 3-step flow. You get an end-to-end solution in minutes with just two operations: **transform** for running SQL and **http_request** for API interactions. 
 17 | 
 18 | ## Step 1: Fetch orders from Shopify API
 19 | Pull orders data into Snowflake. Jinja for source-level variables. Python for one-line response navigation.
 20 | ```yaml
 21 | - op: http_request
 22 |   request:
 23 |     source: "shopify"
 24 |     url: "https://{{ var('store_name') }}.myshopify.com/admin/api/{{ var('api_version') }}/orders.json"
 25 |     method: GET
 26 |     parameters:
 27 |       status: any
 28 |     headers:
 29 |       "Accept": "application/json"
 30 |   response:
 31 |     success_status: [200]
 32 |     tables:
 33 |       - source: "snowflake"
 34 |         table: "shopify_orders"
 35 |         columns: {
 36 |           "id": "text", "customer_id": "text", "email": "text", 
 37 |           "created_at": "text", "total_price": "text", "total_items": "text"
 38 |         }
 39 |         data_expression: response.json()['orders']
 40 | ```
 41 | 
 42 | ## Step 2: Compute customer metrics in SQL
 43 | Calculate total spend and order count per customer in Snowflake.
 44 | ```yaml
 45 | - op: transform
 46 |   source: "snowflake"
 47 |   target_table: "customer_metrics"
 48 |   query: |
 49 |     SELECT
 50 |       email,
 51 |       SUM(total_price::decimal) as total_spent,
 52 |       COUNT(*) as order_count
 53 |     FROM shopify_orders
 54 |     WHERE email IS NOT NULL
 55 |     GROUP BY email
 56 | ```
 57 | 
 58 | ## Step 3: Update Mailchimp API
 59 | Push customer metrics to Mailchimp. Python is used for advanced URL construction and flexible body formation.
 60 | ```yaml
 61 | - op: http_request
 62 |   for_each:
 63 |     source: "snowflake"
 64 |     table: "customer_metrics"
 65 |     as: customer
 66 |   request:
 67 |     source: "mailchimp"
 68 |     url_expression: |
 69 |       email = var('customer')['email']
 70 |       import hashlib
 71 |       subscriber_hash = hashlib.md5(email.lower().encode()).hexdigest()
 72 |       return "https://{{ var('dc') }}.api.mailchimp.com/{{ var('api_version') }}/lists/{{ var('mailchimp_list_id') }}/members/" + subscriber_hash
 73 |     method: PATCH
 74 |     body_format: json
 75 |     body_expression: |
 76 |         customer = var('customer')
 77 |         return {
 78 |           "merge_fields": {
 79 |             "TOTALSPENT": float(customer['total_spent']),
 80 |             "ORDERCOUNT": customer['order_count']
 81 |           }
 82 |         }
 83 |   response:
 84 |     success_status: [200]
 85 | ```
 86 | 
 87 | 
 88 | # Getting started
 89 | * [Install Sequor](https://docs.sequor.dev/getting-started/installation). Easy to get started with `pip install sequor`.
 90 | * [Follow Quickstart](https://docs.sequor.dev/getting-started/quickstart)
 91 | * [Explore examples of real-life integrations](https://github.com/paloaltodatabases/sequor-integrations)
 92 | * [Documentation](https://docs.sequor.dev/)
 93 | 
 94 | # Community
 95 | * [Discuss Sequor on GitHub](https://github.com/paloaltodatabases/sequor/discussions) - To get help and participate in discussions about best practices, or any other conversation that would benefit from being searchable
 96 | 
 97 | # Stay connected
 98 | * [Subscribe to our newsletter](https://buttondown.com/sequor) -  Updates on new releases and features, guides, and case studies.
 99 | 
100 | 
101 | 
102 | 
103 | 
104 | 
105 |   
106 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # =======================================================================
  2 | # IDE specific
  3 | # =======================================================================
  4 | #PyCharm / JetBrains IDE config
  5 | .idea/
  6 | *.iml
  7 | .vscode/
  8 | 
  9 | # =======================================================================
 10 | # Python specific
 11 | # =======================================================================
 12 | 
 13 | # Byte-compiled / optimized / DLL files
 14 | __pycache__/
 15 | *.py[cod]
 16 | *$py.class
 17 | 
 18 | # C extensions
 19 | *.so
 20 | 
 21 | # Distribution / packaging
 22 | .Python
 23 | build/
 24 | develop-eggs/
 25 | dist/
 26 | downloads/
 27 | eggs/
 28 | .eggs/
 29 | lib/
 30 | lib64/
 31 | parts/
 32 | sdist/
 33 | var/
 34 | wheels/
 35 | share/python-wheels/
 36 | *.egg-info/
 37 | .installed.cfg
 38 | *.egg
 39 | MANIFEST
 40 | 
 41 | # PyInstaller
 42 | #  Usually these files are written by a python script from a template
 43 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 44 | *.manifest
 45 | *.spec
 46 | 
 47 | # Installer logs
 48 | pip-log.txt
 49 | pip-delete-this-directory.txt
 50 | 
 51 | # Unit test / coverage reports
 52 | htmlcov/
 53 | .tox/
 54 | .nox/
 55 | .coverage
 56 | .coverage.*
 57 | .cache
 58 | nosetests.xml
 59 | coverage.xml
 60 | *.cover
 61 | *.py,cover
 62 | .hypothesis/
 63 | .pytest_cache/
 64 | cover/
 65 | 
 66 | # Translations
 67 | *.mo
 68 | *.pot
 69 | 
 70 | # Django stuff:
 71 | *.log
 72 | local_settings.py
 73 | db.sqlite3
 74 | db.sqlite3-journal
 75 | 
 76 | # Flask stuff:
 77 | instance/
 78 | .webassets-cache
 79 | 
 80 | # Scrapy stuff:
 81 | .scrapy
 82 | 
 83 | # Sphinx documentation
 84 | docs/_build/
 85 | 
 86 | # PyBuilder
 87 | .pybuilder/
 88 | target/
 89 | 
 90 | # Jupyter Notebook
 91 | .ipynb_checkpoints
 92 | 
 93 | # IPython
 94 | profile_default/
 95 | ipython_config.py
 96 | 
 97 | # pyenv
 98 | #   For a library or package, you might want to ignore these files since the code is
 99 | #   intended to run in multiple environments; otherwise, check them in:
100 | # .python-version
101 | 
102 | # pipenv
103 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
104 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
105 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
106 | #   install all needed dependencies.
107 | #Pipfile.lock
108 | 
109 | # UV
110 | #   Similar to Pipfile.lock, it is generally recommended to include uv.lock in version control.
111 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
112 | #   commonly ignored for libraries.
113 | #uv.lock
114 | 
115 | # poetry
116 | #   Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
117 | #   This is especially recommended for binary packages to ensure reproducibility, and is more
118 | #   commonly ignored for libraries.
119 | #   https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
120 | #poetry.lock
121 | 
122 | # pdm
123 | #   Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
124 | #pdm.lock
125 | #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
126 | #   in version control.
127 | #   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
128 | .pdm.toml
129 | .pdm-python
130 | .pdm-build/
131 | 
132 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
133 | __pypackages__/
134 | 
135 | # Celery stuff
136 | celerybeat-schedule
137 | celerybeat.pid
138 | 
139 | # SageMath parsed files
140 | *.sage.py
141 | 
142 | # Environments
143 | .env
144 | .venv
145 | env/
146 | venv/
147 | ENV/
148 | env.bak/
149 | venv.bak/
150 | 
151 | # Spyder project settings
152 | .spyderproject
153 | .spyproject
154 | 
155 | # Rope project settings
156 | .ropeproject
157 | 
158 | # mkdocs documentation
159 | /site
160 | 
161 | # mypy
162 | .mypy_cache/
163 | .dmypy.json
164 | dmypy.json
165 | 
166 | # Pyre type checker
167 | .pyre/
168 | 
169 | # pytype static type analyzer
170 | .pytype/
171 | 
172 | # Cython debug symbols
173 | cython_debug/
174 | 
175 | # PyCharm
176 | #  JetBrains specific template is maintained in a separate JetBrains.gitignore that can
177 | #  be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
178 | #  and can be added to the global gitignore or merged into this file.  For a more nuclear
179 | #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
180 | #.idea/
181 | 
182 | # Ruff stuff:
183 | .ruff_cache/
184 | 
185 | # PyPI configuration file
186 | .pypirc
187 | 
188 | # =======================================================================
189 | # Mac specific
190 | # =======================================================================
191 | 
192 | # General
193 | .DS_Store
194 | .AppleDouble
195 | .LSOverride
196 | 
197 | # Icon must end with two \r
198 | Icon
199 | 
200 | # Thumbnails
201 | ._*
202 | 
203 | # Files that might appear in the root of a volume
204 | .DocumentRevisions-V100
205 | .fseventsd
206 | .Spotlight-V100
207 | .TemporaryItems
208 | .Trashes
209 | .VolumeIcon.icns
210 | .com.apple.timemachine.donotpresent
211 | 
212 | # Directories potentially created on remote AFP share
213 | .AppleDB
214 | .AppleDesktop
215 | Network Trash Folder
216 | Temporary Items
217 | .apdisk
218 | 


--------------------------------------------------------------------------------
/src/sequor/source/sources/duckdb_connection.py:
--------------------------------------------------------------------------------
  1 | from typing import Union
  2 | from sqlalchemy import MetaData, Table, create_engine, text
  3 | 
  4 | from sequor.source.column import Column
  5 | from sequor.source.column_schema import ColumnSchema
  6 | from sequor.source.data_type import DataType
  7 | from sequor.source.model import Model
  8 | from sequor.source.row import Row
  9 | from sequor.source.source import Source
 10 | from sequor.source.connection import Connection
 11 | from sequor.source.sources.sql_connection import SQLConnection
 12 | from sequor.source.table_address import TableAddress
 13 | 
 14 | class DuckDBConnection(SQLConnection):
 15 |     def __init__(self, source: Source):
 16 |         super().__init__(source)
 17 |         self.open()
 18 | 
 19 |     def open(self):
 20 |         self.engine = create_engine(
 21 |             self.source.connStr,
 22 |             connect_args={
 23 |             }
 24 |         )
 25 |         self.conn = self.engine.connect()
 26 | 
 27 |     def close(self):
 28 |         if self.conn:
 29 |             self.conn.close()
 30 | 
 31 |     def __enter__(self):
 32 |         self.open()
 33 |         return self
 34 | 
 35 |     def __exit__(self, exc_type, exc_val, exc_tb):
 36 |         self.close()
 37 | 
 38 |     def get_model(self, table_addr: TableAddress):
 39 |         metadata = MetaData()
 40 |         if table_addr.namespace_name is None:
 41 |             users_table = Table(table_addr.table_name, metadata, autoload_with=self.engine)
 42 |         else:
 43 |             users_table = Table(table_addr.table_name, metadata, schema=table_addr.namespace_name, autoload_with=self.engine)
 44 |         column_schemas = [ColumnSchema(c.name, DataType(c.type)) for c in users_table.columns]
 45 |         return Model.from_columns(column_schemas)
 46 | 
 47 |     def drop_table(self, table_addr: TableAddress, only_if_exists: bool = True):
 48 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 49 |         self.conn.execute(text(f"DROP TABLE {'IF EXISTS' if only_if_exists else ''} {table_qualified_name}"))
 50 |     
 51 |     def create_table(self, table_addr: TableAddress, model: Model):
 52 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 53 |         self.conn.execute(text(f"CREATE TABLE {table_qualified_name} ({', '.join([c.name + ' ' + c.type.name for c in model.columns])})"))
 54 |     
 55 |     def execute_update(self, query: str):
 56 |         self.conn.execute(text(query))
 57 |         self.conn.commit()
 58 | 
 59 |     def open_table_for_insert(self, table_addr: TableAddress, model: Union[Model, None] = None, autocommit: bool = False):
 60 |         self.open_table_for_insert_table_addr = table_addr
 61 |         if model is not None:
 62 |             self.open_table_for_insert_model = model
 63 |         else:
 64 |             self.open_table_for_insert_model = self.get_model(table_addr)
 65 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 66 | 
 67 |         # build sql
 68 |         columns_sql = [c.name for c in self.open_table_for_insert_model.columns]
 69 |         placeholders_sql = [f":{c.name}" for c in self.open_table_for_insert_model.columns]
 70 |         sql = f"INSERT INTO {table_qualified_name}(" + ", ".join(columns_sql) + ") VALUES (" + ", ".join(placeholders_sql) + ")"
 71 |         
 72 |         self.open_table_for_insert_stmt = text(sql);
 73 |         self.conn.autocommit = autocommit
 74 |         self.open_table_for_insert_autocommit = autocommit
 75 | 
 76 |     def insert_row(self, row: Row):
 77 |         row_dict = row.to_dict()
 78 |         self.conn.execute(self.open_table_for_insert_stmt, row_dict )
 79 | 
 80 |     def close_table_for_insert(self):
 81 |         if not self.open_table_for_insert_autocommit:
 82 |             self.conn.commit()
 83 |         self.open_table_for_insert_stmt = None
 84 |         self.open_table_for_insert_model = None
 85 |         self.open_table_for_insert_table_addr = None
 86 | 
 87 |     def open_table_for_read(self, table_addr: TableAddress):
 88 |         query = f"SELECT * FROM {self.source.get_qualified_name(table_addr)}"
 89 |         self.open_query(query)
 90 | 
 91 |  
 92 |     def open_query(self, query_str: str):
 93 |         query = text(query_str)
 94 |         self.conn.execution_options(stream_results=True)
 95 |         self.open_table_for_read_result = self.conn.execute(query)
 96 |         # to get precision and scale use:
 97 |         # for col in self.open_table_for_read_result.cursor.description
 98 |         # name = col[0] precision = col[4] scale = col[5]
 99 |         col_schemas = []
100 |         for col in self.open_table_for_read_result.cursor.description:
101 |             col_name = col[0]  # Column name
102 |             col_type = str(col[1])  # Data type (DBAPI-specific type object)
103 |             precision = col[4]  # Precision for numeric columns
104 |             scale = col[5]  # Scale for numeric columns (if applicable)
105 |             col_schema = ColumnSchema(col_name, DataType(col_type, precision, scale))
106 |             col_schemas.append(col_schema)
107 |         # for col in self.open_table_for_read_result._metadata.columns:
108 |         #     col_schema = ColumnSchema(col.name, col.type)
109 |         #     col_schemas.append(col_schema)
110 |         self.open_table_for_read_model = Model.from_columns(col_schemas)
111 | 
112 |     def next_row(self):
113 |         row_source = next(self.open_table_for_read_result, None)
114 |         if row_source is not None:
115 |             row = Row()
116 |             for i, col_schema in enumerate(self.open_table_for_read_model.columns):
117 |                 col_name = col_schema.name
118 |                 col_value = row_source[i]  # Access tuple by index instead of column name
119 |                 row.add_column(Column(col_name, col_value))
120 |             return row
121 |         else:
122 |             return None
123 |     
124 |     def close_query(self):
125 |         self.open_table_for_read_result.close()
126 | 
127 |     def close_table_for_read(self):
128 |         self.open_table_for_read_result.close()
129 | 
130 |         
131 | 
132 | 


--------------------------------------------------------------------------------
/src/sequor/core/op.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List, Type, ClassVar, Union
  2 | 
  3 | from sequor.common.common import Common
  4 | from sequor.common.executor_utils import UserContext, load_user_function, render_jinja
  5 | from sequor.core.context import Context
  6 | from sequor.common.executor_utils import user_function_params_def
  7 | from sequor.core.user_error import UserError
  8 | 
  9 | 
 10 | class Op:
 11 |     """Base class for all operations"""
 12 |     # # Registry to store operation types and their corresponding classes
 13 |     # _registry: ClassVar[Dict[str, Type['Op']]] = {}
 14 |     
 15 |     # @classmethod
 16 |     # def register(cls, op_type: str):
 17 |     #     """Decorator to register operation classes"""
 18 |     #     def decorator(op_class: Type['Op']):
 19 |     #         # Register the operation class
 20 |     #         cls._registry[op_type] = op_class
 21 |     #         return op_class
 22 |     #     return decorator
 23 |     
 24 | 
 25 |     def __init__(self, proj, op_def: Dict[str, Any]):
 26 |         self.name = op_def.get('op')
 27 |         self.proj = proj
 28 |         self.op_def = op_def
 29 | 
 30 |     def get_title(self) -> str:
 31 |         """Return the title of this operation; subclasses must override this method."""
 32 |         raise NotImplementedError("Subclasses must implement get_title")
 33 | 
 34 |     def run(self, context: Context, op_options: Dict[str, Any]) -> Dict[str, Any]:
 35 |         """Execute this operation with the given context; subclasses must override this method."""
 36 |         raise NotImplementedError("Subclasses must implement run")
 37 | 
 38 |     # render: 0 - none, 1 - value only, 2 - expression only, 3 - both
 39 |     @staticmethod
 40 |     def get_parameter(context: Context, op_def: Dict[str, Any], name: str, is_required: bool = False, render: int = 0, location_desc: str = None) -> Any: # function_params_def: str = "context"
 41 |         param_value = op_def.get(name)
 42 |         param_expression = op_def.get(f"{name}_expression")
 43 |         param_expression_line = Common.get_line_number(op_def, f"{name}_expression")
 44 |         if render == 1:
 45 |             param_value = render_jinja(context, param_value)
 46 |         elif render == 2:
 47 |             param_expression = render_jinja(context, param_expression)
 48 |         elif render == 3:
 49 |             param_value = render_jinja(context, param_value)
 50 |             param_expression = render_jinja(context, param_expression)
 51 |         result_value = None
 52 |         if param_value and param_expression:
 53 |             raise ValueError(f"Both {name} and {name}_expression are specified in the definition. Only one of them can be specified.")
 54 |         elif param_expression is not None:
 55 |             if not isinstance(param_expression, str):
 56 |                 raise UserError(f"Parameter '{name}_expression' must be a string. Type '{type(param_expression).__name__}' provided: {str(param_expression)}")
 57 |             result_value = load_user_function(param_expression, f"{name}_expression", param_expression_line) # function_params_def=function_params_def
 58 |         elif param_value is not None:
 59 |             result_value = param_value
 60 |         else:
 61 |             if is_required:
 62 |                 err_msg = f"{name} or {name}_expression must be specified"
 63 |                 if location_desc:
 64 |                     err_msg = err_msg + f" in {location_desc}"
 65 |                 raise ValueError(err_msg)
 66 |         return result_value
 67 |     
 68 |     # render: 0 - none, 1 - value only; no need to render expression as it is already a compiled function
 69 |     @staticmethod
 70 |     def eval_parameter(context: Context, value: Any, name: str, render: int = 0, null_literal: bool = False, location_desc: str = None, extra_params: List[Any] = []) -> Any: 
 71 |         try:    
 72 |             if value and callable(value):
 73 |                 user_context = UserContext(context)
 74 |                 # params = [user_context] + extra_params
 75 |                 # must match parameters defined in user_function_params_def of executor_utils.py
 76 |                 params = [user_context, user_context.is_var_defined, user_context.var, user_context.table, user_context.query, user_context.query_scalar] + extra_params
 77 |                 if len(params) < len(user_function_params_def):
 78 |                     while len(params) < len(user_function_params_def):
 79 |                         params.append(None)
 80 |                 res = value(*params)
 81 |             else:
 82 |                 res = render_jinja(context, value, null_literal) if render == 1 else value
 83 |         except Exception as e:
 84 |             err_msg = f"Error evaluating '{name}'"
 85 |             if location_desc:
 86 |                 err_msg = err_msg + f" in {location_desc}"
 87 |             err_msg = err_msg + f": {e}"
 88 |             raise UserError(err_msg) from e
 89 |         return res
 90 |     
 91 |     @staticmethod
 92 |     def eval_dict(context: Context, value_dict: Dict[str, Any], name: str, location_desc: str = None, extra_params: List[Any] = []): # function_params_def: str = "context", 
 93 |         if value_dict is None:
 94 |             return None
 95 |         res = {}
 96 |         for key_name, key_value_def in value_dict.items():
 97 |                 if key_name.endswith("_expression"):
 98 |                     key_name_real = key_name[:-11]  # Remove "_expression" suffix
 99 |                     key_value_def = Op.get_parameter(context, value_dict, key_name_real, is_required=False, render=3, location_desc= f"{location_desc}.{name}" if location_desc else name) # , function_params_def=function_params_def
100 |                     key_value_def = Op.eval_parameter(context, key_value_def, key_name_real,render=0, location_desc= f"{location_desc}.{name}" if location_desc else name, extra_params=extra_params)
101 |                     res[key_name_real] = key_value_def
102 |                 else:
103 |                     res[key_name] = key_value_def
104 |         return res
105 | 
106 |     def get_child_blocks(self) -> List[Dict[str, List['Op']]]:
107 |         return []
108 |     
109 |     def get_id(self) -> Union[str, None]:
110 |         id = self.op_def.get('id')
111 |         return id


--------------------------------------------------------------------------------
/src/sequor/source/sources/sql_connection.py:
--------------------------------------------------------------------------------
  1 | from typing import Union
  2 | from sqlalchemy import MetaData, Table, create_engine, text
  3 | 
  4 | from sequor.source.column import Column
  5 | from sequor.source.column_schema import ColumnSchema
  6 | from sequor.source.data_type import DataType
  7 | from sequor.source.model import Model
  8 | from sequor.source.row import Row
  9 | from sequor.source.source import Source
 10 | from sequor.source.connection import Connection
 11 | from sequor.source.table_address import TableAddress
 12 | 
 13 | class SQLConnection(Connection):
 14 |     def __init__(self, source: Source):
 15 |         super().__init__(source)
 16 |         self.open()
 17 | 
 18 |     def open(self):
 19 |         self.engine = create_engine(
 20 |             self.source.connStr,
 21 |             connect_args={
 22 |                 'user': self.source.username,
 23 |                 'password': self.source.password
 24 |             },
 25 |         )
 26 |         self.conn = self.engine.connect()
 27 | 
 28 |     def close(self):
 29 |         if self.conn:
 30 |             self.conn.close()
 31 | 
 32 |     def __enter__(self):
 33 |         self.open()
 34 |         return self
 35 | 
 36 |     def __exit__(self, exc_type, exc_val, exc_tb):
 37 |         self.close()
 38 | 
 39 |     def get_model(self, table_addr: TableAddress):
 40 |         metadata = MetaData()
 41 |         if table_addr.namespace_name is None:
 42 |             users_table = Table(table_addr.table_name, metadata, autoload_with=self.engine)
 43 |         else:
 44 |             users_table = Table(table_addr.table_name, metadata, schema=table_addr.namespace_name, autoload_with=self.engine)
 45 |         column_schemas = [ColumnSchema(c.name, DataType(c.type)) for c in users_table.columns]
 46 |         return Model.from_columns(column_schemas)
 47 | 
 48 |     def drop_table(self, table_addr: TableAddress, only_if_exists: bool = True):
 49 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 50 |         self.conn.execute(text(f"DROP TABLE {'IF EXISTS' if only_if_exists else ''} {table_qualified_name}"))
 51 |     
 52 |     def create_table(self, table_addr: TableAddress, model: Model):
 53 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 54 |         query = f"CREATE TABLE {table_qualified_name} ({', '.join([self.source.quote_name(c.name) + ' ' + c.type.name for c in model.columns])})"
 55 |         self.conn.execute(text(query))
 56 |     
 57 |     def add_column(self, table_addr: TableAddress, column_name: str, column_type: DataType):
 58 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 59 |         self.conn.execute(text(f"ALTER TABLE {table_qualified_name} ADD COLUMN {column_name} {column_type.name}"))
 60 | 
 61 |     def drop_column(self, table_addr: TableAddress, column_name: str):
 62 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 63 |         self.conn.execute(text(f"ALTER TABLE {table_qualified_name} DROP COLUMN {column_name}"))
 64 | 
 65 |     def execute_update(self, query: str):
 66 |         self.conn.execute(text(query))
 67 |         self.conn.commit()
 68 | 
 69 |     def open_table_for_insert(self, table_addr: TableAddress, model: Union[Model, None] = None, autocommit: bool = False):
 70 |         self.open_table_for_insert_table_addr = table_addr
 71 |         if model is not None:
 72 |             self.open_table_for_insert_model = model
 73 |         else:
 74 |             self.open_table_for_insert_model = self.get_model(table_addr)
 75 |         table_qualified_name = self.source.get_qualified_name(table_addr)
 76 | 
 77 |         # build sql
 78 |         columns_sql = [self.source.quote_name(c.name) for c in self.open_table_for_insert_model.columns]
 79 |         placeholders_sql = [f":{c.name}" for c in self.open_table_for_insert_model.columns]
 80 |         sql = f"INSERT INTO {table_qualified_name}(" + ", ".join(columns_sql) + ") VALUES (" + ", ".join(placeholders_sql) + ")"
 81 |         
 82 |         self.open_table_for_insert_stmt = text(sql);
 83 |         self.conn.autocommit = autocommit
 84 |         self.open_table_for_insert_autocommit = autocommit
 85 | 
 86 |     def insert_row(self, row: Row):
 87 |         row_dict = row.to_dict()
 88 |         self.conn.execute(self.open_table_for_insert_stmt, row_dict )
 89 | 
 90 |     def close_table_for_insert(self):
 91 |         if not self.open_table_for_insert_autocommit:
 92 |             self.conn.commit()
 93 |         self.open_table_for_insert_stmt = None
 94 |         self.open_table_for_insert_model = None
 95 |         self.open_table_for_insert_table_addr = None
 96 | 
 97 |     def open_table_for_read(self, table_addr: TableAddress):
 98 |         query = f"SELECT * FROM {self.source.get_qualified_name(table_addr)}"
 99 |         self.open_query(query)
100 | 
101 |  
102 |     def open_query(self, query_str: str):
103 |         query = text(query_str)
104 |         self.conn.execution_options(stream_results=True)
105 |         self.open_table_for_read_result = self.conn.execute(query)
106 |         # to get precision and scale use:
107 |         # for col in self.open_table_for_read_result.cursor.description
108 |         # name = col[0] precision = col[4] scale = col[5]
109 |         col_schemas = []
110 |         for col in self.open_table_for_read_result.cursor.description:
111 |             col_name = col[0]  # Column name
112 |             col_type = str(col[1])  # Data type (DBAPI-specific type object)
113 |             precision = col[4]  # Precision for numeric columns
114 |             scale = col[5]  # Scale for numeric columns (if applicable)
115 |             col_schema = ColumnSchema(col_name, DataType(col_type, precision, scale))
116 |             col_schemas.append(col_schema)
117 |         # for col in self.open_table_for_read_result._metadata.columns:
118 |         #     col_schema = ColumnSchema(col.name, col.type)
119 |         #     col_schemas.append(col_schema)
120 |         self.open_table_for_read_model = Model.from_columns(col_schemas)
121 | 
122 |     def next_row(self):
123 |         row_source = next(self.open_table_for_read_result, None)
124 |         if row_source is not None:
125 |             row = Row()
126 |             for i, col_schema in enumerate(self.open_table_for_read_model.columns):
127 |                 col_name = col_schema.name
128 |                 col_value = row_source[i]  # Access tuple by index instead of column name
129 |                 row.add_column(Column(col_name, col_value))
130 |             return row
131 |         else:
132 |             return None
133 |     
134 |     def close_query(self):
135 |         self.open_table_for_read_result.close()
136 | 
137 |     def close_table_for_read(self):
138 |         self.open_table_for_read_result.close()
139 | 
140 |         
141 | 
142 | 


--------------------------------------------------------------------------------
/src/sequor/common/data_loader.py:
--------------------------------------------------------------------------------
  1 | from typing import Any, Dict, List
  2 | from sqlalchemy import Connection
  3 | from sequor.core.context import Context
  4 | from sequor.core.user_error import UserError
  5 | from sequor.source.model import Model
  6 | from sequor.source.row import Row
  7 | from sequor.source.table_address import TableAddress
  8 | from sequor.source.column import Column
  9 | 
 10 | class TableAddressToConnectionMap:
 11 |     def __init__(self, table_addr: TableAddress, conn: Connection):
 12 |         self.table_addr = table_addr
 13 |         self.conn = conn
 14 | 
 15 | 
 16 | class DataLoader:
 17 |     """Class for loading data from data definition"""
 18 |     def __init__(self, proj):
 19 |         self.proj = proj
 20 |         # self.source_name = source_name
 21 |         # self.table_addrs = table_addrs
 22 |         self._conn_pool: List[TableAddressToConnectionMap] = []
 23 | 
 24 |     # def get_model(self, model_name: str, model_def: Dict[str, Any], table_name: str) -> None:
 25 |     #     model = None
 26 |     #     if model_def is not None:
 27 |     #         model = Model.from_model_def(model_def)
 28 |     #     elif model_name is not None:
 29 |     #         model_spec = self.proj.get_specification("model", model_name)
 30 |     #         model = Model(model_spec.spec_def)
 31 |     #     else:
 32 |     #         raise Exception(f"Either model name or model specification must be provided for table: {table_name}")
 33 |     #     return model
 34 | 
 35 |     def get_connection(self, context: Context, table_addr: TableAddress, write_mode: str) -> Connection:
 36 |         conn = None
 37 |         for mapping in self._conn_pool:
 38 |             if (mapping.table_addr.source_name == table_addr.source_name and
 39 |                 ((mapping.table_addr.database_name is None and table_addr.database_name is None) or
 40 |                  (mapping.table_addr.database_name is not None and table_addr.database_name is not None and
 41 |                   mapping.table_addr.database_name == table_addr.database_name)) and
 42 |                 ((mapping.table_addr.namespace_name is None and table_addr.namespace_name is None) or
 43 |                  (mapping.table_addr.namespace_name is not None and table_addr.namespace_name is not None and
 44 |                   mapping.table_addr.namespace_name == table_addr.namespace_name)) and
 45 |                 mapping.table_addr.table_name == table_addr.table_name):
 46 |                 conn = mapping.conn
 47 |                 break
 48 |         if conn is None:
 49 |             source = self.proj.get_source(context, table_addr.source_name)
 50 |             if source is None:
 51 |                 raise Exception(f"Source not found: {table_addr.source_name}")
 52 |             new_conn = source.connect();
 53 |             table_addr_sub = table_addr.clone() # because we want original tableLoc to be added to the mapping (before spaceName enrichment)
 54 |             if table_addr_sub.namespace_name is None:
 55 |                 table_addr_sub.namespace_name = source.get_default_namespace_name()
 56 |             # model = self.get_model(model_name, model_def, table_addr_sub.table_name)
 57 |             model = Model.from_model_def(table_addr.model_def)
 58 |             if write_mode == "create":
 59 |                 new_conn.drop_table(table_addr_sub)
 60 |                 new_conn.create_table(table_addr_sub, model)
 61 |             elif write_mode == "append":
 62 |                 pass
 63 |             else:
 64 |                 raise Exception(f"Unknown write mode: {write_mode}")
 65 | 
 66 |             self._conn_pool.append(TableAddressToConnectionMap(table_addr, new_conn)) # notice that we use "table_addr" not "table_addr_sub"
 67 |             new_conn.open_table_for_insert(table_addr_sub, model)
 68 |             new_conn.model = model # used in run() to create records to insert
 69 |             conn = new_conn # because we return "conn" to the caller and we want it to be the newly created connection
 70 |         return conn
 71 | 
 72 |     def close(self):
 73 |         for mapping in self._conn_pool:
 74 |             if mapping.conn is not None:
 75 |                 mapping.conn.close_table_for_insert()
 76 |                 mapping.conn.close()
 77 | 
 78 |     def run(self, context: Context, tables: List[TableAddress]) -> None:  # List[Dict[str, Any]]
 79 |         # if isinstance(tables_def, dict): # data for tables defined in response.tables section of http_request op
 80 |         # elif isinstance(tables_def, list): # not just data but full tables (definition + data)
 81 |         # else:
 82 |         #     raise UserError(f"Unknown type of tables data. Must be a dict or a list: {type(tables_def)}")
 83 | 
 84 |         for table_addr in tables:
 85 |             # data_def = tables_def.get(table_addr.table_name)
 86 |             # if data_def is None:
 87 |             #     raise UserError(f"Data for the target table {table_addr.table_name} not found in the result returned by the HTTP response parser.")
 88 |             # data_def = table_def.get('data')
 89 |             # model_def = table_def.get('model')
 90 |             # model_name = None
 91 |             # if isinstance(model_def, str):
 92 |             #     model_name = model_def
 93 |             # else:
 94 |             #     model_name = None
 95 |             # model_def = {"columns": table_addr.columns_def}
 96 |             write_mode = table_addr.write_mode
 97 |             if write_mode is None:
 98 |                 write_mode = "create"
 99 |             # if data_def is not None: # skip quietly if no data, we used it in InfoLink for HTTPRequest op but why?
100 |             conn = self.get_connection(context, table_addr, write_mode)
101 |             # insert data
102 |             table_data = table_addr.data
103 |             if not isinstance(table_data, list):
104 |                 raise UserError(f"'data' for table '{table_addr.table_name}' must be a list. Type '{type(table_data).__name__}' provided: {str(table_data)}")
105 |             for record_def in table_data:
106 |                 record = Row()
107 |                 for column_schema in conn.model.columns:
108 |                     column_name = column_schema.name
109 |                     if not isinstance(record_def, dict):
110 |                         raise UserError(f"Element of 'data' array for table '{table_addr.table_name}' must be a dictionary.  Type '{type(record_def).__name__}' provided: {str(record_def)}")
111 |                     column_value = record_def.get(column_name)
112 |                     column_value_str = str(column_value) if column_value is not None else None # need to convert to string because it can be any type returned by the source
113 |                     column = Column(column_name, column_value_str)
114 |                     record.add_column(column)
115 |                 conn.insert_row(record)
116 | 
117 | 


--------------------------------------------------------------------------------
/src/sequor/project/project.py:
--------------------------------------------------------------------------------
  1 | from enum import Enum
  2 | import os
  3 | from pathlib import Path
  4 | import tempfile
  5 | # import yaml
  6 | from ruamel.yaml import YAML
  7 | 
  8 | # from sequor.core.environment import Environment
  9 | # from sequor.core.instance import Instance
 10 | from sequor.core.context import Context
 11 | from sequor.core.registry import create_op, create_source
 12 | from sequor.operations.block import BlockOp
 13 | from sequor.operations.execute import ExecuteOp
 14 | from sequor.operations.for_each import ForEachOp
 15 | from sequor.operations.http_request import HTTPRequestOp
 16 | from sequor.operations.if_op import IfOp
 17 | from sequor.operations.print import PrintOp
 18 | from sequor.operations.run_flow import RunFlowOp
 19 | from sequor.operations.set_variable import SetVariableOp
 20 | from sequor.operations.transform import TransformOp
 21 | from sequor.source.sources.duckdb_source import DuckDBSource
 22 | from typing import Any, Dict, List
 23 | 
 24 | from sequor.core.flow import Flow
 25 | from sequor.core.op import Op
 26 | from sequor.core.user_error import UserError
 27 | from sequor.project.specification import Specification
 28 | from sequor.source.source import Source
 29 | from sequor.source.sources.http_source import HTTPSource
 30 | from sequor.source.sources.sql_source import SQLSource
 31 | 
 32 | class Project:
 33 |     def __init__(self, project_dir: Path, home_dir):  # instance: Instance, env: Environment,
 34 |         self.yaml = YAML()
 35 |         self.yaml.preserve_quotes = True 
 36 |         # self.instance = instance
 37 | 
 38 |         # self.env = env
 39 |         self.home_dir = home_dir
 40 |         self.project_dir = project_dir
 41 |         self.flows_dir = os.path.join(project_dir, "flows")
 42 |         self.sources_dir = os.path.join(project_dir, "sources")
 43 |         self.specs_dir = os.path.join(project_dir, "specifications")
 44 | 
 45 |         # Load project configuration file
 46 |         project_def_file = os.path.join(self.project_dir, f"project.yaml")
 47 |         if not os.path.exists(project_def_file):
 48 |             raise UserError(f"Project configuration file does not exist: {project_def_file}")
 49 | 
 50 |         with open(project_def_file, 'r') as f:
 51 |             project_def = self.yaml.load(f)
 52 |             self.project_name = project_def.get('name')
 53 |             if self.project_name is None:
 54 |                 raise UserError(f"Project configuration file does not contain 'name' field: {project_def_file}")
 55 |             # self.project_version = project_def.get('version')
 56 |      
 57 |         self.project_state_dir = self.home_dir / "project_state" / self.project_name
 58 |         self.project_vars_file = os.path.join(self.project_state_dir, "variables.yaml")
 59 |         
 60 |     def get_source(self, context: Context, source_name: str) -> Any:
 61 |         # Construct flow file path
 62 |         source_file = os.path.join(self.sources_dir, f"{source_name}.yaml")
 63 | 
 64 |         # Check if file exists
 65 |         if not os.path.exists(source_file):
 66 |             raise UserError(
 67 |                 f"Source \"{source_name}\" not found: file does not exist: {source_file}")
 68 | 
 69 |         # Load and parse the flow
 70 |         with open(source_file, 'r') as f:
 71 |             source_def = self.yaml.load(f)
 72 |         source = create_source(context, source_name, source_def)
 73 |         return source
 74 |     
 75 |     # @classmethod
 76 |     # def create(cls, proj, op_def: Dict[str, Any]) -> 'Op':
 77 | 
 78 |     # @staticmethod
 79 |     # def op_from_def(proj, op_def: Dict[str, Any]) -> 'Op':
 80 |     #     return create_op(proj, op_def)
 81 | 
 82 |     def get_flow(self, flow_name: str) -> Flow:
 83 |         # Construct flow file path
 84 |         flow_file = os.path.join(self.flows_dir, f"{flow_name}.yaml")
 85 |         
 86 |         # Check if file exists
 87 |         if not os.path.exists(flow_file):
 88 |             raise UserError(f"Flow \"{flow_name}\" not found: file does not exist: {flow_file}")
 89 |         
 90 |         # Load and parse the flow
 91 |         try:
 92 |             with open(flow_file, 'r') as f:
 93 |                 flow_def = self.yaml.load(f)
 94 |         except Exception as e:
 95 |             raise UserError(f"Error loading flow definition: {e}")
 96 |         
 97 |         # Parse the flow definition into a Flow object
 98 |         description = flow_def.get('description', '')
 99 |         flow = Flow("flow", flow_name, description)
100 |         ops = flow_def.get('steps', [])
101 |         for op_def in ops:
102 |             op = create_op(self, op_def)
103 |             flow.add_step(op)
104 | 
105 |         return flow
106 |     
107 |     def build_flow_from_block_def(self, block_def: List[Dict[str, Any]]) -> Flow:
108 |         flow = Flow("block", name = None, description = None)
109 |         for op_def in block_def:
110 |             op = create_op(self, op_def)
111 |             flow.add_step(op)
112 |         return flow
113 |     
114 |     
115 |     def list_flows(self) -> List[str]:
116 |         if not os.path.exists(self.flows_dir):
117 |             return []
118 |             
119 |         flow_files = [f for f in os.listdir(self.flows_dir) 
120 |                      if f.endswith('.yaml') and os.path.isfile(os.path.join(self.flows_dir, f))]
121 |         
122 |         # Strip .yaml extension to get flow names
123 |         flow_names = [os.path.splitext(f)[0] for f in flow_files]
124 |         
125 |         return flow_names
126 | 
127 |     
128 |     def get_specification(self, spec_type: str, spec_name: str) -> Specification:
129 |         # Construct file path
130 |         spec_file = os.path.join(self.specs_dir, spec_type, f"{spec_name}.yaml")
131 |         
132 |         # Check if file exists
133 |         if not os.path.exists(spec_file):
134 |             raise UserError(f"Specification \"{spec_name}\" not found: file does not exist: {spec_file}")
135 |         
136 |         # Load and parse the flow
137 |         with open(spec_file, 'r') as f:
138 |             spec_def = self.yaml.load(f)
139 |         
140 |         return spec_def
141 |     
142 |     def set_variable(self, var_name: str, var_value: Any):
143 |         # Read current data
144 |         try:
145 |             with open(self.project_vars_file, 'r') as f:
146 |                 vars = self.yaml.load(f) or {}
147 |         except (FileNotFoundError):  
148 |             # Ensure the directory for the variables file exists. We will create the file later when we write the variable
149 |             project_vars_dir = os.path.dirname(self.project_vars_file)
150 |             os.makedirs(project_vars_dir, exist_ok=True)
151 |             vars = {}
152 |         
153 |         # Update variable
154 |         vars[var_name] = var_value
155 |         
156 |         # Write to temp file and replace
157 |         dir_path = os.path.dirname(self.project_vars_file)
158 |         fd, temp_path = tempfile.mkstemp(dir=dir_path or '.')
159 |         try:
160 |             with os.fdopen(fd, 'w') as f:
161 |                 self.yaml.dump(vars, f)
162 |             os.replace(temp_path, self.project_vars_file)
163 |         except Exception:
164 |             # Clean up the temp file if something goes wrong
165 |             if os.path.exists(temp_path):
166 |                 os.unlink(temp_path)
167 |             raise
168 |     
169 |     def get_variable_value(self, var_name: str):
170 |         # Try to read the variables file
171 |         try:
172 |             with open(self.project_vars_file, 'r') as f:
173 |                 vars = self.yaml.load(f) or {}
174 |                 
175 |             # Return the variable value if it exists, otherwise return default_value
176 |             return vars.get(var_name) # None if the variable is not set
177 |         except FileNotFoundError:
178 |             # File doesn't exist, means that the variable is not set
179 |             return None
180 |     
181 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                                  Apache License
  2 |                            Version 2.0, January 2004
  3 |                         http://www.apache.org/licenses/
  4 | 
  5 |    TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
  6 | 
  7 |    1. Definitions.
  8 | 
  9 |       "License" shall mean the terms and conditions for use, reproduction,
 10 |       and distribution as defined by Sections 1 through 9 of this document.
 11 | 
 12 |       "Licensor" shall mean the copyright owner or entity authorized by
 13 |       the copyright owner that is granting the License.
 14 | 
 15 |       "Legal Entity" shall mean the union of the acting entity and all
 16 |       other entities that control, are controlled by, or are under common
 17 |       control with that entity. For the purposes of this definition,
 18 |       "control" means (i) the power, direct or indirect, to cause the
 19 |       direction or management of such entity, whether by contract or
 20 |       otherwise, or (ii) ownership of fifty percent (50%) or more of the
 21 |       outstanding shares, or (iii) beneficial ownership of such entity.
 22 | 
 23 |       "You" (or "Your") shall mean an individual or Legal Entity
 24 |       exercising permissions granted by this License.
 25 | 
 26 |       "Source" form shall mean the preferred form for making modifications,
 27 |       including but not limited to software source code, documentation
 28 |       source, and configuration files.
 29 | 
 30 |       "Object" form shall mean any form resulting from mechanical
 31 |       transformation or translation of a Source form, including but
 32 |       not limited to compiled object code, generated documentation,
 33 |       and conversions to other media types.
 34 | 
 35 |       "Work" shall mean the work of authorship, whether in Source or
 36 |       Object form, made available under the License, as indicated by a
 37 |       copyright notice that is included in or attached to the work
 38 |       (an example is provided in the Appendix below).
 39 | 
 40 |       "Derivative Works" shall mean any work, whether in Source or Object
 41 |       form, that is based on (or derived from) the Work and for which the
 42 |       editorial revisions, annotations, elaborations, or other modifications
 43 |       represent, as a whole, an original work of authorship. For the purposes
 44 |       of this License, Derivative Works shall not include works that remain
 45 |       separable from, or merely link (or bind by name) to the interfaces of,
 46 |       the Work and Derivative Works thereof.
 47 | 
 48 |       "Contribution" shall mean any work of authorship, including
 49 |       the original version of the Work and any modifications or additions
 50 |       to that Work or Derivative Works thereof, that is intentionally
 51 |       submitted to Licensor for inclusion in the Work by the copyright owner
 52 |       or by an individual or Legal Entity authorized to submit on behalf of
 53 |       the copyright owner. For the purposes of this definition, "submitted"
 54 |       means any form of electronic, verbal, or written communication sent
 55 |       to the Licensor or its representatives, including but not limited to
 56 |       communication on electronic mailing lists, source code control systems,
 57 |       and issue tracking systems that are managed by, or on behalf of, the
 58 |       Licensor for the purpose of discussing and improving the Work, but
 59 |       excluding communication that is conspicuously marked or otherwise
 60 |       designated in writing by the copyright owner as "Not a Contribution."
 61 | 
 62 |       "Contributor" shall mean Licensor and any individual or Legal Entity
 63 |       on behalf of whom a Contribution has been received by Licensor and
 64 |       subsequently incorporated within the Work.
 65 | 
 66 |    2. Grant of Copyright License. Subject to the terms and conditions of
 67 |       this License, each Contributor hereby grants to You a perpetual,
 68 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 69 |       copyright license to reproduce, prepare Derivative Works of,
 70 |       publicly display, publicly perform, sublicense, and distribute the
 71 |       Work and such Derivative Works in Source or Object form.
 72 | 
 73 |    3. Grant of Patent License. Subject to the terms and conditions of
 74 |       this License, each Contributor hereby grants to You a perpetual,
 75 |       worldwide, non-exclusive, no-charge, royalty-free, irrevocable
 76 |       (except as stated in this section) patent license to make, have made,
 77 |       use, offer to sell, sell, import, and otherwise transfer the Work,
 78 |       where such license applies only to those patent claims licensable
 79 |       by such Contributor that are necessarily infringed by their
 80 |       Contribution(s) alone or by combination of their Contribution(s)
 81 |       with the Work to which such Contribution(s) was submitted. If You
 82 |       institute patent litigation against any entity (including a
 83 |       cross-claim or counterclaim in a lawsuit) alleging that the Work
 84 |       or a Contribution incorporated within the Work constitutes direct
 85 |       or contributory patent infringement, then any patent licenses
 86 |       granted to You under this License for that Work shall terminate
 87 |       as of the date such litigation is filed.
 88 | 
 89 |    4. Redistribution. You may reproduce and distribute copies of the
 90 |       Work or Derivative Works thereof in any medium, with or without
 91 |       modifications, and in Source or Object form, provided that You
 92 |       meet the following conditions:
 93 | 
 94 |       (a) You must give any other recipients of the Work or
 95 |           Derivative Works a copy of this License; and
 96 | 
 97 |       (b) You must cause any modified files to carry prominent notices
 98 |           stating that You changed the files; and
 99 | 
100 |       (c) You must retain, in the Source form of any Derivative Works
101 |           that You distribute, all copyright, patent, trademark, and
102 |           attribution notices from the Source form of the Work,
103 |           excluding those notices that do not pertain to any part of
104 |           the Derivative Works; and
105 | 
106 |       (d) If the Work includes a "NOTICE" text file as part of its
107 |           distribution, then any Derivative Works that You distribute must
108 |           include a readable copy of the attribution notices contained
109 |           within such NOTICE file, excluding those notices that do not
110 |           pertain to any part of the Derivative Works, in at least one
111 |           of the following places: within a NOTICE text file distributed
112 |           as part of the Derivative Works; within the Source form or
113 |           documentation, if provided along with the Derivative Works; or,
114 |           within a display generated by the Derivative Works, if and
115 |           wherever such third-party notices normally appear. The contents
116 |           of the NOTICE file are for informational purposes only and
117 |           do not modify the License. You may add Your own attribution
118 |           notices within Derivative Works that You distribute, alongside
119 |           or as an addendum to the NOTICE text from the Work, provided
120 |           that such additional attribution notices cannot be construed
121 |           as modifying the License.
122 | 
123 |       You may add Your own copyright statement to Your modifications and
124 |       may provide additional or different license terms and conditions
125 |       for use, reproduction, or distribution of Your modifications, or
126 |       for any such Derivative Works as a whole, provided Your use,
127 |       reproduction, and distribution of the Work otherwise complies with
128 |       the conditions stated in this License.
129 | 
130 |    5. Submission of Contributions. Unless You explicitly state otherwise,
131 |       any Contribution intentionally submitted for inclusion in the Work
132 |       by You to the Licensor shall be under the terms and conditions of
133 |       this License, without any additional terms or conditions.
134 |       Notwithstanding the above, nothing herein shall supersede or modify
135 |       the terms of any separate license agreement you may have executed
136 |       with Licensor regarding such Contributions.
137 | 
138 |    6. Trademarks. This License does not grant permission to use the trade
139 |       names, trademarks, service marks, or product names of the Licensor,
140 |       except as required for reasonable and customary use in describing the
141 |       origin of the Work and reproducing the content of the NOTICE file.
142 | 
143 |    7. Disclaimer of Warranty. Unless required by applicable law or
144 |       agreed to in writing, Licensor provides the Work (and each
145 |       Contributor provides its Contributions) on an "AS IS" BASIS,
146 |       WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 |       implied, including, without limitation, any warranties or conditions
148 |       of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 |       PARTICULAR PURPOSE. You are solely responsible for determining the
150 |       appropriateness of using or redistributing the Work and assume any
151 |       risks associated with Your exercise of permissions under this License.
152 | 
153 |    8. Limitation of Liability. In no event and under no legal theory,
154 |       whether in tort (including negligence), contract, or otherwise,
155 |       unless required by applicable law (such as deliberate and grossly
156 |       negligent acts) or agreed to in writing, shall any Contributor be
157 |       liable to You for damages, including any direct, indirect, special,
158 |       incidental, or consequential damages of any character arising as a
159 |       result of this License or out of the use or inability to use the
160 |       Work (including but not limited to damages for loss of goodwill,
161 |       work stoppage, computer failure or malfunction, or any and all
162 |       other commercial damages or losses), even if such Contributor
163 |       has been advised of the possibility of such damages.
164 | 
165 |    9. Accepting Warranty or Additional Liability. While redistributing
166 |       the Work or Derivative Works thereof, You may choose to offer,
167 |       and charge a fee for, acceptance of support, warranty, indemnity,
168 |       or other liability obligations and/or rights consistent with this
169 |       License. However, in accepting such obligations, You may act only
170 |       on Your own behalf and on Your sole responsibility, not on behalf
171 |       of any other Contributor, and only if You agree to indemnify,
172 |       defend, and hold each Contributor harmless for any liability
173 |       incurred by, or claims asserted against, such Contributor by reason
174 |       of your accepting any such warranty or additional liability.
175 | 
176 |    END OF TERMS AND CONDITIONS
177 | 
178 |    APPENDIX: How to apply the Apache License to your work.
179 | 
180 |       To apply the Apache License to your work, attach the following
181 |       boilerplate notice, with the fields enclosed by brackets "[]"
182 |       replaced with your own identifying information. (Don't include
183 |       the brackets!)  The text should be enclosed in the appropriate
184 |       comment syntax for the file format. We also recommend that a
185 |       file or class name and description of purpose be included on the
186 |       same "printed page" as the copyright notice for easier
187 |       identification within third-party archives.
188 | 
189 |    Copyright [yyyy] [name of copyright owner]
190 | 
191 |    Licensed under the Apache License, Version 2.0 (the "License");
192 |    you may not use this file except in compliance with the License.
193 |    You may obtain a copy of the License at
194 | 
195 |        http://www.apache.org/licenses/LICENSE-2.0
196 | 
197 |    Unless required by applicable law or agreed to in writing, software
198 |    distributed under the License is distributed on an "AS IS" BASIS,
199 |    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 |    See the License for the specific language governing permissions and
201 |    limitations under the License.
202 | 


--------------------------------------------------------------------------------
/src/sequor/cli.py:
--------------------------------------------------------------------------------
  1 | # run_flow.py
  2 | import logging
  3 | import os
  4 | from pathlib import Path
  5 | import sys
  6 | import yaml
  7 | from sequor.common import telemetry
  8 | from sequor.common.common import Common
  9 | from sequor.core.context import Context
 10 | from sequor.core.environment import Environment
 11 | from sequor.core.execution_stack_entry import ExecutionStackEntry
 12 | from sequor.core.instance import Instance
 13 | from sequor.core.job import Job
 14 | from sequor.core.user_error import UserError
 15 | from sequor.operations.run_flow import RunFlowOp
 16 | from sequor.project.project import Project
 17 | 
 18 | import typer
 19 | # import typer.core
 20 | # typer.core.rich = None
 21 | 
 22 | # Disable rich traceback: rich_traceback=False
 23 | app = typer.Typer(
 24 |     pretty_exceptions_enable=False,
 25 |     pretty_exceptions_show_locals=False,
 26 |     rich_markup_mode=None)
 27 | env_app = typer.Typer(
 28 |     pretty_exceptions_enable=False,
 29 |     pretty_exceptions_show_locals=False,
 30 |     rich_markup_mode=None)
 31 | app.add_typer(env_app, name="env")
 32 | 
 33 | @app.command()
 34 | def version():
 35 |     from sequor import __version__
 36 |     typer.echo(f"Sequor version: {__version__}")
 37 | 
 38 | @env_app.command()
 39 | def init(
 40 |     env_name: str = typer.Argument(..., help="Name for the new environment."),
 41 |     home_dir_cli: str = typer.Option(None, "--home-dir", help="Path to Sequor home directory"),
 42 |     show_stacktrace: bool = typer.Option(False, "--stacktrace", help="Show the Python exception stack trace", is_flag=True),
 43 | ):
 44 |     logger = logging.getLogger("sequor.cli")    
 45 |     try:
 46 |         instance = Instance(home_dir_cli)
 47 | 
 48 |         sequor_home_dir = instance.get_home_dir()
 49 |         envs_dir = sequor_home_dir / "envs"
 50 |         envs_dir.mkdir(parents=True, exist_ok=True)
 51 | 
 52 |         env_file = envs_dir / f"{env_name}.yaml"
 53 |         if env_file.exists():
 54 |             raise UserError(f"Environment with such name already exists: {os.path.abspath(env_file)}")
 55 |         env_file.touch()
 56 |         env_file.write_text("variables:\n")
 57 |         typer.echo(f"Environment initialized successfully: {env_file.resolve()}")        
 58 |     except Exception as e:
 59 |         if show_stacktrace:
 60 |             job_stacktrace = Common.get_exception_traceback()
 61 |             logger.error("Python stacktrace:\n" + job_stacktrace)
 62 |         logger.error(f"Error initializing environment \"{env_name}\": " + str(e))        
 63 |         raise typer.Exit(code=1)
 64 | 
 65 | @app.command()
 66 | def init(
 67 |     project_dir: str = typer.Argument(..., help="Path to directory for the new project. Will be created if it doesn't exist. Example: ~/my-sequor-project"),
 68 |     home_dir_cli: str = typer.Option(None, "--home-dir", help="Path to Sequor home directory"),
 69 |     show_stacktrace: bool = typer.Option(False, "--stacktrace", help="Show the Python exception stack trace", is_flag=True),
 70 | ):
 71 |     logger = logging.getLogger("sequor.cli")
 72 |     try:
 73 |         instance = Instance(home_dir_cli)
 74 | 
 75 | 
 76 |         # Check if the directory already exists and is not empty
 77 |         project_path = Path(os.path.expanduser(project_dir))
 78 |         if project_path.exists() and any(project_path.iterdir()):
 79 |             raise UserError(f"Project directory '{project_dir}' already exists and is not empty.")
 80 |             
 81 |         # Create the project directory if it does not exist
 82 |         project_path.mkdir(parents=True, exist_ok=True)
 83 |         # Initialize the project
 84 |         (project_path / "flows").mkdir(parents=True, exist_ok=True)
 85 |         (project_path / "sources").mkdir(parents=True, exist_ok=True)
 86 |         project_name = project_path.name
 87 |         project_conf_file = project_path / f"project.yaml"
 88 |         project_conf_file.touch()
 89 |         project_conf_file.write_text(f"name: \"{project_name}\"\n")
 90 |         logger.info(f"Project initialized successfully at {project_path.resolve()}")
 91 |     except Exception as e:
 92 |         if show_stacktrace:
 93 |             job_stacktrace = Common.get_exception_traceback()
 94 |             logger.error("Python stacktrace:\n" + job_stacktrace)
 95 |         logger.error(f"Error initializing project \"{project_dir}\": " + str(e))
 96 |         raise typer.Exit(code=1)
 97 |    
 98 | 
 99 | @app.command()
100 | def run(
101 |     flow_name: str = typer.Argument(..., help="Flow to run (e.g. 'myflow' or 'salesforce/account_sync')"),
102 |     # op_mode: str = typer.Option(None, "--op-mode", help="Operation-specific mode for debugging or diagnostics (e.g. 'preview_response' for http_request op)"),
103 |     home_dir_cli: str = typer.Option(None, "--home-dir", help="Path to Sequor home directory"),
104 |     project_dir_cli: str = typer.Option(None, "--project-dir", "-p", help="Path to Sequor project"),
105 |     env_name_cli: str = typer.Option(None, "--env", help="Environment name"),
106 | 
107 |     # Job-level options
108 |     disable_flow_stacktrace: bool = typer.Option(False, "--disable-flow-stacktrace", help="Show the execution path through the flow operations", is_flag=True),
109 |     show_stacktrace: bool = typer.Option(False, "--stacktrace", help="Show the Python exception stack trace", is_flag=True),
110 | 
111 |     op_id: str = typer.Option(None, "--op-id", help="ID of the operation to run"),
112 | 
113 |     # http_request op specific options
114 |     debug_foreach_record: str = typer.Option(None, "--debug-httprequest-foreach-test-record", help="Run with a test for_each record specified as JSON object (or records if batching is enabled as JSON array). The record(s) should match the structure expected by the operation. Example: --debug-test-record='{\"email\":\"test@example.com\"}'"),
115 |     debug_request_preview_trace: bool = typer.Option(False, "--debug-httprequest-preview-trace", help="Run only HTTP request part and show HTTP request trace", is_flag=True),
116 |     debug_request_preview_pretty: bool = typer.Option(False, "--debug-httprequest-preview-pretty", help="Run only HTTP request part and show pretty trace", is_flag=True),
117 |     debug_response_parser_preview: bool = typer.Option(False, "--debug-httprequest-response-parser-preview", help="Show parser result without applying it", is_flag=True),
118 | ):
119 |     logger = logging.getLogger("sequor.cli")
120 |     try:
121 |         instance = Instance(home_dir_cli)
122 |         # logger.info("Starting Sequor CLI")
123 |         telemetry_logger = telemetry.getLogger("sequor.cli")
124 |         telemetry_logger.event("cli_start", command="run")
125 |         
126 |         # Setting project dir
127 |         if project_dir_cli:
128 |             project_dir = Path(project_dir_cli)
129 |             if not project_dir.exists():
130 |                 raise UserError(f"Project directory passed as CLI --project-dir argument does not exist: {project_dir_cli}")            
131 |         else:
132 |             current_dir = os.getcwd()
133 |             project_dir = Path(current_dir)
134 | 
135 |         # Setting env dir
136 |         env_os_var = os.environ.get("SEQUOR_ENV")
137 |         env_project_file = project_dir / "env.yaml"
138 |         # default_env_dir = Path.home() / "sequor_env"
139 |         if env_name_cli:
140 |             env_name = env_name_cli
141 |             # env_dir = Path(os.path.expanduser(env_dir_cli))
142 |             # if not env_dir.exists():
143 |             #     raise UserError(f"Environment directory passed as CLI --env-dir argument does not exist: {env_dir_cli}")
144 |         elif env_project_file.exists():
145 |             with env_project_file.open("r") as f:
146 |                 env_project_data = yaml.safe_load(f)
147 |                 if "env" not in env_project_data:
148 |                     raise UserError(f"'env' key not found in project environment file: {env_project_file}")
149 |                 env_name = env_project_data["env"]
150 |                 # env_dir = Path(os.path.expanduser(project_env_data["env_dir"]))
151 |                 # if not env_dir.exists():
152 |                 #     raise UserError(f"Environment directory referenced in project environment file sequor_env.yaml does not exist: {env_dir}")
153 |         elif env_os_var:
154 |             env_name = env_os_var
155 |             # env_dir = Path(os.path.expanduser(env_os_var))
156 |             # if not env_dir.exists():
157 |             #     raise UserError(f"Environment directory passed as SEQUOR_ENV environment variable does not exist: {env_os_var}")
158 |         # elif default_env_dir.exists():
159 |         #     env_dir = default_env_dir
160 |         # else:
161 |         #     raise UserError(f"Environment directory not found. Please specify it using --env-dir argument, SEQUOR_ENV environment variable, or in project sequor_env.ymal.")
162 | 
163 |         # Initialize an environment
164 |         if env_name is not None:
165 |             env = Environment(env_name, instance.get_home_dir())
166 |             env.load()
167 |         else:
168 |             env = Environment.create_empty(instance.get_home_dir())
169 | 
170 |         # # Register all operations at program startup
171 |         # register_all_operations()
172 | 
173 |         # Initialize a project
174 |         project = Project(project_dir, instance.get_home_dir())
175 | 
176 |         op_options = {
177 |             "debug_foreach_record": debug_foreach_record,
178 |             "debug_request_preview_trace": debug_request_preview_trace,
179 |             "debug_request_preview_pretty": debug_request_preview_pretty,
180 |             "debug_response_parser_preview": debug_response_parser_preview
181 |         }
182 | 
183 |         if op_id is not None:
184 |             # execute a single op in the flow
185 |             flow = project.get_flow(flow_name)
186 |             op = flow.get_op_by_id(op_id)
187 |         else:
188 |             # execute the whole flow
189 |             run_flow_op_def = {
190 |                 "op": "run_flow",
191 |                 "flow": flow_name,
192 |                 "start_step": 0,
193 |                 "parameters": {}
194 |             }
195 |             op = RunFlowOp(project, run_flow_op_def)
196 |         job = Job(env, project, op, {"disable_flow_stacktrace": disable_flow_stacktrace, "show_stacktrace": show_stacktrace})
197 |         job.run(op_options)
198 |     except Exception as e:
199 |         if show_stacktrace:
200 |             job_stacktrace = Common.get_exception_traceback()
201 |             logger.error("Python stacktrace:\n" + job_stacktrace)
202 |         logger.error(str(e))
203 |         raise typer.Exit(code=1)
204 | 
def main():
    """Entry point: run the Typer application on the process arguments."""
    app()
207 | 
if __name__ == "__main__":
    # Run with the real command line. The previous version overwrote sys.argv
    # with a hard-coded, machine-specific invocation, so running this module
    # directly ignored whatever the user actually typed.
    #
    # For debugging from an IDE, put the arguments into the run configuration
    # instead, for example:
    #   version
    #   env init dev --home-dir <sequor-home>
    #   init <project-dir> --home-dir <sequor-home>
    #   run <flow> --stacktrace --project-dir <project-dir> --env dev --home-dir <sequor-home>
    #   run <flow> --op-id <op-id> --debug-httprequest-preview-trace --stacktrace --project-dir <project-dir> --env dev
    main()


--------------------------------------------------------------------------------
/src/sequor/common/executor_utils.py:
--------------------------------------------------------------------------------
  1 | import ast
  2 | import builtins
  3 | import logging
  4 | from typing import Any, Callable, List, NamedTuple
  5 | from sequor.core.context import Context
  6 | from jinja2 import Template, StrictUndefined
  7 | 
  8 | from sequor.core.execution_stack_entry import ExecutionStackEntry
  9 | from sequor.core.user_error import UserError
 10 | from sequor.source.table_address import TableAddress
 11 | 
 12 | # if you add anything here, you must also add it to Op.eval_parameter: context, ... standard functions ..., ... extra_params ...
 13 | user_function_params_def = ["context", "is_var_defined", "var", "table", "query", "query_scalar", "response"]
 14 | 
 15 | 
 16 | 
 17 | def set_variable_from_def(context: Context, name: str, value_def: Any):
 18 |     if isinstance(value_def, dict):
 19 |         if not ("value" in value_def):
 20 |             raise UserError(f"Setting variable \"{name}\" with a dict that has no \"value\" key: {str(value_def)}")
 21 |         var_value = value_def["value"]
 22 |         if not ("scope" in value_def):
 23 |             raise UserError(f"Setting variable \"{name}\" with a dict that has no \"scope\" key: {str(value_def)}")
 24 |         var_scope = value_def["scope"]
 25 |         if var_scope not in ["local", "project"]:
 26 |             raise UserError(f"Setting variable \"{name}\" with invalid scope: {var_scope}")
 27 |     else:
 28 |         var_value = value_def
 29 |         var_scope = "project"
 30 |     set_variable(context, name, var_value, var_scope)
 31 |     return var_value, var_scope
 32 | 
 33 | def set_variable(context: Context, name: str, value: Any, scope: str):
 34 |     if scope == "local":
 35 |         context.set_variable(name, value)
 36 |     elif scope == "project":
 37 |         context.project.set_variable(name, value)
 38 |     else:
 39 |         raise UserError(f"Setting variable \"{name}\" in invalid scope: {scope}")
 40 | 
 41 | class UserSourcesAPI:
 42 |     def __init__(self, context, user_context):
 43 |         self.context = context
 44 |         self.user_context = user_context
 45 |     
 46 |     def query(self, source_name: str, query: str, database_name: str = None, namespace_name: str = None):
 47 |         source = self.context.project.get_source(self.context, source_name)
 48 |         result = []
 49 |         with source.connect() as conn:
 50 |             conn.open_query(query)
 51 |             while row := conn.next_row():
 52 |                 result.append(row)
 53 |         return result
 54 |     
 55 | class UserContext:
 56 |     def __init__(self, context: Context):
 57 |         self.context = context
 58 |         self.sources = UserSourcesAPI(self.context, self)
 59 | 
 60 |     def is_var_defined(self, name):
 61 |         value = self.context.get_variable_value(name)
 62 |         if value is None:
 63 |             return False
 64 |         else:
 65 |             return True
 66 |     
 67 |     def var(self, name):
 68 |         value = self.context.get_variable_value(name)
 69 |         if value is None:
 70 |             raise UserError(f"Variable '{name}' is not defined")
 71 |         return value
 72 |     
 73 |     def table(self, source_name: str, table_name: str, database_name: str = None, spacename_name: str = None):
 74 |         source = self.context.project.get_source(self.context, source_name)
 75 |         table_addr = TableAddress(source_name, database_name, spacename_name, table_name)
 76 |         result = []
 77 |         with source.connect() as conn:
 78 |             conn.open_table_for_read(table_addr)
 79 |             row_count = 0
 80 |             row = conn.next_row()
 81 |             while row is not None:
 82 |                 row_count += 1
 83 |                 result.append(row)
 84 |                 row = conn.next_row()
 85 |         return result
 86 |     
 87 |     def query(self, source_name: str, query: str):
 88 |         source = self.context.project.get_source(self.context, source_name)
 89 |         result = []
 90 |         with source.connect() as conn:
 91 |             conn.open_query(query)
 92 |             row_count = 0
 93 |             row = conn.next_row()
 94 |             while row is not None:
 95 |                 row_count += 1
 96 |                 result.append(row)
 97 |                 row = conn.next_row()
 98 |         return result
 99 |     
100 |     def query_scalar(self, source_name: str, query: str):
101 |         source = self.context.project.get_source(self.context, source_name)
102 |         res_value = None
103 |         with source.connect() as conn:
104 |             conn.open_query(query)
105 |             row = conn.next_row()
106 |             if row is not None:
107 |                 res_value = row.columns[0].value
108 |                 row = conn.next_row()
109 |                 if row is not None:
110 |                     UserError(f"query_value error: query returned multiple rows: {query}")
111 |             else:
112 |                 UserError(f"query_value error: query returned no rows: {query}")
113 |         return res_value
114 |     
115 | 
116 | 
def build_jinja_user_context(context: Context):
    """Build the mapping of names made available to Jinja templates.

    Currently it exposes a single function, ``var(name)``, which returns the
    value of a variable and raises UserError when the lookup yields None.
    """
    def var(name):
        value = context.get_variable_value(name)
        if value is not None:
            return value
        raise UserError(f"Variable '{name}' is not defined")

    jinja_names = {}
    jinja_names["var"] = var
    return jinja_names
126 | 
# Render every string found anywhere inside a parsed YAML structure.
def render_jinja(context, any_def, null_literal: bool = False):
    """Return ``any_def`` with all contained strings rendered as Jinja templates.

    ``null_literal`` is passed through to the string renderer. Any failure
    during rendering is reported as a UserError that quotes both the
    underlying error and the definition being rendered.
    """
    names_for_templates = build_jinja_user_context(context)
    try:
        return _render_jinja_helper(any_def, names_for_templates, null_literal)
    except Exception as e:
        raise UserError(f"Error rendering Jinja template \"{str(e)}\" in definition: {str(any_def)}")
135 | 
# Render a single string as a Jinja template.
def _render_jinja_str(template_str, jinja_context, null_literal: bool):
    """Render ``template_str`` with ``jinja_context``; undefined names are errors.

    When ``null_literal`` is set and the rendered text is exactly "__NULL__",
    None is returned instead of the text.
    """
    template = Template(template_str, undefined=StrictUndefined)
    rendered = template.render(jinja_context)
    # The comparison is case sensitive on purpose, to align with YAML which is case sensitive.
    if null_literal and rendered == "__NULL__":
        return None
    return rendered
142 | 
143 | 
144 | 
145 | # def _render_jinja_helper(any_def, jinja_context):
146 | #     if isinstance(any_def, str):
147 | #         return render_jinja_str(any_def, jinja_context)
148 | #     elif isinstance(any_def, dict):
149 | #         return {k: _render_jinja_helper(v, jinja_context) for k, v in any_def.items()}
150 | #     elif isinstance(any_def, list):
151 | #         return [_render_jinja_helper(v, jinja_context) for v in any_def]
152 | #     else:
153 | #         return any_def  # raw number, boolean, None, etc.
def _render_jinja_helper(any_def, jinja_context, null_literal: bool):
    """Recursively render the strings inside ``any_def``.

    Strings are rendered, mappings and sequences are rebuilt with rendered
    members, and every other value is returned unchanged. ruamel.yaml's
    CommentedMap / CommentedSeq containers are rebuilt from a shallow copy of
    the original rather than as plain dict / list, so whatever the copy carries
    besides its items is kept.
    """
    from ruamel.yaml.comments import CommentedMap, CommentedSeq
    import copy

    def render_member(member):
        return _render_jinja_helper(member, jinja_context, null_literal)

    if isinstance(any_def, str):
        return _render_jinja_str(any_def, jinja_context, null_literal)

    if isinstance(any_def, CommentedMap):
        # Shallow copy first, then empty it and refill with rendered values.
        rendered_map = copy.copy(any_def)
        rendered_map.clear()
        for key, member in any_def.items():
            rendered_map[key] = render_member(member)
        return rendered_map

    if isinstance(any_def, dict):
        return {key: render_member(member) for key, member in any_def.items()}

    if isinstance(any_def, CommentedSeq):
        # Same approach as for CommentedMap.
        rendered_seq = copy.copy(any_def)
        rendered_seq.clear()
        for member in any_def:
            rendered_seq.append(render_member(member))
        return rendered_seq

    if isinstance(any_def, list):
        return [render_member(member) for member in any_def]

    # raw number, boolean, None, etc.
    return any_def
186 | 
def get_restricted_builtins():
    """Return a dict mapping a fixed whitelist of builtin names to the builtins."""
    # Only these functions are considered safe to expose.
    whitelist = (
        'len', 'range', 'str', 'int', 'float', 'bool', 'dict', 'list',
        'sum', 'min', 'max', 'abs', 'enumerate', 'zip', 'sorted', 'any', 'all',
    )
    restricted = {}
    for builtin_name in whitelist:
        restricted[builtin_name] = getattr(builtins, builtin_name)
    return restricted
193 | 
def user_function_error_message(e: Exception, key_name: str, line_in_code: int, line_in_yaml: int, auto_wrapped: bool, prefix: str = "Error"):
    """Format an error raised by user code with its position in code and YAML.

    Args:
        e: the exception to report; its type name and text end the message.
        key_name: name of the YAML property that holds the user code.
        line_in_code: line number reported for the code, or None when unknown.
        line_in_yaml: line number of the code definition in the YAML file.
        auto_wrapped: True when the code was wrapped in a generated ``def``
            line, which shifts reported line numbers down by one.
        prefix: leading word(s) of the message.

    Returns:
        The formatted message. The code position is omitted, and the YAML line
        left unshifted, when the code line is unknown or zero after adjustment.
    """
    # Compensate for the generated "def" line, but only when there is a line
    # number to adjust: subtracting from None used to raise TypeError.
    if auto_wrapped and line_in_code is not None:
        line_in_code = line_in_code - 1

    if line_in_code:
        position_in_code = f"line {line_in_code} of code, "
        absolute_line_in_yaml = line_in_yaml + line_in_code
    else:
        position_in_code = ""
        absolute_line_in_yaml = line_in_yaml

    return f"{prefix} in {key_name} ({position_in_code}line {absolute_line_in_yaml} in YAML): {type(e).__name__}: {str(e)}"
201 | 
def load_user_function(function_code: str, key_name: str, line_in_yaml: int): # function_params_def: str = "context", 
    """Turn user code from a YAML property into a callable ``evaluate`` function.

    The code is wrapped in ``def evaluate(<user_function_params_def>):``. A
    single expression becomes the function's return value; anything else is
    used as the function body as written.

    Args:
        function_code: the user's code (an expression or statements).
        key_name: name of the YAML property; used as the filename of the
            compiled code and in error messages.
        line_in_yaml: line of the code definition in the YAML file, used to
            report error positions.

    Returns:
        The function object, with ``__source_code__``, ``__filename__`` and
        ``__auto_wrapped__`` attached for later error reporting.

    Raises:
        UserError: if the code still starts with ``def evaluate`` or does not
            compile.
    """
    # must match parameters passed in Op.eval_parameter of op.py
    function_name: str = "evaluate"
    if function_code.strip().startswith("def evaluate"):
        raise UserError(f"Sequor does not require to define 'evaluate' function anymore. Please remove the 'def evaluate(...):' line from the function code of YAML property '{key_name}'")
    auto_wrapped = True

    try:
        # Test if it's a valid single expression
        ast.parse(function_code.strip(), mode='eval')
    except SyntaxError:
        # Not a single expression: use it as the body unchanged. Real syntax
        # errors are reported by compile() below with proper line information.
        function_body = function_code
    else:
        # The closing parenthesis goes on its own line so that an expression
        # ending in a "# comment" does not swallow it. Line numbers of the
        # expression itself are unaffected.
        function_body = f"return ({function_code}\n)"

    # Properly indent the function body with 4 spaces
    indented_body = '\n'.join('    ' + line for line in function_body.splitlines())
    user_function_params_def_str = ", ".join(user_function_params_def)
    function_code = f"def evaluate({user_function_params_def_str}):\n{indented_body}"

    # Compile the code first to catch syntax errors
    try:
        compiled_code = compile(function_code, filename=key_name, mode="exec")
    except SyntaxError as e:
        # A SyntaxError may carry no line number; only request the wrapper-line
        # adjustment when there is a number to adjust.
        adjust_for_wrapper = auto_wrapped and e.lineno is not None
        raise UserError(user_function_error_message(SyntaxError(e.msg), key_name, e.lineno, line_in_yaml, adjust_for_wrapper, "Syntax error")) from e

    local_namespace = {}

    # NOTE(review): the restricted-builtins sandbox (get_restricted_builtins) is
    # NOT applied here. The code is executed with this module's globals, so user
    # code has access to the full builtins. Kept as is; confirm this is intended.
    try:
        exec(compiled_code, None, local_namespace)
    except Exception as e:
        # Get the traceback information
        import traceback
        tb = traceback.extract_tb(e.__traceback__)
        if tb:
            # Get the last frame from the traceback
            last_frame = tb[-1]
            # Format the error with file and line information
            error_msg = f"Error in {key_name} at line {last_frame.lineno}: {str(e)}"
            # Create a new exception with the same type but our custom message
            new_exc = type(e)(error_msg)
            # Preserve the original traceback
            new_exc.__traceback__ = e.__traceback__
            raise new_exc
        # No code line is known here, so there is nothing to adjust for the wrapper.
        raise UserError(user_function_error_message(e, key_name, None, line_in_yaml, False, "Error"))

    # Retrieve the function object; the wrapper above always defines it.
    user_function = local_namespace[function_name]

    # Attach the source code and filename to the function for better error reporting
    user_function.__source_code__ = function_code
    user_function.__filename__ = key_name
    user_function.__auto_wrapped__ = auto_wrapped

    return user_function
276 | 
277 | 
class UserFunction:
    """Callable compiled from user code, paired with its YAML line number.

    ``apply`` runs the callable. If it raises and the traceback contains a
    frame whose filename equals the ``__filename__`` attribute attached to the
    callable, the error is re-raised as a ``UserError`` whose message is built
    by ``user_function_error_message``; otherwise the original exception
    propagates unchanged.
    """

    def __init__(self, fun: Callable, line_in_yaml: int):
        self.fun: Callable = fun
        # Line number of the function definition in the YAML file.
        self.line_in_yaml: int = line_in_yaml

    def apply(self, *args, **kwargs):
        """Call the wrapped function with the given arguments and return its result."""
        try:
            return self.fun(*args, **kwargs)
        except Exception as e:
            import traceback

            # The user's frame is recognised by the filename attached to the function.
            has_filename = hasattr(self.fun, '__filename__')
            user_frame = next(
                (
                    frame
                    for frame in traceback.extract_tb(e.__traceback__)
                    if has_filename and frame.filename == self.fun.__filename__
                ),
                None,
            )

            if not user_frame:
                # No frame from the user's code: pass the original error through.
                raise

            # Report the failure with line information from the user's code.
            error_msg = user_function_error_message(e, self.fun.__filename__, user_frame.lineno, self.line_in_yaml, self.fun.__auto_wrapped__, "Error")
            new_exc = UserError(error_msg)
            # Preserve the original traceback.
            new_exc.__traceback__ = e.__traceback__
            raise new_exc
310 | 
311 | 


--------------------------------------------------------------------------------
/src/sequor/operations/http_request.py:
--------------------------------------------------------------------------------
  1 | from collections import OrderedDict
  2 | import json
  3 | import logging
  4 | from typing import Any, Dict, List, NamedTuple
  5 | 
  6 | import urllib.parse
  7 | from sequor.common.common import Common
  8 | from sequor.core.context import Context
  9 | from sequor.core.op import Op
 10 | import requests
 11 | from sequor.common.executor_utils import UserContext, UserFunction, load_user_function, render_jinja, set_variable_from_def
 12 | from sequor.common.data_loader import DataLoader
 13 | from sequor.core.user_error import UserError
 14 | from sequor.source.row import Row
 15 | from sequor.source.source import Source
 16 | from sequor.source.table_address import TableAddress
 17 | from requests.auth import HTTPBasicAuth, HTTPDigestAuth
 18 | from requests.auth import AuthBase
 19 | from requests_toolbelt.utils import dump
 20 | from authlib.integrations.requests_client import OAuth2Session
 21 | 
 22 | 
 23 | class HTTPRequestParameters:
 24 |     def __init__(self, auth_handler, oauth_session, url, method, parameters, headers, body_format, body, response_def): # success_status, target_table_addrs, parse_response_fun):
 25 |         self.auth_handler = auth_handler
 26 |         self.oauth_session = oauth_session
 27 |         self.url = url
 28 |         self.method = method
 29 |         self.parameters = parameters
 30 |         self.headers = headers
 31 |         self.body = body
 32 |         self.body_format = body_format
 33 |         self.response_def = response_def
 34 |         # self.success_status = success_status
 35 |         # self.target_table_addrs = target_table_addrs
 36 |         # self.parse_response_fun = parse_response_fun
 37 | 
 38 | class UserResponse:
 39 |     def __init__(self, response: requests.Response):
 40 |         self.response = response
 41 |         self.response_json_parsed = None
 42 | 
 43 |     def status_code(self):
 44 |         return self.response.status_code
 45 |     
 46 |     def json(self):
 47 |         if self.response_json_parsed is not None:
 48 |             return self.response_json_parsed
 49 |         else:
 50 |             self.response_json_parsed = self.response.json()
 51 |             return self.response_json_parsed
 52 |     
 53 |     def text(self):
 54 |         return self.response.text
 55 |     
 56 |     def headers(self):
 57 |         return self.response.headers
 58 |     
 59 |     
 60 | class APIKeyAuth(AuthBase):
 61 |     def __init__(self, key, value, add_to='header'):
 62 |         self.key = key
 63 |         self.value = value
 64 |         self.add_to = add_to
 65 | 
 66 |     def __call__(self, r):
 67 |         if self.add_to == 'header':
 68 |             r.headers[self.key] = self.value
 69 |         elif self.add_to == 'query':
 70 |             from urllib.parse import urlencode, urlparse, parse_qsl, urlunparse
 71 |             parsed = urlparse(r.url)
 72 |             query = dict(parse_qsl(parsed.query))
 73 |             query[self.key] = self.value
 74 |             r.url = urlunparse(parsed._replace(query=urlencode(query)))
 75 |         else:
 76 |             raise ValueError(f"Unsupported add_to location: {self.add_to}")
 77 |         return r
 78 | 
 79 | class BearerTokenAuth(AuthBase):
 80 |     def __init__(self, token):
 81 |         self.token = token
 82 | 
 83 |     def __call__(self, r):
 84 |         r.headers['Authorization'] = f'Bearer {self.token}'
 85 |         return r
 86 | 
 87 | class OAuth2PasswordFlowSession():
 88 |     def __init__(self, authlib_session: OAuth2Session, token_endpoint, client_id, client_secret, username, password, **kwargs):
 89 |         self.authlib_session = authlib_session
 90 |         self.token_endpoint = token_endpoint
 91 |         self.client_id = client_id
 92 |         self.client_secret = client_secret
 93 |         self.username = username
 94 |         self.password = password
 95 |         self.token = None
 96 |             
 97 |     def ensure_active_token(self):
 98 |         """Check if token exists and is valid, fetch or refresh as needed"""
 99 |         if self.token is None:
100 |             self.token = self.authlib_session.fetch_token(
101 |                 self.token_endpoint,
102 |                 grant_type='password',
103 |                 username=self.username,
104 |                 password=self.password,
105 |                 client_id=self.client_id,
106 |                 client_secret=self.client_secret
107 |             )
108 |         elif self.token.is_expired():
109 |             self.refresh_token(self.token_endpoint)
110 |         return self.token
111 | 
112 | # @Op.register('http_request')
113 | class HTTPRequestOp(Op):
    def __init__(self, proj, op_def: Dict[str, Any]):
        """Create the op by passing the project and the op definition to the base ``Op``."""
        super().__init__(proj, op_def)
116 |     
117 |     def get_title(self) -> str:
118 |         request_def = self.op_def.get('request')
119 |         if request_def:
120 |             url = request_def.get('url')
121 |             if url:
122 |                 url_title = self.name + ": " + url
123 |             else:
124 |                 url_title = None
125 |         else:
126 |             url_title = None
127 |         op_title = self.op_def.get('title')
128 |         op_id = self.op_def.get('id')
129 |         if op_id:
130 |             title = self.name + ": " + op_id
131 |         elif op_title is not None:
132 |             title = self.name + ": " + op_title
133 |         elif url_title:
134 |             title = self.name + ": " + url_title
135 |         else:
136 |             title = self.name + ": unknown"
137 |         return title
138 | 
139 | 
140 | 
141 |     def _convert_yaml_to_python(self, obj):
142 |         """Recursively convert YAML objects (CommentedKeyMap, etc.) to Python types"""
143 |         if isinstance(obj, OrderedDict) or hasattr(obj, 'items'):
144 |             return dict({str(k): self._convert_yaml_to_python(v) for k, v in obj.items()})
145 |         elif isinstance(obj, (list, tuple)):
146 |             return [self._convert_yaml_to_python(v) for v in obj]
147 |         return obj
148 | 
149 |     def _make_request_helper(self, context: Context, http_params: HTTPRequestParameters, op_options: Dict[str, Any], logger: logging.Logger):
150 |         # Requests lib docs: https://requests.readthedocs.io/en/latest/
151 |         http_service = None
152 |         auth_handler = None
153 |         if http_params.oauth_session:
154 |             http_service = http_params.oauth_session.authlib_session
155 |             http_params.oauth_session.ensure_active_token()
156 |             auth_handler = None
157 |         else:
158 |             http_service = requests
159 |             auth_handler = http_params.auth_handler
160 | 
161 |         # Serialize body to body_format
162 |         body = Op.eval_parameter(context, http_params.body, "body", render=1, null_literal=True, location_desc="request")
163 |         request_body = None
164 |         if http_params.body_format == "json":
165 |             # body_dict = self._convert_yaml_to_python(body)
166 |             # body_test = {
167 |             #     "email_address": "test@test.com"
168 |             # }
169 |             request_body = json.dumps(self._convert_yaml_to_python(body))
170 |             # if "Content-Type" not in headers:
171 |             # #     headers["Content-Type"] = "application/json"
172 |         if http_params.body_format == "form_urlencoded":
173 |             body_dict = self._convert_yaml_to_python(body)
174 |             if not isinstance(body_dict, dict):
175 |                 raise UserError("Request body must be dictionary for form_urlencoded body format: " + str(body_dict))
176 |             request_body = urllib.parse.urlencode(body_dict)
177 |             # if "Content-Type" not in headers:
178 |             #     headers["Content-Type"] = "application/x-www-form-urlencoded"
179 |         elif http_params.body_format == "multipart_form_data":
180 |             raise UserError("multipart_form_data body format is not supported yet")
181 |             # # Handle files vs regular data
182 |             # request_files = {}
183 |             # request_data = {}
184 |             # # Separate file fields from regular data fields
185 |             # for key, value in body.items():
186 |             #     if isinstance(value, str) and value.startswith("file://"):
187 |             #         file_path = value[7:]  # Remove file:// prefix
188 |             #         request_files[key] = open(file_path, "rb")
189 |             #     else:
190 |             #         request_data[key] = value
191 |         elif http_params.body_format == "xml":
192 |             raise UserError("xml body format is not supported yet")
193 |             # Assume body is already XML string or convert dict to XML
194 |             # request_body = self._convert_yaml_to_python(body)
195 |             # if "Content-Type" not in headers:
196 |             #     headers["Content-Type"] = "application/xml"
197 |         elif http_params.body_format == "text":
198 |             raise UserError("text body format is not supported yet")
199 |             # request_body = self._convert_yaml_to_python(body)
200 |             # if "Content-Type" not in headers:
201 |             #     headers["Content-Type"] = "text/plain"
202 |         elif http_params.body_format == "binary":
203 |             raise UserError("binary body format is not supported yet")
204 |             # Assume body is either binary data or a path to a file
205 |             # if isinstance(body, str) and body.startswith("file://"):
206 |             #     with open(body[7:], "rb") as f:
207 |             #         request_body = f.read()
208 |             # else:
209 |             #     request_body = self._convert_yaml_to_python(body)
210 |             # if "Content-Type" not in headers:
211 |             #     headers["Content-Type"] = "application/octet-stream"
212 | 
213 |         parameters = Op.eval_parameter(context, http_params.parameters, "parameters", render=1, location_desc="request")
214 |         parameters = Op.eval_dict(context, parameters, "parameters", location_desc="request")
215 | 
216 |         response = http_service.request(
217 |             method = Op.eval_parameter(context, http_params.method, "method", render=1, location_desc="request"),  # or "POST", "PUT", "DELETE", etc.
218 |             url = Op.eval_parameter(context, http_params.url, "url", render=1, location_desc="request"),
219 |             params = parameters, # {"key": "value"},  # Query parameters
220 |             headers = Op.eval_parameter(context, http_params.headers, "headers", render=1, location_desc="request"), # {"Content-Type": "application/json"},
221 |             auth = auth_handler,
222 |             # json={"data": "payload"},  # JSON body
223 |             # data={"form": "data"},     # Form data
224 |             data = request_body
225 |             # timeout=10,
226 |             # verify=True,  # SSL verification
227 |         )
228 |         if op_options.get("debug_request_preview_trace"):
229 |             # http_log = dump.dump_all(response, request_prefix=b'>> ', response_prefix=b'<< ')
230 |             http_log = dump.dump_all(response, request_prefix=b'', response_prefix=b'')
231 |             http_log_st = http_log.decode("utf-8")
232 |             logger.info(f"HTTP request trace:\n----------------- TRACE START -----------------\n{http_log_st}\n----------------- TRACE END -----------------")
233 |         return response
234 | 
235 |     def _make_request(self, context, http_params: HTTPRequestParameters, op_options: Dict[str, Any], logger: logging.Logger):
236 |         while True:
237 |             response = self._make_request_helper(context, http_params, op_options, logger)
238 |             response_user = UserResponse(response)
239 |             response_def = Op.eval_parameter(context, http_params.response_def, "response", render=0, extra_params=[response_user]) 
240 |             # if callable(http_params.response_def):
241 |             #     response_def = http_params.response_def(UserContext(context), response)
242 |             # else:
243 |             #     response_def = http_params.response_def
244 |             
245 |             success_status = Op.get_parameter(context, response_def, 'success_status', is_required=False, render=3)
246 |             if success_status is not None and not isinstance(success_status, list):
247 |                 raise UserError(f"success_status must be a list of integers: {success_status}")            
248 |             if success_status is not None:
249 |                 if response.status_code not in success_status:
250 |                     raise UserError(f"HTTP request failed with unexpected status code: {response.status_code}. Expected status codes: {success_status}. Response body: {response.text}") 
251 |                 
252 |             target_source_name = Op.get_parameter(context, response_def, 'source', is_required=False, render=3)
253 |             target_database_name = Op.get_parameter(context, response_def, 'database', is_required=False, render=3)
254 |             target_namespace_name = Op.get_parameter(context, response_def, 'namespace', is_required=False, render=3)
255 |             target_table_name = Op.get_parameter(context, response_def, 'table', is_required=False, render=3)
256 |             target_tables_def = Op.get_parameter(context, response_def, 'tables', is_required=False, render=3)
257 |             target_table_addrs = None
258 |             if target_tables_def:
259 |                 target_table_addrs = []
260 |                 for table_def in target_tables_def:
261 |                     table_source_name = table_def.get('source')
262 |                     table_database_name = table_def.get('database')
263 |                     table_namespace_name = table_def.get('namespace')
264 |                     table_table_name = table_def.get('table')
265 |                     table_model_def = table_def.get('model')
266 |                     if table_model_def is None:
267 |                         table_columns_def = Op.get_parameter(context, table_def, 'columns', is_required=True, render=3)  # table_def.get('columns')
268 |                         if table_columns_def is not None:
269 |                             table_model_def = {"columns": Op.eval_parameter(context, table_columns_def, "columns",render=0, location_desc="tables.'{table_table_name}'")}
270 |                     data_def = Op.get_parameter(context, table_def, 'data', is_required=False, render=3) # function_params_def="context, response"
271 |                     data_def = Op.eval_parameter(context, data_def, "data", render=0, location_desc=f"tables.'{table_table_name}'", extra_params=[response_user])
272 |                     write_mode = table_def.get('write_mode')
273 |                     table_addr = TableAddress(table_source_name or target_source_name, table_database_name or target_database_name, table_namespace_name or target_namespace_name, 
274 |                                             table_table_name or target_table_name, table_model_def, data_def,write_mode)
275 |                     target_table_addrs.append(table_addr)
276 | 
277 |             parser = Op.get_parameter(context, response_def, 'parser', is_required=False, render=3)
278 |             # # Compile parser response function code
279 |             # parser = response_def.get('parser')
280 |             # # todo: do we have any use case to allow non-expression parser?
281 |             # if parser:
282 |             #     raise UserError("parser is not supported. Use parser_expression instead")
283 |             # parse_response_fun = None
284 |             # parse_response_expression = response_def.get('parser_expression')
285 |             # if parse_response_expression is not None:
286 |             #     parse_response_expression_line = Common.get_line_number(response_def, 'parser_expression')
287 |             #     parse_response_fun_compiled = load_user_function(parse_response_expression, "parser_expression", parse_response_expression_line) # , function_params_def="context, response"
288 |             #     parse_response_fun = UserFunction(parse_response_fun_compiled, parse_response_expression_line)
289 |            
290 | 
291 |             tables_to_load = []
292 |             if parser is not None:
293 |                 # parse response
294 |                 # response_parsed = parse_response_fun.apply(UserContext(context), response_user)
295 |                 response_parsed = Op.eval_parameter(context, parser, "parser", render=0, location_desc="response", extra_params=[response_user])
296 | 
297 |                 # preprocess target table definitions: 
298 |                 # target tables are created inside the loader get_connection() method, it means that they will not be created without the response parser
299 |                 tables_def = response_parsed.get('tables')
300 |                 if target_table_addrs is not None:
301 |                     if tables_def is None or not isinstance(tables_def, dict):
302 |                         raise UserError("Response parser must return data as a dictionary for 'tables' defined in the response.tables section of this http_request op: " + str(tables_def))
303 |                     # if not isinstance(tables_def, dict):
304 |                     #     raise UserError("Response parser must return data for 'tables' as a dictionary because tables are defined in the response.tables section of this http_request op: " + str(tables_def))
305 |                     # Check that all required tables have data
306 |                     for table_addr in target_table_addrs:
307 |                         if table_addr.table_name not in tables_def:
308 |                             raise UserError(f"Data for the target table {table_addr.table_name} not found in the result returned by the HTTP response parser.")
309 |                     # Check that no extra tables were returned
310 |                     for table_name in tables_def:
311 |                         if not any(table_addr.table_name == table_name for table_addr in target_table_addrs):
312 |                             raise UserError(f"Unexpected table '{table_name}' found in the result returned by the HTTP response parser. This table was not defined in the response.tables section of this http_request op.")
313 |                     # tables to load
314 |                     for table_addr in target_table_addrs:
315 |                         table_addr_clone = table_addr.clone()
316 |                         table_addr_clone.data = tables_def.get(table_addr.table_name)
317 |                         tables_to_load.append(table_addr_clone)
318 |                 else:
319 |                     # response parser can still return tables as array
320 |                     if tables_def is not None:
321 |                         if not isinstance(tables_def, list):
322 |                             raise UserError("Response parser must return 'tables' as array if no tables are defined in the response.tables section of this http_request op: " + str(tables_def))
323 |                         for table_def in tables_def:
324 |                             table_model_def = table_def.get('model')
325 |                             if table_model_def is None:
326 |                                 table_columns_def = table_def.get('columns')
327 |                                 if table_columns_def is not None:
328 |                                     table_model_def = {"columns": table_columns_def}
329 |                             table_addr_from_def = TableAddress(table_def.get('source'), table_def.get('database'), table_def.get('namespace'), table_def.get('table'),
330 |                                                                table_model_def, table_def.get('data'), table_def.get('write_mode'))
331 |                             tables_to_load.append(table_addr_from_def)
332 |                 
333 |                 if response_parsed.get('variables') is not None:
334 |                     response_def["variables"] = response_parsed.get('variables')
335 |                 if response_parsed.get('while') is not None:
336 |                     response_def["while"] = response_parsed.get('while')
337 |             elif target_table_addrs is not None:
338 |                 tables_to_load = target_table_addrs
339 | 
340 |             # load tables    
341 |             self.data_loader.run(context, tables_to_load)
342 | 
343 |             # set returned variables
344 |             variables_def = Op.get_parameter(context, response_def, 'variables', is_required=False, render=3, location_desc="response") # , function_params_def="context, response"
345 |             variables_def = Op.eval_parameter(context, variables_def, "variables", render=0, location_desc="response", extra_params=[response_user])
346 |             if variables_def is None:
347 |                 variables_def = {}
348 |             variables_def = Op.eval_dict(context, variables_def, "variables", location_desc="response", extra_params=[response_user])
349 |             for name, value_def in variables_def.items():
350 |                 # if name.endswith("_expression"):
351 |                 #     name_real = name[:-11]  # Remove "_expression" suffix
352 |                 #     value_def = Op.get_parameter(context, variables_def, name_real, is_required=False, render=3)
353 |                 #     value_def = Op.eval_parameter(context, value_def, render=0, extra_params=[response_user])
354 |                 # else:
355 |                 #     real_name = name
356 |                 if isinstance(value_def, dict):
357 |                     value_def = Op.eval_dict(context, value_def, "values", location_desc="response.variables", extra_params=[response_user])
358 |                 set_variable_from_def(context, name, value_def)
359 | 
360 |             while_def = Op.get_parameter(context, response_def, 'while', is_required=False, render=3, location_desc="response") # , function_params_def="context, response"
361 |             while_def = Op.eval_parameter(context, while_def, "while", render=0, location_desc="response", extra_params=[response_user])
362 |             if while_def is None:
363 |                 while_def = False
364 |             if not isinstance(while_def, bool):
365 |                 raise UserError("\"while\" in the result of the response section must be a boolean: " + str(while_def))
366 |             
367 |             if not while_def:
368 |                 break
369 | 
370 | 
371 | 
372 |     def run(self, context, op_options: Dict[str, Any]):
373 |         logger = logging.getLogger("sequor.ops.http_request")
374 |         logger.info(f"Starting \"" + self.get_title() + "\"")
375 | 
376 |         # clone context because we extend it with source variables and for_each variable that we don't want to be passed to the next op
377 |         parent_context = context
378 |         context = context.clone()
379 | 
380 |         # extend context with source variables
381 |         request_def = Op.get_parameter(context, self.op_def, 'request', is_required=True)
382 |         http_source_name = Op.get_parameter(context, request_def, 'source', is_required=False) # at this point context is equal to parent_context which is what we want
383 |         if http_source_name:
384 |             http_source = self.proj.get_source(context, http_source_name)
385 |             variables_def = http_source.source_def.get("variables");
386 |             if variables_def:
387 |                 for var_name, var_value in variables_def.items():
388 |                     var_value = render_jinja(context, var_value) # at this point context is equal to parent_context which is what we want
389 |                     context.set_variable(var_name, var_value)
390 |         
391 |         # render http source def in the context extended with source variable 
392 |         # because source properties can contain references to the variables
393 |         if http_source_name:
394 |             http_source_def = http_source.get_rendered_def()
395 |         # self.op_def = render_jinja(context, self.op_def)
396 | 
397 |         # Extract init def
398 |         init_def = Op.get_parameter(context, self.op_def, 'init', is_required=False, render=3)
399 |         if init_def is not None:
400 |             init_def = Op.eval_parameter(context, init_def, "init", render=0, null_literal=False)  # render=0 because we did render=3 above
401 |             for name, value_def in init_def.get('variables', {}).items():
402 |                 set_variable_from_def(context, name, value_def)
403 | 
404 | 
405 |         # Extract for_each def (rendered)
406 |         foreach_def = self.op_def.get('for_each')
407 |         if foreach_def:
408 |             location_desc="for_each"
409 |             # do not render the rest of foreach_def here as it can cause variable unresolved error in case of using --debug_ parameters
410 |             def parse_foreach_def():
411 |                 nonlocal location_desc
412 |                 foreach_source_name = Op.get_parameter(context, foreach_def, 'source', is_required=True, render=3, location_desc=location_desc)
413 |                 foreach_database_name = Op.get_parameter(context, foreach_def, 'database', is_required=False, render=3)
414 |                 foreach_namespace_name = Op.get_parameter(context, foreach_def, 'namespace', is_required=False, render=3)
415 |                 foreach_table_name = Op.get_parameter(context, foreach_def, 'table', is_required=True, render=3, location_desc=location_desc)
416 |                 foreach_table_addr = TableAddress(foreach_source_name,foreach_database_name, foreach_namespace_name, foreach_table_name)
417 |                 return foreach_table_addr
418 |             foreach_var_name = Op.get_parameter(context, foreach_def, 'as', is_required=True, render=3, location_desc=location_desc)
419 |         else:
420 |             foreach_table_addr = None
421 | 
422 |         # Extract request def (render only _expression parameters - non-expression parameters will be rendered on each iteration)
423 |         # request_def = Op.get_parameter(context, self.op_def, 'request', is_required=True) # get request_def again as we need it to be rendered in the context extended with source variables
424 |         url = Op.get_parameter(context, request_def, 'url', is_required=True, render=2)
425 |         method = Op.get_parameter(context, request_def, 'method', is_required=True, render=2)
426 |         parameters = Op.get_parameter(context, request_def, 'parameters', is_required=False, render=2) 
427 |         headers = Op.get_parameter(context, request_def, 'headers', is_required=False, render=2)
428 |         body_format = Op.get_parameter(context, request_def, 'body_format', is_required=False, render=2)
429 |         body = Op.get_parameter(context, request_def, 'body', is_required=False, render=2)
430 |         if body is not None and body_format is None:
431 |             raise UserError("body_format is required when request body is provided (e.g. \"json\", \"form_urlencoded\", etc)")
432 |         
433 |         # Extract response def
434 |         # todo: do we have any use case when we need to render=2. In this case ninja can be used to dynamically set targer_table_addr -> danger: DataLoader will open too many connections!
435 |         response_def = Op.get_parameter(context, self.op_def, 'response', is_required=False, render=3)  # self.op_def.get('response', {}) , function_params_def="context, response"
436 |         if response_def == None:
437 |             response_def = {}
438 |         
439 |         auth_handler = None
440 |         oauth_session = None
441 |         if http_source_name:
442 |             http_source_auth_def = Source.get_parameter(context, http_source_def, 'auth')
443 |             http_source_auth_type = Source.get_parameter(context, http_source_auth_def, 'type', is_required=True)
444 |             if http_source_auth_type == 'basic_auth':
445 |                 http_source_auth_username = Source.get_parameter(context, http_source_auth_def, 'username')
446 |                 http_source_auth_password = Source.get_parameter(context, http_source_auth_def, 'password')
447 |                 auth_handler = HTTPBasicAuth(http_source_auth_username, http_source_auth_password)
448 |             elif http_source_auth_type == 'bearer_token':
449 |                 http_source_auth_token = Source.get_parameter(context, http_source_auth_def, 'token')
450 |                 auth_handler = BearerTokenAuth(http_source_auth_token)
451 |             elif http_source_auth_type == 'digest_auth':
452 |                 http_source_auth_username = Source.get_parameter(context, http_source_auth_def, 'username')
453 |                 http_source_auth_password = Source.get_parameter(context, http_source_auth_def, 'password')
454 |                 auth_handler = HTTPDigestAuth(http_source_auth_username, http_source_auth_password)
455 |             elif http_source_auth_type == 'api_key':
456 |                 http_source_auth_key_name = Source.get_parameter(context, http_source_auth_def, 'key_name')
457 |                 http_source_auth_key_value = Source.get_parameter(context, http_source_auth_def, 'key_value')
458 |                 http_source_auth_add_to = Source.get_parameter(context, http_source_auth_def, 'add_to')
459 |                 auth_handler = APIKeyAuth(http_source_auth_key_name, http_source_auth_key_value, http_source_auth_add_to)
460 |             elif http_source_auth_type == 'oauth1':
461 |                 raise UserError("oauth1 auth is not supported yet")
462 |             elif http_source_auth_type == 'oauth2':
463 |                 http_source_auth_grant_type = Source.get_parameter(context, http_source_auth_def, 'grant_type')
464 |                 if http_source_auth_grant_type == 'password': # 'client_credentials':
465 |                     http_source_auth_token_endpoint = Source.get_parameter(context, http_source_auth_def, 'token_endpoint')
466 |                     http_source_auth_client_id = Source.get_parameter(context, http_source_auth_def, 'client_id')
467 |                     http_source_auth_client_secret = Source.get_parameter(context, http_source_auth_def, 'client_secret')
468 |                     http_source_auth_username = Source.get_parameter(context, http_source_auth_def, 'username')
469 |                     http_source_auth_password = Source.get_parameter(context, http_source_auth_def, 'password')
470 |                     authlib_session = OAuth2Session(http_source_auth_client_id, http_source_auth_client_secret)
471 |                     oauth_session = OAuth2PasswordFlowSession(authlib_session, http_source_auth_token_endpoint, http_source_auth_client_id, http_source_auth_client_secret, http_source_auth_username, http_source_auth_password)
472 |             else:
473 |                 raise UserError(f"Unsupported auth type: {http_source_auth_type}")
474 |         
475 |         http_req_params = HTTPRequestParameters(auth_handler, oauth_session, url, method, parameters, headers, body_format, body, response_def) # success_status, target_table_addrs, parse_response_fun)
476 | 
477 | 
478 |         if op_options.get("debug_foreach_record") or op_options.get("debug_request_preview_trace") or op_options.get("debug_request_preview_pretty"):
479 |             foreach_row_json = op_options.get("debug_foreach_record")
480 |             if foreach_row_json is not None:
481 |                 try:
482 |                     foreach_row_dict = json.loads(foreach_row_json)
483 |                 except json.JSONDecodeError as e:
484 |                     raise UserError(f"Cannot parse --debug_foreach_record as JSON:" + str(e))
485 |                 logger.info("Running in debug_foreach_record mode")
486 |                 foreach_row = Row.from_dict(foreach_row_dict)
487 |                 context.set_variable(foreach_var_name, foreach_row)
488 |             if op_options.get("debug_request_preview_trace") or op_options.get("debug_request_preview_pretty"):
489 |                 logger.info("Running in debug_request_preview_trace mode")
490 |                 self._make_request_helper(context, http_req_params, op_options, logger)
491 |             else:
492 |                 self._make_request(context, http_req_params, op_options, logger)
493 |         else:
494 |             self.data_loader = DataLoader(self.proj)
495 |             try:
496 |                 if foreach_def is None:
497 |                     self._make_request(context, http_req_params, op_options, logger)
498 |                 else:
499 |                     foreach_table_addr = parse_foreach_def()
500 |                     foreach_source = self.proj.get_source(context,foreach_table_addr.source_name)
501 |                     with foreach_source.connect() as conn:
502 |                         conn.open_table_for_read(foreach_table_addr)
503 |                         foreach_row_count = 0
504 |                         foreach_row = conn.next_row()
505 |                         while foreach_row is not None:
506 |                             foreach_row_count += 1
507 |                             context.set_variable(foreach_var_name, foreach_row)
508 |                             self._make_request(context, http_req_params, op_options, logger)
509 |                             foreach_row = conn.next_row()
510 |             finally:
511 |                 self.data_loader.close()
512 | 
513 |         # logger.info(f"Finished \"" + self.get_title() + "\"")
514 |         context.add_to_log_op_finished(logger, f"Finished \"" + self.get_title() + "\"")
515 | 


--------------------------------------------------------------------------------