├── tests ├── __init__.py ├── helpers │ ├── __init__.py │ ├── test_smoke.py │ ├── test_config.py │ ├── test_utils.py │ ├── test_config_types.py │ ├── test_model.py │ ├── test_dataset_profiles.py │ ├── test_entity_schema.py │ └── test_monitor_helpers.py ├── monitor │ ├── __init__.py │ ├── manager │ │ ├── __init__.py │ │ ├── test_credentials.py │ │ ├── test_manager.py │ │ └── test_monitor_setup.py │ └── diagnoser │ │ ├── __init__.py │ │ ├── converters │ │ ├── __init__.py │ │ └── test_granularity.py │ │ ├── recommendation │ │ ├── __init__.py │ │ ├── test_changes.py │ │ └── test_remove_columns.py │ │ └── test_helpers.py └── conftest.py ├── .python-version ├── whylabs_toolkit ├── __init__.py ├── utils │ ├── __init__.py │ └── granularity.py ├── helpers │ ├── __init__.py │ ├── client.py │ ├── utils.py │ ├── cron_validators.py │ ├── models.py │ ├── dataset_profiles.py │ ├── README.md │ ├── config.py │ ├── schema.py │ └── monitor_helpers.py ├── monitor │ ├── diagnoser │ │ ├── __init__.py │ │ ├── helpers │ │ │ ├── __init__.py │ │ │ ├── describe.py │ │ │ └── utils.py │ │ ├── converters │ │ │ ├── __init__.py │ │ │ └── granularity.py │ │ ├── recommendation │ │ │ ├── __init__.py │ │ │ ├── manual_change.py │ │ │ ├── remove_columns.py │ │ │ ├── recommended_change.py │ │ │ └── change_recommender.py │ │ ├── models │ │ │ ├── __init__.py │ │ │ ├── noisy_monitors.py │ │ │ └── diagnosis_report.py │ │ ├── constants.py │ │ ├── targeting.py │ │ └── README.md │ ├── __init__.py │ ├── manager │ │ ├── __init__.py │ │ ├── credentials.py │ │ ├── manager.py │ │ └── README.md │ └── models │ │ ├── segments.py │ │ ├── utils.py │ │ ├── analyzer │ │ ├── __init__.py │ │ ├── targets.py │ │ ├── baseline.py │ │ └── analyzer.py │ │ ├── __init__.py │ │ ├── document.py │ │ ├── commons.py │ │ ├── column_schema.py │ │ └── monitor.py └── container │ └── config_types.py ├── .gitignore ├── mypy.ini ├── .bumpversion.cfg ├── .github └── workflows │ ├── release.yaml │ └── publish.yaml ├── pyproject.toml ├── Makefile ├── README.md ├── examples ├── presets.md └── example_notebooks │ ├── Custom LLM Metrics.ipynb │ └── Metrics API.ipynb └── LICENSE /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.python-version: -------------------------------------------------------------------------------- 1 | 3.8 2 | -------------------------------------------------------------------------------- /tests/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/monitor/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/monitor/manager/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/utils/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/monitor/diagnoser/__init__.py: 
-------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/monitor/diagnoser/converters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/monitor/diagnoser/recommendation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/helpers/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/converters/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/recommendation/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .noisy_monitors import * 2 | from .diagnosis_report import * 3 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/__init__.py: -------------------------------------------------------------------------------- 1 | from .manager import MonitorSetup, MonitorManager 2 | 3 | 4 | ALL = [ 5 | MonitorManager, 6 | MonitorSetup, 7 | ] 8 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/constants.py: -------------------------------------------------------------------------------- 1 | MAX_COLUMNS = 100 2 | DEFAULT_BATCHES = 30 3 | MAX_PROFILES = 10000 4 | assert DEFAULT_BATCHES * MAX_COLUMNS <= MAX_PROFILES 5 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/manager/__init__.py: -------------------------------------------------------------------------------- 1 | from .manager import MonitorManager 2 | from .credentials import MonitorCredentials 3 | from .monitor_setup import MonitorSetup 4 | 5 | ALL = [MonitorManager, MonitorCredentials, MonitorSetup] 6 | -------------------------------------------------------------------------------- /whylabs_toolkit/utils/granularity.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | 3 | 4 | class Granularity(str, Enum): 5 | """Supported granularity.""" 6 | 7 | hourly = "hourly" 8 | daily = "daily" 9 | weekly = "weekly" 10 | monthly = "monthly" 11 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 
1 | 2 | # Can generate requirements.txt from the latest poetry package lock with `make requirements.txt` 3 | *requirements.txt 4 | 5 | **/__pycache__/** 6 | .vscode/ 7 | .ipynb_checkpoints 8 | .venv/ 9 | .env 10 | .idea 11 | **/.mypy_cache 12 | **/.pytest_cache 13 | dist 14 | -------------------------------------------------------------------------------- /mypy.ini: -------------------------------------------------------------------------------- 1 | [mypy] 2 | warn_redundant_casts = True 3 | ignore_missing_imports = True 4 | warn_unused_ignores = False 5 | 6 | # Needed because of bug in MyPy 7 | disallow_subclassing_any = False 8 | 9 | mypy_path = stubs 10 | 11 | disallow_untyped_calls = True 12 | disallow_untyped_defs = True 13 | check_untyped_defs = True 14 | warn_return_any = True 15 | no_implicit_optional = True 16 | strict_optional = True -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/client.py: -------------------------------------------------------------------------------- 1 | from whylabs_client import ApiClient, Configuration 2 | 3 | from .config import Config 4 | 5 | 6 | def create_client(config: Config = Config()) -> ApiClient: 7 | client_config = Configuration(host=config.get_whylabs_api_endpoint()) 8 | client_config.api_key = {"ApiKeyAuth": config.get_whylabs_api_key()} 9 | client_config.discard_unknown_keys = True 10 | return ApiClient(client_config) 11 | -------------------------------------------------------------------------------- /tests/helpers/test_smoke.py: -------------------------------------------------------------------------------- 1 | def test_import() -> None: 2 | import whylabs_toolkit.helpers.client 3 | from whylabs_toolkit.monitor import MonitorManager, MonitorSetup 4 | 5 | def test_helpers_import() -> None: 6 | from whylabs_toolkit.helpers.monitor_helpers import ( 7 | get_analyzer_ids, 8 | get_analyzers, 9 | get_model_granularity, 10 | get_models_api, 11 | get_monitor, 12 | get_monitor_api, 13 | get_monitor_config 14 | ) 15 | -------------------------------------------------------------------------------- /.bumpversion.cfg: -------------------------------------------------------------------------------- 1 | [bumpversion] 2 | current_version = 0.1.2 3 | tag = False 4 | parse = (?P<major>\d+)\.(?P<minor>\d+)\.(?P<patch>\d+)(\-(?P<release>[a-z]+)(?P<build>\d+))?
5 | serialize = 6 | {major}.{minor}.{patch}-{release}{build} 7 | {major}.{minor}.{patch} 8 | 9 | [bumpversion:part:release] 10 | optional_value = prod 11 | first_value = dev 12 | values = 13 | dev 14 | prod 15 | 16 | [bumpversion:file:pyproject.toml] 17 | search = version = "{current_version}" 18 | replace = version = "{new_version}" 19 | -------------------------------------------------------------------------------- /tests/monitor/diagnoser/test_helpers.py: -------------------------------------------------------------------------------- 1 | import pandas as pd 2 | from pandas.testing import assert_series_equal 3 | 4 | from whylabs_toolkit.monitor.diagnoser.helpers.describe import filter_by_index 5 | 6 | 7 | def test_filter_by_index(): 8 | to_sort = pd.Series([0, 1, 1], index=['c3', 'c4', 'c1']) 9 | ref = pd.Series([10, 9, 8], index=['c1', 'c2', 'c3']) 10 | expected = pd.Series([10, 8, 0], index=['c1', 'c3', 'c4']) 11 | assert_series_equal(filter_by_index(to_sort.index, ref), expected) 12 | assert_series_equal(filter_by_index(['c3', 'c4', 'c1'], ref), expected) 13 | -------------------------------------------------------------------------------- /tests/helpers/test_config.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from whylabs_toolkit.helpers.config import Config 4 | from whylabs_toolkit.helpers.models import get_models_api 5 | 6 | 7 | def test_setup_with_private_endpoint(): 8 | os.environ["WHYLABS_PRIVATE_API_ENDPOINT"] = "http://private.com" 9 | 10 | api_endpoint = Config().get_whylabs_api_endpoint() 11 | 12 | assert api_endpoint == "http://private.com" 13 | 14 | models_api = get_models_api() 15 | 16 | assert models_api.api_client.configuration.host == "http://private.com" 17 | 18 | del os.environ["WHYLABS_PRIVATE_API_ENDPOINT"] 19 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/manager/credentials.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | from dataclasses import dataclass, field 3 | 4 | from whylabs_toolkit.helpers.config import Config 5 | 6 | 7 | @dataclass 8 | class MonitorCredentials: 9 | monitor_id: str 10 | dataset_id: Optional[str] = field(default=None) # type: ignore 11 | config: Config = field(default=Config()) # type: ignore 12 | 13 | def __post_init__(self) -> None: 14 | self.org_id = self.config.get_default_org_id() 15 | self.analyzer_id = f"{self.monitor_id}-analyzer" 16 | if not self.dataset_id: 17 | self.dataset_id = self.config.get_default_dataset_id() 18 | -------------------------------------------------------------------------------- /whylabs_toolkit/container/config_types.py: -------------------------------------------------------------------------------- 1 | from enum import Enum 2 | from dataclasses import dataclass 3 | from whylogs.core.schema import DatasetSchema 4 | from typing import Optional 5 | 6 | 7 | class DatasetCadence(Enum): 8 | HOURLY = "HOURLY" 9 | DAILY = "DAILY" 10 | 11 | 12 | class DatasetUploadCadenceGranularity(Enum): 13 | MINUTE = "M" 14 | HOUR = "H" 15 | DAY = "D" 16 | 17 | 18 | @dataclass 19 | class DatasetUploadCadence: 20 | interval: int 21 | granularity: DatasetUploadCadenceGranularity 22 | 23 | 24 | @dataclass 25 | class DatasetOptions: 26 | schema: Optional[DatasetSchema] 27 | dataset_cadence: DatasetCadence 28 | whylabs_upload_cadence: DatasetUploadCadence 29 | 
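For illustration, a minimal sketch of composing these container option types (the values are hypothetical, and a default `DatasetSchema()` is assumed to be acceptable when no custom whylogs schema is needed):

```python
from whylogs.core.schema import DatasetSchema

from whylabs_toolkit.container.config_types import (
    DatasetCadence,
    DatasetOptions,
    DatasetUploadCadence,
    DatasetUploadCadenceGranularity,
)

# A daily dataset whose profiles are uploaded to WhyLabs every 15 minutes.
options = DatasetOptions(
    schema=DatasetSchema(),  # or None when no custom schema is needed
    dataset_cadence=DatasetCadence.DAILY,
    whylabs_upload_cadence=DatasetUploadCadence(
        interval=15,
        granularity=DatasetUploadCadenceGranularity.MINUTE,
    ),
)
```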
-------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/segments.py: -------------------------------------------------------------------------------- 1 | """Segment definitions.""" 2 | from typing import List 3 | 4 | from pydantic import Field 5 | 6 | from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel 7 | 8 | 9 | class SegmentTag(NoExtrasBaseModel): 10 | """A single tag key value pair for a segment.""" 11 | 12 | key: str = Field(max_length=1000) 13 | value: str = Field(max_length=1000) 14 | 15 | 16 | class Segment(NoExtrasBaseModel): 17 | """A segment is a list of tags. 18 | 19 | We normalize these in the backend. 20 | """ 21 | 22 | tags: List[SegmentTag] = Field( 23 | description="List of tags that define the specific segment", 24 | max_items=10, 25 | ) 26 | -------------------------------------------------------------------------------- /tests/helpers/test_utils.py: -------------------------------------------------------------------------------- 1 | from whylabs_toolkit.helpers.config import UserConfig 2 | from whylabs_toolkit.helpers.utils import get_dataset_profile_api, get_models_api, get_notification_api 3 | 4 | 5 | def test_get_apis_with_different_config(user_config: UserConfig) -> None: 6 | dataset_api = get_dataset_profile_api(config = user_config) 7 | assert dataset_api.api_client.configuration.api_key["ApiKeyAuth"] == user_config.api_key 8 | 9 | models_api = get_models_api(config = user_config) 10 | assert models_api.api_client.configuration.api_key["ApiKeyAuth"] == user_config.api_key 11 | 12 | notifications_api = get_notification_api(config = user_config) 13 | assert notifications_api.api_client.configuration.api_key["ApiKeyAuth"] == user_config.api_key 14 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/recommendation/manual_change.py: -------------------------------------------------------------------------------- 1 | from whylabs_toolkit.monitor.diagnoser.recommendation.recommended_change import RecommendedChange 2 | 3 | 4 | class ManualChange(RecommendedChange): 5 | name = "manual_change" 6 | summary = "Make a manual change to the analyzer to address {condition}: {summary}" 7 | required_info = ["condition"] 8 | manual = True 9 | 10 | def summarize(self) -> str: 11 | condition = self.info.get("condition", "") if self.info else "" 12 | if condition == "narrow_threshold_band": 13 | # percent diff of 0 would be bad... 
need to add info to differentiate 14 | return "Move columns to a new analyzer that uses absolute diff, percent diff or fixed thresholds" 15 | return super().summarize() 16 | -------------------------------------------------------------------------------- /tests/monitor/manager/test_credentials.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from whylabs_toolkit.monitor.manager import MonitorCredentials 6 | 7 | 8 | @pytest.fixture 9 | def credentials() -> MonitorCredentials: 10 | return MonitorCredentials( 11 | monitor_id="test_id" 12 | ) 13 | 14 | def test_credentials_org_id_match_env_var(credentials): 15 | expected_org_id = os.environ["WHYLABS_DEFAULT_ORG_ID"] 16 | assert expected_org_id == credentials.org_id 17 | 18 | def test_analyzer_id_derived_from_monitor_id(credentials): 19 | assert credentials.analyzer_id == f"{credentials.monitor_id}-analyzer" 20 | 21 | def test_gets_dataset_id_from_env_var_if_not_passed(credentials): 22 | expected_dataset_id = os.environ["WHYLABS_DEFAULT_DATASET_ID"] 23 | assert expected_dataset_id == credentials.dataset_id 24 | -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- 1 | name: Upload Package to PyPi 2 | 3 | on: 4 | release: 5 | types: [released] 6 | 7 | permissions: 8 | contents: read 9 | 10 | jobs: 11 | deploy: 12 | 13 | runs-on: ubuntu-latest 14 | 15 | steps: 16 | - uses: actions/checkout@v4 17 | - name: Set up Python 18 | uses: actions/setup-python@v5 19 | with: 20 | python-version: '3.8' 21 | 22 | - name: Install poetry 23 | uses: Gr1N/setup-poetry@v9 24 | with: 25 | poetry-version: 1.2.2 26 | 27 | - name: Install dependencies 28 | run: | 29 | make setup 30 | - name: Build package 31 | run: | 32 | poetry build 33 | - name: Publish a Python distribution to PyPI 34 | uses: pypa/gh-action-pypi-publish@release/v1 35 | with: 36 | password: ${{ secrets.PYPI_API_TOKEN }} 37 | -------------------------------------------------------------------------------- /tests/helpers/test_config_types.py: -------------------------------------------------------------------------------- 1 | from whylabs_toolkit.container.config_types import DatasetCadence, DatasetUploadCadenceGranularity 2 | 3 | 4 | def test_container_config_parsing() -> None: 5 | """ 6 | Our python container depends on these types staying the way they are. This is 7 | the easiest way to make sure we don't accidentally break the downstream container. 
8 | """ 9 | daily = DatasetCadence("DAILY") 10 | assert daily == DatasetCadence.DAILY 11 | 12 | hourly = DatasetCadence("HOURLY") 13 | assert hourly == DatasetCadence.HOURLY 14 | 15 | daily_granularity = DatasetUploadCadenceGranularity("D") 16 | assert daily_granularity == DatasetUploadCadenceGranularity.DAY 17 | 18 | hour_granularity = DatasetUploadCadenceGranularity("H") 19 | assert hour_granularity == DatasetUploadCadenceGranularity.HOUR 20 | 21 | minute_granularity = DatasetUploadCadenceGranularity("M") 22 | assert minute_granularity == DatasetUploadCadenceGranularity.MINUTE 23 | -------------------------------------------------------------------------------- /tests/monitor/diagnoser/recommendation/test_changes.py: -------------------------------------------------------------------------------- 1 | from whylabs_toolkit.monitor.diagnoser.models import ConditionRecord 2 | from whylabs_toolkit.monitor.diagnoser.recommendation.recommended_change import RecommendedChange 3 | 4 | 5 | def test_from_condition(): 6 | info = {'k1': 3} 7 | condition = ConditionRecord(name="fixed_threshold_mismatch", summary='a mismatch', columns=['col1', 'col3', 'col4'], info=info) 8 | change = RecommendedChange.from_condition(condition) 9 | assert change.columns == condition.columns 10 | assert change.info == condition.info 11 | 12 | 13 | def test_merge_changes(): 14 | change1 = RecommendedChange(columns=['c1', 'c2'], info={'f1': 1, 'f2': 2}) 15 | change2 = RecommendedChange(columns=['c1', 'c3'], info={'f1': 0, 'f3': 3}) 16 | merged = change1.merge(change2) 17 | assert change1.columns == ['c1', 'c2'] 18 | assert change2.columns == ['c1', 'c3'] 19 | assert set(merged.columns) == {'c1', 'c2', 'c3'} 20 | assert merged.info == {'f1': 0, 'f2': 2, 'f3': 3} 21 | 22 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/utils.py: -------------------------------------------------------------------------------- 1 | """Common utilities.""" 2 | from typing import Any, Dict 3 | 4 | from pydantic import Field, constr 5 | 6 | 7 | def anyOf_to_oneOf(schema: Dict[str, Any], field_name: str) -> None: 8 | """Turn anyOf in JSON schema to oneOf. 9 | 10 | onfOf is much stricter and pyDantic doesn't produce this tag. We hijack the JSON schema object to 11 | set this correctly. 
12 | 13 | See: https://github.com/samuelcolvin/pydantic/issues/656 14 | """ 15 | cfg = schema["properties"].get(field_name) 16 | if cfg is None: 17 | return 18 | if cfg.get("anyOf") is None: 19 | return 20 | cfg["oneOf"] = cfg["anyOf"] 21 | del cfg["anyOf"] 22 | 23 | 24 | COLUMN_NAME_TYPE = constr(max_length=1000) 25 | METRIC_NAME_STR = constr(max_length=50) 26 | 27 | 28 | def duration_field(description: str, title: str) -> Any: 29 | """Duration of a field.""" 30 | return Field( 31 | None, 32 | title=title, 33 | description=description, 34 | example="PT1H, P1D", 35 | regex="^P(?!$)(\\d+M)?(\\d+W)?(\\d+D)?(T(?=\\d+[HM])(\\d+H)?(\\d+M)?)?$", 36 | ) 37 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/models/noisy_monitors.py: -------------------------------------------------------------------------------- 1 | from typing import Optional, List 2 | 3 | from pydantic import BaseModel 4 | from whylabs_toolkit.monitor.models import Segment 5 | 6 | 7 | class NoisyMonitorStats(BaseModel): 8 | monitor_id: Optional[str] 9 | analyzer_id: str 10 | metric: str 11 | column_count: int 12 | segment_count: int 13 | anomaly_count: int 14 | max_anomaly_per_column: int 15 | min_anomaly_per_column: int 16 | avg_anomaly_per_column: int 17 | action_count: int 18 | action_targets: List[str] 19 | 20 | 21 | class FailedMonitorStats(BaseModel): 22 | monitor_id: Optional[str] 23 | analyzer_id: str 24 | metric: str 25 | failed_count: int 26 | max_failed_per_column: int 27 | min_failed_per_column: int 28 | avg_failed_per_column: int 29 | action_count: int 30 | action_targets: List[str] 31 | 32 | 33 | class NoisySegmentStats(BaseModel): 34 | segment: Segment 35 | total_anomalies: int 36 | batch_count: int 37 | 38 | 39 | class FailedSegmentStats(BaseModel): 40 | segment: Segment 41 | total_failed: int 42 | 43 | 44 | class NoisyColumnStats(BaseModel): 45 | column: str 46 | total_anomalies: int 47 | -------------------------------------------------------------------------------- /tests/conftest.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | 5 | from whylabs_toolkit.monitor.manager import MonitorSetup 6 | from whylabs_toolkit.monitor.models import DiffConfig, DiffMode, SimpleColumnMetric, TrailingWindowBaseline 7 | from whylabs_toolkit.helpers.config import UserConfig 8 | 9 | 10 | @pytest.fixture 11 | def monitor_setup() -> MonitorSetup: 12 | monitor_setup = MonitorSetup(monitor_id="some_long_and_descriptive_id") 13 | monitor_setup.config = DiffConfig( 14 | mode=DiffMode.pct, 15 | threshold=12.0, 16 | metric=SimpleColumnMetric.median, 17 | baseline=TrailingWindowBaseline(size=14) 18 | ) 19 | return monitor_setup 20 | 21 | @pytest.fixture 22 | def existing_monitor_setup() -> MonitorSetup: 23 | monitor_setup = MonitorSetup( 24 | monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 25 | ) 26 | return monitor_setup 27 | 28 | @pytest.fixture 29 | def user_config() -> UserConfig: 30 | config = UserConfig( 31 | api_key=os.environ["DEV_WHYLABS_API_KEY"], 32 | org_id=os.environ["DEV_ORG_ID"], 33 | dataset_id=os.environ["DEV_DATASET_ID"], 34 | whylabs_api_endpoint="https://songbird.development.whylabsdev.com" 35 | ) 36 | return config -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/helpers/describe.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | 
import pandas as pd 4 | 5 | 6 | def describe_truncated_list(vals: List[str], num: int = 10) -> str: 7 | if len(vals) <= num: 8 | return str(vals) 9 | return f"{vals[0:num]} and {len(vals) - num} more" 10 | 11 | 12 | def describe_truncated_table(df: Union[pd.DataFrame, pd.Series], num: int = 10) -> str: 13 | if len(df) <= num: 14 | table = df.to_markdown() 15 | return str(table) if table is not None else "No data to display." 16 | return f"{df[0:num].to_markdown()}\n and {len(df) - num} more" 17 | 18 | 19 | def filter_by_index(items: Union[pd.Index, list], ref: pd.Series) -> pd.Series: 20 | """ 21 | Filters the reference by items in its index. Appends 0 values for any 22 | items not in the ref index. 23 | 24 | Example use... ref is anomalies by column, items are columns in a condition. 25 | """ 26 | index = items if isinstance(items, pd.Index) else pd.Index(items) 27 | diff = index.difference(ref.index) 28 | if len(diff) == 0: 29 | return ref.loc[index].sort_index() 30 | expanded_ref = pd.concat([ref, pd.Series([0] * len(diff), index=diff)]) 31 | return expanded_ref.loc[index].sort_index() 32 | -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/utils.py: -------------------------------------------------------------------------------- 1 | from whylabs_client.api.dataset_profile_api import DatasetProfileApi 2 | from whylabs_client.api.monitor_diagnostics_api import MonitorDiagnosticsApi 3 | from whylabs_client.api.models_api import ModelsApi 4 | from whylabs_client.api.notification_settings_api import NotificationSettingsApi 5 | from whylabs_client.api.monitor_api import MonitorApi 6 | 7 | from whylabs_toolkit.helpers.client import create_client 8 | from whylabs_toolkit.helpers.config import Config 9 | 10 | 11 | def get_models_api(config: Config = Config()) -> ModelsApi: 12 | return ModelsApi(api_client=create_client(config=config)) 13 | 14 | 15 | def get_dataset_profile_api(config: Config = Config()) -> DatasetProfileApi: 16 | return DatasetProfileApi(api_client=create_client(config=config)) 17 | 18 | 19 | def get_notification_api(config: Config = Config()) -> NotificationSettingsApi: 20 | return NotificationSettingsApi(api_client=create_client(config=config)) 21 | 22 | 23 | def get_monitor_api(config: Config = Config()) -> MonitorApi: 24 | return MonitorApi(api_client=create_client(config=config)) 25 | 26 | 27 | def get_monitor_diagnostics_api(config: Config = Config()) -> MonitorDiagnosticsApi: 28 | return MonitorDiagnosticsApi(api_client=create_client(config=config)) 29 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/analyzer/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """Analyzer module.""" 3 | from .algorithms import * 4 | from .analyzer import Analyzer 5 | from .baseline import * 6 | from .targets import * 7 | 8 | __all__ = [ 9 | "DatasetMetric", 10 | "SimpleColumnMetric", 11 | "ComplexMetrics", 12 | # analyzer 13 | "Analyzer", 14 | # baseline 15 | "BaselineType", 16 | "ReferenceProfileId", 17 | "TimeRangeBaseline", 18 | "TrailingWindowBaseline", 19 | "SingleBatchBaseline", 20 | # configs 21 | "DriftConfig", 22 | "DiffConfig", 23 | "ComparisonConfig", 24 | "ComparisonOperator", 25 | "ExperimentalConfig", 26 | "FixedThresholdsConfig", 27 | "ColumnListChangeConfig", 28 | "SeasonalConfig", 29 | "ListComparisonConfig", 30 | "FrequentStringComparisonConfig", 31 | "StddevConfig", 32 | 
"ConjunctionConfig", 33 | "DisjunctionConfig", 34 | # enums 35 | "DiffMode", 36 | "ThresholdType", 37 | "AlgorithmType", 38 | "DatasetMetric", 39 | "SimpleColumnMetric", 40 | "ComplexMetrics", 41 | "ListComparisonOperator", 42 | "FrequentStringComparisonOperator", 43 | # targets 44 | "DatasetMatrix", 45 | "ColumnMatrix", 46 | "TargetLevel", 47 | "ExpectedValue", 48 | ] 49 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/converters/granularity.py: -------------------------------------------------------------------------------- 1 | from math import floor 2 | from isodate import parse_datetime, parse_duration, parse_date 3 | 4 | 5 | def calculate_num_batches(interval: str, granularity: str) -> int: 6 | # Parse the ISO8601 interval string into a start and end datetime 7 | start, end = interval.split("/") 8 | start_date = parse_datetime(start) if "T" in start else parse_date(start) 9 | try: 10 | end_date = parse_datetime(end) if "T" in start else parse_date(end) 11 | except ValueError: 12 | end_date = start_date + parse_duration(end) 13 | 14 | # Calculate the (somewhat approximate) difference based on the granularity 15 | # Truncates to whole batches, ignores leap seconds 16 | if granularity == "hourly": 17 | difference = (end_date - start_date).total_seconds() / 3600 18 | elif granularity == "daily": 19 | difference = (end_date - start_date).total_seconds() / (3600 * 24) 20 | elif granularity == "weekly": 21 | difference = (end_date - start_date).total_seconds() / (3600 * 24 * 7) 22 | elif granularity == "monthly": 23 | difference = (end_date.year - start_date.year) * 12 + end_date.month - start_date.month 24 | else: 25 | raise ValueError(f"Unsupported granularity: {granularity}") 26 | 27 | diff_as_int: int = floor(difference) 28 | return diff_as_int 29 | -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/cron_validators.py: -------------------------------------------------------------------------------- 1 | from dataclasses import dataclass 2 | 3 | 4 | @dataclass 5 | class SplitCron: 6 | day_of_week: str 7 | month: str 8 | day_of_month: str 9 | hour: str 10 | minute: str 11 | 12 | 13 | def split_cron_expression(cron: str) -> SplitCron: 14 | """Split the cron expression into its components.""" 15 | cron_slots = cron.split(" ") 16 | if len(cron_slots) != 5: 17 | raise ValueError("CronSchedule must have 5 fields.") 18 | return SplitCron( 19 | minute=cron_slots[0], 20 | hour=cron_slots[1], 21 | day_of_month=cron_slots[2], 22 | month=cron_slots[3], 23 | day_of_week=cron_slots[4], 24 | ) 25 | 26 | 27 | def _is_not_less_granular_than_1_hour(split_cron: SplitCron) -> bool: 28 | """Check if the cron expression is less granular than 1 hour.""" 29 | if split_cron.minute == "*": 30 | return False 31 | 32 | for item in ["-", ","]: 33 | if item in split_cron.minute: 34 | return False 35 | 36 | if split_cron.minute.startswith("*/"): 37 | try: 38 | divisor = int(split_cron.minute.split("/")[1]) 39 | if divisor < 60: 40 | return False 41 | except ValueError: 42 | pass 43 | 44 | return True 45 | 46 | 47 | def validate_cron_expression(cron: str) -> bool: 48 | split_cron = split_cron_expression(cron) 49 | return _is_not_less_granular_than_1_hour(split_cron=split_cron) 50 | -------------------------------------------------------------------------------- /.github/workflows/publish.yaml: -------------------------------------------------------------------------------- 1 | name: Whylabs Package 
Workflow 2 | 3 | on: 4 | push: 5 | branches: ["mainline"] 6 | pull_request: 7 | branches: ["*"] 8 | 9 | jobs: 10 | build: 11 | name: Run lints and tests on PRs and merges 12 | timeout-minutes: 10 13 | runs-on: ubuntu-latest 14 | environment: whylabs-toolkit-ci 15 | steps: 16 | - uses: actions/checkout@v4 17 | 18 | - uses: actions/setup-python@v5 19 | name: Install Python 20 | with: 21 | python-version: "3.8.15" 22 | 23 | - uses: Gr1N/setup-poetry@v9 24 | name: Install poetry 25 | with: 26 | poetry-version: 1.2.2 27 | 28 | - name: Install python dependencies 29 | run: make setup 30 | 31 | - name: Check types 32 | run: make lint 33 | 34 | - name: Check formatting 35 | run: make format 36 | 37 | - name: Run test 38 | run: make test 39 | env: 40 | WHYLABS_DEFAULT_ORG_ID : ${{ secrets.WHYLABS_DEFAULT_ORG_ID }} 41 | WHYLABS_DEFAULT_DATASET_ID : ${{ secrets.WHYLABS_DEFAULT_DATASET_ID }} 42 | WHYLABS_API_KEY : ${{ secrets.WHYLABS_API_KEY }} 43 | WHYLABS_DEFAULT_MONITOR_ID : ${{ secrets.WHYLABS_DEFAULT_MONITOR_ID }} 44 | WHYLABS_DEFAULT_ANALYZER_ID : ${{ secrets.WHYLABS_DEFAULT_ANALYZER_ID }} 45 | DEV_WHYLABS_API_KEY: ${{secrets.DEV_WHYLABS_API_KEY}} 46 | DEV_ORG_ID: ${{secrets.DEV_ORG_ID}} 47 | DEV_DATASET_ID: ${{secrets.DEV_DATASET_ID}} 48 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/targeting.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union, Set 2 | 3 | from whylabs_toolkit.monitor.models import EntitySchema, ColumnMatrix, DatasetMatrix 4 | 5 | 6 | def expand_target(target: str, schema: EntitySchema) -> List[str]: 7 | if target == "*": 8 | return [str(k) for k in schema.columns.keys()] 9 | col_items = schema.columns.items() 10 | if target == "group:discrete": 11 | return [name for (name, c) in col_items if c.discreteness == "discrete"] 12 | if target == "group:continuous": 13 | return [name for (name, c) in col_items if c.discreteness == "continuous"] 14 | if target == "group:input": 15 | return [name for (name, c) in col_items if c.classifier == "input"] 16 | if target == "group:output": 17 | return [name for (name, c) in col_items if c.classifier == "output"] 18 | return [target] 19 | 20 | 21 | def targeted_columns(target_matrix: Union[ColumnMatrix, DatasetMatrix], schema: EntitySchema) -> List[str]: 22 | if target_matrix is None: 23 | return [] 24 | if isinstance(target_matrix, DatasetMatrix): 25 | return ["__internal__datasetMetrics"] 26 | columns: Set[str] = set() 27 | if target_matrix.include is not None: 28 | for include in target_matrix.include: 29 | columns.update(expand_target(include, schema)) 30 | if target_matrix.exclude is not None: 31 | for exclude in target_matrix.exclude: 32 | columns = columns - set(expand_target(exclude, schema)) 33 | return list(columns) 34 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [tool.poetry] 2 | name = "whylabs-toolkit" 3 | version = "0.1.2" 4 | description = "Whylabs Toolkit package." 
5 | authors = ["Murilo Mendonca ", "Anthony Naddeo ", 6 | "Christine Draper "] 7 | license = "Apache-2.0 license" 8 | readme = "README.md" 9 | packages = [{include = "whylabs_toolkit/**/*.py"}] 10 | include = ["whylabs_toolkit/monitor/schema/schema.json"] 11 | 12 | [tool.poetry.dependencies] 13 | python = "^3.8" 14 | whylabs-client = "^0.6.3" 15 | pydantic = "^1.10.15" 16 | whylogs = "^1.1.26" 17 | jsonschema = "^4.17.3" 18 | typing-extensions = "^4.11.0" 19 | urllib3 = "^2.0.2, <2.1" 20 | 21 | # diagnoser extra dependencies 22 | pandas = { version="^2.0.3", optional=true } 23 | numpy = { version="^1.24.1", optional=true } 24 | tabulate = { version="^0.8.9", optional=true } 25 | isodate = { version="^0.6.1", optional=true } 26 | python-dateutil = { version="^2.8.2", optional=true } 27 | 28 | [tool.poetry.group.dev.dependencies] 29 | autoflake = "^2.0.1" 30 | pytest = "^7.2.0" 31 | black = "^22.10.0" 32 | mypy = "~1.0.1" 33 | bumpversion = "^0.6.0" 34 | types-python-dateutil = "^2.9.0.20240316" 35 | 36 | [tool.black] 37 | line-length = 140 38 | 39 | [build-system] 40 | requires = ["poetry-core"] 41 | build-backend = "poetry.core.masonry.api" 42 | 43 | [tool.flake8] 44 | max-line-length = 140 45 | ignore = ["F405"] 46 | 47 | [tool.pyright] 48 | include = ["whylabs_toolkit/**/*.py"] 49 | 50 | [tool.poetry.extras] 51 | diagnoser = ["pandas", "numpy", "tabulate", "isodate", "python-dateutil"] -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/README.md: -------------------------------------------------------------------------------- 1 | # Noisy monitor diagnosis 2 | 3 | This package helps users diagnose and fix noisy monitors in WhyLabs. This workflow has the following steps: 4 | * Identify the noisiest monitors for a selected organization and dataset, and choose one to diagnose. 5 | * Identify the noisiest segment of the monitor to be the diagnostic segment. 6 | * Within that segment, identify the noisiest columns. 7 | * Identify the conditions contributing to the noise in the diagnostic segment and noisiest columns. 8 | * Determine the appropriate action to take to fix the conditions contributing to the noise. 9 | * Apply the actions to the monitor. 10 | 11 | Most of the above steps are automated by the monitor diagnoser for common noise conditions, although in some cases the 12 | diagnoser may not match the dataset to any known conditions. Users will also usually need to manually consider the 13 | most appropriate action to take to fix the monitor. A recommender is provided to suggest reasonable actions 14 | and to automate some of the basic actions. We are happy to work with you to improve the diagnoser in such cases. 15 | 16 | ## Usage 17 | To start using the diagnoser, install whylabs_toolkit including the diagnoser extra from PyPI with: 18 | ```bash 19 | pip install 'whylabs_toolkit[diagnoser]' 20 | ``` 21 | 22 | See [diagnoser.ipynb](/examples/example_notebooks/diagnoser.ipynb) for an end-to-end example of identifying noisy 23 | monitors, diagnosing the conditions contributing to noise, and getting recommendations for fixing them. 24 | 25 | See [customized_diagnoser.ipynb](/examples/example_notebooks/customized_diagnoser.ipynb) for an example of how to 26 | customize the diagnosis for your specific needs. 
-------------------------------------------------------------------------------- /tests/monitor/diagnoser/recommendation/test_remove_columns.py: -------------------------------------------------------------------------------- 1 | from typing import Optional 2 | 3 | from whylabs_toolkit.monitor.models import Analyzer 4 | 5 | from whylabs_toolkit.monitor.diagnoser.models import ConditionRecord 6 | from whylabs_toolkit.monitor.diagnoser.recommendation.remove_columns import RemoveColumns 7 | 8 | 9 | def gen_analyzer(metric='mean', config: Optional[dict] = None, 10 | target_matrix: Optional[dict] = None, baseline: Optional[dict] = None): 11 | target_matrix = {'type': 'column', 'include': ['col1']} if target_matrix is None else target_matrix 12 | config = {'type': 'fixed', 'metric': metric, 'upper': 1.0} if config is None else config 13 | if config['type'] != 'fixed': 14 | config['baseline'] = {'type': 'TrailingWindow', 'size': 7} if baseline is None else baseline 15 | return Analyzer.parse_obj( 16 | { 17 | 'id': 'test_analyzer', 18 | 'config': config, 19 | 'targetMatrix': target_matrix, 20 | }) 21 | 22 | 23 | def test_remove_columns(): 24 | analyzer = gen_analyzer(target_matrix={'type': 'column', 'include': ['col1', 'col2'], 'exclude': ['col3']}) 25 | condition = ConditionRecord(name='fixed_threshold', summary='', columns=['col1', 'col3', 'col4']) 26 | change = RemoveColumns.from_condition(condition) 27 | result = change.generate_config(analyzer) 28 | assert len(result) == 1 29 | updated = result[0] 30 | assert updated.targetMatrix.include == ['col2'] 31 | assert sorted(updated.targetMatrix.exclude) == ['col3', 'col4'] 32 | 33 | 34 | def test_remove_columns2(): 35 | analyzer = gen_analyzer(target_matrix={'type': 'column', 'include': ['col1', 'col2'], 'exclude': ['col3']}) 36 | action = RemoveColumns(['col1', 'col2']) 37 | result = action.generate_config(analyzer) 38 | assert len(result) == 0 39 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/recommendation/remove_columns.py: -------------------------------------------------------------------------------- 1 | from typing import List, Union 2 | 3 | from whylabs_toolkit.monitor.models import Analyzer, TargetLevel, ColumnMatrix, DatasetMatrix 4 | 5 | from whylabs_toolkit.monitor.diagnoser.recommendation.recommended_change import RecommendedChange 6 | from whylabs_toolkit.monitor.models.analyzer import ColumnGroups 7 | 8 | 9 | class RemoveColumns(RecommendedChange): 10 | name = "remove_columns" 11 | summary = "Remove columns from the analyzer" 12 | required_info: List[str] = [] 13 | manual = False 14 | 15 | def _check_can_do(self, analyzer: Analyzer) -> bool: 16 | if analyzer.targetMatrix.type == TargetLevel.dataset: 17 | raise ValueError("Cannot remove columns from a dataset level target matrix") 18 | return super()._check_can_do(analyzer) 19 | 20 | def generate_config(self, analyzer: Analyzer) -> List[Analyzer]: 21 | self._check_can_do(analyzer) 22 | if isinstance(analyzer.targetMatrix, DatasetMatrix): 23 | return [analyzer] 24 | target_matrix: ColumnMatrix = analyzer.targetMatrix 25 | include: List[str] = analyzer.targetMatrix.include if analyzer.targetMatrix.include is not None else [] 26 | exclude: List[Union[ColumnGroups, str]] = ( 27 | analyzer.targetMatrix.exclude if analyzer.targetMatrix.exclude is not None else [] 28 | ) 29 | to_remove = set(self.columns) 30 | # remove from includes if possible, otherwise exclude 31 | remove_includes =
set(include).intersection(to_remove) 32 | new_includes = list(set(include) - to_remove) 33 | analyzer.targetMatrix.include = new_includes 34 | new_excludes = list(set(exclude).union(to_remove - remove_includes)) 35 | analyzer.targetMatrix.exclude = new_excludes 36 | # if nothing's left to target, just remove the analyzer 37 | if len(analyzer.targetMatrix.include) == 0: 38 | return [] 39 | return [analyzer] 40 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/__init__.py: -------------------------------------------------------------------------------- 1 | # flake8: noqa 2 | """Console script for monitor_schema.""" 3 | from .analyzer import * 4 | from .column_schema import * 5 | from .commons import * 6 | from .document import * 7 | from .monitor import * 8 | from .segments import * 9 | from ...utils.granularity import Granularity 10 | 11 | # TODO add all algorithms 12 | 13 | 14 | __all__ = [ 15 | "DatasetMetric", 16 | "SimpleColumnMetric", 17 | "ComplexMetrics", 18 | "Metadata", 19 | # analyzer 20 | "Analyzer", 21 | # baseline 22 | "BaselineType", 23 | "ReferenceProfileId", 24 | "TimeRangeBaseline", 25 | "TimeRange", 26 | "TrailingWindowBaseline", 27 | "SingleBatchBaseline", 28 | # configs 29 | "DiffConfig", 30 | "DriftConfig", 31 | "ComparisonConfig", 32 | "ComparisonOperator", 33 | "FrequentStringComparisonConfig", 34 | "FrequentStringComparisonOperator", 35 | "ListComparisonOperator", 36 | "ListComparisonConfig", 37 | "ExperimentalConfig", 38 | "FixedThresholdsConfig", 39 | "ColumnListChangeConfig", 40 | "SeasonalConfig", 41 | "StddevConfig", 42 | "ConjunctionConfig", 43 | "DisjunctionConfig", 44 | # targets 45 | "DatasetMatrix", 46 | "ColumnMatrix", 47 | "Segment", 48 | "SegmentTag", 49 | "TargetLevel", 50 | # monitors 51 | "Monitor", 52 | "EveryAnomalyMode", 53 | "DigestMode", 54 | "AnomalyFilter", 55 | # scheduling 56 | "ImmediateSchedule", 57 | "CronSchedule", 58 | "FixedCadenceSchedule", 59 | "Cadence", 60 | "GlobalAction", 61 | # big document 62 | "Document", 63 | # schema 64 | "EntitySchema", 65 | "ColumnSchema", 66 | "ColumnDataType", 67 | "ColumnDiscreteness", 68 | "WeightConfig", 69 | "SegmentWeightConfig", 70 | "Granularity", 71 | # enums 72 | "DiffMode", 73 | "ThresholdType", 74 | "AlgorithmType", 75 | "DatasetMetric", 76 | "SimpleColumnMetric", 77 | "ComplexMetrics", 78 | "ExpectedValue", 79 | ] 80 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | NAME= 2 | PY_SOURCE=$(shell find whylabs_toolkit/ -type f -name "*.py") 3 | SHA=$(shell git rev-parse HEAD) 4 | VERSION=$(SHA) 5 | REQUIREMENTS=requirements.txt 6 | SRC_DIR=./whylabs_toolkit/ 7 | 8 | .PHONY: default 9 | .PHONY: lint format format-fix test setup help requirements 10 | 11 | default:help 12 | 13 | requirements: requirements.txt 14 | 15 | requirements.txt: pyproject.toml 16 | poetry export -f requirements.txt > requirements.txt 17 | 18 | lint: 19 | poetry run mypy ${SRC_DIR} --config-file=mypy.ini 20 | 21 | format: 22 | poetry run black --check --line-length 120 ${SRC_DIR} 23 | poetry run autoflake --check --in-place --remove-unused-variables $(PY_SOURCE) 24 | 25 | bump-patch: ## Bump the patch version (_._.X) everywhere it appears in the project 26 | @$(call i, Bumping the patch number) 27 | poetry run bumpversion patch --allow-dirty 28 | 29 | bump-minor: ## Bump the minor version (_.X._) everywhere it appears in the project 30 
| @$(call i, Bumping the minor number) 31 | poetry run bumpversion minor --allow-dirty 32 | 33 | bump-major: ## Bump the major version (X._._) everywhere it appears in the project 34 | @$(call i, Bumping the major number) 35 | poetry run bumpversion major --allow-dirty 36 | 37 | bump-release: ## Convert the version into a release variant (_._._) everywhere it appears in the project 38 | @$(call i, Removing the dev build suffix) 39 | poetry run bumpversion release --allow-dirty 40 | 41 | bump-build: ## Bump the build number (_._._-____XX) everywhere it appears in the project 42 | @$(call i, Bumping the build number) 43 | poetry run bumpversion build --allow-dirty 44 | 45 | format-fix: 46 | poetry run black --line-length 120 ${SRC_DIR} 47 | poetry run autoflake --in-place --remove-unused-variables $(PY_SOURCE) 48 | 49 | setup: 50 | poetry install -E diagnoser 51 | 52 | test: 53 | poetry run pytest 54 | 55 | help: ## Show this help message. 56 | @echo 'usage: make [target] ...' 57 | @echo 58 | @echo 'targets:' 59 | @egrep '^(.+)\:(.*) ##\ (.+)' ${MAKEFILE_LIST} | sed -s 's/:\(.*\)##/: ##/' | column -t -c 2 -s ':#' 60 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WhyLabs Toolkit 2 | 3 | The WhyLabs Toolkit package provides helper methods for interacting with the WhyLabs APIs. It is useful when you want to abstract away some of WhyLabs' internal logic and automate recurring API calls. 4 | 5 | 6 | ## Basic usage 7 | To start using the `whylabs_toolkit` package, install it from PyPI with: 8 | ```bash 9 | pip install whylabs_toolkit 10 | ``` 11 | 12 | ## Packages 13 | 14 | Each of the following packages enables a different use case for the `whylabs_toolkit`. To get started, navigate to one of them and find useful tutorials there; a quickstart sketch also appears at the end of this README. 15 | 16 | | Package | Usage | 17 |---------------------------------------------------------------------------------------------------------------------------|------------------------------------------------------------------------| 18 | | [Monitor Manager](https://github.com/whylabs/whylabs-toolkit/blob/mainline/whylabs_toolkit/monitor/manager/README.md) | Author and modify existing WhyLabs monitors with Python. | 19 | | [Monitor Diagnoser](https://github.com/whylabs/whylabs-toolkit/blob/mainline/whylabs_toolkit/monitor/diagnoser/README.md) | Diagnose problems with monitors. | 20 | | [WhyLabs Helpers](https://github.com/whylabs/whylabs-toolkit/blob/mainline/whylabs_toolkit/helpers/README.md) | Interact with and modify your dataset and ML model specs in WhyLabs. | 21 | 22 | ## Development 23 | 24 | To start contributing, manage dependencies with [Poetry](https://python-poetry.org/) and the provided `Makefile` commands. To install all necessary dependencies and activate the virtual environment, run: 25 | 26 | ```bash 27 | make setup && poetry shell 28 | ``` 29 | 30 | ## Get in touch 31 | If you want to learn more about how you can benefit from this package, or if there is anything missing, please [contact our support](https://whylabs.ai/contact-us); we'll be more than happy to help you!
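## Quickstart example

As mentioned above, here is a minimal sketch of configuring a monitor, mirroring this repository's test fixtures (the monitor id is a placeholder; validating and applying the configuration with `MonitorManager` is covered in the Monitor Manager README linked above):

```python
from whylabs_toolkit.monitor import MonitorSetup
from whylabs_toolkit.monitor.models import (
    DiffConfig,
    DiffMode,
    SimpleColumnMetric,
    TrailingWindowBaseline,
)

# Alert when a column's median differs by more than 12% from a
# 14-batch trailing-window baseline.
monitor_setup = MonitorSetup(monitor_id="my_descriptive_monitor_id")
monitor_setup.config = DiffConfig(
    mode=DiffMode.pct,
    threshold=12.0,
    metric=SimpleColumnMetric.median,
    baseline=TrailingWindowBaseline(size=14),
)
# Hand monitor_setup to MonitorManager to validate and apply the change
# (see the Monitor Manager README for those steps).
```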
-------------------------------------------------------------------------------- /tests/helpers/test_model.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | import pytest 4 | from whylabs_client.api.models_api import ModelsApi 5 | 6 | from whylabs_toolkit.helpers.models import update_model_metadata, add_custom_metric 7 | from whylabs_toolkit.helpers.utils import get_models_api 8 | from whylabs_toolkit.helpers.config import Config 9 | 10 | ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"] 11 | DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"] 12 | 13 | 14 | @pytest.fixture 15 | def models_api() -> ModelsApi: 16 | return get_models_api() 17 | 18 | 19 | def test_update_model_time_period(models_api: ModelsApi) -> None: 20 | update_model_metadata(dataset_id=DATASET_ID, org_id=ORG_ID, time_period="P1D") 21 | model_meta = models_api.get_model(model_id=DATASET_ID, org_id=ORG_ID) 22 | 23 | assert model_meta["time_period"] == "P1D" 24 | 25 | update_model_metadata(dataset_id=DATASET_ID, org_id=ORG_ID, time_period="P1M") 26 | model_meta = models_api.get_model(model_id=DATASET_ID, org_id=ORG_ID) 27 | 28 | assert model_meta["time_period"] == "P1M" 29 | 30 | 31 | def test_update_model_type(models_api: ModelsApi) -> None: 32 | update_model_metadata(dataset_id=DATASET_ID, org_id=ORG_ID, model_type="REGRESSION") 33 | model_meta = models_api.get_model(model_id=DATASET_ID, org_id=ORG_ID) 34 | 35 | assert model_meta["model_type"] == "REGRESSION" 36 | 37 | update_model_metadata(dataset_id=DATASET_ID, org_id=ORG_ID, model_type="CLASSIFICATION") 38 | model_meta = models_api.get_model(model_id=DATASET_ID, org_id=ORG_ID) 39 | 40 | assert model_meta["model_type"] == "CLASSIFICATION" 41 | 42 | 43 | @pytest.mark.skip(reason="Re-enable when fix to whylabs API is in prod") 44 | def test_create_custom_metric(models_api: ModelsApi) -> None: 45 | add_custom_metric( 46 | dataset_id="model-7", 47 | label="temperature.median", 48 | column="temperature", 49 | default_metric="median", 50 | ) 51 | 52 | org_id = Config().get_default_org_id() 53 | 54 | entity = models_api.get_entity_schema(dataset_id="model-7", org_id=org_id) 55 | 56 | assert entity["metrics"]["temperature.median"].to_dict() == {'column': 'temperature', 'default_metric': 'median','label': 'temperature.median'} 57 | 58 | models_api.delete_entity_schema_metric(org_id=org_id, dataset_id="model-7", metric_name="temperature.median") -------------------------------------------------------------------------------- /tests/monitor/diagnoser/converters/test_granularity.py: -------------------------------------------------------------------------------- 1 | from whylabs_toolkit.monitor.diagnoser.converters.granularity import calculate_num_batches 2 | 3 | 4 | def test_calculate_num_batches_hourly(): 5 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-01T03:30:00Z', 'hourly') == 3 6 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-03T01:00:00Z', 'hourly') == 49 7 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'hourly') == 24 8 | assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'hourly') == 720 9 | assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'hourly') == 9480 10 | 11 | 12 | def test_calculate_num_batches_daily(): 13 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'daily') == 1 14 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-09T00:00:00Z', 'daily') == 8 15 | 
assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'daily') == 30 16 | assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'daily') == 395 17 | 18 | 19 | def test_calculate_num_batches_weekly(): 20 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-02T00:00:00Z', 'weekly') == 0 21 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-01-09T00:00:00Z', 'weekly') == 1 22 | assert calculate_num_batches('2022-11-19T00:00:00.000Z/2022-12-19T00:00:00.000Z', 'weekly') == 4 23 | assert calculate_num_batches('2022-11-19T00:00:00.000Z/2023-12-19T00:00:00.000Z', 'weekly') == 56 24 | 25 | 26 | def test_calculate_num_batches_monthly(): 27 | assert calculate_num_batches('2022-01-01T00:00:00Z/2022-02-02T00:00:00Z', 'monthly') == 1 28 | assert calculate_num_batches('2022-01-01T00:00:00Z/2023-02-02T00:00:00Z', 'monthly') == 13 29 | 30 | 31 | def test_calculate_num_batches_duration(): 32 | assert calculate_num_batches('2022-01-01T00:00:00Z/P3D', 'daily') == 3 33 | assert calculate_num_batches('2022-01-01T00:00:00Z/P1W', 'daily') == 7 34 | assert calculate_num_batches('2022-01-01T00:00:00Z/P1D', 'hourly') == 24 35 | 36 | 37 | def test_calculate_num_batches_format(): 38 | assert calculate_num_batches('2022-01-01T00:00/2022-01-02T00:00', 'daily') == 1 39 | assert calculate_num_batches('2022-01-01/2022-01-02', 'daily') == 1 40 | assert calculate_num_batches('2022-01-01/P1D', 'daily') == 1 41 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/recommendation/recommended_change.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import Optional, List 3 | 4 | from whylabs_toolkit.monitor.models import Analyzer 5 | 6 | from whylabs_toolkit.monitor.diagnoser.models import ConditionRecord 7 | from whylabs_toolkit.monitor.diagnoser.helpers.describe import describe_truncated_list 8 | 9 | 10 | class RecommendedChange: 11 | name = "" 12 | summary = "" 13 | manual = True 14 | required_info: List[str] = [] 15 | 16 | @classmethod 17 | def from_condition(cls, condition: ConditionRecord) -> RecommendedChange: 18 | return cls(condition.columns if condition.columns is not None else [], condition.info) 19 | 20 | def __init__(self, columns: List[str], info: Optional[dict] = None): 21 | self.columns = columns 22 | self.info = info 23 | 24 | def merge(self, change: RecommendedChange) -> RecommendedChange: 25 | if change.name != self.name: 26 | raise ValueError(f"Cannot merge {self.name} and {change.name}") 27 | merged = RecommendedChange(list(set(self.columns) | set(change.columns)), self.info) 28 | merged.merge_info(change.info) 29 | return merged 30 | 31 | def merge_info(self, info: Optional[dict]) -> Optional[dict]: 32 | if self.info is None: 33 | self.info = info 34 | elif info is not None: 35 | self.info = {**self.info, **info} 36 | return self.info 37 | 38 | def summarize(self) -> str: 39 | info = self.info if self.info else {} 40 | return self.summary.format(**info) 41 | 42 | def describe(self) -> str: 43 | return f"{self.summarize()} for {describe_truncated_list(self.columns)}" 44 | 45 | def can_automate(self) -> bool: 46 | return all(self.info is not None and f in self.info for f in self.required_info) and not self.manual 47 | 48 | def _check_can_do(self, analyzer: Analyzer) -> bool: 49 | if self.manual: 50 | raise Exception(f"{self.name} has not been automated") 51 | if not self.can_automate(): 52 | raise
Exception( 53 | f"{self.name} requires extra information " 54 | f"{[f for f in self.required_info if self.info is None or f not in self.info.keys()]}" 55 | ) 56 | return True 57 | 58 | def generate_config(self, analyzer: Analyzer) -> List[Analyzer]: 59 | self._check_can_do(analyzer) 60 | return [analyzer] 61 | -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/models.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from typing import Optional 3 | 4 | from whylabs_client.exceptions import ApiValueError, ApiException 5 | from whylabs_client.model.model_type import ModelType 6 | from whylabs_client.model.time_period import TimePeriod 7 | from whylabs_client.model.metric_schema import MetricSchema 8 | 9 | from whylabs_toolkit.helpers.utils import get_models_api 10 | from whylabs_toolkit.helpers.config import Config 11 | 12 | logger = logging.getLogger(__name__) 13 | 14 | 15 | def update_model_metadata( 16 | dataset_id: Optional[str] = None, 17 | org_id: Optional[str] = None, 18 | time_period: Optional[str] = None, 19 | model_type: Optional[str] = None, 20 | config: Config = Config(), 21 | ) -> None: 22 | """ 23 | Update model attributes like model type and period. 24 | """ 25 | org_id = org_id or config.get_default_org_id() 26 | dataset_id = dataset_id or config.get_default_dataset_id() 27 | 28 | api = get_models_api(config=config) 29 | 30 | model_metadata = api.get_model(org_id=org_id, model_id=dataset_id) 31 | logger.debug(f"Updating dataset with current metadata: \n {model_metadata}") 32 | 33 | try: 34 | resp = api.update_model( 35 | org_id=org_id, 36 | model_id=dataset_id, 37 | model_name=model_metadata["name"], 38 | time_period=TimePeriod(time_period) if time_period else model_metadata["time_period"], 39 | model_type=ModelType(model_type) if model_type else model_metadata["model_type"], 40 | ) 41 | logger.debug(f"Updated successfully!
Resp: {resp}") 42 | except ApiValueError as e: 43 | raise e 44 | 45 | 46 | def add_custom_metric( 47 | label: str, 48 | column: str, 49 | default_metric: str, 50 | org_id: Optional[str] = None, 51 | dataset_id: Optional[str] = None, 52 | config: Config = Config(), 53 | ) -> None: 54 | 55 | org_id = org_id or config.get_default_org_id() 56 | dataset_id = dataset_id or config.get_default_dataset_id() 57 | 58 | api = get_models_api(config=config) 59 | metric_schema = MetricSchema(label=label, column=column, default_metric=default_metric) 60 | 61 | try: 62 | api.put_entity_schema_metric(org_id, dataset_id, metric_schema) 63 | logger.info(f"Updated entity schema metric!") 64 | except ApiException as e: 65 | logger.error("Exception when calling ModelsApi -> put_entity_schema_metric\n") 66 | raise e 67 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/document.py: -------------------------------------------------------------------------------- 1 | """The overall document for monitor.""" 2 | import uuid 3 | from typing import List, Literal, Optional 4 | 5 | from pydantic import Field 6 | 7 | from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel 8 | 9 | from .analyzer import Analyzer 10 | from .column_schema import EntitySchema, EntityWeights 11 | from .commons import DATASET_ID_DEF, Metadata 12 | from .monitor import Monitor 13 | from ...utils.granularity import Granularity 14 | 15 | 16 | class Document(NoExtrasBaseModel): 17 | """The main document that dictates how the monitor should be run. This document is managed by WhyLabs internally.""" 18 | 19 | id: Optional[uuid.UUID] = Field(None, description="A unique ID for the document") 20 | schemaVersion: Literal[1] = Field( 21 | description="The schema version of the document", 22 | title="SchemaVersion", 23 | default=1, 24 | ) 25 | metadata: Optional[Metadata] = Field( 26 | None, description="WhyLabs-managed metadata. This is to track various metadata for auditing." 27 | ) 28 | orgId: str = Field(title="OrgId", description="Organization ID for the document", max_length=100) 29 | datasetId: str = DATASET_ID_DEF 30 | granularity: Granularity = Field(description="Granularity of the entity") 31 | allowPartialTargetBatches: Optional[bool] = Field( 32 | None, 33 | title="AllowPartialTargetBatches", 34 | description="""The standard 35 | flow waits for a target batch as defined by the dataset granularity 36 | setting to conclude before running analysis. For example, on monthly datasets datapoints in the 37 | current month would be analyzed at midnight on the last day of the month anticipating additional 38 | data may be profiled. With allowPartialTargetBatches enabled a target batch may be analyzed as 39 | soon as the data is present and dataReadinessDuration/batchCooldownPeriod (if configured) 40 | conditions have been met. 
This can be ideal for data pipelines that upload a single profile per 41 | dataset granularity to reduce the waiting time for analysis.""", 42 | ) 43 | entitySchema: Optional[EntitySchema] = Field(description="Schema configuration for the entity") 44 | weightConfig: Optional[EntityWeights] = Field(None, description="Weight configuration for the entity") 45 | analyzers: List[Analyzer] = Field(description="List of analyzers", max_items=1000) 46 | monitors: List[Monitor] = Field(description="List of monitors", max_items=1000) 47 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/helpers/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Optional 3 | 4 | from whylabs_client.api.monitor_diagnostics_api import MonitorDiagnosticsApi 5 | 6 | from whylabs_toolkit.helpers.client import create_client 7 | from whylabs_toolkit.helpers.config import Config 8 | from whylabs_toolkit.monitor.models import SegmentTag 9 | 10 | 11 | def get_monitor_diagnostics_api(config: Config = Config()) -> MonitorDiagnosticsApi: 12 | """ 13 | Get the monitor diagnostics API, which is used to interact with the WhyLabs Monitor Diagnostics service 14 | to diagnose noisy monitors. 15 | :param config: 16 | :return: 17 | """ 18 | return MonitorDiagnosticsApi(api_client=create_client(config=config)) 19 | 20 | 21 | # TODO this should not be required anymore, but need to test 22 | def env_setup( 23 | org_id: str, dataset_id: str, api_key: Optional[str] = None, whylabs_endpoint: Optional[str] = None 24 | ) -> None: 25 | """ 26 | Set environment variables to work with both whylabs-toolkit and whylogs. Will pick up the API 27 | key from the environment if not provided as a parameter. 
28 |     :param org_id:
29 |     :param dataset_id:
30 |     :param api_key:
31 |     :param whylabs_endpoint:
32 |     :return:
33 |     """
34 |     os.environ["WHYLABS_API_KEY"] = api_key if api_key else os.environ.get("WHYLABS_API_KEY", "")  # avoid a KeyError when the variable is unset
35 |     if not os.environ["WHYLABS_API_KEY"]:
36 |         raise Exception("Please provide an API key")
37 |     os.environ["WHYLABS_DEFAULT_ORG_ID"] = org_id
38 |     os.environ["ORG_ID"] = org_id
39 |     os.environ["WHYLABS_DEFAULT_DATASET_ID"] = dataset_id
40 |     if whylabs_endpoint:
41 |         os.environ["WHYLABS_API_ENDPOINT"] = whylabs_endpoint
42 |         os.environ["WHYLABS_HOST"] = whylabs_endpoint
43 | 
44 | 
45 | def segment_to_text(segment: List[SegmentTag]) -> str:
46 |     if segment is None or len(segment) == 0:
47 |         return ""
48 |     text = ""
49 |     for tag in segment:
50 |         if len(text) > 0:
51 |             text += "&"
52 |         text += f"{tag.key}={tag.value}"
53 |     return text
54 | 
55 | 
56 | def segment_as_readable_text(segment: List[SegmentTag]) -> str:
57 |     text = segment_to_text(segment)
58 |     return "overall" if text == "" else text
59 | 
60 | 
61 | def text_to_segment(text: str) -> List[SegmentTag]:
62 |     if text == "":
63 |         return []
64 |     tags = []
65 |     parts = text.split("&")
66 |     for part in parts:
67 |         [key, value] = part.split("=", 1)  # split on the first "=" only, so values may contain "="
68 |         tags.append(SegmentTag(key=key, value=value))
69 |     return tags
70 | 
--------------------------------------------------------------------------------
/whylabs_toolkit/monitor/models/analyzer/targets.py:
--------------------------------------------------------------------------------
1 | """Define the targets for the analyses."""
2 | from enum import Enum
3 | from typing import List, Literal, Optional, Union
4 | 
5 | from pydantic import Field
6 | 
7 | from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel
8 | from whylabs_toolkit.monitor.models.segments import Segment
9 | from whylabs_toolkit.monitor.models.utils import COLUMN_NAME_TYPE
10 | 
11 | 
12 | class TargetLevel(str, Enum):
13 |     """Which nested level we are targeting."""
14 | 
15 |     dataset = "dataset"
16 |     column = "column"
17 | 
18 | 
19 | class _BaseMatrix(NoExtrasBaseModel):
20 |     segments: Optional[List[Segment]] = Field(
21 |         None,
22 |         description="List of targeted segments. If not set, default to the overall segment",
23 |         max_items=1000,
24 |     )
25 | 
26 | 
27 | class DatasetMatrix(_BaseMatrix):
28 |     """Define the matrix of fields and segments to fan out for monitoring.
29 | 
30 | 
31 |     """
32 | 
33 |     type: Literal[TargetLevel.dataset] = Field(
34 |         TargetLevel.dataset,
35 |         description="Must be 'dataset' level",
36 |     )
37 | 
38 | 
39 | class ColumnGroups(str, Enum):
40 |     """Standard column groupings."""
41 | 
42 |     group_continuous = "group:continuous"
43 |     group_discrete = "group:discrete"
44 | 
45 |     # based on classification
46 |     group_input = "group:input"
47 |     group_output = "group:output"
48 | 
49 |     # based on data types
50 |     group_bool = "group:bool"
51 |     group_int = "group:int"
52 |     group_frac = "group:frac"
53 |     group_str = "group:str"
54 | 
55 | 
56 | class ColumnMatrix(_BaseMatrix):
57 |     """Define the matrix of columns and segments to fan out for monitoring."""
58 | 
59 |     type: Literal[TargetLevel.column] = TargetLevel.column
60 |     include: Optional[List[Union[ColumnGroups, COLUMN_NAME_TYPE]]] = Field(  # type: ignore
61 |         None,
62 |         description="List of allowed fields/features/columns. Could be a grouping as well.",
63 |         max_items=1000,
64 |     )
65 |     exclude: Optional[List[Union[ColumnGroups, COLUMN_NAME_TYPE]]] = Field(  # type: ignore
66 |         None,
67 |         description="List of blocked fields/features/columns. 
Could be a grouping as well. This setting is " 68 | "evaluated AFTER the 'include' field and thus should be used with caution.", 69 | max_items=1000, 70 | ) 71 | profileId: Optional[str] = Field( 72 | default=None, 73 | description="The unique profile ID for the reference profile", 74 | max_length=100, 75 | ) 76 | -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/dataset_profiles.py: -------------------------------------------------------------------------------- 1 | import logging 2 | from datetime import datetime 3 | from typing import Optional, Union 4 | 5 | from whylabs_client.api.dataset_profile_api import DeleteDatasetProfilesResponse, DeleteAnalyzerResultsResponse 6 | 7 | from whylabs_toolkit.helpers.utils import get_dataset_profile_api 8 | from whylabs_toolkit.helpers.config import Config 9 | 10 | date_or_millis = Union[datetime, int] 11 | logger = logging.getLogger(__name__) 12 | 13 | 14 | def validate_timestamp_in_millis(epoch_milliseconds: int) -> bool: 15 | if not isinstance(epoch_milliseconds, int): 16 | return False 17 | try: 18 | epoch_seconds = epoch_milliseconds / 1000 19 | dt = datetime.fromtimestamp(epoch_seconds) 20 | return dt >= datetime(1970, 1, 1) 21 | except (ValueError, OverflowError): 22 | return False 23 | 24 | 25 | def process_date_input(date_input: date_or_millis) -> int: 26 | if isinstance(date_input, int): 27 | try: 28 | assert validate_timestamp_in_millis(epoch_milliseconds=date_input) 29 | return date_input 30 | except AssertionError: 31 | raise ValueError("You must provide a valid date input") 32 | elif isinstance(date_input, datetime): 33 | return int(date_input.timestamp() * 1000.0) 34 | else: 35 | raise ValueError(f"The date object {date_input} input must be a datetime or an integer Epoch!") 36 | 37 | 38 | def delete_all_profiles_for_period( 39 | start: date_or_millis, 40 | end: date_or_millis, 41 | config: Config = Config(), 42 | org_id: Optional[str] = None, 43 | dataset_id: Optional[str] = None, 44 | ) -> DeleteDatasetProfilesResponse: 45 | api = get_dataset_profile_api() 46 | 47 | profile_start_timestamp = process_date_input(date_input=start) 48 | profile_end_timestamp = process_date_input(date_input=end) 49 | 50 | org_id = org_id or config.get_default_org_id() 51 | dataset_id = dataset_id or config.get_default_dataset_id() 52 | 53 | result_profiles: DeleteDatasetProfilesResponse = api.delete_dataset_profiles( 54 | org_id=org_id, 55 | dataset_id=dataset_id, 56 | profile_start_timestamp=profile_start_timestamp, 57 | profile_end_timestamp=profile_end_timestamp, 58 | ) 59 | logger.info(f"Scheduled deletion for profiles on {dataset_id} for {org_id}") 60 | 61 | api.delete_analyzer_results( 62 | org_id=org_id, 63 | dataset_id=dataset_id, 64 | start_timestamp=profile_start_timestamp, 65 | end_timestamp=profile_end_timestamp, 66 | ) 67 | 68 | logger.info("Deleted analyzer results for the same timestamps as the profiles") 69 | logger.info(f"NOTE: Profile deletion happens every full hour on WhyLabs") 70 | 71 | return result_profiles 72 | -------------------------------------------------------------------------------- /tests/monitor/manager/test_manager.py: -------------------------------------------------------------------------------- 1 | import json 2 | import os 3 | from typing import Dict 4 | 5 | import pytest 6 | from jsonschema import ValidationError 7 | 8 | from whylabs_toolkit.monitor.manager import MonitorManager, MonitorSetup 9 | from whylabs_toolkit.monitor.models import GlobalAction 10 | 
from tests.helpers.test_monitor_helpers import BaseTestMonitor 11 | from whylabs_toolkit.helpers.monitor_helpers import get_monitor, get_analyzer_ids, get_monitor_config 12 | 13 | 14 | class TestModelManager(BaseTestMonitor): 15 | @pytest.fixture 16 | def manager(self, existing_monitor_setup: MonitorSetup) -> MonitorManager: 17 | mm = MonitorManager(setup=existing_monitor_setup) 18 | return mm 19 | 20 | def test_dump(self, manager: MonitorManager) -> None: 21 | document = manager.dump() 22 | assert isinstance(json.loads(document), Dict) 23 | 24 | def test_validate(self, manager: MonitorManager) -> None: 25 | assert manager.validate() 26 | 27 | def test_failing_validation(self, monitor_setup: MonitorSetup) -> None: 28 | monitor_setup.actions = [GlobalAction(target="some_long_id")] 29 | monitor_setup.config.mode = "weird_mode" # type: ignore 30 | monitor_setup.apply() 31 | 32 | manager = MonitorManager(setup=monitor_setup) 33 | with pytest.raises(ValidationError): 34 | manager.validate() 35 | 36 | def test_save(self, manager: MonitorManager) -> None: 37 | manager.save() 38 | 39 | monitor = get_monitor( 40 | org_id=os.environ["WHYLABS_DEFAULT_ORG_ID"], 41 | dataset_id=os.environ["WHYLABS_DEFAULT_DATASET_ID"], 42 | monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 43 | ) 44 | 45 | assert monitor is not None 46 | assert isinstance(monitor, Dict) 47 | assert monitor.get("id") == os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 48 | 49 | assert get_analyzer_ids( 50 | org_id=os.environ["WHYLABS_DEFAULT_ORG_ID"], 51 | dataset_id=os.environ["WHYLABS_DEFAULT_DATASET_ID"], 52 | monitor_id=os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 53 | ) 54 | 55 | def test_monitor_running_eagerly(self, existing_monitor_setup: MonitorSetup) -> None: 56 | mm = MonitorManager(setup=existing_monitor_setup, eager=True) 57 | actual_doc = mm.dump() 58 | assert json.loads(actual_doc)["allowPartialTargetBatches"] == True 59 | 60 | mm.save() 61 | 62 | expected_result = get_monitor_config( 63 | dataset_id=existing_monitor_setup.credentials.dataset_id, 64 | org_id=existing_monitor_setup.credentials.org_id 65 | ) 66 | 67 | assert expected_result["allowPartialTargetBatches"] == True 68 | 69 | new_mm = MonitorManager(setup=existing_monitor_setup, eager=False) 70 | new_mm.save() 71 | 72 | new_expected_result = get_monitor_config( 73 | dataset_id=existing_monitor_setup.credentials.dataset_id, 74 | org_id=existing_monitor_setup.credentials.org_id 75 | ) 76 | 77 | assert new_expected_result["allowPartialTargetBatches"] == False 78 | -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/analyzer/baseline.py: -------------------------------------------------------------------------------- 1 | """Define various baselines.""" 2 | from enum import Enum 3 | from typing import List, Literal, Optional 4 | 5 | from pydantic import Field 6 | 7 | from whylabs_toolkit.monitor.models.commons import DATASET_ID_DEF, NoExtrasBaseModel, TimeRange 8 | 9 | 10 | class BaselineType(str, Enum): 11 | """Supported baseline types.""" 12 | 13 | BatchTimestamp = "BatchTimestamp" 14 | Reference = "Reference" 15 | TrailingWindow = "TrailingWindow" 16 | TimeRange = "TimeRange" 17 | CurrentBatch = "CurrentBatch" 18 | 19 | 20 | class _Baseline(NoExtrasBaseModel): 21 | """Base class for a baseline.""" 22 | 23 | datasetId: Optional[str] = DATASET_ID_DEF 24 | 25 | 26 | class _SegmentBaseline(_Baseline): 27 | inheritSegment: Optional[bool] = Field( 28 | None, 29 | title="InheritSegment", 30 | description="Default to 
false. Whether to use the segment from the target to filter down the baseline",
31 |     )
32 | 
33 | 
34 | class ReferenceProfileId(_Baseline):
35 |     """A baseline based on a static reference profile.
36 | 
37 |     A typical use case is to use a "gold" dataset and upload its profile to WhyLabs. This can be a training dataset
38 |     as well for an ML model.
39 |     """
40 | 
41 |     type: Literal[BaselineType.Reference] = BaselineType.Reference
42 |     profileId: str = Field(
43 |         title="ProfileId",
44 |         description="The unique profile ID for the reference profile",
45 |         max_length=100,
46 |     )
47 | 
48 | 
49 | class TrailingWindowBaseline(_SegmentBaseline):
50 |     """A dynamic trailing window.
51 | 
52 |     This is useful if you don't have a static baseline to monitor against. This is the default mode for most
53 |     monitors.
54 |     """
55 | 
56 |     type: Optional[Literal[BaselineType.TrailingWindow]] = Field(BaselineType.TrailingWindow)
57 |     size: int = Field(description="Window size", le=90, gt=3)
58 |     offset: Optional[int] = Field(
59 |         None,
60 |         description="Offset from the current batch for the range of the trailing window. Defaults to 1 (the previous "
61 |         "batch). This means that if you set it to 0, the baseline will include the current batch's value, and "
62 |         "if you set it to 7, the window is offset by 7.",
63 |     )
64 |     exclusionRanges: Optional[List[TimeRange]] = Field(
65 |         None,
66 |         title="ExclusionRanges",
67 |         description="The list of exclusion ranges",
68 |         max_items=100,
69 |     )
70 | 
71 | 
72 | class TimeRangeBaseline(_SegmentBaseline):
73 |     """A static time range.
74 | 
75 |     Instead of using a single profile or a trailing window, users can lock in a "good" period.
76 |     """
77 | 
78 |     type: Literal[BaselineType.TimeRange] = BaselineType.TimeRange
79 |     range: TimeRange = Field(description="The range to set the time range with")
80 | 
81 | 
82 | class SingleBatchBaseline(_SegmentBaseline):
83 |     """Using the current batch.
84 | 
85 |     This is used when you want to use one batch to monitor another batch in a different metric entity.
86 |     """
87 | 
88 |     type: Literal[BaselineType.CurrentBatch] = BaselineType.CurrentBatch
89 |     offset: Optional[int] = Field(
90 |         None,
91 |         description="Offset from the current batch for the baseline. Defaults to 0 (the current batch). This means "
92 |         "that if this field is set to 0, the baseline will be the current batch's value. The datasetId field "
93 |         "is required to be set for this baseline config. "
94 |         "A typical use case is to use another entity to monitor against the current entity",
95 |     )
96 |     datasetId: str = DATASET_ID_DEF
97 | 
--------------------------------------------------------------------------------
/whylabs_toolkit/helpers/README.md:
--------------------------------------------------------------------------------
1 | # Helpers
2 | Here we describe some examples of how to use the `helpers` package. You will need to set `WHYLABS_API_KEY` as an environment variable; with that, the package will be able to authenticate with WhyLabs' API endpoints. You can configure a token for your account directly on the platform.
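For example, you can set the key (plus optional default org and dataset IDs, which the helpers fall back to when you omit the `org_id` and `dataset_id` arguments) from Python before calling any helper. This is a minimal sketch; the values are illustrative placeholders, not real credentials:

```python
import os

# Placeholder values - substitute your own API key and IDs.
os.environ["WHYLABS_API_KEY"] = "<your-api-key>"
os.environ["WHYLABS_DEFAULT_ORG_ID"] = "org_id"
os.environ["WHYLABS_DEFAULT_DATASET_ID"] = "dataset_id"
```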
3 | 
4 | ## Models
5 | Users can change their model type between `REGRESSION`, `CLASSIFICATION` and `EMBEDDINGS`, using the models helpers, as the following example shows:
6 | ```python
7 | from whylabs_toolkit.helpers.models import update_model_metadata
8 | 
9 | update_model_metadata(
10 |     org_id="org_id",
11 |     dataset_id="dataset_id",
12 |     model_type="CLASSIFICATION"
13 | )
14 | ```
15 | 
16 | And to change the model granularity:
17 | 
18 | ```python
19 | from whylabs_toolkit.helpers.models import update_model_metadata
20 | 
21 | update_model_metadata(
22 |     dataset_id="dataset_id",
23 |     org_id="org_id",
24 |     time_period="P1M"
25 | )
26 | ```
27 | 
28 | >**NOTE**: Learn more about the time period config options with the `whylabs_client.model.time_period.TimePeriod` class, available after you've configured your environment with the make command described above.
29 | 
30 | ## Entity Schema
31 | Entity Schema helpers help users change some of their dataset metadata, such as data types, discreteness and column classification (between inputs and outputs). The examples below cover all three cases:
32 | 
33 | ### Column Classes
34 | ```python
35 | from whylabs_toolkit.helpers.schema import (
36 |     UpdateColumnClassifiers,
37 |     ColumnsClassifiers,
38 | )
39 | 
40 | # Note that you don't need to specify all existing columns, but only those you wish to modify
41 | 
42 | classifiers = ColumnsClassifiers(
43 |     outputs=["actual_temperature", "predicted_temperature"]
44 | )
45 | 
46 | update_entity = UpdateColumnClassifiers(
47 |     classifiers=classifiers,
48 |     dataset_id="dataset_id",
49 |     org_id="org_id"
50 | )
51 | 
52 | update_entity.update()
53 | 
54 | ```
55 | ### Data types
56 | ```python
57 | from whylabs_toolkit.helpers.schema import UpdateEntityDataTypes
58 | from whylabs_toolkit.monitor.models.column_schema import ColumnDataType
59 | 
60 | columns_schema = {
61 |     "temperature": ColumnDataType.fractional,
62 |     "is_active": ColumnDataType.boolean
63 | }
64 | 
65 | update_data_types = UpdateEntityDataTypes(
66 |     dataset_id="dataset_id",
67 |     columns_schema=columns_schema,
68 |     org_id="org_id"
69 | )
70 | 
71 | update_data_types.update()
72 | ```
73 | ### Discreteness
74 | ```python
75 | from whylabs_toolkit.helpers.schema import (
76 |     UpdateColumnsDiscreteness,
77 |     ColumnsDiscreteness
78 | )
79 | 
80 | columns = ColumnsDiscreteness(
81 |     discrete=["temperature"]
82 | )
83 | 
84 | update_discreteness = UpdateColumnsDiscreteness(
85 |     dataset_id="dataset_id",
86 |     columns=columns,
87 |     org_id="org_id"
88 | )
89 | 
90 | update_discreteness.update()
91 | ```
92 | ## Monitors
93 | The Monitors helpers will help you manage existing alerts on WhyLabs' platform.
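Before changing anything, it can help to inspect the current monitor configuration document for a dataset. Here is a minimal sketch using `get_monitor_config`, which returns the full configuration as a plain dictionary (including its `analyzers` and `monitors` lists):

```python
from whylabs_toolkit.helpers.monitor_helpers import get_monitor_config

monitor_config = get_monitor_config(
    org_id="org_id",
    dataset_id="dataset_id"
)

# Print the IDs of all configured monitors for this dataset.
print([monitor["id"] for monitor in monitor_config["monitors"]])
```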
94 | 95 | ### Delete monitor 96 | 97 | ```python 98 | from whylabs_toolkit.helpers.monitor_helpers import delete_monitor 99 | 100 | delete_monitor( 101 | org_id="org_id", 102 | dataset_id="dataset_id", 103 | monitor_id="monitor_id" 104 | ) 105 | ``` 106 | 107 | ### List monitors 108 | 109 | To list all the monitor ids associated with a dataset-id, you can do: 110 | 111 | ```python 112 | from whylabs_toolkit.helpers.monitor_helpers import list_monitors 113 | 114 | monitors = list_monitors( 115 | org_id="org_id", 116 | dataset_id="dataset_id" 117 | ) 118 | 119 | print(monitors) 120 | 121 | # >> ["monitor-1", "monitor-2"] 122 | ``` -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/manager/manager.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import json 3 | from pathlib import Path 4 | from typing import Optional, Any 5 | 6 | from jsonschema import validate, ValidationError 7 | from whylabs_client.api.monitor_api import MonitorApi 8 | 9 | from whylabs_toolkit.monitor.manager.monitor_setup import MonitorSetup 10 | from whylabs_toolkit.monitor.models import * 11 | from whylabs_toolkit.helpers.monitor_helpers import get_model_granularity 12 | from whylabs_toolkit.helpers.config import Config 13 | from whylabs_toolkit.helpers.utils import get_monitor_api 14 | 15 | 16 | logging.basicConfig(level=logging.INFO) 17 | logger = logging.getLogger(__name__) 18 | 19 | 20 | class MonitorManager: 21 | def __init__( 22 | self, 23 | setup: MonitorSetup, 24 | eager: Optional[bool] = None, 25 | monitor_api: Optional[MonitorApi] = None, 26 | config: Config = Config(), 27 | ) -> None: 28 | self._setup = setup 29 | self.__monitor_api = monitor_api or get_monitor_api(config=config) 30 | self.__eager = eager 31 | 32 | def _get_current_monitor_config(self) -> Optional[Any]: 33 | monitor_config = self.__monitor_api.get_monitor_config_v3( 34 | org_id=self._setup.credentials.org_id, dataset_id=self._setup.credentials.dataset_id 35 | ) 36 | return monitor_config 37 | 38 | def dump(self) -> Any: 39 | doc = Document( 40 | orgId=self._setup.credentials.org_id, 41 | datasetId=self._setup.credentials.dataset_id, 42 | granularity=get_model_granularity( 43 | org_id=self._setup.credentials.org_id, dataset_id=self._setup.credentials.dataset_id # type: ignore 44 | ), 45 | analyzers=[self._setup.analyzer], 46 | monitors=[self._setup.monitor], 47 | allowPartialTargetBatches=self.__eager, 48 | ) 49 | return doc.json(indent=2, exclude_none=True) 50 | 51 | def validate(self) -> bool: 52 | try: 53 | Monitor.validate(self._setup.monitor) 54 | Analyzer.validate(self._setup.analyzer) 55 | 56 | with open(f"{Path(__file__).parent.parent.resolve()}/schema/schema.json", "r") as f: 57 | schema = json.load(f) 58 | document = self.dump() 59 | validate(instance=json.loads(document), schema=schema) 60 | return True 61 | except ValidationError as e: 62 | raise e 63 | 64 | def save(self) -> None: 65 | if self.validate() is True: 66 | self.__monitor_api.put_analyzer( 67 | org_id=self._setup.credentials.org_id, 68 | dataset_id=self._setup.credentials.dataset_id, 69 | analyzer_id=self._setup.credentials.analyzer_id, 70 | body=self._setup.analyzer.dict(exclude_none=True), # type: ignore 71 | ) 72 | self.__monitor_api.put_monitor( 73 | org_id=self._setup.credentials.org_id, 74 | dataset_id=self._setup.credentials.dataset_id, 75 | monitor_id=self._setup.credentials.monitor_id, 76 | body=self._setup.monitor.dict(exclude_none=True), # type: 
ignore
77 |             )
78 |         if self.__eager is not None:
79 |             current_config = self._get_current_monitor_config()
80 | 
81 |             if self.__eager != current_config.get("allowPartialTargetBatches"):  # type: ignore
82 |                 current_config["allowPartialTargetBatches"] = self.__eager  # type: ignore
83 |                 self.__monitor_api.put_monitor_config_v3(
84 |                     org_id=self._setup.credentials.org_id,
85 |                     dataset_id=self._setup.credentials.dataset_id,
86 |                     body=current_config,
87 |                 )
88 | 
--------------------------------------------------------------------------------
/whylabs_toolkit/monitor/models/commons.py:
--------------------------------------------------------------------------------
1 | """Common schema definitions."""
2 | from datetime import datetime
3 | from enum import Enum
4 | from typing import List, Literal, Optional
5 | 
6 | from pydantic import BaseModel, Extra
7 | from pydantic.fields import Field
8 | 
9 | CRON_REGEX = (
10 |     "(@(annually|yearly|monthly|weekly|daily|hourly))|" "((((\\d+,)+\\d+|(\\d+(\\/|-)\\d+)|\\d+|\\*|\\*/\\d+) ?){5,7})"
11 | )
12 | DATASET_ID_REGEX = "[a-zA-Z0-9\\-_\\.]+"
13 | 
14 | DATASET_ID_DEF = Field(
15 |     title="DatasetId",
16 |     description="The unique ID of a dataset. This is specific to WhyLabs. If the dataset ID "
17 |     "does not exist, the user will get a validation exception when saving the "
18 |     "config with WhyLabs API",
19 |     regex=DATASET_ID_REGEX,
20 |     max_length=100,
21 | )
22 | 
23 | 
24 | class NoExtrasBaseModel(BaseModel, extra=Extra.forbid):  # type: ignore
25 |     """No extras base model.
26 | 
27 |     Inherit to prevent accidental extra fields.
28 |     """
29 | 
30 | 
31 | class ImmediateSchedule(NoExtrasBaseModel):
32 |     """Schedule the monitor to run immediately."""
33 | 
34 |     type: Literal["immediate"] = "immediate"
35 | 
36 | 
37 | class TimeRange(NoExtrasBaseModel):
38 |     """Support for a specific time range."""
39 | 
40 |     start: datetime = Field(description="Inclusive. Start time of a time range.")
41 |     end: datetime = Field(description="Exclusive. End time of a time range.")
42 | 
43 | 
44 | class CronSchedule(NoExtrasBaseModel):
45 |     """Support for scheduling."""
46 | 
47 |     type: Literal["cron"] = "cron"
48 |     cron: str = Field(
49 |         description="Cron expression",
50 |         regex=CRON_REGEX,
51 |     )
52 |     exclusionRanges: Optional[List[TimeRange]] = Field(
53 |         title="ExclusionRanges", description="The ranges of dates during which this Analyzer is NOT run."
54 |     )
55 | 
56 | 
57 | class Cadence(str, Enum):
58 |     """Cadence for an analyzer or monitor run."""
59 | 
60 |     hourly = "hourly"
61 |     daily = "daily"
62 |     weekly = "weekly"
63 |     monthly = "monthly"
64 | 
65 | 
66 | class FixedCadenceSchedule(NoExtrasBaseModel):
67 |     """Support for scheduling based on a predefined cadence."""
68 | 
69 |     type: Literal["fixed"] = "fixed"
70 |     cadence: Literal[Cadence.hourly, Cadence.daily, Cadence.weekly, Cadence.monthly] = Field(
71 |         description="Frequency to run the analyzer or monitor, based on UTC time. The monitor will run at the start of "
72 |         "the cadence with some SLA depending on the customer tiers.",
73 |     )
74 |     exclusionRanges: Optional[List[TimeRange]] = Field(
75 |         title="ExclusionRanges", description="Ranges of dates during which this Analyzer is NOT run."
76 |     )
77 | 
78 | 
79 | class Metadata(NoExtrasBaseModel):
80 |     """Metadata for top-level objects such as monitors, analyzers, and schema.
81 | 
82 |     This object is managed by WhyLabs. Any user-provided values will be ignored on WhyLabs side. 
83 |     """
84 | 
85 |     version: int = Field(description="A monotonically increasing number that indicates the version of the object.")
86 |     schemaVersion: Optional[int] = Field(
87 |         None,
88 |         description="The version of the schema. Currently the accepted value is 1.",
89 |         le=1,
90 |         ge=1,
91 |     )
92 |     updatedTimestamp: int = Field(
93 |         description="Last updated timestamp",
94 |         gt=0,
95 |     )
96 |     author: str = Field(
97 |         description="The author of the change. It can be an API Key ID, a user ID, or a WhyLabs system ID.",
98 |         max_length=100,
99 |         regex="[0-9a-zA-Z-_.+]+",
100 |     )
101 |     description: Optional[str] = Field(
102 |         None,
103 |         description="A description of the object",
104 |         max_length=1000,
105 |     )
106 | 
--------------------------------------------------------------------------------
/whylabs_toolkit/helpers/config.py:
--------------------------------------------------------------------------------
1 | import os
2 | import logging
3 | from enum import Enum
4 | from typing import Optional
5 | 
6 | logging.basicConfig(level=logging.INFO)
7 | logger = logging.getLogger(__name__)
8 | 
9 | 
10 | class ConfigVars(Enum):
11 |     WHYLABS_DEFAULT_ORG_ID = 1
12 |     WHYLABS_DEFAULT_DATASET_ID = 2
13 |     WHYLABS_API_ENDPOINT = "https://api.whylabsapp.com"
14 |     # keeping these three for backwards compatibility, but they should be removed in the future
15 |     ORG_ID = 3
16 |     DATASET_ID = 4
17 |     WHYLABS_API_KEY = 5
18 |     # TODO remove these two and favor only WHYLABS_API_ENDPOINT
19 |     WHYLABS_HOST = WHYLABS_API_ENDPOINT
20 |     WHYLABS_PRIVATE_API_ENDPOINT = 6
21 | 
22 | 
23 | class Config:
24 |     def get_whylabs_api_key(self) -> str:
25 |         return Validations.require(ConfigVars.WHYLABS_API_KEY)
26 | 
27 |     # TODO deprecate this method
28 |     def get_whylabs_host(self) -> str:
29 |         logger.warning("this method will be deprecated in future releases. use get_whylabs_api_endpoint instead")
30 |         whylabs_host = Validations.get(ConfigVars.WHYLABS_HOST)
31 |         if whylabs_host is not None:
32 |             logger.warning("WHYLABS_HOST will be deprecated, use WHYLABS_API_ENDPOINT instead.")
33 |             return whylabs_host
34 |         return self.get_whylabs_api_endpoint()
35 | 
36 |     def get_whylabs_api_endpoint(self) -> str:
37 |         _private_api_endpoint = Validations.get(ConfigVars.WHYLABS_PRIVATE_API_ENDPOINT)
38 |         if _private_api_endpoint and isinstance(_private_api_endpoint, str):
39 |             logger.warning(
40 |                 f"Using private API endpoint: {_private_api_endpoint}. "
41 |                 f"WHYLABS_PRIVATE_API_ENDPOINT will be deprecated in the future. "
42 |                 f"You should use the WHYLABS_API_ENDPOINT for this purpose."
43 | ) 44 | return _private_api_endpoint 45 | return Validations.get_or_default(ConfigVars.WHYLABS_API_ENDPOINT) 46 | 47 | def get_default_org_id(self) -> str: 48 | org_id = Validations.get(ConfigVars.WHYLABS_DEFAULT_ORG_ID) or Validations.get(ConfigVars.ORG_ID) 49 | if org_id is None: 50 | raise TypeError("You need to specify WHYLABS_DEFAULT_ORG_ID") 51 | return org_id 52 | 53 | def get_default_dataset_id(self) -> str: 54 | dataset_id = Validations.get(ConfigVars.WHYLABS_DEFAULT_DATASET_ID) or Validations.get(ConfigVars.DATASET_ID) 55 | if dataset_id is None: 56 | raise TypeError("You need to specify WHYLABS_DEFAULT_DATASET_ID") 57 | return dataset_id 58 | 59 | 60 | class UserConfig(Config): 61 | def __init__( 62 | self, 63 | api_key: str, 64 | org_id: str, 65 | dataset_id: str, 66 | whylabs_api_endpoint: str = ConfigVars.WHYLABS_API_ENDPOINT.value, 67 | ): 68 | self.api_key = api_key 69 | self.whylabs_api_endpoint = whylabs_api_endpoint 70 | self.whylabs_host = self.whylabs_api_endpoint 71 | self.org_id = org_id 72 | self.dataset_id = dataset_id 73 | 74 | def get_whylabs_api_key(self) -> str: 75 | return self.api_key 76 | 77 | def get_whylabs_api_endpoint(self) -> str: 78 | return self.whylabs_api_endpoint 79 | 80 | def get_whylabs_host(self) -> str: 81 | return self.get_whylabs_api_endpoint() 82 | 83 | def get_default_org_id(self) -> str: 84 | return self.org_id 85 | 86 | def get_default_dataset_id(self) -> str: 87 | return self.dataset_id 88 | 89 | 90 | class Validations: 91 | @staticmethod 92 | def require(env: ConfigVars) -> str: 93 | val = os.getenv(env.name) 94 | if not val: 95 | raise TypeError(f"Missing {env.name} env variable.") 96 | return val 97 | 98 | @staticmethod 99 | def get_or_default(env: ConfigVars) -> str: 100 | val = os.getenv(env.name, env.value) 101 | if not val: 102 | raise TypeError(f"No default value for {env.name}") 103 | return val 104 | 105 | @staticmethod 106 | def get(env: ConfigVars) -> Optional[str]: 107 | return os.getenv(env.name) 108 | -------------------------------------------------------------------------------- /tests/helpers/test_dataset_profiles.py: -------------------------------------------------------------------------------- 1 | import os 2 | from datetime import datetime 3 | from unittest.mock import patch, Mock 4 | 5 | import pytest 6 | 7 | from whylabs_toolkit.helpers.dataset_profiles import ( 8 | delete_all_profiles_for_period, 9 | validate_timestamp_in_millis, 10 | process_date_input 11 | ) 12 | 13 | ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"] 14 | DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"] 15 | 16 | 17 | def test_validate_timestamp_in_millis() -> None: 18 | assert validate_timestamp_in_millis(1627233600000) == True 19 | assert validate_timestamp_in_millis(-1231214) == False 20 | assert validate_timestamp_in_millis("some_string") == False 21 | assert validate_timestamp_in_millis(None) == False 22 | assert validate_timestamp_in_millis(3.1415) == False 23 | 24 | def test_process_date_input() -> None: 25 | input_milliseconds = 1627233600000 26 | assert process_date_input(input_milliseconds) == input_milliseconds 27 | 28 | input_datetime = datetime(2023, 7, 25) 29 | expected_milliseconds = int(input_datetime.timestamp() * 1000.0) 30 | assert process_date_input(input_datetime) == expected_milliseconds 31 | 32 | with pytest.raises(ValueError): 33 | process_date_input("invalid") 34 | 35 | with pytest.raises(ValueError): 36 | process_date_input(-12498127412) 37 | 38 | 39 | ## -- Note: 40 | # After calling delete_dataset_profiles, it 
will schedule the deletion, 41 | # that currently happens hourly, so there is no trivial way to check that on 42 | # unit tests. For that matter, we will only make the assertion of a successful call, 43 | # and the actual deletion logic is tested and maintained by Songbird only 44 | 45 | def test_delete_profile_for_datetime_range(): 46 | result = delete_all_profiles_for_period( 47 | start=datetime(2023,7,5), 48 | end=datetime(2023,7,6), 49 | dataset_id = DATASET_ID, 50 | org_id=ORG_ID 51 | ) 52 | 53 | assert result.get("id") == f"{ORG_ID}/{DATASET_ID}" 54 | 55 | 56 | def test_delete_profiles_for_milliseconds_range(): 57 | result = delete_all_profiles_for_period( 58 | start=int(datetime(2023,7,5).timestamp()*1000.0), 59 | end=int(datetime(2023,7,6).timestamp()*1000.0), 60 | dataset_id = DATASET_ID, 61 | org_id= ORG_ID 62 | ) 63 | 64 | assert result.get("id") == f"{ORG_ID}/{DATASET_ID}" 65 | 66 | 67 | def test_delete_profiles_raises_if_other_format_is_passed(): 68 | with pytest.raises(ValueError): 69 | delete_all_profiles_for_period( 70 | start=-123123123123, 71 | end=int(datetime(2023,7,6).timestamp()*1000.0), 72 | dataset_id = DATASET_ID, 73 | org_id= ORG_ID 74 | ) 75 | with pytest.raises(ValueError): 76 | delete_all_profiles_for_period( 77 | start="string_example", 78 | end=int(datetime(2023,7,6).timestamp()*1000.0), 79 | dataset_id = DATASET_ID, 80 | org_id = ORG_ID 81 | ) 82 | 83 | @patch('whylabs_toolkit.helpers.dataset_profiles.get_dataset_profile_api') 84 | def test_delete_profiles_calls_delete_analyzer_results(mock_get_api): 85 | mock_call = Mock() 86 | mock_get_api.return_value = mock_call 87 | mock_call.delete_dataset_profiles = Mock() 88 | mock_call.delete_analyzer_results = Mock() 89 | 90 | 91 | 92 | delete_all_profiles_for_period( 93 | start = int(datetime(2023,7,5).timestamp()*1000.0), 94 | end = int(datetime(2023,7,6).timestamp()*1000.0), 95 | dataset_id = DATASET_ID, 96 | org_id = ORG_ID 97 | ) 98 | 99 | mock_call.delete_dataset_profiles.assert_called_with( 100 | org_id= ORG_ID, 101 | dataset_id= DATASET_ID, 102 | profile_start_timestamp=int(datetime(2023,7,5).timestamp()*1000.0), 103 | profile_end_timestamp=int(datetime(2023,7,6).timestamp()*1000.0) 104 | ) 105 | 106 | mock_call.delete_analyzer_results.assert_called_with( 107 | org_id = ORG_ID, 108 | dataset_id = DATASET_ID, 109 | start_timestamp=int(datetime(2023,7,5).timestamp()*1000.0), 110 | end_timestamp=int(datetime(2023,7,6).timestamp()*1000.0) 111 | ) -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/column_schema.py: -------------------------------------------------------------------------------- 1 | """Schema definitions.""" 2 | from enum import Enum 3 | from typing import Dict, List, Optional 4 | 5 | from pydantic import Field 6 | 7 | from whylabs_toolkit.monitor.models.commons import Metadata, NoExtrasBaseModel 8 | from whylabs_toolkit.monitor.models.segments import Segment 9 | from whylabs_toolkit.monitor.models.utils import COLUMN_NAME_TYPE 10 | 11 | 12 | class ColumnDiscreteness(str, Enum): 13 | """Classifying the type.""" 14 | 15 | discrete = "discrete" 16 | continuous = "continuous" 17 | 18 | 19 | class ColumnDataType(str, Enum): 20 | """Options for configuring data type for a column.""" 21 | 22 | integral = "integral" 23 | fractional = "fractional" 24 | boolean = "bool" 25 | string = "string" 26 | unknown = "unknown" 27 | null = "null" 28 | 29 | 30 | class ColumnSchema(NoExtrasBaseModel): 31 | """Schema configuration for a column. 
32 | 
33 |     Should be generated by WhyLabs originally but can be overridden by users.
34 |     """
35 | 
36 |     discreteness: ColumnDiscreteness = Field(
37 |         description="Whether a column should be discrete or continuous. WhyLabs will detect this by default but "
38 |         "users can override this value. Changing this column will change the default grouping (discrete"
39 |         " columns vs. continuous columns)."
40 |     )
41 |     dataType: ColumnDataType = Field(
42 |         description="The data type of the columns. Setting this field affects the default grouping (i.e. integral "
43 |         "columns) and the frontend behavior (what default metrics to offer). It does NOT affect what is "
44 |         "collected in whylogs (an integral field can still have a frequentItems sketch if it opts in)"
45 |     )
46 |     classifier: Optional[str] = Field(
47 |         "input",
48 |         description="We can classify these columns into various groupings. Currently we only support 'input' "
49 |         "and 'output'",
50 |         max_length=20,
51 |     )
52 | 
53 | 
54 | class WeightConfig(NoExtrasBaseModel):
55 |     """Object that specifies column weights.
56 | 
57 |     - By default, the weight of a column is None (unspecified)
58 |     - If the weight is unspecified, the column is EXCLUDED when you perform a filter/sort by weight
59 |     - For sorting, unweighted columns take the LEAST PRECEDENCE, meaning that weighted columns have higher priority
60 |     - They are not hierarchical: if a segment weight config is specified and a column does not have a weight in that
61 |       config, we will not use any hierarchy to resolve the value. It will be None
62 |     - Order of unweighted columns is undefined.
63 |     """
64 | 
65 |     weights: Dict[COLUMN_NAME_TYPE, float] = Field(description="Weights of the features")  # type: ignore
66 | 
67 | 
68 | class SegmentWeightConfig(WeightConfig):
69 |     """Object that specifies column weights for a segment."""
70 | 
71 |     segment: Optional[Segment] = Field(
72 |         None,
73 |         description="Set this value if the weights are for a specific segment. Note that an empty segment (no tags) "
74 |         "is treated as the overall segment in this case.",
75 |     )
76 | 
77 | 
78 | class EntitySchema(NoExtrasBaseModel):
79 |     """Schema definition of an entity."""
80 | 
81 |     metadata: Optional[Metadata] = Field(
82 |         None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
83 |     )
84 |     columns: Dict[COLUMN_NAME_TYPE, ColumnSchema] = Field(  # type: ignore
85 |         description="Schema configuration for the entity"
86 |     )
87 | 
88 | 
89 | class EntityWeights(NoExtrasBaseModel):
90 |     """Entity weight configurations."""
91 | 
92 |     metadata: Optional[Metadata] = Field(
93 |         None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
94 |     )
95 |     defaultWeights: Optional[WeightConfig] = Field(
96 |         None,
97 |         title="DefaultWeights",
98 |         description="Optional. Default weights for ALL the segments that don't have an explicit weight specification. "
99 |         "Note that if you specify the segment in this WeightConfig object, it is ignored.",
100 |     )
101 |     segmentWeights: Optional[List[SegmentWeightConfig]] = Field(
102 |         None,
103 |         title="SegmentWeights",
104 |         description="Optional. Segment-specific weights. Use this if you want to override the defaultWeights. Note that "
105 |         "there might be a case where segment fields are weighted without specifying the default weights",
106 |         max_items=1000,
107 |     )
108 | 
--------------------------------------------------------------------------------
/tests/helpers/test_entity_schema.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | import pytest
4 | 
5 | from whylabs_toolkit.helpers.schema import (
6 |     ColumnsClassifiers,
7 |     ColumnsDiscreteness,
8 |     UpdateColumnClassifiers,
9 |     UpdateColumnsDiscreteness,
10 |     UpdateEntityDataTypes
11 | )
12 | from whylabs_toolkit.monitor.models.column_schema import ColumnDataType
13 | 
14 | ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"]
15 | DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"]
16 | 
17 | 
18 | def test_change_columns_input_output() -> None:
19 |     classifiers = ColumnsClassifiers(
20 |         inputs=["temperature"],
21 |         outputs=["prediction_temperature"]
22 |     )
23 | 
24 |     update_entity = UpdateColumnClassifiers(
25 |         classifiers=classifiers,
26 |         dataset_id=DATASET_ID,
27 |         org_id=ORG_ID
28 |     )
29 | 
30 |     update_entity.update()
31 | 
32 |     assert update_entity.current_entity_schema["columns"]["temperature"]["classifier"] == "input"
33 |     assert update_entity.current_entity_schema["columns"]["prediction_temperature"]["classifier"] == "output"
34 | 
35 |     correct_classifiers = ColumnsClassifiers(outputs=["temperature"])
36 | 
37 |     update_entity = UpdateColumnClassifiers(
38 |         classifiers=correct_classifiers,
39 |         dataset_id=DATASET_ID,
40 |         org_id=ORG_ID
41 |     )
42 | 
43 |     update_entity.update()
44 | 
45 |     assert update_entity.current_entity_schema["columns"]["temperature"]["classifier"] == "output"
46 | 
47 | 
48 | def test_change_columns_discreteness() -> None:
49 |     columns = ColumnsDiscreteness(
50 |         discrete=["prediction_temperature"],
51 |         continuous=["temperature"]
52 |     )
53 | 
54 |     update_discreteness = UpdateColumnsDiscreteness(
55 |         dataset_id=DATASET_ID,
56 |         columns=columns,
57 |         org_id=ORG_ID
58 |     )
59 | 
60 |     update_discreteness.update()
61 | 
62 |     assert update_discreteness.current_entity_schema["columns"]["temperature"]["discreteness"] == "continuous"
63 |     assert update_discreteness.current_entity_schema["columns"]["prediction_temperature"][
64 |         "discreteness"] == "discrete"
65 | 
66 |     columns = ColumnsDiscreteness(
67 |         discrete=["temperature"],
68 |         continuous=["prediction_temperature"]
69 |     )
70 | 
71 |     update_discreteness = UpdateColumnsDiscreteness(
72 |         dataset_id=DATASET_ID,
73 |         columns=columns,
74 |         org_id=ORG_ID
75 |     )
76 | 
77 |     update_discreteness.update()
78 | 
79 |     assert update_discreteness.current_entity_schema["columns"]["temperature"]["discreteness"] == "discrete"
80 |     assert update_discreteness.current_entity_schema["columns"]["prediction_temperature"][
81 |         "discreteness"] == "continuous"
82 | 
83 | 
84 | def test_same_column_on_both_parameters_should_raise():
85 |     columns = ColumnsDiscreteness(
86 |         discrete=["temperature"],
87 |         continuous=["temperature"]
88 |     )
89 | 
90 |     update_discreteness = UpdateColumnsDiscreteness(
91 |         dataset_id=DATASET_ID,
92 |         columns=columns,
93 |         org_id=ORG_ID
94 |     )
95 |     with pytest.raises(ValueError):
96 |         update_discreteness.update()
97 | 
98 |     classifiers = ColumnsClassifiers(
99 |         inputs=["temperature"],
100 |         outputs=["temperature"]
101 |     )
102 | 
103 |     update_entity = UpdateColumnClassifiers(
104 |         classifiers=classifiers,
105 |         dataset_id=DATASET_ID,
106 |         org_id=ORG_ID
107 |     )
108 |     with pytest.raises(ValueError):
109 |         update_entity.update()
110 | 
111 | 
112 | def test_change_columns_schema():
113 |     columns_schema = {"temperature": ColumnDataType.boolean}
114 | 
115 |     update_data_types = UpdateEntityDataTypes(
116 |         dataset_id=DATASET_ID,
117 |         columns_schema=columns_schema,
118 |         org_id=ORG_ID
119 |     )
120 | 
121 |     update_data_types.update()
122 | 
123 |     assert update_data_types.current_entity_schema["columns"]["temperature"]["data_type"] == "bool"
124 | 
125 |     columns_schema = {"temperature": ColumnDataType.fractional}
126 | 
127 |     update_data_types = UpdateEntityDataTypes(
128 |         dataset_id=DATASET_ID,
129 |         columns_schema=columns_schema,
130 |         org_id=ORG_ID
131 |     )
132 | 
133 |     update_data_types.update()
134 | 
135 |     assert update_data_types.current_entity_schema["columns"]["temperature"]["data_type"] == "fractional"
136 | 
137 | 
138 | def test_wrong_configuration_on_data_types():
139 |     # If the specified column does not exist
140 |     columns_schema = {"some_weird_column": ColumnDataType.boolean}
141 | 
142 |     update_data_types = UpdateEntityDataTypes(
143 |         dataset_id=DATASET_ID,
144 |         columns_schema=columns_schema,
145 |         org_id=ORG_ID
146 |     )
147 | 
148 |     # Nothing gets updated
149 | 
150 |     update_data_types.update()
151 | 
152 |     # If a datatype doesn't exist
153 |     columns_schema = {"temperature": "booleans"}
154 | 
155 |     update_data_types = UpdateEntityDataTypes(
156 |         dataset_id=DATASET_ID,
157 |         columns_schema=columns_schema,
158 |         org_id=ORG_ID
159 |     )
160 | 
161 |     # It throws an exception
162 |     with pytest.raises(ValueError):
163 |         update_data_types.update()
164 | 
--------------------------------------------------------------------------------
/whylabs_toolkit/monitor/models/analyzer/analyzer.py:
--------------------------------------------------------------------------------
1 | """Schema for analyses."""
2 | from typing import Any, Dict, List, Optional, Union
3 | 
4 | from pydantic import BaseModel, Field, constr, validator
5 | 
6 | from whylabs_toolkit.monitor.models.commons import NoExtrasBaseModel
7 | 
8 | from ..commons import CronSchedule, FixedCadenceSchedule, Metadata
9 | from ..utils import anyOf_to_oneOf, duration_field
10 | from .algorithms import (
11 |     ColumnListChangeConfig,
12 |     ComparisonConfig,
13 |     ListComparisonConfig,
14 |     FrequentStringComparisonConfig,
15 |     DiffConfig,
16 |     DriftConfig,
17 |     ExperimentalConfig,
18 |     FixedThresholdsConfig,
19 |     SeasonalConfig,
20 |     StddevConfig,
21 |     ConjunctionConfig,
22 |     DisjunctionConfig,
23 | )
24 | from .targets import ColumnMatrix, DatasetMatrix
25 | from whylabs_toolkit.helpers.cron_validators import validate_cron_expression
26 | 
27 | 
28 | class Analyzer(NoExtrasBaseModel):
29 |     """Configuration for running an analysis.
30 | 
31 |     An analysis targets a metric (note that a metric could be a complex object) for one or multiple fields in
32 |     one or multiple segments. The output is a list of 'anomalies' that might show issues with data.
33 |     """
34 | 
35 |     metadata: Optional[Metadata] = Field(
36 |         None, description="WhyLabs-managed metadata. This is to track various metadata for auditing."
37 |     )
38 | 
39 |     id: str = Field(
40 |         None,
41 |         description="A unique, human-readable ID for an analyzer. "
42 |         "Can only contain alphanumeric characters, underscores and dashes",
43 |         min_length=10,
44 |         max_length=128,
45 |         regex="[0-9a-zA-Z\\-_]+",
46 |     )
47 |     displayName: Optional[str] = Field(
48 |         None,
49 |         id="DisplayName",
50 |         description="A display name for the analyzer when viewed through the WhyLabs UI. Can only contain dashes, underscores, "
51 |         "spaces, and alphanumeric characters",
52 |         min_length=10,
53 |         max_length=256,
54 |         regex="[0-9a-zA-Z \\-_]+",
55 |     )
56 |     tags: Optional[  # type: ignore
57 |         List[constr(min_length=3, max_length=32, regex="[0-9a-zA-Z\\-_]")]  # type: ignore
58 |     ] = Field(  # noqa F722
59 |         None, description="A list of tags that are associated with the analyzer."
60 |     )
61 | 
62 |     schedule: Optional[Union[FixedCadenceSchedule, CronSchedule]] = Field(
63 |         None,
64 |         description="A schedule for running the analyzer. If not set, the analyzer is considered disabled",
65 |     )
66 |     disabled: Optional[bool] = Field(
67 |         None,
68 |         description="Whether the analyzer is disabled. "
69 |         "This allows the user to keep the configuration "
70 |         "around without having to delete the analyzer config",
71 |     )
72 |     disableTargetRollup: Optional[bool] = Field(
73 |         None,
74 |         description="For customers with individual profile storage enabled on their account (contact us), this "
75 |         "allows a user to monitor individual profiles without rolling them up. When enabled, analysis "
76 |         "will be timestamped 1:1 with the profile's dataset timestamp rather than being truncated "
77 |         "to the dataset granularity.",
78 |     )
79 |     targetMatrix: Union[ColumnMatrix, DatasetMatrix] = Field(
80 |         description="A matrix for possible locations of the target",
81 |         discriminator="type",
82 |     )
83 |     dataReadinessDuration: Optional[str] = duration_field(
84 |         title="DataReadinessDuration",
85 |         description="ISO 8601 duration format. The duration determines how fast data is ready for the monitor. For "
86 |         "example, if your pipeline takes 2 days to deliver profiles to WhyLabs, the value should be "
87 |         "P2D. Note that this value will be used to evaluate missing data as well",
88 |     )
89 |     batchCoolDownPeriod: Optional[str] = duration_field(
90 |         title="BatchCoolDownPeriod",
91 |         description="ISO 8601 duration format. Specifies the duration that the monitor will wait from the last time "
92 |         "a profile arrives. Any batch involved in the calculation must have received the last profile by "
93 |         "the duration.",
94 |     )
95 |     backfillGracePeriodDuration: Optional[str] = duration_field(
96 |         title="BackfillGracePeriodDuration",
97 |         description="ISO 8601 duration format. How far back an analyzer will attempt to backfill late data. Note that "
98 |         "we will only backfill batches not previously analyzed. If the batch was already analyzed, "
99 |         "even with partial data, the backfill will ignore the new data unless you trigger an explicit "
100 |         "backfill request. 
We support 48 hours for hourly data, 30 days for daily data, and 6 months for " 101 | "monthly data.", 102 | ) 103 | 104 | @validator("schedule", pre=True, always=True) 105 | def validate_schedule( 106 | cls, v: Optional[Union[FixedCadenceSchedule, CronSchedule]] 107 | ) -> Optional[Union[FixedCadenceSchedule, CronSchedule]]: 108 | """Validate the schedule.""" 109 | if isinstance(v, CronSchedule) and not validate_cron_expression(v.cron): 110 | raise ValueError("CronSchedule must be no less granular than 1 hour and must have 5 fields.") 111 | return v 112 | 113 | # NOT YET IMPLEMENTED: 114 | # ExperimentalConfig, 115 | # ColumnListChangeConfig, 116 | 117 | config: Union[ 118 | DiffConfig, 119 | FixedThresholdsConfig, 120 | ListComparisonConfig, 121 | FrequentStringComparisonConfig, 122 | StddevConfig, 123 | DriftConfig, 124 | ComparisonConfig, 125 | SeasonalConfig, 126 | ConjunctionConfig, 127 | DisjunctionConfig, 128 | ] = Field(description="The configuration map of the analyzer", discriminator="type") 129 | 130 | class Config: 131 | """Updates JSON schema anyOf to oneOf.""" 132 | 133 | # noinspection PyUnusedLocal 134 | @staticmethod 135 | def schema_extra(schema: Dict[str, Any], model: BaseModel) -> None: 136 | """Update specific fields here (for Union type, specifically).""" 137 | anyOf_to_oneOf(schema, "config") 138 | anyOf_to_oneOf(schema, "targetMatrix") 139 | -------------------------------------------------------------------------------- /tests/helpers/test_monitor_helpers.py: -------------------------------------------------------------------------------- 1 | import os 2 | from typing import List, Dict 3 | 4 | from whylabs_toolkit.helpers.monitor_helpers import ( 5 | delete_monitor, 6 | get_model_granularity, 7 | get_monitor_config, 8 | get_analyzer_ids, 9 | get_monitor, 10 | list_monitors 11 | ) 12 | from whylabs_toolkit.helpers.utils import get_monitor_api 13 | from whylabs_toolkit.utils.granularity import Granularity 14 | 15 | 16 | ORG_ID = os.environ["WHYLABS_DEFAULT_ORG_ID"] 17 | DATASET_ID = os.environ["WHYLABS_DEFAULT_DATASET_ID"] 18 | MONITOR_ID = os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 19 | ANALYZER_ID = os.environ["WHYLABS_DEFAULT_ANALYZER_ID"] 20 | MONITOR_BODY = { 21 | "id": MONITOR_ID, "analyzerIds": [ANALYZER_ID], 22 | "schedule": {"type": "immediate"}, 23 | "mode": {"type": "DIGEST"}, "disabled": False, "actions": [], 24 | "metadata": { 25 | "schemaVersion": 1, 26 | "author": "system", 27 | "updatedTimestamp": 1671824015981, 28 | "version": 1 29 | } 30 | } 31 | ANALYZER_BODY = { 32 | "config": { 33 | "metric": "median", 34 | "type": "stddev", 35 | "factor": 2.0, 36 | "minBatchSize": 1, 37 | "baseline": { 38 | "type": "TrailingWindow", 39 | "size": 14 40 | } 41 | }, 42 | "id": ANALYZER_ID, 43 | "schedule": {"type": "fixed", "cadence": "monthly"}, 44 | "targetMatrix": {"include": ["*"], "segments": [], "type": "column"}, 45 | "metadata": { 46 | "schemaVersion": 1, 47 | "author": "system", 48 | "updatedTimestamp": 1671824015105, 49 | "version": 1 50 | } 51 | } 52 | 53 | 54 | class BaseTestMonitor: 55 | @classmethod 56 | def setup_class(cls) -> None: 57 | api = get_monitor_api() 58 | api.put_monitor( 59 | org_id=ORG_ID, 60 | dataset_id=DATASET_ID, 61 | monitor_id=MONITOR_ID, 62 | body=MONITOR_BODY # type: ignore 63 | ) 64 | 65 | api.put_analyzer( 66 | org_id=ORG_ID, 67 | dataset_id=DATASET_ID, 68 | analyzer_id=ANALYZER_ID, 69 | body=ANALYZER_BODY # type: ignore 70 | ) 71 | 72 | @classmethod 73 | def teardown_class(cls) -> None: 74 | delete_monitor( 75 | org_id=ORG_ID, 
76 | dataset_id=DATASET_ID, 77 | monitor_id=MONITOR_ID 78 | ) 79 | 80 | 81 | class TestDeleteMonitor(BaseTestMonitor): 82 | @classmethod 83 | def teardown_class(cls) -> None: 84 | pass 85 | 86 | def test_get_analyzer_ids(self) -> None: 87 | analyzer_ids = get_analyzer_ids( 88 | org_id=ORG_ID, 89 | dataset_id = DATASET_ID, 90 | monitor_id= MONITOR_ID, 91 | ) 92 | assert analyzer_ids is not None 93 | assert isinstance(analyzer_ids, List) 94 | for analyzer in analyzer_ids: 95 | assert analyzer == f"{MONITOR_ID}-analyzer" 96 | 97 | def test_get_analyzer_ids_that_dont_exist(self) -> None: 98 | analyzer_ids = get_analyzer_ids( 99 | org_id=ORG_ID, 100 | dataset_id = DATASET_ID, 101 | monitor_id= "dont_exist", 102 | ) 103 | assert analyzer_ids is None 104 | 105 | analyzer_ids = get_analyzer_ids( 106 | org_id="wrong_org", 107 | dataset_id = DATASET_ID, 108 | monitor_id= MONITOR_ID, 109 | ) 110 | 111 | assert analyzer_ids is None 112 | 113 | analyzer_ids = get_analyzer_ids( 114 | org_id=ORG_ID, 115 | dataset_id = "model-X", 116 | monitor_id= MONITOR_ID, 117 | ) 118 | 119 | assert analyzer_ids is None 120 | 121 | 122 | def test_get_monitor_config(self) -> None: 123 | monitor_config = get_monitor_config( 124 | org_id=ORG_ID, 125 | dataset_id = DATASET_ID, 126 | ) 127 | 128 | assert monitor_config is not None 129 | assert isinstance(monitor_config, Dict) 130 | for key in monitor_config.keys(): 131 | assert key in ['orgId', 'datasetId', 'granularity', 'metadata', 'allowPartialTargetBatches', 'analyzers', 'monitors'] 132 | 133 | def test_get_monitor(self) -> None: 134 | monitor = get_monitor( 135 | monitor_id=MONITOR_ID, 136 | dataset_id=DATASET_ID, 137 | org_id=ORG_ID 138 | ) 139 | 140 | assert monitor is not None 141 | assert isinstance(monitor, Dict) 142 | 143 | for key in monitor.keys(): 144 | assert key in ['id', 'analyzerIds', 'schedule', 'mode', 'disabled', 'actions', 'metadata'] 145 | 146 | 147 | def test_get_monitor_with_wrong_configs(self) -> None: 148 | monitor = get_monitor( 149 | monitor_id="fake-monitor", 150 | dataset_id=DATASET_ID, 151 | org_id=ORG_ID 152 | ) 153 | assert monitor is None 154 | 155 | monitor = get_monitor( 156 | monitor_id=MONITOR_ID, 157 | dataset_id="fake-dataset-id", 158 | org_id=ORG_ID 159 | ) 160 | 161 | assert monitor is None 162 | 163 | def test_get_granularity(self) -> None: 164 | granularity = get_model_granularity(org_id=ORG_ID, dataset_id=DATASET_ID) 165 | assert granularity == Granularity.monthly 166 | 167 | 168 | def test_delete_monitor(self) -> None: 169 | delete_monitor( 170 | org_id=ORG_ID, 171 | dataset_id=DATASET_ID, 172 | monitor_id=MONITOR_ID 173 | ) 174 | 175 | # Checking both monitor and analyzers were deleted 176 | 177 | monitor_config = get_monitor_config( 178 | org_id=ORG_ID, 179 | dataset_id=DATASET_ID 180 | ) 181 | 182 | for monitor in monitor_config["monitors"]: 183 | assert MONITOR_ID not in monitor["id"] 184 | 185 | for analyzer in monitor_config["analyzers"]: 186 | assert ANALYZER_ID not in analyzer["id"] 187 | 188 | 189 | class TestListMonitor(BaseTestMonitor): 190 | def test_list_monitors(self) -> None: 191 | monitors = list_monitors(org_id=ORG_ID, dataset_id=DATASET_ID) 192 | 193 | assert monitors == [MONITOR_ID] 194 | 195 | def test_list_monitors_with_wrong_configs(self) -> None: 196 | monitors = list_monitors(org_id=ORG_ID, dataset_id="model-doesnt-exist") 197 | 198 | assert monitors == [] -------------------------------------------------------------------------------- /whylabs_toolkit/helpers/schema.py: 
--------------------------------------------------------------------------------
1 | from abc import ABC, abstractmethod
2 | from dataclasses import dataclass, field
3 | from typing import Any, Dict, List, Optional
4 |
5 | from whylabs_client.models import EntitySchema
6 |
7 | from whylabs_toolkit.helpers.config import Config
8 | from whylabs_toolkit.helpers.utils import get_models_api
9 | from whylabs_toolkit.monitor.models.column_schema import ColumnDataType
10 |
11 | BASE_ENDPOINT = "https://api.whylabsapp.com"
12 |
13 |
14 | @dataclass
15 | class ColumnsClassifiers:
16 | inputs: List[str] = field(default_factory=list) # type: ignore
17 | outputs: List[str] = field(default_factory=list) # type: ignore
18 |
19 |
20 | @dataclass
21 | class ColumnsDiscreteness:
22 | discrete: List[str] = field(default_factory=list) # type: ignore
23 | continuous: List[str] = field(default_factory=list) # type: ignore
24 |
25 |
26 | class UpdateEntity(ABC):
27 | def __init__(self, dataset_id: Optional[str] = None, org_id: Optional[str] = None, config: Config = Config()):
28 | self.dataset_id = dataset_id or config.get_default_dataset_id()
29 | self.org_id = org_id or config.get_default_org_id()
30 | self.api = get_models_api(config=config)
31 |
32 | def _get_entity_schema(self) -> Any:
33 | entity_schema = self.api.get_entity_schema(org_id=self.org_id, dataset_id=self.dataset_id)
34 | return entity_schema
35 |
36 | def _put_entity_schema(self, schema: EntitySchema) -> None:
37 | self.api.put_entity_schema(org_id=self.org_id, dataset_id=self.dataset_id, entity_schema=schema)
38 |
39 | def _get_current_entity_schema(self) -> None:
40 | self.current_entity_schema = self._get_entity_schema()
41 | self.columns_dict = self.current_entity_schema["columns"]
42 |
43 | @abstractmethod
44 | def _validate_input(self) -> None:
45 | pass
46 |
47 | @abstractmethod
48 | def _update_entity_schema(self) -> None:
49 | pass
50 |
51 | def _put_updated_entity_schema(self) -> None:
52 | metadata_dict = self.current_entity_schema["metadata"]
53 | entity_schema_dict = EntitySchema(columns=self.columns_dict, metadata=metadata_dict)
54 | self._put_entity_schema(schema=entity_schema_dict) # type: ignore
55 |
56 | def update(self) -> None:
57 | self._validate_input()
58 | self._get_current_entity_schema()
59 | self._update_entity_schema()
60 | self._put_updated_entity_schema()
61 |
62 |
63 | class UpdateColumnClassifiers(UpdateEntity):
64 | def __init__(self, classifiers: ColumnsClassifiers, org_id: Optional[str] = None, dataset_id: Optional[str] = None):
65 | super().__init__(dataset_id, org_id)
66 | self.classifiers = classifiers
67 |
68 | def _validate_input(self) -> None:
69 | if self.classifiers.inputs == [] and self.classifiers.outputs == []:
70 | raise ValueError("You must define either input or output features to use this function.")
71 | same_list = [item for item in self.classifiers.inputs if item in self.classifiers.outputs]
72 | if same_list:
73 | raise ValueError(f"Column {same_list[0]} must either be input or output.")
74 |
75 | def _update_entity_schema(self) -> Any:
76 | for key in self.columns_dict.keys():
77 | if key in self.classifiers.inputs and self.columns_dict[key]["classifier"] != "input":
78 | self.columns_dict[key].classifier = "input"
79 | elif key in self.classifiers.outputs and self.columns_dict[key]["classifier"] != "output":
80 | self.columns_dict[key].classifier = "output"
81 |
82 |
83 | class UpdateEntityDataTypes(UpdateEntity):
84 | """
85 | Update data types on each column of the dataset
86 |
87 | Arguments
88 | ----
89 | columns_schema: Dict[str, ColumnDataType]
90 | The keys are column names and the values are the
91 | desired data_types, as the example below shows
92 |
93 | ```python
94 | columns_schema = {
95 | "column_1": ColumnDataType.fractional,
96 | "column_2": ColumnDataType.boolean,
97 | "column_3": ColumnDataType.string,
98 | }
99 | ```
100 |
101 | These are the currently supported data types:
102 | ---
103 | - integral
104 | - fractional
105 | - bool
106 | - string
107 | - unknown
108 | - null
109 | ---
110 | """
111 |
112 | def __init__(
113 | self, columns_schema: Dict[str, ColumnDataType], org_id: Optional[str] = None, dataset_id: Optional[str] = None
114 | ):
115 | super().__init__(dataset_id, org_id)
116 | self.columns_schema = columns_schema
117 |
118 | def _validate_input(self) -> None:
119 | for data_type in self.columns_schema.values():
120 | if not isinstance(data_type, ColumnDataType):
121 | raise ValueError(
122 | f"{data_type} is not an accepted data type! Refer to this function's help to learn more."
123 | )
124 |
125 | def _update_entity_schema(self) -> None:
126 | for column, data_type in self.columns_schema.items():
127 | if column in self.columns_dict.keys() and self.columns_dict[column]["data_type"] != data_type.value:
128 | self.columns_dict[column].data_type = self.columns_schema[column].value
129 |
130 |
131 | class UpdateColumnsDiscreteness(UpdateEntity):
132 | def __init__(
133 | self,
134 | columns: ColumnsDiscreteness,
135 | org_id: Optional[str] = None,
136 | dataset_id: Optional[str] = None,
137 | ):
138 | super().__init__(dataset_id, org_id)
139 | self.columns = columns
140 |
141 | def _validate_input(self) -> None:
142 | if self.columns.discrete == [] and self.columns.continuous == []:
143 | raise ValueError("You must define either discrete or continuous columns to use this.")
144 |
145 | same_list = [item for item in self.columns.discrete if item in self.columns.continuous]
146 | if same_list:
147 | raise ValueError(f"Column {same_list[0]} must either be discrete or continuous.")
148 |
149 | def _update_entity_schema(self) -> Any:
150 | for key in self.columns_dict.keys():
151 | if key in self.columns.discrete and self.columns_dict[key]["discreteness"] != "discrete":
152 | self.columns_dict[key].discreteness = "discrete"
153 | elif key in self.columns.continuous and self.columns_dict[key]["discreteness"] != "continuous":
154 | self.columns_dict[key].discreteness = "continuous"
155 |
-------------------------------------------------------------------------------- /whylabs_toolkit/helpers/monitor_helpers.py: --------------------------------------------------------------------------------
1 | import logging
2 | from typing import Any, List, Optional
3 |
4 | from whylabs_client.exceptions import ApiValueError
5 | from whylabs_client.exceptions import NotFoundException, ForbiddenException
6 |
7 | from whylabs_toolkit.helpers.config import Config
8 | from whylabs_toolkit.helpers.utils import get_monitor_api, get_models_api
9 | from whylabs_toolkit.utils.granularity import Granularity
10 |
11 |
12 | BASE_ENDPOINT = "https://api.whylabsapp.com"
13 | logging.basicConfig(level=logging.INFO)
14 | logger = logging.getLogger(__name__)
15 |
16 |
17 | def get_monitor_config(org_id: str, dataset_id: str, config: Config = Config()) -> Any:
18 | api = get_monitor_api(config=config)
19 | try:
20 | monitor_config = api.get_monitor_config_v3(org_id=org_id, dataset_id=dataset_id)
21 | logger.info(f"Found monitor config for {dataset_id}")
22 | return monitor_config
23 | except NotFoundException:
24 | logger.info(f"Could not find a monitor config for {dataset_id}")
25 | return None
26 | except ForbiddenException as e:
27 | logger.warning(
28 | f"You don't have access to monitor config for {dataset_id}. Did you set a correct WHYLABS_API_KEY?"
29 | )
30 | raise e
31 |
32 |
33 | def get_monitor(
34 | monitor_id: str, org_id: Optional[str] = None, dataset_id: Optional[str] = None, config: Config = Config()
35 | ) -> Any:
36 | org_id = org_id or config.get_default_org_id()
37 | dataset_id = dataset_id or config.get_default_dataset_id()
38 |
39 | api = get_monitor_api(config=config)
40 | try:
41 | monitor = api.get_monitor(org_id=org_id, dataset_id=dataset_id, monitor_id=monitor_id)
42 | return monitor
43 | except NotFoundException:
44 | logger.info(f"Didn't find a monitor with id {monitor_id} for {dataset_id}. Creating a new one...")
45 | return None
46 | except ForbiddenException as e:
47 | logger.warning(
48 | f"You don't have access to monitor {monitor_id} for {dataset_id}. Did you set a correct WHYLABS_API_KEY?"
49 | )
50 | raise e
51 |
52 |
53 | def get_analyzer_ids(
54 | monitor_id: str, org_id: Optional[str] = None, dataset_id: Optional[str] = None, config: Config = Config()
55 | ) -> Any:
56 | org_id = org_id or config.get_default_org_id()
57 | dataset_id = dataset_id or config.get_default_dataset_id()
58 | try:
59 | monitor_config = get_monitor_config(org_id=org_id, dataset_id=dataset_id, config=config)
60 |
61 | if monitor_config:
62 | for item in monitor_config.get("monitors"):
63 | if item["id"] == monitor_id:
64 | resp = item["analyzerIds"]
65 | return resp
66 |
67 | except NotFoundException:
68 | logger.warning(f"Could not find analyzer IDs for {org_id}, {dataset_id}, {monitor_id}")
69 | return None
70 |
71 | except ForbiddenException:
72 | logger.error("Could not get analyzer IDs due to a ForbiddenException, did you set a correct WHYLABS_API_KEY?")
73 | return None
74 |
75 | except Exception:
76 | raise
77 |
78 |
79 | def get_analyzers(
80 | monitor_id: str, org_id: Optional[str] = None, dataset_id: Optional[str] = None, config: Config = Config()
81 | ) -> Optional[List[Any]]:
82 | org_id = org_id or config.get_default_org_id()
83 | dataset_id = dataset_id or config.get_default_dataset_id()
84 | api = get_monitor_api(config=config)
85 | analyzers = []
86 | analyzer_ids = get_analyzer_ids(org_id=org_id, dataset_id=dataset_id, monitor_id=monitor_id, config=config)
87 | if analyzer_ids:
88 | for analyzer in analyzer_ids:
89 | analyzers.append(api.get_analyzer(org_id=org_id, dataset_id=dataset_id, analyzer_id=analyzer))
90 | return analyzers
91 | else:
92 | return None
93 |
94 |
95 | def time_period_to_granularity(time_period: str) -> Granularity:
96 | if time_period == "PT1H":
97 | return Granularity.hourly
98 |
99 | if time_period == "P1W":
100 | return Granularity.weekly
101 |
102 | if time_period == "P1M":
103 | return Granularity.monthly
104 |
105 | return Granularity.daily
106 |
107 |
108 | def get_model_granularity(
109 | org_id: Optional[str] = None, dataset_id: Optional[str] = None, config: Config = Config()
110 | ) -> Optional[Granularity]:
111 | org_id = org_id or config.get_default_org_id()
112 | dataset_id = dataset_id or config.get_default_dataset_id()
113 |
114 | api = get_models_api(config=config)
115 | model_meta = api.get_model(org_id=org_id, model_id=dataset_id)
116 |
117 | if model_meta:
118 | return time_period_to_granularity(model_meta["time_period"])
119 | return None
120 |
121 |
122 | def delete_monitor(
123 | monitor_id: str, org_id: Optional[str] = None, dataset_id: Optional[str] = None, config: Config = Config()
124 | ) -> None:
125 | org_id = org_id or config.get_default_org_id()
126 | dataset_id = dataset_id or config.get_default_dataset_id()
127 |
128 | api = get_monitor_api(config=config)
129 | try:
130 | analyzer_ids = get_analyzer_ids(org_id=org_id, dataset_id=dataset_id, monitor_id=monitor_id, config=config)
131 | if analyzer_ids is None:
132 | return
133 | for analyzer_id in analyzer_ids:
134 | resp_analyzer = api.delete_analyzer(org_id=org_id, dataset_id=dataset_id, analyzer_id=analyzer_id)
135 | logger.debug(f"Deleted analyzer with Resp: {resp_analyzer}")
136 | resp_monitor = api.delete_monitor(org_id=org_id, dataset_id=dataset_id, monitor_id=monitor_id)
137 | logger.debug(f"Deleted monitor with Resp: {resp_monitor}")
138 | except ApiValueError as e:
139 | logger.error(f"Error deleting monitor {monitor_id}: {e.msg}") # type: ignore
140 | raise e
141 |
142 |
143 | def list_monitors(org_id: Optional[str], dataset_id: Optional[str], config: Config = Config()) -> List[str]:
144 | org_id = org_id or config.get_default_org_id()
145 | dataset_id = dataset_id or config.get_default_dataset_id()
146 |
147 | try:
148 | monitors = get_monitor_config(org_id=org_id, dataset_id=dataset_id, config=config)
149 | if monitors is not None:
150 | return [monitor["id"] for monitor in monitors.get("monitors")]
151 | else:
152 | logger.info(f"No monitors found for {dataset_id}")
153 | return []
154 | except ForbiddenException as e:
155 | logger.warning(
156 | f"You don't have access to monitor list for {dataset_id}. Did you set a correct WHYLABS_API_KEY?"
157 | )
158 | raise e
159 | except Exception as e:
160 | logger.error(f"Error listing monitors for {dataset_id}: {e}")
161 | raise e
162 |
-------------------------------------------------------------------------------- /examples/presets.md: --------------------------------------------------------------------------------
1 | # WhyLabs Monitors Presets
2 |
3 | In this section we present some of the existing presets available on the WhyLabs platform, configured using `whylabs-toolkit`.
4 |
5 | In general, the configuration workflow always consists of three steps:
6 |
7 | 1. Create a `MonitorSetup` object
8 | ```python
9 | monitor_setup = MonitorSetup(monitor_id=...)
10 | ```
11 | 2. Add a config
12 | ```python
13 | monitor_setup.config = SomeConfig(...)
14 | monitor_setup.apply()
15 | ```
16 | 3. Save it to WhyLabs with `MonitorManager`
17 | ```python
18 | manager = MonitorManager(setup=monitor_setup)
19 | manager.save()
20 | ```
21 |
22 | These steps are combined into a complete example below. To learn what other options can be set, check the [Manager Docs](../whylabs_toolkit/monitor/manager/README.md).
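Putting the three steps together, here is a minimal end-to-end sketch. The monitor ID is illustrative, and the credentials environment variables described in the [Manager Docs](../whylabs_toolkit/monitor/manager/README.md) are assumed to be set:

```python
from whylabs_toolkit.monitor import MonitorSetup, MonitorManager
from whylabs_toolkit.monitor.models import *

# 1. Create the setup (fetches the monitor if "null_ratio_example" already exists)
monitor_setup = MonitorSetup(monitor_id="null_ratio_example")

# 2. Attach a config and stage the change on the local setup object
monitor_setup.config = StddevConfig(
    metric=SimpleColumnMetric.count_null_ratio,
    baseline=TrailingWindowBaseline(size=7),
)
monitor_setup.apply()

# 3. Persist the monitor to WhyLabs
manager = MonitorManager(setup=monitor_setup)
manager.save()
```

The presets below all follow this same shape and differ only in the `config` object they assign.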
23 | 24 | ## Drift 25 | 26 | ### Discrete inputs 27 | ```python 28 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 29 | from whylabs_toolkit.monitor.models import * 30 | 31 | monitor_setup = MonitorSetup(monitor_id="drift_with_discrete_inputs") 32 | 33 | monitor_setup.config = DriftConfig( 34 | metric = ComplexMetrics.frequent_items, 35 | baseline = TrailingWindowBaseline(size=7), 36 | ) 37 | 38 | monitor_setup.set_target_columns(columns=["group:discrete"]) 39 | monitor_setup.exclude_target_columns(columns=["group:output"]) 40 | 41 | monitor_setup.apply() 42 | 43 | manager = MonitorManager(setup=monitor_setup) 44 | manager.save() 45 | ``` 46 | 47 | ### Continuous inputs 48 | ```python 49 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 50 | from whylabs_toolkit.monitor.models import * 51 | 52 | monitor_setup = MonitorSetup(monitor_id="drift_with_continuous_inputs") 53 | 54 | monitor_setup.config = DriftConfig( 55 | metric = ComplexMetrics.histogram, 56 | baseline = TrailingWindowBaseline(size=7), 57 | ) 58 | 59 | monitor_setup.set_target_columns(columns=["group:continuous"]) 60 | monitor_setup.exclude_target_columns(columns=["group:output"]) 61 | 62 | monitor_setup.apply() 63 | 64 | manager = MonitorManager(setup=monitor_setup) 65 | manager.save() 66 | ``` 67 | 68 | ## Data Quality 69 | 70 | ### Missing values 71 | ```python 72 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 73 | from whylabs_toolkit.monitor.models import * 74 | 75 | monitor_setup = MonitorSetup(monitor_id="missing_value_ratio_monitor") 76 | 77 | monitor_setup.config = StddevConfig( 78 | metric = SimpleColumnMetric.count_null_ratio, 79 | baseline = TrailingWindowBaseline(size=7), 80 | ) 81 | 82 | monitor_setup.apply() 83 | 84 | manager = MonitorManager(setup=monitor_setup) 85 | manager.save() 86 | ``` 87 | 88 | ### Unique values: duplicate changes 89 | ```python 90 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 91 | from whylabs_toolkit.monitor.models import * 92 | 93 | monitor_setup = MonitorSetup(monitor_id="unique_values_estimation") 94 | 95 | monitor_setup.config = StddevConfig( 96 | metric = SimpleColumnMetric.unique_est, 97 | baseline = TrailingWindowBaseline(size=7), 98 | ) 99 | 100 | monitor_setup.apply() 101 | 102 | manager = MonitorManager(setup=monitor_setup) 103 | manager.save() 104 | ``` 105 | 106 | ### Data Type: detect mixed schema 107 | ```python 108 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 109 | from whylabs_toolkit.monitor.models import * 110 | 111 | monitor_setup = MonitorSetup(monitor_id="monitor_data_type_changes") 112 | 113 | monitor_setup.config = ComparisonConfig( 114 | metric = SimpleColumnMetric.inferred_data_type, 115 | baseline = TrailingWindowBaseline(size=7), 116 | operator = ComparisonOperator.eq 117 | ) 118 | 119 | monitor_setup.apply() 120 | 121 | manager = MonitorManager(setup=monitor_setup) 122 | manager.save() 123 | ``` 124 | 125 | ### List Comparison 126 | 127 | ```python 128 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 129 | from whylabs_toolkit.monitor.models import * 130 | 131 | setup = MonitorSetup(monitor_id="monitor_list_comparison") 132 | setup.config = ListComparisonConfig( 133 | operator=ListComparisonOperator.in_list, 134 | expected=[ 135 | ExpectedValue( 136 | str="expected" 137 | ), 138 | ExpectedValue( 139 | int=123229 140 | ) 141 | ], 142 | baseline=TrailingWindowBaseline(size=7), 143 | metric=SimpleColumnMetric.count_bool 144 | ) 145 | setup.apply() 
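# apply() only stages the changes on this local setup object;
# nothing is sent to WhyLabs until MonitorManager.save() runs below.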
146 | 147 | mm = MonitorManager(setup=setup) 148 | mm.save() 149 | ``` 150 | 151 | ### Frequent Items 152 | ```python 153 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 154 | from whylabs_toolkit.monitor.models import * 155 | 156 | 157 | setup = MonitorSetup(monitor_id="frequent_items") 158 | setup.config = FrequentStringComparisonConfig( 159 | operator=FrequentStringComparisonOperator.eq, 160 | baseline=TrailingWindowBaseline(size=7) 161 | ) 162 | setup.apply() 163 | 164 | mm = MonitorManager(setup=setup) 165 | mm.save() 166 | ``` 167 | 168 | ## Model Performance 169 | 170 | ### F1 Score 171 | ```python 172 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 173 | from whylabs_toolkit.monitor.models import * 174 | 175 | monitor_setup = MonitorSetup(monitor_id="f1_score_monitor") 176 | 177 | monitor_setup.config = DiffConfig( 178 | metric = DatasetMetric.classification_f1, 179 | mode = DiffMode.pct, 180 | threshold = 10, 181 | baseline = TrailingWindowBaseline(size=7) 182 | ) 183 | 184 | monitor_setup.apply() 185 | 186 | manager = MonitorManager(setup=monitor_setup) 187 | manager.save() 188 | ``` 189 | ### Precision 190 | ```python 191 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 192 | from whylabs_toolkit.monitor.models import * 193 | 194 | monitor_setup = MonitorSetup(monitor_id="precision_score_monitor") 195 | 196 | monitor_setup.config = DiffConfig( 197 | metric = DatasetMetric.classification_precision, 198 | mode = DiffMode.pct, 199 | threshold = 10, 200 | baseline = TrailingWindowBaseline(size=7) 201 | ) 202 | 203 | monitor_setup.apply() 204 | 205 | manager = MonitorManager(setup=monitor_setup) 206 | manager.save() 207 | ``` 208 | ### Recall 209 | ```python 210 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 211 | from whylabs_toolkit.monitor.models import * 212 | 213 | monitor_setup = MonitorSetup(monitor_id="recall_score_monitor") 214 | 215 | monitor_setup.config = DiffConfig( 216 | metric = DatasetMetric.classification_recall, 217 | mode = DiffMode.pct, 218 | threshold = 10, 219 | baseline = TrailingWindowBaseline(size=7) 220 | ) 221 | 222 | monitor_setup.apply() 223 | 224 | manager = MonitorManager(setup=monitor_setup) 225 | manager.save() 226 | ``` 227 | ### Accuracy 228 | ```python 229 | from whylabs_toolkit.monitor import MonitorSetup, MonitorManager 230 | from whylabs_toolkit.monitor.models import * 231 | 232 | monitor_setup = MonitorSetup(monitor_id="accuracy_score_monitor") 233 | 234 | monitor_setup.config = DiffConfig( 235 | metric = DatasetMetric.classification_accuracy, 236 | mode = DiffMode.pct, 237 | threshold = 10, 238 | baseline = TrailingWindowBaseline(size=7) 239 | ) 240 | 241 | monitor_setup.apply() 242 | 243 | manager = MonitorManager(setup=monitor_setup) 244 | manager.save() 245 | ``` -------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/recommendation/change_recommender.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from typing import List, NamedTuple, Optional 3 | import pandas as pd 4 | from whylabs_client.api.monitor_api import MonitorApi 5 | from whylabs_toolkit.helpers.utils import get_monitor_api 6 | from whylabs_toolkit.monitor.models import Analyzer, Monitor 7 | 8 | from whylabs_toolkit.monitor.diagnoser.recommendation.recommended_change import RecommendedChange 9 | from whylabs_toolkit.monitor.diagnoser.recommendation.manual_change 
import ManualChange
10 | from whylabs_toolkit.monitor.diagnoser.recommendation.remove_columns import RemoveColumns
11 | from whylabs_toolkit.monitor.diagnoser.models.diagnosis_report import (
12 | MonitorDiagnosisReport,
13 | ConditionRecord,
14 | )
15 |
16 |
17 | class ChangeResults(NamedTuple):
18 | succeeded: List[RecommendedChange]
19 | failed: List[RecommendedChange]
20 | errors: List[str]
21 | manual: List[RecommendedChange]
22 |
23 | def describe(self) -> str:
24 | description = ""
25 | if len(self.succeeded):
26 | description += "Successfully made the following changes:\n"
27 | description += "\n\t".join(["\t* " + c.describe() for c in self.succeeded]) + "\n"
28 | if len(self.failed):
29 | description += "Failed to make the following changes:\n"
30 | description += "\n\t".join(["\t* " + c.describe() for c in self.failed])
31 | description += "\nErrors:\n"
32 | description += "\n\t".join(["\t* " + e for e in self.errors]) + "\n"
33 | if len(self.manual):
34 | description += "The following changes require manual intervention:\n"
35 | description += "\n\t".join(["\t* " + c.describe() for c in self.manual]) + "\n"
36 | return description
37 |
38 |
39 | class ChangeRecommender:
40 |
41 | _condition_order = [
42 | # specific conditions unlikely to be rectified by other actions
43 | "changing_discrete",
44 | "changing_continuous",
45 | "few_unique",
46 | "many_unique",
47 | "very_few_unique",
48 | "late_upload_mismatch",
49 | "narrow_threshold_band",
50 | "small_nonnull_batches",
51 | # most general conditions
52 | "stale_analysis",
53 | "low_drift_threshold",
54 | "fixed_threshold_mismatch",
55 | "stddev_insufficient_baseline",
56 | "missing_baseline_batches",
57 | "fixed_baseline_mismatch",
58 | ]
59 |
60 | def __init__(self, report: MonitorDiagnosisReport):
61 | self._min_anomaly_count = 0
62 | self.report = report
63 | self.org_id = report.orgId
64 | self.dataset_id = report.datasetId
65 | self.analyzer = report.analyzer
66 | self.monitor = report.monitor
67 | self._monitor_api: Optional[MonitorApi] = None # lazily initialized via the monitor_api property
68 |
69 | @property
70 | def monitor_api(self) -> MonitorApi:
71 | if self._monitor_api is None:
72 | self._monitor_api = get_monitor_api()
73 | return self._monitor_api
74 |
75 | def _sort_conditions(self, conditions: List[ConditionRecord]) -> List[ConditionRecord]:
76 | return sorted(conditions, key=lambda c: self._condition_order.index(c.name))
77 |
78 | @staticmethod
79 | def _best_change_for_condition(condition: ConditionRecord) -> RecommendedChange:
80 | if condition.columns is None:
81 | raise ValueError("Condition must have columns to recommend a change")
82 | if condition.name in ["changing_discrete", "changing_continuous"]:
83 | return RemoveColumns(columns=condition.columns, info=condition.info)
84 | info = condition.info if condition.info else {}
85 | info["condition"] = condition.name
86 | info["summary"] = condition.summary
87 | return ManualChange(columns=condition.columns, info=info)
88 |
89 | @property
90 | def min_anomaly_count(self) -> int:
91 | return self._min_anomaly_count
92 |
93 | @min_anomaly_count.setter
94 | def min_anomaly_count(self, count: int) -> int:
95 | self._min_anomaly_count = count
96 | return self._min_anomaly_count
97 |
98 | def recommend(self) -> List[RecommendedChange]:
99 | by_col_count = (
100 | self.report.diagnosticData.analysisResults.anomalies.byColumnCount
101 | if (self.report.diagnosticData.analysisResults is not None)
102 | else []
103 | )
104 | count_tuples = [c.to_tuple() for c in by_col_count]
105 | cols, counts = zip(*count_tuples) if count_tuples else ((), ()) # avoid an unpack error when there are no anomalies
106 | anom_count = pd.Series(counts, index=cols)
107 | cols_to_address = anom_count[anom_count >= self.min_anomaly_count]
108 | changes = []
109 | # find the best actions for the cols that pass min anomaly criteria
110 | for c in self._sort_conditions(self.report.conditions):
111 | c.columns = list(cols_to_address.filter(items=c.columns if c.columns else []).index)
112 | if len(c.columns) > 0:
113 | changes.append(self._best_change_for_condition(c))
114 | return changes
115 |
116 | def _update_analyzer(self, updated: Analyzer) -> None:
117 | self.monitor_api.put_analyzer(
118 | org_id=self.org_id,
119 | dataset_id=self.dataset_id,
120 | analyzer_id=updated.id,
121 | body=updated.dict(exclude_none=True),
122 | )
123 |
124 | def _delete_monitor(self) -> None:
125 | if self.monitor is not None and self.analyzer is not None:
126 | analyzer: Analyzer = self.analyzer
127 | self.monitor_api.delete_monitor(org_id=self.org_id, dataset_id=self.dataset_id, monitor_id=self.monitor.id)
128 | self.monitor_api.delete_analyzer(org_id=self.org_id, dataset_id=self.dataset_id, analyzer_id=analyzer.id)
129 |
130 | def _add_new_monitor(self, new_analyzer: Analyzer) -> None:
131 | new_monitor = (
132 | Monitor(**{**self.monitor.dict(), "id": new_analyzer.id}) if self.monitor else Monitor(id=new_analyzer.id)
133 | )
134 | self.monitor_api.put_monitor(
135 | org_id=self.org_id,
136 | dataset_id=self.dataset_id,
137 | monitor_id=new_analyzer.id, # use same id as the analyzer
138 | body=new_monitor.json(exclude_none=True),
139 | )
140 | self.monitor_api.put_analyzer(
141 | org_id=self.org_id,
142 | dataset_id=self.dataset_id,
143 | analyzer_id=new_analyzer.id,
144 | body=new_analyzer.json(exclude_none=True),
145 | )
146 |
147 | def make_changes(self, changes: Optional[List[RecommendedChange]] = None) -> ChangeResults:
148 | changes = self.recommend() if changes is None else changes
149 | succeeded: List[RecommendedChange] = []
150 | failed: List[RecommendedChange] = []
151 | errors: List[str] = []
152 | for c in changes:
153 | if c.can_automate() and self.analyzer:
154 | try:
155 | changed_analyzers = c.generate_config(self.analyzer)
156 | if next((a.id for a in changed_analyzers), None) is None:
157 | # Delete existing analyzer/monitor as there's nothing useful left in it
158 | self._delete_monitor()
159 | # update existing or create new monitor(s)
160 | for changed in changed_analyzers:
161 | if changed.id == self.analyzer.id:
162 | self._update_analyzer(changed)
163 | else:
164 | self._add_new_monitor(changed)
165 | succeeded.append(c)
166 | except Exception as e:
167 | failed.append(c)
168 | errors.append(f"{c.name} failed with {e}")
169 | return ChangeResults(succeeded, failed, errors, [c for c in changes if not c.can_automate()])
-------------------------------------------------------------------------------- /whylabs_toolkit/monitor/manager/README.md: --------------------------------------------------------------------------------
1 | # Monitor creation workflow
2 |
3 | This package gives users a workflow to author new WhyLabs Monitors and modify existing ones.
4 | Users modify monitors to capture unexpected changes in their data, based on their business logic and needs.
5 | A Data Monitor can be briefly described as a way to be **alerted** when a certain **criterion** is met.
6 |
7 |
8 | ## Set your credentials
9 | The first step is to set your credentials to access WhyLabs. Define your environment variables:
10 | ```python
11 | import os
12 |
13 | os.environ["WHYLABS_DEFAULT_ORG_ID"] = "org-id"
14 | os.environ["WHYLABS_API_KEY"] = "api-key"
15 |
16 | # Option 1: set your dataset_id as an env var
17 | os.environ["WHYLABS_DEFAULT_DATASET_ID"] = "dataset-id"
18 | ```
19 |
20 | ## Create a Monitor Setup
21 |
22 | You will need to create a `MonitorSetup` object. The `monitor_id` passed to
23 | the setup is the unique name given to a monitor. If there is an existing monitor under that ID,
24 | the setup will fetch it first. Otherwise, it will create a default one.
25 |
26 | ```python
27 | from whylabs_toolkit.monitor import MonitorSetup
28 |
29 | monitor_setup = MonitorSetup(
30 | monitor_id="my-awesome-monitor",
31 | dataset_id=None # Option 2: set your dataset_id as an argument
32 | )
33 | ```
34 |
35 | ## Add a configuration
36 |
37 | A configuration (or *criterion*) declares **how** WhyLabs will **detect an anomaly** and then trigger alerts.
38 | Here is an example configuration that alerts when a column's median moves more than two standard deviations away from a 14-batch trailing baseline:
39 |
40 | ```python
41 | from whylabs_toolkit.monitor.models import *
42 |
43 | monitor_setup.config = StddevConfig(
44 | metric=SimpleColumnMetric.median,
45 | factor=2.0,
46 | baseline=TrailingWindowBaseline(size=14)
47 | )
48 | ```
49 |
50 | ## Add alert actions
51 | Now that you have the logic that generates alerts, you need to define the Global Actions that will be triggered once this monitor detects an anomaly. To create an action, refer to [our docs](https://docs.whylabs.ai/docs/whylabs-notifications/#overview) and click through the UI, or do it programmatically. Once you have an action ID, you can use the `whylabs-toolkit` to append that action to your monitor with the following assignment:
52 |
53 | ```python
54 | monitor_setup.actions = [GlobalAction(target="my_slack_webhook")] # you can define a list of GlobalAction objects to be triggered
55 | ```
56 |
57 | ## Define a schedule
58 | You also need to define **when** the monitor will run and check your criteria.
59 |
60 | ```python
61 | monitor_setup.schedule = FixedCadenceSchedule(cadence=Cadence.weekly)
62 | ```
63 |
64 |
65 | ## Apply the changes
66 |
67 | Call the `apply()` method to stage your changes on the setup object so they can be persisted to WhyLabs later.
68 |
69 | ```python
70 | monitor_setup.apply()
71 | ```
72 |
73 | ## Interact with the created Monitor
74 |
75 |
76 | To persist your monitor to WhyLabs, create a `MonitorManager`
77 | object and save it:
78 |
79 | ```python
80 | from whylabs_toolkit.monitor import MonitorManager
81 |
82 | manager = MonitorManager(
83 | setup=monitor_setup
84 | )
85 |
86 | manager.save()
87 | ```
88 | This validates and pushes the changes to your WhyLabs monitors.
89 |
90 | ### Other Monitor interactions
91 |
92 | With the `MonitorManager`, you can also dump the monitor config to a JSON configuration with `dump()`
93 | or `validate()` it to check that you've set things correctly.
94 | ```python
95 | manager.validate()
96 |
97 | print(manager.dump())
98 | ```
99 | `dump()` will print the following JSON object to the console:
100 | ```json
101 | {
102 | "schemaVersion": 1,
103 | "orgId": "org-id",
104 | "datasetId": "dataset-id",
105 | "granularity": "monthly",
106 | "analyzers": [
107 | {
108 | "id": "my-awesome-monitor-3-analyzer",
109 | "displayName": "my-awesome-monitor-3-analyzer",
110 | "tags": [],
111 | "schedule": {
112 | "type": "fixed",
113 | "cadence": "weekly"
114 | },
115 | "targetMatrix": {
116 | "segments": [],
117 | "type": "column",
118 | "include": [
119 | "*"
120 | ],
121 | "exclude": []
122 | },
123 | "config": {
124 | "metric": "median",
125 | "type": "stddev",
126 | "factor": 2.0,
127 | "minBatchSize": 1,
128 | "baseline": {
129 | "type": "TrailingWindow",
130 | "size": 14
131 | }
132 | }
133 | }
134 | ],
135 | "monitors": [
136 | {
137 | "id": "my-awesome-monitor-3",
138 | "displayName": "my-awesome-monitor-3",
139 | "tags": [],
140 | "analyzerIds": [
141 | "my-awesome-monitor-3-analyzer"
142 | ],
143 | "schedule": {
144 | "type": "immediate"
145 | },
146 | "disabled": false,
147 | "severity": 3,
148 | "mode": {
149 | "type": "DIGEST"
150 | },
151 | "actions": [
152 | {
153 | "type": "global",
154 | "target": "my_slack_webhook"
155 | }
156 | ]
157 | }
158 | ]
159 | }
160 | ```
161 | The dumped config can be used as the request body when interacting with WhyLabs' API endpoints. The `validate()` call is optional at this point.
162 |
163 | ## Monitor examples
164 |
165 | What usually differs between monitors is **how** data changes trigger alerts for users.
166 | Here we will look at several examples of monitor configurations,
167 | grouped into three main categories. All of them rely on the Monitor authoring workflow
168 | [previously explained](#add-a-configuration) in this tutorial.
169 |
170 |
171 | ### Diff
172 | To capture a % difference in the F1-Score, you can create a `DiffConfig`
173 | and compare it either to a fixed Reference Profile:
174 | ```python
175 | from whylabs_toolkit.monitor.models import *
176 |
177 | monitor_setup.config = DiffConfig(
178 | metric=DatasetMetric.classification_f1,
179 | baseline=ReferenceProfileId(profileId="ref-prof-id"),
180 | mode=DiffMode.pct, # or DiffMode.abs
181 | threshold=5
182 | )
183 | ```
184 |
185 | or to a Trailing Window Baseline:
186 | ```python
187 | from whylabs_toolkit.monitor.models import *
188 |
189 | monitor_setup.config = DiffConfig(
190 | metric=DatasetMetric.classification_f1,
191 | baseline=TrailingWindowBaseline(size=14),
192 | mode=DiffMode.pct, # or DiffMode.abs
193 | threshold=5
194 | )
195 | ```
196 |
197 | ### Null counts ratio
198 | To compare the null-count ratio of a particular column against a trailing time range:
199 | ```python
200 | from whylabs_toolkit.monitor.models import *
201 |
202 | monitor_setup.config = StddevConfig(
203 | factor = 1.5,
204 | metric = SimpleColumnMetric.count_null_ratio,
205 | baseline = TrailingWindowBaseline(size=14)
206 | )
207 |
208 | ```
209 |
210 | ### Drift
211 | To detect Drift on a continuous feature, you can use the `DriftConfig` object with its default
212 | drift calculation algorithm, the Hellinger distance.
213 |
214 | ```python
215 | from whylabs_toolkit.monitor.models import *
216 |
217 | monitor_setup.config = DriftConfig(
218 | metric = ComplexMetrics.histogram,
219 | threshold = 0.6,
220 | baseline = TrailingWindowBaseline(size=7),
221 | )
222 | ```
223 |
224 |
225 | ## Modify properties of existing Monitors
226 | If you have an existing Monitor and wish to change something about it,
227 | you can instantiate a `MonitorSetup` again, make the changes, and `manager.save()` it again.
228 | Here are a few examples of other things you can do before saving your monitor:
229 |
230 | ```python
231 | from datetime import datetime
232 |
233 | monitor_setup = MonitorSetup(
234 | monitor_id="existing-monitor-id"
235 | )
236 |
237 | # Set a fixed time range, with a helper method
238 | monitor_setup.set_fixed_dates_baseline(
239 | start_date=datetime(2022, 1, 12),
240 | end_date=datetime(2022, 1, 29)
241 | )
242 |
243 | # Include only certain columns to be monitored
244 | monitor_setup.set_target_columns(columns=["feature_1", "feature_2"])
245 |
246 | # Exclude other unnecessary columns
247 | monitor_setup.exclude_target_columns(columns=["id_column"])
248 |
249 | # Include ALL DISCRETE columns
250 | monitor_setup.set_target_columns(columns=["group:discrete"])
251 |
252 | # Exclude ALL OUTPUT columns
253 | monitor_setup.exclude_target_columns(columns=["group:output"])
254 |
255 | # Instead of setting a new action, extend the existing ones
256 | monitor_setup.actions.extend([GlobalAction(target="my_pagerduty_id")])
257 |
258 | ## Save your modifications
259 | monitor_setup.apply()
260 |
261 | manager = MonitorManager(setup=monitor_setup)
262 | manager.save()
263 |
264 | ```
265 |
-------------------------------------------------------------------------------- /whylabs_toolkit/monitor/models/monitor.py: --------------------------------------------------------------------------------
1 | """Schema for configuring a monitor."""
2 | from enum import Enum
3 | from typing import Any, Dict, List, Literal, Optional, Union
4 | from typing_extensions import Annotated
5 |
6 | from pydantic import BaseModel, Field, constr
7 |
8 |
9 | from whylabs_toolkit.monitor.models.commons import (
10 | CronSchedule,
11 | FixedCadenceSchedule,
12 | ImmediateSchedule,
13 | Metadata,
14 | NoExtrasBaseModel,
15 | )
16 | from whylabs_toolkit.monitor.models.utils import COLUMN_NAME_TYPE, METRIC_NAME_STR, anyOf_to_oneOf
17 |
18 |
19 | class MonitorConfigMetadata(NoExtrasBaseModel):
20 | """Metadata related to a monitor."""
21 |
22 | revision: int = Field(title="Revision number")
23 | update_timestamp: int = Field(title="Last update timestamp of this config")
24 | update_author: str = Field(title="The entity that updated this config", max_length=1000)
25 |
26 |
27 | class GlobalAction(NoExtrasBaseModel):
28 | """Actions that are configured at the team/organization level."""
29 |
30 | type: Literal["global"] = "global"
31 | target: str = Field(description="The unique action ID in the platform", regex="[a-zA-Z0-9\\-_]+", max_length=100)
32 |
33 |
34 | class AnomalyFilter(NoExtrasBaseModel):
35 | """Filter the anomalies based on certain criteria. If the alerts are filtered down to 0, the monitor won't fire."""
36 |
37 | includeColumns: Optional[List[COLUMN_NAME_TYPE]] = Field( # type: ignore
38 | None,
39 | title="IncludeColumns",
40 | description="If set, we only include anomalies from these columns",
41 | max_items=1000,
42 | )
43 | excludeColumns: Optional[List[COLUMN_NAME_TYPE]] = Field( # type: ignore
44 | None,
45 | title="ExcludeColumns",
46 | description="If set, we will exclude anomalies from these columns. This is applied AFTER the includeColumns",
47 | max_items=1000,
48 | )
49 | minWeight: Optional[float] = Field(
50 | None,
51 | title="MinWeight",
52 | description="We will include only features with weights greater "
53 | "than or equal to this value. NOT SUPPORTED YET",
54 | )
55 | maxWeight: Optional[float] = Field(
56 | None,
57 | title="MaxWeight",
58 | description="We will include only features with weights less than " "or equal to this value. NOT SUPPORTED YET",
59 | )
60 | minRankByWeight: Optional[int] = Field(
61 | None,
62 | title="MinRankByWeight",
63 | description="Include only features ranked greater than or equal to "
64 | "this value by weight. If features have the same weight"
65 | ", we order them alphabetically. NOT SUPPORTED YET",
66 | )
67 | maxRankByWeight: Optional[int] = Field(
68 | None,
69 | title="MaxRankByWeight",
70 | description="Include only features ranked less than or equal to "
71 | "this value by weight. If features have the same "
72 | "weight, we order them alphabetically. NOT "
73 | "SUPPORTED YET",
74 | )
75 | minTotalWeight: Optional[float] = Field(
76 | None,
77 | title="MinTotalWeight",
78 | description="Only fire the monitor if the total weights of the"
79 | " alerts (based on feature weights) is greater than or "
80 | "equal to this value. NOT SUPPORTED YET",
81 | )
82 | maxTotalWeight: Optional[float] = Field(
83 | None,
84 | title="MaxTotalWeight",
85 | description="Only fire the monitor if the total weights of the"
86 | " alerts (based on feature weights) is less than or "
87 | "equal to this value. NOT SUPPORTED YET",
88 | )
89 | minAlertCount: Optional[int] = Field(
90 | None,
91 | title="MinAlertCount",
92 | description="If the total alert count is less than this value, the " "monitor won't fire. NOT SUPPORTED YET",
93 | )
94 | maxAlertCount: Optional[int] = Field(
95 | None,
96 | title="MaxAlertCount",
97 | description="If the total alert count is greater than this value, " "the monitor won't fire. NOT SUPPORTED YET",
98 | )
99 | includeMetrics: Optional[List[METRIC_NAME_STR]] = Field( # type: ignore
100 | None,
101 | title="IncludeMetrics",
102 | description="Metrics to filter by. NOT SUPPORTED YET",
103 | max_items=100,
104 | )
105 |
106 |
107 | excludeMetrics: Optional[List[METRIC_NAME_STR]] = Field( # type: ignore
108 | None,
109 | title="ExcludeMetrics",
110 | description="Metrics to filter by. NOT SUPPORTED YET",
111 | max_items=100,
112 | )
113 |
114 |
115 | class EveryAnomalyMode(NoExtrasBaseModel):
116 | """Config mode that indicates the monitor will send out individual messages per anomaly."""
117 |
118 | type: Literal["EVERY_ANOMALY"] = "EVERY_ANOMALY"
119 | filter: Optional[AnomalyFilter] = Field(None, description="Filter for anomalies")
120 |
121 |
122 | class DigestModeGrouping(str, Enum):
123 | """Enable the ability to group digest by various fields."""
124 |
125 | byField = "byColumn"
126 | byDataset = "byDataset"
127 | byAnalyzer = "byAnalyzer"
128 | byDay = "byDay"
129 | byHour = "byHour"
130 |
131 |
132 | class DigestMode(NoExtrasBaseModel):
133 | """Config mode that indicates the monitor will send out a digest message."""
134 |
135 | type: Literal["DIGEST"] = Field("DIGEST")
136 | filter: Optional[AnomalyFilter] = Field(None, description="Filter for anomalies")
137 | creationTimeOffset: Optional[str] = Field(
138 | None,
139 | # format='duration', # TODO: is not supported by draft-7, only in draft 2019
140 | title="CreationTimeOffset",
141 | description="Optional for Immediate digest, required for Scheduled digest. The earliest creation timestamp"
142 | " that we will "
143 | "filter by to build the digest. ISO 8601 "
144 | "format for timedelta.",
145 | max_length=20,
146 | )
147 | datasetTimestampOffset: Optional[str] = Field(
148 | None,
149 | # format='duration',
150 | title="DatasetTimestampOffset",
151 | description="Optional for Immediate digest, required for Scheduled digest. "
152 | "The earliest dataset timestamp that we will filter by in the digest",
153 | max_length=20,
154 | )
155 | groupBy: Optional[List[DigestModeGrouping]] = Field(
156 | None,
157 | description="Default is None. If this is set, we will group alerts by these groupings and emit multiple messages"
158 | " per group.",
159 | max_items=10,
160 | )
161 |
162 |
163 | class Monitor(NoExtrasBaseModel):
164 | """Customer specified monitor configs."""
165 |
166 | metadata: Optional[Metadata] = Field(None, description="Meta. This is to track various metadata for auditing.")
167 | id: str = Field(
168 | None,
169 | description="A human-readable alias for a monitor. Must be readable",
170 | min_length=10,
171 | max_length=128,
172 | regex="[0-9a-zA-Z\\-_]+",
173 | )
174 | displayName: Optional[str] = Field(
175 | None,
176 | title="DisplayName",
177 | description="A display name for the monitor if viewed through the WhyLabs UI. Can only contain dashes, underscores, "
178 | "spaces, and alphanumeric characters",
179 | min_length=10,
180 | max_length=256,
181 | regex="[0-9a-zA-Z \\-_]+",
182 | )
183 | tags: Annotated[
184 | Optional[List[str]],
185 | Field(title="Tags", description="The corresponding segment tags.", max_items=10, pattern="[0-9a-zA-Z\\-_]+$"),
186 | ] = None
187 | analyzerIds: Annotated[
188 | List[str],
189 | Field(
190 | title="AnalyzerIds",
191 | description="The corresponding analyzer IDs for the conjunction.",
192 | # max_items=10,
193 | pattern="^[A-Za-z0-9_\\-]+$",
194 | ),
195 | ]
196 | schedule: Union[FixedCadenceSchedule, CronSchedule, ImmediateSchedule] = Field(
197 | description="Schedule of the monitor. We only support hourly monitors at " "the finest granularity",
198 | )
199 | disabled: Optional[bool] = Field(None, description="Whether the monitor is enabled or not")
200 | severity: Optional[int] = Field(3, description="The severity of the monitor messages")
201 | mode: Union[EveryAnomalyMode, DigestMode] = Field(
202 | description="Notification mode and how we might handle different analyses",
203 | discriminator="type",
204 | )
205 | actions: List[GlobalAction] = Field(
206 | description="List of destinations for the outgoing messages",
207 | max_items=100,
208 | )
209 |
210 | class Config:
211 | """Updates JSON schema anyOf to oneOf."""
212 |
213 | # noinspection PyUnusedLocal
214 | @staticmethod
215 | def schema_extra(schema: Dict[str, Any], model: BaseModel) -> None:
216 | """Update specific fields here (for Union type, specifically)."""
217 | anyOf_to_oneOf(schema, "mode")
218 | anyOf_to_oneOf(schema, "schedule")
219 |
-------------------------------------------------------------------------------- /whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py: --------------------------------------------------------------------------------
1 | import pandas as pd
2 | from typing import Dict, List, Optional, Tuple
3 | from pydantic import BaseModel
4 | from whylabs_toolkit.monitor.models import (
5 | Analyzer,
6 | Monitor,
7 | Segment,
8 | TargetLevel,
9 | FixedThresholdsConfig,
10 | ConjunctionConfig,
11 | DisjunctionConfig,
12 | GlobalAction,
13 | )
14 |
15 | from whylabs_toolkit.monitor.diagnoser.helpers.describe import (
16 | describe_truncated_table,
17 | filter_by_index,
18 | describe_truncated_list,
19 | )
20 | from whylabs_toolkit.monitor.diagnoser.helpers.utils import segment_as_readable_text
21 |
22 |
23 | class SegmentReport(BaseModel):
24 | batchCount: int
25 | segment: Segment
26 | totalAnomalies: int
27 | totalFailures: int
28 | totalColumns: int
29 |
30 |
31 | class NamedCount(BaseModel):
32 | name: str
33 | count: int
34 |
35 | def to_tuple(self) -> Tuple[str, int]:
36 | return self.name, self.count
37 |
38 |
39 | class ConditionRecord(BaseModel):
40 | columns: Optional[List[str]] # not present for some conditions like stale analysis
41 | info: Optional[Dict]
42 | summary: str
43 | name: str
44 |
45 |
46 | class QualityIssueRecord(BaseModel):
47 | name: str
48 | description: str
49 | detectors: List[str]
50 |
51 |
52 | class ProfileSummary(BaseModel):
53 | minRowName: str
54 | minRowCount: int
55 | maxRowName: str
56 | maxRowCount: int
57 |
58 | def describe(self) -> str:
59 | count_desc = (
60 | str(self.minRowCount)
61 | if self.minRowCount == self.maxRowCount
62 | else f"{self.minRowCount} - {self.maxRowCount}"
63 | )
64 | return f"Diagnostic interval rollup contains {count_desc} rows for the diagnosed columns.\n"
65 |
66 |
67 | class BatchesSummary(BaseModel):
68 | minBatchName: str
69 | minBatchCount: int
70 | maxBatchName: str
71 | maxBatchCount: int
72 |
73 | def describe(self) -> str:
74 | count_desc = (
75 | str(self.minBatchCount)
76 | if self.minBatchCount == self.maxBatchCount
77 | else f"{self.minBatchCount} - {self.maxBatchCount}"
78 | )
79 | return f"Diagnostic interval contains {count_desc} batches.\n"
80 |
81 |
82 | class ResultRecord(BaseModel):
83 | diagnosedColumnCount: int
84 | batchCount: int
85 |
86 | def describe(self) -> str:
87 | return f"Found non-failed results for {self.diagnosedColumnCount} columns and {self.batchCount} batches."
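# Illustrative example (hypothetical counts, not part of the library):
#   ResultRecord(diagnosedColumnCount=12, batchCount=30).describe()
# returns "Found non-failed results for 12 columns and 30 batches."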
88 | 89 | 90 | class FailureRecord(BaseModel): 91 | totalFailuresCount: int 92 | maxFailuresCount: int 93 | meanFailuresCount: int 94 | byColumnCount: List[NamedCount] 95 | byTypeCount: List[NamedCount] 96 | 97 | def describe(self) -> str: 98 | failures = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=["column", "count"]) 99 | failure_types = [t.name for t in self.byTypeCount] 100 | if len(failures) == 0: 101 | return "No failures were detected." 102 | return ( 103 | f"Found {self.totalFailuresCount} failed results, with up to {self.maxFailuresCount} " 104 | f"failures per column and {self.meanFailuresCount} failures on average.\n" 105 | f"Failure types are {describe_truncated_list(failure_types)}\n" 106 | f"Columns with failures are: \n{describe_truncated_table(failures)}\n" 107 | ) 108 | 109 | 110 | class AnomalyRecord(BaseModel): 111 | totalAnomalyCount: int 112 | maxAnomalyCount: int 113 | meanAnomalyCount: int 114 | batchCount: int 115 | byColumnCount: List[NamedCount] 116 | byColumnBatchCount: List[NamedCount] 117 | 118 | def describe(self) -> str: 119 | counts = pd.DataFrame([c.to_tuple() for c in self.byColumnCount], columns=["column", "count"]) 120 | max_count = int(self.maxAnomalyCount) 121 | max_pct = max_count * 100 / self.batchCount 122 | mean_count = float(self.meanAnomalyCount) 123 | mean_pct = mean_count * 100 / self.batchCount 124 | return ( 125 | f"Found {self.totalAnomalyCount} anomalies in {len(self.byColumnCount)} columns, with up to " 126 | f"{max_pct:.1f}% ({max_count}) batches having anomalies per column and " 127 | f"{mean_pct:.1f}% ({mean_count:.1f}) on average.\n" 128 | f"Columns with anomalies are:\n{describe_truncated_table(counts)}\n" 129 | ) 130 | 131 | 132 | class AnalysisResultsSummary(BaseModel): 133 | results: ResultRecord 134 | failures: FailureRecord 135 | anomalies: AnomalyRecord 136 | 137 | def describe(self) -> str: 138 | return ( 139 | f"Analysis results summary:\n" 140 | f"{self.results.describe()}\n" 141 | f"{self.anomalies.describe()}\n" 142 | f"{self.failures.describe()}\n" 143 | ) 144 | 145 | 146 | class DiagnosticDataSummary(BaseModel): 147 | diagnosticSegment: Segment 148 | diagnosticProfile: Optional[ProfileSummary] 149 | diagnosticBatches: Optional[BatchesSummary] 150 | analysisResults: Optional[AnalysisResultsSummary] 151 | targetedColumnCount: int 152 | 153 | def describe(self) -> str: 154 | return "\n".join( 155 | [ 156 | f'Diagnostic segment is "{segment_as_readable_text(self.diagnosticSegment.tags)}".', 157 | self.diagnosticBatches.describe() if self.diagnosticBatches is not None else "", 158 | self.diagnosticProfile.describe() if self.diagnosticProfile is not None else "", 159 | self.analysisResults.describe() if self.analysisResults is not None else "", 160 | ] 161 | ) 162 | 163 | 164 | class AnalyzerDiagnosisReport(BaseModel): 165 | orgId: str 166 | datasetId: str 167 | analyzerId: str 168 | interval: str 169 | expectedBatchCount: int 170 | diagnosticData: DiagnosticDataSummary 171 | qualityIssues: List[QualityIssueRecord] 172 | conditions: List[ConditionRecord] 173 | 174 | def describe(self) -> str: 175 | text = "\n".join([self.diagnosticData.describe(), self.describe_quality_issues(), self.describe_conditions()]) 176 | return text 177 | 178 | def describe_quality_issues(self) -> str: 179 | if len(self.qualityIssues) == 0: 180 | return "No issues impacting diagnosis quality were detected" 181 | text = "Conditions that may impact diagnosis quality include:\n" 182 | for issue in self.qualityIssues: 183 | text += 
f"\t* {issue.name}: {issue.description} - detectors {issue.detectors}\n" 184 | return text 185 | 186 | def describe_conditions(self) -> str: 187 | if len(self.conditions) == 0: 188 | return "No conditions related to noise were detected." 189 | condition_cols: List[str] = [] 190 | text = "Conditions that may contribute to noise include:\n" 191 | for condition in self.conditions: 192 | text += f"\t* Condition {condition.name} ({condition.summary})" 193 | if condition.columns is not None: 194 | condition_cols += condition.columns 195 | col_text = describe_truncated_list(condition.columns, 10) 196 | text += f" for {len(condition.columns)} columns: {col_text}" 197 | text += "\n" 198 | 199 | cols = pd.Series(condition_cols).unique() 200 | if len(cols) > 0: 201 | text += f"\nAnomalies for columns with these conditions:\n" 202 | by_col_count = ( 203 | self.diagnosticData.analysisResults.anomalies.byColumnCount 204 | if (self.diagnosticData.analysisResults is not None) 205 | else [] 206 | ) 207 | count_tuples = [c.to_tuple() for c in by_col_count] 208 | idx, values = zip(*count_tuples) 209 | count_by_col = pd.Series(values, idx) 210 | cols_with_count = filter_by_index(cols.tolist(), count_by_col).sort_values(ascending=False) 211 | cols_with_count.index.name = "column" 212 | cols_with_count.name = "count" 213 | text += describe_truncated_table(pd.DataFrame(cols_with_count).reset_index()) 214 | text += f"\nAccounting for {cols_with_count.sum()} anomalies out of " f"{count_by_col.sum()}\n" 215 | 216 | return text 217 | 218 | 219 | class MonitorDiagnosisReport(AnalyzerDiagnosisReport): 220 | monitor: Optional[Monitor] # sometimes there isn't one, e.g. it's been deleted 221 | analyzer: Optional[Analyzer] 222 | analyzedColumnCount: int 223 | 224 | def describe(self) -> str: 225 | text = "\n".join([self.describe_monitor(), self.describe_analyzer(), super().describe()]) 226 | return text 227 | 228 | def describe_monitor(self) -> str: 229 | if self.monitor is None: 230 | return "Monitor has been deleted.\n" 231 | text = ( 232 | f'Diagnosis is for monitor "{self.monitor.displayName if self.monitor.displayName else self.monitor.id}" ' 233 | f"[{self.monitor.id}] in {self.datasetId} {self.orgId}, over interval {self.interval}.\n" 234 | ) 235 | if len(self.monitor.actions) > 0: 236 | text += f"Monitor has {len(self.monitor.actions)} notification actions " 237 | text += f"{[a.target for a in self.monitor.actions if isinstance(a, GlobalAction)]}.\n" 238 | return text 239 | 240 | def describe_analyzer(self) -> str: 241 | if self.analyzer is None: 242 | return "No analyzer found.\n" 243 | if isinstance(self.analyzer.config, ConjunctionConfig) or isinstance(self.analyzer.config, DisjunctionConfig): 244 | return f"\nAnalyzer is a composite {self.analyzer.config.type}." 245 | baseline = ( 246 | "no baseline" 247 | if (isinstance(self.analyzer.config, FixedThresholdsConfig) or self.analyzer.config.baseline is None) 248 | else f"{self.analyzer.config.baseline.type} baseline" 249 | ) 250 | targeting_desc = "" 251 | if self.analyzer is None: 252 | return "" 253 | metric = self.analyzer.config.metric 254 | if self.analyzer.targetMatrix is not None and self.analyzer.targetMatrix.type == TargetLevel.column: 255 | targeting_desc = ( 256 | f'\nAnalyzer "{self.analyzer.id}" targets {self.diagnosticData.targetedColumnCount} ' 257 | f"columns and ran on {self.analyzedColumnCount} columns in the diagnosed segment.\n" 258 | ) 259 | text = f"Analyzer is {self.analyzer.config.type} configuration for {metric} metric with {baseline}." 
260 | text += targeting_desc 261 | text += "\n" 262 | return text 263 | 264 | 265 | class MonitorDiagnosisReportList(BaseModel): 266 | __root__: List[MonitorDiagnosisReport] 267 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /examples/example_notebooks/Custom LLM Metrics.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "48a0f4d2", 6 | "metadata": {}, 7 | "source": [ 8 | "# Customizing WhyLabs LLM Metrics\n", 9 | "\n", 10 | "This notebook shows how you can customize the LLM Metrics that appear in the WhyLabs LLM dashboard."
11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "id": "50c5fa12", 16 | "metadata": { 17 | "ExecuteTime": { 18 | "end_time": "2024-11-13T20:38:50.877837Z", 19 | "start_time": "2024-11-13T20:38:48.199192Z" 20 | } 21 | }, 22 | "source": [ 23 | "!pip install whylabs_toolkit" 24 | ], 25 | "outputs": [ 26 | { 27 | "name": "stdout", 28 | "output_type": "stream", 29 | "text": [ 30 | "Requirement already satisfied: whylabs_toolkit in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (0.1.1)\r\n", 31 | "Requirement already satisfied: jsonschema<5.0.0,>=4.17.3 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (4.21.1)\r\n", 32 | "Requirement already satisfied: pydantic<2.0.0,>=1.10.15 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (1.10.18)\r\n", 33 | "Requirement already satisfied: typing-extensions<5.0.0,>=4.11.0 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (4.12.2)\r\n", 34 | "Requirement already satisfied: urllib3<2.1,>=2.0.2 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (2.0.7)\r\n", 35 | "Requirement already satisfied: whylabs-client<0.7.0,>=0.6.3 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (0.6.7)\r\n", 36 | "Requirement already satisfied: whylogs<2.0.0,>=1.1.26 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs_toolkit) (1.4.4)\r\n", 37 | "Requirement already satisfied: attrs>=22.2.0 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from jsonschema<5.0.0,>=4.17.3->whylabs_toolkit) (23.2.0)\r\n", 38 | "Requirement already satisfied: jsonschema-specifications>=2023.03.6 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from jsonschema<5.0.0,>=4.17.3->whylabs_toolkit) (2023.12.1)\r\n", 39 | "Requirement already satisfied: referencing>=0.28.4 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from jsonschema<5.0.0,>=4.17.3->whylabs_toolkit) (0.34.0)\r\n", 40 | "Requirement already satisfied: rpds-py>=0.7.1 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from jsonschema<5.0.0,>=4.17.3->whylabs_toolkit) (0.18.0)\r\n", 41 | "Requirement already satisfied: python-dateutil in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylabs-client<0.7.0,>=0.6.3->whylabs_toolkit) (2.9.0.post0)\r\n", 42 | "Requirement already satisfied: backoff<3.0.0,>=2.2.1 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (2.2.1)\r\n", 43 | "Requirement already satisfied: platformdirs<4.0.0,>=3.5.0 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (3.11.0)\r\n", 44 | "Requirement already satisfied: protobuf>=3.19.4 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (4.25.3)\r\n", 45 | "Requirement already satisfied: requests<3.0,>=2.27 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (2.32.3)\r\n", 46 | "Requirement already satisfied: whylogs-sketching>=3.4.1.dev3 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (3.4.1.dev3)\r\n", 47 | "Requirement already satisfied: charset-normalizer<4,>=2 
in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from requests<3.0,>=2.27->whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (3.3.2)\r\n", 48 | "Requirement already satisfied: idna<4,>=2.5 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from requests<3.0,>=2.27->whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (3.7)\r\n", 49 | "Requirement already satisfied: certifi>=2017.4.17 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from requests<3.0,>=2.27->whylogs<2.0.0,>=1.1.26->whylabs_toolkit) (2024.6.2)\r\n", 50 | "Requirement already satisfied: six>=1.5 in /Users/cdraper/miniconda3/envs/hackthis/lib/python3.9/site-packages (from python-dateutil->whylabs-client<0.7.0,>=0.6.3->whylabs_toolkit) (1.16.0)\r\n", 51 | "\r\n", 52 | "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m A new release of pip is available: \u001B[0m\u001B[31;49m24.1.2\u001B[0m\u001B[39;49m -> \u001B[0m\u001B[32;49m24.3.1\u001B[0m\r\n", 53 | "\u001B[1m[\u001B[0m\u001B[34;49mnotice\u001B[0m\u001B[1;39;49m]\u001B[0m\u001B[39;49m To update, run: \u001B[0m\u001B[32;49mpip install --upgrade pip\u001B[0m\r\n" 54 | ] 55 | } 56 | ], 57 | "execution_count": 22 58 | }, 59 | { 60 | "cell_type": "markdown", 61 | "id": "8f3932e9", 62 | "metadata": {}, 63 | "source": [ 64 | "Edit the following for your organization and model." 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "id": "237ad408", 70 | "metadata": { 71 | "ExecuteTime": { 72 | "end_time": "2024-11-13T20:38:50.884595Z", 73 | "start_time": "2024-11-13T20:38:50.882341Z" 74 | } 75 | }, 76 | "source": [ 77 | "org_id = 'org-5Hsdjx'\n", 78 | "dataset_id = 'model-98'\n", 79 | "base_url = 'https://api.whylabsapp.com'" 80 | ], 81 | "outputs": [], 82 | "execution_count": 23 83 | }, 84 | { 85 | "cell_type": "code", 86 | "id": "24c96cae", 87 | "metadata": { 88 | "ExecuteTime": { 89 | "end_time": "2024-11-13T20:38:53.385211Z", 90 | "start_time": "2024-11-13T20:38:50.891482Z" 91 | } 92 | }, 93 | "source": [ 94 | "import getpass\n", 95 | "api_key = getpass.getpass(\"Enter API Key:\")" 96 | ], 97 | "outputs": [], 98 | "execution_count": 24 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "id": "de8cc991", 103 | "metadata": {}, 104 | "source": [ 105 | "Set up the client API" 106 | ] 107 | }, 108 | { 109 | "cell_type": "code", 110 | "id": "482728e1", 111 | "metadata": { 112 | "ExecuteTime": { 113 | "end_time": "2024-11-13T20:38:53.423636Z", 114 | "start_time": "2024-11-13T20:38:53.419281Z" 115 | } 116 | }, 117 | "source": [ 118 | "import whylabs_client\n", 119 | "from whylabs_client.api import models_api\n", 120 | "from whylabs_client.model.metric_schema import MetricSchema\n", 121 | "from whylabs_client.model.column_schema import ColumnSchema\n", 122 | "configuration = whylabs_client.Configuration(\n", 123 | " host = base_url\n", 124 | ")\n", 125 | "configuration.api_key['ApiKeyAuth'] = api_key\n", 126 | "\n", 127 | "\n", 128 | "api = models_api.ModelsApi(whylabs_client.ApiClient(configuration))" 129 | ], 130 | "outputs": [], 131 | "execution_count": 25 132 | }, 133 | { 134 | "cell_type": "markdown", 135 | "id": "5532d139", 136 | "metadata": {}, 137 | "source": "This first example overrides the default built-in metric used to visualize the 'prompt.sentiment_nltk' data, choosing instead to use the 75th percentile. The LLM metric name is set to 'majority_sentiment'. This is done by creating a custom metric using the WhyLabs models API." 
138 | }, 139 | { 140 | "cell_type": "code", 141 | "id": "a9d5aeed", 142 | "metadata": { 143 | "ExecuteTime": { 144 | "end_time": "2024-11-13T20:38:53.998529Z", 145 | "start_time": "2024-11-13T20:38:53.433360Z" 146 | } 147 | }, 148 | "source": [ 149 | "col_to_change = 'prompt.sentiment_nltk'\n", 150 | "name = \"majority_sentiment\"\n", 151 | "results = api.put_entity_schema_metric(org_id, dataset_id, MetricSchema(\n", 152 | " name = name,\n", 153 | " label = \"Majority sentiment\",\n", 154 | " column=col_to_change,\n", 155 | " default_metric=\"quantile_75\"))\n", 156 | "schema = api.get_entity_schema(org_id, dataset_id)\n", 157 | "schema.metrics[name]" 158 | ], 159 | "outputs": [ 160 | { 161 | "data": { 162 | "text/plain": [ 163 | "{'builtin_metric': 'quantile_75',\n", 164 | " 'column': 'prompt.sentiment_nltk',\n", 165 | " 'default_metric': 'quantile_75',\n", 166 | " 'label': 'Majority sentiment'}" 167 | ] 168 | }, 169 | "execution_count": 26, 170 | "metadata": {}, 171 | "output_type": "execute_result" 172 | } 173 | ], 174 | "execution_count": 26 175 | }, 176 | { 177 | "cell_type": "markdown", 178 | "id": "42b3ec50", 179 | "metadata": {}, 180 | "source": [ 181 | "The second example changes the tab that 'response.relevance_to_prompt' is displayed in. This is done by setting the 'tags' in the column schema for that data to include 'performance', 'security' or both." 182 | ] 183 | }, 184 | { 185 | "cell_type": "code", 186 | "id": "bcdfbff3", 187 | "metadata": { 188 | "ExecuteTime": { 189 | "end_time": "2024-11-13T20:38:54.446273Z", 190 | "start_time": "2024-11-13T20:38:54.079685Z" 191 | } 192 | }, 193 | "source": [ 194 | "col_to_move = 'response.relevance_to_prompt'\n", 195 | "col_schema = api.get_entity_schema_column(org_id, dataset_id, col_to_move)\n", 196 | "col_schema.tags = ['security']\n", 197 | "\n", 198 | "results = api.put_entity_schema_column(org_id, dataset_id, col_to_move, col_schema)\n", 199 | "api.get_entity_schema_column(org_id, dataset_id, col_to_move)" 200 | ], 201 | "outputs": [ 202 | { 203 | "data": { 204 | "text/plain": [ 205 | "{'classifier': 'input',\n", 206 | " 'data_type': 'fractional',\n", 207 | " 'discreteness': 'continuous',\n", 208 | " 'tags': ['security']}" 209 | ] 210 | }, 211 | "execution_count": 27, 212 | "metadata": {}, 213 | "output_type": "execute_result" 214 | } 215 | ], 216 | "execution_count": 27 217 | }, 218 | { 219 | "cell_type": "markdown", 220 | "id": "d253a52f", 221 | "metadata": {}, 222 | "source": [ 223 | "The third example removes the 'prompt.has_patterns' from either of the dashboard tabs by setting tags to contain a value other than 'performance' or 'security'. If the tags array is empty, the default categorization of the metric will be restored." 
224 | ] 225 | }, 226 | { 227 | "cell_type": "code", 228 | "id": "8c019e36", 229 | "metadata": { 230 | "ExecuteTime": { 231 | "end_time": "2024-11-13T20:38:54.831952Z", 232 | "start_time": "2024-11-13T20:38:54.464831Z" 233 | } 234 | }, 235 | "source": [ 236 | "col_to_remove = 'prompt.has_patterns'\n", 237 | "col_schema = api.get_entity_schema_column(org_id, dataset_id, col_to_remove)\n", 238 | "col_schema.tags = ['quality']\n", 239 | "\n", 240 | "results = api.put_entity_schema_column(org_id, dataset_id, col_to_remove, col_schema)\n", 241 | "api.get_entity_schema_column(org_id, dataset_id, col_to_remove)" 242 | ], 243 | "outputs": [ 244 | { 245 | "data": { 246 | "text/plain": [ 247 | "{'classifier': 'input',\n", 248 | " 'data_type': 'null',\n", 249 | " 'discreteness': 'discrete',\n", 250 | " 'tags': ['quality']}" 251 | ] 252 | }, 253 | "execution_count": 28, 254 | "metadata": {}, 255 | "output_type": "execute_result" 256 | } 257 | ], 258 | "execution_count": 28 259 | }, 260 | { 261 | "cell_type": "code", 262 | "id": "a203a894", 263 | "metadata": { 264 | "ExecuteTime": { 265 | "end_time": "2024-11-13T20:38:54.847119Z", 266 | "start_time": "2024-11-13T20:38:54.844192Z" 267 | } 268 | }, 269 | "source": [], 270 | "outputs": [], 271 | "execution_count": null 272 | } 273 | ], 274 | "metadata": { 275 | "kernelspec": { 276 | "display_name": "Python 3 (ipykernel)", 277 | "language": "python", 278 | "name": "python3" 279 | }, 280 | "language_info": { 281 | "codemirror_mode": { 282 | "name": "ipython", 283 | "version": 3 284 | }, 285 | "file_extension": ".py", 286 | "mimetype": "text/x-python", 287 | "name": "python", 288 | "nbconvert_exporter": "python", 289 | "pygments_lexer": "ipython3", 290 | "version": "3.9.16" 291 | } 292 | }, 293 | "nbformat": 4, 294 | "nbformat_minor": 5 295 | } 296 | -------------------------------------------------------------------------------- /tests/monitor/manager/test_monitor_setup.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from datetime import datetime, timezone 4 | 5 | import pytest 6 | 7 | from whylabs_toolkit.monitor.models import * 8 | from tests.helpers.test_monitor_helpers import BaseTestMonitor 9 | from whylabs_toolkit.monitor.manager.credentials import MonitorCredentials 10 | from whylabs_toolkit.monitor import MonitorSetup 11 | from whylabs_toolkit.helpers.config import UserConfig 12 | 13 | 14 | def test_set_fixed_dates_baseline(monitor_setup: MonitorSetup) -> None: 15 | monitor_setup.set_fixed_dates_baseline( 16 | start_date=datetime(2023,1,1), 17 | end_date=datetime(2023,1,2) 18 | ) 19 | 20 | assert monitor_setup.config.baseline == TimeRangeBaseline( 21 | range=TimeRange( 22 | start=datetime(2023,1,1, tzinfo=timezone.utc), 23 | end=datetime(2023,1,2, tzinfo=timezone.utc) 24 | ) 25 | ) 26 | 27 | monitor_setup.apply() 28 | 29 | assert monitor_setup.config.baseline == TimeRangeBaseline( 30 | range=TimeRange( 31 | start=datetime(2023,1,1, tzinfo=timezone.utc), 32 | end=datetime(2023,1,2, tzinfo=timezone.utc) 33 | ) 34 | ) 35 | 36 | def test_exclude_target_columns(monitor_setup): 37 | monitor_setup.exclude_target_columns( 38 | columns=["prediction_temperature"] 39 | ) 40 | 41 | assert monitor_setup._exclude_columns == ["prediction_temperature"] 42 | 43 | monitor_setup.apply() 44 | 45 | assert isinstance(monitor_setup.target_matrix, ColumnMatrix) 46 | assert monitor_setup.target_matrix.exclude == ["prediction_temperature"] 47 | 48 | assert isinstance(monitor_setup.analyzer.targetMatrix, ColumnMatrix) 
49 | assert monitor_setup.analyzer.targetMatrix.exclude == ["prediction_temperature"] 50 | 51 | 52 | def test_set_target_columns(monitor_setup): 53 | monitor_setup.set_target_columns( 54 | columns=["prediction_temperature"] 55 | ) 56 | 57 | assert monitor_setup._target_columns == ["prediction_temperature"] 58 | 59 | monitor_setup.apply() 60 | 61 | assert isinstance(monitor_setup.target_matrix, ColumnMatrix) 62 | assert monitor_setup.target_matrix.include == ["prediction_temperature"] 63 | assert isinstance(monitor_setup.analyzer.targetMatrix, ColumnMatrix) 64 | assert monitor_setup.analyzer.targetMatrix.include == ["prediction_temperature"] 65 | 66 | def test_setup_apply(monitor_setup): 67 | assert not monitor_setup.monitor 68 | assert not monitor_setup.analyzer 69 | 70 | monitor_setup.apply() 71 | 72 | assert isinstance(monitor_setup.monitor, Monitor) 73 | assert isinstance(monitor_setup.analyzer, Analyzer) 74 | 75 | 76 | def test_set_target_matrix(monitor_setup): 77 | monitor_setup.target_matrix = ColumnMatrix(include=["some_specific_column"], segments=[]) 78 | monitor_setup.apply() 79 | 80 | assert isinstance(monitor_setup.target_matrix, ColumnMatrix) 81 | assert monitor_setup.analyzer.targetMatrix == ColumnMatrix(include=["some_specific_column"], segments=[]) 82 | 83 | 84 | def test_set_and_exclude_columns_keep_state(monitor_setup): 85 | assert monitor_setup._target_columns == [] 86 | assert monitor_setup._exclude_columns == [] 87 | 88 | monitor_setup.exclude_target_columns(columns=["prediction_temperature"]) 89 | 90 | assert monitor_setup._target_columns == [] 91 | assert monitor_setup._exclude_columns == ["prediction_temperature"] 92 | 93 | monitor_setup.set_target_columns(columns=["prediction_temperature"]) 94 | 95 | assert monitor_setup._target_columns == ["prediction_temperature"] 96 | assert monitor_setup._exclude_columns == ["prediction_temperature"] 97 | 98 | monitor_setup.apply() 99 | 100 | assert monitor_setup.target_matrix == ColumnMatrix( 101 | include=["prediction_temperature"], exclude=["prediction_temperature"], segments=[] 102 | ) 103 | 104 | 105 | class TestExistingMonitor(BaseTestMonitor): 106 | def test_existing_monitor_monitor_setup_with_id(self, existing_monitor_setup) -> None: 107 | assert isinstance(existing_monitor_setup.config, StddevConfig) 108 | 109 | def test_create_monitor_from_existing_monitor_id(self, existing_monitor_setup) -> None: 110 | assert existing_monitor_setup.monitor.id == os.environ["WHYLABS_DEFAULT_MONITOR_ID"] 111 | 112 | new_credentials = MonitorCredentials(monitor_id="new_monitor_id") 113 | 114 | existing_monitor_setup.credentials = new_credentials 115 | existing_monitor_setup.apply() 116 | 117 | assert existing_monitor_setup.monitor.id == "new_monitor_id" 118 | assert existing_monitor_setup.analyzer.id == "new_monitor_id-analyzer" 119 | 120 | def test_validate_if_columns_exist_before_setting(existing_monitor_setup: MonitorSetup) -> None: 121 | with pytest.raises(ValueError) as e: 122 | existing_monitor_setup.exclude_target_columns(columns=["test_exclude_column"]) 123 | assert e.value == f"test_exclude_column is not present on {existing_monitor_setup.credentials.dataset_id}" 124 | 125 | with pytest.raises(ValueError) as e: 126 | existing_monitor_setup.set_target_columns(columns=["test_set_column"]) 127 | assert e.value == f"test_set_column is not present on {existing_monitor_setup.credentials.dataset_id}" 128 | 129 | 130 | def test_setup_with_passed_in_credentials(user_config: UserConfig) -> None: 131 | monitor_setup = MonitorSetup( 132 | 
monitor_id="different_id", 133 | config=user_config 134 | ) 135 | 136 | assert monitor_setup.credentials.org_id == user_config.org_id 137 | 138 | 139 | def test_setup_with_group_of_columns(monitor_setup) -> None: 140 | monitor_setup.set_target_columns(columns=["group:discrete"]) 141 | monitor_setup.exclude_target_columns(columns=["group:output", "other_feature"]) 142 | monitor_setup.apply() 143 | 144 | def test_setup_with_wrong_group_column_type(monitor_setup) -> None: 145 | with pytest.raises(ValueError): 146 | monitor_setup.set_target_columns(columns=["group:inputs"]) 147 | 148 | 149 | def test_dataset_matrix_is_auto_setup_if_model_metrics(monitor_setup): 150 | monitor_setup.config = FixedThresholdsConfig( 151 | metric=DatasetMetric.classification_accuracy, 152 | lower=0.75 153 | ) 154 | monitor_setup.apply() 155 | 156 | assert monitor_setup.target_matrix == DatasetMatrix(segments=[]) 157 | assert monitor_setup.analyzer.targetMatrix == DatasetMatrix(segments=[]) 158 | 159 | monitor_setup.config = FixedThresholdsConfig( 160 | metric=SimpleColumnMetric.count_bool, 161 | lower=0.75 162 | ) 163 | monitor_setup.apply() 164 | 165 | assert isinstance( 166 | monitor_setup.target_matrix, 167 | ColumnMatrix 168 | ) 169 | 170 | assert isinstance( 171 | monitor_setup.analyzer.targetMatrix, 172 | ColumnMatrix 173 | ) 174 | 175 | def test_apply_wont_change_monitor_columns(monitor_setup): 176 | monitor_setup.set_target_columns(columns=["prediction_temperature", "temperature"]) 177 | monitor_setup.apply() 178 | 179 | assert monitor_setup.analyzer.targetMatrix != ColumnMatrix(include=["*"] , exclude=[], segments=[]) 180 | 181 | assert monitor_setup.target_matrix == ColumnMatrix(include=["prediction_temperature", "temperature"] , exclude=[], segments=[]) 182 | assert monitor_setup.analyzer.targetMatrix == ColumnMatrix(include=["prediction_temperature", "temperature"] , exclude=[], segments=[]) 183 | 184 | def test_apply_wont_erase_existing_preconfig(monitor_setup): 185 | monitor_setup.config = FixedThresholdsConfig( 186 | metric=DatasetMetric.classification_accuracy, 187 | lower=0.75 188 | ) 189 | 190 | monitor_setup.target_matrix = DatasetMatrix(segments=[Segment(tags=[SegmentTag(key="segment_a", value="value_a")])]) 191 | 192 | monitor_setup.apply() 193 | assert monitor_setup.analyzer.targetMatrix == DatasetMatrix(segments=[Segment(tags=[SegmentTag(key="segment_a", value="value_a")])]) 194 | 195 | 196 | def test_target_matrix_is_warned_on_setup(caplog): 197 | with caplog.at_level(level="WARNING"): 198 | monitor_setup = MonitorSetup( 199 | monitor_id='test-target-matrix' 200 | ) 201 | 202 | monitor_setup.config = DriftConfig( 203 | metric=ComplexMetrics.frequent_items, 204 | threshold=0.7, 205 | baseline=TrailingWindowBaseline(size=7), 206 | ) 207 | 208 | # Set wrong matrix with segments 209 | monitor_setup.target_matrix = DatasetMatrix( 210 | segments=[Segment(tags=[SegmentTag(key="Segment_Dataset", value="Training_PCS_tags")])]) 211 | 212 | monitor_setup.apply() 213 | 214 | 215 | assert isinstance(monitor_setup.target_matrix, ColumnMatrix) 216 | assert monitor_setup.target_matrix.segments == [Segment(tags=[ 217 | SegmentTag(key="Segment_Dataset", value="Training_PCS_tags") 218 | ])] 219 | assert "Setting a DatasetMatrix requires a DatasetMetric to be used" in caplog.text 220 | 221 | def test_dataset_metrics_are_warned_on_setup(caplog): 222 | with caplog.at_level(level="WARNING"): 223 | monitor_setup = MonitorSetup( 224 | monitor_id='test-target-matrix' 225 | ) 226 | 227 | monitor_setup.config = 
StddevConfig( 228 | metric=DatasetMetric.classification_accuracy, 229 | maxUpperThreshold=7, 230 | baseline=TrailingWindowBaseline(size=7), 231 | ) 232 | 233 | # Set wrong matrix with segments 234 | monitor_setup.target_matrix = ColumnMatrix( 235 | segments=[Segment(tags=[SegmentTag(key="Segment_Dataset", value="Training_PCS_tags")])]) 236 | 237 | monitor_setup.apply() 238 | 239 | assert isinstance(monitor_setup.target_matrix, DatasetMatrix) 240 | assert monitor_setup.target_matrix.segments == [Segment(tags=[ 241 | SegmentTag(key="Segment_Dataset", value="Training_PCS_tags") 242 | ])] 243 | assert "ColumnMatrix is not configurable with a DatasetMetric" in caplog.text 244 | 245 | 246 | def test_dataset_matrix_if_metric_is_missing_datapoint(monitor_setup) -> None: 247 | monitor_setup.config = FixedThresholdsConfig( 248 | upper=0, 249 | metric=DatasetMetric.missing_data_point 250 | ) 251 | monitor_setup.data_readiness_duration = "P1DT18H" 252 | monitor_setup.apply() 253 | 254 | assert monitor_setup.analyzer.config.metric == DatasetMetric.missing_data_point 255 | assert monitor_setup.analyzer.dataReadinessDuration == "P1DT18H" 256 | assert isinstance(monitor_setup.analyzer.targetMatrix, DatasetMatrix) 257 | 258 | 259 | def test_dataset_matrix_if_metric_is_secondsSinceLastUpload(monitor_setup) -> None: 260 | monitor_setup.config = FixedThresholdsConfig( 261 | upper=0, 262 | metric="secondsSinceLastUpload" 263 | ) 264 | monitor_setup.apply() 265 | 266 | assert monitor_setup.analyzer.config.metric == "secondsSinceLastUpload" 267 | assert isinstance(monitor_setup.analyzer.targetMatrix, DatasetMatrix) 268 | 269 | 270 | def test_set_non_iso_data_readiness_raises(monitor_setup) -> None: 271 | monitor_setup.data_readiness_duration = "P1DT18H" 272 | monitor_setup.apply() 273 | 274 | with pytest.raises(ValueError): 275 | monitor_setup.data_readiness_duration = "Some non-conformant string" 276 | 277 | 278 | def test_cron_schedule_for_analyzer(monitor_setup) -> None: 279 | monitor_setup.config = FixedThresholdsConfig( 280 | metric=DatasetMetric.classification_accuracy, 281 | upper=0.75 282 | ) 283 | monitor_setup.schedule = CronSchedule(cron="0 0 * * *") 284 | monitor_setup.apply() 285 | 286 | assert monitor_setup.analyzer.schedule == CronSchedule( 287 | cron="0 0 * * *" 288 | ) 289 | 290 | monitor_setup.schedule = CronSchedule(cron="0 0 * * 1-5") 291 | monitor_setup.apply() 292 | 293 | assert monitor_setup.analyzer.schedule == CronSchedule( 294 | cron="0 0 * * 1-5" 295 | ) 296 | 297 | monitor_setup.schedule = CronSchedule(cron="0 0 * * 6,0") 298 | monitor_setup.apply() 299 | 300 | assert monitor_setup.analyzer.schedule == CronSchedule( 301 | cron="0 0 * * 6,0" 302 | ) 303 | 304 | monitor_setup.schedule = CronSchedule(cron="0 9-17 * * *") 305 | monitor_setup.apply() 306 | 307 | assert monitor_setup.analyzer.schedule == CronSchedule( 308 | cron="0 9-17 * * *" 309 | ) 310 | 311 | monitor_setup.schedule = CronSchedule(cron="0 9,10,17 * * *") 312 | monitor_setup.apply() 313 | 314 | assert monitor_setup.analyzer.schedule == CronSchedule( 315 | cron="0 9,10,17 * * *" 316 | ) 317 | 318 | monitor_setup.schedule = CronSchedule(cron="*/90 9,10,17 * * *") 319 | monitor_setup.apply() 320 | 321 | assert monitor_setup.analyzer.schedule == CronSchedule( 322 | cron="*/90 9,10,17 * * *" 323 | ) 324 | 325 | monitor_setup.schedule = CronSchedule(cron="0 9,10,17 1,2,3 2,4,5 2,4") 326 | monitor_setup.apply() 327 | 328 | assert monitor_setup.analyzer.schedule == CronSchedule( 329 | cron="0 9,10,17 1,2,3 2,4,5 2,4" 330 | ) 331 
| 332 | # All of the below must fail 333 | 334 | monitor_setup.schedule = CronSchedule(cron="* * * * *") # Every minute 335 | with pytest.raises(ValueError): 336 | monitor_setup.apply() 337 | 338 | monitor_setup.schedule = CronSchedule(cron="0 0 * * * *") # Too many fields 339 | with pytest.raises(ValueError): 340 | monitor_setup.apply() 341 | 342 | monitor_setup.schedule = CronSchedule(cron="1,2 0 * * *") # Less granular than 1h 343 | with pytest.raises(ValueError): 344 | monitor_setup.apply() 345 | 346 | monitor_setup.schedule = CronSchedule(cron="*/15 0 * * *") # Every 15 min 347 | with pytest.raises(ValueError): 348 | monitor_setup.apply() -------------------------------------------------------------------------------- /examples/example_notebooks/Metrics API.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "4c6f95af", 6 | "metadata": {}, 7 | "source": [ 8 | "# Timeseries metrics queries\n", 9 | "## Introduction\n", 10 | "This notebook illustrates using the WhyLabs timeseries metrics API to query profile and monitor metrics. Most of the examples use the REST API. Towards the end, there are some examples using the [whylabs-python-client](https://github.com/whylabs/whylabs-client-python). \n", 11 | "\n", 12 | "## Using the REST API\n" 13 | ] 14 | }, 15 | { 16 | "cell_type": "code", 17 | "execution_count": 1, 18 | "id": "65524d62", 19 | "metadata": {}, 20 | "outputs": [], 21 | "source": [ 22 | "import requests\n", 23 | "import os\n", 24 | "import getpass\n", 25 | "import json" 26 | ] 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 2, 31 | "id": "657aadf0", 32 | "metadata": {}, 33 | "outputs": [], 34 | "source": [ 35 | "base_url = \"https://api.whylabsapp.com\"\n", 36 | "org_id = \"org-0\"\n", 37 | "dataset_id = \"model-0\"" 38 | ] 39 | }, 40 | { 41 | "cell_type": "code", 42 | "execution_count": 3, 43 | "id": "c1ed9ba0", 44 | "metadata": {}, 45 | "outputs": [ 46 | { 47 | "name": "stdout", 48 | "output_type": "stream", 49 | "text": [ 50 | "········\n" 51 | ] 52 | } 53 | ], 54 | "source": [ 55 | "api_key = getpass.getpass()" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "execution_count": 4, 61 | "id": "cfd1e93b", 62 | "metadata": {}, 63 | "outputs": [], 64 | "source": [ 65 | "headers = {\"Accept\": \"application/json\", \"Content-Type\": \"application/json\", \"X-API-KEY\": api_key}\n", 66 | "url = base_url + f\"/v0/organizations/{org_id}/dataset/{dataset_id}/data/metric-timeseries\"" 67 | ] 68 | }, 69 | { 70 | "cell_type": "markdown", 71 | "id": "9757e0e8", 72 | "metadata": {}, 73 | "source": [ 74 | "Get 5 days of data from 19th March for the `bc_util` column and `quantile_95` metric. Queries are limited to at most 90 days."
75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 5, 80 | "id": "902b1c3e", 81 | "metadata": {}, 82 | "outputs": [ 83 | { 84 | "data": { 85 | "text/plain": [ 86 | "{'data': [{'timestamp': 1710806400000,\n", 87 | " 'lastModified': 1710882333280,\n", 88 | " 'value': 469.6818354382451},\n", 89 | " {'timestamp': 1710892800000,\n", 90 | " 'lastModified': 1710968747758,\n", 91 | " 'value': 482.79219419934026},\n", 92 | " {'timestamp': 1710979200000,\n", 93 | " 'lastModified': 1711055104866,\n", 94 | " 'value': 476.2878226406801},\n", 95 | " {'timestamp': 1711065600000,\n", 96 | " 'lastModified': 1711141547080,\n", 97 | " 'value': 489.0629875981804},\n", 98 | " {'timestamp': 1711152000000,\n", 99 | " 'lastModified': 1711227917918,\n", 100 | " 'value': 207.7292131334232}]}" 101 | ] 102 | }, 103 | "execution_count": 5, 104 | "metadata": {}, 105 | "output_type": "execute_result" 106 | } 107 | ], 108 | "source": [ 109 | "resp = requests.post(\n", 110 | " url=url,\n", 111 | " data=json.dumps({\n", 112 | " \"interval\": \"2024-03-19T00:00:00Z/P5D\",\n", 113 | " \"column\": \"bc_util\",\n", 114 | " \"metric\": \"quantile_95\"\n", 115 | " }),\n", 116 | " headers=headers\n", 117 | ")\n", 118 | "resp.json()" 119 | ] 120 | }, 121 | { 122 | "cell_type": "markdown", 123 | "id": "cdeb7d31", 124 | "metadata": {}, 125 | "source": [ 126 | "Get precision for a specified time range." 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 6, 132 | "id": "ddbcaca6", 133 | "metadata": {}, 134 | "outputs": [ 135 | { 136 | "data": { 137 | "text/plain": [ 138 | "{'data': []}" 139 | ] 140 | }, 141 | "execution_count": 6, 142 | "metadata": {}, 143 | "output_type": "execute_result" 144 | } 145 | ], 146 | "source": [ 147 | "resp = requests.post(\n", 148 | " url=url,\n", 149 | " data=json.dumps({\n", 150 | " \"interval\": \"2024-03-19T00:00:00Z/P2D\",\n", 151 | " \"metric\": \"classification_precision\"\n", 152 | " }),\n", 153 | " headers=headers\n", 154 | ")\n", 155 | "resp.json()" 156 | ] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "id": "8b5ee9b5", 161 | "metadata": {}, 162 | "source": [ 163 | "Get a metric for a specific segment." 164 | ] 165 | }, 166 | { 167 | "cell_type": "code", 168 | "execution_count": 7, 169 | "id": "0fca4e70", 170 | "metadata": {}, 171 | "outputs": [ 172 | { 173 | "data": { 174 | "text/plain": [ 175 | "{'data': [{'timestamp': 1710806400000,\n", 176 | " 'lastModified': 1710882333248,\n", 177 | " 'value': 476.5492592565752},\n", 178 | " {'timestamp': 1710892800000,\n", 179 | " 'lastModified': 1710968747758,\n", 180 | " 'value': 483.42130654590056}]}" 181 | ] 182 | }, 183 | "execution_count": 7, 184 | "metadata": {}, 185 | "output_type": "execute_result" 186 | } 187 | ], 188 | "source": [ 189 | "resp = requests.post(\n", 190 | " url=url,\n", 191 | " data=json.dumps({\n", 192 | " \"interval\": \"2024-03-19T00:00:00Z/P2D\",\n", 193 | " \"column\": \"bc_util\",\n", 194 | " \"metric\": \"quantile_95\",\n", 195 | " \"segment\": {\n", 196 | " \"tags\": [{\"key\": \"verification_status\", \"value\":\"Source Verified\"}]\n", 197 | " }\n", 198 | " }),\n", 199 | " headers=headers\n", 200 | ")\n", 201 | "resp.json()" 202 | ] 203 | }, 204 | { 205 | "cell_type": "markdown", 206 | "id": "c9fe9572-68dd-4c5c-bbba-b5a85b6ce045", 207 | "metadata": {}, 208 | "source": [ 209 | "Get data at hourly granularity."
210 | ] 211 | }, 212 | { 213 | "cell_type": "code", 214 | "execution_count": 8, 215 | "id": "14ca248c-b070-4eac-8406-62268f849217", 216 | "metadata": {}, 217 | "outputs": [ 218 | { 219 | "data": { 220 | "text/plain": [ 221 | "{'data': [{'timestamp': 1710882000000,\n", 222 | " 'lastModified': 1710882333280,\n", 223 | " 'value': 469.6818354382451},\n", 224 | " {'timestamp': 1710968400000,\n", 225 | " 'lastModified': 1710968747758,\n", 226 | " 'value': 482.79219419934026}]}" 227 | ] 228 | }, 229 | "execution_count": 8, 230 | "metadata": {}, 231 | "output_type": "execute_result" 232 | } 233 | ], 234 | "source": [ 235 | "resp = requests.post(\n", 236 | " url=url,\n", 237 | " data=json.dumps({\n", 238 | " \"interval\": \"2024-03-19T00:00:00Z/P2D\",\n", 239 | " \"column\": \"bc_util\",\n", 240 | " \"metric\": \"quantile_95\",\n", 241 | " \"granularity\": \"HOURLY\"\n", 242 | " }),\n", 243 | " headers=headers\n", 244 | ")\n", 245 | "resp.json()" 246 | ] 247 | }, 248 | { 249 | "cell_type": "markdown", 250 | "id": "6bf017a9-832a-4304-b2d9-2fc44716cfdd", 251 | "metadata": {}, 252 | "source": [ 253 | "Get a rollup of a metric across the whole specified range." 254 | ] 255 | }, 256 | { 257 | "cell_type": "code", 258 | "execution_count": 9, 259 | "id": "5f363d1f-7881-4ddd-ad47-399aeb9ee079", 260 | "metadata": {}, 261 | "outputs": [ 262 | { 263 | "data": { 264 | "text/plain": [ 265 | "{'data': [{'timestamp': 1710806400000,\n", 266 | " 'lastModified': 1711227917918,\n", 267 | " 'value': 413.9951103488429}]}" 268 | ] 269 | }, 270 | "execution_count": 9, 271 | "metadata": {}, 272 | "output_type": "execute_result" 273 | } 274 | ], 275 | "source": [ 276 | "resp = requests.post(\n", 277 | " url=url,\n", 278 | " data=json.dumps({\n", 279 | " \"interval\": \"2024-03-19T00:00:00Z/P5D\",\n", 280 | " \"column\": \"bc_util\",\n", 281 | " \"metric\": \"quantile_95\",\n", 282 | " \"granularity\": \"ALL\"\n", 283 | " }),\n", 284 | " headers=headers\n", 285 | ")\n", 286 | "resp.json()" 287 | ] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "id": "364996b1", 292 | "metadata": {}, 293 | "source": [ 294 | "Get monitor metric `avg_drift` for a specific analyzer." 295 | ] 296 | }, 297 | { 298 | "cell_type": "code", 299 | "execution_count": 10, 300 | "id": "d303d108", 301 | "metadata": {}, 302 | "outputs": [ 303 | { 304 | "data": { 305 | "text/plain": [ 306 | "{'data': [{'timestamp': 1730419200000, 'value': 0.9329209663795195}]}" 307 | ] 308 | }, 309 | "execution_count": 10, 310 | "metadata": {}, 311 | "output_type": "execute_result" 312 | } 313 | ], 314 | "source": [ 315 | "resp = requests.post(\n", 316 | " url=url,\n", 317 | " data=json.dumps({\n", 318 | " \"interval\": \"2024-11-01T00:00:00Z/P1D\",\n", 319 | " \"column\": \"url\",\n", 320 | " \"metric\": \"avg_drift\",\n", 321 | " \"analyzerId\": \"comfortable-orchid-stinkbug-6423-analyzer\"\n", 322 | " }),\n", 323 | " headers=headers\n", 324 | ")\n", 325 | "resp.json()" 326 | ] 327 | }, 328 | { 329 | "cell_type": "markdown", 330 | "id": "b1404ddd", 331 | "metadata": {}, 332 | "source": [ 333 | "## Using the WhyLabs client\n", 334 | "\n", 335 | "Use the WhyLabs client to make a metrics request.\n", 336 | "\n", 337 | "NOTE: The timeseries metrics API is supported from version 0.6.2 with [profile metrics](https://docs.whylabs.ai/docs/profile-metrics/), but monitor metrics like anomaly_count and avg_drift are only supported from version 0.6.12." 
338 | ] 339 | }, 340 | { 341 | "cell_type": "code", 342 | "execution_count": 11, 343 | "id": "2bd22c42", 344 | "metadata": {}, 345 | "outputs": [], 346 | "source": [ 347 | "#!pip install \"whylabs-client~=0.6.12\"" 348 | ] 349 | }, 350 | { 351 | "cell_type": "code", 352 | "execution_count": 12, 353 | "id": "236113f8", 354 | "metadata": {}, 355 | "outputs": [], 356 | "source": [ 357 | "import whylabs_client\n", 358 | "from whylabs_client.api import data_api\n", 359 | "from whylabs_client.model.metric_timeseries_request import MetricTimeseriesRequest\n", 360 | "configuration = whylabs_client.Configuration(\n", 361 | "    host = base_url\n", 362 | ")\n", 363 | "configuration.api_key['ApiKeyAuth'] = api_key\n", 364 | "\n", 365 | "data_api = data_api.DataApi(whylabs_client.ApiClient(configuration))" 366 | ] 367 | }, 368 | { 369 | "cell_type": "markdown", 370 | "id": "94de2a20", 371 | "metadata": {}, 372 | "source": [ 373 | "Get data from 19th March for the `bc_util` column and `quantile_95` metric." 374 | ] 375 | }, 376 | { 377 | "cell_type": "code", 378 | "execution_count": 13, 379 | "id": "4e7da143", 380 | "metadata": {}, 381 | "outputs": [ 382 | { 383 | "data": { 384 | "text/plain": [ 385 | "{'data': [{'last_modified': 1710882333280,\n", 386 | "           'timestamp': 1710806400000,\n", 387 | "           'value': 469.6818354382451}]}" 388 | ] 389 | }, 390 | "execution_count": 13, 391 | "metadata": {}, 392 | "output_type": "execute_result" 393 | } 394 | ], 395 | "source": [ 396 | "results = data_api.metric_timeseries_data(org_id, dataset_id, MetricTimeseriesRequest(\n", 397 | "    interval = \"2024-03-19T00:00:00Z/P1D\",\n", 398 | "    column = \"bc_util\",\n", 399 | "    metric = \"quantile_95\",\n", 400 | "))\n", 401 | "results" 402 | ] 403 | }, 404 | { 405 | "cell_type": "markdown", 406 | "id": "21974274", 407 | "metadata": {}, 408 | "source": [ 409 | "Get monitor metric `avg_drift` for the `url` column and a specific monitor. For `anomaly_count`, you can also omit the monitor to get the total anomaly count for a column. Note that the Python client uses snake case for field names like `monitor_id`, whereas the REST API uses lower camel case, e.g. `monitorId`." 410 | ] 411 | }, 412 | { 413 | "cell_type": "code", 414 | "execution_count": 14, 415 | "id": "ac30ad14", 416 | "metadata": {}, 417 | "outputs": [ 418 | { 419 | "data": { 420 | "text/plain": [ 421 | "{'data': [{'timestamp': 1730419200000, 'value': 0.9329209663795195}]}" 422 | ] 423 | }, 424 | "execution_count": 14, 425 | "metadata": {}, 426 | "output_type": "execute_result" 427 | } 428 | ], 429 | "source": [ 430 | "results = data_api.metric_timeseries_data(org_id, dataset_id, MetricTimeseriesRequest(\n", 431 | "    interval = \"2024-11-01T00:00:00Z/P1D\",\n", 432 | "    column = \"url\",\n", 433 | "    metric = \"avg_drift\",\n", 434 | "    monitor_id = \"comfortable-orchid-stinkbug-6423\",\n", 435 | "))\n", 436 | "results" 437 | ] 438 | }, 439 | { 440 | "cell_type": "markdown", 441 | "id": "2e13b1f7", 442 | "metadata": {}, 443 | "source": [ 444 | "Get anomaly_count for a monitor targeted at the dataset level. In this case, a column name of `__internal__.datasetMetrics` is used."
445 | ] 446 | }, 447 | { 448 | "cell_type": "code", 449 | "execution_count": 15, 450 | "id": "5cfe22c4", 451 | "metadata": {}, 452 | "outputs": [ 453 | { 454 | "data": { 455 | "text/plain": [ 456 | "{'data': []}" 457 | ] 458 | }, 459 | "execution_count": 15, 460 | "metadata": {}, 461 | "output_type": "execute_result" 462 | } 463 | ], 464 | "source": [ 465 | "results = data_api.metric_timeseries_data(org_id, dataset_id, MetricTimeseriesRequest(\n", 466 | " interval = \"2024-11-01T00:00:00Z/P1D\",\n", 467 | " column = \"__internal__.datasetMetrics\",\n", 468 | " metric = \"anomaly_count\",\n", 469 | " monitor_id = \"breakable-salmon-ferret-9291\",\n", 470 | "))\n", 471 | "results" 472 | ] 473 | } 474 | ], 475 | "metadata": { 476 | "kernelspec": { 477 | "display_name": "Python 3 (ipykernel)", 478 | "language": "python", 479 | "name": "python3" 480 | }, 481 | "language_info": { 482 | "codemirror_mode": { 483 | "name": "ipython", 484 | "version": 3 485 | }, 486 | "file_extension": ".py", 487 | "mimetype": "text/x-python", 488 | "name": "python", 489 | "nbconvert_exporter": "python", 490 | "pygments_lexer": "ipython3", 491 | "version": "3.10.12" 492 | } 493 | }, 494 | "nbformat": 4, 495 | "nbformat_minor": 5 496 | } 497 | --------------------------------------------------------------------------------
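The Metrics API notebook above notes that metric-timeseries queries are limited to at most 90 days per request. Below is a minimal sketch of one way to page through a longer range against the same REST endpoint, reusing the `url` and `headers` built in the notebook. It is not part of the repository: the helper name `fetch_metric_range` and the chunking approach are assumptions, and start/end are assumed to be day-aligned.

# A hypothetical helper (not from this repo): fetch more than 90 days of a
# profile metric by issuing one metric-timeseries request per <=90-day chunk.
from datetime import datetime, timedelta, timezone
import json
import requests

def fetch_metric_range(url, headers, column, metric, start, end, chunk_days=90):
    """Yield datapoints for [start, end), one <=90-day interval per request."""
    cursor = start
    while cursor < end:
        days = min(chunk_days, (end - cursor).days)
        if days <= 0:  # guard against sub-day remainders; assumes day-aligned bounds
            break
        # ISO-8601 start/duration interval, same format the notebook uses
        interval = cursor.strftime("%Y-%m-%dT%H:%M:%SZ") + f"/P{days}D"
        resp = requests.post(
            url=url,  # the /data/metric-timeseries endpoint built in the notebook
            data=json.dumps({"interval": interval, "column": column, "metric": metric}),
            headers=headers,
        )
        resp.raise_for_status()
        yield from resp.json().get("data", [])
        cursor += timedelta(days=days)

# Example usage, mirroring the notebook's bc_util/quantile_95 queries:
# points = list(fetch_metric_range(url, headers, "bc_util", "quantile_95",
#                                  datetime(2024, 1, 1, tzinfo=timezone.utc),
#                                  datetime(2024, 7, 1, tzinfo=timezone.utc)))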