`check_partition_drift`

Public callable

Check partition-level drift using keys, partitions, and optional watermark baselines.

Parameters:

Name	Type	Description	Default
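`df`	`Any`	Input dataset passed to `build_partition_snapshot` to produce the current partition snapshot.	required
`dataset_name`	`str`	Dataset name; forwarded to the snapshot builder and echoed in the result.	required
`table_name`	`str`	Table name; forwarded to the snapshot builder and echoed in the result.	required
`partition_column`	`str`	Partition column name forwarded to the snapshot builder.	required
`business_keys`	`list[str] | None`	Business key columns. Must contain at least one column; `None` or an empty list raises `ValueError`.	`None`
`watermark_column`	`str | None`	Optional watermark column forwarded to the snapshot builder.	`None`
`baseline_snapshot`	`list[dict] | dict | None`	Baseline snapshot to compare against; a single dict is wrapped in a list. When `None`, no comparison runs and the status is `no_baseline`.	`None`
`policy`	`dict | None`	Policy passed to `compare_partition_snapshots`; when `None` or empty, `default_incremental_safety_policy()` is used.	`None`
`run_id`	`str | None`	Optional run identifier forwarded to the snapshot builder.	`None`
`engine`	`str`	Engine identifier forwarded to the snapshot builder.	`'spark'`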

Returns:

Type	Description
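`dict`	Result with the keys `dataset_name`, `table_name`, `status`, `can_continue`, `current_snapshot`, `baseline_snapshot`, `comparison`, and `message`.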

Source code in src/fabricops_kit/drift.py

def check_partition_drift(
    df,
    dataset_name: str,
    table_name: str,
    partition_column: str,
    business_keys: list[str] | None = None,
    watermark_column: str | None = None,
    baseline_snapshot: list[dict] | dict | None = None,
    policy: dict | None = None,
    run_id: str | None = None,
    engine: str = "spark",
) -> dict:
    """Snapshot the current partitions of ``df`` and compare them to a baseline.

    The current snapshot is always built with ``build_partition_snapshot``.
    Without a baseline the function returns early with status
    ``"no_baseline"``; otherwise the baseline and the current snapshot are
    handed to ``compare_partition_snapshots`` and its verdict is reported.

    Parameters
    ----------
    df
        Dataset forwarded unchanged to ``build_partition_snapshot``.
        NOTE(review): presumably a DataFrame matching ``engine`` -- confirm.
    dataset_name
        Dataset name; forwarded to the snapshot builder and echoed in the
        result.
    table_name
        Table name; forwarded to the snapshot builder and echoed in the
        result.
    partition_column
        Partition column name forwarded to the snapshot builder.
    business_keys
        Business key columns. Although the default is ``None``, at least
        one column is required.
    watermark_column
        Optional watermark column forwarded to the snapshot builder.
    baseline_snapshot
        Baseline to compare against. A value that is not a ``list`` is
        wrapped in a one-element list. ``None`` means no baseline exists.
    policy
        Policy passed to ``compare_partition_snapshots``. A falsy value
        (``None`` or an empty dict) is replaced by
        ``default_incremental_safety_policy()``.
    run_id
        Optional run identifier forwarded to the snapshot builder.
    engine
        Engine identifier forwarded to the snapshot builder.

    Returns
    -------
    dict
        Keys, in order: ``dataset_name``, ``table_name``, ``status``,
        ``can_continue``, ``current_snapshot``, ``baseline_snapshot``,
        ``comparison`` and ``message``. ``status`` and ``can_continue``
        come from the comparison result, defaulting to ``"passed"`` and
        ``True`` when the comparison omits them.

    Raises
    ------
    ValueError
        If ``business_keys`` is ``None`` or empty.
    """
    # Validate before any snapshot work so bad input fails fast.
    if not business_keys:
        raise ValueError("business_keys must contain at least one column for partition drift checks.")

    snapshot_now = build_partition_snapshot(
        df,
        dataset_name=dataset_name,
        table_name=table_name,
        partition_column=partition_column,
        business_keys=business_keys,
        watermark_column=watermark_column,
        run_id=run_id,
        engine=engine,
    )

    if baseline_snapshot is None:
        # First observation: nothing to compare against, so never block.
        prior_rows = None
        outcome = None
        status = "no_baseline"
        proceed = True
        note = "No baseline partition snapshot found; current snapshot captured as first observation."
    else:
        # Accept either a single snapshot row or a list of rows.
        prior_rows = baseline_snapshot
        if not isinstance(prior_rows, list):
            prior_rows = [prior_rows]
        outcome = compare_partition_snapshots(
            prior_rows,
            snapshot_now,
            policy=policy or default_incremental_safety_policy(),
        )
        status = str(outcome.get("status", "passed"))
        proceed = bool(outcome.get("can_continue", True))
        note = "Partition drift check completed."

    return {
        "dataset_name": dataset_name,
        "table_name": table_name,
        "status": status,
        "can_continue": proceed,
        "current_snapshot": snapshot_now,
        "baseline_snapshot": prior_rows,
        "comparison": outcome,
        "message": note,
    }