Skip to content

_build_spark_schema_snapshot

Internal helper
This page documents an internal implementation helper, not a primary public API.
Source code in src/fabricops_kit/drift.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
def _build_spark_schema_snapshot(df, dataset_name: str, table_name: str) -> dict:
    """Capture the schema of a Spark-style DataFrame as a plain dictionary.

    Args:
        df: Object exposing ``schema.fields``; every field must provide
            ``name``, ``dataType`` and ``nullable`` attributes.
        dataset_name: Dataset label stored (stringified) in the snapshot.
        table_name: Table label stored (stringified) in the snapshot.

    Returns:
        A dict with ``dataset_name``, ``table_name``, ``engine`` (always
        ``"spark"``), ``generated_at`` (current UTC time in ISO-8601 form)
        and ``columns``: one entry per schema field, in schema order, holding
        the column name, zero-based ordinal position, stringified data type,
        nullability flag and the value returned by ``_column_hash``.
    """

    def _describe(position, spark_field) -> dict:
        # Normalise every attribute to a builtin type first so the hash is
        # computed from exactly the values that are stored alongside it.
        name = str(spark_field.name)
        type_repr = str(spark_field.dataType)
        is_nullable = bool(spark_field.nullable)
        return {
            "column_name": name,
            "ordinal_position": position,
            "data_type": type_repr,
            "nullable": is_nullable,
            "column_hash": _column_hash(name, position, type_repr, is_nullable),
        }

    described_columns = [
        _describe(position, spark_field)
        for position, spark_field in enumerate(df.schema.fields)
    ]

    snapshot = {
        "dataset_name": str(dataset_name),
        "table_name": str(table_name),
        "engine": "spark",
        "generated_at": datetime.now(timezone.utc).isoformat(),
    }
    snapshot["columns"] = described_columns
    return snapshot