_build_spark_schema_snapshot
Internal helper
This page documents an internal implementation helper, not a primary public API.
Source code in src/fabricops_kit/drift.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
def _build_spark_schema_snapshot(df, dataset_name: str, table_name: str) -> dict:
    """Capture the column layout of a Spark DataFrame as a plain dictionary.

    Walks ``df.schema.fields`` in order and records, for each field, its name,
    zero-based position, the string form of its data type, its nullability and
    the value returned by ``_column_hash`` for those four attributes.

    Args:
        df: Object exposing ``schema.fields``; each field must provide
            ``name``, ``dataType`` and ``nullable``.
        dataset_name: Dataset label, stored as ``str(dataset_name)``.
        table_name: Table label, stored as ``str(table_name)``.

    Returns:
        A dict with keys ``dataset_name``, ``table_name``, ``engine`` (always
        ``"spark"``), ``generated_at`` (current UTC time in ISO 8601 form) and
        ``columns`` (one entry per schema field, in schema order).
    """

    def describe(position, spark_field) -> dict:
        # Coerce every attribute to a builtin type before hashing/storing it.
        name = str(spark_field.name)
        type_repr = str(spark_field.dataType)
        is_nullable = bool(spark_field.nullable)
        ordinal = int(position)
        return {
            "column_name": name,
            "ordinal_position": ordinal,
            "data_type": type_repr,
            "nullable": is_nullable,
            "column_hash": _column_hash(name, ordinal, type_repr, is_nullable),
        }

    snapshot_columns = [
        describe(position, spark_field)
        for position, spark_field in enumerate(df.schema.fields)
    ]

    # The timestamp is taken after the columns are collected.
    return {
        "dataset_name": str(dataset_name),
        "table_name": str(table_name),
        "engine": "spark",
        "generated_at": datetime.now(timezone.utc).isoformat(),
        "columns": snapshot_columns,
    }
|