`add_datetime_features`

Public callable

Add localized datetime feature columns derived from a UTC datetime column.

Parameters:

Name	Type	Description	Default
`df`	`Any`	Input pandas or Spark DataFrame.	required
`datetime_column`	`str`	Source UTC datetime column.	required
`prefix`	`str | None`	Prefix used for output columns. When omitted (or empty), `datetime_column` is used.	`None`
`timezone`	`str`	IANA timezone used for localization. Output column names keep the `_UTC8` suffix regardless of this value.	`"Asia/Singapore"`
`include_datetime`	`bool`	Whether to add `{PREFIX}_DTM_UTC8`.	`True`
`include_date`	`bool`	Whether to add `{PREFIX}_DATE_UTC8`.	`True`
`include_time`	`bool`	Whether to add `{PREFIX}_TIME_UTC8`.	`True`
`include_hour`	`bool`	Whether to add `{PREFIX}_HOUR_UTC8`.	`True`
`include_30_min_block`	`bool`	Whether to add `{PREFIX}_TIME_BLOCK_30_MIN`.	`True`
`engine`	`str`	Execution engine (`auto`, `pandas`, or `spark`).	`"auto"`

Returns:

Type	Description
`Any`	DataFrame with requested datetime features.

Raises:

Type	Description
`ValueError`	If `datetime_column` does not exist.

Examples:

>>> import pandas as pd
>>> df = pd.DataFrame({"event_ts": ["2026-01-01T00:45:00Z"]})
>>> add_datetime_features(df, "event_ts", prefix="EVENT")["EVENT_TIME_UTC8"].iloc[0]
'08:45:00'

Source code in src/fabricops_kit/technical_columns.py

def add_datetime_features(
    df,
    datetime_column: str,
    *,
    prefix: str | None = None,
    timezone: str = "Asia/Singapore",
    include_datetime: bool = True,
    include_date: bool = True,
    include_time: bool = True,
    include_hour: bool = True,
    include_30_min_block: bool = True,
    engine: str = "auto",
):
    """Add localized datetime feature columns derived from a UTC datetime column.

    The source column is interpreted as UTC, converted to ``timezone``, and up
    to five derived columns are appended. The input DataFrame is not modified:
    the pandas path works on a copy and the Spark path builds new columns with
    ``withColumn``.

    Parameters
    ----------
    df : Any
        Input pandas or Spark DataFrame.
    datetime_column : str
        Source UTC datetime column.
    prefix : str | None, optional
        Prefix used for output columns. When ``None`` or empty,
        `datetime_column` is used.
    timezone : str, default="Asia/Singapore"
        IANA timezone used for localization.
    include_datetime : bool, default=True
        Whether to add ``{PREFIX}_DTM_UTC8``.
    include_date : bool, default=True
        Whether to add ``{PREFIX}_DATE_UTC8``.
    include_time : bool, default=True
        Whether to add ``{PREFIX}_TIME_UTC8``.
    include_hour : bool, default=True
        Whether to add ``{PREFIX}_HOUR_UTC8``.
    include_30_min_block : bool, default=True
        Whether to add ``{PREFIX}_TIME_BLOCK_30_MIN`` (``"HH:00"`` for minutes
        0-29, ``"HH:30"`` for minutes 30-59).
    engine : str, default="auto"
        Execution engine (``auto``, ``pandas``, or ``spark``).

    Returns
    -------
    Any
        DataFrame with requested datetime features.

    Raises
    ------
    ValueError
        If `datetime_column` does not exist.

    Notes
    -----
    * Output column names always carry the ``_UTC8`` suffix, even when
      ``timezone`` is not a UTC+8 zone; the suffix is not derived from
      ``timezone``.
    * On the pandas path the datetime, date and time columns are formatted
      strings (``strftime``). On the Spark path the datetime column is the
      localized timestamp expression and the date column comes from
      ``to_date``; only the time and 30-minute block columns are strings.
    * On the pandas path values are parsed with ``errors="coerce"``, so
      unparseable entries become missing values in every derived column.
    * Any resolved engine other than ``"pandas"`` takes the Spark path.

    Examples
    --------
    >>> import pandas as pd
    >>> df = pd.DataFrame({"event_ts": ["2026-01-01T00:45:00Z"]})
    >>> add_datetime_features(df, "event_ts", prefix="EVENT")["EVENT_TIME_UTC8"].iloc[0]
    '08:45:00'
    """
    _assert_columns_exist(df, [datetime_column])
    selected_engine = _resolve_engine(df, engine)
    # `or` (not `is None`) is deliberate-looking: an empty prefix also falls back.
    col_prefix = prefix or datetime_column

    if selected_engine == "pandas":
        out = df.copy()
        # Parse as UTC first, then shift to the target zone; bad values -> NaT.
        parsed = pd.to_datetime(out[datetime_column], errors="coerce", utc=True).dt.tz_convert(timezone)
        if include_datetime:
            out[f"{col_prefix}_DTM_UTC8"] = parsed.dt.strftime("%Y-%m-%d %H:%M:%S%z")
        if include_date:
            out[f"{col_prefix}_DATE_UTC8"] = parsed.dt.strftime("%Y-%m-%d")
        if include_time:
            out[f"{col_prefix}_TIME_UTC8"] = parsed.dt.strftime("%H:%M:%S")
        if include_hour:
            out[f"{col_prefix}_HOUR_UTC8"] = parsed.dt.hour
        if include_30_min_block:
            # Vectorized replacement for a per-row lambda. A missing minute
            # compares False and maps to "30", but the "%H:" part is missing
            # for that row too, so the concatenated value stays missing.
            half_hour = parsed.dt.minute.lt(30).map({True: "00", False: "30"})
            out[f"{col_prefix}_TIME_BLOCK_30_MIN"] = parsed.dt.strftime("%H:") + half_hour
        return out

    # Imported lazily so pandas-only callers do not need pyspark installed.
    from pyspark.sql import functions as F

    localized = F.from_utc_timestamp(F.col(datetime_column), timezone)
    out = df
    if include_datetime:
        out = out.withColumn(f"{col_prefix}_DTM_UTC8", localized)
    if include_date:
        out = out.withColumn(f"{col_prefix}_DATE_UTC8", F.to_date(localized))
    if include_time:
        out = out.withColumn(f"{col_prefix}_TIME_UTC8", F.date_format(localized, "HH:mm:ss"))
    if include_hour:
        out = out.withColumn(f"{col_prefix}_HOUR_UTC8", F.hour(localized))
    if include_30_min_block:
        # Only the minute suffix depends on the condition; the hour part is
        # shared, so build it once and concatenate.
        half_hour = F.when(F.minute(localized) < 30, F.lit("00")).otherwise(F.lit("30"))
        out = out.withColumn(
            f"{col_prefix}_TIME_BLOCK_30_MIN",
            F.concat(F.date_format(localized, "HH:"), half_hour),
        )
    return out