from typing import Optional

# NOTE(review): `pd` (pandas) and `pl` (polars) are referenced below but their
# imports are not visible in this part of the file -- confirm they are imported
# at module level.


def generate_lag_variable_with_group_key(
    df: pd.DataFrame | pl.DataFrame,
    target_column: str,
    sort_key: list[str],
    group_key: list[str],
    lag_size: int,
    ascending: list[bool] | bool = True,
    lagged_col_name: Optional[str] = None,
) -> pd.DataFrame | pl.DataFrame:
    """Generate a lagged variable in a DataFrame within each group.

    The frame is first ordered by ``sort_key`` (honouring ``ascending``), the
    values of ``target_column`` are shifted by ``lag_size`` rows inside every
    ``group_key`` group, and the shifted values are stored in a new column.
    Both pandas and polars DataFrames are supported; the result has the same
    flavour as the input.

    The returned frame is always re-sorted in ascending order by
    ``group_key + sort_key``; ``ascending`` only controls the ordering used
    while computing the lag, not the ordering of the output rows.

    Parameters:
        df (pd.DataFrame | pl.DataFrame): The input DataFrame, either pandas
            or polars.
        target_column (str): The name of the column to generate the lagged
            variable from.
        sort_key (list[str]): The columns to sort the DataFrame by before
            generating the lagged variable.
        group_key (list[str]): The columns to group the DataFrame by before
            generating the lagged variable.
        lag_size (int): The number of rows to shift the variable by within
            each group.
        ascending (list[bool] | bool, optional): The sort order for the
            columns in ``sort_key``; a list gives one flag per column.
            Defaults to True.
        lagged_col_name (Optional[str], optional): The name of the new lagged
            column. If None, defaults to "{target_column}_{lag_size}lag".

    Returns:
        pd.DataFrame | pl.DataFrame: A new DataFrame containing the original
        columns plus the lagged column, sorted by ``group_key + sort_key``.
        For pandas input the index is reset to a fresh 0..n-1 range.

    Raises:
        TypeError: If the input DataFrame is neither pandas.DataFrame nor
            polars.DataFrame.
    """
    # Values shared by both backends.
    if lagged_col_name is None:
        lagged_col_name = f"{target_column}_{lag_size}lag"
    result_sort_key = group_key + sort_key

    if isinstance(df, pd.DataFrame):
        # sort_values returns a new frame, so adding the column below does
        # not touch the caller's DataFrame.
        df_sorted = df.sort_values(by=sort_key, ascending=ascending)

        # Shift within each group, following the row order established above.
        df_sorted[lagged_col_name] = df_sorted.groupby(group_key)[
            target_column
        ].shift(lag_size)

        # Normalise the output ordering and drop the shuffled index.
        df_result = df_sorted.sort_values(by=result_sort_key).reset_index(
            drop=True
        )
    elif isinstance(df, pl.DataFrame):
        # polars expresses sort direction as `descending`, i.e. the inverse
        # of the pandas-style `ascending` flag(s) accepted by this function.
        if isinstance(ascending, list):
            order_reverse = [not x for x in ascending]
        else:
            order_reverse = not ascending

        df_sorted = df.sort(sort_key, descending=order_reverse)
        df_sorted = df_sorted.with_columns(
            pl.col(target_column)
            .shift(lag_size)
            .over(group_key)
            .alias(lagged_col_name)
        )
        df_result = df_sorted.sort(result_sort_key)
    else:
        raise TypeError(
            f"type(df) is {type(df)}: df should be pandas.DataFrame or polars.DataFrame"
        )

    return df_result