r"""Build a table of intercept and lagged columns from a pandas DataFrame.

This docstring is deliberately a raw string: pandas ends a wrapped block of
columns with a backslash, and a normal string literal would consume that
character as an escape sequence, so the expected output below could never
match what pandas actually prints.

>>> df = pd.DataFrame({'GDP': [100, 110, 120, 130, 140, 150],
...                    'INV': [0, 10, 20, 30, 40, 50]})
>>> lagged_dataframe(df, lags=3)
   GDP  INV  GDP_Int  INV_Int  GDP_Lag01  INV_Lag01  GDP_Lag02  INV_Lag02  \
0  100    0        1        1        NaN        NaN        NaN        NaN
1  110   10        1        1      100.0        0.0        NaN        NaN
2  120   20        1        1      110.0       10.0      100.0        0.0
3  130   30        1        1      120.0       20.0      110.0       10.0
4  140   40        1        1      130.0       30.0      120.0       20.0
5  150   50        1        1      140.0       40.0      130.0       30.0
<BLANKLINE>
   GDP_Lag03  INV_Lag03
0        NaN        NaN
1        NaN        NaN
2        NaN        NaN
3      100.0        0.0
4      110.0       10.0
5      120.0       20.0
"""

import numpy as np
import pandas as pd


def constants(df):
    """Return a frame of ones with one ``<name>_Int`` column per column of ``df``.

    The result reuses ``df``'s index so it lines up row-for-row when it is
    concatenated next to ``df``. The ones take whatever dtype NumPy derives
    from ``df``'s values (integer input gives integer ones, float input
    gives ``1.0``).
    """
    new_columns = ["{}_Int".format(variable) for variable in df.columns]
    # np.ones_like converts the frame to a plain ndarray of matching shape,
    # so no explicit full-slice (``df.loc[:, :]``) is needed first.
    return pd.DataFrame(np.ones_like(df), index=df.index, columns=new_columns)


def lag(df, n):
    """Return ``df`` shifted by ``n`` periods, columns renamed ``<name>_LagNN``.

    ``NN`` is ``n`` zero-padded to at least two digits. Rows that have no
    source value after the shift are filled by ``DataFrame.shift`` (NaN in
    the module example).
    """
    new_columns = ["{}_Lag{:02d}".format(variable, n) for variable in df.columns]
    # shift() returns a new frame, so renaming its columns leaves ``df`` intact.
    shifted = df.shift(n)
    shifted.columns = new_columns
    return shifted


def lagged_dataframe(df, lags=1):
    """Return ``df`` joined column-wise with intercept and lagged columns.

    The column order is: the original columns, the ``_Int`` columns of ones,
    then the ``_Lag01`` ... ``_Lag<lags>`` blocks in increasing lag order.
    With ``lags`` of zero or less no lag block is produced and only the
    original and ``_Int`` columns are returned.
    """
    frames = [df, constants(df)]
    frames.extend(lag(df, i) for i in range(1, lags + 1))
    return pd.concat(frames, axis=1)


if __name__ == "__main__":
    import doctest

    # NORMALIZE_WHITESPACE lets the expected table above omit the trailing
    # padding that pandas emits on wrapped rows.
    doctest.testmod(verbose=True, optionflags=doctest.NORMALIZE_WHITESPACE)
I’m creating a `pandas.DataFrame` out of lags from an existing `pandas.DataFrame`, using `DataFrame.shift` and `pandas.concat`. There are two aspects I don’t like about this code:
- Using `np.ones_like` to create constants;
- Building up a list of lag tables and using `pandas.concat` to combine them.