Interpretability

Silverkite generates easily interpretable forecasting models when using its default ML algorithms (e.g. Ridge). This is because, after transforming the raw features into basis functions (the transformed features), the model has an additive structure. Silverkite can therefore break each forecast down into summable components, e.g. long-term growth, seasonality, holidays and events, short-term effects (auto-regression), regressor impact, etc.

The approach to generate these breakdowns consists of two steps:

  1. Group the transformed variables into various meaningful groups.

  2. Calculate the sum of the features multiplied by their regression coefficients within each group.

These breakdowns then can be used to answer questions such as:

  • Question 1: How is the forecast value generated?

  • Question 2: What is driving the change of the forecast as new data comes in?

Forecast components can also help us analyze model behavior and sensitivity. While it is not feasible to compare a large set of individual features across two model settings, it is quite practical and informative to compare a few well-defined components.
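To make the mechanics concrete, here is a minimal, self-contained sketch of the two steps above, using a toy design matrix with made-up feature names and coefficients. It only illustrates the idea of coefficient-weighted column sums per group; it is not the Greykite API.

 # Toy illustration of a forecast breakdown: group the transformed features,
 # then sum coefficient * feature within each group.
 import pandas as pd

 x_mat = pd.DataFrame({                      # toy design matrix (transformed features)
     "ct1": [0.1, 0.2, 0.3],                 # growth term
     "sin1_tow_weekly": [0.5, -0.5, 0.0],    # weekly seasonality term
     "y_lag1": [0.9, 1.1, 1.0]})             # autoregressive lag
 beta = pd.Series({"ct1": 2.0, "sin1_tow_weekly": -1.0, "y_lag1": 0.5})

 groups = {                                  # step 1: group the transformed features
     "trend": ["ct1"],
     "seasonality": ["sin1_tow_weekly"],
     "AR": ["y_lag1"]}

 # step 2: within each group, sum the features multiplied by their coefficients
 components = pd.DataFrame({
     name: (x_mat[cols] * beta[cols]).sum(axis=1)
     for name, cols in groups.items()})
 print(components)
 print(components.sum(axis=1))               # adding the components recovers x_mat @ beta

Because the model is linear in the transformed features, the components add back up to the model's prediction, which is exactly what makes the breakdown interpretable.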

 # required imports
 import plotly
 import warnings
 import pandas as pd
 from greykite.framework.benchmark.data_loader_ts import DataLoaderTS
 from greykite.framework.templates.autogen.forecast_config import EvaluationPeriodParam
 from greykite.framework.templates.autogen.forecast_config import ForecastConfig
 from greykite.framework.templates.autogen.forecast_config import MetadataParam
 from greykite.framework.templates.autogen.forecast_config import ModelComponentsParam
 from greykite.framework.templates.forecaster import Forecaster
 from greykite.framework.templates.model_templates import ModelTemplateEnum
 from greykite.framework.utils.result_summary import summarize_grid_search_results
 from greykite.common.viz.timeseries_plotting import plot_multivariate
 warnings.filterwarnings("ignore")

Function to load and prepare data

This is the code to load and prepare the daily bike-sharing data from Washington, D.C.

 def prepare_bikesharing_data():
     """Loads bike-sharing data and adds proper regressors."""
     dl = DataLoaderTS()
     agg_func = {"count": "sum", "tmin": "mean", "tmax": "mean", "pn": "mean"}
     df = dl.load_bikesharing(agg_freq="daily", agg_func=agg_func)

     # There are some zero values, which cause issues for MAPE
     # Adding a small number to all values avoids that issue
     value_col = "count"
     df[value_col] += 10
     # The last day is dropped because it may be incomplete (the original data is hourly)
     df.drop(df.tail(1).index, inplace=True)
     # We only use data from 2018 for demonstration purposes (run time is shorter)
     df = df.loc[df["ts"] > "2018-01-01"]
     df.reset_index(drop=True, inplace=True)

     print(f"\n df.tail(): \n {df.tail()}")

     # Creates useful regressors from existing raw regressors
     df["bin_pn"] = (df["pn"] > 5).map(float)
     df["bin_heavy_pn"] = (df["pn"] > 20).map(float)
     df.columns = [
         "ts",
         value_col,
         "regressor_tmin",
         "regressor_tmax",
         "regressor_pn",
         "regressor_bin_pn",
         "regressor_bin_heavy_pn"]

     forecast_horizon = 7
     train_df = df.copy()
     test_df = df.tail(forecast_horizon).reset_index(drop=True)
     # When using the pipeline (as done in ``fit_forecast`` below),
     # fitting and prediction are done in one step.
     # Therefore, for demonstration purposes, we remove the response values of the last 7 days.
     # This is needed because we are using regressors,
     # and future regressor data must be appended to ``df``.
     # We mimic that by removing the response values for the forecast period.
     train_df.loc[(len(train_df) - forecast_horizon):, value_col] = None

     print(f"train_df shape: \n {train_df.shape}")
     print(f"test_df shape: \n {test_df.shape}")
     print(f"train_df.tail(14): \n {train_df.tail(14)}")
     print(f"test_df: \n {test_df}")

     return {
         "train_df": train_df,
         "test_df": test_df}

Function to fit Silverkite

This is the code for fitting a Silverkite model to the data.

 def fit_forecast(
         df,
         time_col,
         value_col,
         forecast_horizon):
     """Fits a daily model for this use case.
     The daily model is a generic Silverkite model with regressors."""

     meta_data_params = MetadataParam(
         time_col=time_col,
         value_col=value_col,
         freq="D",
     )

     # Autoregression configuration to be used in the model
     autoregression = {
         "autoreg_dict": {
             "lag_dict": {"orders": [1, 2, 3]},
             "agg_lag_dict": {
                 "orders_list": [[7, 7*2, 7*3]],
                 "interval_list": [(1, 7), (8, 7*2)]},
             "series_na_fill_func": lambda s: s.bfill().ffill()},
         # ``fast_simulation`` speeds up simulation-based forecasts when autoregression is used
         "fast_simulation": True
     }

     # Changepoints configuration
     # The config includes changepoints both in trend and seasonality
     changepoints = {
         "changepoints_dict": {
             "method": "auto",
             "yearly_seasonality_order": 15,
             "resample_freq": "2D",
             "actual_changepoint_min_distance": "100D",
             "potential_changepoint_distance": "50D",
             "no_changepoint_distance_from_end": "50D"},
         "seasonality_changepoints_dict": {
             "method": "auto",
             "yearly_seasonality_order": 15,
             "resample_freq": "2D",
             "actual_changepoint_min_distance": "100D",
             "potential_changepoint_distance": "50D",
             "no_changepoint_distance_from_end": "50D"}
         }

     regressor_cols = [
         "regressor_tmin",
         "regressor_bin_pn",
         "regressor_bin_heavy_pn",
     ]

     # Model parameters
     model_components = ModelComponentsParam(
         growth=dict(growth_term="linear"),
         seasonality=dict(
             yearly_seasonality=[15],
             quarterly_seasonality=[False],
             monthly_seasonality=[False],
             weekly_seasonality=[7],
             daily_seasonality=[False]
         ),
         custom=dict(
             fit_algorithm_dict=dict(fit_algorithm="ridge"),
             extra_pred_cols=None,
             normalize_method="statistical"
         ),
         regressors=dict(regressor_cols=regressor_cols),
         autoregression=autoregression,
         uncertainty=dict(uncertainty_dict=None),
         events=dict(holiday_lookup_countries=["US"]),
         changepoints=changepoints
      )

     # Evaluation is done on the same ``forecast_horizon`` as desired for the output
     evaluation_period_param = EvaluationPeriodParam(
         test_horizon=None,
         cv_horizon=forecast_horizon,
         cv_min_train_periods=365*2,
         cv_expanding_window=True,
         cv_use_most_recent_splits=False,
         cv_periods_between_splits=None,
         cv_periods_between_train_test=0,
         cv_max_splits=5,
     )

     # Runs the forecast model using "SILVERKITE" template
     forecaster = Forecaster()
     result = forecaster.run_forecast_config(
         df=df,
         config=ForecastConfig(
             model_template=ModelTemplateEnum.SILVERKITE.name,
             coverage=0.95,
             forecast_horizon=forecast_horizon,
             metadata_param=meta_data_params,
             evaluation_period_param=evaluation_period_param,
             model_components_param=model_components
         )
     )

     # Gets cross-validation results
     grid_search = result.grid_search
     cv_results = summarize_grid_search_results(
         grid_search=grid_search,
         decimals=2,
         cv_report_metrics=None)
     cv_results = cv_results.transpose()
     cv_results = pd.DataFrame(cv_results)
     cv_results.columns = ["err_value"]
     cv_results["err_name"] = cv_results.index
     cv_results = cv_results.reset_index(drop=True)
     cv_results = cv_results[["err_name", "err_value"]]

     print(f"\n cv_results: \n {cv_results}")

     return result

Loads and prepares data

The data is loaded and some information about the input data is printed. We use the number of daily rented bikes in Washington DC over time. The data is augmented with weather data (precipitation, min/max daily temperature).

 data = prepare_bikesharing_data()

Out:

 df.tail():
             ts  count  tmin  tmax   pn
602 2019-08-27  12216  17.2  26.7  0.0
603 2019-08-28  11401  18.3  27.8  0.0
604 2019-08-29  12685  16.7  28.9  0.0
605 2019-08-30  12097  14.4  32.8  0.0
606 2019-08-31  11281  17.8  31.1  0.0
train_df shape:
 (607, 7)
test_df shape:
 (7, 7)
train_df.tail(14):
             ts    count  regressor_tmin  regressor_tmax  regressor_pn  regressor_bin_pn  regressor_bin_heavy_pn
593 2019-08-18   9655.0            22.2            35.6           0.3               0.0                     0.0
594 2019-08-19  10579.0            21.1            37.2           0.0               0.0                     0.0
595 2019-08-20   8898.0            22.2            36.1           0.0               0.0                     0.0
596 2019-08-21  11648.0            21.7            35.0           1.8               0.0                     0.0
597 2019-08-22  11724.0            21.7            35.0          30.7               1.0                     1.0
598 2019-08-23   8158.0            17.8            23.3           1.8               0.0                     0.0
599 2019-08-24  12475.0            16.7            26.1           0.0               0.0                     0.0
600 2019-08-25      NaN            15.6            26.7           0.0               0.0                     0.0
601 2019-08-26      NaN            17.2            25.0           0.0               0.0                     0.0
602 2019-08-27      NaN            17.2            26.7           0.0               0.0                     0.0
603 2019-08-28      NaN            18.3            27.8           0.0               0.0                     0.0
604 2019-08-29      NaN            16.7            28.9           0.0               0.0                     0.0
605 2019-08-30      NaN            14.4            32.8           0.0               0.0                     0.0
606 2019-08-31      NaN            17.8            31.1           0.0               0.0                     0.0
test_df:
           ts  count  regressor_tmin  regressor_tmax  regressor_pn  regressor_bin_pn  regressor_bin_heavy_pn
0 2019-08-25  11634            15.6            26.7           0.0               0.0                     0.0
1 2019-08-26  11747            17.2            25.0           0.0               0.0                     0.0
2 2019-08-27  12216            17.2            26.7           0.0               0.0                     0.0
3 2019-08-28  11401            18.3            27.8           0.0               0.0                     0.0
4 2019-08-29  12685            16.7            28.9           0.0               0.0                     0.0
5 2019-08-30  12097            14.4            32.8           0.0               0.0                     0.0
6 2019-08-31  11281            17.8            31.1           0.0               0.0                     0.0
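Before fitting, it can help to glance at the training series. This optional sketch reuses the plot_multivariate helper already imported above; the call pattern mirrors its later use in this tutorial, and only the title and column selection are chosen here for illustration.

 # Optional (sketch): quick look at the training series
 fig = plot_multivariate(
     df=data["train_df"][["ts", "count"]],
     x_col="ts",
     title="Daily bike-sharing count (training data)",
     ylabel="count")
 plotly.io.show(fig)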

Fits model to daily data

In this step we fit a Silverkite model to the data, using weather regressors, holidays, auto-regression, etc.

 df = data["train_df"]
 time_col = "ts"
 value_col = "count"
 forecast_horizon = 7

 result = fit_forecast(
     df=df,
     time_col=time_col,
     value_col=value_col,
     forecast_horizon=forecast_horizon)
 trained_estimator = result.model[-1]
 # Checks model coefficients and p-values
 print("\n Model Summary:")
 print(trained_estimator.summary())

Out:

Fitting 1 folds for each of 1 candidates, totalling 1 fits

 cv_results:
                                              err_name                                          err_value
0                                      rank_test_MAPE                                                  1
1                                      mean_test_MAPE                                              10.28
2                                     split_test_MAPE                                           (10.28,)
3                                     mean_train_MAPE                                              21.66
4                                              params                                                 []
5                 param_estimator__yearly_seasonality                                                 15
6                 param_estimator__weekly_seasonality                                                  7
7                   param_estimator__uncertainty_dict                                               None
8                  param_estimator__training_fraction                                               None
9                  param_estimator__train_test_thresh                                               None
10                   param_estimator__time_properties  {'period': 86400, 'simple_freq': SimpleTimeFre...
11                    param_estimator__simulation_num                                                 10
12     param_estimator__seasonality_changepoints_dict  {'method': 'auto', 'yearly_seasonality_order':...
13                    param_estimator__regressor_cols  [regressor_tmin, regressor_bin_pn, regressor_b...
14             param_estimator__regression_weight_col                                               None
15             param_estimator__quarterly_seasonality                                              False
16              param_estimator__origin_for_time_vars                                               None
17                  param_estimator__normalize_method                                        statistical
18               param_estimator__monthly_seasonality                                              False
19              param_estimator__min_admissible_value                                               None
20  param_estimator__max_weekly_seas_interaction_o...                                                  2
21  param_estimator__max_daily_seas_interaction_order                                                  5
22              param_estimator__max_admissible_value                                               None
23             param_estimator__lagged_regressor_dict                                               None
24      param_estimator__holidays_to_model_separately                                               auto
25         param_estimator__holiday_pre_post_num_dict                                               None
26              param_estimator__holiday_pre_num_days                                                  2
27             param_estimator__holiday_post_num_days                                                  2
28          param_estimator__holiday_lookup_countries                                               [US]
29                       param_estimator__growth_term                                             linear
30                param_estimator__fit_algorithm_dict                         {'fit_algorithm': 'ridge'}
31              param_estimator__feature_sets_enabled                                               auto
32                   param_estimator__fast_simulation                                               True
33                   param_estimator__extra_pred_cols                                               None
34                param_estimator__explicit_pred_cols                                               None
35                    param_estimator__drop_pred_cols                                               None
36                 param_estimator__daily_seasonality                                              False
37               param_estimator__daily_event_df_dict                                               None
38                 param_estimator__changepoints_dict  {'method': 'auto', 'yearly_seasonality_order':...
39                      param_estimator__autoreg_dict  {'lag_dict': {'orders': [1, 2, 3]}, 'agg_lag_d...
40                  param_estimator__auto_seasonality                                              False
41                      param_estimator__auto_holiday                                              False
42                       param_estimator__auto_growth                                              False
43                                   split_train_MAPE                                           (21.66,)
44                                      mean_fit_time                                               4.79
45                                       std_fit_time                                                  0
46                                    mean_score_time                                               10.1
47                                     std_score_time                                                  0
48                                   split0_test_MAPE                                              10.28
49                                      std_test_MAPE                                                  0
50                                  split0_train_MAPE                                              21.66
51                                     std_train_MAPE                                                  0

 Model Summary:
================================ Model Summary =================================

Number of observations: 600,   Number of features: 134
Method: Ridge regression
Number of nonzero features: 133
Regularization parameter: 174.3

Residuals:
         Min           1Q       Median           3Q          Max
     -7534.0       -895.1        89.89        997.0       7621.0

             Pred_col Estimate Std. Err Pr(>)_boot sig. code                95%CI
            Intercept   9633.0    73.94     <2e-16       ***     (9504.0, 9787.0)
 events_Christmas Day   -145.0    77.24     <2e-16       ***  (-185.9, 7.917e-28)
  events_C...as Day-1   -135.8     71.9     <2e-16       ***  (-174.6, 8.152e-28)
  events_C...as Day-2   -51.78    29.98      0.004        **  (-81.94, 8.064e-28)
  events_C...as Day+1    -72.6     40.1     <2e-16       ***  (-104.4, 8.861e-28)
  events_C...as Day+2   -23.42    17.43      0.120             (-51.7, 8.766e-28)
  events_I...ence Day    45.72    22.53      0.018         *  (-9.712e-28, 79.14)
  events_I...ce Day-1   -27.68    20.22      0.142                (-64.06, 9.238)
  events_I...ce Day-2   -14.51    28.19      0.572                (-64.88, 38.62)
  events_I...ce Day+1   -15.82    15.06      0.236                (-45.41, 13.12)
  events_I...ce Day+2   -65.05    47.52      0.132                 (-139.9, 12.1)
     events_Labor Day   -61.41    32.19      0.006        **  (-88.66, 6.223e-28)
   events_Labor Day-1    92.37    47.65     <2e-16       ***  (-6.695e-28, 122.2)
   events_Labor Day-2    -59.2    32.58      0.010         *  (-92.38, 5.980e-28)
   events_Labor Day+1   -51.36    29.33      0.024         *  (-83.99, 5.986e-28)
   events_Labor Day+2   -3.448    11.31      0.486                (-30.33, 19.85)
  events_Memorial Day   -42.14    21.76      0.026         *  (-74.79, 1.215e-27)
  events_M...al Day-1    125.1    72.99      0.024         *  (-1.820e-27, 223.7)
  events_M...al Day-2   -29.22    21.61      0.144                 (-66.9, 13.46)
  events_M...al Day+1   -57.53    51.18      0.302                (-130.4, 33.14)
  events_M...al Day+2    -35.5    19.77      0.044         *  (-65.59, 1.754e-27)
 events_New Years Day   -46.89     26.3      0.012         *  (-78.04, 8.683e-28)
  events_N...rs Day-1    -42.7    25.41      0.026         *  (-73.79, 8.004e-28)
  events_N...rs Day-2     7.73    11.58      0.358                (-17.03, 30.31)
  events_N...rs Day+1   -23.69    32.25      0.476                (-82.48, 32.99)
  events_N...rs Day+2    33.59     33.2      0.324                (-29.95, 91.51)
         events_Other   -129.2    49.37      0.008        **     (-210.3, -16.32)
       events_Other-1    29.81    51.27      0.574                (-75.87, 118.6)
       events_Other-2   -107.1    44.54      0.022         *     (-180.5, -8.931)
       events_Other+1     32.0    46.15      0.482                (-51.08, 116.1)
       events_Other+2   -45.72    67.19      0.494                (-162.4, 88.56)
  events_Thanksgiving   -184.4    95.57     <2e-16       ***  (-225.9, 6.168e-28)
  events_T...giving-1   -46.46    26.53      0.016         *  (-73.42, 6.497e-28)
  events_T...giving-2    1.876    8.613      0.506                (-20.18, 17.97)
  events_T...giving+1   -128.7    65.43     <2e-16       ***  (-172.3, 6.463e-28)
  events_T...giving+2   -53.12    31.73      0.036         *  (-90.99, 1.163e-27)
  events_Veterans Day   -28.32    19.92      0.102                (-61.6, 0.9676)
  events_V...ns Day-1   -36.49     24.0      0.072         .  (-74.64, 6.280e-28)
  events_V...ns Day-2   -77.61     41.2     <2e-16       ***  (-104.4, 6.548e-28)
  events_V...ns Day+1    32.43    19.94      0.080         .      (-4.655, 58.78)
  events_V...ns Day+2    4.144    15.34      0.542                (-32.25, 34.99)
        str_dow_2-Tue    20.87    29.08      0.470                (-36.43, 79.57)
        str_dow_3-Wed    19.99    23.17      0.406                (-25.45, 61.91)
        str_dow_4-Thu    28.17    26.45      0.300                (-26.37, 79.17)
        str_dow_5-Fri    40.95    32.01      0.210                (-20.11, 102.0)
        str_dow_6-Sat   -8.635    38.43      0.840                (-77.99, 67.03)
        str_dow_7-Sun   -105.5    28.85      0.002        **     (-163.1, -54.52)
       regressor_tmin    598.5    62.21     <2e-16       ***       (456.7, 701.9)
     regressor_bin_pn   -835.9     61.1     <2e-16       ***     (-941.6, -699.5)
  regresso...heavy_pn   -364.5    80.62     <2e-16       ***     (-530.8, -217.2)
                  ct1   -8.196     30.9      0.810                (-67.44, 49.18)
       is_weekend:ct1   -13.61    24.44      0.586                (-60.22, 32.66)
    str_dow_2-Tue:ct1     21.7    23.97      0.368                (-27.01, 67.37)
    str_dow_3-Wed:ct1    14.95     20.4      0.474                (-26.38, 51.67)
    str_dow_4-Thu:ct1    4.774     22.7      0.830                (-36.84, 53.22)
    str_dow_5-Fri:ct1    8.282     26.9      0.738                (-47.55, 57.74)
    str_dow_6-Sat:ct1    17.64    32.06      0.614                (-42.43, 78.95)
    str_dow_7-Sun:ct1   -36.09    28.37      0.200                 (-89.29, 22.5)
    cp0_2018_07_21_00   -155.5    25.68     <2e-16       ***     (-194.9, -97.24)
  is_weeke...07_21_00   -26.61    28.84      0.338                (-76.35, 32.09)
  str_dow_...07_21_00   -38.97    35.66      0.270                (-110.4, 31.26)
  str_dow_...07_21_00   -33.33    22.34      0.148                 (-72.5, 10.25)
  str_dow_...07_21_00   -14.47    30.92      0.658                (-77.68, 45.39)
  str_dow_...07_21_00   -88.07    42.61      0.042         *     (-170.8, -7.553)
  str_dow_...07_21_00    15.77    44.21      0.724                (-73.23, 102.0)
  str_dow_...07_21_00   -52.68    43.61      0.238                (-129.4, 41.76)
  ct1:sin1_tow_weekly    20.52    22.18      0.364                (-21.43, 62.33)
  ct1:cos1_tow_weekly   -35.92    23.38      0.134                (-82.59, 6.589)
  ct1:sin2_tow_weekly    30.35    21.04      0.128                (-11.45, 70.39)
  ct1:cos2_tow_weekly   -30.92    24.69      0.222                 (-77.93, 18.5)
  cp0_2018...w_weekly   -2.809    26.49      0.878                (-55.41, 45.86)
  cp0_2018...w_weekly   -25.97    32.89      0.432                (-92.45, 40.25)
  cp0_2018...w_weekly   -12.22    33.21      0.690                (-82.95, 46.34)
  cp0_2018...w_weekly   -61.22    31.85      0.042         *     (-121.2, -4.486)
      sin1_tow_weekly    59.81    27.95      0.038         *       (1.393, 115.4)
      cos1_tow_weekly   -56.32     30.1      0.050         .     (-113.0, -4.038)
      sin2_tow_weekly    59.61    30.24      0.046         *       (3.018, 120.4)
      cos2_tow_weekly    27.39    32.29      0.378                (-31.46, 90.84)
      sin3_tow_weekly    9.787    30.41      0.746                (-46.12, 71.02)
      cos3_tow_weekly    35.32    28.01      0.226                (-14.59, 93.59)
      sin4_tow_weekly   -9.787    30.41      0.746                (-71.02, 46.12)
      cos4_tow_weekly    35.32    28.01      0.226                (-14.59, 93.59)
      sin5_tow_weekly   -59.61    30.24      0.046         *     (-120.4, -3.018)
      cos5_tow_weekly    27.39    32.29      0.378                (-31.46, 90.84)
      sin6_tow_weekly   -59.81    27.95      0.038         *     (-115.4, -1.393)
      cos6_tow_weekly   -56.32     30.1      0.050         .     (-113.0, -4.038)
      sin7_tow_weekly    63.92    26.24      0.012         *       (10.52, 112.1)
      cos7_tow_weekly       0.       0.      1.000                       (0., 0.)
      sin1_ct1_yearly    14.61    46.95      0.748                (-84.11, 94.62)
      cos1_ct1_yearly   -523.4    38.97     <2e-16       ***     (-586.6, -439.0)
      sin2_ct1_yearly   -203.5    45.51     <2e-16       ***     (-294.1, -108.1)
      cos2_ct1_yearly   -83.95    56.28      0.162                (-180.6, 34.07)
      sin3_ct1_yearly   -69.93    54.06      0.182                (-170.1, 41.85)
      cos3_ct1_yearly   -40.88    49.84      0.418                (-133.4, 63.62)
      sin4_ct1_yearly    34.08    50.49      0.492                (-72.56, 129.7)
      cos4_ct1_yearly    36.63    55.76      0.550                (-52.64, 152.3)
      sin5_ct1_yearly   -57.09    53.62      0.292                (-155.4, 50.06)
      cos5_ct1_yearly   -59.11    57.11      0.316                (-171.0, 54.01)
      sin6_ct1_yearly   -17.35    52.76      0.744                (-118.0, 77.28)
      cos6_ct1_yearly   -198.7    58.58     <2e-16       ***     (-295.9, -66.54)
      sin7_ct1_yearly   -23.14    54.46      0.672                (-132.0, 80.12)
      cos7_ct1_yearly    60.45    61.08      0.326                 (-51.5, 183.9)
      sin8_ct1_yearly    16.92    63.72      0.806                (-101.6, 130.8)
      cos8_ct1_yearly    29.37    57.44      0.620                (-77.54, 143.5)
      sin9_ct1_yearly   -21.39    60.04      0.718                (-141.6, 88.29)
      cos9_ct1_yearly   -32.55    54.76      0.544                (-129.3, 83.25)
     sin10_ct1_yearly    95.53    59.54      0.098         .      (-24.03, 209.3)
     cos10_ct1_yearly   -24.85    55.15      0.662                (-126.2, 79.35)
     sin11_ct1_yearly   -14.08    56.79      0.844                (-129.9, 92.91)
     cos11_ct1_yearly   -9.692    60.93      0.904                (-129.0, 101.1)
     sin12_ct1_yearly   -42.91    54.95      0.420                (-158.5, 56.08)
     cos12_ct1_yearly    119.3    59.78      0.038         *       (7.019, 231.5)
     sin13_ct1_yearly   -78.89    56.16      0.150                 (-188.1, 32.5)
     cos13_ct1_yearly   -50.95    59.49      0.386                (-167.6, 62.37)
     sin14_ct1_yearly   -54.58    54.88      0.302                (-171.6, 50.61)
     cos14_ct1_yearly   -22.74    59.79      0.700                (-143.8, 90.64)
     sin15_ct1_yearly   -165.8    57.89     <2e-16       ***     (-276.7, -48.78)
     cos15_ct1_yearly   -36.63    60.13      0.568                (-150.3, 82.42)
  sin1_con...07_21_00    38.06    46.58      0.378                (-64.74, 122.8)
  cos1_con...07_21_00   -136.9    53.88      0.010         *     (-252.7, -35.97)
  sin2_con...07_21_00    32.56    45.06      0.472                (-59.63, 113.8)
  cos2_con...07_21_00   -172.8    61.73      0.004        **     (-288.6, -43.99)
  sin3_con...07_21_00   -27.26    56.41      0.628                (-143.6, 76.76)
  cos3_con...07_21_00   -3.896    48.86      0.936                 (-91.08, 97.2)
  sin4_con...07_21_00   -18.09    43.46      0.702                (-108.8, 62.85)
  cos4_con...07_21_00    43.25    53.98      0.422                 (-67.3, 151.9)
  sin5_con...07_21_00    45.09    59.12      0.434                (-66.86, 166.5)
  cos5_con...07_21_00    67.98    48.38      0.156                (-24.62, 156.7)
               y_lag1    606.3    81.57     <2e-16       ***       (419.2, 733.3)
               y_lag2    89.37    66.61      0.146                (-32.68, 229.8)
               y_lag3    158.1    70.68      0.030         *       (26.69, 305.2)
     y_avglag_7_14_21    331.3    57.69     <2e-16       ***       (209.5, 424.2)
      y_avglag_1_to_7    235.6    43.37     <2e-16       ***       (173.6, 331.3)
     y_avglag_8_to_14    334.5    57.35     <2e-16       ***       (218.0, 445.0)
Signif. Code: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Multiple R-squared: 0.7796,   Adjusted R-squared: 0.7502
F-statistic: 22.675 on 70 and 528 DF,   p-value: 1.110e-16
Model AIC: 12944.0,   model BIC: 13259.0

WARNING: the F-ratio and its p-value on regularized methods might be misleading, they are provided only for reference purposes.
WARNING: the following columns have estimated coefficients equal to zero, while ridge is not supposed to have zero estimates. This is probably because these columns are degenerate in the design matrix. Make sure these columns do not have constant values.
['cos7_tow_weekly']
WARNING: the following columns are degenerate, do you really want to include them in your model? This may cause some of them to show unrealistic significance. Consider using the `drop_degenerate` transformer.
['Intercept', 'cos7_tow_weekly']
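The printed summary lists well over a hundred coefficients. If your Greykite version provides ModelSummary.get_coef_summary (an assumption here; verify against your installed version), you can filter the coefficient table by category instead of scanning the full dump. A hedged sketch:

 # Sketch: filter the coefficient table by category.
 # Assumes ``get_coef_summary`` exists in your Greykite version; verify before relying on it.
 summary = trained_estimator.summary()
 event_coefs = summary.get_coef_summary(is_event=True, return_df=True)  # event/holiday terms only
 print(event_coefs)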

Grouping of variables

Regex patterns are used to group variables in the breakdown plot. Each group is given by one key of this dictionary. The grouping is done on variable names, and each group can list multiple regex patterns; a variable is assigned to a group if it matches ANY of that group's patterns. Note that this grouping assumes regressor variables start with “regressor_”. Also note that the order of the groups matters (dictionary insertion order is guaranteed in Python 3.7+): variables matched by an earlier group are not considered again by later groups. Variables that do not match any group are collected into “OTHER”. The following breakdown dictionary should work for many use cases, but users can customize it as needed. A small standalone demonstration of this “first match wins” behavior follows the dictionary below.

 grouping_regex_patterns_dict = {
     "regressors": "regressor_.*",  # regressor effects
     "AR": ".*lag",  # autoregression component
     "events": ".*events_.*",  # events and holidays
     "seasonality": ".*quarter.*|.*month.*|.*C\(dow.*|.*C\(dow_hr.*|sin.*|cos.*|.*doq.*|.*dom.*|.*str_dow.*|.*is_weekend.*|.*tow_weekly.*",  # seasonality
     "trend": "ct1|ct2|ct_sqrt|ct3|ct_root3|.*changepoint.*",  # long term trend (includes changepoints)
 }
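As a sanity check on the ordering, the sketch below replays the “first match wins” idea with Python's re module on a few illustrative column names. The names are hypothetical and the matching is a simplification; the actual Greykite implementation may differ in matching details.

 # Sketch: replay the grouping logic on a few illustrative column names
 import re

 example_cols = [
     "regressor_tmin", "y_lag1", "events_Christmas Day",
     "sin1_tow_weekly", "str_dow_7-Sun", "ct1", "Intercept"]
 assigned = {}
 for col in example_cols:
     group = "OTHER"  # fallback group for unmatched columns
     for name, pattern in grouping_regex_patterns_dict.items():
         if re.search(pattern, col):
             group = name
             break  # earlier groups take precedence; later groups never see this column
     assigned[col] = group
 print(assigned)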

Creates forecast breakdown

The breakdown is generated for the observed data plus the prediction period (both available in df). Each component is centered around zero, and the sum of all components equals the forecast.

 breakdown_result = trained_estimator.forecast_breakdown(
     grouping_regex_patterns_dict=grouping_regex_patterns_dict,
     center_components=True,
     plt_title="forecast breakdowns")
 forecast_breakdown_df = breakdown_result["breakdown_df_with_index_col"]
 forecast_components_fig = breakdown_result["breakdown_fig"]
 plotly.io.show(forecast_components_fig)
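Because the breakdown is additive, the component columns (including the intercept and any “OTHER” column) should add back up to the forecast. A quick sanity check on the returned frame, assuming it contains a time/index column plus one numeric column per component:

 # Sanity check (sketch): the numeric component columns should sum to the forecast
 numeric_cols = forecast_breakdown_df.select_dtypes(include="number").columns
 reconstructed = forecast_breakdown_df[numeric_cols].sum(axis=1)
 print(forecast_breakdown_df.head())
 print(reconstructed.head())  # should match the model's forecast values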

Standardization of the components

Next we provide a more “standardized” view of the breakdown. This is achieved by dividing all components by the mean of the absolute value of the observed series. By doing so, the intercept is mapped to approximately 1, and changes on the y-axis can be read relative to the average magnitude of the series. The sum of all components at each time point equals “forecast / obs_abs_mean”.

 column_grouping_result = breakdown_result["column_grouping_result"]
 component_cols = list(grouping_regex_patterns_dict.keys())
 forecast_breakdown_stdzd_df = forecast_breakdown_df.copy()
 obs_abs_mean = abs(df[value_col]).mean()
 for col in component_cols + ["Intercept", "OTHER"]:
     if col in forecast_breakdown_stdzd_df.columns:
         forecast_breakdown_stdzd_df[col] /= obs_abs_mean
 forecast_breakdown_stdzd_fig = plot_multivariate(
     df=forecast_breakdown_stdzd_df,
     x_col=time_col,
     title="forecast breakdowns divided by mean of abs value of response",
     ylabel="component")
 forecast_breakdown_stdzd_fig.update_layout(yaxis_range=[-1.1, 1.1])
 plotly.io.show(forecast_breakdown_stdzd_fig)

Breaking down the predictions

Next we perform a prediction and generate a breakdown plot for that prediction.

 test_df = data["test_df"].reset_index()
 test_df[value_col] = None
 print(f"\n test_df: \n {test_df}")
 pred_df = trained_estimator.predict(test_df)
 forecast_x_mat = trained_estimator.forecast_x_mat
 # Generate the breakdown plot
 breakdown_result = trained_estimator.forecast_breakdown(
     grouping_regex_patterns_dict=grouping_regex_patterns_dict,
     forecast_x_mat=forecast_x_mat,
     time_values=pred_df[time_col])

 breakdown_fig = breakdown_result["breakdown_fig"]
 plotly.io.show(breakdown_fig)

Out:

 test_df:
    index         ts count  regressor_tmin  regressor_tmax  regressor_pn  regressor_bin_pn  regressor_bin_heavy_pn
0      0 2019-08-25  None            15.6            26.7           0.0               0.0                     0.0
1      1 2019-08-26  None            17.2            25.0           0.0               0.0                     0.0
2      2 2019-08-27  None            17.2            26.7           0.0               0.0                     0.0
3      3 2019-08-28  None            18.3            27.8           0.0               0.0                     0.0
4      4 2019-08-29  None            16.7            28.9           0.0               0.0                     0.0
5      5 2019-08-30  None            14.4            32.8           0.0               0.0                     0.0
6      6 2019-08-31  None            17.8            31.1           0.0               0.0                     0.0
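The numbers behind this plot are available in the returned breakdown frame, which can be printed directly for the seven forecast days:

 # The breakdown values behind the plot, one row per forecast day
 print(breakdown_result["breakdown_df_with_index_col"])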

Demonstrating a scenario-based breakdown

We artificially inject a “bad weather” day into the test data on the second day of the prediction period. This lets us check whether the breakdown plot captures the resulting decrease in the collective regressors’ effect. The impact of the changed regressor values can be clearly seen in the updated breakdown.

 # Altering the test data.
 # We alter the normal weather conditions on the second day to heavy precipitation and low temperature.
 test_df["regressor_bin_pn"] = [0, 1, 0, 0, 0, 0, 0]
 test_df["regressor_bin_heavy_pn"] = [0, 1, 0, 0, 0, 0, 0]
 test_df["regressor_tmin"] = [15, 0, 15, 15,  15, 15, 15]
 print(f"altered test_df: \n {test_df}")

 # Gets predictions and the design matrix used during predictions.
 pred_df = trained_estimator.predict(test_df.reset_index())
 forecast_x_mat = trained_estimator.forecast_x_mat

 # Generates the breakdown plot.
 breakdown_result = trained_estimator.forecast_breakdown(
     grouping_regex_patterns_dict=grouping_regex_patterns_dict,
     forecast_x_mat=forecast_x_mat,
     time_values=pred_df[time_col])
 breakdown_fig = breakdown_result["breakdown_fig"]
 plotly.io.show(breakdown_fig)

Out:

altered test_df:
    index         ts count  regressor_tmin  regressor_tmax  regressor_pn  regressor_bin_pn  regressor_bin_heavy_pn
0      0 2019-08-25  None              15            26.7           0.0                 0                       0
1      1 2019-08-26  None               0            25.0           0.0                 1                       1
2      2 2019-08-27  None              15            26.7           0.0                 0                       0
3      3 2019-08-28  None              15            27.8           0.0                 0                       0
4      4 2019-08-29  None              15            28.9           0.0                 0                       0
5      5 2019-08-30  None              15            32.8           0.0                 0                       0
6      6 2019-08-31  None              15            31.1           0.0                 0                       0
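To quantify the scenario's impact day by day, the altered breakdown can be compared against the unaltered one from the previous section. The sketch below assumes you saved that earlier frame under the hypothetical name baseline_breakdown_df before altering test_df (Greykite does not create this variable for you).

 # Sketch: per-day impact of the weather scenario on the "regressors" component.
 # ``baseline_breakdown_df`` is a hypothetical copy of
 # ``breakdown_result["breakdown_df_with_index_col"]`` saved from the unaltered run.
 altered_breakdown_df = breakdown_result["breakdown_df_with_index_col"]
 if "regressors" in altered_breakdown_df.columns:
     impact = (
         altered_breakdown_df["regressors"].values
         - baseline_breakdown_df["regressors"].values)
     print("Change in the regressors component per forecast day:")
     print(impact)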
