Source code for greykite.algo.forecast.silverkite.forecast_simple_silverkite

# BSD 2-CLAUSE LICENSE

# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:

# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# original author: Albert Chen


from datetime import datetime
from typing import Dict
from typing import List
from typing import Optional
from typing import Type
from typing import Union

import pandas as pd

from greykite.algo.changepoint.adalasso.changepoint_detector import get_changepoints_dict
from greykite.algo.forecast.silverkite.constants.silverkite_column import SilverkiteColumn
from greykite.algo.forecast.silverkite.constants.silverkite_constant import SilverkiteConstant
from greykite.algo.forecast.silverkite.constants.silverkite_constant import default_silverkite_constant
from greykite.algo.forecast.silverkite.constants.silverkite_holiday import SilverkiteHoliday
from greykite.algo.forecast.silverkite.constants.silverkite_time_frequency import SilverkiteTimeFrequencyEnum
from greykite.algo.forecast.silverkite.forecast_silverkite import SilverkiteForecast
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import cols_interact
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import generate_holiday_events
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import get_event_pred_cols
from greykite.algo.forecast.silverkite.forecast_simple_silverkite_helper import patsy_categorical_term
from greykite.common import constants as cst
from greykite.common.constants import GROWTH_COL_ALIAS
from greykite.common.enums import SimpleTimeFrequencyEnum
from greykite.common.enums import TimeEnum
from greykite.common.features.timeseries_features import get_available_holidays_across_countries
from greykite.common.features.timeseries_features import get_changepoint_features_and_values_from_config
from greykite.common.logging import LoggingLevelEnum
from greykite.common.logging import log_message
from greykite.common.python_utils import unique_elements_in_list
from greykite.common.python_utils import update_dictionary
from greykite.common.time_properties_forecast import get_forecast_time_properties


[docs]class SimpleSilverkiteForecast(SilverkiteForecast):
    """A derived class of `~greykite.algo.forecast.silverkite.SilverkiteForecast`.
    Provides an alternative interface with simplified configuration parameters.
    Produces the same trained model output and uses the same predict functions.
    """
    def __init__(
            self,
            constants: SilverkiteConstant = default_silverkite_constant):
        """Initializes the forecaster and stores the constant classes it relies on.

        Parameters
        ----------
        constants : `SilverkiteConstant`, default ``default_silverkite_constant``
            Provider of the Silverkite constants. It is forwarded to the parent
            constructor and then queried for the time frequency enum, the
            holiday constants and the column constants, each of which is kept
            on the instance as a private attribute.
        """
        super().__init__(constants=constants)

        # Each getter is called exactly once, in the same order as before,
        # and its result is attached to the instance right away.
        time_frequency_enum: Type[SilverkiteTimeFrequencyEnum] = constants.get_silverkite_time_frequency_enum()
        self._silverkite_time_frequency_enum = time_frequency_enum

        holiday_constants: Type[SilverkiteHoliday] = constants.get_silverkite_holiday()
        self._silverkite_holiday = holiday_constants

        column_constants: Type[SilverkiteColumn] = constants.get_silverkite_column()
        self._silverkite_column = column_constants

[docs]    def convert_params(
            self,
            df: pd.DataFrame,
            time_col: str,
            value_col: str,
            time_properties: Optional[Dict] = None,
            freq: Optional[str] = None,
            forecast_horizon: Optional[int] = None,
            origin_for_time_vars: Optional[float] = None,
            train_test_thresh: Optional[datetime] = None,
            training_fraction: Optional[float] = 0.9,
            fit_algorithm: str = "ridge",
            fit_algorithm_params: Optional[Dict] = None,
            holidays_to_model_separately: Optional[Union[str, List[str]]] = "auto",
            holiday_lookup_countries: Optional[Union[str, List[str]]] = "auto",
            holiday_pre_num_days: int = 2,
            holiday_post_num_days: int = 2,
            holiday_pre_post_num_dict: Optional[Dict] = None,
            daily_event_df_dict: Optional[Dict] = None,
            changepoints_dict: Optional[Dict] = None,
            yearly_seasonality: Union[bool, str, int] = "auto",
            quarterly_seasonality: Union[bool, str, int] = "auto",
            monthly_seasonality: Union[bool, str, int] = "auto",
            weekly_seasonality: Union[bool, str, int] = "auto",
            daily_seasonality: Union[bool, str, int] = "auto",
            max_daily_seas_interaction_order: Optional[int] = None,
            max_weekly_seas_interaction_order: Optional[int] = None,
            autoreg_dict: Optional[Dict] = None,
            past_df: Optional[pd.DataFrame] = None,
            lagged_regressor_dict: Optional[Dict] = None,
            seasonality_changepoints_dict: Optional[Dict] = None,
            min_admissible_value: Optional[float] = None,
            max_admissible_value: Optional[float] = None,
            uncertainty_dict: Optional[Dict] = None,
            normalize_method: Optional[str] = None,
            growth_term: Optional[str] = "linear",
            regressor_cols: Optional[List[str]] = None,
            feature_sets_enabled: Optional[Union[bool, str, Dict[str, Optional[Union[bool, str]]]]] = "auto",
            extra_pred_cols: Optional[List[str]] = None,
            drop_pred_cols: Optional[List[str]] = None,
            explicit_pred_cols: Optional[List[str]] = None,
            regression_weight_col: Optional[str] = None,
            simulation_based: Optional[bool] = False,
            simulation_num: int = 10):
        """Converts parameters of
        :func:`~greykite.algo.forecast.silverkite.forecast_simple_silverkite` into those
        of :func:`~greykite.algo.forecast.forecast_silverkite.SilverkiteForecast::forecast`.

        Makes it easier to set parameters to ``SilverkiteForecast::forecast`` suitable for most forecasting problems.
        Provides data-aware defaults for seasonality and interaction terms. Provides a simple
        configuration of holidays from an internal holiday database, and user-friendly configuration
        for growth and regressors.

        These parameters can be set from a plain-text config (e.g. no pandas dataframes).
        The parameter list is intentionally flat to facilitate hyperparameter grid search. Every
        parameter is either a parameter of ``SilverkiteForecast::forecast`` or a tuning parameter.

        Notes
        -----
        The basic parameters are identical to ``SilverkiteForecast::forecast``.
        The more complex parameters are specified via config parameters:

        * ``daily_event_df_dict`` (via ``holiday*``)
        * ``fs_components_df`` (via ``*_seasonality``)
        * ``extra_pred_cols`` (via ``holiday*``, ``*seas*``, ``growth_term``,
          ``regressor_cols``, ``feature_sets_enabled``, ``extra_pred_cols``)

        Parameters
        ----------
        df : `pandas.DataFrame`
            A data frame which includes the timestamp column
            as well as the value column. This is the ``df`` for
            training the model, not for future prediction.
        time_col : `str`
            The column name in `df` representing time for the time series data
            The time column can be anything that can be parsed by pandas DatetimeIndex
        value_col: `str`
            The column name which has the value of interest to be forecasted
        time_properties : `dict` [`str`, `any`] or None, optional
            Time properties dictionary (likely produced by
            `~greykite.common.time_properties_forecast.get_forecast_time_properties`)
            with keys:

                ``"ts"`` : `UnivariateTimeSeries` or None
                    ``df`` converted to a ``UnivariateTimeSeries``.
                ``"period"`` : `int`
                    Period of each observation (i.e. minimum time between observations, in seconds).
                ``"simple_freq"`` : `SimpleTimeFrequencyEnum`
                    ``SimpleTimeFrequencyEnum`` member corresponding to data frequency.
                ``"num_training_points"`` : `int`
                    Number of observations for training.
                ``"num_training_days"`` : `int`
                    Number of days for training.
                ``"start_year"`` : `int`
                    Start year of the training period.
                ``"end_year"`` : `int`
                    End year of the forecast period.
                ``"origin_for_time_vars"`` : `float`
                    Continuous time representation of the first date in ``df``.

            In this function,

                - ``start_year`` and ``end_year`` are used to define ``daily_event_df_dict``.
                - ``simple_freq`` and ``num_training_days`` are used to define ``fs_components_df``.
                - ``simple_freq`` and ``num_training_days`` are used to set default ``feature_sets_enabled``.
                - ``origin_for_time_vars`` is used to set default ``origin_for_time_vars``.
                - the other parameters are ignored

            It is okay if ``num_training_points``, ``num_training_days``, ``start_year``, ``end_year``
            are computed for a superset of ``df``. This allows CV splits and backtest, which train on
            partial data, to use the same data-aware model parameters as the forecast on all training data.

            If None, the values are computed for ``df``. This corresponds to using the same
            modeling *approach* on the CV splits and backtest from `forecast_pipeline`, without
            requiring the same parameters. In this case, make sure ``forecast_horizon`` is at
            least as large as the test period for the split, to ensure all holidays are captured.
        freq : `str` or None, optional, default `None`
            Frequency of input data.
            Used to compute ``time_properties`` only if ``time_properties is None``.
            Frequency strings can have multiples, e.g. '5H'.
            See https://pandas.pydata.org/pandas-docs/stable/user_guide/timeseries.html#offset-aliases
            for a list of frequency aliases.
            If None, inferred by `pandas.infer_freq`.
            Provide this parameter if ``df`` has missing timepoints.
        forecast_horizon : `int` or None, optional, default `None`
            Number of periods to forecast into the future. Must be > 0.
            Used to compute ``time_properties`` only if ``time_properties is None``.
            If None, default is determined by input data frequency.
            Used to determine forecast end date, to pull the appropriate holiday data.
            Should be at least as large as the prediction period (if this function
            is called from ``forecast_pipeline``, the prediction period for different
            splits is set via ``cv_horizon``, ``test_horizon``, ``forecast_horizon``).
        origin_for_time_vars : `float` or None, optional, default `None`
            The time origin used to create continuous variables for time.
            If None, uses the value from ``time_properties``.
        train_test_thresh : `datetime.datetime` or None, optional, default `None`
            e.g. datetime.datetime(2019, 6, 30)
            The threshold for training and testing split.
            Note that the final returned model is trained using all data.
            If None, training split is based on ``training_fraction``.
        training_fraction : `float` or None, optional, default 0.9
            The fraction of data used for training (0.0 to 1.0)
            Used only if ``train_test_thresh is None``.
            If this is also None or 1.0, then we skip testing
            and train on the entire dataset.
        fit_algorithm : `str`, optional, default "ridge"
            The type of predictive model used in fitting.

            See `~greykite.algo.common.ml_models.fit_model_via_design_matrix`
            for available options and their parameters.
        fit_algorithm_params : `dict` or None, optional, default None
            Parameters passed to the requested fit_algorithm.
            If None, uses the defaults in `~greykite.algo.common.ml_models.fit_model_via_design_matrix`.
        holiday_lookup_countries : `list` [`str`] or "auto" or None, optional, default "auto"
            The countries that contain the holidays you intend to model
            (``holidays_to_model_separately``).

                * If "auto", uses a default list of countries
                  that contain the default ``holidays_to_model_separately``.
                  See `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.HOLIDAY_LOOKUP_COUNTRIES_AUTO`.
                * If a list, must be a list of country names.
                * If None or an empty list, no holidays are modeled.

        holidays_to_model_separately : `list` [`str`] or "auto" or `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES` or None, optional, default "auto"  # noqa: E501
            Which holidays to include in the model.
            The model creates a separate key, value for each item in ``holidays_to_model_separately``.
            The other holidays in the countries are grouped together as a single effect.

                * If "auto", uses a default list of important holidays.
                  See `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.HOLIDAYS_TO_MODEL_SEPARATELY_AUTO`.
                * If `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES`,
                  uses all available holidays in ``holiday_lookup_countries``. This can often
                  create a model that has too many parameters, and should typically be avoided.
                * If a list, must be a list of holiday names.
                * If None or an empty list, all holidays in ``holiday_lookup_countries`` are grouped together
                  as a single effect.

            Use ``holiday_lookup_countries`` to provide a list of countries where these holiday occur.
        holiday_pre_num_days : `int`, default 2
            Model holiday effects for ``holiday_pre_num_days`` days before the holiday.
        holiday_post_num_days : `int`, default 2
            Model holiday effects for ``holiday_post_num_days`` days after the holiday.
        holiday_pre_post_num_dict : `dict` [`str`, (`int`, `int`)] or None, default None
            Overrides ``pre_num`` and ``post_num`` for each holiday in
            ``holidays_to_model_separately``.
            For example, if ``holidays_to_model_separately`` contains "Thanksgiving" and "Labor Day",
            this parameter can be set to ``{"Thanksgiving": [1, 3], "Labor Day": [1, 2]}``,
            denoting that the "Thanksgiving" ``pre_num`` is 1 and ``post_num`` is 3, and "Labor Day"
            ``pre_num`` is 1 and ``post_num`` is 2.
            Holidays not specified use the defaults given by ``holiday_pre_num_days``
            and ``holiday_post_num_days``.
        daily_event_df_dict : `dict` [`str`, `pandas.DataFrame`] or None, default None
            A dictionary of data frames, each representing events data for the corresponding key.
            Specifies additional events to include besides the holidays specified above. The format
            is the same as in `~greykite.algo.forecast.silverkite.SilverkiteForecast.forecast`.
            The DataFrame has two columns:

                - The first column contains event dates. Must be in a format
                  recognized by `pandas.to_datetime`. Must be at daily
                  frequency for proper join. It is joined against the time
                  in ``df``, converted to a day:
                  ``pd.to_datetime(pd.DatetimeIndex(df[time_col]).date)``.
                - the second column contains the event label for each date

            The column order is important; column names are ignored.
            The event dates must span their occurrences in both the training
            and future prediction period.

            During modeling, each key in the dictionary is mapped to a categorical variable
            named ``f"{EVENT_PREFIX}_{key}"``, whose value at each timestamp is specified
            by the corresponding DataFrame.

            For example, to manually specify a yearly event on September 1
            during a training/forecast period that spans 2020-2022::

                daily_event_df_dict = {
                    "custom_event": pd.DataFrame({
                        "date": ["2020-09-01", "2021-09-01", "2022-09-01"],
                        "label": ["is_event", "is_event", "is_event"]
                    })
                }

            It's possible to specify multiple events in the same df. Two events,
            ``"sep"`` and ``"oct"`` are specified below for 2020-2021::

                daily_event_df_dict = {
                    "custom_event": pd.DataFrame({
                        "date": ["2020-09-01", "2020-10-01", "2021-09-01", "2021-10-01"],
                        "event_name": ["sep", "oct", "sep", "oct"]
                    })
                }

            Use multiple keys if two events may fall on the same date. These events
            must be in separate DataFrames::

                daily_event_df_dict = {
                    "fixed_event": pd.DataFrame({
                        "date": ["2020-09-01", "2021-09-01", "2022-09-01"],
                        "event_name": "fixed_event"
                    }),
                    "moving_event": pd.DataFrame({
                        "date": ["2020-09-01", "2021-08-28", "2022-09-03"],
                        "event_name": "moving_event"
                    }),
                }

            The multiple event specification can be used even if events never overlap. An
            equivalent specification to the second example::

                daily_event_df_dict = {
                    "sep": pd.DataFrame({
                        "date": ["2020-09-01", "2021-09-01"],
                        "event_name": "is_event"
                    }),
                    "oct": pd.DataFrame({
                        "date": ["2020-10-01", "2021-10-01"],
                        "event_name": "is_event"
                    }),
                }

            Note: All these events are automatically added to the model. There is no need
            to specify them in ``extra_pred_cols`` as you would for
            `~greykite.algo.forecast.silverkite.SilverkiteForecast.forecast`.

            Note: Do not use `~greykite.common.constants.EVENT_DEFAULT`
            in the second column. This is reserved to indicate dates that do not
            correspond to an event.
        changepoints_dict : `dict` or None, optional, default None
            Specifies the changepoint configuration.

            ``"method"``: `str`
                The method to locate changepoints.
                Valid options:

                    - "uniform". Places n_changepoints evenly spaced changepoints to allow growth to change.
                    - "custom". Places changepoints at the specified dates.
                    - "auto". Automatically detects change points. For configuration, see
                      `~greykite.algo.changepoint.adalasso.changepoint_detector.ChangepointDetector.find_trend_changepoints`

                Additional keys to provide parameters for each particular method are described below.
            ``"continuous_time_col"``: `str`, optional
                Column to apply ``growth_func`` to, to generate changepoint features
                Typically, this should match the growth term in the model
            ``"growth_func"``: callable or None, optional
                Growth function (scalar -> scalar). Changepoint features are created
                by applying ``growth_func`` to ``continuous_time_col`` with offsets.
                If None, uses identity function to use ``continuous_time_col`` directly
                as growth term
                If changepoints_dict["method"] == "uniform", this other key is required:

                    ``"n_changepoints"``: int
                        number of changepoints to evenly space across training period

                If changepoints_dict["method"] == "custom", this other key is required:

                    ``"dates"``: Iterable[Union[int, float, str, datetime]]
                        Changepoint dates. Must be parsable by pd.to_datetime.
                        Changepoints are set at the closest time on or after these dates
                        in the dataset.

                If changepoints_dict["method"] == "auto", the keys that match the parameters in
                `~greykite.algo.changepoint.adalasso.changepoint_detector.ChangepointDetector.find_trend_changepoints`,
                except ``df``, ``time_col`` and ``value_col``, are optional.
                Extra keys also include "dates", "combine_changepoint_min_distance" and "keep_detected" to specify
                additional custom trend changepoints. These three parameters correspond to the three parameters
                "custom_changepoint_dates", "min_distance" and "keep_detected" in
                `~greykite.algo.changepoint.adalasso.changepoints_utils.combine_detected_and_custom_trend_changepoints`.

        yearly_seasonality : `str` or `bool` or `int`
            Determines the yearly seasonality.
            'auto', True, False, or a number for the Fourier order
        quarterly_seasonality : `str` or `bool` or `int`
            Determines the quarterly seasonality.
            'auto', True, False, or a number for the Fourier order
        monthly_seasonality : `str` or `bool` or `int`
            Determines the monthly seasonality.
            'auto', True, False, or a number for the Fourier order
        weekly_seasonality : `str` or `bool` or `int`
            Determines the weekly seasonality.
            'auto', True, False, or a number for the Fourier order
        daily_seasonality : `str` or `bool` or `int`
            Determines the daily seasonality.
            'auto', True, False, or a number for the Fourier order
        max_daily_seas_interaction_order : `int` or None, optional, default `None`
            Max fourier order for interaction terms with daily seasonality.
            If None, uses all available terms.
        max_weekly_seas_interaction_order : `int` or None, optional, default `None`
            Max fourier order for interaction terms with weekly seasonality.
            If None, uses all available terms.
        autoreg_dict : `dict` or `str` or None, optional, default `None`
            If a `dict`: A dictionary with arguments for `~greykite.common.features.timeseries_lags.build_autoreg_df`.
            That function's parameter ``value_col`` is inferred from the input of
            current function ``self.forecast``. Other keys are:

                ``"lag_dict"`` : `dict` or None
                ``"agg_lag_dict"`` : `dict` or None
                ``"series_na_fill_func"`` : callable

            If a `str`: The string will represent a method and a dictionary will be
            constructed using that `str`.
            Currently only implemented method is "auto" which uses
            `~greykite.algo.forecast.silverkite.SilverkiteForecast.__get_default_autoreg_dict`
            to create a dictionary.
            See more details for above parameters in
            `~greykite.common.features.timeseries_lags.build_autoreg_df`.
        past_df : `pandas.DataFrame` or None, default None
            The past df used for building autoregression features.
            This is not necessarily needed since imputation is available,
            however, if such data is available but not used in training for speed purposes,
            they can be passed here to build more accurate autoregression features.
        lagged_regressor_dict : `dict` or None, default None
            A dictionary with arguments for `greykite.common.features.timeseries_lags.build_autoreg_df_multi`.
            The keys of the dictionary are the target lagged regressor column names.
            It can leverage the regressors included in ``df``.
            The value of each key is either a `dict` or `str`.
            If `dict`, it has the following keys:

                ``"lag_dict"`` : `dict` or None
                ``"agg_lag_dict"`` : `dict` or None
                ``"series_na_fill_func"`` : callable

            If `str`, it represents a method and a dictionary will be constructed using that `str`.
            Currently the only implemented method is "auto" which uses
            `~greykite.algo.forecast.silverkite.SilverkiteForecast.__get_default_lagged_regressor_dict`
            to create a dictionary for each lagged regressor.
            An example::

                lagged_regressor_dict = {
                    "regressor1": {
                        "lag_dict": {"orders": [1, 2, 3]},
                        "agg_lag_dict": {
                            "orders_list": [[7, 7 * 2, 7 * 3]],
                            "interval_list": [(8, 7 * 2)]},
                        "series_na_fill_func": lambda s: s.bfill().ffill()},
                    "regressor2": "auto"}
        seasonality_changepoints_dict : `dict` or None, optional, default `None`
            The parameter dictionary for seasonality change point detection. Parameters are in
            `~greykite.algo.changepoint.adalasso.changepoint_detector.ChangepointDetector.find_seasonality_changepoints`.
            Note ``df``, ``time_col``, ``value_col`` and ``trend_changepoints`` are auto populated,
            and do not need to be provided.
        min_admissible_value : `float` or None, optional, default `None`
            The minimum admissible value to return during prediction.
            If None, no limit is applied.
        max_admissible_value : `float` or None, optional, default `None`
            The maximum admissible value to return during prediction.
            If None, no limit is applied.
        uncertainty_dict : `dict` or None, optional, default `None`
            How to fit the uncertainty model. A dictionary with keys:
                ``"uncertainty_method"`` : `str`
                    The title of the method.
                    Only "simple_conditional_residuals" is implemented
                    in ``fit_prediction_model`` which calculates CIs using residuals
                ``"params"``: `dict`
                    A dictionary of parameters needed for
                    the requested ``uncertainty_method``. For example, for
                    ``uncertainty_method="simple_conditional_residuals"``, see
                    parameters of `~greykite.algo.uncertainty.conditional.conf_interval.conf_interval`,
                    listed briefly here:

                        ``"conditional_cols"``
                        ``"quantiles"``
                        ``"quantile_estimation_method"``
                        ``"sample_size_thresh"``
                        ``"small_sample_size_method"``
                        ``"small_sample_size_quantile"``

            If None, no uncertainty intervals are calculated.
        normalize_method : `str` or None, default None
            If a string is provided, it will be used as the normalization method
            in `~greykite.common.features.normalize.normalize_df`, passed via
            the argument ``method``. Available options are: "min_max", "statistical".
            If None, no normalization will be performed.
            See that function for more details.
        growth_term : `str` or None, optional, default "linear"
            How to model the growth. Valid options are
            {"linear", "quadratic", "sqrt", "cuberoot"}.
        regressor_cols : `list` [`str`] or None, optional, default None
            The columns in ``df`` to use as regressors.
            These must be provided during prediction as well.
        feature_sets_enabled: `dict` [`str`, `bool` or "auto" or None] or `bool` or "auto" or None, default "auto"
            Whether to include interaction terms and categorical variables to increase model flexibility.

            If a `dict`, boolean values indicate whether include various sets of features in the model.
            The following keys are recognized
            (from `~greykite.algo.forecast.silverkite.constants.silverkite_column.SilverkiteColumn`):

                ``"COLS_HOUR_OF_WEEK"`` : `str`
                    Constant hour of week effect
                ``"COLS_WEEKEND_SEAS"`` : `str`
                    Daily seasonality interaction with is_weekend
                ``"COLS_DAY_OF_WEEK_SEAS"`` : `str`
                    Daily seasonality interaction with day of week
                ``"COLS_TREND_DAILY_SEAS"`` : `str`
                    Allow daily seasonality to change over time by is_weekend
                ``"COLS_EVENT_SEAS"`` : `str`
                    Allow sub-daily event effects
                ``"COLS_EVENT_WEEKEND_SEAS"`` : `str`
                    Allow sub-daily event effect to interact with is_weekend
                ``"COLS_DAY_OF_WEEK"`` : `str`
                    Constant day of week effect
                ``"COLS_TREND_WEEKEND"`` : `str`
                    Allow trend (growth, changepoints) to interact with is_weekend
                ``"COLS_TREND_DAY_OF_WEEK"`` : `str`
                    Allow trend to interact with day of week
                ``"COLS_TREND_WEEKLY_SEAS"`` : `str`
                    Allow weekly seasonality to change over time

            The following dictionary values are recognized:

                - True: include the feature set in the model
                - False: do not include the feature set in the model
                - None: do not include the feature set in the model
                - "auto" or not provided: use the default setting based on data frequency and size

            If not a `dict`:

                - if a boolean, equivalent to a dictionary with all values set to the boolean.
                - if None, equivalent to a dictionary with all values set to False.
                - if "auto", equivalent to a dictionary with all values set to "auto".

        extra_pred_cols : `list` [`str`] or None, optional, default `None`
            Columns to include in ``extra_pred_cols`` for ``SilverkiteForecast::forecast``.
            Other columns are added to ``extra_pred_cols`` by the other
            parameters of this function (i.e. ``holiday*``, ``growth_term``,
            ``regressor_cols``, ``feature_sets_enabled``).
            If `None`, it is treated the same as ``[]``.
        drop_pred_cols : `list` [`str`] or None, default None
            Names of predictor columns to be dropped from the final model.
            Ignored if None.
        explicit_pred_cols : `list` [`str`] or None, default None
            Names of the explicit predictor columns which will be
            the only variables in the final model. Note that this overwrites
            the generated predictors in the model and may include new
            terms not appearing in the predictors (e.g. interaction terms).
            Ignored if None.
        regression_weight_col : `str` or None, default None
            The column name for the weights to be used in weighted regression version
            of applicable machine-learning models.
        simulation_based : `bool`, default False
            Boolean to specify if the future predictions are to be using simulations
            or not.
            Note that this is only used in deciding what parameters should be
            used for certain components e.g. autoregression, if automatic methods
            are requested. However, the auto-settings and the prediction settings
            regarding using simulations should match.
        simulation_num : `int`, default 10
            The number of simulations for when simulations are used for generating
            forecasts and prediction intervals.


        Returns
        -------
        parameters : `dict`
            Parameters to call :func:`~greykite.algo.forecast.silverkite.SilverkiteForecast.forecast`.
        """
        if extra_pred_cols is None:
            extra_pred_cols = []
        else:
            # Does not modify the input list
            extra_pred_cols = extra_pred_cols.copy()

        # Specifies regressors (via ``extra_pred_cols``)
        if regressor_cols is None:
            regressor_cols = []
        extra_pred_cols += regressor_cols

        if time_properties is None:
            # ``df`` only contains the dates for training,
            # so we can use ``use_univariate_ts=False``.
            # ``forecast_horizon`` must be at least as large as
            # the actual size of the test set / forecast set
            # in order to pull all holidays
            time_properties = get_forecast_time_properties(
                df=df,
                time_col=time_col,
                value_col=value_col,
                freq=freq,
                regressor_cols=regressor_cols,
                forecast_horizon=forecast_horizon)

        if time_properties is not None:
            forecast_horizon = forecast_horizon or time_properties.get("forecast_horizon")

        if origin_for_time_vars is None:
            origin_for_time_vars = time_properties["origin_for_time_vars"]

        # Specifies seasonality (added to ``extra_pred_cols`` by `SilverkiteForecast::forecast`)
        seasonality_dict = {
            "yearly_seasonality": yearly_seasonality,
            "quarterly_seasonality": quarterly_seasonality,
            "monthly_seasonality": monthly_seasonality,
            "weekly_seasonality": weekly_seasonality,
            "daily_seasonality": daily_seasonality,
        }

        fs_components_df = self.__get_silverkite_seasonality(
            simple_freq=time_properties["simple_freq"].name,
            num_days=time_properties["num_training_days"],
            seasonality=seasonality_dict)

        # Specifies growth (via ``extra_pred_cols``)
        growth_term_formula = None
        if growth_term is not None:
            growth_term_formula = GROWTH_COL_ALIAS[growth_term]
            extra_pred_cols += [growth_term_formula]

        # Specifies events (via ``daily_event_df_dict``, ``extra_pred_cols``).
        # Constant daily effect.
        holiday_df_dict = self.__get_silverkite_holidays(
            holiday_lookup_countries=holiday_lookup_countries,
            holidays_to_model_separately=holidays_to_model_separately,
            start_year=time_properties["start_year"],
            end_year=time_properties["end_year"],
            pre_num=holiday_pre_num_days,
            post_num=holiday_post_num_days,
            pre_post_num_dict=holiday_pre_post_num_dict)
        if holiday_df_dict is not None:
            # Adds holidays to the user-specified events,
            # giving preference to user events
            # if there are conflicts
            daily_event_df_dict = update_dictionary(
                holiday_df_dict,
                overwrite_dict=daily_event_df_dict)

        if not daily_event_df_dict:
            # Sets empty dictionary to None
            daily_event_df_dict = None

        extra_pred_cols += get_event_pred_cols(daily_event_df_dict)

        # Specifies ``extra_pred_cols`` (interactions and additional model terms).
        # Seasonality interaction order is limited by the available order and max requested.
        daily_seas_interaction_order = self.__get_seasonality_order_from_dataframe(
            seasonality=self._silverkite_seasonality_enum.DAILY_SEASONALITY.value,
            fs=fs_components_df,
            max_order=max_daily_seas_interaction_order
        )

        weekly_seas_interaction_order = self.__get_seasonality_order_from_dataframe(
            seasonality=self._silverkite_seasonality_enum.WEEKLY_SEASONALITY.value,
            fs=fs_components_df,
            max_order=max_weekly_seas_interaction_order
        )

        # updates `changepoints_dict`, unchanged if not "method" == "auto"
        changepoints_dict, changepoint_detector = get_changepoints_dict(
            df=df,
            time_col=time_col,
            value_col=value_col,
            changepoints_dict=changepoints_dict)

        # determines changepoint column names
        if changepoints_dict is not None:
            changepoints = get_changepoint_features_and_values_from_config(
                df=df,  # the training dataset
                time_col=time_col,
                changepoints_dict=changepoints_dict,
                origin_for_time_vars=origin_for_time_vars)
            changepoint_cols = changepoints["changepoint_cols"]
        else:
            changepoint_cols = []

        feature_sets_enabled = self.__get_feature_sets_enabled(
            simple_freq=time_properties["simple_freq"].name,
            num_days=time_properties["num_training_days"],
            feature_sets_enabled=feature_sets_enabled)

        model_feature_terms = self.__get_feature_sets_terms(
            daily_event_df_dict=daily_event_df_dict,
            daily_seas_interaction_order=daily_seas_interaction_order,
            weekly_seas_interaction_order=weekly_seas_interaction_order,
            growth_term=growth_term_formula,
            changepoint_cols=changepoint_cols)

        # extends ``extra_pred_cols`` by the requested feature sets from ``feature_sets_enabled``
        for feature_set_name, feature_set_terms in model_feature_terms.items():
            if feature_sets_enabled[feature_set_name]:
                extra_pred_cols += feature_set_terms
        extra_pred_cols = unique_elements_in_list(extra_pred_cols)

        # the parameters to call ``SilverkiteForecast::forecast``
        # parameters that are directly passed through are noted below
        parameters = dict(
            df=df,                                                          # pass-through
            time_col=time_col,                                              # pass-through
            value_col=value_col,                                            # pass-through
            origin_for_time_vars=origin_for_time_vars,
            extra_pred_cols=extra_pred_cols,
            drop_pred_cols=drop_pred_cols,
            explicit_pred_cols=explicit_pred_cols,
            train_test_thresh=train_test_thresh,                            # pass-through
            training_fraction=training_fraction,                            # pass-through
            fit_algorithm=fit_algorithm,                                    # pass-through
            fit_algorithm_params=fit_algorithm_params,                      # pass-through
            daily_event_df_dict=daily_event_df_dict,
            fs_components_df=fs_components_df,
            autoreg_dict=autoreg_dict,                                      # pass-through
            past_df=past_df,                                                # pass-through
            lagged_regressor_dict=lagged_regressor_dict,                    # pass-through
            changepoints_dict=changepoints_dict,                            # pass-through
            seasonality_changepoints_dict=seasonality_changepoints_dict,    # pass-through
            changepoint_detector=changepoint_detector,
            min_admissible_value=min_admissible_value,                      # pass-through
            max_admissible_value=max_admissible_value,                      # pass-through
            uncertainty_dict=uncertainty_dict,
            normalize_method=normalize_method,                              # pass-through
            regression_weight_col=regression_weight_col,                    # pass-through
            forecast_horizon=forecast_horizon,                              # pass-through
            simulation_based=simulation_based,                              # pass-through
            simulation_num=simulation_num                                   # pass-through
        )

        return parameters

[docs]    def forecast_simple(
            self,
            *args,
            **kwargs):
        """A wrapper around ``SilverkiteForecast::forecast`` that simplifies some of the input parameters.

        Parameters
        ----------
        args : positional args
            Positional args to pass to
            :func:`~greykite.algo.forecast.silverkite.forecast_simple_silverkite.convert_simple_silverkite_params`.
            See that function for details.

        kwargs : keyword args
            Keyword args to pass to
            :func:`~greykite.algo.forecast.silverkite.forecast_simple_silverkite.convert_simple_silverkite_params`.
            See that function for details.

        Returns
        -------
        trained_model : `dict`
            The return value of :func:`~greykite.algo.forecast.silverkite.SilverkiteForecast.forecast`
            A dictionary that includes the fitted model from the function
            :func:`~greykite.algo.common.ml_models.fit_ml_model_with_evaluation`.
        """
        parameters = self.convert_params(*args, **kwargs)
        trained_model = super().forecast(**parameters)
        return trained_model

    def __get_requested_seasonality_order(
            self,
            requested_seasonality="auto",
            default_order=5,
            is_enabled_auto=True):
        """Returns requested seasonality fourier series order.

        Parameters
        ----------
        requested_seasonality :  `str` or `bool` or `int`, default = 'auto'
            The requested seasonality.
            'auto', True, False, or a number for the Fourier order.
        default_order : `int`
            The default order to use for 'auto' and True.
        is_enabled_auto : `bool`
            Whether the seasonality should be modeled for 'auto' seasonality.

        Returns
        -------
        order : `int`
            Seasonality fourier series order.
        """
        if requested_seasonality is True or (requested_seasonality == 'auto' and is_enabled_auto):
            order = default_order
        elif requested_seasonality is False or (requested_seasonality == 'auto' and not is_enabled_auto):
            order = 0
        else:
            try:
                order = int(requested_seasonality)
            except ValueError as e:
                log_message(f"Requested seasonality order '{requested_seasonality}' must be one of:"
                            f" 'auto', True, False, integer", LoggingLevelEnum.ERROR)
                raise e
        return order

    def __get_silverkite_seasonality(
            self,
            simple_freq=SimpleTimeFrequencyEnum.DAY.name,
            num_days=1000,
            seasonality=None):
        """Generates `fs_components_df` parameter for `forecast_silverkite`
        for modeling seasonality.

        Parameters
        ----------
        simple_freq : `str`
            SimpleTimeFrequencyEnum member that best matches the input data frequency
            according to `get_simple_time_frequency_from_period`
        num_days : `int`
            Number of days of observations in the input data
        seasonality : `dict` or None
            Seasonality configuration dictionary, with the following optional keys.
            (keys are SilverkiteSeasonalityEnum members in lower case):

                ``"yearly_seasonality"`` : `str` or `bool` or `int` or None, default = 'auto'
                    Determines the yearly seasonality
                    'auto', True, False, or a number for the Fourier order
                ``"quarterly_seasonality"`` : `str` or `bool` or `int` or None, default = 'auto'
                    Determines the quarterly seasonality
                    'auto', True, False, or a number for the Fourier order
                ``"monthly_seasonality"`` : `str` or `bool` or `int` or None, default = 'auto'
                    Determines the monthly seasonality
                    'auto', True, False, or a number for the Fourier order
                ``"weekly_seasonality"`` : `str` or `bool` or `int` or None, default = 'auto'
                    Determines the weekly seasonality
                    'auto', True, False, or a number for the Fourier order
                ``"daily_seasonality"`` : `str` or `bool` or `int` or None, default = 'auto'
                    Determines the daily seasonality
                    'auto', True, False, or a number for the Fourier order

            None is equivalent to 'auto', both for the dictionary itself and for
            any of its values. If 'auto', seasonality components are based on input data
            (``num_days``, ``simple_freq``), according to
            `~greykite.algo.forecast.silverkite.constants.silverkite_seasonality.SilverkiteSeasonalityEnum`.
            and `~greykite.algo.forecast.silverkite.constants.silverkite_time_frequency.SilverkiteTimeFrequencyEnum`.

        Returns
        -------
        fs_components_df : `pandas.DataFrame` or None
            Contains fourier series specification. Columns:

                - "name"
                - "period"
                - "order"
                - "seas_names"

            None if no seasonality has a positive order.

        Raises
        ------
        ValueError
            If ``seasonality`` contains a key that is not a recognized seasonality.
        """
        if seasonality is None:
            seasonality = {}

        # recognized seasonalities for silverkite, keyed by lower case member name
        silverkite_seasonalities = {
            k.lower(): v
            for k, v in self._silverkite_seasonality_enum.__members__.items()}

        # valid seasonalities based on input data frequency
        freq_valid_seas_names = SimpleTimeFrequencyEnum[simple_freq].value.valid_seas
        freq_auto_seas_names = self._silverkite_time_frequency_enum[simple_freq].value.auto_fourier_seas

        for key in seasonality:
            if key not in silverkite_seasonalities:
                raise ValueError(f"{key} must be one of {silverkite_seasonalities.keys()}")

        seasonalities = []  # seasonalities to add to the model
        for seas in silverkite_seasonalities.values():
            # keys are SilverkiteSeasonalityEnum members in lower case
            seas_input = seasonality.get(seas.name.lower(), "auto")
            if seas_input is None:
                # An explicit None is documented as equivalent to 'auto'.
                # Without this, None would reach ``int(None)`` and raise TypeError.
                seas_input = "auto"
            # under auto configuration, seasonality is added if it's recommended for both
            # the input frequency and data size
            is_enabled_auto = (
                    num_days >= seas.value.default_min_days
                    and seas.name in freq_auto_seas_names)
            order = self.__get_requested_seasonality_order(
                requested_seasonality=seas_input,
                default_order=seas.value.order,
                is_enabled_auto=is_enabled_auto)
            if order > 0:
                if seas.name not in freq_valid_seas_names:
                    # The seasonality is still added; the user is only warned.
                    log_message(f"'{seas.name.lower()}' is typically not valid for "
                                f"data with '{simple_freq}' frequency. Each seasonality period "
                                f"should cover multiple observations in the data. To remove "
                                f"these seasonality terms from the model, remove {seas.name.lower()}={seas_input} "
                                f"or set it to 'auto' or 0.", LoggingLevelEnum.WARNING)
                seasonalities.append({
                    "name": seas.value.name,
                    "period": seas.value.period,
                    "order": order,  # user is allowed to override default order
                    "seas_names": seas.value.seas_names
                })

        # constructs dataframe where each seasonality is a row
        if not seasonalities:
            return None
        return pd.DataFrame(
            seasonalities,
            columns=["name", "period", "order", "seas_names"])

    def __get_seasonality_order_from_dataframe(
            self,
            seasonality,
            fs=None,
            max_order=None):
        """Returns fourier series order from a `pandas.DataFrame`
        fourier series specification. Return value is capped by ``max_order``.

        Parameters
        ----------
        seasonality : `SilverkiteSeasonalityEnum.Seasonality` namedtuple
            Which seasonality to extract from ``fs``.
            Has attributes ``name``, ``period``, ``order``, ``seas_names``
            Can be a `SilverkiteSeasonalityEnum` member value.
        fs : `pandas.DataFrame` or None, optional, default `None`
            Columns: "name", "period", "order", "seas_names"
            Suitable for ``fs_components_df`` parameter for ``forecast_silverkite``
            for modeling seasonality.
            Could be returned by ``get_silverkite_seasonality``.
            Assumes that ``name`` and ``seas_names`` uniquely identify a row.
        max_order: `int` or None, optional, default `None`
            Upper limit on seasonality_order.

        Returns
        -------
        fs_order : `int`
            The Fourier series order of the row with the given `name` and `seas_names`
        """
        order = 0
        if fs is not None:
            name_match = (fs["name"] == seasonality.name)
            seas_match = ((fs["seas_names"] == seasonality.seas_names)
                          if seasonality.seas_names is not None
                          else pd.isna(fs["seas_names"]))

            if any(name_match & seas_match):
                order = fs.loc[(name_match & seas_match), "order"].values[0]
        if max_order is not None:
            order = min(order, max_order)
        return order

    def __get_feature_sets_enabled(
            self,
            simple_freq=SimpleTimeFrequencyEnum.DAY.name,
            num_days=1000,
            feature_sets_enabled="auto"):
        """Decides which feature sets are enabled, starting from defaults
        based on training data frequency and size and then applying the
        user's request.

        Parameters
        ----------
        simple_freq: `str`, default SimpleTimeFrequencyEnum.DAY.name
            SimpleTimeFrequencyEnum member that best matches the input data frequency
            according to `get_simple_time_frequency_from_period`
        num_days: `int`, default 1000
            Number of days of observations in the input data
        feature_sets_enabled: `dict` [`str`, `bool` or "auto" or None] or `bool` or "auto" or None, default "auto"
            Whether to include interaction terms and categorical variables to increase model flexibility.

            If a `dict`, boolean values indicate whether include various sets of features in the model.
            The following keys are recognized
            (from `~greykite.algo.forecast.silverkite.constants.silverkite_column.SilverkiteColumn`):

                ``"COLS_HOUR_OF_WEEK"`` : `str`
                    Constant hour of week effect
                ``"COLS_WEEKEND_SEAS"`` : `str`
                    Daily seasonality interaction with is_weekend
                ``"COLS_DAY_OF_WEEK_SEAS"`` : `str`
                    Daily seasonality interaction with day of week
                ``"COLS_TREND_DAILY_SEAS"`` : `str`
                    Allow daily seasonality to change over time by is_weekend
                ``"COLS_EVENT_SEAS"`` : `str`
                    Allow sub-daily event effects
                ``"COLS_EVENT_WEEKEND_SEAS"`` : `str`
                    Allow sub-daily event effect to interact with is_weekend
                ``"COLS_DAY_OF_WEEK"`` : `str`
                    Constant day of week effect
                ``"COLS_TREND_WEEKEND"`` : `str`
                    Allow trend (growth, changepoints) to interact with is_weekend
                ``"COLS_TREND_DAY_OF_WEEK"`` : `str`
                    Allow trend to interact with day of week
                ``"COLS_TREND_WEEKLY_SEAS"`` : `str`
                    Allow weekly seasonality to change over time

            The following dictionary values are recognized:

                - True: include the feature set in the model
                - False: do not include the feature set in the model
                - None: do not include the feature set in the model
                - "auto" or not provided: use the default setting based on data frequency and size

            If not a `dict`:

                - if a boolean, equivalent to a dictionary with all values set to the boolean.
                - if None, equivalent to a dictionary with all values set to False.
                - if "auto", equivalent to a dictionary with all values set to "auto".

        Returns
        -------
        feature_sets_enabled : `dict` [`str`, `bool`]
            Indicates which feature sets will be added to the model.
            Contains every recognized feature set name as a key.
            The terms of an enabled feature set may still be empty (e.g. if there are
            no events, there is no event:seasonality interaction).

        Raises
        ------
        ValueError
            If ``feature_sets_enabled`` has an unsupported type, or is a dictionary
            with an unrecognized key or an unrecognized value.
        """
        cols = self._silverkite_column
        # Every recognized feature set starts out disabled.
        feature_sets_enabled_default = dict.fromkeys(
            [
                cols.COLS_HOUR_OF_WEEK,
                cols.COLS_WEEKEND_SEAS,
                cols.COLS_DAY_OF_WEEK_SEAS,
                cols.COLS_TREND_DAILY_SEAS,
                cols.COLS_EVENT_SEAS,
                cols.COLS_EVENT_WEEKEND_SEAS,
                cols.COLS_DAY_OF_WEEK,
                cols.COLS_TREND_WEEKEND,
                cols.COLS_TREND_DAY_OF_WEEK,
                cols.COLS_TREND_WEEKLY_SEAS,
            ],
            False)
        seconds_per_observation = SimpleTimeFrequencyEnum[simple_freq].value.seconds_per_observation

        # Data observed at least once per hour.
        if seconds_per_observation <= SimpleTimeFrequencyEnum.HOUR.value.seconds_per_observation:
            if num_days >= TimeEnum.ONE_MONTH_IN_DAYS.value:
                month_of_hourly_data = (
                    cols.COLS_HOUR_OF_WEEK,      # hour of week offset, helps the fourier terms
                    cols.COLS_WEEKEND_SEAS,      # daily seasonality on weekday vs weekend
                    cols.COLS_DAY_OF_WEEK_SEAS,  # daily seasonality by day of week
                    cols.COLS_TREND_DAILY_SEAS,  # daily seasonality trend on weekday, weekend
                )
                for feature_set in month_of_hourly_data:
                    feature_sets_enabled_default[feature_set] = True

            # Exactly one of the two holiday daily seasonality sets is enabled.
            # The weekend/weekday-dependent version needs 3 years of data:
            #   by pigeonhole principle, with reasonable assumption that a holiday must fall on a different
            #   day of the week for any three consecutive years (or else always be on the same day of week),
            #   it takes at most 3 years of training data to observe all weekend/weekday possibilities.
            if num_days < 3 * TimeEnum.ONE_YEAR_IN_DAYS.value:
                feature_sets_enabled_default[cols.COLS_EVENT_SEAS] = True
            else:
                feature_sets_enabled_default[cols.COLS_EVENT_WEEKEND_SEAS] = True

        # Data observed at least once per day.
        if seconds_per_observation <= SimpleTimeFrequencyEnum.DAY.value.seconds_per_observation:
            # day of week offset, helps the fourier terms
            feature_sets_enabled_default[cols.COLS_DAY_OF_WEEK] = True
            if num_days >= TimeEnum.ONE_MONTH_IN_DAYS.value:
                # different trend on weekday vs weekend
                feature_sets_enabled_default[cols.COLS_TREND_WEEKEND] = True
            if num_days >= TimeEnum.ONE_QUARTER_IN_DAYS.value:
                # trend interaction with day of week
                feature_sets_enabled_default[cols.COLS_TREND_DAY_OF_WEEK] = True
            if num_days >= TimeEnum.ONE_YEAR_IN_DAYS.value:
                # weekly seasonality trend over time
                feature_sets_enabled_default[cols.COLS_TREND_WEEKLY_SEAS] = True

        # ``None`` means no feature sets are enabled, i.e. the same as ``False``.
        if feature_sets_enabled is None:
            feature_sets_enabled = False

        if feature_sets_enabled == "auto":
            # Nothing requested by the user, the automatic defaults apply.
            return feature_sets_enabled_default

        if isinstance(feature_sets_enabled, bool):
            # A single boolean applies to every feature set.
            return dict.fromkeys(feature_sets_enabled_default, feature_sets_enabled)

        if not isinstance(feature_sets_enabled, dict):
            raise ValueError(
                f"Unrecognized type for `feature_sets_enabled`: expected bool, dict, 'auto', or None. Found: {feature_sets_enabled}")

        # Dictionary entries override the defaults one feature set at a time.
        for setting, is_enabled in feature_sets_enabled.items():
            if setting not in feature_sets_enabled_default:
                raise ValueError(f"Unrecognized feature set: '{setting}'. Value feature sets are "
                                 f"{list(feature_sets_enabled_default.keys())}")
            if is_enabled == "auto":
                # "auto" counts as not set by the user, so the default is kept.
                continue
            if is_enabled is not True and is_enabled is not False and is_enabled is not None:
                raise ValueError(
                    f"Unrecognized `feature_sets_enabled` dictionary value for key {setting}: "
                    f"expected bool or 'auto' or None. Found: {is_enabled}")
            # True turns the feature set on; False and None both turn it off.
            feature_sets_enabled_default[setting] = is_enabled is True
        return feature_sets_enabled_default

    def __get_feature_sets_terms(
            self,
            daily_event_df_dict=None,
            daily_seas_interaction_order=0,
            weekly_seas_interaction_order=0,
            growth_term=None,
            changepoint_cols=None):
        """Defines features sets for use in the ``extra_pred_cols`` parameter
        to ``forecast_silverkite``.
        Derived from events, seasonality, and trend (growth + changepoints).

        Parameters
        ----------
        daily_event_df_dict : `dict` [`str`, `pandas.DataFrame`] or None, default None
            Suitable for use as ``daily_event_df_dict`` parameter in ``forecast_silverkite``.
            Each event is modeled as its own effect.
            Only the events listed in ``HOLIDAYS_TO_INTERACT`` of the holiday
            constants are used to build event:seasonality interactions.
        daily_seas_interaction_order : `int`, default 0
            Order on interaction terms with daily seasonality.
            No daily seasonality interactions are generated unless this is positive.
        weekly_seas_interaction_order : `int`, default 0
            Order on interaction terms with weekly seasonality.
            No weekly seasonality interactions are generated unless this is positive.
        growth_term : `str` or None, default None
            The growth column, inserted as-is into the interaction terms.
            ``convert_params`` passes the value it looks up in ``GROWTH_COL_ALIAS``.
        changepoint_cols : `list` [`str`] or None, default None
            Names of the changepoint feature columns to be generated by ``build_silverkite_features``.

        Returns
        -------
        feature_sets_terms : `dict` [`str`, `list` [`str`]]
            The patsy model terms for each feature set.
            Keys are the feature set names, values are lists of patsy model terms.
            If there are no valid patsy model terms according to the input configuration,
            the list is empty. For example, if there are no events, the event related
            effects will be empty.
        """
        cols = self._silverkite_column

        # The trend is the growth column (if any) followed by the changepoint columns.
        if changepoint_cols is None:
            changepoint_cols = []
        trend_cols = ([] if growth_term is None else [growth_term]) + changepoint_cols

        # all possible values of `dow` and `dow_hr` from `build_time_features_df`
        dow_levels = ["1-Mon", "2-Tue", "3-Wed", "4-Thu", "5-Fri", "6-Sat", "7-Sun"]
        dow_hr_levels = [f"{day}_{hour:02d}" for day in range(1, 8) for hour in range(24)]
        day_of_week = patsy_categorical_term(term="str_dow", levels=dow_levels)
        hour_of_week = patsy_categorical_term(term="dow_hr", levels=dow_hr_levels)

        # Enumerates all possible keys. Feature sets that depend on seasonality
        # interactions start empty and are filled in below.
        feature_sets_terms = {
            cols.COLS_HOUR_OF_WEEK: [hour_of_week],
            cols.COLS_WEEKEND_SEAS: [],
            cols.COLS_DAY_OF_WEEK_SEAS: [],
            cols.COLS_TREND_DAILY_SEAS: [],
            cols.COLS_EVENT_SEAS: [],
            cols.COLS_EVENT_WEEKEND_SEAS: [],
            cols.COLS_DAY_OF_WEEK: [day_of_week],
            cols.COLS_TREND_WEEKEND: [f"is_weekend:{col}" for col in trend_cols],
            cols.COLS_TREND_DAY_OF_WEEK: [f"{day_of_week}:{col}" for col in trend_cols],
            cols.COLS_TREND_WEEKLY_SEAS: [],
        }

        daily_seasonality = self._silverkite_seasonality_enum.DAILY_SEASONALITY.value
        weekly_seasonality = self._silverkite_seasonality_enum.WEEKLY_SEASONALITY.value

        def interact_with_daily_seas(static_col):
            """Interacts ``static_col`` with the daily fourier series terms."""
            return cols_interact(
                static_col=static_col,
                fs_name=daily_seasonality.name,
                fs_order=daily_seas_interaction_order,
                fs_seas_name=daily_seasonality.seas_names)

        if daily_seas_interaction_order > 0:
            # allows major holidays to have different daily seasonality
            for holiday in self._silverkite_holiday.HOLIDAYS_TO_INTERACT:
                if daily_event_df_dict is None or holiday not in daily_event_df_dict:
                    continue
                event_levels = [
                    cst.EVENT_DEFAULT,  # reference level for non-event days, added by `add_daily_events`
                    # This event's levels
                    *daily_event_df_dict[holiday][cst.EVENT_DF_LABEL_COL].unique(),
                ]
                # the term name matches new_col in `add_daily_events`
                event_term = patsy_categorical_term(
                    term=f"{cst.EVENT_PREFIX}_{holiday}",
                    levels=event_levels)
                feature_sets_terms[cols.COLS_EVENT_SEAS] += interact_with_daily_seas(
                    f"{event_term}")
                feature_sets_terms[cols.COLS_EVENT_WEEKEND_SEAS] += interact_with_daily_seas(
                    f"is_weekend:{event_term}")

            feature_sets_terms[cols.COLS_WEEKEND_SEAS] = interact_with_daily_seas("is_weekend")
            feature_sets_terms[cols.COLS_DAY_OF_WEEK_SEAS] = interact_with_daily_seas(day_of_week)
            for col in trend_cols:
                feature_sets_terms[cols.COLS_TREND_DAILY_SEAS] += interact_with_daily_seas(
                    f"is_weekend:{col}")

        if weekly_seas_interaction_order > 0:
            # allows the weekly seasonality to interact with each trend column
            for col in trend_cols:
                feature_sets_terms[cols.COLS_TREND_WEEKLY_SEAS] += cols_interact(
                    static_col=col,
                    fs_name=weekly_seasonality.name,
                    fs_order=weekly_seas_interaction_order,
                    fs_seas_name=weekly_seasonality.seas_names)

        return feature_sets_terms

    def __get_silverkite_holidays(
            self,
            holiday_lookup_countries="auto",
            holidays_to_model_separately="auto",
            start_year=2015,
            end_year=2030,
            pre_num=2,
            post_num=2,
            pre_post_num_dict=None):
        """Builds the holidays dictionary used as the ``daily_event_df_dict`` parameter of the silverkite model.

        Resolves the "auto" / None shortcuts for countries and holiday names into
        concrete lists, validates the remaining inputs, and delegates to
        ``generate_holiday_events``. Its main value is supplying reasonable
        defaults for the holiday names and countries.

        Parameters
        ----------
        holiday_lookup_countries : `list` [`str`] or "auto" or None, optional, default "auto"
            Countries whose calendars contain the holidays to model
            (``holidays_to_model_separately``).

            * "auto": use the default country list that covers the default
              ``holidays_to_model_separately``.
              See `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.HOLIDAY_LOOKUP_COUNTRIES_AUTO`.
            * list: a list of country names.
            * None or an empty list: no holidays are modeled.

        holidays_to_model_separately : `list` [`str`] or "auto" or `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES` or None, optional, default "auto"  # noqa: E501
            Holidays that each get their own key, value in the result.
            All other holidays in the countries are grouped together as a single effect.

            * "auto": use a default list of important holidays.
              See `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.HOLIDAYS_TO_MODEL_SEPARATELY_AUTO`.
            * `~greykite.algo.forecast.silverkite.constants.silverkite_holiday.SilverkiteHoliday.ALL_HOLIDAYS_IN_COUNTRIES`:
              use every holiday available in ``holiday_lookup_countries``. This often
              yields a model with too many parameters and should typically be avoided.
            * list: a list of holiday names.
            * None or an empty list: every holiday in ``holiday_lookup_countries``
              is grouped together as a single effect.

            Use ``holiday_lookup_countries`` to provide the countries where these holidays occur.
        start_year : `int`
            Year of the first training data point, used to generate holiday events.
        end_year : `int`
            Year of the last forecast data point, used to generate holiday events.
        pre_num : `int`
            Number of days before the holiday for which holiday effects are modeled.
        post_num : `int`
            Number of days after the holiday for which holiday effects are modeled.
        pre_post_num_dict : `dict` [`str`, (`int`, `int`)] or None, default None
            Per-holiday override of ``pre_num`` and ``post_num`` for holidays in
            ``holidays_to_model_separately``.
            For example, if ``holidays_to_model_separately`` contains "Thanksgiving" and "Labor Day",
            passing ``{"Thanksgiving": [1, 3], "Labor Day": [1, 2]}`` sets the "Thanksgiving"
            ``pre_num`` to 1 and ``post_num`` to 3, and the "Labor Day"
            ``pre_num`` to 1 and ``post_num`` to 2.
            Holidays not listed fall back to ``pre_num`` and ``post_num``.

        Returns
        -------
        daily_event_df_dict : `dict` [`str`, `pandas.DataFrame` [EVENT_DF_DATE_COL, EVENT_DF_LABEL_COL]]
            Suitable for use as `daily_event_df_dict` parameter in `forecast_silverkite`.
            Each holiday is modeled as its own effect (not specific to each country).

        Raises
        ------
        ValueError
            If ``holiday_lookup_countries`` or ``holidays_to_model_separately`` is
            not None, not one of the recognized special values, and not a list or tuple.

        See Also
        --------
        `~greykite.common.features.timeseries_features.get_available_holiday_lookup_countries`
        to list available countries for modeling.

        `~greykite.common.features.timeseries_features.get_available_holidays_across_countries`
        to see available holidays in those countries.
        """
        # Step 1: turn the country specification into a concrete sequence.
        if holiday_lookup_countries is None:
            # No countries means no holidays are modeled.
            countries = []
        elif holiday_lookup_countries == "auto":
            # Default countries covering the default `holidays_to_model_separately`.
            countries = self._silverkite_holiday.HOLIDAY_LOOKUP_COUNTRIES_AUTO
        else:
            if not isinstance(holiday_lookup_countries, (list, tuple)):
                raise ValueError(
                    f"`holiday_lookup_countries` should be a list, found {holiday_lookup_countries}")
            countries = holiday_lookup_countries

        # Step 2: turn the holiday specification into a concrete sequence.
        if holidays_to_model_separately is None:
            separate_holidays = []
        elif holidays_to_model_separately == "auto":
            # Default list of important holidays.
            separate_holidays = self._silverkite_holiday.HOLIDAYS_TO_MODEL_SEPARATELY_AUTO
        elif holidays_to_model_separately == self._silverkite_holiday.ALL_HOLIDAYS_IN_COUNTRIES:
            # Every holiday found in the resolved countries over the padded year range.
            separate_holidays = get_available_holidays_across_countries(
                countries=countries,
                year_start=start_year - 1,
                year_end=end_year + 1)
        else:
            if not isinstance(holidays_to_model_separately, (list, tuple)):
                raise ValueError(
                    f"`holidays_to_model_separately` should be a list, found {holidays_to_model_separately}")
            separate_holidays = holidays_to_model_separately

        # Step 3: generate the events. The year range is widened by one year on
        # each side, just in case, to ensure coverage of all holidays.
        event_kwargs = {
            "countries": countries,
            "holidays_to_model_separately": separate_holidays,
            "year_start": start_year - 1,
            "year_end": end_year + 1,
            "pre_num": pre_num,
            "post_num": post_num,
            "pre_post_num_dict": pre_post_num_dict,
        }
        return generate_holiday_events(**event_kwargs)