Source code for greykite.sklearn.transform.drop_degenerate_transformer

# BSD 2-CLAUSE LICENSE

# Redistribution and use in source and binary forms, with or without modification,
# are permitted provided that the following conditions are met:

# Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR
# #ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# original author: Albert Chen

import warnings

import pandas as pd
from sklearn.base import BaseEstimator
from sklearn.base import TransformerMixin
from sklearn.exceptions import NotFittedError


[docs]class DropDegenerateTransformer(BaseEstimator, TransformerMixin): """Removes degenerate (constant) columns. Parameters ---------- drop_degenerate : `bool`, default False Whether to drop degenerate columns. Attributes ---------- drop_cols : `list` [`str`] or None Degenerate columns to drop keep_cols : `list` [`str`] or None Columns to keep """ def __init__(self, drop_degenerate=False): # sets params without modification to ensure get_params() works in grid search self.drop_degenerate = drop_degenerate self.drop_cols = None self.keep_cols = None
[docs] def fit(self, X, y=None): """Identifies the degenerate columns, and sets ``self.keep_cols`` and ``self.drop_cols``. Parameters ---------- X : `pandas.DataFrame` Training input data. e.g. each column is a timeseries. Columns are expected to be numeric. y : None There is no need of a target in a transformer, yet the pipeline API requires this parameter. Returns ------- self : object Returns self. """ assert isinstance(X, pd.DataFrame) if self.drop_degenerate: self.keep_cols = list(X.loc[:, (X != X.iloc[0]).any()].columns) self.drop_cols = [col for col in X.columns if col not in self.keep_cols] else: self.keep_cols = list(X.columns) self.drop_cols = [] if self.drop_cols: warnings.warn(f"Columns {self.drop_cols} are degenerate (constant value), " f"and will not be used in the forecast.", RuntimeWarning) return self
[docs] def transform(self, X): """Normalizes data using the specified scaling method. Parameters ---------- X : `pandas.DataFrame` Data to transform. e.g. each column is a timeseries. Columns are expected to be numeric. Returns ------- X_subset : `pandas.DataFrame` Selected columns of X. Keeps columns that were not degenerate on the training data. """ if self.keep_cols is None: raise NotFittedError( "This instance is not fitted yet. Call 'fit' with appropriate arguments " "before calling 'transform'.") return X[self.keep_cols]