Skip to content

skorecard

KeepPandas

KeepPandas

Bases: BaseEstimator, TransformerMixin

Wrapper to keep column names of pandas dataframes in a scikit-learn transformer.

Any scikit-learn transformer wrapped in KeepPandas will return a pd.DataFrame on .transform().

Warning

You should only use KeepPandas() when you know for sure scikit-learn did not change the order of your columns.

Example:

from skorecard.pipeline import KeepPandas
from skorecard import datasets
from skorecard.bucketers import EqualWidthBucketer

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

# Load the bundled UCI credit card data as a (features, target) pair
X, y = datasets.load_uci_credit_card(return_X_y=True)

# StandardScaler is wrapped in KeepPandas so its output is a DataFrame again;
# the bucketer that follows selects its columns by name via `variables`.
bucket_pipeline = make_pipeline(
    KeepPandas(StandardScaler()),
    EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
)
bucket_pipeline.fit_transform(X, y)

Source code in skorecard/pipeline/pipeline.py

class KeepPandas(BaseEstimator, TransformerMixin):
    """
    Wrapper to keep column names of pandas dataframes in a `scikit-learn` transformer.

    Any scikit-learn transformer wrapped in KeepPandas will return a `pd.DataFrame` on `.transform()`.
    The column names recorded during `.fit()` are re-applied to the transformed output. When a
    `pd.DataFrame` is passed to `.transform()` and the wrapped transformer returns a plain array
    with the same number of rows, the row index of the input is carried over as well.

    !!! warning
        You should only use `KeepPandas()` when you know for sure `scikit-learn`
        did not change the order of your columns.

    Example:

    ```python
    from skorecard.pipeline import KeepPandas
    from skorecard import datasets
    from skorecard.bucketers import EqualWidthBucketer

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = datasets.load_uci_credit_card(return_X_y=True)

    bucket_pipeline = make_pipeline(
        KeepPandas(StandardScaler()),
        EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
    )
    bucket_pipeline.fit_transform(X, y)
    ```
    """

    def __init__(self, transformer):
        """Store the wrapped transformer and warn about column-reordering steps.

        Args:
            transformer: The transformer (or `Pipeline` of transformers) to wrap.
        """
        self.transformer = transformer

        # Warn if there is a chance the order of the columns is changed
        if isinstance(transformer, Pipeline):
            for step in _get_all_steps(transformer):
                self._check_for_column_transformer(step)
        else:
            self._check_for_column_transformer(transformer)

    def __repr__(self):
        """Return the representation of the wrapped transformer."""
        return repr(self.transformer)

    @staticmethod
    def _check_for_column_transformer(obj):
        """Log a warning when `obj` is a `ColumnTransformer`.

        The check compares the class name, so `sklearn.compose` does not
        have to be imported for it.
        """
        if type(obj).__name__ != "ColumnTransformer":
            return
        logging.warning(
            "sklearn.compose.ColumnTransformer can change the order of columns"
            ", be very careful when using with KeepPandas()"
        )

    def fit(self, X, y=None, *args, **kwargs):
        """Fit the wrapped transformer and remember the column names of `X`.

        Args:
            X: Training data; must be a `pd.DataFrame`.
            y: Optional target, passed through to the wrapped transformer.
            *args: Extra positional arguments for the wrapped `fit()`.
            **kwargs: Extra keyword arguments for the wrapped `fit()`.

        Returns:
            The fitted `KeepPandas` instance.

        Raises:
            TypeError: If `X` is not a `pd.DataFrame`.
        """
        # Explicit raise instead of `assert`: asserts are removed under `python -O`
        if not isinstance(X, pd.DataFrame):
            raise TypeError(
                "KeepPandas.fit() expects a pandas DataFrame, got {}".format(type(X).__name__)
            )
        self.transformer.fit(X, y, *args, **kwargs)
        # Recorded only after a successful fit, so a failed fit does not leave
        # the wrapper looking fitted to check_is_fitted().
        self.columns_ = list(X.columns)
        return self

    def transform(self, X, *args, **kwargs):
        """Transform `X` and return the result as a `pd.DataFrame`.

        Args:
            X: Data to transform.
            *args: Extra positional arguments for the wrapped `transform()`.
            **kwargs: Extra keyword arguments for the wrapped `transform()`.

        Returns:
            A `pd.DataFrame` with the column names recorded in `fit()`.
        """
        check_is_fitted(self)
        new_X = self.transformer.transform(X, *args, **kwargs)

        # Carry the input's row index over when the wrapped transformer handed
        # back a bare array; a DataFrame result keeps whatever index it has.
        index = None
        if isinstance(X, pd.DataFrame) and not isinstance(new_X, pd.DataFrame):
            shape = getattr(new_X, "shape", None)
            if shape and shape[0] == len(X):
                index = X.index
        return pd.DataFrame(new_X, index=index, columns=self.columns_)

    def get_feature_names(self):
        """Return the column names recorded during `fit()`."""
        check_is_fitted(self)
        return self.columns_

`__init__(transformer)` ¶

Initialize.

Source code in skorecard/pipeline/pipeline.py

def __init__(self, transformer):
    """Store the wrapped transformer and warn about column-reordering steps."""
    self.transformer = transformer

    # A Pipeline is inspected step by step; anything else is inspected as-is.
    # Either way the aim is to warn when the column order might change.
    if isinstance(transformer, Pipeline):
        candidates = _get_all_steps(transformer)
    else:
        candidates = [transformer]

    for candidate in candidates:
        self._check_for_column_transformer(candidate)

`__repr__()` ¶

String representation.

Source code in skorecard/pipeline/pipeline.py

def __repr__(self):
    """Return the representation of the wrapped transformer."""
    wrapped = self.transformer
    return repr(wrapped)

`fit(X, y=None, *args, **kwargs)` ¶

Fit estimator.

Source code in skorecard/pipeline/pipeline.py

def fit(self, X, y=None, *args, **kwargs):
    """Fit the wrapped transformer and remember the column names of `X`.

    Args:
        X: Training data; must be a `pd.DataFrame`.
        y: Optional target, passed through to the wrapped transformer.
        *args: Extra positional arguments for the wrapped `fit()`.
        **kwargs: Extra keyword arguments for the wrapped `fit()`.

    Returns:
        The fitted instance (`self`).

    Raises:
        TypeError: If `X` is not a `pd.DataFrame`.
    """
    # Explicit raise instead of `assert`: asserts are removed under `python -O`
    if not isinstance(X, pd.DataFrame):
        raise TypeError(
            "KeepPandas.fit() expects a pandas DataFrame, got {}".format(type(X).__name__)
        )
    self.transformer.fit(X, y, *args, **kwargs)
    # Recorded only after a successful fit, so a failed fit does not leave
    # the wrapper looking fitted to check_is_fitted().
    self.columns_ = list(X.columns)
    return self

`get_feature_names()` ¶

Return estimator feature names.

Source code in skorecard/pipeline/pipeline.py

def get_feature_names(self):
    """Return the column names recorded during `fit()`."""
    check_is_fitted(self)
    feature_names = self.columns_
    return feature_names

`transform(X, *args, **kwargs)` ¶

Transform X.

Source code in skorecard/pipeline/pipeline.py

def transform(self, X, *args, **kwargs):
    """Transform `X` and return the result as a `pd.DataFrame`.

    Args:
        X: Data to transform.
        *args: Extra positional arguments for the wrapped `transform()`.
        **kwargs: Extra keyword arguments for the wrapped `transform()`.

    Returns:
        A `pd.DataFrame` with the column names recorded in `fit()`. When `X`
        is a DataFrame and the wrapped transformer returns a non-DataFrame
        result with the same number of rows, the row index of `X` is kept.
    """
    check_is_fitted(self)
    new_X = self.transformer.transform(X, *args, **kwargs)

    # Carry the input's row index over when the wrapped transformer handed
    # back a bare array; a DataFrame result keeps whatever index it has.
    index = None
    if isinstance(X, pd.DataFrame) and not isinstance(new_X, pd.DataFrame):
        shape = getattr(new_X, "shape", None)
        if shape and shape[0] == len(X):
            index = X.index
    return pd.DataFrame(new_X, index=index, columns=self.columns_)