Skip to content

KeepPandas

Bases: BaseEstimator, TransformerMixin

Wrapper to keep column names of pandas dataframes in a scikit-learn transformer.

Any scikit-learn transformer wrapped in KeepPandas will return a pd.DataFrame on .transform().

Warning

You should only use KeepPandas() when you know for sure that scikit-learn does not change the order of your columns.

Example:

from skorecard.pipeline import KeepPandas
from skorecard import datasets
from skorecard.bucketers import EqualWidthBucketer

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler

X, y = datasets.load_uci_credit_card(return_X_y=True)

bucket_pipeline = make_pipeline(
    KeepPandas(StandardScaler()),
    EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
)
bucket_pipeline.fit_transform(X, y)
Source code in skorecard/pipeline/pipeline.py
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
class KeepPandas(BaseEstimator, TransformerMixin):
    """
    Wrapper to keep column names of pandas dataframes in a `scikit-learn` transformer.

    Any scikit-learn transformer wrapped in KeepPandas will return a `pd.DataFrame` on `.transform()`,
    labelled with the column names seen during `.fit()`. When the input of `.transform()` is a
    `pd.DataFrame` and the wrapped transformer returns an array with the same number of rows,
    the row index of the input is carried over to the output as well.

    !!! warning
        You should only use `KeepPandas()` when you know for sure `scikit-learn`
        does not change the order of your columns.

    Example:

    ```python
    from skorecard.pipeline import KeepPandas
    from skorecard import datasets
    from skorecard.bucketers import EqualWidthBucketer

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = datasets.load_uci_credit_card(return_X_y=True)

    bucket_pipeline = make_pipeline(
        KeepPandas(StandardScaler()),
        EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
    )
    bucket_pipeline.fit_transform(X, y)
    ```
    """

    def __init__(self, transformer):
        """Store the wrapped transformer and warn about `ColumnTransformer` usage.

        Args:
            transformer: The object whose `.fit()` and `.transform()` are delegated to.
                If it is a `Pipeline`, every step yielded by `_get_all_steps` is inspected.
        """
        self.transformer = transformer

        # Warn if there is a chance the order of the columns is changed
        if isinstance(transformer, Pipeline):
            for step in _get_all_steps(transformer):
                self._check_for_column_transformer(step)
        else:
            self._check_for_column_transformer(transformer)

    def __repr__(self):
        """Return the string representation of the wrapped transformer."""
        return self.transformer.__repr__()

    @staticmethod
    def _check_for_column_transformer(obj):
        """Log a warning when `obj` is an instance of a class named `ColumnTransformer`."""
        msg = "sklearn.compose.ColumnTransformer can change the order of columns"
        msg += ", be very careful when using with KeepPandas()"
        # Compared by class name, so no import of ColumnTransformer is needed here.
        if type(obj).__name__ == "ColumnTransformer":
            logging.warning(msg)

    def fit(self, X, y=None, *args, **kwargs):
        """Remember the column names of `X` and fit the wrapped transformer.

        Args:
            X: A `pd.DataFrame`; its column names are stored in `columns_`.
            y: Optional target, passed through to the wrapped transformer.
            *args: Extra positional arguments for the wrapped `.fit()`.
            **kwargs: Extra keyword arguments for the wrapped `.fit()`.

        Returns:
            The fitted `KeepPandas` instance itself.

        Raises:
            AssertionError: If `X` is not a `pd.DataFrame`.
        """
        # Kept as an assert so the raised exception type stays the same for callers.
        assert isinstance(X, pd.DataFrame), "KeepPandas.fit() expects X to be a pandas DataFrame"
        self.columns_ = list(X.columns)
        self.transformer.fit(X, y, *args, **kwargs)
        return self

    def transform(self, X, *args, **kwargs):
        """Transform `X` with the wrapped transformer and return a `pd.DataFrame`.

        Args:
            X: The data to transform.
            *args: Extra positional arguments for the wrapped `.transform()`.
            **kwargs: Extra keyword arguments for the wrapped `.transform()`.

        Returns:
            A `pd.DataFrame` with the column names recorded during `.fit()`.
        """
        check_is_fitted(self)
        new_X = self.transformer.transform(X, *args, **kwargs)

        # Array outputs carry no row labels. Without an explicit index the result
        # would get a fresh RangeIndex and no longer align with the caller's data.
        # Only reuse X.index when the row count matches, and leave outputs that
        # are already DataFrames untouched (they bring their own index).
        index = None
        if (
            isinstance(X, pd.DataFrame)
            and not isinstance(new_X, pd.DataFrame)
            and tuple(getattr(new_X, "shape", ()))[:1] == (len(X.index),)
        ):
            index = X.index
        return pd.DataFrame(new_X, columns=self.columns_, index=index)

    def get_feature_names(self):
        """Return the column names recorded during `.fit()`."""
        check_is_fitted(self)
        return self.columns_

__init__(transformer)

Initialize.

Source code in skorecard/pipeline/pipeline.py
62
63
64
65
66
67
68
69
70
71
def __init__(self, transformer):
    """Keep a reference to the wrapped transformer and check it for column-reordering steps."""
    self.transformer = transformer

    # A Pipeline is inspected step by step; anything else is inspected directly.
    candidates = _get_all_steps(transformer) if isinstance(transformer, Pipeline) else [transformer]
    for candidate in candidates:
        self._check_for_column_transformer(candidate)

__repr__()

String representation.

Source code in skorecard/pipeline/pipeline.py
73
74
75
def __repr__(self):
    """Delegate the string representation to the wrapped transformer."""
    wrapped = self.transformer
    return wrapped.__repr__()

fit(X, y=None, *args, **kwargs)

Fit estimator.

Source code in skorecard/pipeline/pipeline.py
84
85
86
87
88
89
def fit(self, X, y=None, *args, **kwargs):
    """Record the column names of the DataFrame `X`, then fit the wrapped transformer.

    Returns the estimator itself. `y` and any extra arguments are passed through
    to the wrapped transformer's `.fit()`.
    """
    assert isinstance(X, pd.DataFrame)
    column_names = [*X.columns]
    self.columns_ = column_names
    wrapped = self.transformer
    wrapped.fit(X, y, *args, **kwargs)
    return self

get_feature_names()

Return estimator feature names.

Source code in skorecard/pipeline/pipeline.py
 97
 98
 99
100
def get_feature_names(self):
    """Return the stored column names after verifying the estimator is fitted."""
    check_is_fitted(self)
    fitted_columns = self.columns_
    return fitted_columns

transform(X, *args, **kwargs)

Transform X.

Source code in skorecard/pipeline/pipeline.py
91
92
93
94
95
def transform(self, X, *args, **kwargs):
    """Transform `X` with the wrapped transformer and return a `pd.DataFrame`.

    The result is labelled with the column names stored in `columns_`. When `X`
    is a `pd.DataFrame` and the wrapped transformer returns an array with the
    same number of rows, the index of `X` is carried over to the result.
    Extra arguments are passed through to the wrapped `.transform()`.
    """
    check_is_fitted(self)
    new_X = self.transformer.transform(X, *args, **kwargs)

    # Array outputs carry no row labels. Without an explicit index the result
    # would get a fresh RangeIndex and no longer align with the caller's data.
    # Only reuse X.index when the row count matches, and leave outputs that
    # are already DataFrames untouched (they bring their own index).
    index = None
    if (
        isinstance(X, pd.DataFrame)
        and not isinstance(new_X, pd.DataFrame)
        and tuple(getattr(new_X, "shape", ()))[:1] == (len(X.index),)
    ):
        index = X.index
    return pd.DataFrame(new_X, columns=self.columns_, index=index)

Last update: 2023-08-08