Bases: BaseEstimator, TransformerMixin
Wrapper to keep column names of pandas dataframes in a scikit-learn transformer.
Any scikit-learn transformer wrapped in KeepPandas will return a pd.DataFrame on .transform().
Warning
You should only use KeepPandas() when you know for sure scikit-learn did not change the order of your columns.
Example:
# Usage example: wrap a scikit-learn scaler in KeepPandas so that the next
# pipeline step can still refer to columns by name.
from skorecard.pipeline import KeepPandas
from skorecard import datasets
from skorecard.bucketers import EqualWidthBucketer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
# return_X_y=True yields the features and the target as two separate objects.
X, y = datasets.load_uci_credit_card(return_X_y=True)
bucket_pipeline = make_pipeline(
# Wrapped scaler: its output comes back as a pd.DataFrame with the fitted column names.
KeepPandas(StandardScaler()),
# This step selects columns by name, which is why the DataFrame must be kept.
EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
)
bucket_pipeline.fit_transform(X, y)
Source code in skorecard/pipeline/pipeline.py
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
class KeepPandas(BaseEstimator, TransformerMixin):
    """
    Wrapper to keep column names of pandas dataframes in a `scikit-learn` transformer.

    Any scikit-learn transformer wrapped in KeepPandas will return a `pd.DataFrame` on `.transform()`.
    The returned dataframe carries the column names seen in `.fit()`. When the wrapped
    transformer returns a plain array with one row per input row, the row index of the
    dataframe passed to `.transform()` is re-attached as well.

    !!! warning
        You should only use `KeepPandas()` when you know for sure `scikit-learn`
        did not change the order of your columns.

    Example:

    ```python
    from skorecard.pipeline import KeepPandas
    from skorecard import datasets
    from skorecard.bucketers import EqualWidthBucketer

    from sklearn.pipeline import make_pipeline
    from sklearn.preprocessing import StandardScaler

    X, y = datasets.load_uci_credit_card(return_X_y=True)

    bucket_pipeline = make_pipeline(
        KeepPandas(StandardScaler()),
        EqualWidthBucketer(n_bins=5, variables=['LIMIT_BAL', 'BILL_AMT1']),
    )
    bucket_pipeline.fit_transform(X, y)
    ```
    """

    def __init__(self, transformer):
        """
        Initialize.

        Args:
            transformer: The transformer to wrap. It is stored unchanged on
                `self.transformer`. A warning is logged when it is a
                `ColumnTransformer`, or a `Pipeline` for which `_get_all_steps`
                yields one.
        """
        self.transformer = transformer

        # Warn if there is a chance order of columns are changed
        if isinstance(transformer, Pipeline):
            for step in _get_all_steps(transformer):
                self._check_for_column_transformer(step)
        else:
            self._check_for_column_transformer(transformer)

    def __repr__(self):
        """String representation: delegates to the wrapped transformer."""
        return self.transformer.__repr__()

    @staticmethod
    def _check_for_column_transformer(obj):
        """Log a warning when `obj` is a `ColumnTransformer`."""
        # Compare on the class name so this check needs no import of sklearn.compose.
        if type(obj).__name__ != "ColumnTransformer":
            return
        msg = "sklearn.compose.ColumnTransformer can change the order of columns"
        msg += ", be very careful when using with KeepPandas()"
        logging.warning(msg)

    def fit(self, X, y=None, *args, **kwargs):
        """
        Fit estimator.

        Args:
            X (pd.DataFrame): Training data; its column names are recorded in `columns_`.
            y: Target, passed through to the wrapped transformer.
            *args: Extra positional arguments forwarded to the wrapped `fit`.
            **kwargs: Extra keyword arguments forwarded to the wrapped `fit`.

        Returns:
            KeepPandas: The fitted wrapper (`self`).

        Raises:
            AssertionError: If `X` is not a `pd.DataFrame`.
        """
        # NOTE: kept as an assert so callers see the same exception type as before;
        # be aware that asserts are skipped when Python runs with -O.
        assert isinstance(X, pd.DataFrame)
        self.columns_ = list(X.columns)
        self.transformer.fit(X, y, *args, **kwargs)
        return self

    def transform(self, X, *args, **kwargs):
        """
        Transform X.

        Args:
            X: Data to transform, handed to the wrapped transformer unchanged.
            *args: Extra positional arguments forwarded to the wrapped `transform`.
            **kwargs: Extra keyword arguments forwarded to the wrapped `transform`.

        Returns:
            pd.DataFrame: The transformed data labelled with `columns_`. If `X` is a
            dataframe and the wrapped transformer returned a non-pandas result with the
            same number of rows, the result also gets the row index of `X`.
        """
        check_is_fitted(self)
        new_X = self.transformer.transform(X, *args, **kwargs)

        # Re-attach the caller's row labels. Without this, an array result would get a
        # fresh 0..n-1 index and stop aligning with y or with the original dataframe.
        # Pandas results already carry their own index, so they are left untouched.
        row_index = None
        if isinstance(X, pd.DataFrame) and not isinstance(new_X, (pd.DataFrame, pd.Series)):
            shape = getattr(new_X, "shape", None)
            if shape and shape[0] == len(X.index):
                row_index = X.index
        return pd.DataFrame(new_X, columns=self.columns_, index=row_index)

    def get_feature_names(self):
        """
        Return estimator feature names.

        Returns:
            list: The column names recorded during `fit`.
        """
        check_is_fitted(self)
        return self.columns_
|
__init__(transformer)
Initialize.
Source code in skorecard/pipeline/pipeline.py
62
63
64
65
66
67
68
69
70
def __init__(self, transformer):
    """Store the wrapped transformer and warn about possible column reordering."""
    self.transformer = transformer

    # A single transformer is inspected directly.
    if not isinstance(transformer, Pipeline):
        self._check_for_column_transformer(transformer)
        return

    # For a pipeline, inspect every step that _get_all_steps yields.
    for pipeline_step in _get_all_steps(transformer):
        self._check_for_column_transformer(pipeline_step)
|
__repr__()
String representation.
Source code in skorecard/pipeline/pipeline.py
def __repr__(self):
    """Return the representation of the wrapped transformer."""
    wrapped = self.transformer
    return wrapped.__repr__()
|
fit(X, y=None, *args, **kwargs)
Fit estimator.
Source code in skorecard/pipeline/pipeline.py
def fit(self, X, y=None, *args, **kwargs):
    """
    Record the column names of X and fit the wrapped transformer.

    X must be a pd.DataFrame. y and any extra arguments are forwarded to the
    wrapped transformer's fit. Returns self.
    """
    assert isinstance(X, pd.DataFrame)

    # Remember the column labels so transform() can restore them later.
    self.columns_ = [*X.columns]

    wrapped = self.transformer
    wrapped.fit(X, y, *args, **kwargs)
    return self
|
get_feature_names()
Return estimator feature names.
Source code in skorecard/pipeline/pipeline.py
def get_feature_names(self):
    """Return the column names that were recorded when the estimator was fitted."""
    check_is_fitted(self)
    feature_names = self.columns_
    return feature_names
|
transform(X, *args, **kwargs)
Transform X.
Source code in skorecard/pipeline/pipeline.py
def transform(self, X, *args, **kwargs):
    """
    Transform X.

    Args:
        X: Data to transform, handed to the wrapped transformer unchanged.
        *args: Extra positional arguments forwarded to the wrapped `transform`.
        **kwargs: Extra keyword arguments forwarded to the wrapped `transform`.

    Returns:
        pd.DataFrame: The transformed data labelled with `columns_`. If `X` is a
        dataframe and the wrapped transformer returned a non-pandas result with the
        same number of rows, the result also gets the row index of `X`.
    """
    check_is_fitted(self)
    new_X = self.transformer.transform(X, *args, **kwargs)

    # Re-attach the caller's row labels. Without this, an array result would get a
    # fresh 0..n-1 index and stop aligning with y or with the original dataframe.
    # Pandas results already carry their own index, so they are left untouched.
    row_index = None
    if isinstance(X, pd.DataFrame) and not isinstance(new_X, (pd.DataFrame, pd.Series)):
        shape = getattr(new_X, "shape", None)
        if shape and shape[0] == len(X.index):
            row_index = X.index
    return pd.DataFrame(new_X, columns=self.columns_, index=row_index)
|