|
@@ -7,16 +7,17 @@
|
|
|
import pandas as pd
|
|
|
from sklearn.preprocessing import KBinsDiscretizer
|
|
|
from entitys import DataSplitEntity
|
|
|
+from enums import BinsStrategyEnum
|
|
|
|
|
|
|
|
|
def f_get_bins(data: DataSplitEntity, feat: str, strategy: str='quantile', nbins: int=10) -> pd.DataFrame:
    """Discretize the values of ``data[feat]`` into ``nbins`` bins.

    Args:
        data: Container that is indexed as ``data[feat]`` to obtain the
            values to bin.
        feat: Key of the feature to bin.
        strategy: Compared against ``BinsStrategyEnum.QUANTILE.value`` and
            ``BinsStrategyEnum.WIDTH.value`` to select the binning method.
        nbins: Number of bins to produce.

    Returns:
        Quantile strategy: the ordinal bin codes produced by
        ``KBinsDiscretizer``, cast to ``int`` and then to ``str``.
        Width strategy: the result of ``pd.cut`` over ``nbins`` equal-width
        bins, labelled ``Bin_1`` .. ``Bin_<nbins>``.

    NOTE(review): the quantile branch returns the discretizer's output
    rather than constructing a ``pd.DataFrame``, so the return annotation
    looks inaccurate for that branch -- confirm against callers.
    """
    if strategy == BinsStrategyEnum.QUANTILE.value:
        column = data[feat]
        # KBinsDiscretizer only accepts 2-D input (n_samples, n_features).
        # Promote a 1-D column to a single-column frame; 2-D input is passed
        # through untouched so previously working calls behave the same.
        if getattr(column, 'ndim', None) == 1:
            column = pd.DataFrame(column)
        kbin_encoder = KBinsDiscretizer(n_bins=nbins, encode='ordinal', strategy='quantile')
        feature_binned = kbin_encoder.fit_transform(column)
        # Ordinal codes come back as floats; go through int so the string
        # form is '3' rather than '3.0'.
        return feature_binned.astype(int).astype(str)

    if strategy == BinsStrategyEnum.WIDTH.value:
        # pd.cut derives the equal-width edges itself from ``bins=nbins``,
        # so no explicit bin width needs to be computed here.
        bin_labels = [f'Bin_{i}' for i in range(1, nbins + 1)]
        return pd.cut(data[feat], bins=nbins, labels=bin_labels)
|
|
|
|