wangzhaoyang 5 сар өмнө
parent
commit
4e3a12feba

+ 9 - 1
feature/feature_utils.py

@@ -10,7 +10,15 @@ from entitys import DataSplitEntity
 
 
 def f_get_bins(data: DataSplitEntity) -> pd.DataFrame:
-    pass
+    # Equal-frequency binning. NOTE(review): `strategy`, `feat` and `nbins` are not defined in this hunk - confirm where they come from.
+    if strategy == 'quantile':
+        kbin_encoder = KBinsDiscretizer(n_bins=nbins, encode='ordinal', strategy='quantile')
+        feature_binned = kbin_encoder.fit_transform(data[feat])  # NOTE(review): fit_transform expects 2-D input - confirm data[feat] is 2-D
+        return feature_binned.astype(int).astype(str)
+    # Equal-width binning
+    if strategy == 'width':
+        # An integer `bins` makes pd.cut derive the equal-width edges itself, so no manual bin width is computed here.
+        return pd.cut(data[feat], bins=nbins, labels=[f'Bin_{i}' for i in range(1, nbins + 1)])
 
 
 def f_get_woe(data: DataSplitEntity) -> pd.DataFrame: