Browse Source

add: BinsStrategyEnum

yq 5 months ago
parent
commit
c73886d9a0
3 changed files with 17 additions and 3 deletions
  1. enums/__init__.py (+2 -1)
  2. enums/bins_strategy_enum.py (+12 -0)
  3. feature/feature_utils.py (+3 -2)

+ 2 - 1
enums/__init__.py

@@ -4,7 +4,8 @@
 @time: 2024/10/30
 @desc: 枚举值
 """
+from .bins_strategy_enum import BinsStrategyEnum
 from .placeholder_prefix_enum import PlaceholderPrefixEnum
 from .result_codes_enum import ResultCodesEnum
 
-__all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum']
+__all__ = ['ResultCodesEnum', 'PlaceholderPrefixEnum', 'BinsStrategyEnum']

+ 12 - 0
enums/bins_strategy_enum.py

@@ -0,0 +1,12 @@
+# -*- coding: utf-8 -*-
+"""
+@author: yq
+@time: 2024/11/14
+@desc: 分箱策略枚举值
+"""
+from enum import Enum
+
+
+class BinsStrategyEnum(Enum):
+    QUANTILE = "quantile"
+    WIDTH = "width"

+ 3 - 2
feature/feature_utils.py

@@ -7,16 +7,17 @@
 import pandas as pd
 from sklearn.preprocessing import KBinsDiscretizer
 from entitys import DataSplitEntity
+from enums import BinsStrategyEnum
 
 
 def f_get_bins(data: DataSplitEntity, feat: str, strategy: str='quantile', nbins: int=10) -> pd.DataFrame:
     # 等频分箱
-    if strategy == 'quantile':
+    if strategy == BinsStrategyEnum.QUANTILE.value:
         kbin_encoder = KBinsDiscretizer(n_bins=nbins, encode='ordinal', strategy='quantile')
         feature_binned = kbin_encoder.fit_transform(data[feat])
         return feature_binned.astype(int).astype(str)
     # 等宽分箱
-    if strategy == 'width':
+    if strategy == BinsStrategyEnum.WIDTH.value:
         bin_width = (data[feat].max() - data[feat].min()) / nbins
         return pd.cut(data[feat], bins=nbins, labels=[f'Bin_{i}' for i in range(1, nbins + 1)])