Pārlūkot izejas kodu

add: 增加分箱iv,badrate展示代码

wangzhaoyang 5 mēneši atpakaļ
vecāks
revīzija
61a9547a75
1 mainītis faili ar 18 papildinājumiem un 0 dzēšanām
  1. 18 0
      feature/feature_utils.py

+ 18 - 0
feature/feature_utils.py

@@ -30,6 +30,24 @@ def f_get_bins(data: DataSplitEntity, feat: str, strategy: str='quantile', nbins
     # 可使用c.export()[feature]查看某一特征的分箱临界值
     return c
     '''
+
+# 此函数入参应为scorecardpy进行woebin函数转换后的dataframe
+def f_get_bins_display(bins_info: pd.DataFrame) -> pd.DataFrame:
+    df_list = []
+    for col, bin_data in bins_info.items():
+        tmp_df = pd.DataFrame(bin_data)
+        df_list.append(tmp_df)
+    result_df = pd.concat(df_list, ignore_index = True)
+    total_bad = result_df['bad'].sum()
+    total_cnt = result_df['count'].sum()
+    # 整体的坏样本率
+    br_overall = total_bad / total_cnt
+    result_df['lift'] = result_df['badprob'] / br_overall
+    result_df = result_df.sort_values(['total_iv', 'variable'], ascending=False).set_index(['variable','total_iv','bin'])\
+                    [['count_distr','count','good','bad','badprob','lift','bin_iv','woe']]
+    return result_df.style.format(subset=['count','good','bad'], precision=0).format(subset=['count_distr','bad','lift',
+                                    'badprob','woe','bin_iv'],precision=4).bar(subset=['badprob','bin_iv','lift'],color=['#d65f58','#5fbb7a'])
+
 def f_get_woe(data: DataSplitEntity, c: td.transform.Combiner, to_drop:list) -> pd.DataFrame:
     transer = td.transform.WOETransformer()
     # 根据训练数据来训练woe转换器,并选择目标变量和排除变量