|
@@ -30,6 +30,24 @@ def f_get_bins(data: DataSplitEntity, feat: str, strategy: str='quantile', nbins
|
|
|
# 可使用c.export()[feature]查看某一特征的分箱临界值
|
|
|
return c
|
|
|
'''
|
|
|
+
|
|
|
+# 此函数入参应为scorecardpy进行woebin函数转换后的dataframe
|
|
|
+def f_get_bins_display(bins_info: pd.DataFrame) -> pd.DataFrame:
|
|
|
+ df_list = []
|
|
|
+ for col, bin_data in bins_info.items():
|
|
|
+ tmp_df = pd.DataFrame(bin_data)
|
|
|
+ df_list.append(tmp_df)
|
|
|
+ result_df = pd.concat(df_list, ignore_index = True)
|
|
|
+ total_bad = result_df['bad'].sum()
|
|
|
+ total_cnt = result_df['count'].sum()
|
|
|
+ # 整体的坏样本率
|
|
|
+ br_overall = total_bad / total_cnt
|
|
|
+ result_df['lift'] = result_df['badprob'] / br_overall
|
|
|
+ result_df = result_df.sort_values(['total_iv', 'variable'], ascending=False).set_index(['variable','total_iv','bin'])\
|
|
|
+ [['count_distr','count','good','bad','badprob','lift','bin_iv','woe']]
|
|
|
+ return result_df.style.format(subset=['count','good','bad'], precision=0).format(subset=['count_distr','bad','lift',
|
|
|
+ 'badprob','woe','bin_iv'],precision=4).bar(subset=['badprob','bin_iv','lift'],color=['#d65f58','#5fbb7a'])
|
|
|
+
|
|
|
def f_get_woe(data: DataSplitEntity, c: td.transform.Combiner, to_drop:list) -> pd.DataFrame:
|
|
|
transer = td.transform.WOETransformer()
|
|
|
# 根据训练数据来训练woe转换器,并选择目标变量和排除变量
|