|
@@ -59,23 +59,34 @@ def f_bins_filter(bins: pd.DataFrame, cols: list) -> list:
|
|
|
if tmp_len <= 2:
|
|
|
result_cols.append(tmp_col)
|
|
|
else:
|
|
|
- start_tr = tmp_br[1] - tmp_br[0]
|
|
|
- pos_neg_flag = 0
|
|
|
- for i in range(2,tmp_len):
|
|
|
- tmp_tr = tmp_br[i] - tmp_br[i-1]
|
|
|
- # 后一位bad_rate减前一位bad_rate,保证bad_rate的单调性
|
|
|
- # 记录符号变化, 允许 最多一次符号变化,即U型分布
|
|
|
- if (tmp_tr >= 0 and start_tr >= 0) or (tmp_tr <= 0 and start_tr <= 0):
|
|
|
- # 满足趋势保持,查看下一位
|
|
|
- continue
|
|
|
- else:
|
|
|
- # 记录一次符号变化
|
|
|
- pos_neg_flag += 1
|
|
|
- # 记录满足趋势要求的变量
|
|
|
- if pos_neg_flag <= 1:
|
|
|
+ tmp_judge = f_judge_monto(tmp_br)
|
|
|
+ # f_judge_monto 函数返回1表示list单调,0表示非单调
|
|
|
+ if tmp_judge:
|
|
|
result_cols.append(tmp_col)
|
|
|
return result_cols
|
|
|
|
|
|
+# 此函数判断list的单调性,允许至多一次符号变化,即U型分布
|
|
|
def f_judge_monto(bd_list: list) -> int:
    """Judge whether a sequence is monotonic or has a single turning point.

    The sequence is walked pairwise and the direction (rising or falling)
    of every consecutive step is compared with the direction of the
    previous non-flat step. At most one reversal of direction is allowed,
    so monotonic sequences and U-shaped / inverted-U-shaped sequences are
    accepted, while anything that turns twice or more is rejected.

    Args:
        bd_list: Ordered numeric values (the caller in this file passes
            the per-bin bad rates of one variable).

    Returns:
        1 if the sequence has at most one change of direction, else 0.
        Sequences with fewer than two elements have no steps at all and
        therefore return 1.
    """
    values = list(bd_list)
    # Direction of the most recent non-flat step: 1 rising, -1 falling,
    # 0 while no rising/falling step has been seen yet.
    direction = 0
    sign_changes = 0
    for prev, curr in zip(values, values[1:]):
        step = curr - prev
        if step > 0:
            step_sign = 1
        elif step < 0:
            step_sign = -1
        else:
            # A flat step (or one that is not comparable, e.g. NaN) carries
            # no direction and must not start or end a trend.
            continue
        # Compare with the *current* trend rather than with the very first
        # step; otherwise every step after the turning point of a U shape
        # would be counted as yet another change.
        if direction and step_sign != direction:
            sign_changes += 1
            if sign_changes > 1:
                # 0 means not monotonic / not U-shaped
                return 0
        direction = step_sign
    # 1 means monotonic (or a single turning point)
    return 1
|
|
|
+
|
|
|
def f_get_woe(data: DataSplitEntity, c: td.transform.Combiner, to_drop:list) -> pd.DataFrame:
|
|
|
transer = td.transform.WOETransformer()
|
|
|
# 根据训练数据来训练woe转换器,并选择目标变量和排除变量
|