Sfoglia il codice sorgente

modify: 修改模型报表

yq 3 giorni fa
parent
commit
922de49104
3 ha cambiato i file con 21 aggiunte e 17 eliminazioni
  1. 1 0
      model/model_lr.py
  2. 19 17
      model/model_utils.py
  3. 1 0
      online_learning/trainer_lr.py

+ 1 - 0
model/model_lr.py

@@ -202,6 +202,7 @@ class ModelLr(ModelBase):
         metric_value_dict = {}
         # 评分卡
         df_card = pd.concat(self.card.values())
+        df_card.reset_index(drop=True, inplace=True)
         img_path_card = self.ml_config.f_get_save_path(f"card.png")
         f_df_to_image(df_card, img_path_card)
         metric_value_dict["评分卡"] = MetricFucResultEntity(table=df_card, image_path=img_path_card)

+ 19 - 17
model/model_utils.py

@@ -16,19 +16,21 @@ def f_calcu_model_ks(data, y_column, sort_ascending):
         ascending=sort_ascending)
     var_ks.columns = ['样本数', '坏样本数']
     var_ks['好样本数'] = var_ks['样本数'] - var_ks['坏样本数']
-    var_ks['样本比'] = (var_ks['样本数'] / var_ks['样本数']).round(3)
-    var_ks['样本数比例'] = (var_ks['样本数'] / var_ks['样本数'].sum()).round(3)
-    var_ks['总坏样本数'] = var_ks['坏样本数'].sum()
-    var_ks['总好样本数'] = var_ks['好样本数'].sum()
-    var_ks['平均坏样本率'] = (var_ks['总坏样本数'] / var_ks['样本数'].sum()).round(3)
+    var_ks['样本数占比'] = (var_ks['样本数'] / var_ks['样本数'].sum()).round(3)
+    var_ks['坏样本率'] = (var_ks['坏样本数'] / var_ks['样本数']).round(3)
+    var_ks['平均坏样本率'] = (var_ks['坏样本数'].sum() / var_ks['样本数'].sum()).round(3)
+    var_ks['坏样本占所有坏样本的比例'] = (var_ks['坏样本数'] / var_ks['坏样本数'].sum()).round(3)
+    var_ks['好样本占所有好样本的比例'] = (var_ks['好样本数'] / var_ks['好样本数'].sum()).round(3)
     var_ks['累计坏样本数'] = var_ks['坏样本数'].cumsum()
     var_ks['累计好样本数'] = var_ks['好样本数'].cumsum()
     var_ks['累计样本数'] = var_ks['样本数'].cumsum()
-    var_ks['累计坏样本比'] = (var_ks['累计坏样本数'] / var_ks['坏样本数']).round(3)
-    var_ks['累计好样本比'] = (var_ks['累计好样本数'] / var_ks['好样本数']).round(3)
-    var_ks['KS'] = (var_ks['累计坏样本比'] - var_ks['累计好样本比']).round(3)
+    var_ks['累计坏样本占比'] = (var_ks['累计坏样本数'] / var_ks['坏样本数'].sum()).round(3)
+    var_ks['累计好样本占比'] = (var_ks['累计好样本数'] / var_ks['好样本数'].sum()).round(3)
+    var_ks['KS'] = (var_ks['累计坏样本占比'] - var_ks['累计好样本占比']).round(3)
     var_ks['LIFT'] = ((var_ks['累计坏样本数'] / var_ks['累计样本数']) / var_ks['平均坏样本率']).round(3)
-    return var_ks.reset_index()
+    var_ks = var_ks.reset_index()
+    return var_ks[[ConstantEnum.SCORE_BIN.value, "样本数", "样本数占比", "好样本数", "好样本占所有好样本的比例",
+                   "累计好样本占比", "坏样本数", "坏样本占所有坏样本的比例", "累计坏样本占比", "坏样本率", "KS", "LIFT"]]
 
 
 def f_get_model_score_bin(df, score, bins=None):
@@ -47,18 +49,18 @@ def f_get_model_score_bin(df, score, bins=None):
 def f_calcu_model_psi(df_train, df_test, sort_ascending=True):
     tmp1 = df_train.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
         ascending=sort_ascending)
-    tmp1['样本数比例'] = (tmp1['count'] / tmp1['count'].sum()).round(3)
+    tmp1['样本数占比'] = (tmp1['count'] / tmp1['count'].sum()).round(3)
     tmp2 = df_test.groupby(ConstantEnum.SCORE_BIN.value)[ConstantEnum.SCORE_BIN.value].agg(['count']).sort_index(
         ascending=sort_ascending)
-    tmp2['样本数比例'] = (tmp2['count'] / tmp2['count'].sum()).round(3)
-    psi = ((tmp1['样本数比例'] - tmp2['样本数比例']) * np.log(tmp1['样本数比例'] / tmp2['样本数比例'])).round(3)
+    tmp2['样本数占比'] = (tmp2['count'] / tmp2['count'].sum()).round(3)
+    psi = ((tmp1['样本数占比'] - tmp2['样本数占比']) * np.log(tmp1['样本数占比'] / tmp2['样本数占比'])).round(3)
     psi = psi.reset_index()
-    psi = psi.rename(columns={"样本数比例": "psi"})
+    psi = psi.rename(columns={"样本数占比": "psi"})
     psi['训练样本数'] = list(tmp1['count'])
-    psi['测试样本数'] = list(tmp2['count'])
-    psi['训练样本数比例'] = list(tmp1['样本数比例'])
-    psi['测试样本数比例'] = list(tmp2['样本数比例'])
-    return psi
+    psi['验证样本数'] = list(tmp2['count'])
+    psi['训练样本占比'] = list(tmp1['样本数占比'])
+    psi['验证样本占比'] = list(tmp2['样本数占比'])
+    return psi[[ConstantEnum.SCORE_BIN.value, "训练样本数", "训练样本占比", "验证样本数", "验证样本占比", "psi"]]
 
 
 def f_stress_test(df: pd.DataFrame, sample_times: int, bad_rate_list: list, target_column: str, score_column: str,

+ 1 - 0
online_learning/trainer_lr.py

@@ -337,6 +337,7 @@ class OnlineLearningTrainerLr:
         if not self.card_cfg is None:
             self._f_get_scorecard()
             df_card = pd.concat(self.card.values())
+            df_card.reset_index(drop=True, inplace=True)
             img_path_card = self._ol_config.f_get_save_path(f"card.png")
             f_df_to_image(df_card, img_path_card)
             metric_value_dict["评分卡"] = MetricFucResultEntity(table=df_card, image_path=img_path_card)