Explorar el Código

modify: 优化模型结果报告

yq hace 4 meses
padre
commit
e548d75523

+ 2 - 2
feature/strategy_iv.py

@@ -347,7 +347,7 @@ class StrategyIv(FilterStrategyBase):
 
         metric_value_dict = {}
         # 样本分布
-        metric_value_dict["样本分布"] = MetricFucEntity(table=data.get_distribution(y_column), table_font_size=12,
+        metric_value_dict["样本分布"] = MetricFucEntity(table=data.get_distribution(y_column), table_font_size=10,
                                                     table_cell_width=3)
         # 变量iv及psi
         train_bins = self._f_get_bins_by_breaks(train_data, candidate_dict)
@@ -367,7 +367,7 @@ class StrategyIv(FilterStrategyBase):
             image_path_list = self._f_save_var_trend(test_bins, x_columns_candidate, "test")
             metric_value_dict["变量趋势-测试集"] = MetricFucEntity(image_path=image_path_list, image_size=4)
 
-        metric_value_dict["变量iv"] = MetricFucEntity(table=train_iv, table_font_size=12, table_cell_width=3)
+        metric_value_dict["变量iv"] = MetricFucEntity(table=train_iv, table_font_size=10, table_cell_width=3)
         # 变量趋势-训练集
         image_path_list = self._f_save_var_trend(train_bins, x_columns_candidate, "train")
         metric_value_dict["变量趋势-训练集"] = MetricFucEntity(image_path=image_path_list, image_size=4)

+ 8 - 8
model/model_lr.py

@@ -44,14 +44,14 @@ class ModelLr(ModelBase):
         card_df = pd.DataFrame(columns=card['basepoints'].keys())
         for k, v in card.items():
             card_df = pd.concat((card_df, v))
-        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=12)
+        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=10)
 
         # 模型系数
         coef = dict(zip(train_data.x_columns, self.lr.coef_.reshape(-1)))
         coef_df = pd.DataFrame()
         coef_df['变量'] = coef.keys()
         coef_df['变量系数'] = coef.values()
-        metric_value_dict["变量系数"] = MetricFucEntity(table=coef_df, table_font_size=12)
+        metric_value_dict["变量系数"] = MetricFucEntity(table=coef_df, table_font_size=10)
 
         # 模型ks auc
         train_prob = self.lr.predict_proba(train_data.get_Xdata())[:, 1]
@@ -61,8 +61,8 @@ class ModelLr(ModelBase):
         train_perf["pic"].savefig(path)
         image_path_list.append(path)
 
-        train_auc = train_perf["KS"]
-        train_ks = train_perf["AUC"]
+        train_auc = train_perf["AUC"]
+        train_ks = train_perf["KS"]
 
         test_auc = "-"
         test_ks = "-"
@@ -81,22 +81,22 @@ class ModelLr(ModelBase):
         df_auc["AUC"] = [train_auc, test_auc]
         df_auc["KS"] = [train_ks, test_ks]
         metric_value_dict["模型结果"] = MetricFucEntity(table=df_auc, image_path=image_path_list, image_size=5,
-                                                    table_font_size=12)
+                                                    table_font_size=10)
 
         # 评分卡分箱
         train_data_original, score_bins = f_get_model_score_bin(train_data_original, card)
         train_data_gain = f_calcu_model_ks(train_data_original, y_column, sort_ascending=True)
-        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=12)
+        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=9)
         if test_data is not None:
             test_data_original, bins = f_get_model_score_bin(test_data_original, card, score_bins)
             test_data_gain = f_calcu_model_ks(test_data_original, y_column, sort_ascending=True)
             metric_value_dict["测试集分数分箱"] = MetricFucEntity(table=test_data_gain,
-                                                           table_font_size=12)
+                                                           table_font_size=9)
 
         # 模型分psi
         model_psi = f_calcu_model_psi(train_data_original, test_data_original)
         metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), table=model_psi,
-                                                     table_font_size=12)
+                                                     table_font_size=10)
         return metric_value_dict
 
     def predict_prob(self, x: pd.DataFrame, *args, **kwargs):

+ 30 - 36
monitor/report_generate.py

@@ -8,7 +8,6 @@ import os
 from typing import Dict
 
 import pandas as pd
-
 from docx import Document
 from docx.enum.table import WD_ALIGN_VERTICAL
 from docx.enum.text import WD_ALIGN_PARAGRAPH
@@ -26,26 +25,42 @@ class Report():
 
     @staticmethod
     def _set_cell_width(table, table_cell_width):
+        # 固定宽度
         for column in table.columns:
             if table_cell_width is not None:
                 column.width = Cm(table_cell_width)
                 continue
+        # 自动调整宽度
+        max_text_len_list = []
+        a4_width = 21 - 2  # * 3.18
+        for column in table.columns:
             max_text_len = 0
             for cell in column.cells:
-                max_text_len = len(cell.text) if len(cell.text) > max_text_len else max_text_len
-            if max_text_len >= 10:
-                column.width = Cm(2)
-            elif max_text_len >= 15:
-                column.width = Cm(2.5)
-            elif max_text_len >= 25:
-                column.width = Cm(3)
-            else:
-                column.width = Cm(1.5)
+                cell_text_len = Report._get_text_length(cell.text)
+                max_text_len = cell_text_len if cell_text_len > max_text_len else max_text_len
+            max_text_len_list.append(max_text_len)
+
+        # 按比例分配宽度
+        cell_width_unit = a4_width / sum(max_text_len_list)
+        cell_widths = [c * cell_width_unit for c in max_text_len_list]
+        min_cell_width = 1
+        # 限制最小宽度
+        adjusted_cell_widths = [max(c, min_cell_width) for c in cell_widths]
+        adjusted_width = sum(adjusted_cell_widths)
+        if adjusted_width > a4_width:
+            excess_width = adjusted_width - a4_width
+            excess_width_per_column = excess_width / len(table.columns)
+            # 减去多的宽度
+            adjusted_cell_widths = [max(min_cell_width, c - excess_width_per_column) for c in
+                                    adjusted_cell_widths]
+
+        for idx, column in enumerate(table.columns):
+            column.width = Cm(adjusted_cell_widths[idx])
 
     @staticmethod
     def _set_cell_format(cell, font_size=None):
         for paragraph in cell.paragraphs:
-            # paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
             for run in paragraph.runs:
                 # 判断文本是否包含中文
                 if any('\u4e00' <= char <= '\u9fff' for char in run.text):
@@ -126,7 +141,7 @@ class Report():
 
     @staticmethod
     def _get_text_length(text):
-        return sum(3 if '\u4e00' <= char <= '\u9fff' else 1 for char in text)
+        return sum(2 if '\u4e00' <= char <= '\u9fff' else 1 for char in text)
 
     @staticmethod
     def _fill_table_placeholder(doc: Document, metric_value_dict: Dict[str, MetricFucEntity]):
@@ -149,22 +164,6 @@ class Report():
                 table.alignment = WD_ALIGN_PARAGRAPH.CENTER
                 paragraph._element.addnext(table._element)
 
-                # 根据列名计算单元格宽度,对不符合最小宽度的情况,重新调整
-                # TODO:根据列名和内容综合调整单元格宽度
-                a4_width = 21 - 2 * 3.18
-                total_columns = metric_table.shape[1]
-                col_lengthes = [Report._get_text_length(c) for c in metric_table.columns]
-                cell_width_unit = a4_width / sum(col_lengthes)
-                cell_widths = [c * cell_width_unit for c in col_lengthes]
-                min_cell_width = 1
-                adjusted_cell_widths = [max(c, min_cell_width) for c in cell_widths]
-                adjusted_width = sum(adjusted_cell_widths)
-                if adjusted_width > a4_width:
-                    excess_width = adjusted_width - a4_width
-                    excess_width_per_column = excess_width / total_columns
-                    adjusted_cell_widths = [max(min_cell_width, c - excess_width_per_column) for c in
-                                            adjusted_cell_widths]
-
                 # 列名
                 for column_idx, column_name in enumerate(metric_table.columns):
                     cell = table.cell(0, column_idx)
@@ -172,7 +171,6 @@ class Report():
                     for run in cell.paragraphs[0].runs:
                         run.bold = True
                     Report._set_cell_format(cell, table_font_size)
-                    table.columns[column_idx].width = Cm(adjusted_cell_widths[column_idx])
                     # 合并相同的列名
                     if column_idx != 0 and BaseConfig.merge_table_column:
                         pre_cell = table.cell(0, column_idx - 1)
@@ -181,11 +179,7 @@ class Report():
                 for row_idx, row in metric_table.iterrows():
                     for column_idx, value in enumerate(row):
                         cell = table.cell(row_idx + 1, column_idx)
-                        if "率" in metric_table.columns[column_idx] or (
-                                "率" in str(row[0]) and pd.notna(value) and (column_idx != 0)):
-                            value = f"{float(value) * 100:.2f}%" if pd.notna(value) else '/'
-                        else:
-                            value = str(value) if pd.notna(value) else '/'
+                        value = str(value) if pd.notna(value) else '/'
                         cell.text = str(value)
                         Report._set_cell_format(cell, table_font_size)
                         # 合并第一行数据也为列的情况
@@ -193,10 +187,10 @@ class Report():
                             Report._merge_cell_column(table.cell(0, column_idx), cell, table_font_size,
                                                       table_cell_width)
 
-                # Report._set_cell_width(table, table_cell_width)
                 Report._set_table_singleBoard(table)
+                Report._set_cell_width(table, table_cell_width)
                 # 禁止自动调整表格
-                if len(metric_table.columns) <= 12 or not table_autofit:
+                if len(metric_table.columns) <= 20 and not table_autofit:
                     table.autofit = False
 
     @staticmethod

BIN
template/模型开发报告模板_lr.docx