Ver Fonte

modify: 优化模型结果报告

yq há 4 meses atrás
pai
commit
3600e5cf45

+ 3 - 2
commom/__init__.py

@@ -7,7 +7,8 @@
 from .logger import get_logger
 from .placeholder_func import f_fill_placeholder
 from .user_exceptions import GeneralException
-from .utils import f_get_clazz_in_module, f_clazz_to_json, f_get_date, f_get_datetime, f_save_train_df, f_format_float
+from .utils import f_get_clazz_in_module, f_clazz_to_json, f_get_date, f_get_datetime, f_save_train_df, f_format_float, \
+    f_df_to_image
 
 __all__ = ['f_get_clazz_in_module', 'f_clazz_to_json', 'GeneralException', 'get_logger', 'f_fill_placeholder',
-           'f_get_date', 'f_get_datetime', 'f_save_train_df', 'f_format_float']
+           'f_get_date', 'f_get_datetime', 'f_save_train_df', 'f_format_float', 'f_df_to_image']

+ 5 - 0
commom/utils.py

@@ -10,6 +10,7 @@ import inspect
 import os
 from json import JSONEncoder
 
+import dataframe_image as dfi
 import pandas as pd
 import pytz
 
@@ -46,6 +47,10 @@ def f_save_train_df(file_name: str, df: pd.DataFrame):
     df.to_excel(f"{file_path}.xlsx", index=False)
 
 
+def f_df_to_image(df, filename):
+    dfi.export(obj=df, filename=filename, fontsize=30, table_conversion='matplotlib')
+
+
 class f_clazz_to_json(JSONEncoder):
     def default(self, o):
         return o.__dict__

+ 6 - 0
init/__init__.py

@@ -7,11 +7,17 @@
 
 import os
 
+import matplotlib.pyplot as plt
+
 from commom import f_get_datetime
 from config import BaseConfig
 
 __all__ = ['f_get_save_path']
 
+# 设置支持中文的字体
+plt.rcParams['font.sans-serif'] = ['SimHei']  # 使用黑体
+plt.rcParams['axes.unicode_minus'] = False  # 解决负号显示问题
+
 save_path = os.path.join(BaseConfig.train_path, f"{f_get_datetime()}")
 os.makedirs(save_path, exist_ok=True)
 

+ 14 - 6
model/model_lr.py

@@ -10,6 +10,7 @@ import pandas as pd
 import scorecardpy as sc
 from sklearn.linear_model import LogisticRegression
 
+from commom import f_df_to_image
 from entitys import TrainConfigEntity, DataPreparedEntity, MetricFucEntity, DataSplitEntity
 from feature import f_calcu_model_ks, f_get_model_score_bin, f_calcu_model_psi
 from init import f_get_save_path
@@ -44,7 +45,9 @@ class ModelLr(ModelBase):
         card_df = pd.DataFrame(columns=card['basepoints'].keys())
         for k, v in card.items():
             card_df = pd.concat((card_df, v))
-        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=10)
+        card_df_path = f_get_save_path(f"card_df.png")
+        f_df_to_image(card_df, card_df_path)
+        metric_value_dict["评分卡"] = MetricFucEntity(image_path=card_df_path)
 
         # 模型系数
         coef = dict(zip(train_data.x_columns, self.lr.coef_.reshape(-1)))
@@ -86,17 +89,22 @@ class ModelLr(ModelBase):
         # 评分卡分箱
         train_data_original, score_bins = f_get_model_score_bin(train_data_original, card)
         train_data_gain = f_calcu_model_ks(train_data_original, y_column, sort_ascending=True)
-        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=9)
+        train_data_gain_path = f_get_save_path(f"train_data_gain.png")
+        f_df_to_image(train_data_gain, train_data_gain_path)
+        metric_value_dict["训练集分数分箱"] = MetricFucEntity(image_path=train_data_gain_path)
         if test_data is not None:
             test_data_original, bins = f_get_model_score_bin(test_data_original, card, score_bins)
             test_data_gain = f_calcu_model_ks(test_data_original, y_column, sort_ascending=True)
-            metric_value_dict["测试集分数分箱"] = MetricFucEntity(table=test_data_gain,
-                                                           table_font_size=9)
+            test_data_gain_path = f_get_save_path(f"test_data_gain.png")
+            f_df_to_image(test_data_gain, test_data_gain_path)
+            metric_value_dict["测试集分数分箱"] = MetricFucEntity(image_path=test_data_gain_path)
 
         # 模型分psi
         model_psi = f_calcu_model_psi(train_data_original, test_data_original)
-        metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), table=model_psi,
-                                                     table_font_size=10)
+        model_psi_path = f_get_save_path(f"model_psi.png")
+        f_df_to_image(model_psi, model_psi_path)
+        metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), image_path=model_psi_path)
+
         return metric_value_dict
 
     def predict_prob(self, x: pd.DataFrame, *args, **kwargs):

+ 5 - 5
monitor/report_generate.py

@@ -9,8 +9,8 @@ from typing import Dict
 
 import pandas as pd
 from docx import Document
-from docx.enum.table import WD_ALIGN_VERTICAL
-from docx.enum.text import WD_ALIGN_PARAGRAPH
+from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
+from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
 from docx.oxml import OxmlElement
 from docx.oxml.ns import qn
 from docx.shared import Inches, Cm, Pt
@@ -60,7 +60,7 @@ class Report():
     @staticmethod
     def _set_cell_format(cell, font_size=None):
         for paragraph in cell.paragraphs:
-            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
+            paragraph.alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
             for run in paragraph.runs:
                 # 判断文本是否包含中文
                 if any('\u4e00' <= char <= '\u9fff' for char in run.text):
@@ -69,7 +69,7 @@ class Report():
                     run.font.name = 'Times New Roman'  # 设置英文字体为Times New Roman
                 if font_size is not None:
                     run.font.size = Pt(font_size)
-        cell.vertical_alignment = WD_ALIGN_VERTICAL.CENTER
+        cell.vertical_alignment = WD_CELL_VERTICAL_ALIGNMENT.CENTER
 
     @staticmethod
     def _merge_cell_column(pre_cell, curr_cell, table_font_size, table_cell_width):
@@ -161,7 +161,7 @@ class Report():
                 for run in paragraph.runs:
                     run.text = run.text.replace(placeholder, "")
                 table = doc.add_table(rows=metric_table.shape[0] + 1, cols=metric_table.shape[1])
-                table.alignment = WD_ALIGN_PARAGRAPH.CENTER
+                table.alignment = WD_TABLE_ALIGNMENT.CENTER
                 paragraph._element.addnext(table._element)
 
                 # 列名

+ 1 - 0
requirements.txt

@@ -3,3 +3,4 @@ python-docx==0.8.11
 xlrd==1.2.0
 scorecardpy==0.1.9.7
 toad==0.0.64
+dataframe_image==0.1.14

BIN
template/模型开发报告模板_lr.docx