hace 7 meses · e548d75523
--- a/feature/strategy_iv.py
+++ b/feature/strategy_iv.py
@@ -347,7 +347,7 @@ class StrategyIv(FilterStrategyBase):
 
				 
			
 
				         metric_value_dict = {}
			
 
				         # 样本分布
			
 
				-        metric_value_dict["样本分布"] = MetricFucEntity(table=data.get_distribution(y_column), table_font_size=12,
			
 
				+        metric_value_dict["样本分布"] = MetricFucEntity(table=data.get_distribution(y_column), table_font_size=10,
			
 
				                                                     table_cell_width=3)
			
 
				         # 变量iv及psi
			
 
				         train_bins = self._f_get_bins_by_breaks(train_data, candidate_dict)
			
@@ -367,7 +367,7 @@ class StrategyIv(FilterStrategyBase):
 
				             image_path_list = self._f_save_var_trend(test_bins, x_columns_candidate, "test")
			
 
				             metric_value_dict["变量趋势-测试集"] = MetricFucEntity(image_path=image_path_list, image_size=4)
			
 
				 
			
 
				-        metric_value_dict["变量iv"] = MetricFucEntity(table=train_iv, table_font_size=12, table_cell_width=3)
			
 
				+        metric_value_dict["变量iv"] = MetricFucEntity(table=train_iv, table_font_size=10, table_cell_width=3)
			
 
				         # 变量趋势-训练集
			
 
				         image_path_list = self._f_save_var_trend(train_bins, x_columns_candidate, "train")
			
 
				         metric_value_dict["变量趋势-训练集"] = MetricFucEntity(image_path=image_path_list, image_size=4)
			
--- a/model/model_lr.py
+++ b/model/model_lr.py
@@ -44,14 +44,14 @@ class ModelLr(ModelBase):
 
				         card_df = pd.DataFrame(columns=card['basepoints'].keys())
			
 
				         for k, v in card.items():
			
 
				             card_df = pd.concat((card_df, v))
			
 
				-        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=12)
			
 
				+        metric_value_dict["评分卡"] = MetricFucEntity(table=card_df, table_font_size=10)
			
 
				 
			
 
				         # 模型系数
			
 
				         coef = dict(zip(train_data.x_columns, self.lr.coef_.reshape(-1)))
			
 
				         coef_df = pd.DataFrame()
			
 
				         coef_df['变量'] = coef.keys()
			
 
				         coef_df['变量系数'] = coef.values()
			
 
				-        metric_value_dict["变量系数"] = MetricFucEntity(table=coef_df, table_font_size=12)
			
 
				+        metric_value_dict["变量系数"] = MetricFucEntity(table=coef_df, table_font_size=10)
			
 
				 
			
 
				         # 模型ks auc
			
 
				         train_prob = self.lr.predict_proba(train_data.get_Xdata())[:, 1]
			
@@ -61,8 +61,8 @@ class ModelLr(ModelBase):
 
				         train_perf["pic"].savefig(path)
			
 
				         image_path_list.append(path)
			
 
				 
			
 
				-        train_auc = train_perf["KS"]
			
 
				-        train_ks = train_perf["AUC"]
			
 
				+        train_auc = train_perf["AUC"]
			
 
				+        train_ks = train_perf["KS"]
			
 
				 
			
 
				         test_auc = "-"
			
 
				         test_ks = "-"
			
@@ -81,22 +81,22 @@ class ModelLr(ModelBase):
 
				         df_auc["AUC"] = [train_auc, test_auc]
			
 
				         df_auc["KS"] = [train_ks, test_ks]
			
 
				         metric_value_dict["模型结果"] = MetricFucEntity(table=df_auc, image_path=image_path_list, image_size=5,
			
 
				-                                                    table_font_size=12)
			
 
				+                                                    table_font_size=10)
			
 
				 
			
 
				         # 评分卡分箱
			
 
				         train_data_original, score_bins = f_get_model_score_bin(train_data_original, card)
			
 
				         train_data_gain = f_calcu_model_ks(train_data_original, y_column, sort_ascending=True)
			
 
				-        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=12)
			
 
				+        metric_value_dict["训练集分数分箱"] = MetricFucEntity(table=train_data_gain, table_font_size=9)
			
 
				         if test_data is not None:
			
 
				             test_data_original, bins = f_get_model_score_bin(test_data_original, card, score_bins)
			
 
				             test_data_gain = f_calcu_model_ks(test_data_original, y_column, sort_ascending=True)
			
 
				             metric_value_dict["测试集分数分箱"] = MetricFucEntity(table=test_data_gain,
			
 
				-                                                           table_font_size=12)
			
 
				+                                                           table_font_size=9)
			
 
				 
			
 
				         # 模型分psi
			
 
				         model_psi = f_calcu_model_psi(train_data_original, test_data_original)
			
 
				         metric_value_dict["模型稳定性"] = MetricFucEntity(value=model_psi["psi"].sum().round(4), table=model_psi,
			
 
				-                                                     table_font_size=12)
			
 
				+                                                     table_font_size=10)
			
 
				         return metric_value_dict
			
 
				 
			
 
				     def predict_prob(self, x: pd.DataFrame, *args, **kwargs):
			
--- a/monitor/report_generate.py
+++ b/monitor/report_generate.py
@@ -8,7 +8,6 @@ import os
 
				 from typing import Dict
			
 
				 
			
 
				 import pandas as pd
			
 
				-
			
 
				 from docx import Document
			
 
				 from docx.enum.table import WD_ALIGN_VERTICAL
			
 
				 from docx.enum.text import WD_ALIGN_PARAGRAPH
			
@@ -26,26 +25,42 @@ class Report():
 
				 
			
 
				     @staticmethod
			
 
				     def _set_cell_width(table, table_cell_width):
			
 
				+        # 固定宽度
			
 
				         for column in table.columns:
			
 
				             if table_cell_width is not None:
			
 
				                 column.width = Cm(table_cell_width)
			
 
				                 continue
			
 
				+        # 自动调整宽度
			
 
				+        max_text_len_list = []
			
 
				+        a4_width = 21 - 2  # * 3.18
			
 
				+        for column in table.columns:
			
 
				             max_text_len = 0
			
 
				             for cell in column.cells:
			
 
				-                max_text_len = len(cell.text) if len(cell.text) > max_text_len else max_text_len
			
 
				-            if max_text_len >= 10:
			
 
				-                column.width = Cm(2)
			
 
				-            elif max_text_len >= 15:
			
 
				-                column.width = Cm(2.5)
			
 
				-            elif max_text_len >= 25:
			
 
				-                column.width = Cm(3)
			
 
				-            else:
			
 
				-                column.width = Cm(1.5)
			
 
				+                cell_text_len = Report._get_text_length(cell.text)
			
 
				+                max_text_len = cell_text_len if cell_text_len > max_text_len else max_text_len
			
 
				+            max_text_len_list.append(max_text_len)
			
 
				+
			
 
				+        # 按比例分配宽度
			
 
				+        cell_width_unit = a4_width / sum(max_text_len_list)
			
 
				+        cell_widths = [c * cell_width_unit for c in max_text_len_list]
			
 
				+        min_cell_width = 1
			
 
				+        # 限制最小宽度
			
 
				+        adjusted_cell_widths = [max(c, min_cell_width) for c in cell_widths]
			
 
				+        adjusted_width = sum(adjusted_cell_widths)
			
 
				+        if adjusted_width > a4_width:
			
 
				+            excess_width = adjusted_width - a4_width
			
 
				+            excess_width_per_column = excess_width / len(table.columns)
			
 
				+            # 减去多的宽度
			
 
				+            adjusted_cell_widths = [max(min_cell_width, c - excess_width_per_column) for c in
			
 
				+                                    adjusted_cell_widths]
			
 
				+
			
 
				+        for idx, column in enumerate(table.columns):
			
 
				+            column.width = Cm(adjusted_cell_widths[idx])
			
 
				 
			
 
				     @staticmethod
			
 
				     def _set_cell_format(cell, font_size=None):
			
 
				         for paragraph in cell.paragraphs:
			
 
				-            # paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
			
 
				+            paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
			
 
				             for run in paragraph.runs:
			
 
				                 # 判断文本是否包含中文
			
 
				                 if any('\u4e00' <= char <= '\u9fff' for char in run.text):
			
@@ -126,7 +141,7 @@ class Report():
 
				 
			
 
				     @staticmethod
			
 
				     def _get_text_length(text):
			
 
				-        return sum(3 if '\u4e00' <= char <= '\u9fff' else 1 for char in text)
			
 
				+        return sum(2 if '\u4e00' <= char <= '\u9fff' else 1 for char in text)
			
 
				 
			
 
				     @staticmethod
			
 
				     def _fill_table_placeholder(doc: Document, metric_value_dict: Dict[str, MetricFucEntity]):
			
@@ -149,22 +164,6 @@ class Report():
 
				                 table.alignment = WD_ALIGN_PARAGRAPH.CENTER
			
 
				                 paragraph._element.addnext(table._element)
			
 
				 
			
 
				-                # 根据列名计算单元格宽度，对不符合最小宽度的情况，重新调整
			
 
				-                # TODO：根据列名和内容综合调整单元格宽度
			
 
				-                a4_width = 21 - 2 * 3.18
			
 
				-                total_columns = metric_table.shape[1]
			
 
				-                col_lengthes = [Report._get_text_length(c) for c in metric_table.columns]
			
 
				-                cell_width_unit = a4_width / sum(col_lengthes)
			
 
				-                cell_widths = [c * cell_width_unit for c in col_lengthes]
			
 
				-                min_cell_width = 1
			
 
				-                adjusted_cell_widths = [max(c, min_cell_width) for c in cell_widths]
			
 
				-                adjusted_width = sum(adjusted_cell_widths)
			
 
				-                if adjusted_width > a4_width:
			
 
				-                    excess_width = adjusted_width - a4_width
			
 
				-                    excess_width_per_column = excess_width / total_columns
			
 
				-                    adjusted_cell_widths = [max(min_cell_width, c - excess_width_per_column) for c in
			
 
				-                                            adjusted_cell_widths]
			
 
				-
			
 
				                 # 列名
			
 
				                 for column_idx, column_name in enumerate(metric_table.columns):
			
 
				                     cell = table.cell(0, column_idx)
			
@@ -172,7 +171,6 @@ class Report():
 
				                     for run in cell.paragraphs[0].runs:
			
 
				                         run.bold = True
			
 
				                     Report._set_cell_format(cell, table_font_size)
			
 
				-                    table.columns[column_idx].width = Cm(adjusted_cell_widths[column_idx])
			
 
				                     # 合并相同的列名
			
 
				                     if column_idx != 0 and BaseConfig.merge_table_column:
			
 
				                         pre_cell = table.cell(0, column_idx - 1)
			
@@ -181,11 +179,7 @@ class Report():
 
				                 for row_idx, row in metric_table.iterrows():
			
 
				                     for column_idx, value in enumerate(row):
			
 
				                         cell = table.cell(row_idx + 1, column_idx)
			
 
				-                        if "率" in metric_table.columns[column_idx] or (
			
 
				-                                "率" in str(row[0]) and pd.notna(value) and (column_idx != 0)):
			
 
				-                            value = f"{float(value) * 100:.2f}%" if pd.notna(value) else '/'
			
 
				-                        else:
			
 
				-                            value = str(value) if pd.notna(value) else '/'
			
 
				+                        value = str(value) if pd.notna(value) else '/'
			
 
				                         cell.text = str(value)
			
 
				                         Report._set_cell_format(cell, table_font_size)
			
 
				                         # 合并第一行数据也为列的情况
			
@@ -193,10 +187,10 @@ class Report():
 
				                             Report._merge_cell_column(table.cell(0, column_idx), cell, table_font_size,
			
 
				                                                       table_cell_width)
			
 
				 
			
 
				-                # Report._set_cell_width(table, table_cell_width)
			
 
				                 Report._set_table_singleBoard(table)
			
 
				+                Report._set_cell_width(table, table_cell_width)
			
 
				                 # 禁止自动调整表格
			
 
				-                if len(metric_table.columns) <= 12 or not table_autofit:
			
 
				+                if len(metric_table.columns) <= 20 and not table_autofit:
			
 
				                     table.autofit = False
			
 
				 
			
 
				     @staticmethod
			
--- a/template/模型开发报告模板_lr.docx
+++ b/template/模型开发报告模板_lr.docx