4 ماه پیش · 3f64a41d83
--- a/config/monitor_config_template_excel.json
+++ b/config/monitor_config_template_excel.json
@@ -0,0 +1,25 @@
 
				+{
			
 
				+  "template_path": "./cache/后评估报表模板.xlsx",
			
 
				+  "metric_config_list": [
			
 
				+    {
			
 
				+      "metric_code": "t1",
			
 
				+      "metric_func": "MetricBySqlGeneral",
			
 
				+      "sql": "select * from test.t1"
			
 
				+    },
			
 
				+        {
			
 
				+      "metric_code": "apply_num",
			
 
				+      "metric_func": "BMetric",
			
 
				+      "v": "1"
			
 
				+    },
			
 
				+        {
			
 
				+      "metric_code": "auto_pass_num",
			
 
				+      "metric_func": "BMetric",
			
 
				+      "v": "2"
			
 
				+    },
			
 
				+        {
			
 
				+      "metric_code": "auto_pass_per",
			
 
				+      "metric_func": "BMetric",
			
 
				+      "v": "3"
			
 
				+    }
			
 
				+  ]
			
 
				+}
			
--- a/config/monitor_config_template_word.json
+++ b/config/monitor_config_template_word.json
--- a/easy_ml_demo.ipynb
+++ b/easy_ml_demo.ipynb
--- a/metric_test2.py
+++ b/metric_test2.py
@@ -45,10 +45,10 @@ if __name__ == "__main__":
 
				     f_register_metric_func(BMetric)
			
 
				     data_loader = DataLoaderExcel()
			
 
				 
			
 
				-    a = data_loader.get_data("cache/报表自动化需求-2411.xlsx")
			
 
				-    a.writr("cache/a.xlsx")
			
 
				+    a = data_loader.get_data("cache/t1.xlsx")
			
 
				+    # a.writr("cache/a.xlsx")
			
 
				 
			
 
				 
			
 
				-    monitor_metric = MonitorMetric("./cache/model_feature_strategy1.json")
			
 
				+    monitor_metric = MonitorMetric("./cache/model_monitor_config1.json")
			
 
				     monitor_metric.calculate_metric(data_loader=data_loader)
			
 
				     monitor_metric.generate_report()
			
--- a/metric_test3.py
+++ b/metric_test3.py
@@ -0,0 +1,32 @@
 
				+# -*- coding: utf-8 -*-
			
 
				+"""
			
 
				+@author: yq
			
 
				+@time: 2024/11/1
			
 
				+@desc: 
			
 
				+"""
			
 
				+
			
 
				+from data import DataLoaderMysql
			
 
				+from entitys import MetricFucResultEntity,DbConfigEntity
			
 
				+from metrics import MetricBase, f_register_metric_func
			
 
				+from monitor import MonitorMetric
			
 
				+
			
 
				+
			
 
				+class BMetric(MetricBase):
			
 
				+
			
 
				+    def __init__(self, v: str, *args, **kwargs):
			
 
				+        super().__init__(*args, **kwargs)
			
 
				+        self._v = v
			
 
				+
			
 
				+    def calculate(self, *args, **kwargs) -> MetricFucResultEntity:
			
 
				+        if ".png" in self._v:
			
 
				+            return MetricFucResultEntity(image_path=self._v)
			
 
				+        else:
			
 
				+            return MetricFucResultEntity(value=self._v)
			
 
				+
			
 
				+
			
 
				+if __name__ == "__main__":
			
 
				+    f_register_metric_func(BMetric)
			
 
				+    data_loader = DataLoaderMysql(DbConfigEntity.from_config("./config/mysql_config.json"))
			
 
				+    monitor_metric = MonitorMetric("./config/monitor_config_template_excel.json")
			
 
				+    monitor_metric.calculate_metric(data_loader=data_loader)
			
 
				+    monitor_metric.generate_report()
			
--- a/monitor/__init__.py
+++ b/monitor/__init__.py
@@ -4,10 +4,10 @@
 
				 @time: 2022/10/24
			
 
				 @desc: 指标监控
			
 
				 """
			
 
				-
			
 
				+from monitor.report_generate import ReportWord
			
 
				 from .monitor_metric import MonitorMetric
			
 
				 
			
 
				-__all__ = ['MonitorMetric']
			
 
				+__all__ = ['MonitorMetric','ReportWord']
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				     pass
			
--- a/monitor/monitor_metric.py
+++ b/monitor/monitor_metric.py
@@ -7,8 +7,10 @@
 
				 import threading
			
 
				 from typing import Dict
			
 
				 
			
 
				+from commom import GeneralException
			
 
				 from entitys import MonitorConfigEntity, MetricFucResultEntity
			
 
				-from .report_generate import Report
			
 
				+from enums import ResultCodesEnum
			
 
				+from .report_generate import ReportWord, ReportExcel
			
 
				 
			
 
				 
			
 
				 class MonitorMetric():
			
@@ -34,7 +36,13 @@ class MonitorMetric():
 
				             self._update_metric_value_dict(metric_code, metric_value)
			
 
				 
			
 
				     def generate_report(self):
			
 
				-        Report.generate_report(self._metric_value_dict, self._monitor_config.template_path)
			
 
				+        if ".docx" in self._monitor_config.template_path:
			
 
				+            ReportWord.generate_report(self._metric_value_dict, self._monitor_config.template_path)
			
 
				+        elif ".xlsx" in self._monitor_config.template_path:
			
 
				+            ReportExcel.generate_report(self._metric_value_dict, self._monitor_config.template_path)
			
 
				+        else:
			
 
				+            raise GeneralException(ResultCodesEnum.NOT_FOUND,
			
 
				+                                   message=f"模板【{self._monitor_config.template_path}】不能处理，请使用【.docx】或【.xlsx】模板")
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
--- a/monitor/report_generate.py
+++ b/monitor/report_generate.py
@@ -7,6 +7,7 @@
 
				 import os
			
 
				 from typing import Dict
			
 
				 
			
 
				+import openpyxl
			
 
				 import pandas as pd
			
 
				 from docx import Document
			
 
				 from docx.enum.table import WD_TABLE_ALIGNMENT, WD_CELL_VERTICAL_ALIGNMENT
			
@@ -14,6 +15,7 @@ from docx.enum.text import WD_PARAGRAPH_ALIGNMENT
 
				 from docx.oxml import OxmlElement
			
 
				 from docx.oxml.ns import qn
			
 
				 from docx.shared import Inches, Cm, Pt
			
 
				+from openpyxl.worksheet.worksheet import Worksheet
			
 
				 
			
 
				 from commom import GeneralException, f_get_datetime
			
 
				 from config import BaseConfig
			
@@ -21,7 +23,7 @@ from entitys import MetricFucResultEntity
 
				 from enums import ResultCodesEnum, PlaceholderPrefixEnum
			
 
				 
			
 
				 
			
 
				-class Report():
			
 
				+class ReportWord():
			
 
				 
			
 
				     @staticmethod
			
 
				     def _set_cell_width(table, table_cell_width):
			
@@ -36,7 +38,7 @@ class Report():
 
				         for column in table.columns:
			
 
				             max_text_len = 0
			
 
				             for cell in column.cells:
			
 
				-                cell_text_len = Report._get_text_length(cell.text)
			
 
				+                cell_text_len = ReportWord._get_text_length(cell.text)
			
 
				                 max_text_len = cell_text_len if cell_text_len > max_text_len else max_text_len
			
 
				             max_text_len_list.append(max_text_len)
			
 
				 
			
@@ -83,7 +85,7 @@ class Report():
 
				             pre_cell.text = column_name
			
 
				             for run in pre_cell.paragraphs[0].runs:
			
 
				                 run.bold = True
			
 
				-            Report._set_cell_format(pre_cell, table_font_size)
			
 
				+            ReportWord._set_cell_format(pre_cell, table_font_size)
			
 
				 
			
 
				     @staticmethod
			
 
				     def _set_table_singleBoard(table):
			
@@ -132,7 +134,7 @@ class Report():
 
				         for paragraph in doc.paragraphs:
			
 
				             text = paragraph.text
			
 
				             for metric_code, metric_fuc_entity in metric_value_dict.items():
			
 
				-                placeholder = Report._get_placeholder(PlaceholderPrefixEnum.VALUE, metric_code)
			
 
				+                placeholder = ReportWord._get_placeholder(PlaceholderPrefixEnum.VALUE, metric_code)
			
 
				                 metric_value = metric_fuc_entity.value
			
 
				                 if metric_value is None:
			
 
				                     continue
			
@@ -152,7 +154,7 @@ class Report():
 
				         # 替换表格
			
 
				         for paragraph in doc.paragraphs:
			
 
				             for metric_code, metric_fuc_entity in metric_value_dict.items():
			
 
				-                placeholder = Report._get_placeholder(PlaceholderPrefixEnum.TABLE, metric_code)
			
 
				+                placeholder = ReportWord._get_placeholder(PlaceholderPrefixEnum.TABLE, metric_code)
			
 
				                 metric_table = metric_fuc_entity.table
			
 
				                 table_font_size = metric_fuc_entity.table_font_size
			
 
				                 table_autofit = metric_fuc_entity.table_autofit
			
@@ -174,25 +176,25 @@ class Report():
 
				                     cell.text = str(column_name)
			
 
				                     for run in cell.paragraphs[0].runs:
			
 
				                         run.bold = True
			
 
				-                    Report._set_cell_format(cell, table_font_size)
			
 
				+                    ReportWord._set_cell_format(cell, table_font_size)
			
 
				                     # 合并相同的列名
			
 
				                     if column_idx != 0 and BaseConfig.merge_table_column:
			
 
				                         pre_cell = table.cell(0, column_idx - 1)
			
 
				-                        Report._merge_cell_column(pre_cell, cell, table_font_size, table_cell_width)
			
 
				+                        ReportWord._merge_cell_column(pre_cell, cell, table_font_size, table_cell_width)
			
 
				                 # 值
			
 
				                 for row_idx, row in metric_table.iterrows():
			
 
				                     for column_idx, value in enumerate(row):
			
 
				                         cell = table.cell(row_idx + 1, column_idx)
			
 
				                         value = str(value) if pd.notna(value) else '/'
			
 
				                         cell.text = str(value)
			
 
				-                        Report._set_cell_format(cell, table_font_size)
			
 
				+                        ReportWord._set_cell_format(cell, table_font_size)
			
 
				                         # 合并第一行数据也为列的情况
			
 
				                         if row_idx == 0:
			
 
				-                            Report._merge_cell_column(table.cell(0, column_idx), cell, table_font_size,
			
 
				-                                                      table_cell_width)
			
 
				+                            ReportWord._merge_cell_column(table.cell(0, column_idx), cell, table_font_size,
			
 
				+                                                          table_cell_width)
			
 
				 
			
 
				-                Report._set_table_singleBoard(table)
			
 
				-                Report._set_cell_width(table, table_cell_width)
			
 
				+                ReportWord._set_table_singleBoard(table)
			
 
				+                ReportWord._set_cell_width(table, table_cell_width)
			
 
				                 # 禁止自动调整表格
			
 
				                 if len(metric_table.columns) <= 20 and not table_autofit:
			
 
				                     table.autofit = False
			
@@ -202,7 +204,7 @@ class Report():
 
				         # 替换图片
			
 
				         for paragraph in doc.paragraphs:
			
 
				             for metric_code, metric_fuc_entity in metric_value_dict.items():
			
 
				-                placeholder = Report._get_placeholder(PlaceholderPrefixEnum.IMAGE, metric_code)
			
 
				+                placeholder = ReportWord._get_placeholder(PlaceholderPrefixEnum.IMAGE, metric_code)
			
 
				                 image_path = metric_fuc_entity.image_path
			
 
				                 image_size = metric_fuc_entity.image_size
			
 
				                 if image_path is None:
			
@@ -229,14 +231,77 @@ class Report():
 
				         else:
			
 
				             raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"监控模板文件【{template_path}】不存在")
			
 
				 
			
 
				-        Report._fill_value_placeholder(doc, metric_value_dict)
			
 
				-        Report._fill_table_placeholder(doc, metric_value_dict)
			
 
				-        Report._fill_image_placeholder(doc, metric_value_dict)
			
 
				+        ReportWord._fill_value_placeholder(doc, metric_value_dict)
			
 
				+        ReportWord._fill_table_placeholder(doc, metric_value_dict)
			
 
				+        ReportWord._fill_image_placeholder(doc, metric_value_dict)
			
 
				         new_path = template_path.replace(".docx", f"{f_get_datetime()}.docx")
			
 
				         if save_path is not None:
			
 
				             new_path = save_path
			
 
				         doc.save(f"./{new_path}")
			
 
				 
			
 
				 
			
 
				+class ReportExcel():
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _fill_value_placeholder(worksheet: Worksheet, metric_value_dict: Dict[str, MetricFucResultEntity]):
			
 
				+        # 替换指标,检查每个单元格并替换
			
 
				+        for metric_code, metric_fuc_entity in metric_value_dict.items():
			
 
				+            metric_value = metric_fuc_entity.value
			
 
				+            if metric_value is None:
			
 
				+                continue
			
 
				+            placeholder = ReportWord._get_placeholder(PlaceholderPrefixEnum.VALUE, metric_code)
			
 
				+            for row in worksheet.rows:
			
 
				+                for cell in row:
			
 
				+                    if placeholder in str(cell.value):
			
 
				+                        cell.value = str(cell.value).replace(placeholder, str(metric_value))
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def _fill_table_placeholder(worksheet: Worksheet, metric_value_dict: Dict[str, MetricFucResultEntity]):
			
 
				+        # 替换表格
			
 
				+        for metric_code, metric_fuc_entity in metric_value_dict.items():
			
 
				+            metric_table = metric_fuc_entity.table
			
 
				+            if metric_table is None:
			
 
				+                continue
			
 
				+            placeholder = ReportWord._get_placeholder(PlaceholderPrefixEnum.TABLE, metric_code)
			
 
				+            # 定位占位符位置
			
 
				+            start_row = 1
			
 
				+            start_col = 1
			
 
				+            end_flag = False
			
 
				+            for row in worksheet.rows:
			
 
				+                start_col = 1
			
 
				+                for cell in row:
			
 
				+                    if placeholder in str(cell.value):
			
 
				+                        end_flag = True
			
 
				+                        break
			
 
				+                    start_col += 1
			
 
				+                if end_flag:
			
 
				+                    break
			
 
				+                start_row += 1
			
 
				+            # 无占位符则跳过
			
 
				+            if not end_flag:
			
 
				+                continue
			
 
				+
			
 
				+            for row_idx, row in metric_table.iterrows():
			
 
				+                for column_idx, value in enumerate(row):
			
 
				+                    worksheet.cell(row=start_row + row_idx, column=start_col + column_idx, value=str(value))
			
 
				+
			
 
				+    @staticmethod
			
 
				+    def generate_report(metric_value_dict: Dict[str, MetricFucResultEntity], template_path: str, save_path=None):
			
 
				+        if os.path.exists(template_path):
			
 
				+            workbook = openpyxl.load_workbook(template_path)
			
 
				+            sheet_names = workbook.sheetnames
			
 
				+            worksheet = workbook[sheet_names[0]]
			
 
				+
			
 
				+        else:
			
 
				+            raise GeneralException(ResultCodesEnum.NOT_FOUND, message=f"监控模板文件【{template_path}】不存在")
			
 
				+
			
 
				+        ReportExcel._fill_value_placeholder(worksheet, metric_value_dict)
			
 
				+        ReportExcel._fill_table_placeholder(worksheet, metric_value_dict)
			
 
				+        new_path = template_path.replace(".xlsx", f"{f_get_datetime()}.xlsx")
			
 
				+        if save_path is not None:
			
 
				+            new_path = save_path
			
 
				+        workbook.save(f"./{new_path}")
			
 
				+
			
 
				+
			
 
				 if __name__ == "__main__":
			
 
				     pass
			
--- a/pipeline/pipeline.py
+++ b/pipeline/pipeline.py
@@ -12,7 +12,7 @@ from feature.feature_strategy_base import FeatureStrategyBase
 
				 from init import init
			
 
				 from model import ModelBase
			
 
				 from model import ModelFactory
			
 
				-from monitor.report_generate import Report
			
 
				+from monitor import ReportWord
			
 
				 
			
 
				 init()
			
 
				 
			
@@ -57,7 +57,7 @@ class Pipeline():
 
				 
			
 
				     def report(self, ):
			
 
				         save_path = self._ml_config.f_get_save_path("模型报告.docx")
			
 
				-        Report.generate_report(self.metric_value_dict, self._model.get_report_template_path(), save_path=save_path)
			
 
				+        ReportWord.generate_report(self.metric_value_dict, self._model.get_report_template_path(), save_path=save_path)
			
 
				         print(f"模型报告文件储存路径:{save_path}")
			
 
				 
			
 
				     def save(self):
			
--- a/train_test.py
+++ b/train_test.py
@@ -32,9 +32,9 @@ if __name__ == "__main__":
 
				     cfg = {
			
 
				         "project_name": "demo",
			
 
				         # jupyter下输出内容
			
 
				-        "jupyter_print": True,
			
 
				+        "jupyter_print": False,
			
 
				         # 是否开启粗分箱
			
 
				-        "format_bin": False,
			
 
				+        "format_bin": True,
			
 
				         # 变量切分点搜索采样率
			
 
				         "bin_sample_rate": 0.01,
			
 
				         # 最多保留候选变量数
			
@@ -43,8 +43,8 @@ if __name__ == "__main__":
 
				         "monto_shift_threshold": 1,
			
 
				         "iv_threshold": 0.01,
			
 
				         "corr_threshold": 0.4,
			
 
				-        "psi_threshold": 0.2,
			
 
				-        "vif_threshold": 10,
			
 
				+        "psi_threshold": 0.001,
			
 
				+        "vif_threshold": 1.06,
			
 
				         # 压力测试
			
 
				         "stress_test": True,
			
 
				         "stress_sample_times": 10,
			
@@ -53,7 +53,7 @@ if __name__ == "__main__":
 
				         # 手动定义切分点，字符型的变量以'%,%'合并枚举值
			
 
				         "breaks_list": {
			
 
				             #                 'duration_in_month': [12, 18, 48],
			
 
				-            'credit_amount': [2000, 3500, 4000, 7000],
			
 
				+            #                 'credit_amount': [2000, 3500, 4000, 7000],
			
 
				             'purpose': ['retraining%,%car (used)', 'radio/television', 'furniture/equipment%,%business%,%repairs',
			
 
				                         'domestic appliances%,%education%,%car (new)%,%others'],
			
 
				             #                 'age_in_years': [27, 34, 58]
			
@@ -76,7 +76,8 @@ if __name__ == "__main__":
 
				             "credit_history": "借贷历史"
			
 
				         },
			
 
				         "columns_exclude": [],
			
 
				-        # "columns_include": ["age_in_years"],
			
 
				+        # "columns_include": ["credit_amount"],
			
 
				+        "rules": ["df.loc[df['credit_amount']>=9000,'SCORE'] += -50"]
			
 
				     }
			
 
				 
			
 
				     train_pipeline = Pipeline(data=data, **cfg)