Browse Source

add: silent_print df_print_nolimit

yq 1 month ago
parent
commit
419ab9c858
3 changed files with 49 additions and 8 deletions
  1. 4 2
      commom/__init__.py
  2. 30 0
      commom/utils.py
  3. 15 6
      online_learning/trainer_xgb.py

+ 4 - 2
commom/__init__.py

@@ -8,8 +8,10 @@ from .logger import get_logger
 from .placeholder_func import f_fill_placeholder
 from .placeholder_func import f_fill_placeholder
 from .user_exceptions import GeneralException
 from .user_exceptions import GeneralException
 from .utils import f_get_clazz_in_module, f_clazz_to_json, f_get_date, f_get_datetime, f_save_train_df, f_format_float, \
 from .utils import f_get_clazz_in_module, f_clazz_to_json, f_get_date, f_get_datetime, f_save_train_df, f_format_float, \
-    f_df_to_image, f_display_images_by_side, NumpyEncoder, f_display_title, f_image_crop_white_borders, f_is_number
+    f_df_to_image, f_display_images_by_side, NumpyEncoder, f_display_title, f_image_crop_white_borders, f_is_number, \
+    silent_print, df_print_nolimit
 
 
 __all__ = ['f_get_clazz_in_module', 'f_clazz_to_json', 'GeneralException', 'get_logger', 'f_fill_placeholder',
 __all__ = ['f_get_clazz_in_module', 'f_clazz_to_json', 'GeneralException', 'get_logger', 'f_fill_placeholder',
            'f_get_date', 'f_get_datetime', 'f_save_train_df', 'f_format_float', 'f_df_to_image',
            'f_get_date', 'f_get_datetime', 'f_save_train_df', 'f_format_float', 'f_df_to_image',
-           'f_display_images_by_side', 'f_display_title', 'NumpyEncoder', 'f_image_crop_white_borders', 'f_is_number']
+           'f_display_images_by_side', 'f_display_title', 'NumpyEncoder', 'f_image_crop_white_borders', 'f_is_number',
+           'silent_print','df_print_nolimit']

+ 30 - 0
commom/utils.py

@@ -9,6 +9,8 @@ import base64
 import datetime
 import datetime
 import inspect
 import inspect
 import os
 import os
+import sys
+from contextlib import contextmanager
 from json import JSONEncoder
 from json import JSONEncoder
 from typing import Union
 from typing import Union
 
 
@@ -175,3 +177,31 @@ class NumpyEncoder(JSONEncoder):
         if isinstance(obj, np.ndarray):
         if isinstance(obj, np.ndarray):
             return obj.tolist()
             return obj.tolist()
         return super(NumpyEncoder, self).default(obj)
         return super(NumpyEncoder, self).default(obj)
+
+
+@contextmanager
+def silent_print():
+    """Temporarily discard everything written to ``sys.stdout``.
+
+    Intended for use as ``with silent_print(): ...``. On entry ``sys.stdout``
+    is replaced by a sink object; on exit the previous stream is put back,
+    even when the body raises.
+
+    Only the Python-level ``sys.stdout`` object is swapped, so output written
+    straight to the process file descriptor (e.g. by native code) and
+    anything sent to ``sys.stderr`` is not suppressed.
+    """
+    original_stdout = sys.stdout
+
+    class NullWriter:
+        """Minimal write-only text stream that drops its input."""
+
+        def write(self, text):
+            # Report the text as fully consumed, like a real text stream.
+            return len(text)
+
+        def flush(self):
+            # Required because ``print(..., flush=True)`` and many libraries
+            # call ``sys.stdout.flush()``; without this method they would
+            # fail with AttributeError inside the context.
+            pass
+
+        def isatty(self):
+            # Lets callers that probe for a terminal keep working.
+            return False
+
+    sys.stdout = NullWriter()
+    try:
+        yield
+    finally:
+        sys.stdout = original_stdout
+
+@contextmanager
+def df_print_nolimit():
+    """Lift pandas' row and column display limits for the enclosed block.
+
+    Sets ``display.max_columns`` and ``display.max_rows`` to ``None`` on
+    entry and, on exit, puts back the values that were in effect beforehand,
+    including when the body raises.
+    """
+    option_names = ('display.max_columns', 'display.max_rows')
+    # Snapshot the current settings before touching any of them.
+    saved = {name: pd.get_option(name) for name in option_names}
+    for name in option_names:
+        pd.set_option(name, None)
+    try:
+        yield
+    finally:
+        for name in option_names:
+            pd.set_option(name, saved[name])

+ 15 - 6
online_learning/trainer_xgb.py

@@ -16,7 +16,7 @@ from sklearn2pmml import PMMLPipeline
 from tqdm import tqdm
 from tqdm import tqdm
 
 
 from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
 from commom import GeneralException, f_image_crop_white_borders, f_df_to_image, f_display_title, \
-    f_display_images_by_side
+    f_display_images_by_side, silent_print, df_print_nolimit
 from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
 from entitys import DataSplitEntity, OnlineLearningConfigEntity, MetricFucResultEntity
 from enums import ResultCodesEnum, ConstantEnum, FileEnum
 from enums import ResultCodesEnum, ConstantEnum, FileEnum
 from init import init
 from init import init
@@ -164,10 +164,11 @@ class OnlineLearningTrainerXgb:
             random_state=self._ol_config.random_state,
             random_state=self._ol_config.random_state,
         )
         )
         self._pipeline_optimized.steps[-1] = ("classifier", model_optimized)
         self._pipeline_optimized.steps[-1] = ("classifier", model_optimized)
-        self._pipeline_optimized.fit(train_data, train_data[y_column],
-                                     classifier__verbose=False,
-                                     classifier__xgb_model=model_original.get_booster(),
-                                     )
+        with silent_print():
+            self._pipeline_optimized.fit(train_data, train_data[y_column],
+                                         classifier__verbose=False,
+                                         classifier__xgb_model=model_original.get_booster(),
+                                         )
         return ntree
         return ntree
 
 
     def train(self, ):
     def train(self, ):
@@ -182,6 +183,13 @@ class OnlineLearningTrainerXgb:
             n = n + 1
             n = n + 1
             test_y_prob = self._pipeline_optimized.predict_proba(test_data, ntree_limit=n)[:, 1]
             test_y_prob = self._pipeline_optimized.predict_proba(test_data, ntree_limit=n)[:, 1]
             test_y = test_data[y_column]
             test_y = test_data[y_column]
+
+            # auc_test = roc_auc_score(test_y, test_y_prob)
+            # auc_test = round(auc_test, 4)
+            # df = pd.DataFrame({'label': test_y, 'pred': test_y_prob})
+            # dfkslift = eva_dfkslift(df)
+            # ks_test = round(dfkslift["ks"].max(), 4)
+
             perf = sc.perf_eva(test_y, test_y_prob, show_plot=False)
             perf = sc.perf_eva(test_y, test_y_prob, show_plot=False)
             auc_test = perf["AUC"]
             auc_test = perf["AUC"]
             ks_test = perf["KS"]
             ks_test = perf["KS"]
@@ -210,7 +218,8 @@ class OnlineLearningTrainerXgb:
         if self._ol_config.jupyter_print:
         if self._ol_config.jupyter_print:
             from IPython import display
             from IPython import display
             f_display_title(display, "模型优化过程")
             f_display_title(display, "模型优化过程")
-            display.display(self._df_param_optimized)
+            with df_print_nolimit():
+                display.display(self._df_param_optimized)
 
 
         metric_value_dict = {}
         metric_value_dict = {}