yq 4 сар өмнө
parent
commit
4b12742081

+ 2 - 2
app.py

@@ -112,5 +112,5 @@ with gr.Blocks() as demo:
             demo.queue(default_concurrency_limit=5)
             demo.launch(share=False, show_error=True, server_name="0.0.0.0", server_port=18066)
 
-        if __name__ == "__main__":
-            pass
+if __name__ == "__main__":
+    pass

+ 4 - 5
feature/feature_utils.py

@@ -118,6 +118,8 @@ def f_get_corr(data: pd.DataFrame, meth: str = 'spearman') -> pd.DataFrame:
 
 
 def f_get_ivf(data: pd.DataFrame) -> pd.DataFrame:
+    if len(data.columns.to_list()) <= 1:
+        return None
     vif_v = [vif(data.values, data.columns.get_loc(i)) for i in data.columns]
     vif_df = pd.DataFrame()
     vif_df["变量"] = data.columns
@@ -148,7 +150,7 @@ def f_get_model_score_bin(df, card, bins=None):
     train_score = sc.scorecard_ply(df, card, print_step=0)
     df['score'] = train_score
     if bins is None:
-        _, bins = pd.qcut(df['score'], q=10, retbins=True)
+        _, bins = pd.qcut(df['score'], q=10, retbins=True, duplicates="drop")
         bins = list(bins)
         bins[0] = -np.inf
         bins[-1] = np.inf
@@ -167,8 +169,5 @@ def f_calcu_model_psi(df_train, df_test):
     psi['训练样本数'] = list(tmp1['count'])
     psi['测试样本数'] = list(tmp2['count'])
     psi['训练样本数比例'] = list(tmp1['样本数比例'])
-    psi['测试样本数比例']=list(tmp2['样本数比例'])
+    psi['测试样本数比例'] = list(tmp2['样本数比例'])
     return psi
-
-
-

+ 2 - 0
feature/strategy_iv.py

@@ -33,6 +33,8 @@ class StrategyIv(FilterStrategyBase):
         return iv
 
     def _f_get_var_corr_image(self, train_woe):
+        if len(train_woe.columns.to_list()) <= 1:
+            return None
         train_corr = f_get_corr(train_woe)
         plt.figure(figsize=(12, 12))
         sns.heatmap(train_corr, vmax=1, square=True, cmap='RdBu', annot=True)

+ 15 - 10
webui/utils.py

@@ -125,10 +125,11 @@ def f_verify_param(data):
 
 
 def f_train(data, progress=gr.Progress(track_tqdm=True)):
-    # import time
-    # print(1111111)
-    # time.sleep(5)
-    # return gr.update(elem_id="train_button", value="111")
+    def _reset_component_state():
+        return {engine.get_elem_by_id("download_report"): gr.update(visible=False),
+                engine.get_elem_by_id("auc_df"): gr.update(visible=False),
+                engine.get_elem_by_id("gallery_auc"): gr.update(visible=False)}
+
     progress(0, desc="Starting")
     feature_search_strategy = engine.get(data, "feature_search_strategy")
     model_type = engine.get(data, "model_type")
@@ -139,9 +140,11 @@ def f_train(data, progress=gr.Progress(track_tqdm=True)):
     _clean_base_dir(data)
     # 校验参数
     if not f_verify_param(data):
-        return
+        yield _reset_component_state()
 
-        # 数据集划分
+    yield _reset_component_state()
+
+    # 数据集划分
     train_data, test_data = train_test_split(data_upload, test_size=test_split_rate, shuffle=True, random_state=2025)
     data_split = DataSplitEntity(train_data=train_data, val_data=None, test_data=test_data)
     progress(0.01)
@@ -163,7 +166,9 @@ def f_train(data, progress=gr.Progress(track_tqdm=True)):
 
     auc_df = metric_value_dict["模型结果"].table
 
-    return {engine.get_elem_by_id("train_progress"): gr.update(value="训练完成"),
-            engine.get_elem_by_id("auc_df"): gr.update(value=auc_df, visible=True),
-            engine.get_elem_by_id("gallery_auc"): gr.update(value=_get_auc_ks_images(data), visible=True),
-            engine.get_elem_by_id("download_report"): gr.update(visible=True)}
+    report_file_path = _get_save_path(data, "模型报告.docx")
+
+    yield {engine.get_elem_by_id("train_progress"): gr.update(value="训练完成"),
+           engine.get_elem_by_id("auc_df"): gr.update(value=auc_df, visible=True),
+           engine.get_elem_by_id("gallery_auc"): gr.update(value=_get_auc_ks_images(data), visible=True),
+           engine.get_elem_by_id("download_report"): gr.update(value=report_file_path, visible=True)}