# app.py
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: yq
  4. @time: 2024/12/4
  5. @desc:
  6. """
  7. import gradio as gr
  8. from init import init
  9. from webui import f_project_is_exist, f_data_upload, engine, f_train
  10. init()
  11. input_elems = set()
  12. elem_dict = {}
  13. with gr.Blocks() as demo:
  14. gr.HTML('<h1 ><center><font size="5">Easy-ML</font></center></h1>')
  15. gr.HTML('<h2 ><center><font size="2">快速建模工具</font></center></h2>')
  16. gr.State([])
  17. with gr.Tabs():
  18. with gr.TabItem("数据"):
  19. with gr.Row():
  20. project_name = gr.Textbox(label="项目名称", placeholder="请输入不重复的项目名称",
  21. info="项目名称将会被作为缓存目录名称,如果重复会导致结果被覆盖")
  22. with gr.Row():
  23. file_data = gr.File(label="建模数据", file_types=[".csv", ".xlsx"])
  24. with gr.Row():
  25. data_upload = gr.Dataframe(visible=False, label="当前上传数据", max_height=300)
  26. with gr.Row():
  27. data_insight = gr.Dataframe(visible=False, label="数据探查", max_height=600, wrap=True)
  28. input_elems.update(
  29. {project_name, file_data, data_upload})
  30. elem_dict.update(dict(
  31. project_name=project_name,
  32. file_data=file_data,
  33. data_upload=data_upload
  34. ))
  35. with gr.TabItem("训练"):
  36. with gr.Row():
  37. with gr.Column():
  38. with gr.Row():
  39. model_type = gr.Dropdown(["lr"], value="lr", label="模型")
  40. search_strategy = gr.Dropdown(["iv"], value="iv", label="特征搜索策略")
  41. with gr.Row():
  42. y_column = gr.Dropdown(label="Y标签列", interactive=True, info="其值应该是0或者1")
  43. x_columns_candidate = gr.Dropdown(label="X特征列", multiselect=True, interactive=True,
  44. info="不应包含Y特征列,不选择则使用全部特征")
  45. with gr.Row():
  46. x_candidate_num = gr.Number(value=10, label="建模最多保留特征数", info="保留最重要的N个特征",
  47. interactive=True)
  48. sample_rate = gr.Slider(0.05, 1, value=0.1, label="分箱组合采样率", info="对2-5箱所有分箱组合进行采样",
  49. step=0.01, interactive=True)
  50. special_values = gr.Textbox(label="特殊值", placeholder="可以是dict list str格式",
  51. info="分箱时特殊值会单独一个分箱")
  52. with gr.Row():
  53. test_split_strategy = gr.Dropdown(["随机"], value="随机", label="测试集划分方式")
  54. test_split_rate = gr.Slider(0, 0.5, value=0.3, label="测试集划分比例", step=0.05, interactive=True)
  55. train_button = gr.Button("开始训练", variant="primary", elem_id="train_button")
  56. with gr.Column():
  57. with gr.Row():
  58. train_progress = gr.Textbox(label="训练进度")
  59. with gr.Row():
  60. auc_df = gr.Dataframe(visible=False, label="auc ks", max_height=300, interactive=False)
  61. with gr.Row():
  62. gallery_auc = gr.Gallery(label="auc ks", columns=[1], rows=[2], object_fit="contain",
  63. height="auto", visible=False, interactive=False)
  64. input_elems.update(
  65. {model_type, search_strategy, y_column, x_columns_candidate, x_candidate_num, sample_rate,
  66. special_values, test_split_strategy, test_split_rate
  67. })
  68. elem_dict.update(dict(
  69. model_type=model_type,
  70. feature_search_strategy=search_strategy,
  71. y_column=y_column,
  72. x_columns_candidate=x_columns_candidate,
  73. x_candidate_num=x_candidate_num,
  74. sample_rate=sample_rate,
  75. special_values=special_values,
  76. test_split_strategy=test_split_strategy,
  77. test_split_rate=test_split_rate,
  78. ))
  79. engine.add_elems(elem_dict)
  80. project_name.change(fn=f_project_is_exist, inputs=input_elems)
  81. file_data.upload(fn=f_data_upload, inputs=input_elems, outputs=[data_upload, data_insight, y_column,
  82. x_columns_candidate])
  83. train_button.click(fn=f_train, inputs=input_elems, outputs=[train_progress, auc_df, gallery_auc])
  84. demo.queue(concurrency_count=3)
  85. demo.launch(share=False, show_error=True)
  86. if __name__ == "__main__":
  87. pass