# -*- coding: utf-8 -*-
"""
@author: yq
@time: 2024/11/27
@desc: Demo script for OnlineLearningTrainerXgb. Loads the scorecardpy
       German credit dataset, builds a train/test split, trains with the
       configuration below, generates the report and prints the elapsed
       time in seconds.
"""
import time

from entitys import DataSplitEntity
from online_learning import OnlineLearningTrainerXgb

if __name__ == "__main__":
    # perf_counter() is monotonic, so the measured interval cannot be skewed
    # by system clock adjustments the way time.time() can.
    time_start = time.perf_counter()

    import scorecardpy as sc

    # Load the data.
    dat = sc.germancredit()
    # Replace "." in column names with "_".
    dat.columns = [c.replace(".", "_") for c in dat.columns]
    # Binarize the target: "bad" -> 1, anything else -> 0.
    dat["creditability"] = dat["creditability"].apply(lambda x: 1 if x == "bad" else 0)
    # First 609 rows are used for training, the remainder for testing.
    data = DataSplitEntity(train_data=dat[:609], test_data=dat[609:])

    # Trainer configuration.
    # NOTE(review): the original comment here read "feature processing", but
    # the code below only builds the configuration dict.
    cfg = {
        # Model resources; refer to the files under the ol_resources_demo
        # directory. Model file: model.pkl
        "path_resources": "/root/notebook/ol_resources_demo",
        # Project name; affects where data is stored.
        "project_name": "OnlineLearningDemo",
        "y_column": "creditability",
        # Learning rate.
        "lr": 0.01,
        "jupyter_print": True,
        # Stress test switch.
        "stress_test": False,
        # Number of sampling rounds for the stress test.
        "stress_sample_times": 10,
        # Column annotations (column name -> display label).
        "columns_anns": {
            "age_in_years": "年龄"
        },
        "params_xgb": {
            'objective': 'binary:logistic',
            'eval_metric': 'auc',
            'learning_rate': 0.1,
            'max_depth': 3,
            'subsample': None,
            'colsample_bytree': None,
            'alpha': 0,
            'lambda': 1,
            # NOTE(review): early_stopping_rounds (20) exceeds
            # num_boost_round (7), so early stopping presumably can never
            # trigger - confirm this is intended.
            'num_boost_round': 7,
            'early_stopping_rounds': 20,
            'verbose_eval': 10,
            'random_state': 2025,
            'save_pmml': True,
            'trees_print': False,
            # Presumably one of: tree_refresh, tree_add - confirm against
            # the trainer.
            'oltype': "tree_add",
            'add_columns': ['age_in_years'],
        }
    }

    # Train and generate the report.
    trainer = OnlineLearningTrainerXgb(data=data, **cfg)
    trainer.train()
    trainer.report()

    # Elapsed time in seconds.
    print(time.perf_counter() - time_start)