12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970 |
- # -*- coding: utf-8 -*-
- """
- @author: yq
- @time: 2024/11/27
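- @desc: Demo script: runs OnlineLearningTrainerXgb (train + report) on the scorecardpy German credit dataset and prints the elapsed time.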
- """
- import time
- from entitys import DataSplitEntity
- from online_learning import OnlineLearningTrainerXgb
if __name__ == "__main__":
    # The timer starts before the scorecardpy import, so the elapsed time
    # printed at the end includes that import.
    started_at = time.time()

    import scorecardpy as sc

    # Load the data and replace "." with "_" in every column name.
    credit_df = sc.germancredit()
    original_names = credit_df.columns.tolist()
    credit_df.columns = [name.replace(".", "_") for name in original_names]

    # Encode the target column: "bad" -> 1, any other value -> 0.
    credit_df["creditability"] = credit_df["creditability"].apply(
        lambda label: 1 if label == "bad" else 0
    )

    # The first 609 rows form the training set; the remaining rows the test set.
    n_train = 609
    split_data = DataSplitEntity(
        train_data=credit_df[:n_train],
        test_data=credit_df[n_train:],
    )

    # Settings forwarded under the "params_xgb" key of the trainer config.
    xgb_params = {
        "objective": "binary:logistic",
        "eval_metric": "auc",
        "learning_rate": 0.1,
        "max_depth": 3,
        "subsample": None,
        "colsample_bytree": None,
        "alpha": 0,
        "lambda": 1,
        "num_boost_round": 7,
        "early_stopping_rounds": 20,
        "verbose_eval": 10,
        "random_state": 2025,
        "save_pmml": True,
        "trees_print": False,
        # Values listed by the original author: tree_refresh, tree_add
        "oltype": "tree_add",
        "add_columns": ["age_in_years"],
    }

    # Trainer configuration; every entry is passed to the trainer as a keyword argument.
    trainer_cfg = {
        # Model resources: see the files under the ol_resources_demo directory
        # (model file: model.pkl).
        "path_resources": "/root/notebook/ol_resources_demo",
        # Project name; affects where data is stored.
        "project_name": "OnlineLearningDemo",
        "y_column": "creditability",
        # Learning rate.
        "lr": 0.01,
        "jupyter_print": True,
        # Stress test switch.
        "stress_test": False,
        # Number of sampling rounds for the stress test.
        "stress_sample_times": 10,
        # Column annotations (column name -> display label).
        "columns_anns": {
            "age_in_years": "年龄",
        },
        "params_xgb": xgb_params,
    }

    # Train, generate the report, then print the elapsed wall-clock seconds.
    online_trainer = OnlineLearningTrainerXgb(data=split_data, **trainer_cfg)
    online_trainer.train()
    online_trainer.report()
    print(time.time() - started_at)
|