123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475 |
- """
- @author: yq
- @time: 2024/11/27
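- @desc: Demo script: trains the project Pipeline on scorecardpy's German credit dataset, generates its report, and prints the elapsed time in seconds.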
- """
- import time
- from entitys import DataSplitEntity, MlConfigEntity
- from pipeline import Pipeline
if __name__ == "__main__":
    # Demo run: fit the project Pipeline on scorecardpy's German credit data,
    # produce its report, and print the elapsed time in seconds.

    # perf_counter() is a monotonic, high-resolution clock, so the duration
    # printed at the end cannot be skewed by system clock adjustments
    # (unlike a difference of two time.time() readings).
    start = time.perf_counter()

    # Imported after the timer starts, so the reported duration also covers
    # the cost of importing scorecardpy.
    import scorecardpy as sc

    dat = sc.germancredit()
    # Replace every "." in the column names with "_".
    dat.columns = [c.replace(".", "_") for c in dat.columns.tolist()]
    # Recode the creditability column as integers: "bad" -> 1, anything else -> 0.
    dat["creditability"] = dat["creditability"].apply(lambda x: 1 if x == "bad" else 0)

    # Row-slice split: the first 709 rows are the training set, the rest the test set.
    train_rows = 709
    data = DataSplitEntity(train_data=dat[:train_rows], test_data=dat[train_rows:])

    # Options forwarded verbatim to Pipeline as keyword arguments. Their exact
    # semantics are defined by the project's Pipeline, which is not visible in
    # this file -- NOTE(review): confirm against Pipeline before changing values.
    cfg = {
        "project_name": "demo",
        "jupyter_print": True,
        "format_bin": True,
        "max_feature_num": 20,
        "stress_test": True,
        "stress_sample_times": 10,
        # Name of the column that was recoded to 0/1 above.
        "y_column": "creditability",
        # Column name -> Chinese annotation text ("age", "credit history").
        "columns_anns": {
            "age_in_years": "年龄",
            "credit_history": "借贷历史",
        },
        "columns_exclude": [],
        "model_type": "xgb",
        "feature_strategy": "norm",
    }

    train_pipeline = Pipeline(data=data, **cfg)
    train_pipeline.train()
    train_pipeline.report()

    # Elapsed seconds since `start`.
    print(time.perf_counter() - start)
|