|
@@ -187,6 +187,12 @@ class StrategyIv(FilterStrategyBase):
|
|
|
sv_bin_list.append(binning)
|
|
|
return sv_bin_list
|
|
|
|
|
|
+ def _get_bin_left_value(bin: str):
|
|
|
+ if "," not in bin:
|
|
|
+ return float(bin)
|
|
|
+ left = bin.split(",")[0]
|
|
|
+ return float(left[1:])
|
|
|
+
|
|
|
def _get_bins(df, x_column, y_column, breaks_list):
|
|
|
dtm = pd.DataFrame({'y': df[y_column], 'value': df[x_column]})
|
|
|
bstbrks = [-np.inf] + breaks_list + [np.inf]
|
|
@@ -196,6 +202,8 @@ class StrategyIv(FilterStrategyBase):
|
|
|
bins = dtm.groupby(['bin'], group_keys=False)['y'].agg([_n0, _n1]) \
|
|
|
.reset_index().rename(columns={'_n0': 'good', '_n1': 'bad'})
|
|
|
bins['is_special_values'] = [False] * len(bins)
|
|
|
+ bins["ordered"] = bins['bin'].apply(_get_bin_left_value)
|
|
|
+ bins = bins.sort_values(by=["ordered"], ascending=[True])
|
|
|
return bins
|
|
|
|
|
|
def _get_badprob(bins):
|
|
@@ -356,7 +364,7 @@ class StrategyIv(FilterStrategyBase):
|
|
|
for x_column in tqdm(x_columns_candidate):
|
|
|
if is_numeric_dtype(data.train_data[x_column]):
|
|
|
iv_max, breaks_list, bins_enum_best_point = self._f_get_best_bins_numeric(data, x_column)
|
|
|
- if len(bins_enum_best_point) != 0 :
|
|
|
+ if len(bins_enum_best_point) != 0:
|
|
|
numeric_candidate_dict_all[x_column] = []
|
|
|
for point in bins_enum_best_point:
|
|
|
numeric_candidate_dict_all[x_column].append(CandidateFeatureEntity(x_column, point, 0))
|