12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849 |
# -*- coding: utf-8 -*-
"""
@author: zsc
@time: 2024/11/18
@desc: Data processing
"""
# Data preprocessing module
class DataPreprocessor:
    """Clean a collection of user/action/item records.

    Records are accessed like dicts (``key in record``, ``record[key]``).
    Only records that contain all of the keys ``'user'``, ``'action'`` and
    ``'item'`` survive :meth:`preprocess`.
    """

    def __init__(self, data):
        """Store the raw records.

        Args:
            data: Iterable of dict-like records, or ``None`` for "no data".
        """
        self.data = data

    def preprocess(self):
        """Run the full cleaning pipeline and return the cleaned records.

        Steps, in order: drop records missing a required key, fill missing
        values, convert the three key fields to ``str``, remove duplicates.

        Returns:
            list: New list of shallow-copied records; the records held in
            ``self.data`` are left unmodified. Empty when ``self.data`` is
            ``None`` or contains no complete record.
        """
        if self.data is None:
            return []

        # Copy each surviving record so the in-place helpers below do not
        # rewrite the dicts owned by the caller.
        processed_data = [
            dict(item) for item in self.data
            if 'user' in item and 'action' in item and 'item' in item
        ]
        # NOTE: the filter above already requires 'item', so this step is
        # currently a no-op inside the pipeline; it is kept so the pipeline
        # stays correct if the filter is ever relaxed.
        processed_data = self.fill_missing_values(processed_data)
        # Normalise types BEFORE de-duplicating: otherwise 1 and '1' count as
        # different users and both records survive as identical output rows.
        # It also makes every identity field hashable for the `seen` set.
        processed_data = self.convert_data_types(processed_data)
        processed_data = self.remove_duplicates(processed_data)
        return processed_data

    def remove_duplicates(self, data):
        """Return the records of ``data`` with duplicates removed.

        Two records are duplicates when their ``(user, action, item)`` values
        are equal; the first occurrence is kept and input order is preserved.

        Args:
            data: Iterable of records, each containing the three keys.

        Returns:
            list: New list referencing the kept (original) record objects.

        Raises:
            KeyError: If a record lacks one of the three keys.
            TypeError: If one of the three values is unhashable.
        """
        unique_data = []
        seen = set()
        for item in data:
            identifier = (item['user'], item['action'], item['item'])
            if identifier not in seen:
                unique_data.append(item)
                seen.add(identifier)
        return unique_data

    def fill_missing_values(self, data):
        """Set ``'item'`` to ``'unknown'`` on every record that lacks it.

        Records are modified in place.

        Args:
            data: Iterable of dict records.

        Returns:
            The same ``data`` object that was passed in.
        """
        for item in data:
            # setdefault only writes when the key is absent.
            item.setdefault('item', 'unknown')
        return data

    def convert_data_types(self, data):
        """Convert the ``'user'``, ``'action'`` and ``'item'`` values to ``str``.

        Records are modified in place.

        Args:
            data: Iterable of records, each containing the three keys.

        Returns:
            The same ``data`` object that was passed in.

        Raises:
            KeyError: If a record lacks one of the three keys.
        """
        for item in data:
            for key in ('user', 'action', 'item'):
                item[key] = str(item[key])
        return data
|