zhusc 5 mesiacov pred
rodič
commit
bc6fb65b40

+ 11 - 2
user_events/analyze/BehaviorAnalyzer.py

@@ -14,14 +14,23 @@ class BehaviorAnalyzer:
     def __init__(self, data):
         self.data = data
 
+
     def analyze(self):
-        # 分析用户行为模式,并展示各个指标的统计情况
+        # 根据新的数据结构调整分析逻辑
         behavior_count = defaultdict(lambda: defaultdict(int))
         action_stats = defaultdict(int)
+        product_stats = defaultdict(int)
+        channel_stats = defaultdict(int)
+
         for item in self.data:
             user = item['user']
             action = item['action']
+            product = item['product']
+            channel = item['channel']
             behavior_count[user][action] += 1
             action_stats[action] += 1
-        return behavior_count, action_stats
+            product_stats[product] += 1
+            channel_stats[channel] += 1
+
+        return behavior_count, action_stats, product_stats, channel_stats
 

+ 2 - 2
user_events/analyze/DetectAnomalies.py

@@ -11,9 +11,9 @@ class AnomalyDetector:
         self.behavior_data = behavior_data
 
     def detect(self):
-        # 识别异常行为
+        # 根据新的数据结构调整异常检测逻辑
         anomalies = []
         for user, actions in self.behavior_data.items():
-            if 'purchase' in actions and actions['purchase'] > 5:
+            if '结果-支用成功' in actions and actions['结果-支用成功'] > 5:
                 anomalies.append(user)
         return anomalies

+ 1 - 1
user_events/analyze/SegmentUsers.py

@@ -11,7 +11,7 @@ class UserSegmentation:
         self.behavior_data = behavior_data
 
     def segment(self):
-        # 简单的用户分群,根据行为次数划分
+        # 根据新的数据结构调整分群逻辑
         segments = {
             'high_activity': [],
             'medium_activity': [],

+ 17 - 3
user_events/data/collector/DataCollector.py

@@ -8,17 +8,31 @@
 
 import random
 
+
+# 数据收集模块
 class DataCollector:
     def __init__(self):
         self.data = []
 
     def collect(self):
         # 模拟从数据库或API收集数据,包含至少20条记录和多种行为
-        actions = ['click', 'view', 'add_to_cart', 'purchase', 'review']
-        prodoct = ['渝快贷', '', 'item3', 'item4', 'item5']
+        actions = [
+            '浏览-产品介绍页', '点击-立即申请', '浏览-公积金授权页', '浏览-额度申请结果',
+            '点击-立即提额', '浏览-提额方式选择页', '点击-线上公积金认证', '浏览-提额申请结果',
+            '点击-立即支用', '填写-借款申请页', '浏览-确认借款页', '浏览-支用结果页', '结果-支用成功'
+        ]
+        products = ['渝快贷', '渝悦贷', '房快贷']
+        channels = ['手机银行', '微银行']
         users = ['User{}'.format(i) for i in range(1, 21)]
         self.data = [
-            {'user': random.choice(users), 'action': random.choice(actions), 'item': random.choice(items)}
+            {
+                'user': random.choice(users),
+                'action': random.choice(actions),
+                'product': random.choice(products),
+                'channel': random.choice(channels)
+            }
             for _ in range(100)
         ]
         return self.data
+
+# 其他模块代码保持不变...

+ 15 - 8
user_events/data/processor/DataProcessor.py

@@ -15,7 +15,7 @@ class DataPreprocessor:
         # 更多的数据预处理函数
         processed_data = [
             item for item in self.data
-            if 'user' in item and 'action' in item and 'item' in item
+            if 'user' in item and 'action' in item and 'product' in item and 'channel' in item
         ]
         processed_data = self.remove_duplicates(processed_data)
         processed_data = self.fill_missing_values(processed_data)
@@ -27,23 +27,30 @@ class DataPreprocessor:
         unique_data = []
         seen = set()
         for item in data:
-            identifier = (item['user'], item['action'], item['item'])
+            # 创建一个元组,包含用户、行为、产品和渠道,用于识别唯一记录
+            identifier = (item['user'], item['action'], item['product'], item['channel'])
             if identifier not in seen:
-                unique_data.append(item)
                 seen.add(identifier)
+                unique_data.append(item)
         return unique_data
 
     def fill_missing_values(self, data):
         # 填充缺失值
         for item in data:
-            if 'item' not in item:
-                item['item'] = 'unknown'
+            if 'user' not in item:
+                item['user'] = 'UnknownUser'
+            if 'action' not in item:
+                item['action'] = 'UnknownAction'
+            if 'product' not in item:
+                item['product'] = 'UnknownProduct'
+            if 'channel' not in item:
+                item['channel'] = 'UnknownChannel'
         return data
 
     def convert_data_types(self, data):
         # 转换数据类型
         for item in data:
+            # 假设我们需要将用户ID转换为字符串类型
             item['user'] = str(item['user'])
-            item['action'] = str(item['action'])
-            item['item'] = str(item['item'])
-        return data
+            # 其他数据类型转换可以根据需要添加
+        return data

+ 2 - 2
user_events/main.py

@@ -32,7 +32,7 @@ def main():
 
     # 实例化行为分析模块
     analyzer = BehaviorAnalyzer.BehaviorAnalyzer(processed_data)
-    behavior_data, action_stats = analyzer.analyze()
+    behavior_data, action_stats, product_stats, channel_stats  = analyzer.analyze()
 
     # 实例化用户分群模块
     segmenter = SegmentUsers.UserSegmentation(behavior_data)
@@ -43,7 +43,7 @@ def main():
     anomalies = detector.detect()
 
     # 实例化报告生成模块
-    generator = GenerateReport.ReportGenerator(processed_data, anomalies, user_segments, action_stats)
+    generator = GenerateReport.ReportGenerator(processed_data, anomalies, user_segments, action_stats, product_stats, channel_stats)
     report = generator.generate()
 
     # 打印报告摘要

+ 49 - 15
user_events/monitor/GenerateReport.py

@@ -9,45 +9,79 @@ import random
 from collections import defaultdict
 import matplotlib.pyplot as plt
 
+plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
+plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
+
 # 报告生成模块(续)
 class ReportGenerator:
-    def __init__(self, data, anomalies, segments, action_stats):
+    def __init__(self, data, anomalies, segments, action_stats, product_stats, channel_stats):
         self.data = data
         self.anomalies = anomalies
         self.segments = segments
         self.action_stats = action_stats
+        self.product_stats = product_stats
+        self.channel_stats = channel_stats
 
     def generate(self):
         # 生成用户行为报告,并展示成图表形式
+
         report = {
             'total_users': len(set([item['user'] for item in self.data])),
             'total_actions': len(self.data),
             'anomalies': self.anomalies,
             'user_segments': self.segments,
-            'action_stats': self.action_stats
+            'action_stats': self.action_stats,
+            'product_stats': self.product_stats,
+            'channel_stats': self.channel_stats
         }
         self.plot_action_stats(self.action_stats)
-        self.plot_user_segments(self.segments)
+        self.plot_product_stats(self.product_stats)
+        self.plot_channel_stats(self.channel_stats)
         return report
 
     def plot_action_stats(self, action_stats):
+        if not action_stats:
+            print("No data for action stats.")
+            return
         # 生成行为统计图表
         actions = list(action_stats.keys())
         counts = list(action_stats.values())
-        plt.figure(figsize=(10, 6))
+        plt.figure(figsize=(12, 8))  # 调整图表大小
         plt.bar(actions, counts, color='skyblue')
-        plt.xlabel('Actions')
-        plt.ylabel('Counts')
-        plt.title('Action Statistics')
+        plt.xlabel('行为', fontsize=12)  # 调整字体大小
+        plt.ylabel('次数', fontsize=12)
+        plt.title('行为统计', fontsize=14)
+        plt.xticks(rotation=45, fontsize=10)  # 旋转刻度标签并调整字体大小
+        plt.yticks(fontsize=10)
+        plt.tight_layout()  # 自动调整布局
+        # 如果需要,可以手动调整边距
+        # plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
+        plt.show()
+
+    def plot_product_stats(self, product_stats):
+        if not product_stats:
+            print("No data for product stats.")
+            return
+        # 生成产品统计图表
+        products = list(product_stats.keys())
+        counts = list(product_stats.values())
+        plt.figure(figsize=(10, 6))
+        plt.bar(products, counts, color='lightgreen')
+        plt.xlabel('产品')
+        plt.ylabel('次数')
+        plt.title('产品统计')
         plt.show()
 
-    def plot_user_segments(self, segments):
-        # 生成用户分群图表
-        segment_names = list(segments.keys())
-        segment_counts = [len(segments[segment]) for segment in segment_names]
+    def plot_channel_stats(self, channel_stats):
+        if not channel_stats:
+            print("No data for channel stats.")
+            return
+        # 生成渠道统计图表
+        channels = list(channel_stats.keys())
+        counts = list(channel_stats.values())
         plt.figure(figsize=(10, 6))
-        plt.bar(segment_names, segment_counts, color='lightgreen')
-        plt.xlabel('User Segments')
-        plt.ylabel('Number of Users')
-        plt.title('User Segmentation')
+        plt.bar(channels, counts, color='orange')
+        plt.xlabel('渠道')
+        plt.ylabel('次数')
+        plt.title('渠道统计')
         plt.show()