Explorar o código

Initial commit

zhusc hai 5 meses
pai
achega
bc934d6167

+ 44 - 7
user_events/__init__.py

@@ -1,9 +1,46 @@
-# -*- coding: utf-8 -*-
-"""
-@author: zsc
-@time: 2024/11/18
-@desc:  存量运营平台-行为客户行为挖掘及监控
-"""
+import random
+
+# 数据收集模块
+class DataCollector:
+    def __init__(self):
+        self.data = []
+
+    def collect(self):
+        # 定义三个流程及其名称
+        processes = {
+            '申请流程': ['浏览-产品介绍页', '点击-立即申请', '浏览-公积金授权页', '浏览-额度申请结果'],
+            '提额流程': ['点击-立即提额', '浏览-提额方式选择页', '点击-线上公积金认证', '浏览-提额申请结果'],
+            '支用流程': ['点击-立即支用', '填写-借款申请页', '浏览-确认借款页', '浏览-支用结果页', '结果-支用成功']
+        }
+        users = ['User{}'.format(i) for i in range(1, 21)]
+
+        for user in users:
+            # 随机选择一个流程
+            process_name, process_steps = random.choice(list(processes.items()))
+            # Take a random-length prefix of the flow, always starting at step 0 (the sequence may stop before the final step)
+            start_index = 0
+            end_index = random.randint(start_index, len(process_steps) - 1)
+            actions = process_steps[start_index:end_index + 1]
+            user_actions = {
+                'user': user,
+                'process': process_name,
+                'actions': actions,
+                'product': random.choice(['渝快贷', '渝悦贷', '房快贷']),
+                'channel': random.choice(['手机银行', '微银行'])
+            }
+            self.data.append(user_actions)
+        return self.data
+
+# 主函数,用于测试数据收集模块
+def main():
+    collector = DataCollector()
+    data = collector.collect()
+    for user_actions in data:
+        print(f"User: {user_actions['user']}, Process: {user_actions['process']}, Product: {user_actions['product']}, Channel: {user_actions['channel']}")
+        print("Actions Sequence:")
+        for action in user_actions['actions']:
+            print(f"  - {action}")
+        print("")  # 添加空行以分隔不同用户的行为序列
 
 if __name__ == "__main__":
-    pass
+    main()

+ 14 - 12
user_events/analyze/BehaviorAnalyzer.py

@@ -14,23 +14,25 @@ class BehaviorAnalyzer:
     def __init__(self, data):
         self.data = data
 
-
     def analyze(self):
-        # 根据新的数据结构调整分析逻辑
-        behavior_count = defaultdict(lambda: defaultdict(int))
+        # 分析行为数据,例如计算每个流程的行为次数
+        process_stats = defaultdict(lambda: defaultdict(int))
         action_stats = defaultdict(int)
         product_stats = defaultdict(int)
         channel_stats = defaultdict(int)
 
-        for item in self.data:
-            user = item['user']
-            action = item['action']
-            product = item['product']
-            channel = item['channel']
-            behavior_count[user][action] += 1
-            action_stats[action] += 1
+        for user_actions in self.data:
+            process = user_actions['process']
+            actions = user_actions['actions']
+            product = user_actions['product']
+            channel = user_actions['channel']
+
+            for action in actions:
+                process_stats[process][action] += 1
+                action_stats[action] += 1
             product_stats[product] += 1
             channel_stats[channel] += 1
 
-        return behavior_count, action_stats, product_stats, channel_stats
-
+        # 将内部字典转换为普通字典
+        process_stats = {process: dict(actions) for process, actions in process_stats.items()}
+        return self.data, process_stats, action_stats, product_stats, channel_stats

+ 14 - 5
user_events/analyze/DetectAnomalies.py

@@ -4,16 +4,25 @@
 @time: 2024/11/18
 @desc:  行为分析
 """
+from collections import defaultdict
+
 
 # 异常检测模块
 class AnomalyDetector:
-    def __init__(self, behavior_data):
-        self.behavior_data = behavior_data
+    def __init__(self, data):
+        self.data = data
 
     def detect(self):
-        # 根据新的数据结构调整异常检测逻辑
+        # 简单示例:检测行为次数异常高的用户
+        user_behavior_count = defaultdict(int)
+        for user_actions in self.data:
+            user = user_actions['user']
+            actions = user_actions['actions']
+            user_behavior_count[user] += len(actions)
+
         anomalies = []
-        for user, actions in self.behavior_data.items():
-            if '结果-支用成功' in actions and actions['结果-支用成功'] > 5:
+        for user, count in user_behavior_count.items():
+            if count > 20:  # 假设行为次数超过20为异常
                 anomalies.append(user)
+
         return anomalies

+ 17 - 15
user_events/analyze/SegmentUsers.py

@@ -4,25 +4,27 @@
 @time: 2024/11/18
 @desc:  行为分析
 """
+from collections import defaultdict
+
 
 # 用户分群模块
 class UserSegmentation:
-    def __init__(self, behavior_data):
-        self.behavior_data = behavior_data
+    def __init__(self, data):
+        self.data = data
 
     def segment(self):
-        # 根据新的数据结构调整分群逻辑
-        segments = {
-            'high_activity': [],
-            'medium_activity': [],
-            'low_activity': []
-        }
-        for user, actions in self.behavior_data.items():
-            total_actions = sum(actions.values())
-            if total_actions > 10:
-                segments['high_activity'].append(user)
-            elif total_actions > 5:
-                segments['medium_activity'].append(user)
+        # 简单示例:根据用户行为数量分群
+        user_behavior_count = defaultdict(int)
+        for user_actions in self.data:
+            user = user_actions['user']
+            actions = user_actions['actions']
+            user_behavior_count[user] += len(actions)
+
+        segments = {'高活跃用户': [], '低活跃用户': []}
+        for user, count in user_behavior_count.items():
+            if count > 5:
+                segments['高活跃用户'].append(user)
             else:
-                segments['low_activity'].append(user)
+                segments['低活跃用户'].append(user)
+
         return segments

+ 25 - 18
user_events/data/collector/DataCollector.py

@@ -15,24 +15,31 @@ class DataCollector:
         self.data = []
 
     def collect(self):
-        # 模拟从数据库或API收集数据,包含至少20条记录和多种行为
-        actions = [
-            '浏览-产品介绍页', '点击-立即申请', '浏览-公积金授权页', '浏览-额度申请结果',
-            '点击-立即提额', '浏览-提额方式选择页', '点击-线上公积金认证', '浏览-提额申请结果',
-            '点击-立即支用', '填写-借款申请页', '浏览-确认借款页', '浏览-支用结果页', '结果-支用成功'
-        ]
-        products = ['渝快贷', '渝悦贷', '房快贷']
-        channels = ['手机银行', '微银行']
-        users = ['User{}'.format(i) for i in range(1, 21)]
-        self.data = [
-            {
-                'user': random.choice(users),
-                'action': random.choice(actions),
-                'product': random.choice(products),
-                'channel': random.choice(channels)
+        # 定义三个流程及其名称
+        processes = {
+            '申请流程': ['浏览-产品介绍页', '点击-立即申请', '浏览-公积金授权页', '浏览-额度申请结果'],
+            '提额流程': ['点击-立即提额', '浏览-提额方式选择页', '点击-线上公积金认证', '浏览-提额申请结果'],
+            '支用流程': ['点击-立即支用', '填写-借款申请页', '浏览-确认借款页', '浏览-支用结果页', '结果-支用成功']
+        }
+        users = ['User{}'.format(i) for i in range(1, 201)]
+
+        for user in users:
+            # 随机选择一个流程
+            process_name, process_steps = random.choice(list(processes.items()))
+            # Take a random-length prefix of the flow, always starting at step 0 (the sequence may stop before the final step)
+            start_index = 0
+            end_index = random.randint(start_index, len(process_steps) - 1)
+            actions = process_steps[start_index:end_index + 1]
+            user_actions = {
+                'user': user,
+                'process': process_name,
+                'actions': actions,
+                'product': random.choice(['渝快贷', '渝悦贷', '房快贷']),
+                'channel': random.choice(['手机银行', '微银行'])
             }
-            for _ in range(100)
-        ]
+            self.data.append(user_actions)
         return self.data
 
-# 其他模块代码保持不变...
+
+
+

+ 22 - 40
user_events/data/processor/DataProcessor.py

@@ -4,7 +4,7 @@
 @time: 2024/11/18
 @desc:  数据处理
 """
-
+from collections import defaultdict
 
 # 数据预处理模块
 class DataPreprocessor:
@@ -12,45 +12,27 @@ class DataPreprocessor:
         self.data = data
 
     def preprocess(self):
-        # 更多的数据预处理函数
-        processed_data = [
-            item for item in self.data
-            if 'user' in item and 'action' in item and 'product' in item and 'channel' in item
-        ]
-        processed_data = self.remove_duplicates(processed_data)
-        processed_data = self.fill_missing_values(processed_data)
-        processed_data = self.convert_data_types(processed_data)
-        return processed_data
+        # 这里可以添加更多的预处理步骤,例如数据清洗、格式化等
+        # Currently each record is rebuilt with its action labels lower-cased; all other fields are copied unchanged
+        preprocessed_data = []
+
+        for user_actions in self.data:
+            user = user_actions['user']
+            process = user_actions['process']
+            actions = user_actions['actions']
+            product = user_actions['product']
+            channel = user_actions['channel']
 
-    def remove_duplicates(self, data):
-        # 去除重复数据
-        unique_data = []
-        seen = set()
-        for item in data:
-            # 创建一个元组,包含用户、行为、产品和渠道,用于识别唯一记录
-            identifier = (item['user'], item['action'], item['product'], item['channel'])
-            if identifier not in seen:
-                seen.add(identifier)
-                unique_data.append(item)
-        return unique_data
+            # 假设的预处理步骤,例如过滤掉某些行为或添加额外的信息
+            # 这里我们只是将行为序列转换为小写
+            preprocessed_actions = [action.lower() for action in actions]
 
-    def fill_missing_values(self, data):
-        # 填充缺失值
-        for item in data:
-            if 'user' not in item:
-                item['user'] = 'UnknownUser'
-            if 'action' not in item:
-                item['action'] = 'UnknownAction'
-            if 'product' not in item:
-                item['product'] = 'UnknownProduct'
-            if 'channel' not in item:
-                item['channel'] = 'UnknownChannel'
-        return data
+            preprocessed_data.append({
+                'user': user,
+                'process': process,
+                'actions': preprocessed_actions,
+                'product': product,
+                'channel': channel
+            })
 
-    def convert_data_types(self, data):
-        # 转换数据类型
-        for item in data:
-            # 假设我们需要将用户ID转换为字符串类型
-            item['user'] = str(item['user'])
-            # 其他数据类型转换可以根据需要添加
-        return data
+        return preprocessed_data

+ 12 - 17
user_events/main.py

@@ -22,41 +22,36 @@ from monitor import GenerateReport
 
 # 主函数
 def main():
-    # 实例化数据收集模块
+    # 数据收集
     collector = DataCollector.DataCollector()
     raw_data = collector.collect()
 
-    # 实例化数据预处理模块
+    # 数据预处理
     preprocessor = DataProcessor.DataPreprocessor(raw_data)
     processed_data = preprocessor.preprocess()
 
-    # 实例化行为分析模块
+    # 行为分析
     analyzer = BehaviorAnalyzer.BehaviorAnalyzer(processed_data)
-    behavior_data, action_stats, product_stats, channel_stats  = analyzer.analyze()
+    behavior_data, process_stats, action_stats, product_stats, channel_stats = analyzer.analyze()
 
-    # 实例化用户分群模块
+    # 用户分群
     segmenter = SegmentUsers.UserSegmentation(behavior_data)
     user_segments = segmenter.segment()
 
-    # 实例化异常检测模块
+    # 异常检测
     detector = DetectAnomalies.AnomalyDetector(behavior_data)
     anomalies = detector.detect()
 
-    # 实例化报告生成模块
-    generator = GenerateReport.ReportGenerator(processed_data, anomalies, user_segments, action_stats, product_stats, channel_stats)
+    # 报告生成
+    generator = GenerateReport.ReportGenerator(processed_data, anomalies, user_segments, process_stats, action_stats, product_stats,
+                                channel_stats)
     report = generator.generate()
 
-    # 打印报告摘要
-    print("Report Summary:")
-    print(f"Total Users: {report['total_users']}")
-    print(f"Total Actions: {report['total_actions']}")
-    print(f"Anomalies: {report['anomalies']}")
-    print(f"User Segments: {report['user_segments']}")
-
-    # 实例化实时监控模块
+    # 实时监控
     monitor = Monitor.RealTimeMonitor(processed_data)
-    print("Starting real-time monitoring...")
+    print("\nStarting real-time monitoring...")
     monitor.monitor()
 
+
 if __name__ == "__main__":
     main()

+ 69 - 53
user_events/monitor/GenerateReport.py

@@ -8,80 +8,96 @@ import time
 import random
 from collections import defaultdict
 import matplotlib.pyplot as plt
+import numpy as np
 
-plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
-plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号
-
-# 报告生成模块(续)
+# 报告生成模块
 class ReportGenerator:
-    def __init__(self, data, anomalies, segments, action_stats, product_stats, channel_stats):
+    def __init__(self, data, anomalies, segments, process_stats, action_stats, product_stats, channel_stats):
         self.data = data
         self.anomalies = anomalies
         self.segments = segments
+        self.process_stats = process_stats
         self.action_stats = action_stats
         self.product_stats = product_stats
         self.channel_stats = channel_stats
 
     def generate(self):
         # 生成用户行为报告,并展示成图表形式
-
         report = {
-            'total_users': len(set([item['user'] for item in self.data])),
-            'total_actions': len(self.data),
+            'total_users': len(set([user_actions['user'] for user_actions in self.data])),
+            'total_actions': sum(len(user_actions['actions']) for user_actions in self.data),
             'anomalies': self.anomalies,
             'user_segments': self.segments,
+            'process_stats': self.process_stats,
             'action_stats': self.action_stats,
             'product_stats': self.product_stats,
             'channel_stats': self.channel_stats
         }
-        self.plot_action_stats(self.action_stats)
-        self.plot_product_stats(self.product_stats)
-        self.plot_channel_stats(self.channel_stats)
+
+        # 打印报告摘要
+        print("Report Summary:")
+        print(f"Total Users: {report['total_users']}")
+        print(f"Total Actions: {report['total_actions']}")
+        print(f"Anomalies: {report['anomalies']}")
+        print(f"User Segments: {report['user_segments']}")
+
+        # 展示每个流程的行为次数漏斗图
+        for process, actions in self.process_stats.items():
+            self.plot_bar(actions, f"{process} Funnel")
+
+        # 展示行为统计图表
+        self.plot_stats(self.action_stats, "Action Statistics")
+
+        # 展示产品统计图表
+        self.plot_stats(self.product_stats, "Product Statistics")
+
+        # 展示渠道统计图表
+        self.plot_stats(self.channel_stats, "Channel Statistics")
+
         return report
 
-    def plot_action_stats(self, action_stats):
-        if not action_stats:
-            print("No data for action stats.")
-            return
-        # 生成行为统计图表
-        actions = list(action_stats.keys())
-        counts = list(action_stats.values())
-        plt.figure(figsize=(12, 8))  # 调整图表大小
-        plt.bar(actions, counts, color='skyblue')
-        plt.xlabel('行为', fontsize=12)  # 调整字体大小
-        plt.ylabel('次数', fontsize=12)
-        plt.title('行为统计', fontsize=14)
-        plt.xticks(rotation=45, fontsize=10)  # 旋转刻度标签并调整字体大小
-        plt.yticks(fontsize=10)
-        plt.tight_layout()  # 自动调整布局
-        # 如果需要,可以手动调整边距
-        # plt.subplots_adjust(left=0.1, right=0.9, top=0.9, bottom=0.1)
+    def plot_stats(self, stats, title):
+        # 生成并展示统计图表
+        labels, values = zip(*stats.items())
+        plt.figure(figsize=(12, 6))  # 增加图表宽度
+        plt.bar(labels, values)
+        plt.title(title)
+        plt.xticks(rotation=45, ha='right', fontsize=10)  # 旋转标签,右对齐,减小字体大小
+        plt.tight_layout()  # 调整布局以避免标签被截断
         plt.show()
 
-    def plot_product_stats(self, product_stats):
-        if not product_stats:
-            print("No data for product stats.")
-            return
-        # 生成产品统计图表
-        products = list(product_stats.keys())
-        counts = list(product_stats.values())
-        plt.figure(figsize=(10, 6))
-        plt.bar(products, counts, color='lightgreen')
-        plt.xlabel('产品')
-        plt.ylabel('次数')
-        plt.title('产品统计')
+    def plot_bar(self, actions, title):
+        # 生成并展示逆序的水平柱状图
+        labels = list(actions.keys())
+        values = list(actions.values())
+
+        # 根据值对标签和值进行排序(从小到大,以实现逆序显示)
+        sorted_indices = np.argsort(values)
+        labels = np.array(labels)[sorted_indices].tolist()
+        values = np.array(values)[sorted_indices].tolist()
+
+        # 水平柱状图的矩形位置
+        positions = np.arange(len(labels))
+
+        fig, ax = plt.subplots(figsize=(10, 6))
+        ax.barh(positions, values, color='teal')
+
+        # 在矩形旁边添加具体数值
+        for i, value in enumerate(values):
+            ax.text(value, positions[i], str(value),
+                    ha='right', va='center', color='black', fontsize=10)
+
+        # 设置横坐标和纵坐标
+        ax.set_yticks(positions)
+        ax.set_yticklabels(labels)
+        ax.set_xlabel('Number of Actions')
+        ax.set_ylabel('Behavior')
+
+        ax.set_title(title)
+
+        plt.tight_layout()  # 调整布局
         plt.show()
 
-    def plot_channel_stats(self, channel_stats):
-        if not channel_stats:
-            print("No data for channel stats.")
-            return
-        # 生成渠道统计图表
-        channels = list(channel_stats.keys())
-        counts = list(channel_stats.values())
-        plt.figure(figsize=(10, 6))
-        plt.bar(channels, counts, color='orange')
-        plt.xlabel('渠道')
-        plt.ylabel('次数')
-        plt.title('渠道统计')
-        plt.show()
+# 设置matplotlib支持中文
+plt.rcParams['font.sans-serif'] = ['SimHei']  # 用来正常显示中文标签
+plt.rcParams['axes.unicode_minus'] = False  # 用来正常显示负号

+ 10 - 7
user_events/monitor/Monitor.py

@@ -8,12 +8,15 @@ import time
 
 # 实时监控模块
 class RealTimeMonitor:
-    def __init__(self, data_stream):
-        self.data_stream = data_stream
+    def __init__(self, data):
+        self.data = data
 
     def monitor(self):
-        # 模拟实时监控数据流
-        for data in self.data_stream:
-            print(f"Monitoring: {data}")
-            # 这里可以添加实时处理逻辑
-            time.sleep(0.5)  # 模拟实时数据流的时间间隔
+        # 简单示例:打印实时数据流
+        for user_actions in self.data:
+            user = user_actions['user']
+            process = user_actions['process']
+            actions = user_actions['actions']
+            product = user_actions['product']
+            channel = user_actions['channel']
+            print(f"User: {user}, Process: {process}, Product: {product}, Channel: {channel}, Actions: {actions}")

+ 0 - 48
user_events/visual/Visualizer.py

@@ -1,48 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-@author: zsc
-@time: 2024/11/18
-@desc:  可视化展示
-"""
-
-# 可视化展示模块
-import matplotlib.pyplot as plt
-
-
-class Visualizer:
-    """
-    可视化展示模块:将分析结果以图表形式展示。
-    """
-
-    def generate_statistics(self, action_counts):
-        """
-        生成统计数据图表:展示用户行为的频率分布。
-
-        :param action_counts: 包含行为频率的Series对象。
-        """
-        # 使用matplotlib生成柱状图
-        action_counts.plot(kind='bar', color='skyblue')
-        plt.title('User Action Frequency Distribution')
-        plt.xlabel('Action Types')
-        plt.ylabel('Frequency')
-        plt.xticks(rotation=45)
-        plt.tight_layout()
-        plt.show()
-
-    def draw_heatmap(self, df):
-        """
-        绘制热力图:展示用户行为的二维频率分布。
-
-        :param df: 包含用户行为数据的DataFrame。
-        """
-        # 此处可以添加绘制热力图的逻辑,例如使用seaborn库
-        pass
-
-    def plot_user_journey(self, df):
-        """
-        绘制用户旅程图:展示用户在应用中的行为路径。
-
-        :param df: 包含用户行为数据的DataFrame。
-        """
-        # 此处可以添加绘制用户旅程图的逻辑
-        pass