strategy_parse.py 4.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155
  1. # -*- coding: utf-8 -*-
  2. """
  3. @author: yq
  4. @time: 2024/12/18
  5. @desc: 策略流节点解析
  6. """
  7. import json
  8. import re
  9. import time
  10. from tqdm import tqdm
  11. import pandas as pd
  12. from PIL import Image
  13. from openpyxl import load_workbook
  14. from commom import f_get_save_path, call_llm, f_file_upload, GeneralException
  15. from enums import ResultCodesEnum
  16. from prompt import f_get_prompt_parse_node, f_get_prompt_parse_flow, f_get_prompt_parse_flow_image
  17. def _f_parse_flow_image(ws, node_list: list):
  18. image = ws._images[0]
  19. img = Image.open(image.ref).convert("RGB")
  20. save_path = f_get_save_path("流程图.png")
  21. img.save(save_path)
  22. time.sleep(1)
  23. file_id = f_file_upload(save_path)
  24. prompt = f_get_prompt_parse_flow_image(node_list)
  25. print(prompt)
  26. prompt = [
  27. {
  28. "type": "text",
  29. "text": prompt
  30. },
  31. {
  32. "type": "image",
  33. "file_id": file_id
  34. }
  35. ]
  36. prompt = json.dumps(prompt, ensure_ascii=False)
  37. llm_answer = call_llm(prompt, "object_string")
  38. print(llm_answer)
  39. code = re.findall(r"```python\n(.*)\n```", llm_answer, flags=re.DOTALL)[0]
  40. save_path = f_get_save_path("flow.py")
  41. with open(save_path, mode="w", encoding="utf8") as f:
  42. f.write(code)
  43. save_path = f_get_save_path("__init__.py")
  44. with open(save_path, mode="w", encoding="utf8") as f:
  45. f.write("")
  46. def _f_parse_flow(node_list: list, df: pd.DataFrame):
  47. flow = ""
  48. for _, row in df.iterrows():
  49. strategy = row["策略流描述"]
  50. flow = f"{flow}{strategy}\n"
  51. flow = flow.strip()
  52. prompt = f_get_prompt_parse_flow(node_list, flow)
  53. print(prompt)
  54. llm_answer = call_llm(prompt)
  55. print(llm_answer)
  56. code = re.findall(r"```python\n(.*)\n```", llm_answer, flags=re.DOTALL)[0]
  57. save_path = f_get_save_path("flow.py")
  58. with open(save_path, mode="w", encoding="utf8") as f:
  59. f.write(code)
  60. save_path = f_get_save_path("__init__.py")
  61. with open(save_path, mode="w", encoding="utf8") as f:
  62. f.write("")
  63. def _f_parse_node(df: pd.DataFrame, node_name):
  64. rules = ""
  65. for idx, row in df.iterrows():
  66. var_name = row["变量"]
  67. var_name = var_name.replace("\n", " ")
  68. rule_content = row["逻辑"]
  69. rule_content = rule_content.replace("\n", " ")
  70. rule_out = row["输出"]
  71. notes_output = row["输出备注"]
  72. if notes_output is None or notes_output != notes_output:
  73. notes_output = ""
  74. else:
  75. notes_output = notes_output.replace("\n", " ")
  76. notes_output = f" 结果备注: {notes_output}"
  77. notes_input = row["输入备注"]
  78. if notes_input is None or notes_input != notes_input:
  79. notes_input = ""
  80. else:
  81. notes_input = notes_input.replace("\n", " ")
  82. notes_input = f" 变量备注: {notes_input}"
  83. rules = f"{rules}规则{idx + 1}: 变量:{var_name} 逻辑:{rule_content} 输出:{rule_out}{notes_input}{notes_output}\n"
  84. default_output = list(df["默认输出"])[0]
  85. if default_output is None or default_output != default_output:
  86. default_output = ""
  87. else:
  88. default_output = str(default_output).replace("\n", " ")
  89. default_output = f"{default_output}"
  90. prompt = f_get_prompt_parse_node(node_name, rules, default_output)
  91. print(prompt)
  92. llm_answer = call_llm(prompt)
  93. code = re.findall(r"```python\n(.*)\n```", llm_answer, flags=re.DOTALL)[0]
  94. func_name = re.findall(r"def (.*)\(data", code)[0]
  95. save_path = f_get_save_path(f"{func_name}.py")
  96. print(code)
  97. with open(save_path, mode="w", encoding="utf8") as f:
  98. f.write(code)
  99. return func_name, code
  100. def f_parse_strategy_image(file_path):
  101. wb = load_workbook(file_path)
  102. excel = pd.ExcelFile(file_path)
  103. sheet_names = excel.sheet_names
  104. if "流程图" not in sheet_names:
  105. GeneralException(ResultCodesEnum.NOT_FOUND, message=f"sheet【流程图】不存在")
  106. node_list = []
  107. for node_name in tqdm(sheet_names):
  108. if node_name == "流程图":
  109. continue
  110. df = excel.parse(sheet_name=node_name)
  111. func_name, code = _f_parse_node(df, node_name)
  112. node_list.append((node_name, func_name, code))
  113. _f_parse_flow_image(wb["流程图"], node_list)
  114. wb.close()
  115. excel.close()
  116. def f_parse_strategy(file_path):
  117. excel = pd.ExcelFile(file_path)
  118. sheet_names = excel.sheet_names
  119. if "流程" not in sheet_names:
  120. GeneralException(ResultCodesEnum.NOT_FOUND, message=f"sheet【流程】不存在")
  121. node_list = []
  122. for node_name in tqdm(sheet_names):
  123. if node_name == "流程":
  124. continue
  125. df = excel.parse(sheet_name=node_name)
  126. func_name, code = _f_parse_node(df, node_name)
  127. node_list.append((node_name, func_name, code))
  128. _f_parse_flow(node_list, excel.parse(sheet_name="流程"))
  129. excel.close()
# Script entry point: when run directly, parse the demo workbook at the
# hard-coded path below using the text-based flow parser.
if __name__ == "__main__":
    f_parse_strategy("./cache/策略节点配置3demo.xlsx")