utils.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248
  1. # -*- coding:utf-8 -*-
  2. """
  3. @author: isaacqyang
  4. @time: 2023/12/28
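  5. @desc: Helpers for exporting a Feishu (Lark) online document to a Word file, inserting tables built from JSON at placeholders, and uploading the result to TOS object storage.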
  6. """
  7. import json
  8. import os
  9. import time
  10. from urllib.parse import unquote
  11. from docx import Document
  12. from docx.enum.text import WD_ALIGN_PARAGRAPH
  13. import lark_oapi as lark
  14. import tos
  15. from lark_oapi.api.drive.v1 import CreateExportTaskRequest, ExportTask, CreateExportTaskResponse, GetExportTaskRequest, \
  16. GetExportTaskResponse, DownloadExportTaskRequest, DownloadExportTaskResponse
  17. from tos import HttpMethodType
  18. from config import BaseConfig
  19. def f_upload_file(save_path) -> str:
  20. ak = BaseConfig.cos_access_key_id
  21. sk = BaseConfig.cos_secret_access_key
  22. endpoint = BaseConfig.endpoint
  23. region = BaseConfig.region
  24. bucket_name = BaseConfig.bucket_name
  25. try:
  26. # 创建 TosClientV2 对象,对桶和对象的操作都通过 TosClientV2 实现
  27. client = tos.TosClientV2(ak, sk, endpoint, region)
  28. object_key = os.path.basename(save_path)
  29. client.put_object_from_file(bucket_name, object_key, save_path)
  30. pre_signed_url_output = client.pre_signed_url(HttpMethodType.Http_Method_Get, bucket_name, object_key)
  31. return pre_signed_url_output.signed_url
  32. except tos.exceptions.TosClientError as e:
  33. # 操作失败,捕获客户端异常,一般情况为非法请求参数或网络异常
  34. print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
  35. except tos.exceptions.TosServerError as e:
  36. # 操作失败,捕获服务端异常,可从返回信息中获取详细错误信息
  37. print('fail with server error, code: {}'.format(e.code))
  38. # request id 可定位具体问题,强烈建议日志中保存
  39. print('error with request id: {}'.format(e.request_id))
  40. print('error with message: {}'.format(e.message))
  41. print('error with http code: {}'.format(e.status_code))
  42. print('error with ec: {}'.format(e.ec))
  43. print('error with request url: {}'.format(e.request_url))
  44. except Exception as e:
  45. print('fail with unknown error: {}'.format(e))
  46. def create_word_table(json_data):
  47. # 将JSON字符串解析为Python对象
  48. json_data = json.loads(json_data)
  49. # 创建 Word 文档对象
  50. document = Document()
  51. # 创建表格
  52. table = document.add_table(rows=len(json_data['data']), cols=len(json_data['data'][0]))
  53. # 填充表格数据
  54. for i, row in enumerate(json_data['data']):
  55. for j, cell_value in enumerate(row):
  56. cell = table.cell(i, j)
  57. cell.text = cell_value.strip() # 去除单元格文本前后的空白字符
  58. # 设置表格样式
  59. table.style = 'Table Grid'
  60. for row in table.rows:
  61. for cell in row.cells:
  62. cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
  63. # 合并单元格并处理换行问题
  64. for merge in json_data.get('merges', []): # 增加空值处理
  65. start_cell = table.cell(merge['start_row'], merge['start_column'])
  66. end_cell = table.cell(merge['end_row'], merge['end_column'])
  67. start_cell.merge(end_cell)
  68. # 合并后,将所有文本合并到一个段落中
  69. all_text = ""
  70. for paragraph in start_cell.paragraphs:
  71. all_text += paragraph.text
  72. # 清除原有段落
  73. for paragraph in start_cell.paragraphs:
  74. p = paragraph._element
  75. p.getparent().remove(p)
  76. p._p = p._element = None
  77. # 添加一个新的段落,包含所有文本
  78. start_cell.add_paragraph(all_text)
  79. return table
  80. def f_doc_export(token: str, request_id: str, data: object) -> str:
  81. # 飞书在线文档转word
  82. app_id = BaseConfig.app_id
  83. app_secret = BaseConfig.app_secret
  84. word_save_dir = BaseConfig.word_save_dir
  85. client = lark.Client.builder() \
  86. .app_id(app_id) \
  87. .app_secret(app_secret) \
  88. .log_level(lark.LogLevel.DEBUG) \
  89. .build()
  90. # 构造请求对象
  91. request1: CreateExportTaskRequest = CreateExportTaskRequest.builder() \
  92. .request_body(ExportTask.builder()
  93. .file_extension("docx")
  94. .token(token)
  95. .type("docx")
  96. .build()) \
  97. .build()
  98. # 发起请求
  99. response1: CreateExportTaskResponse = client.drive.v1.export_task.create(request1)
  100. # 处理失败返回
  101. if not response1.success():
  102. lark.logger.error(
  103. f"client.drive.v1.export_task.create failed, code: {response1.code}, msg: {response1.msg}, log_id: {response1.get_log_id()}, resp: \n{json.dumps(json.loads(response1.raw.content), indent=4, ensure_ascii=False)}")
  104. return
  105. # 处理业务结果
  106. lark.logger.info(lark.JSON.marshal(response1.data, indent=4))
  107. ticket = response1.data.ticket
  108. time.sleep(5)
  109. # 构造请求对象
  110. request2: GetExportTaskRequest = GetExportTaskRequest.builder() \
  111. .ticket(ticket) \
  112. .token(token) \
  113. .build()
  114. # 发起请求
  115. response2: GetExportTaskResponse = client.drive.v1.export_task.get(request2)
  116. # 处理失败返回
  117. if not response2.success():
  118. lark.logger.error(
  119. f"client.drive.v1.export_task.get failed, code: {response2.code}, msg: {response2.msg}, log_id: {response2.get_log_id()}, resp: \n{json.dumps(json.loads(response2.raw.content), indent=4, ensure_ascii=False)}")
  120. return
  121. # 处理业务结果
  122. lark.logger.info(lark.JSON.marshal(response2.data, indent=4))
  123. file_token = response2.data.result.file_token
  124. # 构造请求对象
  125. request3: DownloadExportTaskRequest = DownloadExportTaskRequest.builder() \
  126. .file_token(file_token) \
  127. .build()
  128. # 发起请求
  129. response3: DownloadExportTaskResponse = client.drive.v1.export_task.download(request3)
  130. # 处理失败返回
  131. if not response3.success():
  132. lark.logger.error(
  133. f"client.drive.v1.export_task.download failed, code: {response3.code}, msg: {response3.msg}, log_id: {response3.get_log_id()}")
  134. return
  135. # 处理业务结果
  136. file_name = unquote(response3.file_name)
  137. save_path = os.path.join(word_save_dir, file_name)
  138. with open(save_path, "wb") as f:
  139. f.write(response3.file.read())
  140. time.sleep(2)
  141. # # 操作word
  142. # if data is not None:
  143. # doc = Document(save_path)
  144. # placeholder = "{TABLE_PLACEHOLDER}"
  145. # for paragraph in doc.paragraphs:
  146. # if not placeholder in paragraph.text:
  147. # continue
  148. # # 清除占位符
  149. # for run in paragraph.runs:
  150. # run.text = run.text.replace(placeholder, "")
  151. # # 生成表格(调用改造后的 create_word_table 函数,传入字符串)
  152. # table = create_word_table(data)
  153. # paragraph._element.addnext(table._tbl)
  154. # doc.save(save_path)
  155. # time.sleep(2)
  156. #
  157. # doc = Document(save_path)
  158. # placeholder_base = "{TABLE_PLACEHOLDER}" # 基础占位符前缀
  159. # # table_datas = data
  160. #
  161. # # 遍历所有段落,按索引匹配占位符
  162. # for idx, paragraph in enumerate(doc.paragraphs):
  163. # # 构造当前占位符(如{TABLE_PLACEHOLDER}_1, _2, _3...)
  164. # current_placeholder = f"{placeholder_base}_{idx + 1}"
  165. #
  166. # if current_placeholder in paragraph.text:
  167. # # 检查是否有对应索引的表格数据
  168. # if idx < len(data):
  169. # table_json = data[idx] # 获取第idx+1个表格数据
  170. # # 生成表格(假设create_word_table接收JSON字符串或字典)
  171. # if isinstance(table_json, str):
  172. # table = create_word_table(table_json) # 传入JSON字符串
  173. # else:
  174. # table = create_word_table(json.dumps(table_json)) # 传入字典需转为字符串
  175. #
  176. # # 清除占位符文本
  177. # for run in paragraph.runs:
  178. # run.text = run.text.replace(current_placeholder, "")
  179. #
  180. # # 插入表格到占位符位置
  181. # paragraph._element.addnext(table._tbl)
  182. # else:
  183. # print(f"警告:占位符{current_placeholder}无对应表格数据")
  184. #
  185. # doc.save(save_path)
  186. # time.sleep(2)
  187. doc = Document(save_path)
  188. placeholder_prefix = "{TABLE_PLACEHOLDER}_"
  189. placeholder_count = len(data)
  190. for i in range(1, placeholder_count + 1):
  191. placeholder = f"{placeholder_prefix}{i}"
  192. for paragraph in doc.paragraphs:
  193. if placeholder in paragraph.text:
  194. # 清除占位符
  195. for run in paragraph.runs:
  196. run.text = run.text.replace(placeholder, "")
  197. # 生成表格(调用改造后的 create_word_table 函数,传入字符串)
  198. table_data = data[i-1]
  199. table = create_word_table(table_data)
  200. paragraph._element.addnext(table._tbl)
  201. break # 找到并处理一个占位符后,跳出内层循环
  202. doc.save(save_path)
  203. time.sleep(2)
  204. word_download_url = f_upload_file(save_path)
  205. return word_download_url
  206. if __name__ == "__main__":
  207. f_doc_export('YKNBdbs10oA3pCxTdnAczcvOnxc')
  208. # f_upload_file("/root/project/coze_znjd/大模型企业调查报告/1.docx")