utils.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. # -*- coding:utf-8 -*-
  2. """
  3. @author: isaacqyang
  4. @time: 2023/12/28
  5. @desc:
  6. """
  7. import json
  8. import os
  9. import time
  10. from urllib.parse import unquote
  11. from docx import Document
  12. from docx.enum.text import WD_ALIGN_PARAGRAPH
  13. import lark_oapi as lark
  14. import tos
  15. from lark_oapi.api.drive.v1 import CreateExportTaskRequest, ExportTask, CreateExportTaskResponse, GetExportTaskRequest, \
  16. GetExportTaskResponse, DownloadExportTaskRequest, DownloadExportTaskResponse
  17. from tos import HttpMethodType
  18. from config import BaseConfig
  19. def f_upload_file(save_path) -> str:
  20. ak = BaseConfig.cos_access_key_id
  21. sk = BaseConfig.cos_secret_access_key
  22. endpoint = BaseConfig.endpoint
  23. region = BaseConfig.region
  24. bucket_name = BaseConfig.bucket_name
  25. try:
  26. # 创建 TosClientV2 对象,对桶和对象的操作都通过 TosClientV2 实现
  27. client = tos.TosClientV2(ak, sk, endpoint, region)
  28. object_key = os.path.basename(save_path)
  29. client.put_object_from_file(bucket_name, object_key, save_path)
  30. pre_signed_url_output = client.pre_signed_url(HttpMethodType.Http_Method_Get, bucket_name, object_key)
  31. return pre_signed_url_output.signed_url
  32. except tos.exceptions.TosClientError as e:
  33. # 操作失败,捕获客户端异常,一般情况为非法请求参数或网络异常
  34. print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
  35. except tos.exceptions.TosServerError as e:
  36. # 操作失败,捕获服务端异常,可从返回信息中获取详细错误信息
  37. print('fail with server error, code: {}'.format(e.code))
  38. # request id 可定位具体问题,强烈建议日志中保存
  39. print('error with request id: {}'.format(e.request_id))
  40. print('error with message: {}'.format(e.message))
  41. print('error with http code: {}'.format(e.status_code))
  42. print('error with ec: {}'.format(e.ec))
  43. print('error with request url: {}'.format(e.request_url))
  44. except Exception as e:
  45. print('fail with unknown error: {}'.format(e))
  46. def create_word_table(json_data):
  47. # 将JSON字符串解析为Python对象
  48. json_data = json.loads(json_data)
  49. # 创建 Word 文档对象
  50. document = Document()
  51. # 创建表格
  52. table = document.add_table(rows=len(json_data['data']), cols=len(json_data['data'][0]))
  53. # 填充表格数据
  54. for i, row in enumerate(json_data['data']):
  55. for j, cell_value in enumerate(row):
  56. cell = table.cell(i, j)
  57. cell.text = cell_value.strip() # 去除单元格文本前后的空白字符
  58. # 设置表格样式
  59. table.style = 'Table Grid'
  60. for row in table.rows:
  61. for cell in row.cells:
  62. cell.paragraphs[0].alignment = WD_ALIGN_PARAGRAPH.CENTER
  63. # 合并单元格并处理换行问题
  64. for merge in json_data.get('merges', []): # 增加空值处理
  65. start_cell = table.cell(merge['start_row'], merge['start_column'])
  66. end_cell = table.cell(merge['end_row'], merge['end_column'])
  67. start_cell.merge(end_cell)
  68. # 合并后,将所有文本合并到一个段落中
  69. all_text = ""
  70. for paragraph in start_cell.paragraphs:
  71. all_text += paragraph.text
  72. # 清除原有段落
  73. for paragraph in start_cell.paragraphs:
  74. p = paragraph._element
  75. p.getparent().remove(p)
  76. p._p = p._element = None
  77. # 添加一个新的段落,包含所有文本
  78. start_cell.add_paragraph(all_text)
  79. return table
  80. def f_doc_export(token: str, request_id: str, data: object) -> str:
  81. # 飞书在线文档转word
  82. app_id = BaseConfig.app_id
  83. app_secret = BaseConfig.app_secret
  84. word_save_dir = BaseConfig.word_save_dir
  85. client = lark.Client.builder() \
  86. .app_id(app_id) \
  87. .app_secret(app_secret) \
  88. .log_level(lark.LogLevel.DEBUG) \
  89. .build()
  90. # 构造请求对象
  91. request1: CreateExportTaskRequest = CreateExportTaskRequest.builder() \
  92. .request_body(ExportTask.builder()
  93. .file_extension("docx")
  94. .token(token)
  95. .type("docx")
  96. .build()) \
  97. .build()
  98. # 发起请求
  99. response1: CreateExportTaskResponse = client.drive.v1.export_task.create(request1)
  100. # 处理失败返回
  101. if not response1.success():
  102. lark.logger.error(
  103. f"client.drive.v1.export_task.create failed, code: {response1.code}, msg: {response1.msg}, log_id: {response1.get_log_id()}, resp: \n{json.dumps(json.loads(response1.raw.content), indent=4, ensure_ascii=False)}")
  104. return
  105. # 处理业务结果
  106. lark.logger.info(lark.JSON.marshal(response1.data, indent=4))
  107. ticket = response1.data.ticket
  108. time.sleep(5)
  109. # 构造请求对象
  110. request2: GetExportTaskRequest = GetExportTaskRequest.builder() \
  111. .ticket(ticket) \
  112. .token(token) \
  113. .build()
  114. # 发起请求
  115. response2: GetExportTaskResponse = client.drive.v1.export_task.get(request2)
  116. # 处理失败返回
  117. if not response2.success():
  118. lark.logger.error(
  119. f"client.drive.v1.export_task.get failed, code: {response2.code}, msg: {response2.msg}, log_id: {response2.get_log_id()}, resp: \n{json.dumps(json.loads(response2.raw.content), indent=4, ensure_ascii=False)}")
  120. return
  121. # 处理业务结果
  122. lark.logger.info(lark.JSON.marshal(response2.data, indent=4))
  123. file_token = response2.data.result.file_token
  124. # 构造请求对象
  125. request3: DownloadExportTaskRequest = DownloadExportTaskRequest.builder() \
  126. .file_token(file_token) \
  127. .build()
  128. # 发起请求
  129. response3: DownloadExportTaskResponse = client.drive.v1.export_task.download(request3)
  130. # 处理失败返回
  131. if not response3.success():
  132. lark.logger.error(
  133. f"client.drive.v1.export_task.download failed, code: {response3.code}, msg: {response3.msg}, log_id: {response3.get_log_id()}")
  134. return
  135. # 处理业务结果
  136. file_name = unquote(response3.file_name)
  137. save_path = os.path.join(word_save_dir, file_name)
  138. with open(save_path, "wb") as f:
  139. f.write(response3.file.read())
  140. time.sleep(2)
  141. # 操作word
  142. if data is not None:
  143. doc = Document(save_path)
  144. placeholder = "{TABLE_PLACEHOLDER}"
  145. for paragraph in doc.paragraphs:
  146. if not placeholder in paragraph.text:
  147. continue
  148. # 清除占位符
  149. for run in paragraph.runs:
  150. run.text = run.text.replace(placeholder, "")
  151. # 生成表格(调用改造后的 create_word_table 函数,传入字符串)
  152. table = create_word_table(data)
  153. paragraph._element.addnext(table._tbl)
  154. doc.save(save_path)
  155. time.sleep(2)
  156. word_download_url = f_upload_file(save_path)
  157. return word_download_url
  158. if __name__ == "__main__":
  159. f_doc_export('YKNBdbs10oA3pCxTdnAczcvOnxc')
  160. # f_upload_file("/root/project/coze_znjd/大模型企业调查报告/1.docx")