# -*- coding:utf-8 -*-
"""
utils.py

@author: isaacqyang
@time: 2023/12/28
@desc: Helpers that export a Feishu (Lark) online document to a .docx file,
       post-process it with python-docx, and upload the result to a TOS bucket.
"""
import json
import os
import time
from typing import Optional
from urllib.parse import unquote

import lark_oapi as lark
import tos
from docx import Document
from lark_oapi.api.drive.v1 import CreateExportTaskRequest, ExportTask, CreateExportTaskResponse, GetExportTaskRequest, \
    GetExportTaskResponse, DownloadExportTaskRequest, DownloadExportTaskResponse
from tos import HttpMethodType

from config import BaseConfig
  18. def f_upload_file(save_path) -> str:
  19. ak = BaseConfig.cos_access_key_id
  20. sk = BaseConfig.cos_secret_access_key
  21. endpoint = BaseConfig.endpoint
  22. region = BaseConfig.region
  23. bucket_name = BaseConfig.bucket_name
  24. try:
  25. # 创建 TosClientV2 对象,对桶和对象的操作都通过 TosClientV2 实现
  26. client = tos.TosClientV2(ak, sk, endpoint, region)
  27. object_key = os.path.basename(save_path)
  28. client.put_object_from_file(bucket_name, object_key, save_path)
  29. pre_signed_url_output = client.pre_signed_url(HttpMethodType.Http_Method_Get, bucket_name, object_key)
  30. return pre_signed_url_output.signed_url
  31. except tos.exceptions.TosClientError as e:
  32. # 操作失败,捕获客户端异常,一般情况为非法请求参数或网络异常
  33. print('fail with client error, message:{}, cause: {}'.format(e.message, e.cause))
  34. except tos.exceptions.TosServerError as e:
  35. # 操作失败,捕获服务端异常,可从返回信息中获取详细错误信息
  36. print('fail with server error, code: {}'.format(e.code))
  37. # request id 可定位具体问题,强烈建议日志中保存
  38. print('error with request id: {}'.format(e.request_id))
  39. print('error with message: {}'.format(e.message))
  40. print('error with http code: {}'.format(e.status_code))
  41. print('error with ec: {}'.format(e.ec))
  42. print('error with request url: {}'.format(e.request_url))
  43. except Exception as e:
  44. print('fail with unknown error: {}'.format(e))
  45. def f_doc_export(token: str) -> str:
  46. # 飞书在线文档转word
  47. app_id = BaseConfig.app_id
  48. app_secret = BaseConfig.app_secret
  49. word_save_dir = BaseConfig.word_save_dir
  50. client = lark.Client.builder() \
  51. .app_id(app_id) \
  52. .app_secret(app_secret) \
  53. .log_level(lark.LogLevel.DEBUG) \
  54. .build()
  55. # 构造请求对象
  56. request1: CreateExportTaskRequest = CreateExportTaskRequest.builder() \
  57. .request_body(ExportTask.builder()
  58. .file_extension("docx")
  59. .token(token)
  60. .type("docx")
  61. .build()) \
  62. .build()
  63. # 发起请求
  64. response1: CreateExportTaskResponse = client.drive.v1.export_task.create(request1)
  65. # 处理失败返回
  66. if not response1.success():
  67. lark.logger.error(
  68. f"client.drive.v1.export_task.create failed, code: {response1.code}, msg: {response1.msg}, log_id: {response1.get_log_id()}, resp: \n{json.dumps(json.loads(response1.raw.content), indent=4, ensure_ascii=False)}")
  69. return
  70. # 处理业务结果
  71. lark.logger.info(lark.JSON.marshal(response1.data, indent=4))
  72. ticket = response1.data.ticket
  73. time.sleep(5)
  74. # 构造请求对象
  75. request2: GetExportTaskRequest = GetExportTaskRequest.builder() \
  76. .ticket(ticket) \
  77. .token(token) \
  78. .build()
  79. # 发起请求
  80. response2: GetExportTaskResponse = client.drive.v1.export_task.get(request2)
  81. # 处理失败返回
  82. if not response2.success():
  83. lark.logger.error(
  84. f"client.drive.v1.export_task.get failed, code: {response2.code}, msg: {response2.msg}, log_id: {response2.get_log_id()}, resp: \n{json.dumps(json.loads(response2.raw.content), indent=4, ensure_ascii=False)}")
  85. return
  86. # 处理业务结果
  87. lark.logger.info(lark.JSON.marshal(response2.data, indent=4))
  88. file_token = response2.data.result.file_token
  89. # 构造请求对象
  90. request3: DownloadExportTaskRequest = DownloadExportTaskRequest.builder() \
  91. .file_token(file_token) \
  92. .build()
  93. # 发起请求
  94. response3: DownloadExportTaskResponse = client.drive.v1.export_task.download(request3)
  95. # 处理失败返回
  96. if not response3.success():
  97. lark.logger.error(
  98. f"client.drive.v1.export_task.download failed, code: {response3.code}, msg: {response3.msg}, log_id: {response3.get_log_id()}")
  99. return
  100. # 处理业务结果
  101. file_name = unquote(response3.file_name)
  102. save_path = os.path.join(word_save_dir, file_name)
  103. with open(save_path, "wb") as f:
  104. f.write(response3.file.read())
  105. # 操作word
  106. doc = Document(save_path)
  107. placeholder = ""
  108. for paragraph in doc.paragraphs:
  109. if not placeholder in paragraph.text:
  110. continue
  111. # 清除占位符
  112. for run in paragraph.runs:
  113. run.text = run.text.replace(placeholder, "")
  114. table = doc.add_table(rows=1, cols=[])
  115. paragraph._element.addnext(table._element)
  116. doc.save(save_path)
  117. word_download_url = f_upload_file(save_path)
  118. return word_download_url
  119. if __name__ == "__main__":
  120. f_doc_export('YKNBdbs10oA3pCxTdnAczcvOnxc')
  121. # f_upload_file("/root/project/coze_znjd/大模型企业调查报告/1.docx")