From 1c38601fe063232038af1988176b24959ea69ecd Mon Sep 17 00:00:00 2001 From: laoboli <1293528695@qq.com> Date: Wed, 21 Jan 2026 16:44:44 +0800 Subject: [PATCH] fix: util. --- util/docx_util.go | 76 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) create mode 100644 util/docx_util.go diff --git a/util/docx_util.go b/util/docx_util.go new file mode 100644 index 0000000..615ec35 --- /dev/null +++ b/util/docx_util.go @@ -0,0 +1,76 @@ +package util + +import ( + "fmt" + "os" + "strings" + + docx "github.com/fumiama/go-docx" +) + +func DocxToStructuredPrompt(filename string) (string, error) { + f, err := os.Open(filename) + if err != nil { + return "", err + } + defer f.Close() + + fi, err := f.Stat() + if err != nil { + return "", err + } + + doc, err := docx.Parse(f, fi.Size()) + if err != nil { + return "", err + } + + var sb strings.Builder + sb.WriteString(fmt.Sprintf("# 文件:%s\n\n", filename)) + + for _, item := range doc.Document.Body.Items { + switch v := item.(type) { + case *docx.Paragraph: + // 直接用 fmt.Sprint 利用庫的 Stringer + text := fmt.Sprint(v) + text = strings.TrimSpace(text) + if text != "" { + sb.WriteString(text + "\n\n") + } + + case *docx.Table: + sb.WriteString("## 表格\n") + + // 先印表頭(可選) + sb.WriteString("| ") + + // 假設第一行是表頭(很多文件如此),或全部當內容 + for i, row := range v.TableRows { + var cells []string + for _, cell := range row.TableCells { + // 這裡是重點:cell 本身沒有 String(),但可以遍歷它的 Paragraphs + var cellText strings.Builder + for _, p := range cell.Paragraphs { + cellText.WriteString(fmt.Sprint(p)) + cellText.WriteString(" ") + } + cells = append(cells, strings.TrimSpace(cellText.String())) + } + + sb.WriteString(strings.Join(cells, " | ")) + sb.WriteString(" |\n") + + // 如果想加 markdown 表頭分隔線(只在第一行後加) + if i == 0 { + sb.WriteString("| " + strings.Repeat("--- | ", len(cells)) + "\n") + } + } + sb.WriteString("\n") + + default: + // 忽略圖片、頁首等 + } + } + + return sb.String(), nil +}