package util

import (
	"fmt"
	"os"
	"strings"

	docx "github.com/fumiama/go-docx"
)

// DocxToStructuredPrompt reads the .docx file at filename and renders its body
// as Markdown-like text.
//
// The output starts with a "# 文件:<filename>" heading. Each non-empty
// paragraph is written followed by a blank line, and each table is written
// under a "## 表格" heading as a Markdown table whose first non-empty row is
// treated as the header. Body items of any other type are skipped.
//
// It returns a non-nil error, wrapping the underlying cause, when the file
// cannot be opened, stat'ed, or parsed.
func DocxToStructuredPrompt(filename string) (string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return "", fmt.Errorf("opening docx %q: %w", filename, err)
	}
	defer f.Close()

	// docx.Parse needs the total size of the underlying reader.
	fi, err := f.Stat()
	if err != nil {
		return "", fmt.Errorf("stat docx %q: %w", filename, err)
	}

	doc, err := docx.Parse(f, fi.Size())
	if err != nil {
		return "", fmt.Errorf("parsing docx %q: %w", filename, err)
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "# 文件:%s\n\n", filename)

	for _, item := range doc.Document.Body.Items {
		switch v := item.(type) {
		case *docx.Paragraph:
			// fmt.Sprint picks up the library's Stringer implementation.
			if text := strings.TrimSpace(fmt.Sprint(v)); text != "" {
				sb.WriteString(text)
				sb.WriteString("\n\n")
			}
		case *docx.Table:
			writeMarkdownTable(&sb, v)
		default:
			// Anything that is neither a paragraph nor a table is ignored.
		}
	}

	return sb.String(), nil
}

// writeMarkdownTable appends tbl to sb as a Markdown table under a "## 表格"
// heading, followed by a blank line. The first row that has at least one cell
// is treated as the header and is followed by a "---" separator line; rows
// without cells are skipped.
func writeMarkdownTable(sb *strings.Builder, tbl *docx.Table) {
	sb.WriteString("## 表格\n")

	headerWritten := false
	for _, row := range tbl.TableRows {
		cells := make([]string, 0, len(row.TableCells))
		for _, cell := range row.TableCells {
			// A cell is rendered by concatenating the text of its
			// paragraphs, separated by spaces.
			var cellText strings.Builder
			for _, p := range cell.Paragraphs {
				cellText.WriteString(fmt.Sprint(p))
				cellText.WriteString(" ")
			}
			cells = append(cells, markdownCell(cellText.String()))
		}
		if len(cells) == 0 {
			continue
		}

		// Every row needs its own leading pipe; writing it once before
		// the loop would leave all rows after the first malformed.
		sb.WriteString("| ")
		sb.WriteString(strings.Join(cells, " | "))
		sb.WriteString(" |\n")

		if !headerWritten {
			sb.WriteString("|" + strings.Repeat(" --- |", len(cells)) + "\n")
			headerWritten = true
		}
	}

	sb.WriteString("\n")
}

// markdownCell makes text safe to place inside a single Markdown table cell:
// runs of whitespace (including line breaks) collapse to one space, leading
// and trailing whitespace is removed, and literal pipes are escaped so they
// are not read as column delimiters.
func markdownCell(text string) string {
	collapsed := strings.Join(strings.Fields(text), " ")
	return strings.ReplaceAll(collapsed, "|", "\\|")
}