87 lines
2.0 KiB
Go
87 lines
2.0 KiB
Go
package main
|
||
|
||
import (
|
||
"fmt"
|
||
"os"
|
||
"strings"
|
||
|
||
docx "github.com/fumiama/go-docx"
|
||
)
|
||
|
||
func DocxToStructuredPrompt(filename string) (string, error) {
|
||
f, err := os.Open(filename)
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
defer f.Close()
|
||
|
||
fi, err := f.Stat()
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
doc, err := docx.Parse(f, fi.Size())
|
||
if err != nil {
|
||
return "", err
|
||
}
|
||
|
||
var sb strings.Builder
|
||
sb.WriteString(fmt.Sprintf("# 文件:%s\n\n", filename))
|
||
|
||
for _, item := range doc.Document.Body.Items {
|
||
switch v := item.(type) {
|
||
case *docx.Paragraph:
|
||
// 直接用 fmt.Sprint 利用庫的 Stringer
|
||
text := fmt.Sprint(v)
|
||
text = strings.TrimSpace(text)
|
||
if text != "" {
|
||
sb.WriteString(text + "\n\n")
|
||
}
|
||
|
||
case *docx.Table:
|
||
sb.WriteString("## 表格\n")
|
||
|
||
// 先印表頭(可選)
|
||
sb.WriteString("| ")
|
||
|
||
// 假設第一行是表頭(很多文件如此),或全部當內容
|
||
for i, row := range v.TableRows {
|
||
var cells []string
|
||
for _, cell := range row.TableCells {
|
||
// 這裡是重點:cell 本身沒有 String(),但可以遍歷它的 Paragraphs
|
||
var cellText strings.Builder
|
||
for _, p := range cell.Paragraphs {
|
||
cellText.WriteString(fmt.Sprint(p))
|
||
cellText.WriteString(" ")
|
||
}
|
||
cells = append(cells, strings.TrimSpace(cellText.String()))
|
||
}
|
||
|
||
sb.WriteString(strings.Join(cells, " | "))
|
||
sb.WriteString(" |\n")
|
||
|
||
// 如果想加 markdown 表頭分隔線(只在第一行後加)
|
||
if i == 0 {
|
||
sb.WriteString("| " + strings.Repeat("--- | ", len(cells)) + "\n")
|
||
}
|
||
}
|
||
sb.WriteString("\n")
|
||
|
||
default:
|
||
// 忽略圖片、頁首等
|
||
}
|
||
}
|
||
|
||
return sb.String(), nil
|
||
}
|
||
|
||
func main1() {
|
||
// 測試用
|
||
prompt, err := docxToStructuredPrompt("D:\\myDocument\\tencent\\weChat\\WeChat Files\\wxid_pv6rg3z2l28y22\\FileStorage\\File\\2026-01\\(改)小班体育活动《蚂蚁运粮》(泉秀实幼吴思莹).docx")
|
||
if err != nil {
|
||
fmt.Println("錯誤:", err)
|
||
return
|
||
}
|
||
fmt.Println(prompt)
|
||
}
|