Files
hr_data_analyzer/test/util.go
2026-01-21 16:39:19 +08:00

87 lines
2.0 KiB
Go
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
package main
import (
"fmt"
"os"
"strings"
docx "github.com/fumiama/go-docx"
)
// DocxToStructuredPrompt reads the .docx file at filename and renders its body
// as Markdown-style text.
//
// The output starts with a "# 文件:<filename>" heading. Every paragraph whose
// text is non-empty after trimming is written followed by a blank line. Every
// table is written under a "## 表格" heading as a pipe-delimited Markdown table
// in which the first row is treated as the header (a "---" separator row is
// emitted right after it). Body items of any other type are skipped.
//
// It returns an error, wrapped with the failing step and the filename, when
// the file cannot be opened, stat'ed, or parsed as a docx document.
func DocxToStructuredPrompt(filename string) (string, error) {
	f, err := os.Open(filename)
	if err != nil {
		return "", fmt.Errorf("opening %q: %w", filename, err)
	}
	defer f.Close()

	// docx.Parse needs the total size up front because it reads the file as a
	// zip archive through io.ReaderAt.
	fi, err := f.Stat()
	if err != nil {
		return "", fmt.Errorf("reading file info of %q: %w", filename, err)
	}
	doc, err := docx.Parse(f, fi.Size())
	if err != nil {
		return "", fmt.Errorf("parsing docx %q: %w", filename, err)
	}

	// A line break or a literal pipe inside a cell would split the Markdown
	// row, so line breaks become spaces and pipes are backslash-escaped.
	cellEscaper := strings.NewReplacer("\r\n", " ", "\n", " ", "\r", " ", "|", `\|`)

	var sb strings.Builder
	fmt.Fprintf(&sb, "# 文件:%s\n\n", filename)

	for _, item := range doc.Document.Body.Items {
		switch v := item.(type) {
		case *docx.Paragraph:
			// fmt.Sprint relies on the library's Stringer implementation to
			// flatten the paragraph into plain text.
			if text := strings.TrimSpace(fmt.Sprint(v)); text != "" {
				sb.WriteString(text)
				sb.WriteString("\n\n")
			}

		case *docx.Table:
			sb.WriteString("## 表格\n")
			for i, row := range v.TableRows {
				cells := make([]string, 0, len(row.TableCells))
				for _, cell := range row.TableCells {
					// A cell is rendered by joining the text of its
					// paragraphs with single spaces.
					var cellText strings.Builder
					for _, p := range cell.Paragraphs {
						cellText.WriteString(fmt.Sprint(p))
						cellText.WriteString(" ")
					}
					cells = append(cells, cellEscaper.Replace(strings.TrimSpace(cellText.String())))
				}

				// Every row needs its own leading pipe; writing it once
				// before the loop left all rows after the first malformed.
				sb.WriteString("| ")
				sb.WriteString(strings.Join(cells, " | "))
				sb.WriteString(" |\n")

				// The first row is assumed to be the header, so the Markdown
				// separator line goes directly after it.
				if i == 0 {
					sb.WriteString("|")
					sb.WriteString(strings.Repeat(" --- |", len(cells)))
					sb.WriteString("\n")
				}
			}
			sb.WriteString("\n")

			// Any other body item type is intentionally ignored.
		}
	}
	return sb.String(), nil
}
// main1 is a manual smoke test: it converts one hard-coded sample document and
// prints either the resulting prompt or the conversion error to stdout.
//
// NOTE(review): this calls docxToStructuredPrompt (lowercase d), while the
// converter visible in this file is named DocxToStructuredPrompt. Confirm that
// a lowercase variant exists elsewhere in the package; otherwise this call
// does not compile and should target DocxToStructuredPrompt.
// NOTE(review): the sample path is an absolute Windows path and is therefore
// machine-specific.
func main1() {
	const samplePath = "D:\\myDocument\\tencent\\weChat\\WeChat Files\\wxid_pv6rg3z2l28y22\\FileStorage\\File\\2026-01\\(改)小班体育活动《蚂蚁运粮》(泉秀实幼吴思莹).docx"

	out, convErr := docxToStructuredPrompt(samplePath)
	if convErr == nil {
		fmt.Println(out)
		return
	}
	fmt.Println("錯誤:", convErr)
}