feat: count.

This commit is contained in:
2026-01-21 16:39:19 +08:00
parent 1a252a12be
commit 876916010f
6 changed files with 518 additions and 0 deletions

86
test/util.go Normal file
View File

@ -0,0 +1,86 @@
package main
import (
"fmt"
"os"
"strings"
docx "github.com/fumiama/go-docx"
)
// DocxToStructuredPrompt reads the .docx file at filename and renders its
// body as Markdown-style text.
//
// The output starts with a "# 文件:<filename>" heading. Every paragraph whose
// trimmed text is non-empty is written followed by a blank line. Every table
// is written under a "## 表格" heading as a pipe table in which the first row
// is treated as the header. Body items of any other type are skipped.
//
// An error is returned if the file cannot be opened or stat'ed, or if the
// document cannot be parsed; the returned string is empty in that case.
func DocxToStructuredPrompt(filename string) (string, error) {
	f, err := os.Open(filename)
	if err != nil {
		// Returned unwrapped so callers keep the error value exactly as
		// os.Open produced it.
		return "", err
	}
	defer f.Close()

	// docx.Parse is given the file size alongside the reader.
	fi, err := f.Stat()
	if err != nil {
		return "", err
	}

	doc, err := docx.Parse(f, fi.Size())
	if err != nil {
		return "", fmt.Errorf("parse docx %q: %w", filename, err)
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "# 文件:%s\n\n", filename)

	for _, item := range doc.Document.Body.Items {
		switch v := item.(type) {
		case *docx.Paragraph:
			// fmt.Sprint is used so that the library's own string
			// formatting of the paragraph is applied.
			if text := strings.TrimSpace(fmt.Sprint(v)); text != "" {
				sb.WriteString(text)
				sb.WriteString("\n\n")
			}
		case *docx.Table:
			writeDocxTable(&sb, v)
		default:
			// Anything else (images, headers, ...) is ignored.
		}
	}
	return sb.String(), nil
}

// writeDocxTable appends tbl to sb as a Markdown pipe table under a
// "## 表格" heading, followed by a blank line. A separator line is written
// after the first row, so the first row is rendered as the table header.
func writeDocxTable(sb *strings.Builder, tbl *docx.Table) {
	sb.WriteString("## 表格\n")

	for i, row := range tbl.TableRows {
		cells := make([]string, 0, len(row.TableCells))
		for _, cell := range row.TableCells {
			// The cell text is assembled from the cell's paragraphs,
			// separated by single spaces.
			var cellText strings.Builder
			for _, p := range cell.Paragraphs {
				cellText.WriteString(fmt.Sprint(p))
				cellText.WriteString(" ")
			}
			cells = append(cells, strings.TrimSpace(cellText.String()))
		}

		// Each row carries its own leading and trailing pipe; emitting the
		// leading pipe only once per table leaves later rows malformed.
		sb.WriteString("| ")
		sb.WriteString(strings.Join(cells, " | "))
		sb.WriteString(" |\n")

		// Header separator: one "---" column per cell of the first row.
		if i == 0 {
			sb.WriteString("|")
			sb.WriteString(strings.Repeat(" --- |", len(cells)))
			sb.WriteString("\n")
		}
	}
	sb.WriteString("\n")
}
func main1() {
// 測試用
prompt, err := docxToStructuredPrompt("D:\\myDocument\\tencent\\weChat\\WeChat Files\\wxid_pv6rg3z2l28y22\\FileStorage\\File\\2026-01\\(改)小班体育活动《蚂蚁运粮》(泉秀实幼吴思莹).docx")
if err != nil {
fmt.Println("錯誤:", err)
return
}
fmt.Println(prompt)
}