package email import ( "strings" "github.com/PuerkitoBio/goquery" ) // htmlToPlainText derives a plain-text representation from rendered HTML. // The text version is for email clients that cannot render HTML. The HTML // version is always preferred when the client supports it. func htmlToPlainText(htmlStr string) string { doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlStr)) if err != nil { return htmlStr } // Remove elements that should not appear in plain text. doc.Find("style, script, head, .preheader").Remove() // Replace links with "text (URL)" format. doc.Find("a[href]").Each(func(_ int, s *goquery.Selection) { href, _ := s.Attr("href") text := strings.TrimSpace(s.Text()) if href != "" && href != text && !strings.HasPrefix(href, "mailto:") { s.ReplaceWithHtml(text + "\n" + href) } }) // Walk the DOM and collect text with structural newlines. var lines []string var walk func(*goquery.Selection) walk = func(sel *goquery.Selection) { sel.Contents().Each(func(_ int, n *goquery.Selection) { tag := goquery.NodeName(n) switch tag { case "#text": t := strings.TrimSpace(n.Text()) if t != "" { lines = append(lines, t) } case "br": lines = append(lines, "") case "p", "div", "section", "article", "main", "h1", "h2", "h3", "h4", "h5", "h6", "li": walk(n) lines = append(lines, "") case "tr": walk(n) lines = append(lines, "") case "hr": lines = append(lines, "---") default: walk(n) } }) } body := doc.Find("body") if body.Length() == 0 { body = doc.Selection } walk(body) // Collapse consecutive blank lines to a single blank line. result := make([]string, 0, len(lines)) prevBlank := false for _, line := range lines { if line == "" { if !prevBlank { result = append(result, "") } prevBlank = true } else { result = append(result, line) prevBlank = false } } return strings.TrimSpace(strings.Join(result, "\n")) }