handle html type atom text

This commit is contained in:
Nazar Kanaev
2021-04-02 21:46:23 +01:00
parent 8967936fb6
commit e50c7e1a51
5 changed files with 45 additions and 26 deletions

View File

@@ -1,6 +1,7 @@
package htmlutil
import (
"bytes"
"strings"
"golang.org/x/net/html"
@@ -39,3 +40,18 @@ func Text(node *html.Node) string {
}
return strings.Join(text, " ")
}
// ExtractText returns the concatenated contents of every text token found in
// the HTML string content, dropping all other tokens (tags, comments, etc.).
//
// Tokenizing stops at the first html.ErrorToken, which the tokenizer reports
// both at end of input and on a read/parse failure; whatever text was
// collected up to that point is returned.
//
// NOTE(review): Tokenizer.Text is documented as already returning unescaped
// text, so the html.UnescapeString call below is a second unescape pass
// (e.g. "&amp;lt;" ends up as "<"). Presumably this is meant to tolerate
// doubly-escaped feed content — confirm it is intentional.
func ExtractText(content string) string {
	var out bytes.Buffer
	z := html.NewTokenizer(strings.NewReader(content))
	for tt := z.Next(); tt != html.ErrorToken; tt = z.Next() {
		if tt != html.TextToken {
			continue
		}
		// Convert to a string right away: the slice returned by Text may be
		// overwritten by the next call to Next.
		chunk := string(z.Text())
		out.WriteString(html.UnescapeString(chunk))
	}
	return out.String()
}