handle html type atom text

This commit is contained in:
Nazar Kanaev
2021-04-02 21:46:23 +01:00
parent 8967936fb6
commit e50c7e1a51
5 changed files with 45 additions and 26 deletions

View File

@@ -6,6 +6,8 @@ import (
"html"
"io"
"strings"
"github.com/nkanaev/yarr/src/content/htmlutil"
)
type atomFeed struct {
@@ -42,6 +44,13 @@ type atomLink struct {
// atomLinks is a slice of atomLink values.
type atomLinks []atomLink
// Text returns the content of the Atom text construct for use as plain text.
//
// When the construct's Type is "html", Data is handed to
// htmlutil.ExtractText and that result is returned. For every other Type
// value, Data is returned unchanged.
func (a *atomText) Text() string {
	switch a.Type {
	case "html":
		return htmlutil.ExtractText(a.Data)
	default:
		return a.Data
	}
}
func (a *atomText) String() string {
data := a.Data
if a.Type == "xhtml" {
@@ -76,7 +85,7 @@ func ParseAtom(r io.Reader) (*Feed, error) {
GUID: firstNonEmpty(srcitem.ID),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")),
Title: srcitem.Title.String(),
Title: srcitem.Title.Text(),
Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
ImageURL: srcitem.firstMediaThumbnail(),
AudioURL: "",