handle html type atom text

This commit is contained in:
Nazar Kanaev
2021-04-02 21:46:23 +01:00
parent 8967936fb6
commit e50c7e1a51
5 changed files with 45 additions and 26 deletions

View File

@@ -6,6 +6,8 @@ import (
"html"
"io"
"strings"
"github.com/nkanaev/yarr/src/content/htmlutil"
)
type atomFeed struct {
@@ -42,6 +44,13 @@ type atomLink struct {
type atomLinks []atomLink
// Text returns the content of the text construct for use as plain text.
// When the Type attribute is "html", Data is run through
// htmlutil.ExtractText; for every other type Data is returned unchanged.
func (a *atomText) Text() string {
	switch a.Type {
	case "html":
		return htmlutil.ExtractText(a.Data)
	default:
		return a.Data
	}
}
func (a *atomText) String() string {
data := a.Data
if a.Type == "xhtml" {
@@ -76,7 +85,7 @@ func ParseAtom(r io.Reader) (*Feed, error) {
GUID: firstNonEmpty(srcitem.ID),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")),
Title: srcitem.Title.String(),
Title: srcitem.Title.Text(),
Content: firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
ImageURL: srcitem.firstMediaThumbnail(),
AudioURL: "",

View File

@@ -77,3 +77,19 @@ func TestAtomClashingNamespaces(t *testing.T) {
t.FailNow()
}
}
// TestAtomHTMLTitle checks that an Atom entry whose <title> is declared with
// type="html" is exposed on the parsed item as "say what?", i.e. without the
// escaped <code> markup.
func TestAtomHTMLTitle(t *testing.T) {
	// NOTE(review): the <title> element in the fixture has no closing tag;
	// presumably the parser is expected to tolerate this — confirm it is
	// intentional before "fixing" the fixture.
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry><title type="html">say &lt;code&gt;what&lt;/code&gt;?</entry>
</feed>
`))
	// Fail with a readable message instead of panicking on feed.Items[0]
	// when parsing fails or yields no entries.
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	if len(feed.Items) == 0 {
		t.Fatalf("parsed feed has no items")
	}

	have := feed.Items[0].Title
	want := "say what?"
	if !reflect.DeepEqual(want, have) {
		t.Logf("want: %#v", want)
		t.Logf("have: %#v", have)
		t.FailNow()
	}
}