Compare commits

..

2 Commits

Author SHA1 Message Date
Thanh Nguyen
6c3f6605de
Merge e9676491ee8972ad9d13f88e348a456dbf9e199a into 3adcddc70c292a1d8d5bce27163183a140310e3f 2023-09-23 22:10:21 +02:00
Will Harding
3adcddc70c Pull atom xhtml title from nested elements
The Atom spec says that any title marked with a type of "xhtml" should be
contained in a div element[1] so we need to use the full XML text when
extracting the text.

[1] https://www.rfc-editor.org/rfc/rfc4287#section-3.1
2023-09-23 21:08:22 +01:00
2 changed files with 40 additions and 0 deletions

View File

@ -47,6 +47,8 @@ type atomLinks []atomLink
func (a *atomText) Text() string {
if a.Type == "html" {
return htmlutil.ExtractText(a.Data)
} else if a.Type == "xhtml" {
return htmlutil.ExtractText(a.XML)
}
return a.Data
}

View File

@ -94,6 +94,44 @@ func TestAtomHTMLTitle(t *testing.T) {
}
}
func TestAtomXHTMLTitle(t *testing.T) {
feed, _ := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry><title type="xhtml">say &lt;code&gt;what&lt;/code&gt;?</entry>
</feed>
`))
have := feed.Items[0].Title
want := "say what?"
if !reflect.DeepEqual(want, have) {
t.Logf("want: %#v", want)
t.Logf("have: %#v", have)
t.FailNow()
}
}
func TestAtomXHTMLNestedTitle(t *testing.T) {
feed, _ := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry>
<title type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml">
<a href="https://example.com">Link to Example</a>
</div>
</title>
</entry>
</feed>
`))
have := feed.Items[0].Title
want := "Link to Example"
if !reflect.DeepEqual(want, have) {
t.Logf("want: %#v", want)
t.Logf("have: %#v", have)
t.FailNow()
}
}
func TestAtomImageLink(t *testing.T) {
feed, _ := Parse(strings.NewReader(`
<?xml version="1.0" encoding="UTF-8"?>