mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-28 14:51:41 +00:00
The Atom spec says that any title marked with a type of "xhtml" should be contained in a div element[1] so we need to use the full XML text when extracting the text. [1] https://www.rfc-editor.org/rfc/rfc4287#section-3.1
217 lines
6.3 KiB
Go
217 lines
6.3 KiB
Go
package parser
|
|
|
|
import (
|
|
"reflect"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
func TestAtom(t *testing.T) {
|
|
have, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<title>Example Feed</title>
|
|
<subtitle>A subtitle.</subtitle>
|
|
<link href="http://example.org/feed/" rel="self" />
|
|
<link href="http://example.org/" />
|
|
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
<entry>
|
|
<title>Atom-Powered Robots Run Amok</title>
|
|
<link href="http://example.org/2003/12/13/atom03" />
|
|
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
|
|
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
|
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
|
<updated>2003-12-13T18:30:02Z</updated>
|
|
<summary>Some text.</summary>
|
|
<content type="xhtml">
|
|
<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
|
|
</content>
|
|
<author>
|
|
<name>John Doe</name>
|
|
<email>johndoe@example.com</email>
|
|
</author>
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
want := &Feed{
|
|
Title: "Example Feed",
|
|
SiteURL: "http://example.org/",
|
|
Items: []Item{
|
|
{
|
|
GUID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
|
|
Date: time.Unix(1071340202, 0).UTC(),
|
|
URL: "http://example.org/2003/12/13/atom03.html",
|
|
Title: "Atom-Powered Robots Run Amok",
|
|
Content: `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
|
|
ImageURL: "",
|
|
AudioURL: "",
|
|
},
|
|
},
|
|
}
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Logf("want: %#v", want)
|
|
t.Logf("have: %#v", have)
|
|
t.Fatal("invalid atom")
|
|
}
|
|
}
|
|
|
|
func TestAtomClashingNamespaces(t *testing.T) {
|
|
have, err := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<entry>
|
|
<content>atom content</content>
|
|
<media:content xmlns:media="http://search.yahoo.com/mrss/" />
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
want := &Feed{Items: []Item{{Content: "atom content"}}}
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Logf("want: %#v", want)
|
|
t.Logf("have: %#v", have)
|
|
t.FailNow()
|
|
}
|
|
}
|
|
|
|
func TestAtomHTMLTitle(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<entry><title type="html">say <code>what</code>?</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items[0].Title
|
|
want := "say what?"
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Logf("want: %#v", want)
|
|
t.Logf("have: %#v", have)
|
|
t.FailNow()
|
|
}
|
|
}
|
|
|
|
func TestAtomXHTMLTitle(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<entry><title type="xhtml">say <code>what</code>?</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items[0].Title
|
|
want := "say what?"
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Logf("want: %#v", want)
|
|
t.Logf("have: %#v", have)
|
|
t.FailNow()
|
|
}
|
|
}
|
|
|
|
func TestAtomXHTMLNestedTitle(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
|
<entry>
|
|
<title type="xhtml">
|
|
<div xmlns="http://www.w3.org/1999/xhtml">
|
|
<a href="https://example.com">Link to Example</a>
|
|
</div>
|
|
</title>
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items[0].Title
|
|
want := "Link to Example"
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Logf("want: %#v", want)
|
|
t.Logf("have: %#v", have)
|
|
t.FailNow()
|
|
}
|
|
}
|
|
|
|
func TestAtomImageLink(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="UTF-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
|
|
<entry>
|
|
<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items[0].ImageURL
|
|
want := `https://example.com/image.png?width=100&height=100`
|
|
if want != have {
|
|
t.Fatalf("item.image_url doesn't match\nwant: %#v\nhave: %#v\n", want, have)
|
|
}
|
|
}
|
|
|
|
// found in: https://www.reddit.com/r/funny.rss
|
|
// items come with thumbnail urls which are also present in the content
|
|
func TestAtomImageLinkDuplicated(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
|
|
<entry>
|
|
<content type="html"><img src="https://example.com/image.png?width=100&height=100"></content>
|
|
<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items[0].Content
|
|
want := `<img src="https://example.com/image.png?width=100&height=100">`
|
|
if want != have {
|
|
t.Fatalf("want: %#v\nhave: %#v\n", want, have)
|
|
}
|
|
if feed.Items[0].ImageURL != "" {
|
|
t.Fatal("item.image_url must be unset if present in the content")
|
|
}
|
|
}
|
|
|
|
func TestAtomLinkInID(t *testing.T) {
|
|
feed, _ := Parse(strings.NewReader(`
|
|
<?xml version="1.0" encoding="utf-8"?>
|
|
<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
|
|
<entry>
|
|
<title>one updated</title>
|
|
<id>https://example.com/posts/1</id>
|
|
<updated>2003-12-13T09:17:51</updated>
|
|
</entry>
|
|
<entry>
|
|
<title>two</title>
|
|
<id>urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6</id>
|
|
</entry>
|
|
<entry>
|
|
<title>one</title>
|
|
<id>https://example.com/posts/1</id>
|
|
</entry>
|
|
</feed>
|
|
`))
|
|
have := feed.Items
|
|
want := []Item{
|
|
Item{
|
|
GUID: "https://example.com/posts/1::2003-12-13T09:17:51",
|
|
Date: time.Date(2003, time.December, 13, 9, 17, 51, 0, time.UTC),
|
|
URL: "https://example.com/posts/1",
|
|
Title: "one updated",
|
|
},
|
|
Item{
|
|
GUID: "urn:uuid:60a76c80-d399-11d9-b93C-0003939e0af6",
|
|
Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC), URL: "",
|
|
Title: "two",
|
|
},
|
|
Item{
|
|
GUID: "https://example.com/posts/1::",
|
|
Date: time.Date(1, time.January, 1, 0, 0, 0, 0, time.UTC),
|
|
URL: "https://example.com/posts/1",
|
|
Title: "one",
|
|
Content: "",
|
|
},
|
|
}
|
|
if !reflect.DeepEqual(want, have) {
|
|
t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
|
|
}
|
|
}
|