yarr/src/parser/atom_test.go

package parser

import (
	"reflect"
	"strings"
	"testing"
	"time"
)

// TestAtom parses a small but complete Atom document and compares the whole
// resulting Feed against the expected value.
//
// The fixture carries several <link> elements at both feed and entry level,
// plus both a <summary> and an xhtml <content>; the expectation pins which of
// them end up in SiteURL, Item.URL and Item.Content.
func TestAtom(t *testing.T) {
	have, err := Parse(strings.NewReader(`
		<?xml version="1.0" encoding="utf-8"?>
		<feed xmlns="http://www.w3.org/2005/Atom">
			<title>Example Feed</title>
			<subtitle>A subtitle.</subtitle>
			<link href="http://example.org/feed/" rel="self" />
			<link href="http://example.org/" />
			<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
			<updated>2003-12-13T18:30:02Z</updated>
			<entry>
				<title>Atom-Powered Robots Run Amok</title>
				<link href="http://example.org/2003/12/13/atom03" />
				<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
				<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
				<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
				<updated>2003-12-13T18:30:02Z</updated>
				<summary>Some text.</summary>
				<content type="xhtml">
					<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
				</content>
				<author>
					<name>John Doe</name>
					<email>johndoe@example.com</email>
				</author>
			</entry>
		</feed>
	`))
	// Surface the parser's own error instead of a misleading struct mismatch.
	if err != nil {
		t.Fatal(err)
	}
	want := &Feed{
		Title:   "Example Feed",
		SiteURL: "http://example.org/",
		Items: []Item{
			{
				GUID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
				// 1071340202 is 2003-12-13T18:30:02Z, the entry's <updated> value.
				Date:     time.Unix(1071340202, 0).UTC(),
				URL:      "http://example.org/2003/12/13/atom03.html",
				Title:    "Atom-Powered Robots Run Amok",
				Content:  `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
				ImageURL: "",
				AudioURL: "",
			},
		},
	}
	if !reflect.DeepEqual(want, have) {
		t.Logf("want: %#v", want)
		t.Logf("have: %#v", have)
		t.Fatal("invalid atom")
	}
}

// TestAtomClashingNamespaces feeds the parser an entry that holds both an
// Atom <content> element and an empty <media:content> element, and expects
// the item's Content to be the Atom element's text.
func TestAtomClashingNamespaces(t *testing.T) {
	const fixture = `
		<?xml version="1.0" encoding="utf-8"?>
		<feed xmlns="http://www.w3.org/2005/Atom">
			<entry>
				<content>atom content</content>
				<media:content xmlns:media="http://search.yahoo.com/mrss/" />
			</entry>
		</feed>
	`
	parsed, err := Parse(strings.NewReader(fixture))
	if err != nil {
		t.Fatal(err)
	}

	expected := &Feed{Items: []Item{{Content: "atom content"}}}
	if reflect.DeepEqual(expected, parsed) {
		return
	}
	t.Logf("want: %#v", expected)
	t.Logf("have: %#v", parsed)
	t.FailNow()
}

// TestAtomHTMLTitle checks that an entry title declared as type="html" and
// containing escaped markup (&lt;code&gt;...&lt;/code&gt;) comes out as the
// plain text "say what?".
//
// NOTE(review): the fixture's <title> element has no closing tag; it is left
// as-is here — confirm whether that is intentional.
func TestAtomHTMLTitle(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
		<?xml version="1.0" encoding="utf-8"?>
		<feed xmlns="http://www.w3.org/2005/Atom">
			<entry><title type="html">say &lt;code&gt;what&lt;/code&gt;?</entry>
		</feed>
	`))
	if err != nil {
		t.Fatal(err)
	}
	// Fail cleanly rather than panic on the index below.
	if len(feed.Items) == 0 {
		t.Fatal("feed has no items")
	}
	have := feed.Items[0].Title
	want := "say what?"
	if want != have {
		t.Fatalf("want: %#v\nhave: %#v\n", want, have)
	}
}

// TestAtomImageLink checks that the url attribute of an entry's
// <media:thumbnail> element is reported as the item's ImageURL.
//
// The url in the fixture contains a raw, unescaped '&'; the expected value is
// the attribute text exactly as written.
func TestAtomImageLink(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
		<?xml version="1.0" encoding="UTF-8"?>
		<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
			<entry>
				<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
			</entry>
		</feed>
	`))
	if err != nil {
		t.Fatal(err)
	}
	// Fail cleanly rather than panic on the index below.
	if len(feed.Items) == 0 {
		t.Fatal("feed has no items")
	}
	have := feed.Items[0].ImageURL
	want := `https://example.com/image.png?width=100&height=100`
	if want != have {
		t.Fatalf("item.image_url doesn't match\nwant: %#v\nhave: %#v\n", want, have)
	}
}

// TestAtomImageLinkDuplicated covers a case found in
// https://www.reddit.com/r/funny.rss: items come with thumbnail urls which
// are also present in the content.
//
// The item's Content must be the unescaped <img> markup, and ImageURL must be
// left empty because the same image already appears in that content.
func TestAtomImageLinkDuplicated(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
		<?xml version="1.0" encoding="utf-8"?>
		<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/">
			<entry>
				<content type="html">&lt;img src="https://example.com/image.png?width=100&amp;height=100"&gt;</content>
				<media:thumbnail url="https://example.com/image.png?width=100&height=100" />
			</entry>
		</feed>
	`))
	if err != nil {
		t.Fatal(err)
	}
	// Fail cleanly rather than panic on the index below.
	if len(feed.Items) == 0 {
		t.Fatal("feed has no items")
	}
	item := feed.Items[0]
	have := item.Content
	want := `<img src="https://example.com/image.png?width=100&height=100">`
	if want != have {
		t.Fatalf("want: %#v\nhave: %#v\n", want, have)
	}
	if item.ImageURL != "" {
		t.Fatal("item.image_url must be unset if present in the content")
	}
}