basic atom 1.0 parser

2026-06-15 20:45:08 +00:00 · 2021-03-19 23:04:34 +00:00
parent d185fb6dd7
commit 279fc469ab
2 changed files with 146 additions and 0 deletions
--- a/src/feed/atom.go
+++ b/src/feed/atom.go
@@ -0,0 +1,87 @@
+package feed
+
+import (
+	"encoding/xml"
+	"html"
+	"io"
+	"strings"
+)
+
+// atomFeed is the decoding target for the root <feed> element in the
+// http://www.w3.org/2005/Atom namespace. It captures the feed's id, title,
+// every <link> child, and every <entry> child.
+type atomFeed struct {
+	XMLName xml.Name    `xml:"http://www.w3.org/2005/Atom feed"`
+	ID      string      `xml:"id"`
+	Title   atomText    `xml:"title"`
+	Links   atomLinks   `xml:"link"`
+	Entries []atomEntry `xml:"entry"`
+}
+
+// atomEntry is the decoding target for a single <entry> element. The
+// published and updated timestamps are kept as raw strings; they are not
+// parsed during decoding.
+type atomEntry struct {
+	ID        string    `xml:"id"`
+	Title     atomText  `xml:"title"`
+	Summary   atomText  `xml:"summary"`
+	Published string    `xml:"published"`
+	Updated   string    `xml:"updated"`
+	Links     atomLinks `xml:"link"`
+	Content   atomText  `xml:"content"`
+}
+
+// atomText holds an element such as <title>, <summary> or <content>. It keeps
+// the element's "type" attribute, its character data (Data), and its raw
+// inner XML (XML) so that String can pick the representation that matches
+// the declared type.
+type atomText struct {
+	Type string `xml:"type,attr"`
+	Data string `xml:",chardata"`
+	XML  string `xml:",innerxml"`
+}
+
+// atomLink holds the "href" and "rel" attributes of a <link> element. Rel is
+// the empty string when the attribute is absent.
+type atomLink struct {
+	Href string `xml:"href,attr"`
+	Rel  string `xml:"rel,attr"`
+}
+
+// atomLinks is a list of <link> elements; its First method looks one up by
+// its rel value.
+type atomLinks []atomLink
+
+// String returns the element's text with surrounding whitespace trimmed and
+// HTML entities unescaped. When the type attribute is "xhtml" the raw inner
+// XML is the source; for every other type the character data is used.
+func (a *atomText) String() string {
+	if a.Type == "xhtml" {
+		return html.UnescapeString(strings.TrimSpace(a.XML))
+	}
+	return html.UnescapeString(strings.TrimSpace(a.Data))
+}
+
+// First returns the href of the earliest link whose rel attribute equals rel
+// exactly. Passing "" selects links that carry no rel attribute. It returns
+// "" when no link matches.
+func (links atomLinks) First(rel string) string {
+	for i := range links {
+		if links[i].Rel != rel {
+			continue
+		}
+		return links[i].Href
+	}
+	return ""
+}
+
+func ParseAtom(r io.Reader) (*Feed, error) {
+	f := atomFeed{}
+
+	decoder := xml.NewDecoder(r)
+	if err := decoder.Decode(&f); err != nil {
+		return nil, err
+	}
+
+	feed := &Feed{
+		Title:   f.Title.String(),
+		SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
+		FeedURL: f.Links.First("self"),
+	}
+	for _, e := range f.Entries {
+		date, _ := dateParse(first(e.Published, e.Updated))
+		imageUrl := ""
+		podcastUrl := ""
+
+		feed.Items = append(feed.Items, Item{
+			GUID:       first(e.ID),
+			Date:       date,
+			URL:        first(e.Links.First("alternate"), f.Links.First("")),
+			Title:      e.Title.String(),
+			Content:    e.Content.String(),
+			ImageURL:   imageUrl,
+			PodcastURL: podcastUrl,
+		})
+	}
+	return feed, nil
+}
--- a/src/feed/atom_test.go
+++ b/src/feed/atom_test.go
@@ -0,0 +1,59 @@
+package feed
+
+import (
+	"reflect"
+	"strings"
+	"testing"
+	"time"
+)
+
+// TestAtom parses a small Atom 1.0 document and checks the resulting Feed:
+// feed title, site and feed URLs, and the single entry's GUID, date,
+// rel="alternate" URL, title and xhtml content.
+func TestAtom(t *testing.T) {
+	have, err := ParseAtom(strings.NewReader(`
+		<?xml version="1.0" encoding="utf-8"?>
+		<feed xmlns="http://www.w3.org/2005/Atom">
+			<title>Example Feed</title>
+			<subtitle>A subtitle.</subtitle>
+			<link href="http://example.org/feed/" rel="self" />
+			<link href="http://example.org/" />
+			<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
+			<updated>2003-12-13T18:30:02Z</updated>
+			<entry>
+				<title>Atom-Powered Robots Run Amok</title>
+				<link href="http://example.org/2003/12/13/atom03" />
+				<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
+				<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
+				<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
+				<updated>2003-12-13T18:30:02Z</updated>
+				<summary>Some text.</summary>
+				<content type="xhtml">
+					<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
+				</content>
+				<author>
+					<name>John Doe</name>
+					<email>johndoe@example.com</email>
+				</author>
+			</entry>
+		</feed>
+	`))
+	// Fail fast on a parse error instead of comparing against a nil feed,
+	// which would hide the real cause behind a generic mismatch.
+	if err != nil {
+		t.Fatalf("ParseAtom returned an unexpected error: %v", err)
+	}
+	want := &Feed{
+		Title:   "Example Feed",
+		SiteURL: "http://example.org/",
+		FeedURL: "http://example.org/feed/",
+		Items: []Item{
+			{
+				GUID:       "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
+				Date:       time.Unix(1071340202, 0).UTC(),
+				URL:        "http://example.org/2003/12/13/atom03.html",
+				Title:      "Atom-Powered Robots Run Amok",
+				Content:    `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
+				ImageURL:   "",
+				PodcastURL: "",
+			},
+		},
+	}
+	if !reflect.DeepEqual(want, have) {
+		t.Logf("want: %#v", want)
+		t.Logf("have: %#v", have)
+		t.Fatal("invalid atom")
+	}
+}