basic atom 1.0 parser

This commit is contained in:
Nazar Kanaev 2021-03-19 23:04:34 +00:00
parent d185fb6dd7
commit 279fc469ab
2 changed files with 146 additions and 0 deletions

87
src/feed/atom.go Normal file
View File

@ -0,0 +1,87 @@
package feed
import (
"encoding/xml"
"html"
"io"
"strings"
)
type atomFeed struct {
XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
ID string `xml:"id"`
Title atomText `xml:"title"`
Links atomLinks `xml:"link"`
Entries []atomEntry `xml:"entry"`
}
type atomEntry struct {
ID string `xml:"id"`
Title atomText `xml:"title"`
Summary atomText `xml:"summary"`
Published string `xml:"published"`
Updated string `xml:"updated"`
Links atomLinks `xml:"link"`
Content atomText `xml:"content"`
}
type atomText struct {
Type string `xml:"type,attr"`
Data string `xml:",chardata"`
XML string `xml:",innerxml"`
}
type atomLink struct {
Href string `xml:"href,attr"`
Rel string `xml:"rel,attr"`
}
type atomLinks []atomLink
func (a *atomText) String() string {
data := a.Data
if a.Type == "xhtml" {
data = a.XML
}
return html.UnescapeString(strings.TrimSpace(data))
}
func (links atomLinks) First(rel string) string {
for _, l := range links {
if l.Rel == rel {
return l.Href
}
}
return ""
}
func ParseAtom(r io.Reader) (*Feed, error) {
f := atomFeed{}
decoder := xml.NewDecoder(r)
if err := decoder.Decode(&f); err != nil {
return nil, err
}
feed := &Feed{
Title: f.Title.String(),
SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
FeedURL: f.Links.First("self"),
}
for _, e := range f.Entries {
date, _ := dateParse(first(e.Published, e.Updated))
imageUrl := ""
podcastUrl := ""
feed.Items = append(feed.Items, Item{
GUID: first(e.ID),
Date: date,
URL: first(e.Links.First("alternate"), f.Links.First("")),
Title: e.Title.String(),
Content: e.Content.String(),
ImageURL: imageUrl,
PodcastURL: podcastUrl,
})
}
return feed, nil
}

59
src/feed/atom_test.go Normal file
View File

@ -0,0 +1,59 @@
package feed
import (
"reflect"
"strings"
"testing"
"time"
)
func TestAtom(t *testing.T) {
have, _ := ParseAtom(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<title>Example Feed</title>
<subtitle>A subtitle.</subtitle>
<link href="http://example.org/feed/" rel="self" />
<link href="http://example.org/" />
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
<updated>2003-12-13T18:30:02Z</updated>
<entry>
<title>Atom-Powered Robots Run Amok</title>
<link href="http://example.org/2003/12/13/atom03" />
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
<updated>2003-12-13T18:30:02Z</updated>
<summary>Some text.</summary>
<content type="xhtml">
<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
</content>
<author>
<name>John Doe</name>
<email>johndoe@example.com</email>
</author>
</entry>
</feed>
`))
want := &Feed{
Title: "Example Feed",
SiteURL: "http://example.org/",
FeedURL: "http://example.org/feed/",
Items: []Item{
{
GUID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
Date: time.Unix(1071340202, 0).UTC(),
URL: "http://example.org/2003/12/13/atom03.html",
Title: "Atom-Powered Robots Run Amok",
Content: `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
ImageURL: "",
PodcastURL: "",
},
},
}
if !reflect.DeepEqual(want, have) {
t.Logf("want: %#v", want)
t.Logf("have: %#v", have)
t.Fatal("invalid atom")
}
}