mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
basic atom 1.0 parser
This commit is contained in:
parent
d185fb6dd7
commit
279fc469ab
87
src/feed/atom.go
Normal file
87
src/feed/atom.go
Normal file
@ -0,0 +1,87 @@
|
||||
package feed
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"html"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
type atomFeed struct {
|
||||
XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
|
||||
ID string `xml:"id"`
|
||||
Title atomText `xml:"title"`
|
||||
Links atomLinks `xml:"link"`
|
||||
Entries []atomEntry `xml:"entry"`
|
||||
}
|
||||
|
||||
type atomEntry struct {
|
||||
ID string `xml:"id"`
|
||||
Title atomText `xml:"title"`
|
||||
Summary atomText `xml:"summary"`
|
||||
Published string `xml:"published"`
|
||||
Updated string `xml:"updated"`
|
||||
Links atomLinks `xml:"link"`
|
||||
Content atomText `xml:"content"`
|
||||
}
|
||||
|
||||
// atomText captures an Atom text construct (<title>, <summary>, <content>)
// in two forms so that the consumer can pick the one matching its type.
type atomText struct {
	// Type is the value of the element's type attribute; it is empty when
	// the attribute is absent.
	Type string `xml:"type,attr"`
	// Data is the element's character data.
	Data string `xml:",chardata"`
	// XML is the raw, unparsed markup nested inside the element.
	XML string `xml:",innerxml"`
}
|
||||
|
||||
// atomLink is the decoding target for a single Atom <link> element.
type atomLink struct {
	// Href is the value of the href attribute.
	Href string `xml:"href,attr"`
	// Rel is the value of the rel attribute; it is empty when the attribute
	// is absent.
	Rel string `xml:"rel,attr"`
}
|
||||
|
||||
// atomLinks is the list of <link> elements attached to a feed or an entry.
type atomLinks []atomLink
|
||||
|
||||
func (a *atomText) String() string {
|
||||
data := a.Data
|
||||
if a.Type == "xhtml" {
|
||||
data = a.XML
|
||||
}
|
||||
return html.UnescapeString(strings.TrimSpace(data))
|
||||
}
|
||||
|
||||
func (links atomLinks) First(rel string) string {
|
||||
for _, l := range links {
|
||||
if l.Rel == rel {
|
||||
return l.Href
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func ParseAtom(r io.Reader) (*Feed, error) {
|
||||
f := atomFeed{}
|
||||
|
||||
decoder := xml.NewDecoder(r)
|
||||
if err := decoder.Decode(&f); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
feed := &Feed{
|
||||
Title: f.Title.String(),
|
||||
SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
|
||||
FeedURL: f.Links.First("self"),
|
||||
}
|
||||
for _, e := range f.Entries {
|
||||
date, _ := dateParse(first(e.Published, e.Updated))
|
||||
imageUrl := ""
|
||||
podcastUrl := ""
|
||||
|
||||
feed.Items = append(feed.Items, Item{
|
||||
GUID: first(e.ID),
|
||||
Date: date,
|
||||
URL: first(e.Links.First("alternate"), f.Links.First("")),
|
||||
Title: e.Title.String(),
|
||||
Content: e.Content.String(),
|
||||
ImageURL: imageUrl,
|
||||
PodcastURL: podcastUrl,
|
||||
})
|
||||
}
|
||||
return feed, nil
|
||||
}
|
59
src/feed/atom_test.go
Normal file
59
src/feed/atom_test.go
Normal file
@ -0,0 +1,59 @@
|
||||
package feed
|
||||
|
||||
import (
|
||||
"reflect"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestAtom(t *testing.T) {
|
||||
have, _ := ParseAtom(strings.NewReader(`
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||
<title>Example Feed</title>
|
||||
<subtitle>A subtitle.</subtitle>
|
||||
<link href="http://example.org/feed/" rel="self" />
|
||||
<link href="http://example.org/" />
|
||||
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<entry>
|
||||
<title>Atom-Powered Robots Run Amok</title>
|
||||
<link href="http://example.org/2003/12/13/atom03" />
|
||||
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
|
||||
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
|
||||
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||
<updated>2003-12-13T18:30:02Z</updated>
|
||||
<summary>Some text.</summary>
|
||||
<content type="xhtml">
|
||||
<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
|
||||
</content>
|
||||
<author>
|
||||
<name>John Doe</name>
|
||||
<email>johndoe@example.com</email>
|
||||
</author>
|
||||
</entry>
|
||||
</feed>
|
||||
`))
|
||||
want := &Feed{
|
||||
Title: "Example Feed",
|
||||
SiteURL: "http://example.org/",
|
||||
FeedURL: "http://example.org/feed/",
|
||||
Items: []Item{
|
||||
{
|
||||
GUID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
|
||||
Date: time.Unix(1071340202, 0).UTC(),
|
||||
URL: "http://example.org/2003/12/13/atom03.html",
|
||||
Title: "Atom-Powered Robots Run Amok",
|
||||
Content: `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
|
||||
ImageURL: "",
|
||||
PodcastURL: "",
|
||||
},
|
||||
},
|
||||
}
|
||||
if !reflect.DeepEqual(want, have) {
|
||||
t.Logf("want: %#v", want)
|
||||
t.Logf("have: %#v", have)
|
||||
t.Fatal("invalid atom")
|
||||
}
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user