mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 21:19:19 +00:00
basic atom 1.0 parser
This commit is contained in:
parent
d185fb6dd7
commit
279fc469ab
87
src/feed/atom.go
Normal file
87
src/feed/atom.go
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
package feed
|
||||||
|
|
||||||
|
import (
|
||||||
|
"encoding/xml"
|
||||||
|
"html"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
)
|
||||||
|
|
||||||
|
type atomFeed struct {
|
||||||
|
XMLName xml.Name `xml:"http://www.w3.org/2005/Atom feed"`
|
||||||
|
ID string `xml:"id"`
|
||||||
|
Title atomText `xml:"title"`
|
||||||
|
Links atomLinks `xml:"link"`
|
||||||
|
Entries []atomEntry `xml:"entry"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type atomEntry struct {
|
||||||
|
ID string `xml:"id"`
|
||||||
|
Title atomText `xml:"title"`
|
||||||
|
Summary atomText `xml:"summary"`
|
||||||
|
Published string `xml:"published"`
|
||||||
|
Updated string `xml:"updated"`
|
||||||
|
Links atomLinks `xml:"link"`
|
||||||
|
Content atomText `xml:"content"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// atomText captures an element whose body may be plain text or markup
// (used for <title>, <summary> and <content>).
type atomText struct {
	// Type is the value of the element's "type" attribute.
	Type string `xml:"type,attr"`

	// Data is the element's character data.
	Data string `xml:",chardata"`

	// XML is the element's raw inner XML, markup included.
	XML string `xml:",innerxml"`
}
|
||||||
|
|
||||||
|
type (
	// atomLink captures the "href" and "rel" attributes of a <link> element.
	atomLink struct {
		Href string `xml:"href,attr"`
		Rel  string `xml:"rel,attr"`
	}

	// atomLinks is the list of <link> elements found on a feed or an entry.
	atomLinks []atomLink
)
|
||||||
|
|
||||||
|
func (a *atomText) String() string {
|
||||||
|
data := a.Data
|
||||||
|
if a.Type == "xhtml" {
|
||||||
|
data = a.XML
|
||||||
|
}
|
||||||
|
return html.UnescapeString(strings.TrimSpace(data))
|
||||||
|
}
|
||||||
|
|
||||||
|
func (links atomLinks) First(rel string) string {
|
||||||
|
for _, l := range links {
|
||||||
|
if l.Rel == rel {
|
||||||
|
return l.Href
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func ParseAtom(r io.Reader) (*Feed, error) {
|
||||||
|
f := atomFeed{}
|
||||||
|
|
||||||
|
decoder := xml.NewDecoder(r)
|
||||||
|
if err := decoder.Decode(&f); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
feed := &Feed{
|
||||||
|
Title: f.Title.String(),
|
||||||
|
SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
|
||||||
|
FeedURL: f.Links.First("self"),
|
||||||
|
}
|
||||||
|
for _, e := range f.Entries {
|
||||||
|
date, _ := dateParse(first(e.Published, e.Updated))
|
||||||
|
imageUrl := ""
|
||||||
|
podcastUrl := ""
|
||||||
|
|
||||||
|
feed.Items = append(feed.Items, Item{
|
||||||
|
GUID: first(e.ID),
|
||||||
|
Date: date,
|
||||||
|
URL: first(e.Links.First("alternate"), f.Links.First("")),
|
||||||
|
Title: e.Title.String(),
|
||||||
|
Content: e.Content.String(),
|
||||||
|
ImageURL: imageUrl,
|
||||||
|
PodcastURL: podcastUrl,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
return feed, nil
|
||||||
|
}
|
59
src/feed/atom_test.go
Normal file
59
src/feed/atom_test.go
Normal file
@@ -0,0 +1,59 @@
|
|||||||
|
package feed
|
||||||
|
|
||||||
|
import (
|
||||||
|
"reflect"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
func TestAtom(t *testing.T) {
|
||||||
|
have, _ := ParseAtom(strings.NewReader(`
|
||||||
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
|
<feed xmlns="http://www.w3.org/2005/Atom">
|
||||||
|
<title>Example Feed</title>
|
||||||
|
<subtitle>A subtitle.</subtitle>
|
||||||
|
<link href="http://example.org/feed/" rel="self" />
|
||||||
|
<link href="http://example.org/" />
|
||||||
|
<id>urn:uuid:60a76c80-d399-11d9-b91C-0003939e0af6</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<entry>
|
||||||
|
<title>Atom-Powered Robots Run Amok</title>
|
||||||
|
<link href="http://example.org/2003/12/13/atom03" />
|
||||||
|
<link rel="alternate" type="text/html" href="http://example.org/2003/12/13/atom03.html"/>
|
||||||
|
<link rel="edit" href="http://example.org/2003/12/13/atom03/edit"/>
|
||||||
|
<id>urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a</id>
|
||||||
|
<updated>2003-12-13T18:30:02Z</updated>
|
||||||
|
<summary>Some text.</summary>
|
||||||
|
<content type="xhtml">
|
||||||
|
<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>
|
||||||
|
</content>
|
||||||
|
<author>
|
||||||
|
<name>John Doe</name>
|
||||||
|
<email>johndoe@example.com</email>
|
||||||
|
</author>
|
||||||
|
</entry>
|
||||||
|
</feed>
|
||||||
|
`))
|
||||||
|
want := &Feed{
|
||||||
|
Title: "Example Feed",
|
||||||
|
SiteURL: "http://example.org/",
|
||||||
|
FeedURL: "http://example.org/feed/",
|
||||||
|
Items: []Item{
|
||||||
|
{
|
||||||
|
GUID: "urn:uuid:1225c695-cfb8-4ebb-aaaa-80da344efa6a",
|
||||||
|
Date: time.Unix(1071340202, 0).UTC(),
|
||||||
|
URL: "http://example.org/2003/12/13/atom03.html",
|
||||||
|
Title: "Atom-Powered Robots Run Amok",
|
||||||
|
Content: `<div xmlns="http://www.w3.org/1999/xhtml"><p>This is the entry content.</p></div>`,
|
||||||
|
ImageURL: "",
|
||||||
|
PodcastURL: "",
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
if !reflect.DeepEqual(want, have) {
|
||||||
|
t.Logf("want: %#v", want)
|
||||||
|
t.Logf("have: %#v", have)
|
||||||
|
t.Fatal("invalid atom")
|
||||||
|
}
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user