rss parser

This commit is contained in:
Nazar Kanaev 2021-03-22 19:43:16 +00:00
parent 70e9e1ed3a
commit cc6f6d91e1
2 changed files with 90 additions and 4 deletions

View File

@ -6,21 +6,23 @@ package feed
import (
"encoding/xml"
"fmt"
"io"
)
type rssFeed struct {
XMLName xml.Name `xml:"rss"`
Version string `xml:"version,attr"`
Title string `xml:"channel>title"`
Links []rssLink `xml:"channel>link"`
Link string `xml:"channel>link"`
Items []rssItem `xml:"channel>item"`
}
type rssItem struct {
GUID string `xml:"guid"`
Title []rssTitle `xml:"title"`
Links []rssLink `xml:"link"`
Description string `xml:"description"`
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"rss description"`
PubDate string `xml:"pubDate"`
EnclosureLinks []rssEnclosure `xml:"enclosure"`
@ -53,3 +55,31 @@ type rssEnclosure struct {
Type string `xml:"type,attr"`
Length string `xml:"length,attr"`
}
// ParseRSS decodes an RSS document from r and converts it into a Feed.
//
// The decoder's DefaultSpace is set to "rss", so elements that carry no
// namespace of their own are treated as belonging to the "rss" namespace.
// This is what lets namespace-qualified struct tags such as
// "rss description" select the plain RSS element.
//
// For every item the GUID is taken from first(e.GUID, e.Link) and the date
// from first(e.DublinCoreDate, e.PubDate); first is presumably "first
// non-empty value" (defined elsewhere; confirm against its definition).
//
// On a decoding failure ParseRSS returns a nil Feed and the decoder's error
// wrapped with context; the underlying error stays reachable through
// errors.Is / errors.As.
func ParseRSS(r io.Reader) (*Feed, error) {
	var f rssFeed

	decoder := xml.NewDecoder(r)
	decoder.DefaultSpace = "rss"
	if err := decoder.Decode(&f); err != nil {
		// Report the failure to the caller only; printing it here as well
		// would duplicate the message and write to stdout from library code.
		return nil, fmt.Errorf("decoding rss feed: %w", err)
	}

	feed := &Feed{
		Title:   f.Title,
		SiteURL: f.Link,
	}

	// Pre-size only when there is something to add, so a feed without items
	// keeps a nil Items slice exactly as before.
	if n := len(f.Items); n > 0 {
		feed.Items = make([]Item, 0, n)
	}

	for _, e := range f.Items {
		// Best effort: the error from dateParse is deliberately ignored so
		// that an item with a missing or unparseable date is still kept,
		// using whatever value dateParse returned alongside the error.
		date, _ := dateParse(first(e.DublinCoreDate, e.PubDate))

		feed.Items = append(feed.Items, Item{
			GUID:    first(e.GUID, e.Link),
			Date:    date,
			URL:     e.Link,
			Title:   e.Title,
			Content: e.Description,
		})
	}
	return feed, nil
}

56
src/feed/rss_test.go Normal file
View File

@ -0,0 +1,56 @@
package feed
import (
"reflect"
"strings"
"testing"
)
// TestRSSFeed feeds a minimal RSS 0.91 document with two items to ParseRSS
// and compares the result with the expected Feed using reflect.DeepEqual.
//
// The items in the fixture have no <guid> element, and the expected GUID of
// each item equals its link. They also have no date element, so the expected
// items leave Date at its zero value.
func TestRSSFeed(t *testing.T) {
	have, err := ParseRSS(strings.NewReader(`
<?xml version="1.0"?>
<!DOCTYPE rss SYSTEM "http://my.netscape.com/publish/formats/rss-0.91.dtd">
<rss version="0.91">
<channel>
<language>en</language>
<description>???</description>
<link>http://www.scripting.com/</link>
<title>Scripting News</title>
<item>
<title>Title 1</title>
<link>http://www.scripting.com/one/</link>
<description>Description 1</description>
</item>
<item>
<title>Title 2</title>
<link>http://www.scripting.com/two/</link>
<description>Description 2</description>
</item>
</channel>
</rss>
`))
	// Fail on the parse error itself; otherwise a decoding problem would
	// only surface below as an opaque "nil vs. want" mismatch.
	if err != nil {
		t.Fatalf("ParseRSS returned an unexpected error: %v", err)
	}

	want := &Feed{
		Title:   "Scripting News",
		SiteURL: "http://www.scripting.com/",
		Items: []Item{
			{
				GUID:    "http://www.scripting.com/one/",
				URL:     "http://www.scripting.com/one/",
				Title:   "Title 1",
				Content: "Description 1",
			},
			{
				GUID:    "http://www.scripting.com/two/",
				URL:     "http://www.scripting.com/two/",
				Title:   "Title 2",
				Content: "Description 2",
			},
		},
	}

	if !reflect.DeepEqual(want, have) {
		t.Logf("want: %#v", want)
		t.Logf("have: %#v", have)
		t.Fatal("invalid rss")
	}
}