feed dump

This commit is contained in:
Nazar Kanaev 2021-03-22 21:04:10 +00:00
parent cbc75047b8
commit e78c028d20
7 changed files with 161 additions and 82 deletions

View File

@ -57,31 +57,30 @@ func (links atomLinks) First(rel string) string {
} }
func ParseAtom(r io.Reader) (*Feed, error) { func ParseAtom(r io.Reader) (*Feed, error) {
f := atomFeed{} srcfeed := atomFeed{}
decoder := xml.NewDecoder(r) decoder := xml.NewDecoder(r)
if err := decoder.Decode(&f); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
return nil, err return nil, err
} }
feed := &Feed{ dstfeed := &Feed{
Title: f.Title.String(), Title: srcfeed.Title.String(),
SiteURL: first(f.Links.First("alternate"), f.Links.First("")), SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
} }
for _, e := range f.Entries { for _, srcitem := range srcfeed.Entries {
date, _ := dateParse(first(e.Published, e.Updated))
imageUrl := "" imageUrl := ""
podcastUrl := "" podcastUrl := ""
feed.Items = append(feed.Items, Item{ dstfeed.Items = append(dstfeed.Items, Item{
GUID: first(e.ID), GUID: firstNonEmpty(srcitem.ID),
Date: date, Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: first(e.Links.First("alternate"), f.Links.First("")), URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
Title: e.Title.String(), Title: srcitem.Title.String(),
Content: e.Content.String(), Content: srcitem.Content.String(),
ImageURL: imageUrl, ImageURL: imageUrl,
PodcastURL: podcastUrl, PodcastURL: podcastUrl,
}) })
} }
return feed, nil return dstfeed, nil
} }

53
src/feed/feed.go Normal file
View File

@ -0,0 +1,53 @@
package feed
import (
"encoding/xml"
"errors"
"fmt"
"io"
"strings"
)
// UnknownFormat is returned by Parse when the beginning of the input does not
// match any of the feed formats that detect recognizes.
var UnknownFormat = errors.New("unknown feed format")

// processor is the signature shared by the format-specific parsers that
// detect hands back: it decodes a feed from r into a *Feed.
type processor func(r io.Reader) (*Feed, error)
// detect inspects lookup, the leading part of a feed document, and reports
// the format it appears to be in together with the parser for that format.
//
// The returned name is "json" when the trimmed input starts with '{', "rss"
// for documents containing an <rss> or <RDF> element (RDF is handled by
// ParseRDF but still reported as "rss"), and "atom" for a <feed> element.
// When nothing matches, including empty or whitespace-only input, it returns
// ("", nil).
func detect(lookup string) (string, processor) {
	lookup = strings.TrimSpace(lookup)
	// Guard the index below: an empty document has no format to detect.
	if lookup == "" {
		return "", nil
	}
	if lookup[0] == '{' {
		return "json", ParseJSON
	}

	decoder := xml.NewDecoder(strings.NewReader(lookup))
	for {
		// Any tokenizer error (EOF, truncated or malformed markup) ends the
		// scan: lookup is only a fragment, so a parse failure simply means
		// no known root element was seen in it.
		token, err := decoder.Token()
		if err != nil || token == nil {
			return "", nil
		}
		el, ok := token.(xml.StartElement)
		if !ok {
			continue
		}
		switch el.Name.Local {
		case "rss":
			return "rss", ParseRSS
		case "RDF":
			return "rss", ParseRDF
		case "feed":
			return "atom", ParseAtom
		}
	}
}
// Parse reads a feed from r, detects its format from the first bytes of the
// stream and decodes it with the matching format-specific parser.
//
// It returns a "Failed to read" error wrapping the underlying error when the
// initial read fails (including an empty stream, which yields io.EOF), and
// UnknownFormat when the sniffed prefix is blank or matches no known format.
// Otherwise it returns whatever the selected parser returns.
func Parse(r io.Reader) (*Feed, error) {
	// Sniff up to 1 KiB. io.ReadFull keeps reading until the buffer is full
	// or the stream ends, so a reader that delivers data in small chunks (or
	// returns data together with EOF) is handled correctly; a document
	// shorter than the buffer surfaces as io.ErrUnexpectedEOF, which is fine.
	var head [1024]byte
	numread, err := io.ReadFull(r, head[:])
	if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
		return nil, fmt.Errorf("Failed to read: %w", err)
	}

	// Only the bytes actually read are meaningful; the rest of the array is
	// zero padding and must not reach the detector.
	lookup := string(head[:numread])
	if strings.TrimSpace(lookup) == "" {
		return nil, UnknownFormat
	}

	_, callback := detect(lookup)
	if callback == nil {
		return nil, UnknownFormat
	}

	// The sniffed prefix has been consumed from r, so stitch it back in front
	// of the remainder to give the parser the complete document.
	return callback(io.MultiReader(strings.NewReader(lookup), r))
}

36
src/feed/feed_test.go Normal file
View File

@ -0,0 +1,36 @@
package feed
import "testing"
// TestDetect feeds detect a minimal document of each supported kind, plus an
// HTML page that must not be recognized, and checks the reported format name.
func TestDetect(t *testing.T) {
	type sample struct {
		input string
		want  string
	}
	samples := []sample{
		{
			input: `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
			want:  "rss",
		},
		{
			input: `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
			want:  "rss",
		},
		{
			input: `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
			want:  "atom",
		},
		{
			input: `{}`,
			want:  "json",
		},
		{
			input: `<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
			want:  "",
		},
	}

	for _, s := range samples {
		have, _ := detect(s.input)
		if have == s.want {
			continue
		}
		// Log the offending document so the failure is easy to reproduce.
		t.Log(s.input)
		t.Errorf("Invalid format: want=%#v have=%#v", s.want, have)
	}
}

View File

@ -33,44 +33,25 @@ type jsonAttachment struct {
Duration int `json:"duration_in_seconds"` Duration int `json:"duration_in_seconds"`
} }
func first(vals ...string) string {
for _, val := range vals {
if len(val) > 0 {
return val
}
}
return ""
}
func (f *jsonFeed) convert() *Feed {
feed := &Feed{
Title: f.Title,
SiteURL: f.SiteURL,
}
for _, item := range f.Items {
date, _ := dateParse(first(item.DatePublished, item.DateModified))
content := first(item.HTML, item.Text, item.Summary)
imageUrl := ""
podcastUrl := ""
feed.Items = append(feed.Items, Item{
GUID: item.ID,
Date: date,
URL: item.URL,
Title: item.Title,
Content: content,
ImageURL: imageUrl,
PodcastURL: podcastUrl,
})
}
return feed
}
func ParseJSON(data io.Reader) (*Feed, error) { func ParseJSON(data io.Reader) (*Feed, error) {
feed := new(jsonFeed) srcfeed := new(jsonFeed)
decoder := json.NewDecoder(data) decoder := json.NewDecoder(data)
if err := decoder.Decode(&feed); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
return nil, err return nil, err
} }
return feed.convert(), nil
dstfeed := &Feed{
Title: srcfeed.Title,
SiteURL: srcfeed.SiteURL,
}
for _, srcitem := range srcfeed.Items {
dstfeed.Items = append(dstfeed.Items, Item{
GUID: srcitem.ID,
Date: dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
URL: srcitem.URL,
Title: srcitem.Title,
Content: firstNonEmpty(srcitem.HTML, srcitem.Text, srcitem.Summary),
})
}
return dstfeed, nil
} }

View File

@ -25,23 +25,23 @@ type rdfItem struct {
} }
func ParseRDF(r io.Reader) (*Feed, error) { func ParseRDF(r io.Reader) (*Feed, error) {
f := rdfFeed{} srcfeed := rdfFeed{}
decoder := xml.NewDecoder(r) decoder := xml.NewDecoder(r)
if err := decoder.Decode(&f); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
return nil, err return nil, err
} }
feed := &Feed{ dstfeed := &Feed{
Title: f.Title, Title: srcfeed.Title,
SiteURL: f.Link, SiteURL: srcfeed.Link,
} }
for _, e := range f.Items { for _, srcitem := range srcfeed.Items {
feed.Items = append(feed.Items, Item{ dstfeed.Items = append(dstfeed.Items, Item{
GUID: e.Link, GUID: srcitem.Link,
URL: e.Link, URL: srcitem.Link,
Title: e.Title, Title: srcitem.Title,
}) })
} }
return feed, nil return dstfeed, nil
} }

View File

@ -57,29 +57,27 @@ type rssEnclosure struct {
} }
func ParseRSS(r io.Reader) (*Feed, error) { func ParseRSS(r io.Reader) (*Feed, error) {
f := rssFeed{} srcfeed := rssFeed{}
decoder := xml.NewDecoder(r) decoder := xml.NewDecoder(r)
decoder.DefaultSpace = "rss" decoder.DefaultSpace = "rss"
if err := decoder.Decode(&f); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
fmt.Println(err) fmt.Println(err)
return nil, err return nil, err
} }
feed := &Feed{ dstfeed := &Feed{
Title: f.Title, Title: srcfeed.Title,
SiteURL: f.Link, SiteURL: srcfeed.Link,
} }
for _, e := range f.Items { for _, srcitem := range srcfeed.Items {
date, _ := dateParse(first(e.DublinCoreDate, e.PubDate)) dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.GUID, srcitem.Link),
feed.Items = append(feed.Items, Item{ Date: dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
GUID: first(e.GUID, e.Link), URL: srcitem.Link,
Date: date, Title: srcitem.Title,
URL: e.Link, Content: srcitem.Description,
Title: e.Title,
Content: e.Description,
}) })
} }
return feed, nil return dstfeed, nil
} }

View File

@ -1,12 +1,19 @@
package feed package feed
import ( import (
"fmt"
"time" "time"
) )
// dateformats taken from somewhere func firstNonEmpty(vals ...string) string {
// which where originally taken from github.com/mjibson/goread for _, val := range vals {
if len(val) > 0 {
return val
}
}
return ""
}
// taken from github.com/mjibson/goread
var dateFormats = []string{ var dateFormats = []string{
time.RFC822, // RSS time.RFC822, // RSS
time.RFC822Z, // RSS time.RFC822Z, // RSS
@ -207,11 +214,16 @@ var dateFormats = []string{
"2 January, 2006", "2 January, 2006",
} }
func dateParse(line string) (time.Time, error) { var defaultTime = time.Time{}
func dateParse(line string) time.Time {
if line == "" {
return defaultTime
}
for _, layout := range dateFormats { for _, layout := range dateFormats {
if t, err := time.Parse(layout, line); err == nil { if t, err := time.Parse(layout, line); err == nil {
return t, nil return t
} }
} }
return time.Time{}, fmt.Errorf("failed to parse date: %s", line) return defaultTime
} }