feed dump

This commit is contained in:
Nazar Kanaev 2021-03-22 21:04:10 +00:00
parent cbc75047b8
commit e78c028d20
7 changed files with 161 additions and 82 deletions

View File

@ -57,31 +57,30 @@ func (links atomLinks) First(rel string) string {
}
func ParseAtom(r io.Reader) (*Feed, error) {
f := atomFeed{}
srcfeed := atomFeed{}
decoder := xml.NewDecoder(r)
if err := decoder.Decode(&f); err != nil {
if err := decoder.Decode(&srcfeed); err != nil {
return nil, err
}
feed := &Feed{
Title: f.Title.String(),
SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
dstfeed := &Feed{
Title: srcfeed.Title.String(),
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
}
for _, e := range f.Entries {
date, _ := dateParse(first(e.Published, e.Updated))
for _, srcitem := range srcfeed.Entries {
imageUrl := ""
podcastUrl := ""
feed.Items = append(feed.Items, Item{
GUID: first(e.ID),
Date: date,
URL: first(e.Links.First("alternate"), f.Links.First("")),
Title: e.Title.String(),
Content: e.Content.String(),
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.ID),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
Title: srcitem.Title.String(),
Content: srcitem.Content.String(),
ImageURL: imageUrl,
PodcastURL: podcastUrl,
})
}
return feed, nil
return dstfeed, nil
}

53
src/feed/feed.go Normal file
View File

@ -0,0 +1,53 @@
package feed
import (
"encoding/xml"
"errors"
"fmt"
"io"
"strings"
)
// UnknownFormat is returned by Parse when detect finds no parser matching
// the beginning of the input.
var UnknownFormat = errors.New("unknown feed format")
// processor is the signature of a format-specific parser: it consumes a
// reader and produces a Feed or an error. detect returns values of this type.
type processor func(r io.Reader) (*Feed, error)
// detect inspects the beginning of a feed document and selects a parser.
//
// lookup is the leading portion of the document; surrounding whitespace is
// ignored. If the first character is '{' the document is treated as JSON.
// Otherwise lookup is tokenized as XML and the first start element whose
// local name is "rss", "RDF" or "feed" selects the parser.
//
// It returns the format name ("json", "rss" or "atom") together with the
// matching parser, or "" and nil when lookup is empty or nothing matches.
func detect(lookup string) (string, processor) {
	lookup = strings.TrimSpace(lookup)
	// Empty (or whitespace-only) input has no first byte to inspect;
	// indexing it would panic.
	if lookup == "" {
		return "", nil
	}
	if lookup[0] == '{' {
		return "json", ParseJSON
	}

	decoder := xml.NewDecoder(strings.NewReader(lookup))
	for {
		// The error is intentionally ignored: scanning simply stops as soon
		// as Token yields no token.
		token, _ := decoder.Token()
		if token == nil {
			break
		}
		el, ok := token.(xml.StartElement)
		if !ok {
			continue
		}
		switch el.Name.Local {
		case "rss":
			return "rss", ParseRSS
		case "RDF":
			// RDF documents are reported under the "rss" format name but
			// use their own parser.
			return "rss", ParseRDF
		case "feed":
			return "atom", ParseAtom
		}
	}
	return "", nil
}
// Parse reads a feed from r, detects its format from the first bytes and
// passes the complete document to the matching parser.
//
// It returns UnknownFormat when no parser matches the sniffed sample
// (including a sample that is only whitespace), and a wrapped error when the
// initial read fails or the input is empty.
func Parse(r io.Reader) (*Feed, error) {
	// Sniff up to 1 KiB. io.ReadFull keeps reading until the buffer is full
	// or the input ends, so a short first read does not truncate the sample.
	// io.ErrUnexpectedEOF only means the document is shorter than the buffer.
	var buf [1024]byte
	n, err := io.ReadFull(r, buf[:])
	if err != nil && !errors.Is(err, io.ErrUnexpectedEOF) {
		return nil, fmt.Errorf("Failed to read: %w", err)
	}

	// Only the bytes actually read are part of the document.
	head := string(buf[:n])
	if strings.TrimSpace(head) == "" {
		return nil, UnknownFormat
	}

	_, callback := detect(head)
	if callback == nil {
		return nil, UnknownFormat
	}

	// The sniffed bytes were consumed from r; replay them ahead of the rest
	// so the parser sees the document from its very first byte.
	return callback(io.MultiReader(strings.NewReader(head), r))
}

36
src/feed/feed_test.go Normal file
View File

@ -0,0 +1,36 @@
package feed
import "testing"
// TestDetect feeds detect a minimal sample of each supported format, plus an
// HTML page that must not match, and compares the reported format names.
func TestDetect(t *testing.T) {
	type sample struct {
		input  string
		format string
	}
	samples := []sample{
		{
			input:  `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
			format: "rss",
		},
		{
			input:  `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
			format: "rss",
		},
		{
			input:  `<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
			format: "atom",
		},
		{
			input:  `{}`,
			format: "json",
		},
		{
			input:  `<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
			format: "",
		},
	}

	for i := range samples {
		s := samples[i]
		got, _ := detect(s.input)
		if got == s.format {
			continue
		}
		// Log the offending document before reporting the mismatch.
		t.Log(s.input)
		t.Errorf("Invalid format: want=%#v have=%#v", s.format, got)
	}
}

View File

@ -33,44 +33,25 @@ type jsonAttachment struct {
Duration int `json:"duration_in_seconds"`
}
func first(vals ...string) string {
for _, val := range vals {
if len(val) > 0 {
return val
}
}
return ""
}
func (f *jsonFeed) convert() *Feed {
feed := &Feed{
Title: f.Title,
SiteURL: f.SiteURL,
}
for _, item := range f.Items {
date, _ := dateParse(first(item.DatePublished, item.DateModified))
content := first(item.HTML, item.Text, item.Summary)
imageUrl := ""
podcastUrl := ""
feed.Items = append(feed.Items, Item{
GUID: item.ID,
Date: date,
URL: item.URL,
Title: item.Title,
Content: content,
ImageURL: imageUrl,
PodcastURL: podcastUrl,
})
}
return feed
}
func ParseJSON(data io.Reader) (*Feed, error) {
feed := new(jsonFeed)
srcfeed := new(jsonFeed)
decoder := json.NewDecoder(data)
if err := decoder.Decode(&feed); err != nil {
if err := decoder.Decode(&srcfeed); err != nil {
return nil, err
}
return feed.convert(), nil
dstfeed := &Feed{
Title: srcfeed.Title,
SiteURL: srcfeed.SiteURL,
}
for _, srcitem := range srcfeed.Items {
dstfeed.Items = append(dstfeed.Items, Item{
GUID: srcitem.ID,
Date: dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
URL: srcitem.URL,
Title: srcitem.Title,
Content: firstNonEmpty(srcitem.HTML, srcitem.Text, srcitem.Summary),
})
}
return dstfeed, nil
}

View File

@ -25,23 +25,23 @@ type rdfItem struct {
}
func ParseRDF(r io.Reader) (*Feed, error) {
f := rdfFeed{}
srcfeed := rdfFeed{}
decoder := xml.NewDecoder(r)
if err := decoder.Decode(&f); err != nil {
if err := decoder.Decode(&srcfeed); err != nil {
return nil, err
}
feed := &Feed{
Title: f.Title,
SiteURL: f.Link,
dstfeed := &Feed{
Title: srcfeed.Title,
SiteURL: srcfeed.Link,
}
for _, e := range f.Items {
feed.Items = append(feed.Items, Item{
GUID: e.Link,
URL: e.Link,
Title: e.Title,
for _, srcitem := range srcfeed.Items {
dstfeed.Items = append(dstfeed.Items, Item{
GUID: srcitem.Link,
URL: srcitem.Link,
Title: srcitem.Title,
})
}
return feed, nil
return dstfeed, nil
}

View File

@ -57,29 +57,27 @@ type rssEnclosure struct {
}
func ParseRSS(r io.Reader) (*Feed, error) {
f := rssFeed{}
srcfeed := rssFeed{}
decoder := xml.NewDecoder(r)
decoder.DefaultSpace = "rss"
if err := decoder.Decode(&f); err != nil {
if err := decoder.Decode(&srcfeed); err != nil {
fmt.Println(err)
return nil, err
}
feed := &Feed{
Title: f.Title,
SiteURL: f.Link,
dstfeed := &Feed{
Title: srcfeed.Title,
SiteURL: srcfeed.Link,
}
for _, e := range f.Items {
date, _ := dateParse(first(e.DublinCoreDate, e.PubDate))
feed.Items = append(feed.Items, Item{
GUID: first(e.GUID, e.Link),
Date: date,
URL: e.Link,
Title: e.Title,
Content: e.Description,
for _, srcitem := range srcfeed.Items {
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.GUID, srcitem.Link),
Date: dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
URL: srcitem.Link,
Title: srcitem.Title,
Content: srcitem.Description,
})
}
return feed, nil
return dstfeed, nil
}

View File

@ -1,12 +1,19 @@
package feed
import (
"fmt"
"time"
)
// dateformats taken from somewhere
// which were originally taken from github.com/mjibson/goread
// firstNonEmpty returns the first argument that is not the empty string,
// scanning from left to right. It returns "" when every argument is empty
// or when called with no arguments.
func firstNonEmpty(vals ...string) string {
	for i := 0; i < len(vals); i++ {
		if vals[i] != "" {
			return vals[i]
		}
	}
	return ""
}
// taken from github.com/mjibson/goread
var dateFormats = []string{
time.RFC822, // RSS
time.RFC822Z, // RSS
@ -207,11 +214,16 @@ var dateFormats = []string{
"2 January, 2006",
}
func dateParse(line string) (time.Time, error) {
var defaultTime = time.Time{}
func dateParse(line string) time.Time {
if line == "" {
return defaultTime
}
for _, layout := range dateFormats {
if t, err := time.Parse(layout, line); err == nil {
return t, nil
return t
}
}
return time.Time{}, fmt.Errorf("failed to parse date: %s", line)
return defaultTime
}