mirror of
https://github.com/nkanaev/yarr.git
synced 2025-07-09 08:10:10 +00:00
feed dump
This commit is contained in:
parent
cbc75047b8
commit
e78c028d20
@ -57,31 +57,30 @@ func (links atomLinks) First(rel string) string {
|
||||
}
|
||||
|
||||
func ParseAtom(r io.Reader) (*Feed, error) {
|
||||
f := atomFeed{}
|
||||
srcfeed := atomFeed{}
|
||||
|
||||
decoder := xml.NewDecoder(r)
|
||||
if err := decoder.Decode(&f); err != nil {
|
||||
if err := decoder.Decode(&srcfeed); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
feed := &Feed{
|
||||
Title: f.Title.String(),
|
||||
SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
|
||||
dstfeed := &Feed{
|
||||
Title: srcfeed.Title.String(),
|
||||
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
|
||||
}
|
||||
for _, e := range f.Entries {
|
||||
date, _ := dateParse(first(e.Published, e.Updated))
|
||||
for _, srcitem := range srcfeed.Entries {
|
||||
imageUrl := ""
|
||||
podcastUrl := ""
|
||||
|
||||
feed.Items = append(feed.Items, Item{
|
||||
GUID: first(e.ID),
|
||||
Date: date,
|
||||
URL: first(e.Links.First("alternate"), f.Links.First("")),
|
||||
Title: e.Title.String(),
|
||||
Content: e.Content.String(),
|
||||
dstfeed.Items = append(dstfeed.Items, Item{
|
||||
GUID: firstNonEmpty(srcitem.ID),
|
||||
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
|
||||
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
|
||||
Title: srcitem.Title.String(),
|
||||
Content: srcitem.Content.String(),
|
||||
ImageURL: imageUrl,
|
||||
PodcastURL: podcastUrl,
|
||||
})
|
||||
}
|
||||
return feed, nil
|
||||
return dstfeed, nil
|
||||
}
|
||||
|
53
src/feed/feed.go
Normal file
53
src/feed/feed.go
Normal file
@ -0,0 +1,53 @@
|
||||
package feed
|
||||
|
||||
import (
|
||||
"encoding/xml"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// UnknownFormat is the error Parse returns when detect finds no parser for the input.
var UnknownFormat = errors.New("unknown feed format")
|
||||
|
||||
// processor is the signature of a format-specific parser as returned by detect (ParseJSON, ParseRSS, ParseRDF, ParseAtom): it reads from r and returns a *Feed or an error.
type processor func(r io.Reader) (*Feed, error)
|
||||
|
||||
func detect(lookup string) (string, processor) {
|
||||
lookup = strings.TrimSpace(lookup)
|
||||
if lookup[0] == '{' {
|
||||
return "json", ParseJSON
|
||||
}
|
||||
decoder := xml.NewDecoder(strings.NewReader(lookup))
|
||||
for {
|
||||
token, _ := decoder.Token()
|
||||
if token == nil {
|
||||
break
|
||||
}
|
||||
if el, ok := token.(xml.StartElement); ok {
|
||||
switch el.Name.Local {
|
||||
case "rss":
|
||||
return "rss", ParseRSS
|
||||
case "RDF":
|
||||
return "rss", ParseRDF
|
||||
case "feed":
|
||||
return "atom", ParseAtom
|
||||
}
|
||||
}
|
||||
}
|
||||
return "", nil
|
||||
}
|
||||
|
||||
func Parse(r io.Reader) (*Feed, error) {
|
||||
var x [1024]byte
|
||||
numread, err := r.Read(x[:])
|
||||
fmt.Println(numread, err)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("Failed to read: %s", err)
|
||||
}
|
||||
|
||||
_, callback := detect(string(x[:]))
|
||||
if callback == nil {
|
||||
return nil, UnknownFormat
|
||||
}
|
||||
return callback(r)
|
||||
}
|
36
src/feed/feed_test.go
Normal file
36
src/feed/feed_test.go
Normal file
@ -0,0 +1,36 @@
|
||||
package feed
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestDetect(t *testing.T) {
|
||||
testcases := [][2]string{
|
||||
{
|
||||
`<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
|
||||
"rss",
|
||||
},
|
||||
{
|
||||
`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
|
||||
"rss",
|
||||
},
|
||||
{
|
||||
`<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
|
||||
"atom",
|
||||
},
|
||||
{
|
||||
`{}`,
|
||||
"json",
|
||||
},
|
||||
{
|
||||
`<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
|
||||
"",
|
||||
},
|
||||
}
|
||||
for _, testcase := range testcases {
|
||||
have, _ := detect(testcase[0])
|
||||
want := testcase[1]
|
||||
if want != have {
|
||||
t.Log(testcase[0])
|
||||
t.Errorf("Invalid format: want=%#v have=%#v", want, have)
|
||||
}
|
||||
}
|
||||
}
|
@ -33,44 +33,25 @@ type jsonAttachment struct {
|
||||
Duration int `json:"duration_in_seconds"`
|
||||
}
|
||||
|
||||
// first returns the earliest non-empty string among vals, or "" when every
// value is empty or no values are given.
func first(vals ...string) string {
	for i := 0; i < len(vals); i++ {
		if candidate := vals[i]; candidate != "" {
			return candidate
		}
	}
	return ""
}
|
||||
|
||||
func (f *jsonFeed) convert() *Feed {
|
||||
feed := &Feed{
|
||||
Title: f.Title,
|
||||
SiteURL: f.SiteURL,
|
||||
}
|
||||
for _, item := range f.Items {
|
||||
date, _ := dateParse(first(item.DatePublished, item.DateModified))
|
||||
content := first(item.HTML, item.Text, item.Summary)
|
||||
imageUrl := ""
|
||||
podcastUrl := ""
|
||||
|
||||
feed.Items = append(feed.Items, Item{
|
||||
GUID: item.ID,
|
||||
Date: date,
|
||||
URL: item.URL,
|
||||
Title: item.Title,
|
||||
Content: content,
|
||||
ImageURL: imageUrl,
|
||||
PodcastURL: podcastUrl,
|
||||
})
|
||||
}
|
||||
return feed
|
||||
}
|
||||
|
||||
func ParseJSON(data io.Reader) (*Feed, error) {
|
||||
feed := new(jsonFeed)
|
||||
srcfeed := new(jsonFeed)
|
||||
decoder := json.NewDecoder(data)
|
||||
if err := decoder.Decode(&feed); err != nil {
|
||||
if err := decoder.Decode(&srcfeed); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return feed.convert(), nil
|
||||
|
||||
dstfeed := &Feed{
|
||||
Title: srcfeed.Title,
|
||||
SiteURL: srcfeed.SiteURL,
|
||||
}
|
||||
for _, srcitem := range srcfeed.Items {
|
||||
dstfeed.Items = append(dstfeed.Items, Item{
|
||||
GUID: srcitem.ID,
|
||||
Date: dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
|
||||
URL: srcitem.URL,
|
||||
Title: srcitem.Title,
|
||||
Content: firstNonEmpty(srcitem.HTML, srcitem.Text, srcitem.Summary),
|
||||
})
|
||||
}
|
||||
return dstfeed, nil
|
||||
}
|
||||
|
@ -25,23 +25,23 @@ type rdfItem struct {
|
||||
}
|
||||
|
||||
func ParseRDF(r io.Reader) (*Feed, error) {
|
||||
f := rdfFeed{}
|
||||
srcfeed := rdfFeed{}
|
||||
|
||||
decoder := xml.NewDecoder(r)
|
||||
if err := decoder.Decode(&f); err != nil {
|
||||
if err := decoder.Decode(&srcfeed); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
feed := &Feed{
|
||||
Title: f.Title,
|
||||
SiteURL: f.Link,
|
||||
dstfeed := &Feed{
|
||||
Title: srcfeed.Title,
|
||||
SiteURL: srcfeed.Link,
|
||||
}
|
||||
for _, e := range f.Items {
|
||||
feed.Items = append(feed.Items, Item{
|
||||
GUID: e.Link,
|
||||
URL: e.Link,
|
||||
Title: e.Title,
|
||||
for _, srcitem := range srcfeed.Items {
|
||||
dstfeed.Items = append(dstfeed.Items, Item{
|
||||
GUID: srcitem.Link,
|
||||
URL: srcitem.Link,
|
||||
Title: srcitem.Title,
|
||||
})
|
||||
}
|
||||
return feed, nil
|
||||
return dstfeed, nil
|
||||
}
|
||||
|
@ -57,29 +57,27 @@ type rssEnclosure struct {
|
||||
}
|
||||
|
||||
func ParseRSS(r io.Reader) (*Feed, error) {
|
||||
f := rssFeed{}
|
||||
srcfeed := rssFeed{}
|
||||
|
||||
decoder := xml.NewDecoder(r)
|
||||
decoder.DefaultSpace = "rss"
|
||||
if err := decoder.Decode(&f); err != nil {
|
||||
if err := decoder.Decode(&srcfeed); err != nil {
|
||||
fmt.Println(err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
feed := &Feed{
|
||||
Title: f.Title,
|
||||
SiteURL: f.Link,
|
||||
dstfeed := &Feed{
|
||||
Title: srcfeed.Title,
|
||||
SiteURL: srcfeed.Link,
|
||||
}
|
||||
for _, e := range f.Items {
|
||||
date, _ := dateParse(first(e.DublinCoreDate, e.PubDate))
|
||||
|
||||
feed.Items = append(feed.Items, Item{
|
||||
GUID: first(e.GUID, e.Link),
|
||||
Date: date,
|
||||
URL: e.Link,
|
||||
Title: e.Title,
|
||||
Content: e.Description,
|
||||
for _, srcitem := range srcfeed.Items {
|
||||
dstfeed.Items = append(dstfeed.Items, Item{
|
||||
GUID: firstNonEmpty(srcitem.GUID, srcitem.Link),
|
||||
Date: dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
|
||||
URL: srcitem.Link,
|
||||
Title: srcitem.Title,
|
||||
Content: srcitem.Description,
|
||||
})
|
||||
}
|
||||
return feed, nil
|
||||
return dstfeed, nil
|
||||
}
|
||||
|
@ -1,12 +1,19 @@
|
||||
package feed
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// dateformats taken from somewhere
|
||||
// which were originally taken from github.com/mjibson/goread
|
||||
// firstNonEmpty scans vals left to right and returns the first one that is
// not the empty string. It returns "" if there is no such value.
func firstNonEmpty(vals ...string) string {
	for i := range vals {
		if vals[i] == "" {
			continue
		}
		return vals[i]
	}
	return ""
}
|
||||
|
||||
// taken from github.com/mjibson/goread
|
||||
var dateFormats = []string{
|
||||
time.RFC822, // RSS
|
||||
time.RFC822Z, // RSS
|
||||
@ -207,11 +214,16 @@ var dateFormats = []string{
|
||||
"2 January, 2006",
|
||||
}
|
||||
|
||||
func dateParse(line string) (time.Time, error) {
|
||||
var defaultTime = time.Time{}
|
||||
|
||||
func dateParse(line string) time.Time {
|
||||
if line == "" {
|
||||
return defaultTime
|
||||
}
|
||||
for _, layout := range dateFormats {
|
||||
if t, err := time.Parse(layout, line); err == nil {
|
||||
return t, nil
|
||||
return t
|
||||
}
|
||||
}
|
||||
return time.Time{}, fmt.Errorf("failed to parse date: %s", line)
|
||||
return defaultTime
|
||||
}
|
Loading…
x
Reference in New Issue
Block a user