From e78c028d205efecc96b205375dd234f9d448a5f6 Mon Sep 17 00:00:00 2001
From: Nazar Kanaev
Date: Mon, 22 Mar 2021 21:04:10 +0000
Subject: [PATCH] feed dump

---
 src/feed/atom.go               | 27 ++++++-------
 src/feed/feed.go               | 73 ++++++++++++++++++++++++++++++++++
 src/feed/feed_test.go          | 36 +++++++++++++++++
 src/feed/json.go               | 53 ++++++++----------------
 src/feed/rdf.go                | 22 +++++-----
 src/feed/rss.go                | 28 ++++++-------
 src/feed/{date.go => utils.go} | 24 ++++++++---
 7 files changed, 181 insertions(+), 82 deletions(-)
 create mode 100644 src/feed/feed.go
 create mode 100644 src/feed/feed_test.go
 rename src/feed/{date.go => utils.go} (94%)

diff --git a/src/feed/atom.go b/src/feed/atom.go
index 6bfd2cf..3cb9b4c 100644
--- a/src/feed/atom.go
+++ b/src/feed/atom.go
@@ -57,31 +57,30 @@ func (links atomLinks) First(rel string) string {
 }
 
 func ParseAtom(r io.Reader) (*Feed, error) {
-	f := atomFeed{}
+	srcfeed := atomFeed{}
 
 	decoder := xml.NewDecoder(r)
-	if err := decoder.Decode(&f); err != nil {
+	if err := decoder.Decode(&srcfeed); err != nil {
 		return nil, err
 	}
 
-	feed := &Feed{
-		Title:   f.Title.String(),
-		SiteURL: first(f.Links.First("alternate"), f.Links.First("")),
+	dstfeed := &Feed{
+		Title:   srcfeed.Title.String(),
+		SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
 	}
-	for _, e := range f.Entries {
-		date, _ := dateParse(first(e.Published, e.Updated))
+	for _, srcitem := range srcfeed.Entries {
 		imageUrl := ""
 		podcastUrl := ""
 
-		feed.Items = append(feed.Items, Item{
-			GUID:       first(e.ID),
-			Date:       date,
-			URL:        first(e.Links.First("alternate"), f.Links.First("")),
-			Title:      e.Title.String(),
-			Content:    e.Content.String(),
+		dstfeed.Items = append(dstfeed.Items, Item{
+			GUID:       firstNonEmpty(srcitem.ID),
+			Date:       dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
+			URL:        firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
+			Title:      srcitem.Title.String(),
+			Content:    srcitem.Content.String(),
 			ImageURL:   imageUrl,
 			PodcastURL: podcastUrl,
 		})
 	}
-	return feed, nil
+	return dstfeed, nil
 }
diff --git a/src/feed/feed.go b/src/feed/feed.go
new file mode 100644
index 0000000..f43a82a
--- /dev/null
+++ b/src/feed/feed.go
@@ -0,0 +1,73 @@
+package feed
+
+import (
+	"bytes"
+	"encoding/xml"
+	"errors"
+	"fmt"
+	"io"
+	"strings"
+)
+
+// UnknownFormat is returned by Parse when the input is not recognised as
+// an RSS, RDF, Atom or JSON feed.
+var UnknownFormat = errors.New("unknown feed format")
+
+// processor decodes one concrete feed format from r into a Feed.
+type processor func(r io.Reader) (*Feed, error)
+
+// detect inspects the beginning of a document and returns a format label
+// together with the parser for it. It returns ("", nil) when lookup is
+// blank or no known root element is found.
+func detect(lookup string) (string, processor) {
+	lookup = strings.TrimSpace(lookup)
+	if lookup == "" {
+		// Nothing to sniff; indexing lookup[0] below would panic.
+		return "", nil
+	}
+	if lookup[0] == '{' {
+		return "json", ParseJSON
+	}
+	decoder := xml.NewDecoder(strings.NewReader(lookup))
+	for {
+		// lookup may be a truncated document, so the tokenizer error is
+		// ignored on purpose: a nil token just ends the scan.
+		token, _ := decoder.Token()
+		if token == nil {
+			break
+		}
+		if el, ok := token.(xml.StartElement); ok {
+			switch el.Name.Local {
+			case "rss":
+				return "rss", ParseRSS
+			case "RDF":
+				return "rss", ParseRDF
+			case "feed":
+				return "atom", ParseAtom
+			}
+		}
+	}
+	return "", nil
+}
+
+// Parse sniffs the feed format from the first bytes of r and decodes the
+// whole stream with the matching parser. It returns UnknownFormat when the
+// format cannot be determined.
+func Parse(r io.Reader) (*Feed, error) {
+	var lookup [1024]byte
+	n, err := io.ReadFull(r, lookup[:])
+	// Inputs shorter than the sniff buffer are fine; only real read errors
+	// are reported.
+	if err != nil && err != io.EOF && err != io.ErrUnexpectedEOF {
+		return nil, fmt.Errorf("Failed to read: %w", err)
+	}
+	head := lookup[:n]
+
+	_, callback := detect(string(head))
+	if callback == nil {
+		return nil, UnknownFormat
+	}
+	// The sniffed bytes were consumed from r, so put them back in front of
+	// the rest of the stream before handing it to the parser.
+	return callback(io.MultiReader(bytes.NewReader(head), r))
+}
diff --git a/src/feed/feed_test.go b/src/feed/feed_test.go
new file mode 100644
index 0000000..b605d06
--- /dev/null
+++ b/src/feed/feed_test.go
@@ -0,0 +1,36 @@
+package feed
+
+import "testing"
+
+func TestDetect(t *testing.T) {
+	testcases := [][2]string{
+		{
+			`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
+			"rss",
+		},
+		{
+			`<?xml version="1.0" encoding="utf-8"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
+			"rss",
+		},
+		{
+			`<?xml version="1.0" encoding="utf-8"?><feed xmlns="http://www.w3.org/2005/Atom"></feed>`,
+			"atom",
+		},
+		{
+			`{}`,
+			"json",
+		},
+		{
+			`<!DOCTYPE html><html><head><title></title></head><body></body></html>`,
+			"",
+		},
+	}
+	for _, testcase := range testcases {
+		have, _ := detect(testcase[0])
+		want := testcase[1]
+		if want != have {
+			t.Log(testcase[0])
+			t.Errorf("Invalid format: want=%#v have=%#v", want, have)
+		}
+	}
+}
diff --git a/src/feed/json.go b/src/feed/json.go
index 20c9f4a..fd8c08c 100644
--- a/src/feed/json.go
+++ b/src/feed/json.go
@@ -33,44 +33,25 @@ type jsonAttachment struct {
 	Duration int `json:"duration_in_seconds"`
 }
 
-func first(vals ...string) string {
-	for _, val := range vals {
-		if len(val) > 0 {
-			return val
-		}
-	}
-	return ""
-}
-
-func (f *jsonFeed) convert() *Feed {
-	feed := &Feed{
-		Title:   f.Title,
-		SiteURL: f.SiteURL,
-	}
-	for _, item := range f.Items {
-		date, _ := dateParse(first(item.DatePublished, item.DateModified))
-		content := first(item.HTML, item.Text, item.Summary)
-		imageUrl := ""
-		podcastUrl := ""
-
-		feed.Items = append(feed.Items, Item{
-			GUID:       item.ID,
-			Date:       date,
-			URL:        item.URL,
-			Title:      item.Title,
-			Content:    content,
-			ImageURL:   imageUrl,
-			PodcastURL: podcastUrl,
-		})
-	}
-	return feed
-}
-
 func ParseJSON(data io.Reader) (*Feed, error) {
-	feed := new(jsonFeed)
+	srcfeed := new(jsonFeed)
 	decoder := json.NewDecoder(data)
-	if err := decoder.Decode(&feed); err != nil {
+	if err := decoder.Decode(&srcfeed); err != nil {
 		return nil, err
 	}
-	return feed.convert(), nil
+
+	dstfeed := &Feed{
+		Title:   srcfeed.Title,
+		SiteURL: srcfeed.SiteURL,
+	}
+	for _, srcitem := range srcfeed.Items {
+		dstfeed.Items = append(dstfeed.Items, Item{
+			GUID:    srcitem.ID,
+			Date:    dateParse(firstNonEmpty(srcitem.DatePublished, srcitem.DateModified)),
+			URL:     srcitem.URL,
+			Title:   srcitem.Title,
+			Content: firstNonEmpty(srcitem.HTML, srcitem.Text, srcitem.Summary),
+		})
+	}
+	return dstfeed, nil
 }
diff --git a/src/feed/rdf.go b/src/feed/rdf.go
index 21cd36a..2436310 100644
--- a/src/feed/rdf.go
+++ b/src/feed/rdf.go
@@ -25,23 +25,23 @@ type rdfItem struct {
 }
 
 func ParseRDF(r io.Reader) (*Feed, error) {
-	f := rdfFeed{}
+	srcfeed := rdfFeed{}
 
 	decoder := xml.NewDecoder(r)
-	if err := decoder.Decode(&f); err != nil {
+	if err := decoder.Decode(&srcfeed); err != nil {
 		return nil, err
 	}
 
-	feed := &Feed{
-		Title:   f.Title,
-		SiteURL: f.Link,
+	dstfeed := &Feed{
+		Title:   srcfeed.Title,
+		SiteURL: srcfeed.Link,
 	}
-	for _, e := range f.Items {
-		feed.Items = append(feed.Items, Item{
-			GUID:  e.Link,
-			URL:   e.Link,
-			Title: e.Title,
+	for _, srcitem := range srcfeed.Items {
+		dstfeed.Items = append(dstfeed.Items, Item{
+			GUID:  srcitem.Link,
+			URL:   srcitem.Link,
+			Title: srcitem.Title,
 		})
 	}
-	return feed, nil
+	return dstfeed, nil
 }
diff --git a/src/feed/rss.go b/src/feed/rss.go
index 41ece39..1581545 100644
--- a/src/feed/rss.go
+++ b/src/feed/rss.go
@@ -57,29 +57,27 @@ type rssEnclosure struct {
 }
 
 func ParseRSS(r io.Reader) (*Feed, error) {
-	f := rssFeed{}
+	srcfeed := rssFeed{}
 
 	decoder := xml.NewDecoder(r)
 	decoder.DefaultSpace = "rss"
-	if err := decoder.Decode(&f); err != nil {
+	if err := decoder.Decode(&srcfeed); err != nil {
 		fmt.Println(err)
 		return nil, err
 	}
 
-	feed := &Feed{
-		Title:   f.Title,
-		SiteURL: f.Link,
+	dstfeed := &Feed{
+		Title:   srcfeed.Title,
+		SiteURL: srcfeed.Link,
 	}
-	for _, e := range f.Items {
-		date, _ := dateParse(first(e.DublinCoreDate, e.PubDate))
-
-		feed.Items = append(feed.Items, Item{
-			GUID:    first(e.GUID, e.Link),
-			Date:    date,
-			URL:     e.Link,
-			Title:   e.Title,
-			Content: e.Description,
+	for _, srcitem := range srcfeed.Items {
+		dstfeed.Items = append(dstfeed.Items, Item{
+			GUID:    firstNonEmpty(srcitem.GUID, srcitem.Link),
+			Date:    dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
+			URL:     srcitem.Link,
+			Title:   srcitem.Title,
+			Content: srcitem.Description,
 		})
 	}
-	return feed, nil
+	return dstfeed, nil
 }
diff --git a/src/feed/date.go b/src/feed/utils.go
similarity index 94%
rename from src/feed/date.go
rename to src/feed/utils.go
index 1f108a5..f538c91 100644
--- a/src/feed/date.go
+++ b/src/feed/utils.go
@@ -1,12 +1,19 @@
 package feed
 
 import (
-	"fmt"
 	"time"
 )
 
-// dateformats taken from somewhere
-// which where originally taken from github.com/mjibson/goread
+func firstNonEmpty(vals ...string) string {
+	for _, val := range vals {
+		if len(val) > 0 {
+			return val
+		}
+	}
+	return ""
+}
+
+// taken from github.com/mjibson/goread
 var dateFormats = []string{
 	time.RFC822,  // RSS
 	time.RFC822Z, // RSS
@@ -207,11 +214,16 @@ var dateFormats = []string{
 	"2 January, 2006",
 }
 
-func dateParse(line string) (time.Time, error) {
+var defaultTime = time.Time{}
+
+func dateParse(line string) time.Time {
+	if line == "" {
+		return defaultTime
+	}
 	for _, layout := range dateFormats {
 		if t, err := time.Parse(layout, line); err == nil {
-			return t, nil
+			return t
 		}
 	}
-	return time.Time{}, fmt.Errorf("failed to parse date: %s", line)
+	return defaultTime
 }