From 0a0db6890585646b9eae34326f26476e73dd8420 Mon Sep 17 00:00:00 2001
From: Nazar Kanaev
Date: Fri, 2 Apr 2021 15:05:22 +0100
Subject: [PATCH] feedburner

Use the feedburner:origLink / feedburner:origEnclosureLink values when a
feed item carries them.

- atom: parse origLink into atomEntry and try it first for the item URL,
  then the entry's "alternate" link, then the entry's first link (the
  last fallback used to be the feed-level link). Item content now also
  falls back to the entry summary.
- rss: rename the FeedBurner fields to OrigLink / OrigEnclosureLink, try
  OrigLink first for the item URL, and replace the enclosure URL with
  OrigEnclosureLink when the enclosure URL contains its base name.
- utils: firstNonEmpty trims surrounding whitespace before testing a
  value and returns the trimmed value.
---
Review notes (text between the "---" line and the first "diff --git" line
is not part of the commit message):

- The origEnclosureLink check tests the link itself for emptiness.
  path.Base("") returns ".", so a guard written against the base name is
  always true and lets an empty OrigEnclosureLink overwrite podcastURL
  whenever the enclosure URL contains a dot.
- Whitespace inside context lines follows gofmt conventions; apply with
  --ignore-whitespace if the target tree is aligned differently.
- The "index" hash for src/parser/rss.go was left unchanged and does not
  describe the edited post-image.

 src/parser/atom.go  |  6 ++++--
 src/parser/rss.go   | 14 +++++++++++---
 src/parser/utils.go |  5 +++--
 3 files changed, 18 insertions(+), 7 deletions(-)

diff --git a/src/parser/atom.go b/src/parser/atom.go
index 5853ddc..31856ec 100644
--- a/src/parser/atom.go
+++ b/src/parser/atom.go
@@ -24,6 +24,8 @@ type atomEntry struct {
 	Updated   string    `xml:"updated"`
 	Links     atomLinks `xml:"link"`
 	Content   atomText  `xml:"http://www.w3.org/2005/Atom content"`
+	OrigLink  string    `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+
 	media
 }
 
@@ -73,9 +75,9 @@ func ParseAtom(r io.Reader) (*Feed, error) {
 		dstfeed.Items = append(dstfeed.Items, Item{
 			GUID:     firstNonEmpty(srcitem.ID),
 			Date:     dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
-			URL:      firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
+			URL:      firstNonEmpty(srcitem.OrigLink, srcitem.Links.First("alternate"), srcitem.Links.First("")),
 			Title:    srcitem.Title.String(),
-			Content:  firstNonEmpty(srcitem.Content.String(), srcitem.firstMediaDescription()),
+			Content:  firstNonEmpty(srcitem.Content.String(), srcitem.Summary.String(), srcitem.firstMediaDescription()),
 			ImageURL: srcitem.firstMediaThumbnail(),
 			AudioURL: "",
 		})
diff --git a/src/parser/rss.go b/src/parser/rss.go
index ac771f6..114d3c0 100644
--- a/src/parser/rss.go
+++ b/src/parser/rss.go
@@ -7,6 +7,8 @@ package parser
 import (
 	"encoding/xml"
 	"io"
+	"path"
+	"strings"
 )
 
 type rssFeed struct {
@@ -28,8 +30,8 @@ type rssItem struct {
 	DublinCoreDate string `xml:"http://purl.org/dc/elements/1.1/ date"`
 	ContentEncoded string `xml:"http://purl.org/rss/1.0/modules/content/ encoded"`
 
-	FeedBurnerLink          string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
-	FeedBurnerEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
+	OrigLink          string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origLink"`
+	OrigEnclosureLink string `xml:"http://rssnamespace.org/feedburner/ext/1.0 origEnclosureLink"`
 
 	ItunesSubtitle string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd subtitle"`
 	ItunesSummary  string `xml:"http://www.itunes.com/dtds/podcast-1.0.dtd summary"`
@@ -74,6 +76,12 @@ func ParseRSS(r io.Reader) (*Feed, error) {
 		for _, e := range srcitem.Enclosures {
 			if e.Type == "audio/mpeg" || e.Type == "audio/x-m4a" {
 				podcastURL = e.URL
+
+				// path.Base("") is ".", so test the link itself for emptiness.
+				if orig := srcitem.OrigEnclosureLink; orig != "" && strings.Contains(podcastURL, path.Base(orig)) {
+					podcastURL = orig
+				}
+
 				break
 			}
 		}
@@ -81,7 +89,7 @@ func ParseRSS(r io.Reader) (*Feed, error) {
 		dstfeed.Items = append(dstfeed.Items, Item{
 			GUID:     firstNonEmpty(srcitem.GUID, srcitem.Link),
 			Date:     dateParse(firstNonEmpty(srcitem.DublinCoreDate, srcitem.PubDate)),
-			URL:      srcitem.Link,
+			URL:      firstNonEmpty(srcitem.OrigLink, srcitem.Link),
 			Title:    srcitem.Title,
 			Content:  firstNonEmpty(srcitem.ContentEncoded, srcitem.Description),
 			AudioURL: podcastURL,
diff --git a/src/parser/utils.go b/src/parser/utils.go
index b39bce6..e87c709 100644
--- a/src/parser/utils.go
+++ b/src/parser/utils.go
@@ -12,8 +12,9 @@ import (
 
 func firstNonEmpty(vals ...string) string {
 	for _, val := range vals {
-		if len(val) > 0 {
-			return val
+		valTrimmed := strings.TrimSpace(val)
+		if len(valTrimmed) > 0 {
+			return valTrimmed
 		}
 	}
 	return ""