diff --git a/src/parser/atom.go b/src/parser/atom.go index 2aaf745..9a76d0f 100644 --- a/src/parser/atom.go +++ b/src/parser/atom.go @@ -24,6 +24,7 @@ type atomEntry struct { Updated string `xml:"updated"` Links atomLinks `xml:"link"` Content atomText `xml:"content"` + media } type atomText struct { @@ -69,17 +70,14 @@ func ParseAtom(r io.Reader) (*Feed, error) { SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")), } for _, srcitem := range srcfeed.Entries { - imageUrl := "" - podcastUrl := "" - dstfeed.Items = append(dstfeed.Items, Item{ GUID: firstNonEmpty(srcitem.ID), Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)), URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")), Title: srcitem.Title.String(), - Content: srcitem.Content.String(), - ImageURL: imageUrl, - PodcastURL: podcastUrl, + Content: firstNonEmpty(srcitem.Content.String(), srcitem.firstMediaDescription()), + ImageURL: srcitem.firstMediaThumbnail(), + PodcastURL: "", }) } return dstfeed, nil diff --git a/src/parser/feed_test.go b/src/parser/feed_test.go index a333e00..df82f60 100644 --- a/src/parser/feed_test.go +++ b/src/parser/feed_test.go @@ -66,7 +66,7 @@ func TestParse(t *testing.T) { Title: "Title", Items: []Item{ { - Title: "Item 1", + Title: "Item 1", Content: "
content
", }, }, diff --git a/src/parser/media.go b/src/parser/media.go new file mode 100644 index 0000000..e6c5a71 --- /dev/null +++ b/src/parser/media.go @@ -0,0 +1,53 @@ +package parser + +type media struct { + MediaGroups []mediaGroup `xml:"http://search.yahoo.com/mrss/ group"` + + MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"` + MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"` +} + +type mediaGroup struct { + MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"` + MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"` +} + +type mediaContent struct { + URL string `xml:"url,attr"` + Type string `xml:"type,attr"` + FileSize string `xml:"fileSize,attr"` + Medium string `xml:"medium,attr"` +} + +type mediaThumbnail struct { + URL string `xml:"url,attr"` +} + +type mediaDescription struct { + Type string `xml:"type,attr"` + Description string `xml:",chardata"` +} + +func (m *media) firstMediaThumbnail() string { + for _, t := range m.MediaThumbnails { + return t.URL + } + for _, g := range m.MediaGroups { + for _, t := range g.MediaThumbnails { + return t.URL + } + } + return "" +} + +func (m *media) firstMediaDescription() string { + for _, d := range m.MediaDescriptions { + return plain2html(d.Description) + } + for _, g := range m.MediaGroups { + for _, d := range g.MediaDescriptions { + return plain2html(d.Description) + } + } + return "" +} diff --git a/src/parser/utils.go b/src/parser/utils.go index 545cded..b39bce6 100644 --- a/src/parser/utils.go +++ b/src/parser/utils.go @@ -3,8 +3,11 @@ package parser import ( "encoding/xml" "io" - "golang.org/x/net/html/charset" + "regexp" + "strings" "time" + + "golang.org/x/net/html/charset" ) func firstNonEmpty(vals ...string) string { @@ -16,6 +19,14 @@ func firstNonEmpty(vals ...string) string { return "" } +var linkRe = regexp.MustCompile(`(https?:\/\/\S+)`) + +func plain2html(text string) 
string { + text = linkRe.ReplaceAllString(text, `<a href="$1">$1</a>`) + text = strings.ReplaceAll(text, "\n", "<br>") + return text +} + func xmlDecoder(r io.Reader) *xml.Decoder { decoder := xml.NewDecoder(r) decoder.Strict = false