diff --git a/src/parser/atom.go b/src/parser/atom.go
index 2aaf745..9a76d0f 100644
--- a/src/parser/atom.go
+++ b/src/parser/atom.go
@@ -24,6 +24,7 @@ type atomEntry struct {
Updated string `xml:"updated"`
Links atomLinks `xml:"link"`
Content atomText `xml:"content"`
+ media
}
type atomText struct {
@@ -69,17 +70,14 @@ func ParseAtom(r io.Reader) (*Feed, error) {
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
}
for _, srcitem := range srcfeed.Entries {
- imageUrl := ""
- podcastUrl := ""
-
dstfeed.Items = append(dstfeed.Items, Item{
GUID: firstNonEmpty(srcitem.ID),
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
Title: srcitem.Title.String(),
- Content: srcitem.Content.String(),
- ImageURL: imageUrl,
- PodcastURL: podcastUrl,
+ Content: firstNonEmpty(srcitem.Content.String(), srcitem.firstMediaDescription()),
+ ImageURL: srcitem.firstMediaThumbnail(),
+ PodcastURL: "",
})
}
return dstfeed, nil
diff --git a/src/parser/feed_test.go b/src/parser/feed_test.go
index a333e00..df82f60 100644
--- a/src/parser/feed_test.go
+++ b/src/parser/feed_test.go
@@ -66,7 +66,7 @@ func TestParse(t *testing.T) {
Title: "Title",
Items: []Item{
{
- Title: "Item 1",
+ Title: "Item 1",
Content: "
content
",
},
},
diff --git a/src/parser/media.go b/src/parser/media.go
new file mode 100644
index 0000000..e6c5a71
--- /dev/null
+++ b/src/parser/media.go
@@ -0,0 +1,53 @@
+package parser
+
+// media collects the Media RSS elements (XML namespace
+// http://search.yahoo.com/mrss/) found among the children of the XML element
+// it is decoded from. It is embedded in atomEntry so those elements are
+// picked up for each Atom entry.
+type media struct {
+ // MediaGroups holds every <group> child; each group carries its own
+ // thumbnails and descriptions.
+ MediaGroups []mediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
+
+ // MediaThumbnails and MediaDescriptions hold the <thumbnail> and
+ // <description> children that appear directly on the element, outside of
+ // any group.
+ MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
+ MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
+}
+
+// mediaGroup is one Media RSS <group> element together with the <thumbnail>
+// and <description> children nested inside it.
+type mediaGroup struct {
+ MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
+ MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
+}
+
+// mediaContent captures the url, type, fileSize and medium attributes of an
+// XML element; every value is kept as the raw attribute string.
+//
+// NOTE(review): presumably models a Media RSS <content> element, but no code
+// visible in this change references the type - confirm it is used elsewhere.
+type mediaContent struct {
+ URL string `xml:"url,attr"`
+ Type string `xml:"type,attr"`
+ FileSize string `xml:"fileSize,attr"`
+ Medium string `xml:"medium,attr"`
+}
+
+// mediaThumbnail is a Media RSS <thumbnail> element; only its url attribute
+// is decoded.
+type mediaThumbnail struct {
+ URL string `xml:"url,attr"`
+}
+
+// mediaDescription is a Media RSS <description> element: its type attribute
+// plus the element's character data.
+type mediaDescription struct {
+ // Type is the raw value of the type attribute. The helpers in this file
+ // do not consult it when converting Description.
+ Type string `xml:"type,attr"`
+ // Description is the text content of the element.
+ Description string `xml:",chardata"`
+}
+
+// firstMediaThumbnail returns the URL of the first thumbnail attached
+// directly to the element. When there is none, it returns the URL of the
+// first thumbnail of the first media group that has any. It returns "" when
+// no thumbnail exists at either level.
+func (m *media) firstMediaThumbnail() string {
+	// Top-level thumbnails take precedence over grouped ones.
+	if len(m.MediaThumbnails) > 0 {
+		return m.MediaThumbnails[0].URL
+	}
+	for _, g := range m.MediaGroups {
+		if len(g.MediaThumbnails) > 0 {
+			return g.MediaThumbnails[0].URL
+		}
+	}
+	return ""
+}
+
+// firstMediaDescription returns the first description attached directly to
+// the element, converted with plain2html. When there is none, it uses the
+// first description of the first media group that has any. It returns ""
+// when no description exists at either level.
+func (m *media) firstMediaDescription() string {
+	// Top-level descriptions take precedence over grouped ones.
+	if len(m.MediaDescriptions) > 0 {
+		return plain2html(m.MediaDescriptions[0].Description)
+	}
+	for _, g := range m.MediaGroups {
+		if len(g.MediaDescriptions) > 0 {
+			return plain2html(g.MediaDescriptions[0].Description)
+		}
+	}
+	return ""
+}
diff --git a/src/parser/utils.go b/src/parser/utils.go
index 545cded..b39bce6 100644
--- a/src/parser/utils.go
+++ b/src/parser/utils.go
@@ -3,8 +3,11 @@ package parser
import (
"encoding/xml"
"io"
- "golang.org/x/net/html/charset"
+ "regexp"
+ "strings"
"time"
+
+ "golang.org/x/net/html/charset"
)
func firstNonEmpty(vals ...string) string {
@@ -16,6 +19,14 @@ func firstNonEmpty(vals ...string) string {
return ""
}
+// linkRe matches an http:// or https:// URL: the scheme followed by a run of
+// non-whitespace characters. The whole URL is capture group 1.
+var linkRe = regexp.MustCompile(`(https?:\/\/\S+)`)
+
+// plain2html converts plain text into a minimal HTML fragment: every URL
+// matched by linkRe becomes an anchor that points at, and is labelled with,
+// that URL, and every newline becomes a <br> so line breaks survive HTML
+// rendering.
+//
+// NOTE(review): text is not HTML-escaped first, so any markup already present
+// in the input is passed through unchanged - confirm the result is sanitized
+// before it is rendered.
+func plain2html(text string) string {
+	text = linkRe.ReplaceAllString(text, `<a href="$1">$1</a>`)
+	text = strings.ReplaceAll(text, "\n", "<br>")
+	return text
+}
+
func xmlDecoder(r io.Reader) *xml.Decoder {
decoder := xml.NewDecoder(r)
decoder.Strict = false