mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
extract data from media elements
This commit is contained in:
parent
fe1a1987bd
commit
6685bce51c
@ -24,6 +24,7 @@ type atomEntry struct {
|
||||
Updated string `xml:"updated"`
|
||||
Links atomLinks `xml:"link"`
|
||||
Content atomText `xml:"content"`
|
||||
media
|
||||
}
|
||||
|
||||
type atomText struct {
|
||||
@ -69,17 +70,14 @@ func ParseAtom(r io.Reader) (*Feed, error) {
|
||||
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
|
||||
}
|
||||
for _, srcitem := range srcfeed.Entries {
|
||||
imageUrl := ""
|
||||
podcastUrl := ""
|
||||
|
||||
dstfeed.Items = append(dstfeed.Items, Item{
|
||||
GUID: firstNonEmpty(srcitem.ID),
|
||||
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
|
||||
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
|
||||
Title: srcitem.Title.String(),
|
||||
Content: srcitem.Content.String(),
|
||||
ImageURL: imageUrl,
|
||||
PodcastURL: podcastUrl,
|
||||
Content: firstNonEmpty(srcitem.Content.String(), srcitem.firstMediaDescription()),
|
||||
ImageURL: srcitem.firstMediaThumbnail(),
|
||||
PodcastURL: "",
|
||||
})
|
||||
}
|
||||
return dstfeed, nil
|
||||
|
53
src/parser/media.go
Normal file
53
src/parser/media.go
Normal file
@ -0,0 +1,53 @@
|
||||
package parser
|
||||
|
||||
type media struct {
|
||||
MediaGroups []mediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
|
||||
|
||||
MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
|
||||
MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
|
||||
}
|
||||
|
||||
type mediaGroup struct {
|
||||
MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
|
||||
MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
|
||||
}
|
||||
|
||||
// mediaContent holds the url, type, fileSize and medium attributes of a media
// content element. All values are kept as the raw attribute strings; FileSize
// is not parsed into a number.
//
// NOTE(review): none of the struct types visible alongside this one has a
// field of this type yet — confirm where it is meant to be decoded.
type mediaContent struct {
	URL      string `xml:"url,attr"`
	Type     string `xml:"type,attr"`
	FileSize string `xml:"fileSize,attr"`
	Medium   string `xml:"medium,attr"`
}
|
||||
|
||||
// mediaThumbnail is a thumbnail element; URL is the value of its url
// attribute and is empty when the attribute is absent.
type mediaThumbnail struct {
	URL string `xml:"url,attr"`
}
|
||||
|
||||
// mediaDescription is a description element. Type is the value of its type
// attribute (empty when absent) and Description is the element's character
// data.
type mediaDescription struct {
	Type        string `xml:"type,attr"`
	Description string `xml:",chardata"`
}
|
||||
|
||||
func (m *media) firstMediaThumbnail() string {
|
||||
for _, t := range m.MediaThumbnails {
|
||||
return t.URL
|
||||
}
|
||||
for _, g := range m.MediaGroups {
|
||||
for _, t := range g.MediaThumbnails {
|
||||
return t.URL
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func (m *media) firstMediaDescription() string {
|
||||
for _, d := range m.MediaDescriptions {
|
||||
return plain2html(d.Description)
|
||||
}
|
||||
for _, g := range m.MediaGroups {
|
||||
for _, d := range g.MediaDescriptions {
|
||||
return plain2html(d.Description)
|
||||
}
|
||||
}
|
||||
return ""
|
||||
}
|
@ -3,8 +3,11 @@ package parser
|
||||
import (
|
||||
"encoding/xml"
|
||||
"io"
|
||||
"golang.org/x/net/html/charset"
|
||||
"regexp"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"golang.org/x/net/html/charset"
|
||||
)
|
||||
|
||||
func firstNonEmpty(vals ...string) string {
|
||||
@ -16,6 +19,14 @@ func firstNonEmpty(vals ...string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// linkRe matches a bare http:// or https:// URL: the scheme followed by a run
// of non-whitespace characters.
var linkRe = regexp.MustCompile(`(https?:\/\/\S+)`)

// plainTextEscaper escapes the characters that are significant in HTML text
// and in double-quoted attribute values.
var plainTextEscaper = strings.NewReplacer(
	`&`, "&amp;",
	`<`, "&lt;",
	`>`, "&gt;",
	`"`, "&#34;",
	`'`, "&#39;",
)

// plain2html converts plain text to an HTML fragment: bare http(s) URLs
// become <a> links, newlines become <br>, and everything else is
// HTML-escaped.
//
// The input comes from feeds and is therefore untrusted. The text around the
// links and the URLs themselves are escaped separately, so markup characters
// in the text are shown literally and a URL containing a double quote cannot
// terminate the href attribute early.
func plain2html(text string) string {
	var b strings.Builder
	last := 0
	for _, loc := range linkRe.FindAllStringIndex(text, -1) {
		b.WriteString(plainTextEscaper.Replace(text[last:loc[0]]))
		url := plainTextEscaper.Replace(text[loc[0]:loc[1]])
		b.WriteString(`<a href="`)
		b.WriteString(url)
		b.WriteString(`">`)
		b.WriteString(url)
		b.WriteString(`</a>`)
		last = loc[1]
	}
	b.WriteString(plainTextEscaper.Replace(text[last:]))
	// URLs never contain a newline (the pattern stops at whitespace) and
	// escaping never introduces one, so this only touches original newlines.
	return strings.ReplaceAll(b.String(), "\n", "<br>")
}
|
||||
|
||||
func xmlDecoder(r io.Reader) *xml.Decoder {
|
||||
decoder := xml.NewDecoder(r)
|
||||
decoder.Strict = false
|
||||
|
Loading…
x
Reference in New Issue
Block a user