mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-25 05:29:20 +00:00
extract data from media elements
This commit is contained in:
parent
fe1a1987bd
commit
6685bce51c
@ -24,6 +24,7 @@ type atomEntry struct {
|
|||||||
Updated string `xml:"updated"`
|
Updated string `xml:"updated"`
|
||||||
Links atomLinks `xml:"link"`
|
Links atomLinks `xml:"link"`
|
||||||
Content atomText `xml:"content"`
|
Content atomText `xml:"content"`
|
||||||
|
media
|
||||||
}
|
}
|
||||||
|
|
||||||
type atomText struct {
|
type atomText struct {
|
||||||
@ -69,17 +70,14 @@ func ParseAtom(r io.Reader) (*Feed, error) {
|
|||||||
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
|
SiteURL: firstNonEmpty(srcfeed.Links.First("alternate"), srcfeed.Links.First("")),
|
||||||
}
|
}
|
||||||
for _, srcitem := range srcfeed.Entries {
|
for _, srcitem := range srcfeed.Entries {
|
||||||
imageUrl := ""
|
|
||||||
podcastUrl := ""
|
|
||||||
|
|
||||||
dstfeed.Items = append(dstfeed.Items, Item{
|
dstfeed.Items = append(dstfeed.Items, Item{
|
||||||
GUID: firstNonEmpty(srcitem.ID),
|
GUID: firstNonEmpty(srcitem.ID),
|
||||||
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
|
Date: dateParse(firstNonEmpty(srcitem.Published, srcitem.Updated)),
|
||||||
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
|
URL: firstNonEmpty(srcitem.Links.First("alternate"), srcfeed.Links.First("")),
|
||||||
Title: srcitem.Title.String(),
|
Title: srcitem.Title.String(),
|
||||||
Content: srcitem.Content.String(),
|
Content: firstNonEmpty(srcitem.Content.String(), srcitem.firstMediaDescription()),
|
||||||
ImageURL: imageUrl,
|
ImageURL: srcitem.firstMediaThumbnail(),
|
||||||
PodcastURL: podcastUrl,
|
PodcastURL: "",
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
return dstfeed, nil
|
return dstfeed, nil
|
||||||
|
@ -66,7 +66,7 @@ func TestParse(t *testing.T) {
|
|||||||
Title: "Title",
|
Title: "Title",
|
||||||
Items: []Item{
|
Items: []Item{
|
||||||
{
|
{
|
||||||
Title: "Item 1",
|
Title: "Item 1",
|
||||||
Content: "<div>content</div>",
|
Content: "<div>content</div>",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
53
src/parser/media.go
Normal file
53
src/parser/media.go
Normal file
@ -0,0 +1,53 @@
|
|||||||
|
package parser
|
||||||
|
|
||||||
|
type media struct {
|
||||||
|
MediaGroups []mediaGroup `xml:"http://search.yahoo.com/mrss/ group"`
|
||||||
|
|
||||||
|
MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
|
||||||
|
MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
|
||||||
|
}
|
||||||
|
|
||||||
|
type mediaGroup struct {
|
||||||
|
MediaThumbnails []mediaThumbnail `xml:"http://search.yahoo.com/mrss/ thumbnail"`
|
||||||
|
MediaDescriptions []mediaDescription `xml:"http://search.yahoo.com/mrss/ description"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// mediaContent captures the attributes of a media content element.
// NOTE(review): no other definition visible in this change references this
// type yet — confirm where it is meant to be decoded.
type mediaContent struct {
	// URL is read from the "url" attribute.
	URL string `xml:"url,attr"`
	// Type is read from the "type" attribute.
	Type string `xml:"type,attr"`
	// FileSize is read from the "fileSize" attribute and kept as the raw
	// string.
	FileSize string `xml:"fileSize,attr"`
	// Medium is read from the "medium" attribute.
	Medium string `xml:"medium,attr"`
}
|
||||||
|
|
||||||
|
// mediaThumbnail captures a thumbnail element; only its address is kept.
type mediaThumbnail struct {
	// URL is read from the "url" attribute and is empty when the attribute
	// is absent.
	URL string `xml:"url,attr"`
}
|
||||||
|
|
||||||
|
// mediaDescription captures a description element: its declared text type
// and the text itself.
type mediaDescription struct {
	// Type is read from the "type" attribute.
	Type string `xml:"type,attr"`
	// Description is the character data contained in the element.
	Description string `xml:",chardata"`
}
|
||||||
|
|
||||||
|
func (m *media) firstMediaThumbnail() string {
|
||||||
|
for _, t := range m.MediaThumbnails {
|
||||||
|
return t.URL
|
||||||
|
}
|
||||||
|
for _, g := range m.MediaGroups {
|
||||||
|
for _, t := range g.MediaThumbnails {
|
||||||
|
return t.URL
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
||||||
|
|
||||||
|
func (m *media) firstMediaDescription() string {
|
||||||
|
for _, d := range m.MediaDescriptions {
|
||||||
|
return plain2html(d.Description)
|
||||||
|
}
|
||||||
|
for _, g := range m.MediaGroups {
|
||||||
|
for _, d := range g.MediaDescriptions {
|
||||||
|
return plain2html(d.Description)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return ""
|
||||||
|
}
|
@ -3,8 +3,11 @@ package parser
|
|||||||
import (
|
import (
|
||||||
"encoding/xml"
|
"encoding/xml"
|
||||||
"io"
|
"io"
|
||||||
"golang.org/x/net/html/charset"
|
"regexp"
|
||||||
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"golang.org/x/net/html/charset"
|
||||||
)
|
)
|
||||||
|
|
||||||
func firstNonEmpty(vals ...string) string {
|
func firstNonEmpty(vals ...string) string {
|
||||||
@ -16,6 +19,14 @@ func firstNonEmpty(vals ...string) string {
|
|||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// linkRe matches an http:// or https:// URL: the scheme followed by a run of
// non-whitespace characters.
var linkRe = regexp.MustCompile(`(https?:\/\/\S+)`)

// plainTextEscaper replaces the characters that carry meaning in HTML text
// and in double- or single-quoted attribute values with character references.
var plainTextEscaper = strings.NewReplacer(
	"&", "&amp;",
	"<", "&lt;",
	">", "&gt;",
	`"`, "&quot;",
	"'", "&#39;",
)

// plain2html converts plain text into an HTML fragment: HTML-significant
// characters are escaped, http(s) URLs become anchors and newlines become
// <br> tags.
func plain2html(text string) string {
	// Escape before linkifying: the input is plain text taken from a feed, so
	// any '<', '&' or quote in it is literal and must neither open a tag nor
	// terminate the href attribute generated below.
	text = plainTextEscaper.Replace(text)
	text = linkRe.ReplaceAllString(text, `<a href="$1">$1</a>`)
	return strings.ReplaceAll(text, "\n", "<br>")
}
|
||||||
|
|
||||||
func xmlDecoder(r io.Reader) *xml.Decoder {
|
func xmlDecoder(r io.Reader) *xml.Decoder {
|
||||||
decoder := xml.NewDecoder(r)
|
decoder := xml.NewDecoder(r)
|
||||||
decoder.Strict = false
|
decoder.Strict = false
|
||||||
|
Loading…
x
Reference in New Issue
Block a user