mirror of
https://github.com/nkanaev/yarr.git
synced 2026-06-24 09:05:16 +00:00
parser: fix parsing namespaced RSS feeds
This commit is contained in:
@@ -4,6 +4,7 @@
|
|||||||
- (fix) crash on empty article list when an article is selected (thanks to @rksvc)
|
- (fix) crash on empty article list when an article is selected (thanks to @rksvc)
|
||||||
- (fix) invalid article title in RSS feeds with media containing titles (thanks to @bwwu-git for the report)
|
- (fix) invalid article title in RSS feeds with media containing titles (thanks to @bwwu-git for the report)
|
||||||
- (fix) missing image enclosures in certain RSS feeds (thanks to @palinek for the report)
|
- (fix) missing image enclosures in certain RSS feeds (thanks to @palinek for the report)
|
||||||
|
- (fix) parsing namespaced legacy RSS feeds (thanks to @f100024)
|
||||||
|
|
||||||
# v2.6 (2025-11-24)
|
# v2.6 (2025-11-24)
|
||||||
|
|
||||||
|
|||||||
@@ -48,12 +48,6 @@ type rssLink struct {
|
|||||||
Rel string `xml:"rel,attr"`
|
Rel string `xml:"rel,attr"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type rssTitle struct {
|
|
||||||
XMLName xml.Name
|
|
||||||
Data string `xml:",chardata"`
|
|
||||||
Inner string `xml:",innerxml"`
|
|
||||||
}
|
|
||||||
|
|
||||||
type rssEnclosure struct {
|
type rssEnclosure struct {
|
||||||
URL string `xml:"url,attr"`
|
URL string `xml:"url,attr"`
|
||||||
Type string `xml:"type,attr"`
|
Type string `xml:"type,attr"`
|
||||||
@@ -63,9 +57,10 @@ type rssEnclosure struct {
|
|||||||
func ParseRSS(r io.Reader) (*Feed, error) {
|
func ParseRSS(r io.Reader) (*Feed, error) {
|
||||||
srcfeed := rssFeed{}
|
srcfeed := rssFeed{}
|
||||||
|
|
||||||
decoder := xmlDecoder(r)
|
rawDecoder := xmlDecoder(r)
|
||||||
decoder.DefaultSpace = "rss"
|
rawDecoder.DefaultSpace = "rss"
|
||||||
if err := decoder.Decode(&srcfeed); err != nil {
|
rssDecoder := xml.NewTokenDecoder(&rssTokenReader{Decoder: rawDecoder})
|
||||||
|
if err := rssDecoder.Decode(&srcfeed); err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -42,6 +42,59 @@ func xmlDecoder(r io.Reader) *xml.Decoder {
|
|||||||
return decoder
|
return decoder
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// rssTokenReader is an xml.TokenReader that strips the default namespace
// declared on the root <rss> element: every element and attribute name in
// that namespace is moved to Decoder.DefaultSpace.
//
// Its primary purpose is to support namespaced legacy UserLand RSS feeds.
// NOTE: token readers cannot populate ",innerxml"-tagged struct fields,
// see https://github.com/golang/go/issues/39645
type rssTokenReader struct {
	// Decoder is the underlying decoder the tokens are read from. Its
	// DefaultSpace is the namespace that rewritten names are moved to.
	Decoder *xml.Decoder

	// defaultNS is the default namespace declared on the root element,
	// or "" when the root is not <rss> or declares none.
	defaultNS string
	// rootSeen is set once the first start element has been inspected.
	rootSeen bool
}

// Token returns the next token of the underlying decoder. Start and end
// element names (and start element attribute names) that belong to the
// feed's default namespace are rewritten to Decoder.DefaultSpace; all other
// tokens are passed through unchanged. Errors from the underlying decoder
// (including io.EOF) are returned as is, with a nil token.
func (r *rssTokenReader) Token() (xml.Token, error) {
	tok, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}

	switch t := tok.(type) {
	case xml.StartElement:
		// Extract the default namespace: <rss xmlns="<defaultNS>">.
		// Only the document root is inspected. A nested element that
		// happens to be called "rss" must not replace the namespace
		// mid-stream, otherwise the rest of the feed would no longer be
		// rewritten.
		if !r.rootSeen {
			r.rootSeen = true
			if t.Name.Local == "rss" {
				for _, attr := range t.Attr {
					if attr.Name.Space == "" && attr.Name.Local == "xmlns" && attr.Value != "" {
						r.defaultNS = attr.Value
						break
					}
				}
			}
		}

		if r.defaultNS != "" {
			// Rewrite element namespace.
			if t.Name.Space == r.defaultNS {
				t.Name.Space = r.Decoder.DefaultSpace
			}
			// Rewrite attribute namespaces in place.
			for i := range t.Attr {
				if t.Attr[i].Name.Space == r.defaultNS {
					t.Attr[i].Name.Space = r.Decoder.DefaultSpace
				}
			}
		}
		return t, nil
	case xml.EndElement:
		if r.defaultNS != "" && t.Name.Space == r.defaultNS {
			t.Name.Space = r.Decoder.DefaultSpace
		}
		return t, nil
	default:
		return tok, nil
	}
}
|
||||||
|
|
||||||
type safexmlreader struct {
|
type safexmlreader struct {
|
||||||
reader *bufio.Reader
|
reader *bufio.Reader
|
||||||
buffer *bytes.Buffer
|
buffer *bytes.Buffer
|
||||||
|
|||||||
Reference in New Issue
Block a user