mirror of
https://github.com/nkanaev/yarr.git
synced 2026-06-24 00:55:16 +00:00
parser: fix parsing namespaced RSS feeds
This commit is contained in:
@@ -4,6 +4,7 @@
|
||||
- (fix) crash on empty article list when an article is selected (thanks to @rksvc)
|
||||
- (fix) invalid article title in RSS feeds with media containing titles (thanks to @bwwu-git for the report)
|
||||
- (fix) missing image enclosures in certain RSS feeds (thanks to @palinek for the report)
|
||||
- (fix) parsing namespaced legacy RSS feeds (thanks to @f100024)
|
||||
|
||||
# v2.6 (2025-11-24)
|
||||
|
||||
|
||||
@@ -48,12 +48,6 @@ type rssLink struct {
|
||||
Rel string `xml:"rel,attr"`
|
||||
}
|
||||
|
||||
// rssTitle holds a single XML element decoded via encoding/xml: XMLName
// receives the element's name, Data its character data (",chardata") and
// Inner its raw nested XML (",innerxml").
type rssTitle struct {
|
||||
XMLName xml.Name
|
||||
Data string `xml:",chardata"`
|
||||
Inner string `xml:",innerxml"`
|
||||
}
|
||||
|
||||
type rssEnclosure struct {
|
||||
URL string `xml:"url,attr"`
|
||||
Type string `xml:"type,attr"`
|
||||
@@ -63,9 +57,10 @@ type rssEnclosure struct {
|
||||
func ParseRSS(r io.Reader) (*Feed, error) {
|
||||
srcfeed := rssFeed{}
|
||||
|
||||
decoder := xmlDecoder(r)
|
||||
decoder.DefaultSpace = "rss"
|
||||
if err := decoder.Decode(&srcfeed); err != nil {
|
||||
rawDecoder := xmlDecoder(r)
|
||||
rawDecoder.DefaultSpace = "rss"
|
||||
rssDecoder := xml.NewTokenDecoder(&rssTokenReader{Decoder: rawDecoder})
|
||||
if err := rssDecoder.Decode(&srcfeed); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
|
||||
@@ -42,6 +42,59 @@ func xmlDecoder(r io.Reader) *xml.Decoder {
|
||||
return decoder
|
||||
}
|
||||
|
||||
// rssTokenReader is an XML token reader that strips the default namespace
// declared on the <rss> element by rewriting it to the wrapped decoder's
// DefaultSpace.
// Its primary purpose is to support namespaced legacy UserLand RSS feeds.
// NOTE: token readers cannot populate ",innerxml"-tagged struct fields,
// see https://github.com/golang/go/issues/39645
type rssTokenReader struct {
	// Decoder supplies the tokens; its DefaultSpace is the rewrite target.
	Decoder *xml.Decoder
	// defaultNS is the default namespace taken from the first <rss> start
	// element that declares one. It stays empty until such an element is seen.
	defaultNS string
}

// Token returns the next token of the wrapped decoder. Element and attribute
// names that live in the captured default namespace are moved into
// Decoder.DefaultSpace; every other token is passed through unchanged.
// Errors from the wrapped decoder are returned as-is with a nil token.
func (r *rssTokenReader) Token() (xml.Token, error) {
	tok, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}

	switch t := tok.(type) {
	case xml.StartElement:
		// Extract the default namespace: <rss xmlns="<defaultNS>">.
		// It is captured only once. Re-capturing it from a nested element that
		// happens to be called "rss" would stop the end tags of already-open
		// elements from being rewritten, and the consuming decoder would then
		// reject the document because start and end names no longer match.
		if r.defaultNS == "" && t.Name.Local == "rss" {
			for _, attr := range t.Attr {
				if attr.Name.Space == "" && attr.Name.Local == "xmlns" && attr.Value != "" {
					r.defaultNS = attr.Value
					break
				}
			}
		}

		r.stripDefaultNS(&t.Name)
		for i := range t.Attr {
			r.stripDefaultNS(&t.Attr[i].Name)
		}
		return t, nil
	case xml.EndElement:
		r.stripDefaultNS(&t.Name)
		return t, nil
	default:
		return tok, nil
	}
}

// stripDefaultNS moves name into Decoder.DefaultSpace when it belongs to the
// captured default namespace. It is a no-op until a namespace was captured.
func (r *rssTokenReader) stripDefaultNS(name *xml.Name) {
	if r.defaultNS != "" && name.Space == r.defaultNS {
		name.Space = r.Decoder.DefaultSpace
	}
}
|
||||
|
||||
type safexmlreader struct {
|
||||
reader *bufio.Reader
|
||||
buffer *bytes.Buffer
|
||||
|
||||
Reference in New Issue
Block a user