parser: fix parsing namespaced RSS feeds

This commit is contained in:
nkanaev
2026-06-22 09:33:56 +01:00
parent d0f8e70095
commit 31f2ca57df
3 changed files with 58 additions and 9 deletions

View File

@@ -4,6 +4,7 @@
- (fix) crash on empty article list when an article is selected (thanks to @rksvc)
- (fix) invalid article title in RSS feeds with media containing titles (thanks to @bwwu-git for the report)
- (fix) missing image enclosures in certain RSS feeds (thanks to @palinek for the report)
- (fix) parsing namespaced legacy RSS feeds (thanks to @f100024)
# v2.6 (2025-11-24)

View File

@@ -48,12 +48,6 @@ type rssLink struct {
Rel string `xml:"rel,attr"`
}
// rssTitle holds one decoded XML element: its name, its character data and
// its raw inner XML.
// NOTE(review): presumably used for <title> elements — confirm against the
// fields of the feed/item structs that reference this type.
type rssTitle struct {
// XMLName records the name of the element that was decoded.
XMLName xml.Name
// Data receives the element's character data.
Data string `xml:",chardata"`
// Inner receives the raw XML nested inside the element.
Inner string `xml:",innerxml"`
}
type rssEnclosure struct {
URL string `xml:"url,attr"`
Type string `xml:"type,attr"`
@@ -63,9 +57,10 @@ type rssEnclosure struct {
func ParseRSS(r io.Reader) (*Feed, error) {
srcfeed := rssFeed{}
decoder := xmlDecoder(r)
decoder.DefaultSpace = "rss"
if err := decoder.Decode(&srcfeed); err != nil {
rawDecoder := xmlDecoder(r)
rawDecoder.DefaultSpace = "rss"
rssDecoder := xml.NewTokenDecoder(&rssTokenReader{Decoder: rawDecoder})
if err := rssDecoder.Decode(&srcfeed); err != nil {
return nil, err
}

View File

@@ -42,6 +42,59 @@ func xmlDecoder(r io.Reader) *xml.Decoder {
return decoder
}
// rssTokenReader is an XML token reader that strips the default namespace
// declared on the root <rss> element: every element or attribute name found
// in that namespace is moved into Decoder.DefaultSpace instead.
// Its primary purpose is to support namespaced legacy UserLand RSS feeds.
// NOTE: token readers cannot populate ",innerxml"-tagged struct fields,
// see https://github.com/golang/go/issues/39645
type rssTokenReader struct {
	// Decoder is the underlying token source; its DefaultSpace is the
	// namespace that rewritten names are assigned to.
	Decoder *xml.Decoder

	// defaultNS holds the non-empty xmlns value of the root <rss> element,
	// or "" when no rewriting should take place.
	defaultNS string
	// rootSeen is set once the first start element has been inspected.
	rootSeen bool
}

// Token returns the next token from the underlying decoder. Start and end
// elements (and start-element attributes) whose namespace equals the default
// namespace of the root <rss> element are reported in Decoder.DefaultSpace;
// all other tokens are passed through untouched. Errors from the underlying
// decoder are returned as-is with a nil token.
func (r *rssTokenReader) Token() (xml.Token, error) {
	tok, err := r.Decoder.Token()
	if err != nil {
		return nil, err
	}
	switch t := tok.(type) {
	case xml.StartElement:
		// Only the document root may define the namespace to strip.
		// Re-reading it from a nested <rss xmlns="..."> element would change
		// the rewrite target mid-document, leaving already-opened ancestors
		// with end tags in a different namespace than their start tags.
		if !r.rootSeen {
			r.rootSeen = true
			// extract default namespace: <rss xmlns="<defaultNS>">
			if t.Name.Local == "rss" {
				for _, attr := range t.Attr {
					if attr.Name.Space == "" && attr.Name.Local == "xmlns" && attr.Value != "" {
						r.defaultNS = attr.Value
						break
					}
				}
			}
		}
		r.stripDefaultNS(&t.Name)
		for i := range t.Attr {
			r.stripDefaultNS(&t.Attr[i].Name)
		}
		return t, nil
	case xml.EndElement:
		r.stripDefaultNS(&t.Name)
		return t, nil
	default:
		return tok, nil
	}
}

// stripDefaultNS moves name into Decoder.DefaultSpace when it lives in the
// captured default namespace; it is a no-op when no namespace was captured.
func (r *rssTokenReader) stripDefaultNS(name *xml.Name) {
	if r.defaultNS != "" && name.Space == r.defaultNS {
		name.Space = r.Decoder.DefaultSpace
	}
}
type safexmlreader struct {
reader *bufio.Reader
buffer *bytes.Buffer