diff --git a/doc/changelog.md b/doc/changelog.md index d1ddc20..1382cc4 100644 --- a/doc/changelog.md +++ b/doc/changelog.md @@ -4,6 +4,7 @@ - (fix) crash on empty article list with article is selected (thanks to @rksvc) - (fix) invalid article title in RSS feeds with media containing titles (thanks to @bwwu-git for the report) - (fix) missing image enclosures in certain RSS feeds (thanks to @palinek for the report) +- (fix) parsing namespaced legacy RSS feeds (thanks to @f100024) # v2.6 (2025-11-24) diff --git a/src/parser/rss.go b/src/parser/rss.go index 1470bfc..26c5bcb 100644 --- a/src/parser/rss.go +++ b/src/parser/rss.go @@ -48,12 +48,6 @@ type rssLink struct { Rel string `xml:"rel,attr"` } -type rssTitle struct { - XMLName xml.Name - Data string `xml:",chardata"` - Inner string `xml:",innerxml"` -} - type rssEnclosure struct { URL string `xml:"url,attr"` Type string `xml:"type,attr"` @@ -63,9 +57,10 @@ type rssEnclosure struct { func ParseRSS(r io.Reader) (*Feed, error) { srcfeed := rssFeed{} - decoder := xmlDecoder(r) - decoder.DefaultSpace = "rss" - if err := decoder.Decode(&srcfeed); err != nil { + rawDecoder := xmlDecoder(r) + rawDecoder.DefaultSpace = "rss" + rssDecoder := xml.NewTokenDecoder(&rssTokenReader{Decoder: rawDecoder}) + if err := rssDecoder.Decode(&srcfeed); err != nil { return nil, err } diff --git a/src/parser/util.go b/src/parser/util.go index ca1aa38..1e6b13d 100644 --- a/src/parser/util.go +++ b/src/parser/util.go @@ -42,6 +42,59 @@ func xmlDecoder(r io.Reader) *xml.Decoder { return decoder } +// XML token reader that strips the default namespace. +// Its primary purpose is to support namespaced legacy UserLand RSS feeds.
+// NOTE: token readers cannot populate ",innerxml"-tagged struct fields, +// see https://github.com/golang/go/issues/39645 +type rssTokenReader struct { + Decoder *xml.Decoder + defaultNS string +} + +func (r *rssTokenReader) Token() (xml.Token, error) { + tok, err := r.Decoder.Token() + if err != nil { + return nil, err + } + + switch t := tok.(type) { + case xml.StartElement: + // Remember the default namespace: a non-empty, unprefixed xmlns attribute on an element named "rss". + if t.Name.Local == "rss" { + for _, attr := range t.Attr { + if attr.Name.Space == "" && attr.Name.Local == "xmlns" && attr.Value != "" { + r.defaultNS = attr.Value + break + } + } + } + + if r.defaultNS != "" { + // Rewrite the element's namespace from the default namespace to Decoder.DefaultSpace + if t.Name.Space == r.defaultNS { + t.Name.Space = r.Decoder.DefaultSpace + } + // Rewrite attribute namespaces the same way; every attribute is kept, none is dropped + attrs := t.Attr[:0] + for _, a := range t.Attr { + if a.Name.Space == r.defaultNS { + a.Name.Space = r.Decoder.DefaultSpace + } + attrs = append(attrs, a) + } + t.Attr = attrs + } + return t, nil + case xml.EndElement: + if r.defaultNS != "" && t.Name.Space == r.defaultNS { + t.Name.Space = r.Decoder.DefaultSpace + } + return t, nil + default: + return tok, nil + } +} + type safexmlreader struct { reader *bufio.Reader buffer *bytes.Buffer