add charsetreader to xmlreader

This commit is contained in:
Nazar Kanaev 2021-03-23 11:31:46 +00:00
parent 454eff0155
commit 646519e074
7 changed files with 19 additions and 6 deletions

1
go.sum
View File

@ -11,4 +11,5 @@ golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5h
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13 h1:5jaG59Zhd+8ZXe8C+lgiAGqkOaZBruqrWclLkgAww34= golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13 h1:5jaG59Zhd+8ZXe8C+lgiAGqkOaZBruqrWclLkgAww34=
golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201018230417-eeed37f84f13/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=

View File

@ -59,7 +59,7 @@ func (links atomLinks) First(rel string) string {
func ParseAtom(r io.Reader) (*Feed, error) { func ParseAtom(r io.Reader) (*Feed, error) {
srcfeed := atomFeed{} srcfeed := atomFeed{}
decoder := xml.NewDecoder(r) decoder := xmlDecoder(r)
if err := decoder.Decode(&srcfeed); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
return nil, err return nil, err
} }

View File

@ -19,7 +19,7 @@ func sniff(lookup string) (string, processor) {
lookup = strings.TrimSpace(lookup) lookup = strings.TrimSpace(lookup)
switch lookup[0] { switch lookup[0] {
case '<': case '<':
decoder := xml.NewDecoder(strings.NewReader(lookup)) decoder := xmlDecoder(strings.NewReader(lookup))
for { for {
token, _ := decoder.Token() token, _ := decoder.Token()
if token == nil { if token == nil {

View File

@ -12,6 +12,10 @@ func TestSniff(t *testing.T) {
`<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`, `<?xml version="1.0"?><rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"></rdf:RDF>`,
"rdf", "rdf",
}, },
{
`<?xml version="1.0" encoding="ISO-8859-1"?><rss version="2.0"><channel></channel></rss>`,
"rss",
},
{ {
`<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`, `<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>`,
"rss", "rss",

View File

@ -27,7 +27,7 @@ type rdfItem struct {
func ParseRDF(r io.Reader) (*Feed, error) { func ParseRDF(r io.Reader) (*Feed, error) {
srcfeed := rdfFeed{} srcfeed := rdfFeed{}
decoder := xml.NewDecoder(r) decoder := xmlDecoder(r)
if err := decoder.Decode(&srcfeed); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
return nil, err return nil, err
} }

View File

@ -6,7 +6,6 @@ package feed
import ( import (
"encoding/xml" "encoding/xml"
"fmt"
"io" "io"
) )
@ -59,10 +58,9 @@ type rssEnclosure struct {
func ParseRSS(r io.Reader) (*Feed, error) { func ParseRSS(r io.Reader) (*Feed, error) {
srcfeed := rssFeed{} srcfeed := rssFeed{}
decoder := xml.NewDecoder(r) decoder := xmlDecoder(r)
decoder.DefaultSpace = "rss" decoder.DefaultSpace = "rss"
if err := decoder.Decode(&srcfeed); err != nil { if err := decoder.Decode(&srcfeed); err != nil {
fmt.Println(err)
return nil, err return nil, err
} }

View File

@ -1,6 +1,9 @@
package feed package feed
import ( import (
"encoding/xml"
"io"
"golang.org/x/net/html/charset"
"time" "time"
) )
@ -13,6 +16,13 @@ func firstNonEmpty(vals ...string) string {
return "" return ""
} }
// xmlDecoder returns an XML decoder reading from r, configured the same way
// for every feed parser in this package:
//
//   - CharsetReader is set to charset.NewReaderLabel so that documents
//     declaring a non-UTF-8 encoding (e.g. encoding="ISO-8859-1") can be
//     decoded instead of being rejected by encoding/xml.
//   - Strict is disabled so the decoder tolerates the common markup mistakes
//     that encoding/xml accepts in non-strict mode.
func xmlDecoder(r io.Reader) *xml.Decoder {
	d := xml.NewDecoder(r)
	d.CharsetReader = charset.NewReaderLabel
	d.Strict = false
	return d
}
// taken from github.com/mjibson/goread // taken from github.com/mjibson/goread
var dateFormats = []string{ var dateFormats = []string{
time.RFC822, // RSS time.RFC822, // RSS