Fix parsing of OPML files that declare a non-UTF-8 encoding

This commit is contained in:
nkanaev 2022-01-24 13:10:30 +00:00
parent bff7476b58
commit b78c8bf8bf
3 changed files with 52 additions and 0 deletions

View File

@ -3,6 +3,8 @@ package opml
import (
"encoding/xml"
"io"
"golang.org/x/net/html/charset"
)
type opml struct {
@ -45,6 +47,7 @@ func Parse(r io.Reader) (Folder, error) {
decoder := xml.NewDecoder(r)
decoder.Entity = xml.HTMLEntity
decoder.Strict = false
decoder.CharsetReader = charset.NewReaderLabel
err := decoder.Decode(&val)
if err != nil {

View File

@ -1,6 +1,7 @@
package opml
import (
"os"
"reflect"
"strings"
"testing"
@ -87,3 +88,41 @@ func TestParseFallback(t *testing.T) {
t.Fatal("invalid opml")
}
}
// TestParseWithEncoding checks that Parse handles an OPML document stored in
// a non-UTF-8 encoding. It reads the sample_win1251.xml fixture and expects
// the feed and folder titles to come back as the Cyrillic strings below.
func TestParseWithEncoding(t *testing.T) {
	file, err := os.Open("sample_win1251.xml")
	if err != nil {
		t.Fatalf("opening fixture: %v", err)
	}
	// Release the descriptor when the test ends; the file is only read, so a
	// Close error carries no useful information here.
	defer file.Close()

	have, err := Parse(file)
	if err != nil {
		t.Fatalf("parsing fixture: %v", err)
	}

	// The root folder is expected to have an empty title, one top-level feed,
	// and one nested folder holding a single feed.
	want := Folder{
		Title: "",
		Feeds: []Feed{
			{
				Title:   "пример1",
				FeedUrl: "https://baz.com/feed.xml",
				SiteUrl: "https://baz.com/",
			},
		},
		Folders: []Folder{
			{
				Title: "папка",
				Feeds: []Feed{
					{
						Title:   "пример2",
						FeedUrl: "https://foo.com/feed.xml",
						SiteUrl: "https://foo.com/",
					},
				},
			},
		},
	}
	if !reflect.DeepEqual(want, have) {
		t.Logf("want: %#v", want)
		t.Logf("have: %#v", have)
		t.Fatal("invalid opml")
	}
}

View File

@ -0,0 +1,10 @@
<?xml version="1.0" encoding="windows-1251"?>
<opml version="1.1">
<head><title>Çàãîëîâîê</title></head>
<body>
<outline text="ïàïêà">
<outline type="rss" text="ïðèìåð2" description="ïðèìåð2" xmlUrl="https://foo.com/feed.xml" htmlUrl="https://foo.com/"/>
</outline>
<outline type="rss" text="ïðèìåð1" description="ïðèìåð1" xmlUrl="https://baz.com/feed.xml" htmlUrl="https://baz.com/"/>
</body>
</opml>