atom: Stop unescaping special HTML characters

The HTML data in Atom is escaped because the data needs to be put as a
string into an XML file. If we access it by reading the string
value, then it is already unescaped, as opposed to getting the raw
XML data.

XHTML data doesn't need to be unescaped either, since the elements are
already encoded as is in the tree. :)

Closes #198
This commit is contained in:
Karol Kosek 2024-06-11 13:26:22 +02:00 committed by nkanaev
parent b13cd85f0b
commit b9b3d2350c
2 changed files with 17 additions and 2 deletions

View File

@ -3,7 +3,6 @@ package parser
import ( import (
"encoding/xml" "encoding/xml"
"html"
"io" "io"
"strings" "strings"
@ -58,7 +57,7 @@ func (a *atomText) String() string {
if a.Type == "xhtml" { if a.Type == "xhtml" {
data = a.XML data = a.XML
} }
return html.UnescapeString(strings.TrimSpace(data)) return strings.TrimSpace(data)
} }
func (links atomLinks) First(rel string) string { func (links atomLinks) First(rel string) string {

View File

@ -214,3 +214,19 @@ func TestAtomLinkInID(t *testing.T) {
t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have) t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
} }
} }
// TestAtomDoesntEscapeHTMLTags parses an Atom feed whose single entry has a
// type="html" summary containing doubly escaped markup
// (&amp;lt;script&amp;gt;...) and checks that the first item's Content is
// the singly escaped text (&lt;script&gt;...), i.e. that the entities are
// not unescaped a second time into a live <script> tag.
func TestAtomDoesntEscapeHTMLTags(t *testing.T) {
	feed, err := Parse(strings.NewReader(`
<?xml version="1.0" encoding="utf-8"?>
<feed xmlns="http://www.w3.org/2005/Atom">
<entry><summary type="html">&amp;lt;script&amp;gt;alert(1);&amp;lt;/script&amp;gt;</summary></entry>
</feed>
`))
	if err != nil {
		t.Fatalf("unexpected parse error: %v", err)
	}
	// Guard the index below so a parsing regression fails with a clear
	// message instead of an index-out-of-range panic.
	if len(feed.Items) == 0 {
		t.Fatal("expected at least one item, got none")
	}

	have := feed.Items[0].Content
	want := "&lt;script&gt;alert(1);&lt;/script&gt;"
	if have != want {
		t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
	}
}