From b9b3d2350c9e12e143c0f40c2efeece4449af061 Mon Sep 17 00:00:00 2001 From: Karol Kosek Date: Tue, 11 Jun 2024 13:26:22 +0200 Subject: [PATCH] atom: Stop unescaping special HTML characters The HTML data in Atom is escaped because the data needs to be put as a string into an XML file. If we are accessing it by reading the string value, then it is already unescaped, as opposed to getting the raw XML data. XHTML data doesn't need to be unescaped either, since the elements are already encoded as-is in the tree. :) Closes #198 --- src/parser/atom.go | 3 +-- src/parser/atom_test.go | 16 ++++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/src/parser/atom.go b/src/parser/atom.go index f59bd4a..5bc0c57 100644 --- a/src/parser/atom.go +++ b/src/parser/atom.go @@ -3,7 +3,6 @@ package parser import ( "encoding/xml" - "html" "io" "strings" @@ -58,7 +57,7 @@ func (a *atomText) String() string { if a.Type == "xhtml" { data = a.XML } - return html.UnescapeString(strings.TrimSpace(data)) + return strings.TrimSpace(data) } func (links atomLinks) First(rel string) string { diff --git a/src/parser/atom_test.go b/src/parser/atom_test.go index f47594e..46aa935 100644 --- a/src/parser/atom_test.go +++ b/src/parser/atom_test.go @@ -214,3 +214,19 @@ func TestAtomLinkInID(t *testing.T) { t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have) } } + +func TestAtomDoesntEscapeHTMLTags(t *testing.T) { + feed, _ := Parse(strings.NewReader(` + + + &lt;script&gt;alert(1);&lt;/script&gt; + + `)) + have := feed.Items[0].Content + want := "<script>alert(1);</script>" + if !reflect.DeepEqual(want, have) { + t.Logf("want: %#v", want) + t.Logf("have: %#v", have) + t.FailNow() + } +}