atom: Stop unescaping special HTML characters

HTML data in Atom is escaped because it has to be embedded as a string in an XML file. When we access it by reading the element's string value (as opposed to the raw XML data), it is already unescaped, so unescaping it a second time is wrong. XHTML data doesn't need to be unescaped either, since its elements are already encoded as-is in the tree. :) Closes #198
2026-06-15 12:35:04 +00:00 · 2024-06-11 13:26:22 +02:00
parent b13cd85f0b
commit b9b3d2350c
2 changed files with 17 additions and 2 deletions
--- a/src/parser/atom.go
+++ b/src/parser/atom.go
@@ -3,7 +3,6 @@ package parser

 import (
 	"encoding/xml"
-	"html"
 	"io"
 	"strings"

@@ -58,7 +57,7 @@ func (a *atomText) String() string {
 	if a.Type == "xhtml" {
 		data = a.XML
 	}
-	return html.UnescapeString(strings.TrimSpace(data))
+	return strings.TrimSpace(data)
 }

 func (links atomLinks) First(rel string) string {
--- a/src/parser/atom_test.go
+++ b/src/parser/atom_test.go
@@ -214,3 +214,19 @@ func TestAtomLinkInID(t *testing.T) {
 		t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
 	}
 }
+
+// TestAtomDoesntEscapeHTMLTags checks that type="html" text is XML-decoded once, not HTML-unescaped again.
+func TestAtomDoesntEscapeHTMLTags(t *testing.T) {
+	feed, err := Parse(strings.NewReader(`
+		<?xml version="1.0" encoding="utf-8"?>
+		<feed xmlns="http://www.w3.org/2005/Atom">
+			<entry><summary type="html">&amp;lt;script&amp;gt;alert(1);&amp;lt;/script&amp;gt;</summary></entry>
+		</feed>
+	`))
+	if err != nil || len(feed.Items) == 0 { // fail cleanly instead of panicking on feed.Items[0]
+		t.Fatalf("parse: err=%v, feed=%#v", err, feed)
+	}
+	// Entities must survive: a second unescape would yield a live <script> tag.
+	if have, want := feed.Items[0].Content, "&lt;script&gt;alert(1);&lt;/script&gt;"; have != want {
+		t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
+	}
+}