From b9b3d2350c9e12e143c0f40c2efeece4449af061 Mon Sep 17 00:00:00 2001
From: Karol Kosek <krkk@serenityos.org>
Date: Tue, 11 Jun 2024 13:26:22 +0200
Subject: [PATCH] atom: Stop unescaping special HTML characters

The HTML data in Atom is escaped because it needs to be stored as a
string inside an XML file. If we access it by reading the string
value, it is already unescaped, as opposed to when we read the raw
XML data.

XHTML data doesn't need to be unescaped either, since the elements are
already encoded as-is in the tree. :)

Closes #198
---
 src/parser/atom.go      |  3 +--
 src/parser/atom_test.go | 16 ++++++++++++++++
 2 files changed, 17 insertions(+), 2 deletions(-)
diff --git a/src/parser/atom.go b/src/parser/atom.go
index f59bd4a..5bc0c57 100644
--- a/src/parser/atom.go
+++ b/src/parser/atom.go
@@ -3,7 +3,6 @@ package parser
 
 import (
 	"encoding/xml"
-	"html"
 	"io"
 	"strings"
 
@@ -58,7 +57,7 @@ func (a *atomText) String() string {
 	if a.Type == "xhtml" {
 		data = a.XML
 	}
-	return html.UnescapeString(strings.TrimSpace(data))
+	return strings.TrimSpace(data)
 }
 
 func (links atomLinks) First(rel string) string {
diff --git a/src/parser/atom_test.go b/src/parser/atom_test.go
index f47594e..46aa935 100644
--- a/src/parser/atom_test.go
+++ b/src/parser/atom_test.go
@@ -214,3 +214,19 @@ func TestAtomLinkInID(t *testing.T) {
 		t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
 	}
 }
+
+// TestAtomDoesntEscapeHTMLTags checks that an html summary is entity-decoded once, not twice.
+func TestAtomDoesntEscapeHTMLTags(t *testing.T) {
+	feed, err := Parse(strings.NewReader(`
+		<?xml version="1.0" encoding="utf-8"?>
+		<feed xmlns="http://www.w3.org/2005/Atom">
+			<entry><summary type="html">&amp;lt;script&amp;gt;alert(1);&amp;lt;/script&amp;gt;</summary></entry>
+		</feed>
+	`))
+	if err != nil || len(feed.Items) == 0 { // fail clearly instead of panicking on feed.Items[0]
+		t.Fatalf("parse failed: err=%v feed=%#v", err, feed)
+	}
+	want := "&lt;script&gt;alert(1);&lt;/script&gt;" // one level of escaping left: still text, not a tag
+	if have := feed.Items[0].Content; have != want {
+		t.Fatalf("\nwant: %#v\nhave: %#v\n", want, have)
+	}
+}