cleanup sanitizer

This commit is contained in:
Nazar Kanaev 2021-03-29 20:57:20 +01:00
parent 485587825c
commit 493a4262b1
2 changed files with 19 additions and 119 deletions

View File

@ -15,10 +15,7 @@ import (
"golang.org/x/net/html" "golang.org/x/net/html"
) )
var ( var splitSrcsetRegex = regexp.MustCompile(`,\s+`)
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
splitSrcsetRegex = regexp.MustCompile(`,\s+`)
)
// Sanitize returns safe HTML. // Sanitize returns safe HTML.
func Sanitize(baseURL, input string) string { func Sanitize(baseURL, input string) string {
@ -56,7 +53,7 @@ func Sanitize(baseURL, input string) string {
tagName := token.DataAtom.String() tagName := token.DataAtom.String()
parentTag = tagName parentTag = tagName
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) { if isValidTag(tagName) {
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
if hasRequiredAttributes(tagName, attrNames) { if hasRequiredAttributes(tagName, attrNames) {
@ -80,7 +77,7 @@ func Sanitize(baseURL, input string) string {
} }
case html.SelfClosingTagToken: case html.SelfClosingTagToken:
tagName := token.DataAtom.String() tagName := token.DataAtom.String()
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) { if isValidTag(tagName) {
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr) attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
if hasRequiredAttributes(tagName, attrNames) { if hasRequiredAttributes(tagName, attrNames) {
@ -112,7 +109,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
if isExternalResourceAttribute(attribute.Key) { if isExternalResourceAttribute(attribute.Key) {
if tagName == "iframe" { if tagName == "iframe" {
if isValidIframeSource(baseURL, attribute.Val) { if isValidIframeSource(baseURL, attribute.Val) {
value = rewriteIframeURL(attribute.Val) value = attribute.Val
} else { } else {
continue continue
} }
@ -189,27 +186,6 @@ func isExternalResourceAttribute(attribute string) bool {
} }
} }
// isPixelTracker reports whether the given tag looks like a tracking pixel:
// an "img" element carrying both a height attribute and a width attribute
// whose value is exactly "1". Any other tag name yields false.
func isPixelTracker(tagName string, attributes []html.Attribute) bool {
	if tagName != "img" {
		return false
	}

	var oneHigh, oneWide bool
	for _, attr := range attributes {
		// Only attributes whose value is the literal "1" are of interest.
		if attr.Val != "1" {
			continue
		}
		switch attr.Key {
		case "height":
			oneHigh = true
		case "width":
			oneWide = true
		}
	}
	return oneHigh && oneWide
}
func hasRequiredAttributes(tagName string, attributes []string) bool { func hasRequiredAttributes(tagName string, attributes []string) bool {
elements := make(map[string][]string) elements := make(map[string][]string)
elements["a"] = []string{"href"} elements["a"] = []string{"href"}
@ -305,34 +281,27 @@ func isBlockedResource(src string) bool {
func isValidIframeSource(baseURL, src string) bool { func isValidIframeSource(baseURL, src string) bool {
whitelist := []string{ whitelist := []string{
"https://invidio.us", "bandcamp.com",
"//www.youtube.com", "cdn.embedly.com",
"http://www.youtube.com", "invidio.us",
"https://www.youtube.com", "player.bilibili.com",
"https://www.youtube-nocookie.com", "player.vimeo.com",
"http://player.vimeo.com", "soundcloud.com",
"https://player.vimeo.com", "vk.com",
"http://www.dailymotion.com", "w.soundcloud.com",
"https://www.dailymotion.com", "www.dailymotion.com",
"http://vk.com", "www.youtube-nocookie.com",
"https://vk.com", "www.youtube.com",
"http://soundcloud.com",
"https://soundcloud.com",
"http://w.soundcloud.com",
"https://w.soundcloud.com",
"http://bandcamp.com",
"https://bandcamp.com",
"https://cdn.embedly.com",
"https://player.bilibili.com",
} }
domain := urlDomain(src)
// allow iframe from same origin // allow iframe from same origin
if urlDomain(baseURL) == urlDomain(src) { if urlDomain(baseURL) == domain {
return true return true
} }
for _, prefix := range whitelist { for _, safeDomain := range whitelist {
if strings.HasPrefix(src, prefix) { if safeDomain == domain {
return true return true
} }
} }
@ -410,15 +379,6 @@ func inList(needle string, haystack []string) bool {
return false return false
} }
// rewriteIframeURL returns the youtube-nocookie.com equivalent of link when
// youtubeEmbedRegex matches it, reusing everything captured after "/embed/".
// Links that do not match are returned unchanged.
func rewriteIframeURL(link string) string {
	// A successful match yields exactly two entries: the full match and the
	// single capture group.
	if groups := youtubeEmbedRegex.FindStringSubmatch(link); len(groups) == 2 {
		return `https://www.youtube-nocookie.com/embed/` + groups[1]
	}
	return link
}
func isBlockedTag(tagName string) bool { func isBlockedTag(tagName string) bool {
blacklist := []string{ blacklist := []string{
"noscript", "noscript",

View File

@ -431,16 +431,6 @@ func TestBlacklistedLink(t *testing.T) {
} }
} }
// TestPixelTracker verifies that images declared with height="1" and
// width="1" are dropped from the sanitized output, for both the regular and
// the self-closing tag form.
func TestPixelTracker(t *testing.T) {
	const (
		src  = `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
		want = `<p> and </p>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
func TestXmlEntities(t *testing.T) { func TestXmlEntities(t *testing.T) {
input := `<pre>echo "test" &gt; /etc/hosts</pre>` input := `<pre>echo "test" &gt; /etc/hosts</pre>`
expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>` expected := `<pre>echo &#34;test&#34; &gt; /etc/hosts</pre>`
@ -461,56 +451,6 @@ func TestEspaceAttributes(t *testing.T) {
} }
} }
// TestReplaceYoutubeURL verifies that a plain-HTTP YouTube embed URL is
// rewritten to https://www.youtube-nocookie.com, that its query string is
// preserved (with entities normalized to &amp;), and that the sandbox and
// loading attributes are appended to the iframe.
func TestReplaceYoutubeURL(t *testing.T) {
	const (
		src  = `<iframe src="http://www.youtube.com/embed/test123?version=3&#038;rel=1&#038;fs=1&#038;autohide=2&#038;showsearch=0&#038;showinfo=1&#038;iv_load_policy=1&#038;wmode=transparent"></iframe>`
		want = `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&amp;rel=1&amp;fs=1&amp;autohide=2&amp;showsearch=0&amp;showinfo=1&amp;iv_load_policy=1&amp;wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestReplaceSecureYoutubeURL verifies that an HTTPS YouTube embed URL is
// rewritten to the youtube-nocookie.com host and that the sandbox and loading
// attributes are appended to the iframe.
func TestReplaceSecureYoutubeURL(t *testing.T) {
	const (
		src  = `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
		want = `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestReplaceSecureYoutubeURLWithParameters verifies that query parameters
// survive the rewrite of an HTTPS YouTube embed URL to youtube-nocookie.com.
func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
	const (
		src  = `<iframe src="https://www.youtube.com/embed/test123?rel=0&amp;controls=0"></iframe>`
		want = `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestReplaceYoutubeURLAlreadyReplaced verifies that an iframe already
// pointing at youtube-nocookie.com keeps its URL, while its sandbox attribute
// is replaced by the sanitizer's own value and loading="lazy" is added.
func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
	const (
		src  = `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
		want = `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&amp;controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
// TestReplaceProtocolRelativeYoutubeURL verifies that a protocol-relative
// ("//www.youtube.com/...") embed URL is rewritten to an absolute
// https://www.youtube-nocookie.com URL, and that the width, height and
// allowfullscreen attributes are kept alongside the appended sandbox and
// loading attributes.
func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
	const (
		src  = `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
		want = `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
	)

	if got := Sanitize("http://example.org/", src); got != want {
		t.Errorf(`Wrong output: "%s" != "%s"`, want, got)
	}
}
func TestReplaceIframeURL(t *testing.T) { func TestReplaceIframeURL(t *testing.T) {
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>` input := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0"></iframe>`
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>` expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&amp;byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`