mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
cleanup sanitizer
This commit is contained in:
parent
485587825c
commit
493a4262b1
@ -15,10 +15,7 @@ import (
|
||||
"golang.org/x/net/html"
|
||||
)
|
||||
|
||||
var (
|
||||
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
|
||||
splitSrcsetRegex = regexp.MustCompile(`,\s+`)
|
||||
)
|
||||
var splitSrcsetRegex = regexp.MustCompile(`,\s+`)
|
||||
|
||||
// Sanitize returns safe HTML.
|
||||
func Sanitize(baseURL, input string) string {
|
||||
@ -56,7 +53,7 @@ func Sanitize(baseURL, input string) string {
|
||||
tagName := token.DataAtom.String()
|
||||
parentTag = tagName
|
||||
|
||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
||||
if isValidTag(tagName) {
|
||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||
|
||||
if hasRequiredAttributes(tagName, attrNames) {
|
||||
@ -80,7 +77,7 @@ func Sanitize(baseURL, input string) string {
|
||||
}
|
||||
case html.SelfClosingTagToken:
|
||||
tagName := token.DataAtom.String()
|
||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
||||
if isValidTag(tagName) {
|
||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||
|
||||
if hasRequiredAttributes(tagName, attrNames) {
|
||||
@ -112,7 +109,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
||||
if isExternalResourceAttribute(attribute.Key) {
|
||||
if tagName == "iframe" {
|
||||
if isValidIframeSource(baseURL, attribute.Val) {
|
||||
value = rewriteIframeURL(attribute.Val)
|
||||
value = attribute.Val
|
||||
} else {
|
||||
continue
|
||||
}
|
||||
@ -189,27 +186,6 @@ func isExternalResourceAttribute(attribute string) bool {
|
||||
}
|
||||
}
|
||||
|
||||
func isPixelTracker(tagName string, attributes []html.Attribute) bool {
|
||||
if tagName == "img" {
|
||||
hasHeight := false
|
||||
hasWidth := false
|
||||
|
||||
for _, attribute := range attributes {
|
||||
if attribute.Key == "height" && attribute.Val == "1" {
|
||||
hasHeight = true
|
||||
}
|
||||
|
||||
if attribute.Key == "width" && attribute.Val == "1" {
|
||||
hasWidth = true
|
||||
}
|
||||
}
|
||||
|
||||
return hasHeight && hasWidth
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
func hasRequiredAttributes(tagName string, attributes []string) bool {
|
||||
elements := make(map[string][]string)
|
||||
elements["a"] = []string{"href"}
|
||||
@ -305,34 +281,27 @@ func isBlockedResource(src string) bool {
|
||||
|
||||
func isValidIframeSource(baseURL, src string) bool {
|
||||
whitelist := []string{
|
||||
"https://invidio.us",
|
||||
"//www.youtube.com",
|
||||
"http://www.youtube.com",
|
||||
"https://www.youtube.com",
|
||||
"https://www.youtube-nocookie.com",
|
||||
"http://player.vimeo.com",
|
||||
"https://player.vimeo.com",
|
||||
"http://www.dailymotion.com",
|
||||
"https://www.dailymotion.com",
|
||||
"http://vk.com",
|
||||
"https://vk.com",
|
||||
"http://soundcloud.com",
|
||||
"https://soundcloud.com",
|
||||
"http://w.soundcloud.com",
|
||||
"https://w.soundcloud.com",
|
||||
"http://bandcamp.com",
|
||||
"https://bandcamp.com",
|
||||
"https://cdn.embedly.com",
|
||||
"https://player.bilibili.com",
|
||||
"bandcamp.com",
|
||||
"cdn.embedly.com",
|
||||
"invidio.us",
|
||||
"player.bilibili.com",
|
||||
"player.vimeo.com",
|
||||
"soundcloud.com",
|
||||
"vk.com",
|
||||
"w.soundcloud.com",
|
||||
"www.dailymotion.com",
|
||||
"www.youtube-nocookie.com",
|
||||
"www.youtube.com",
|
||||
}
|
||||
|
||||
domain := urlDomain(src)
|
||||
// allow iframe from same origin
|
||||
if urlDomain(baseURL) == urlDomain(src) {
|
||||
if urlDomain(baseURL) == domain {
|
||||
return true
|
||||
}
|
||||
|
||||
for _, prefix := range whitelist {
|
||||
if strings.HasPrefix(src, prefix) {
|
||||
for _, safeDomain := range whitelist {
|
||||
if safeDomain == domain {
|
||||
return true
|
||||
}
|
||||
}
|
||||
@ -410,15 +379,6 @@ func inList(needle string, haystack []string) bool {
|
||||
return false
|
||||
}
|
||||
|
||||
func rewriteIframeURL(link string) string {
|
||||
matches := youtubeEmbedRegex.FindStringSubmatch(link)
|
||||
if len(matches) == 2 {
|
||||
return `https://www.youtube-nocookie.com/embed/` + matches[1]
|
||||
}
|
||||
|
||||
return link
|
||||
}
|
||||
|
||||
func isBlockedTag(tagName string) bool {
|
||||
blacklist := []string{
|
||||
"noscript",
|
||||
|
@ -431,16 +431,6 @@ func TestBlacklistedLink(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestPixelTracker(t *testing.T) {
|
||||
input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
|
||||
expected := `<p> and </p>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestXmlEntities(t *testing.T) {
|
||||
input := `<pre>echo "test" > /etc/hosts</pre>`
|
||||
expected := `<pre>echo "test" > /etc/hosts</pre>`
|
||||
@ -461,56 +451,6 @@ func TestEspaceAttributes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="http://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceSecureYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&controls=0"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
|
||||
input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
|
||||
input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
|
||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
output := Sanitize("http://example.org/", input)
|
||||
|
||||
if expected != output {
|
||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
||||
}
|
||||
}
|
||||
|
||||
func TestReplaceIframeURL(t *testing.T) {
|
||||
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0"></iframe>`
|
||||
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||
|
Loading…
x
Reference in New Issue
Block a user