mirror of
https://github.com/nkanaev/yarr.git
synced 2025-05-24 00:33:14 +00:00
cleanup sanitizer
This commit is contained in:
parent
485587825c
commit
493a4262b1
@ -15,10 +15,7 @@ import (
|
|||||||
"golang.org/x/net/html"
|
"golang.org/x/net/html"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var splitSrcsetRegex = regexp.MustCompile(`,\s+`)
|
||||||
youtubeEmbedRegex = regexp.MustCompile(`//www\.youtube\.com/embed/(.*)`)
|
|
||||||
splitSrcsetRegex = regexp.MustCompile(`,\s+`)
|
|
||||||
)
|
|
||||||
|
|
||||||
// Sanitize returns safe HTML.
|
// Sanitize returns safe HTML.
|
||||||
func Sanitize(baseURL, input string) string {
|
func Sanitize(baseURL, input string) string {
|
||||||
@ -56,7 +53,7 @@ func Sanitize(baseURL, input string) string {
|
|||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
parentTag = tagName
|
parentTag = tagName
|
||||||
|
|
||||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
if isValidTag(tagName) {
|
||||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||||
|
|
||||||
if hasRequiredAttributes(tagName, attrNames) {
|
if hasRequiredAttributes(tagName, attrNames) {
|
||||||
@ -80,7 +77,7 @@ func Sanitize(baseURL, input string) string {
|
|||||||
}
|
}
|
||||||
case html.SelfClosingTagToken:
|
case html.SelfClosingTagToken:
|
||||||
tagName := token.DataAtom.String()
|
tagName := token.DataAtom.String()
|
||||||
if !isPixelTracker(tagName, token.Attr) && isValidTag(tagName) {
|
if isValidTag(tagName) {
|
||||||
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
attrNames, htmlAttributes := sanitizeAttributes(baseURL, tagName, token.Attr)
|
||||||
|
|
||||||
if hasRequiredAttributes(tagName, attrNames) {
|
if hasRequiredAttributes(tagName, attrNames) {
|
||||||
@ -112,7 +109,7 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
|||||||
if isExternalResourceAttribute(attribute.Key) {
|
if isExternalResourceAttribute(attribute.Key) {
|
||||||
if tagName == "iframe" {
|
if tagName == "iframe" {
|
||||||
if isValidIframeSource(baseURL, attribute.Val) {
|
if isValidIframeSource(baseURL, attribute.Val) {
|
||||||
value = rewriteIframeURL(attribute.Val)
|
value = attribute.Val
|
||||||
} else {
|
} else {
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
@ -189,27 +186,6 @@ func isExternalResourceAttribute(attribute string) bool {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func isPixelTracker(tagName string, attributes []html.Attribute) bool {
|
|
||||||
if tagName == "img" {
|
|
||||||
hasHeight := false
|
|
||||||
hasWidth := false
|
|
||||||
|
|
||||||
for _, attribute := range attributes {
|
|
||||||
if attribute.Key == "height" && attribute.Val == "1" {
|
|
||||||
hasHeight = true
|
|
||||||
}
|
|
||||||
|
|
||||||
if attribute.Key == "width" && attribute.Val == "1" {
|
|
||||||
hasWidth = true
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return hasHeight && hasWidth
|
|
||||||
}
|
|
||||||
|
|
||||||
return false
|
|
||||||
}
|
|
||||||
|
|
||||||
func hasRequiredAttributes(tagName string, attributes []string) bool {
|
func hasRequiredAttributes(tagName string, attributes []string) bool {
|
||||||
elements := make(map[string][]string)
|
elements := make(map[string][]string)
|
||||||
elements["a"] = []string{"href"}
|
elements["a"] = []string{"href"}
|
||||||
@ -305,34 +281,27 @@ func isBlockedResource(src string) bool {
|
|||||||
|
|
||||||
func isValidIframeSource(baseURL, src string) bool {
|
func isValidIframeSource(baseURL, src string) bool {
|
||||||
whitelist := []string{
|
whitelist := []string{
|
||||||
"https://invidio.us",
|
"bandcamp.com",
|
||||||
"//www.youtube.com",
|
"cdn.embedly.com",
|
||||||
"http://www.youtube.com",
|
"invidio.us",
|
||||||
"https://www.youtube.com",
|
"player.bilibili.com",
|
||||||
"https://www.youtube-nocookie.com",
|
"player.vimeo.com",
|
||||||
"http://player.vimeo.com",
|
"soundcloud.com",
|
||||||
"https://player.vimeo.com",
|
"vk.com",
|
||||||
"http://www.dailymotion.com",
|
"w.soundcloud.com",
|
||||||
"https://www.dailymotion.com",
|
"www.dailymotion.com",
|
||||||
"http://vk.com",
|
"www.youtube-nocookie.com",
|
||||||
"https://vk.com",
|
"www.youtube.com",
|
||||||
"http://soundcloud.com",
|
|
||||||
"https://soundcloud.com",
|
|
||||||
"http://w.soundcloud.com",
|
|
||||||
"https://w.soundcloud.com",
|
|
||||||
"http://bandcamp.com",
|
|
||||||
"https://bandcamp.com",
|
|
||||||
"https://cdn.embedly.com",
|
|
||||||
"https://player.bilibili.com",
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
domain := urlDomain(src)
|
||||||
// allow iframe from same origin
|
// allow iframe from same origin
|
||||||
if urlDomain(baseURL) == urlDomain(src) {
|
if urlDomain(baseURL) == domain {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, prefix := range whitelist {
|
for _, safeDomain := range whitelist {
|
||||||
if strings.HasPrefix(src, prefix) {
|
if safeDomain == domain {
|
||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -410,15 +379,6 @@ func inList(needle string, haystack []string) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func rewriteIframeURL(link string) string {
|
|
||||||
matches := youtubeEmbedRegex.FindStringSubmatch(link)
|
|
||||||
if len(matches) == 2 {
|
|
||||||
return `https://www.youtube-nocookie.com/embed/` + matches[1]
|
|
||||||
}
|
|
||||||
|
|
||||||
return link
|
|
||||||
}
|
|
||||||
|
|
||||||
func isBlockedTag(tagName string) bool {
|
func isBlockedTag(tagName string) bool {
|
||||||
blacklist := []string{
|
blacklist := []string{
|
||||||
"noscript",
|
"noscript",
|
||||||
|
@ -431,16 +431,6 @@ func TestBlacklistedLink(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestPixelTracker(t *testing.T) {
|
|
||||||
input := `<p><img src="https://tracker1.example.org/" height="1" width="1"> and <img src="https://tracker2.example.org/" height="1" width="1"/></p>`
|
|
||||||
expected := `<p> and </p>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestXmlEntities(t *testing.T) {
|
func TestXmlEntities(t *testing.T) {
|
||||||
input := `<pre>echo "test" > /etc/hosts</pre>`
|
input := `<pre>echo "test" > /etc/hosts</pre>`
|
||||||
expected := `<pre>echo "test" > /etc/hosts</pre>`
|
expected := `<pre>echo "test" > /etc/hosts</pre>`
|
||||||
@ -461,56 +451,6 @@ func TestEspaceAttributes(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func TestReplaceYoutubeURL(t *testing.T) {
|
|
||||||
input := `<iframe src="http://www.youtube.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent"></iframe>`
|
|
||||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?version=3&rel=1&fs=1&autohide=2&showsearch=0&showinfo=1&iv_load_policy=1&wmode=transparent" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReplaceSecureYoutubeURL(t *testing.T) {
|
|
||||||
input := `<iframe src="https://www.youtube.com/embed/test123"></iframe>`
|
|
||||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReplaceSecureYoutubeURLWithParameters(t *testing.T) {
|
|
||||||
input := `<iframe src="https://www.youtube.com/embed/test123?rel=0&controls=0"></iframe>`
|
|
||||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReplaceYoutubeURLAlreadyReplaced(t *testing.T) {
|
|
||||||
input := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin"></iframe>`
|
|
||||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/test123?rel=0&controls=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReplaceProtocolRelativeYoutubeURL(t *testing.T) {
|
|
||||||
input := `<iframe src="//www.youtube.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen"></iframe>`
|
|
||||||
expected := `<iframe src="https://www.youtube-nocookie.com/embed/Bf2W84jrGqs" width="560" height="314" allowfullscreen="allowfullscreen" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
|
||||||
output := Sanitize("http://example.org/", input)
|
|
||||||
|
|
||||||
if expected != output {
|
|
||||||
t.Errorf(`Wrong output: "%s" != "%s"`, expected, output)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
func TestReplaceIframeURL(t *testing.T) {
|
func TestReplaceIframeURL(t *testing.T) {
|
||||||
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0"></iframe>`
|
input := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0"></iframe>`
|
||||||
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
expected := `<iframe src="https://player.vimeo.com/video/123456?title=0&byline=0" sandbox="allow-scripts allow-same-origin allow-popups" loading="lazy"></iframe>`
|
||||||
|
Loading…
x
Reference in New Issue
Block a user