From 3e0c784744c68ea963ab4003b2e01aa76fbf590c Mon Sep 17 00:00:00 2001 From: Nazar Kanaev Date: Thu, 1 Apr 2021 11:49:30 +0100 Subject: [PATCH] add whitelist from dompurify --- doc/thirdparty.txt | 3 +- src/content/sanitizer/sanitizer.go | 2 +- src/content/sanitizer/sanitizer_test.go | 2 +- src/content/sanitizer/whitelist.go | 166 ++++++++++++++++++++++++ 4 files changed, 170 insertions(+), 3 deletions(-) create mode 100644 src/content/sanitizer/whitelist.go diff --git a/doc/thirdparty.txt b/doc/thirdparty.txt index 1e83faa..6cb2413 100644 --- a/doc/thirdparty.txt +++ b/doc/thirdparty.txt @@ -11,7 +11,8 @@ The licenses are included, and the authorship comments are left intact. - sanitizer https://github.com/miniflux/v2 (commit:3cb04b2) Apache 2.0 - no changes + changed allowed tags to the one from https://github.com/cure53/DOMPurify + changed allowed uri schemes to the one from https://github.com/cure53/DOMPurify - systray https://github.com/getlantern/systray (commit:2c0986d) Apache 2.0 diff --git a/src/content/sanitizer/sanitizer.go b/src/content/sanitizer/sanitizer.go index 32b3ce1..b286a0a 100644 --- a/src/content/sanitizer/sanitizer.go +++ b/src/content/sanitizer/sanitizer.go @@ -2,7 +2,7 @@ // Use of this source code is governed by the Apache 2.0 // license that can be found in the LICENSE file. -package scraper +package sanitizer import ( "bytes" diff --git a/src/content/sanitizer/sanitizer_test.go b/src/content/sanitizer/sanitizer_test.go index 9a6e085..a1080ee 100644 --- a/src/content/sanitizer/sanitizer_test.go +++ b/src/content/sanitizer/sanitizer_test.go @@ -2,7 +2,7 @@ // Use of this source code is governed by the Apache 2.0 // license that can be found in the LICENSE file. 
// Allow-lists used by the sanitizer package: permitted tags, permitted
// attributes per tag, and permitted URI schemes.

// set is a minimal string set backed by a map. The zero value is an empty
// set: lookups on it are valid and always report false.
type set struct {
	m map[string]bool
}

// sset builds a set containing every string in vals. A nil or empty slice
// yields an empty set.
func sset(vals []string) set {
	m := make(map[string]bool, len(vals))
	for _, val := range vals {
		m[val] = true
	}
	return set{m: m}
}

// has reports whether val is a member of the set.
//
// It uses a value receiver on purpose: sets are stored as map values in
// allowedAttrs, and map elements are not addressable, so a pointer-receiver
// method could not be called as allowedAttrs[tag].has(attr). A value receiver
// only copies the map header and remains callable through a *set as well.
func (s set) has(val string) bool {
	_, ok := s.m[val]
	return ok
}

// allowedTags lists the element names that are allowed.
//
// taken from: https://github.com/cure53/DOMPurify/blob/e1c19cf6/src/tags.js
var allowedTags = sset([]string{
	"a",
	"abbr",
	"acronym",
	"address",
	"area",
	"article",
	"aside",
	"audio",
	"b",
	"bdi",
	"bdo",
	"big",
	"blink",
	"blockquote",
	"body",
	"br",
	"button",
	"canvas",
	"caption",
	"center",
	"cite",
	"code",
	"col",
	"colgroup",
	"content",
	"data",
	"datalist",
	"dd",
	"decorator",
	"del",
	"details",
	"dfn",
	"dialog",
	"dir",
	"div",
	"dl",
	"dt",
	"element",
	"em",
	"fieldset",
	"figcaption",
	"figure",
	"font",
	"footer",
	"form",
	"h1",
	"h2",
	"h3",
	"h4",
	"h5",
	"h6",
	"head",
	"header",
	"hgroup",
	"hr",
	"html",
	"i",
	"img",
	"input",
	"ins",
	"kbd",
	"label",
	"legend",
	"li",
	"main",
	"map",
	"mark",
	"marquee",
	"menu",
	"menuitem",
	"meter",
	"nav",
	"nobr",
	"ol",
	"optgroup",
	"option",
	"output",
	"p",
	"picture",
	"pre",
	"progress",
	"q",
	"rp",
	"rt",
	"ruby",
	"s",
	"samp",
	"section",
	"select",
	"shadow",
	"small",
	"source",
	"spacer",
	"span",
	"strike",
	"strong",
	"style",
	"sub",
	"summary",
	"sup",
	"table",
	"tbody",
	"td",
	"template",
	"textarea",
	"tfoot",
	"th",
	"thead",
	"time",
	"tr",
	"track",
	"tt",
	"u",
	"ul",
	"var",
	"video",
	"wbr",
})

// allowedAttrs maps a tag name to the set of attribute names allowed on that
// tag. Looking up a tag with no entry yields the zero set, whose has method
// always reports false.
//
// NOTE(review): "iframe" has an entry here but is not present in allowedTags;
// confirm that iframes are admitted through a separate path in the sanitizer.
var allowedAttrs = map[string]set{
	"img":     sset([]string{"alt", "title", "src", "srcset", "sizes"}),
	"audio":   sset([]string{"src"}),
	"video":   sset([]string{"poster", "height", "width", "src"}),
	"source":  sset([]string{"src", "type", "srcset", "sizes", "media"}),
	"td":      sset([]string{"rowspan", "colspan"}),
	"th":      sset([]string{"rowspan", "colspan"}),
	"q":       sset([]string{"cite"}),
	"a":       sset([]string{"href", "title"}),
	"time":    sset([]string{"datetime"}),
	"abbr":    sset([]string{"title"}),
	"acronym": sset([]string{"title"}),
	"iframe":  sset([]string{"width", "height", "frameborder", "src", "allowfullscreen"}),
}

// allowedURISchemes lists the URI schemes that are allowed. The names are
// stored without the trailing colon.
var allowedURISchemes = sset([]string{
	"http",
	"https",
	"ftp",
	"ftps",
	"tel",
	"mailto",
	"callto",
	"cid",
	"xmpp",
})