mirror of
https://github.com/nkanaev/yarr.git
synced 2026-06-10 02:13:21 +00:00
golines -w src
This commit is contained in:
@@ -27,10 +27,16 @@ var (
|
||||
|
||||
blacklistCandidatesRegexp = regexp.MustCompile(`(?i)popupbody|-ad|g-plus`)
|
||||
okMaybeItsACandidateRegexp = regexp.MustCompile(`(?i)and|article|body|column|main|shadow`)
|
||||
unlikelyCandidatesRegexp = regexp.MustCompile(`(?i)banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`)
|
||||
unlikelyCandidatesRegexp = regexp.MustCompile(
|
||||
`(?i)banner|breadcrumbs|combx|comment|community|cover-wrap|disqus|extra|foot|header|legends|menu|modal|related|remark|replies|rss|shoutbox|sidebar|skyscraper|social|sponsor|supplemental|ad-break|agegate|pagination|pager|popup|yom-remote`,
|
||||
)
|
||||
|
||||
negativeRegexp = regexp.MustCompile(`(?i)hidden|^hid$|hid$|hid|^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget|byline|author|dateline|writtenby|p-author`)
|
||||
positiveRegexp = regexp.MustCompile(`(?i)article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story`)
|
||||
negativeRegexp = regexp.MustCompile(
|
||||
`(?i)hidden|^hid$|hid$|hid|^hid |banner|combx|comment|com-|contact|foot|footer|footnote|masthead|media|meta|modal|outbrain|promo|related|scroll|share|shoutbox|sidebar|skyscraper|sponsor|shopping|tags|tool|widget|byline|author|dateline|writtenby|p-author`,
|
||||
)
|
||||
positiveRegexp = regexp.MustCompile(
|
||||
`(?i)article|body|content|entry|hentry|h-entry|main|page|pagination|post|text|blog|story`,
|
||||
)
|
||||
)
|
||||
|
||||
type nodeScores map[*html.Node]float32
|
||||
|
||||
@@ -146,7 +146,10 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
||||
}
|
||||
|
||||
attrNames = append(attrNames, attribute.Key)
|
||||
htmlAttrs = append(htmlAttrs, fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)))
|
||||
htmlAttrs = append(
|
||||
htmlAttrs,
|
||||
fmt.Sprintf(`%s="%s"`, attribute.Key, html.EscapeString(value)),
|
||||
)
|
||||
}
|
||||
|
||||
extraAttrNames, extraHTMLAttributes := getExtraAttributes(tagName)
|
||||
@@ -161,11 +164,25 @@ func sanitizeAttributes(baseURL, tagName string, attributes []html.Attribute) ([
|
||||
func getExtraAttributes(tagName string) ([]string, []string) {
|
||||
switch tagName {
|
||||
case "a":
|
||||
return []string{"rel", "target", "referrerpolicy"}, []string{`rel="noopener noreferrer"`, `target="_blank"`, `referrerpolicy="no-referrer"`}
|
||||
return []string{
|
||||
"rel",
|
||||
"target",
|
||||
"referrerpolicy",
|
||||
}, []string{
|
||||
`rel="noopener noreferrer"`,
|
||||
`target="_blank"`,
|
||||
`referrerpolicy="no-referrer"`,
|
||||
}
|
||||
case "video", "audio":
|
||||
return []string{"controls"}, []string{"controls"}
|
||||
case "iframe":
|
||||
return []string{"sandbox", "loading"}, []string{`sandbox="allow-scripts allow-same-origin allow-popups"`, `loading="lazy"`}
|
||||
return []string{
|
||||
"sandbox",
|
||||
"loading",
|
||||
}, []string{
|
||||
`sandbox="allow-scripts allow-same-origin allow-popups"`,
|
||||
`loading="lazy"`,
|
||||
}
|
||||
case "img":
|
||||
return []string{"loading"}, []string{`loading="lazy"`, `referrerpolicy="no-referrer"`}
|
||||
default:
|
||||
|
||||
Reference in New Issue
Block a user