do not strip out content inside table & code

This commit is contained in:
Nazar Kanaev
2021-04-05 11:04:24 +01:00
parent fa2fad0ff6
commit f590c358d2
4 changed files with 43 additions and 3 deletions

View File

@@ -32,6 +32,16 @@ func Query(node *html.Node, sel string) []*html.Node {
return FindNodes(node, matcher.Match)
}
func Closest(node *html.Node, sel string) *html.Node {
matcher := NewMatcher(sel)
for cur := node; cur != nil; cur = cur.Parent {
if matcher.Match(cur) {
return cur
}
}
return nil
}
func NewMatcher(sel string) Matcher {
multi := MultiMatch{}
parts := strings.Split(sel, ",")

View File

@@ -62,3 +62,28 @@ func TestQueryMulti(t *testing.T) {
t.Fatal("incorrect match")
}
}
func TestClosest(t *testing.T) {
html, _ := html.Parse(strings.NewReader(`
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title></title>
</head>
<body>
<div class="foo">
<p><a class="bar" href=""></a></p>
</div>
</body>
</html>
`))
link := Query(html, "a")
if link == nil || Attr(link[0], "class") != "bar" {
t.FailNow()
}
wrap := Closest(link[0], "div")
if wrap == nil || Attr(wrap, "class") != "foo" {
t.FailNow()
}
}