mirror of
https://github.com/nkanaev/yarr.git
synced 2025-09-13 18:00:05 +00:00
do not strip out content inside table & code
This commit is contained in:
@@ -32,6 +32,16 @@ func Query(node *html.Node, sel string) []*html.Node {
|
||||
return FindNodes(node, matcher.Match)
|
||||
}
|
||||
|
||||
func Closest(node *html.Node, sel string) *html.Node {
|
||||
matcher := NewMatcher(sel)
|
||||
for cur := node; cur != nil; cur = cur.Parent {
|
||||
if matcher.Match(cur) {
|
||||
return cur
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func NewMatcher(sel string) Matcher {
|
||||
multi := MultiMatch{}
|
||||
parts := strings.Split(sel, ",")
|
||||
|
@@ -62,3 +62,28 @@ func TestQueryMulti(t *testing.T) {
|
||||
t.Fatal("incorrect match")
|
||||
}
|
||||
}
|
||||
|
||||
func TestClosest(t *testing.T) {
|
||||
html, _ := html.Parse(strings.NewReader(`
|
||||
<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<title></title>
|
||||
</head>
|
||||
<body>
|
||||
<div class="foo">
|
||||
<p><a class="bar" href=""></a></p>
|
||||
</div>
|
||||
</body>
|
||||
</html>
|
||||
`))
|
||||
link := Query(html, "a")
|
||||
if link == nil || Attr(link[0], "class") != "bar" {
|
||||
t.FailNow()
|
||||
}
|
||||
wrap := Closest(link[0], "div")
|
||||
if wrap == nil || Attr(wrap, "class") != "foo" {
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
|
Reference in New Issue
Block a user