forked from egnimos/chromedp-example
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.go
100 lines (93 loc) · 2.65 KB
/
main.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
// Command subtree is a chromedp example demonstrating how to populate and
// traverse a subtree of the DOM.
package main
import (
"context"
"fmt"
"log"
"net/http"
"net/http/httptest"
"strings"
"time"
"github.com/chromedp/cdproto/cdp"
"github.com/chromedp/cdproto/dom"
"github.com/chromedp/chromedp"
)
// main runs the example and exits with a non-zero status on failure.
//
// The actual work lives in run so that its deferred cleanup has already
// completed by the time log.Fatal is reached: log.Fatal calls os.Exit, which
// does not run deferred functions.
func main() {
	if err := run(); err != nil {
		log.Fatal(err)
	}
}

// run starts a local test server serving a small HTML page, creates a chromedp
// context, and executes the travelSubtree tasks against the element with id
// "title". It returns the error reported by chromedp.Run, if any.
func run() error {
	// create a test server to serve the page
	ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		// The write error is deliberately ignored: this is a throwaway test
		// handler and there is nothing useful to do if the write fails.
		_, _ = fmt.Fprint(w, `
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Title</title>
</head>
<body>
<h1 id="title" class="link">
<a href="https://test.com/helloworld">
content of h1 1
</a>
<span>hello</span> world
</h1>
</body>
</html>
`,
		)
	}))
	defer ts.Close()

	// create context
	ctx, cancel := chromedp.NewContext(context.Background())
	defer cancel()

	// run task list
	return chromedp.Run(ctx, travelSubtree(ts.URL, `title`, chromedp.ByID))
}
// travelSubtree illustrates how to ask chromedp to populate a subtree of a node.
//
// It returns a task list that navigates to pageUrl, queries the nodes matching
// the selector of (using opts), requests the entire subtree of the first
// matched node, waits briefly, and then prints the matched nodes with
// printNodes.
//
// https://github.com/chromedp/chromedp/issues/632#issuecomment-654213589
// @mvdan explains why node.Children is almost always empty:
// Nodes are only obtained from the browser on an on-demand basis.
// If we always held the entire DOM node tree in memory,
// our CPU and memory usage in Go would be far higher.
// And chromedp.FromNode can be used to retrieve the child nodes.
//
// Users sometimes get confused (why is node.Children empty while
// node.ChildNodeCount > 0?), and some users want an easier way to traverse a
// subtree of the DOM. Hence this example.
func travelSubtree(pageUrl, of string, opts ...chromedp.QueryOption) chromedp.Tasks {
	var nodes []*cdp.Node
	return chromedp.Tasks{
		chromedp.Navigate(pageUrl),
		chromedp.Nodes(of, &nodes, opts...),
		// ask chromedp to populate the subtree of a node
		chromedp.ActionFunc(func(c context.Context) error {
			// opts are caller-supplied, so the query is not guaranteed to
			// have produced a node; report an error instead of panicking on
			// nodes[0].
			if len(nodes) == 0 {
				return fmt.Errorf("travel subtree: no nodes matched %q", of)
			}
			// depth -1 for the entire subtree
			// do your best to limit the size of the subtree
			return dom.RequestChildNodes(nodes[0].NodeID).WithDepth(-1).Do(c)
		}),
		// wait a little while for dom.EventSetChildNodes to be fired and handled
		chromedp.Sleep(time.Second),
		chromedp.ActionFunc(func(c context.Context) error {
			printNodes(nodes, 0)
			return nil
		}),
	}
}
// printNodes writes one line per node to standard output, prefixed by indent
// spaces, in the form "<NodeName>: <quoted detail>". The detail is the node's
// value for "#text" nodes and its attribute list for every other node.
// Nodes that report a positive ChildNodeCount have their Children printed
// recursively, indented four more spaces.
func printNodes(nodes []*cdp.Node, indent int) {
	prefix := strings.Repeat(" ", indent)
	for i := range nodes {
		n := nodes[i]
		fmt.Print(prefix)

		// Default to the attribute list; text nodes show their value instead.
		var detail interface{} = n.Attributes
		if n.NodeName == "#text" {
			detail = n.NodeValue
		}
		fmt.Printf("%s: %q\n", n.NodeName, detail)

		if n.ChildNodeCount <= 0 {
			continue
		}
		printNodes(n.Children, indent+4)
	}
}