Skip to content

Commit

Permalink
[enh] add Index attribute to HTMLElement - closes #211
Browse files Browse the repository at this point in the history
  • Loading branch information
asciimoo committed Aug 22, 2018
1 parent 5ed0687 commit d4e95b5
Show file tree
Hide file tree
Showing 2 changed files with 10 additions and 5 deletions.
6 changes: 4 additions & 2 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -927,9 +927,11 @@ func (c *Collector) handleOnHTML(resp *Response) error {
resp.Request.baseURL, _ = url.Parse(href)
}
for _, cc := range c.htmlCallbacks {
doc.Find(cc.Selector).Each(func(i int, s *goquery.Selection) {
i := 0
doc.Find(cc.Selector).Each(func(_ int, s *goquery.Selection) {
for _, n := range s.Nodes {
e := NewHTMLElementFromSelectionNode(resp, s, n)
e := NewHTMLElementFromSelectionNode(resp, s, n, i)
i += 1
if c.debugger != nil {
c.debugger.Event(createEvent("html", resp.Request.ID, c.ID, map[string]string{
"selector": cc.Selector,
Expand Down
9 changes: 6 additions & 3 deletions htmlelement.go
Original file line number Diff line number Diff line change
Expand Up @@ -34,16 +34,19 @@ type HTMLElement struct {
// DOM is the goquery parsed DOM object of the page. DOM is relative
// to the current HTMLElement
DOM *goquery.Selection
// Index stores the zero-based position of the current element within all the elements matched by an OnHTML callback.
Index int
}

// NewHTMLElementFromSelectionNode creates an HTMLElement from a goquery.Selection node.
func NewHTMLElementFromSelectionNode(resp *Response, s *goquery.Selection, n *html.Node) *HTMLElement {
func NewHTMLElementFromSelectionNode(resp *Response, s *goquery.Selection, n *html.Node, idx int) *HTMLElement {
return &HTMLElement{
Name: n.Data,
Request: resp.Request,
Response: resp,
Text: goquery.NewDocumentFromNode(n).Text(),
DOM: s,
Index: idx,
attributes: n.Attr,
}
}
Expand Down Expand Up @@ -92,7 +95,7 @@ func (h *HTMLElement) ForEach(goquerySelector string, callback func(int, *HTMLEl
i := 0
h.DOM.Find(goquerySelector).Each(func(_ int, s *goquery.Selection) {
for _, n := range s.Nodes {
callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n))
callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i))
i++
}
})
Expand All @@ -107,7 +110,7 @@ func (h *HTMLElement) ForEachWithBreak(goquerySelector string, callback func(int
i := 0
h.DOM.Find(goquerySelector).EachWithBreak(func(_ int, s *goquery.Selection) bool {
for _, n := range s.Nodes {
if callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n)) {
if callback(i, NewHTMLElementFromSelectionNode(h.Response, s, n, i)) {
return true
}
i++
Expand Down

0 comments on commit d4e95b5

Please sign in to comment.