Skip to content

Commit

Permalink
[enh] speed up collector.scrape() and collector.fetch() | benchmark: 1.57s -> 1.44s
Browse files (browse the repository at this point in the history)
  • Loading branch information
asciimoo committed Mar 3, 2018
1 parent 536a5d3 commit 703b364
Show file tree
Hide file tree
Showing 2 changed files with 7 additions and 7 deletions.
13 changes: 6 additions & 7 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -365,11 +365,7 @@ func (c *Collector) scrape(u, method string, depth int, requestData io.Reader, c
}
}
if hdr == nil {
hdr = make(http.Header)
hdr.Set("User-Agent", c.UserAgent)
if method == "POST" {
hdr.Add("Content-Type", "application/x-www-form-urlencoded")
}
hdr = http.Header{"User-Agent": []string{c.UserAgent}}
}
rc, ok := requestData.(io.ReadCloser)
if !ok && requestData != nil {
Expand Down Expand Up @@ -754,7 +750,7 @@ func (c *Collector) handleOnResponse(r *Response) {
}

func (c *Collector) handleOnHTML(resp *Response) {
if !strings.Contains(strings.ToLower(resp.Headers.Get("Content-Type")), "html") || len(c.htmlCallbacks) == 0 {
if len(c.htmlCallbacks) == 0 || !strings.Contains(strings.ToLower(resp.Headers.Get("Content-Type")), "html") {
return
}
doc, err := goquery.NewDocumentFromReader(bytes.NewBuffer(resp.Body))
Expand All @@ -778,8 +774,11 @@ func (c *Collector) handleOnHTML(resp *Response) {
}

func (c *Collector) handleOnXML(resp *Response) {
if len(c.xmlCallbacks) == 0 {
return
}
contentType := strings.ToLower(resp.Headers.Get("Content-Type"))
if (!strings.Contains(contentType, "html") && !strings.Contains(contentType, "xml")) || len(c.xmlCallbacks) == 0 {
if !strings.Contains(contentType, "html") && !strings.Contains(contentType, "xml") {
return
}

Expand Down
1 change: 1 addition & 0 deletions colly_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ func newTestServer() *httptest.Server {
mux := http.NewServeMux()

mux.HandleFunc("/", func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(200)
w.Write(serverIndexResponse)
})

Expand Down

0 comments on commit 703b364

Please sign in to comment.