Skip to content

Commit

Permalink
collector.FollowRedirects support
Browse files Browse the repository at this point in the history
  • Loading branch information
llonchj committed Mar 16, 2018
1 parent fa953e7 commit eaa6b9b
Showing 1 changed file with 11 additions and 0 deletions.
11 changes: 11 additions & 0 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,9 @@ type Collector struct {
// MaxDepth limits the recursion depth of visited URLs.
// Set it to 0 for infinite recursion (default).
MaxDepth int
// FollowRedirects allows Visit to follow redirects automatically.
// Set it to false to receive 30x responses as-is instead of following them.
FollowRedirects bool
// AllowedDomains is a domain whitelist.
// Leave it blank to allow any domains to be visited
AllowedDomains []string
Expand Down Expand Up @@ -186,6 +189,9 @@ var envMap = map[string]func(*Collector, string){
"IGNORE_ROBOTSTXT": func(c *Collector, val string) {
c.IgnoreRobotsTxt = isYesString(val)
},
"FOLLOW_REDIRECTS": func(c *Collector, val string) {
c.FollowRedirects = isYesString(val)
},
"MAX_BODY_SIZE": func(c *Collector, val string) {
size, err := strconv.Atoi(val)
if err == nil {
Expand Down Expand Up @@ -327,6 +333,7 @@ func Debugger(d debug.Debugger) func(*Collector) {
func (c *Collector) Init() {
c.UserAgent = "colly - https://github.com/gocolly/colly"
c.MaxDepth = 0
c.FollowRedirects = true
c.store = &storage.InMemoryStorage{}
c.store.Init()
c.MaxBodySize = 10 * 1024 * 1024
Expand Down Expand Up @@ -992,6 +999,10 @@ func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Requ
return fmt.Errorf("Not following redirect to %s because its not in AllowedDomains", req.URL.Host)
}

if !c.FollowRedirects {
return http.ErrUseLastResponse
}

// Honor Go's default maximum of 10 redirects
if len(via) >= 10 {
return http.ErrUseLastResponse
Expand Down

0 comments on commit eaa6b9b

Please sign in to comment.