Skip to content

Commit

Permalink
logic changes, RedirectHandler implemented
Browse files Browse the repository at this point in the history
  • Loading branch information
llonchj committed Mar 16, 2018
1 parent eaa6b9b commit ca0264b
Showing 1 changed file with 10 additions and 8 deletions.
18 changes: 10 additions & 8 deletions colly.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,9 +57,6 @@ type Collector struct {
// MaxDepth limits the recursion depth of visited URLs.
// Set it to 0 for infinite recursion (default).
MaxDepth int
// FollowRedirects allows Visit to handle redirects automatically
// Set it to false for the collector to handle 30x responses.
FollowRedirects bool
// AllowedDomains is a domain whitelist.
// Leave it blank to allow any domains to be visited
AllowedDomains []string
Expand Down Expand Up @@ -94,7 +91,9 @@ type Collector struct {
ID uint32
// DetectCharset can enable character encoding detection for non-utf8 response bodies
// without explicit charset declaration. This feature uses https://github.com/saintfish/chardet
DetectCharset bool
DetectCharset bool
// RedirectHandler allows control over how a redirect is handled.
RedirectHandler func(req *http.Request, via []*http.Request) error
store storage.Storage
debugger debug.Debugger
robotsMap map[string]*robotstxt.RobotsData
Expand Down Expand Up @@ -190,7 +189,11 @@ var envMap = map[string]func(*Collector, string){
c.IgnoreRobotsTxt = isYesString(val)
},
"FOLLOW_REDIRECTS": func(c *Collector, val string) {
c.FollowRedirects = isYesString(val)
if !isYesString(val) {
c.RedirectHandler = func(req *http.Request, via []*http.Request) error {
return http.ErrUseLastResponse
}
}
},
"MAX_BODY_SIZE": func(c *Collector, val string) {
size, err := strconv.Atoi(val)
Expand Down Expand Up @@ -333,7 +336,6 @@ func Debugger(d debug.Debugger) func(*Collector) {
func (c *Collector) Init() {
c.UserAgent = "colly - https://github.com/gocolly/colly"
c.MaxDepth = 0
c.FollowRedirects = true
c.store = &storage.InMemoryStorage{}
c.store.Init()
c.MaxBodySize = 10 * 1024 * 1024
Expand Down Expand Up @@ -999,8 +1001,8 @@ func (c *Collector) checkRedirectFunc() func(req *http.Request, via []*http.Requ
return fmt.Errorf("Not following redirect to %s because its not in AllowedDomains", req.URL.Host)
}

if !c.FollowRedirects {
return http.ErrUseLastResponse
if c.RedirectHandler != nil {
return c.RedirectHandler(req, via)
}

// Honor Go's default maximum of 10 redirects
Expand Down

0 comments on commit ca0264b

Please sign in to comment.