diff --git a/index/index.go b/index/index.go index 740c5f1d..052f7fd5 100644 --- a/index/index.go +++ b/index/index.go @@ -36,9 +36,9 @@ type Index struct { } type IndexOptions struct { - ExcludeDotFiles bool - SpecialFiles []string - AutoGeneratedFilePatterns []string + ExcludeDotFiles bool + SpecialFiles []string + AutoGeneratedFiles []string } type SearchOptions struct { @@ -78,11 +78,11 @@ type ExcludedFile struct { } type IndexRef struct { - Url string - Rev string - Time time.Time - dir string - AutoGeneratedFilePatterns []string + Url string + Rev string + Time time.Time + dir string + AutoGeneratedFiles []string } func (r *IndexRef) Dir() string { @@ -185,20 +185,6 @@ func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error) } } - var autoGeneratedFre *regexp.Regexp - if len(n.Ref.AutoGeneratedFilePatterns) > 0 { - var pattern, sep string - for _, fp := range n.Ref.AutoGeneratedFilePatterns { - pattern += sep + "(" + fp + ")" - sep = "|" - } - autoGeneratedFre, err = regexp.Compile(pattern) - if err != nil { - return nil, err - } - } - - files := n.idx.PostingQuery(index.RegexpQuery(re.Syntax)) for _, file := range files { var matches []*Match @@ -249,12 +235,10 @@ func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error) if len(matches) > 0 { filesCollected++ - autoGenerated := autoGeneratedFre != nil && autoGeneratedFre.MatchString(name, true, true) > 0 - results = append(results, &FileMatch{ Filename: name, Matches: matches, - AutoGenerated: autoGenerated, + AutoGenerated: containsString(n.Ref.AutoGeneratedFiles, name), }) } } @@ -506,11 +490,11 @@ func Build(opt *IndexOptions, dst, src, url, rev string) (*IndexRef, error) { } r := &IndexRef{ - Url: url, - Rev: rev, - Time: time.Now(), - dir: dst, - AutoGeneratedFilePatterns: opt.AutoGeneratedFilePatterns, + Url: url, + Rev: rev, + Time: time.Now(), + dir: dst, + AutoGeneratedFiles: opt.AutoGeneratedFiles, } if err := r.writeManifest(); err != nil { diff --git a/searcher/searcher.go b/searcher/searcher.go index eff47e1a..bfe4388d 100644 --- a/searcher/searcher.go +++ b/searcher/searcher.go @@ -414,9 +414,9 @@ func newSearcher( } opt := &index.IndexOptions{ - ExcludeDotFiles: repo.ExcludeDotFiles, - SpecialFiles: wd.SpecialFiles(), - AutoGeneratedFilePatterns: wd.AutoGeneratedFilePatterns(vcsDir), + ExcludeDotFiles: repo.ExcludeDotFiles, + SpecialFiles: wd.SpecialFiles(), + AutoGeneratedFiles: wd.AutoGeneratedFiles(vcsDir), } var idxDir string diff --git a/vcs/bzr.go b/vcs/bzr.go index e895f8ee..1797635d 100644 --- a/vcs/bzr.go +++ b/vcs/bzr.go @@ -78,6 +78,6 @@ func (g *BzrDriver) SpecialFiles() []string { } } -func (g *BzrDriver) AutoGeneratedFilePatterns(dir string) []string { +func (g *BzrDriver) AutoGeneratedFiles(dir string) []string { return []string{} } diff --git a/vcs/git.go b/vcs/git.go index ac16901b..004f0c2f 100644 --- a/vcs/git.go +++ b/vcs/git.go @@ -1,13 +1,11 @@ package vcs import ( - "bufio" "bytes" "encoding/json" "fmt" "io" "log" - "os" "os/exec" "path/filepath" "regexp" @@ -15,9 +13,9 @@ import ( ) const defaultRef = "master" +const autoGeneratedAttribute = "linguist-generated" var headBranchRegexp = regexp.MustCompile(`HEAD branch: (?P.+)`) -var autoGeneratedFileRegexp = regexp.MustCompile(`(?P.+) linguist-generated=true`) func init() { Register(newGit, "git") @@ -154,27 +152,49 @@ func (g *GitDriver) SpecialFiles() []string { } } -func (g *GitDriver) AutoGeneratedFilePatterns(dir string) []string { - var filePatterns []string - path := filepath.Join(dir, ".gitattributes") +func (g *GitDriver) AutoGeneratedFiles(dir string) []string { + var files []string + + filesCmd := exec.Command("git", "ls-files", "-z"); + filesCmd.Dir = dir + pipe, err := filesCmd.StdoutPipe(); - file, err := os.Open(path) if err != nil { - return filePatterns + log.Printf("Error occured when running git ls-files in %s: %s.", dir, err) + return files + } + + if err := filesCmd.Start(); err != nil { + log.Printf("Error occured when running git ls-files in %s: %s.", dir, err) + return files } - defer file.Close() - scanner := bufio.NewScanner(file) - for scanner.Scan() { - matches := autoGeneratedFileRegexp.FindStringSubmatch(scanner.Text()) - if len(matches) == 2 { - pattern := strings.ReplaceAll(matches[1], "**", "*") - pattern = strings.ReplaceAll(pattern, "*", ".*") - filePatterns = append(filePatterns, pattern) + attributesCmd := exec.Command("git", "check-attr", "--stdin", "-z", autoGeneratedAttribute); + attributesCmd.Dir = dir + attributesCmd.Stdin = pipe + + out, err := attributesCmd.Output() + + if err != nil { + log.Printf("Error occured when running git check-attr in %s: %s.", dir, err) + return files + } + + // Split by NUL and we expect the format: NUL NUL NUL + tokens := bytes.Split(out, []byte{0}) + + for i := 2; i < len(tokens); i+=3 { + if string(tokens[i]) == "true" && string(tokens[i-1]) == autoGeneratedAttribute { + files = append(files, string(tokens[i-2])) } } - return filePatterns + if err := filesCmd.Wait(); err != nil { + log.Printf("Error occured when running git ls-files in %s: %s.", dir, err) + return files + } + + return files } func (d *headBranchDetector) detectRef(dir string) string { diff --git a/vcs/hg.go b/vcs/hg.go index b1a43d99..ac285836 100644 --- a/vcs/hg.go +++ b/vcs/hg.go @@ -80,6 +80,6 @@ func (g *MercurialDriver) SpecialFiles() []string { } } -func (g *MercurialDriver) AutoGeneratedFilePatterns(dir string) []string { +func (g *MercurialDriver) AutoGeneratedFiles(dir string) []string { return []string{} } diff --git a/vcs/svn.go b/vcs/svn.go index 6568dacd..0c8571a3 100644 --- a/vcs/svn.go +++ b/vcs/svn.go @@ -101,6 +101,6 @@ func (g *SVNDriver) SpecialFiles() []string { } } -func (g *SVNDriver) AutoGeneratedFilePatterns(dir string) []string { +func (g *SVNDriver) AutoGeneratedFiles(dir string) []string { return []string{} } diff --git a/vcs/vcs.go b/vcs/vcs.go index 0b0b629d..26f0d4d8 100644 --- a/vcs/vcs.go +++ b/vcs/vcs.go @@ -27,8 +27,8 @@ type Driver interface { // Return a list of special filenames that should not be indexed. SpecialFiles() []string - // Return a list of file path patterns that are marked as auto-generated. - AutoGeneratedFilePatterns(dir string) []string + // Return a list of filenames that are marked as auto-generated. + AutoGeneratedFiles(dir string) []string }