Skip to content

Commit

Permalink
refactor to use filenames and no regex
Browse files Browse the repository at this point in the history
  • Loading branch information
ivantsepp authored and salemhilal committed Aug 29, 2022
1 parent 467a9cc commit 417edcc
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 55 deletions.
44 changes: 14 additions & 30 deletions index/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,9 +36,9 @@ type Index struct {
}

type IndexOptions struct {
ExcludeDotFiles bool
SpecialFiles []string
AutoGeneratedFilePatterns []string
ExcludeDotFiles bool
SpecialFiles []string
AutoGeneratedFiles []string
}

type SearchOptions struct {
Expand Down Expand Up @@ -78,11 +78,11 @@ type ExcludedFile struct {
}

type IndexRef struct {
Url string
Rev string
Time time.Time
dir string
AutoGeneratedFilePatterns []string
Url string
Rev string
Time time.Time
dir string
AutoGeneratedFiles []string
}

func (r *IndexRef) Dir() string {
Expand Down Expand Up @@ -185,20 +185,6 @@ func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error)
}
}

var autoGeneratedFre *regexp.Regexp
if len(n.Ref.AutoGeneratedFilePatterns) > 0 {
var pattern, sep string
for _, fp := range n.Ref.AutoGeneratedFilePatterns {
pattern += sep + "(" + fp + ")"
sep = "|"
}
autoGeneratedFre, err = regexp.Compile(pattern)
if err != nil {
return nil, err
}
}


files := n.idx.PostingQuery(index.RegexpQuery(re.Syntax))
for _, file := range files {
var matches []*Match
Expand Down Expand Up @@ -249,12 +235,10 @@ func (n *Index) Search(pat string, opt *SearchOptions) (*SearchResponse, error)
if len(matches) > 0 {
filesCollected++

autoGenerated := autoGeneratedFre != nil && autoGeneratedFre.MatchString(name, true, true) > 0

results = append(results, &FileMatch{
Filename: name,
Matches: matches,
AutoGenerated: autoGenerated,
AutoGenerated: containsString(n.Ref.AutoGeneratedFiles, name),
})
}
}
Expand Down Expand Up @@ -506,11 +490,11 @@ func Build(opt *IndexOptions, dst, src, url, rev string) (*IndexRef, error) {
}

r := &IndexRef{
Url: url,
Rev: rev,
Time: time.Now(),
dir: dst,
AutoGeneratedFilePatterns: opt.AutoGeneratedFilePatterns,
Url: url,
Rev: rev,
Time: time.Now(),
dir: dst,
AutoGeneratedFiles: opt.AutoGeneratedFiles,
}

if err := r.writeManifest(); err != nil {
Expand Down
6 changes: 3 additions & 3 deletions searcher/searcher.go
Original file line number Diff line number Diff line change
Expand Up @@ -414,9 +414,9 @@ func newSearcher(
}

opt := &index.IndexOptions{
ExcludeDotFiles: repo.ExcludeDotFiles,
SpecialFiles: wd.SpecialFiles(),
AutoGeneratedFilePatterns: wd.AutoGeneratedFilePatterns(vcsDir),
ExcludeDotFiles: repo.ExcludeDotFiles,
SpecialFiles: wd.SpecialFiles(),
AutoGeneratedFiles: wd.AutoGeneratedFiles(vcsDir),
}

var idxDir string
Expand Down
2 changes: 1 addition & 1 deletion vcs/bzr.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,6 @@ func (g *BzrDriver) SpecialFiles() []string {
}
}

func (g *BzrDriver) AutoGeneratedFilePatterns(dir string) []string {
func (g *BzrDriver) AutoGeneratedFiles(dir string) []string {
return []string{}
}
54 changes: 37 additions & 17 deletions vcs/git.go
Original file line number Diff line number Diff line change
@@ -1,23 +1,21 @@
package vcs

import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"io"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"strings"
)

const defaultRef = "master"
const autoGeneratedAttribute = "linguist-generated"

var headBranchRegexp = regexp.MustCompile(`HEAD branch: (?P<branch>.+)`)
var autoGeneratedFileRegexp = regexp.MustCompile(`(?P<path>.+) linguist-generated=true`)

func init() {
Register(newGit, "git")
Expand Down Expand Up @@ -154,27 +152,49 @@ func (g *GitDriver) SpecialFiles() []string {
}
}

func (g *GitDriver) AutoGeneratedFilePatterns(dir string) []string {
var filePatterns []string
path := filepath.Join(dir, ".gitattributes")
func (g *GitDriver) AutoGeneratedFiles(dir string) []string {
var files []string

filesCmd := exec.Command("git", "ls-files", "-z");
filesCmd.Dir = dir
pipe, err := filesCmd.StdoutPipe();

file, err := os.Open(path)
if err != nil {
return filePatterns
log.Printf("Error occured when running git ls-files in %s: %s.", dir, err)
return files
}

if err := filesCmd.Start(); err != nil {
log.Printf("Error occured when running git ls-files in %s: %s.", dir, err)
return files
}
defer file.Close()

scanner := bufio.NewScanner(file)
for scanner.Scan() {
matches := autoGeneratedFileRegexp.FindStringSubmatch(scanner.Text())
if len(matches) == 2 {
pattern := strings.ReplaceAll(matches[1], "**", "*")
pattern = strings.ReplaceAll(pattern, "*", ".*")
filePatterns = append(filePatterns, pattern)
attributesCmd := exec.Command("git", "check-attr", "--stdin", "-z", autoGeneratedAttribute);
attributesCmd.Dir = dir
attributesCmd.Stdin = pipe

out, err := attributesCmd.Output()

if err != nil {
log.Printf("Error occured when running git check-attr in %s: %s.", dir, err)
return files
}

// Split the output on NUL bytes; each record is expected to have the form: <path> NUL <attribute> NUL <info> NUL
tokens := bytes.Split(out, []byte{0})

for i := 2; i < len(tokens); i+=3 {
if string(tokens[i]) == "true" && string(tokens[i-1]) == autoGeneratedAttribute {
files = append(files, string(tokens[i-2]))
}
}

return filePatterns
if err := filesCmd.Wait(); err != nil {
log.Printf("Error occured when running git ls-files in %s: %s.", dir, err)
return files
}

return files
}

func (d *headBranchDetector) detectRef(dir string) string {
Expand Down
2 changes: 1 addition & 1 deletion vcs/hg.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,6 @@ func (g *MercurialDriver) SpecialFiles() []string {
}
}

func (g *MercurialDriver) AutoGeneratedFilePatterns(dir string) []string {
func (g *MercurialDriver) AutoGeneratedFiles(dir string) []string {
return []string{}
}
2 changes: 1 addition & 1 deletion vcs/svn.go
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,6 @@ func (g *SVNDriver) SpecialFiles() []string {
}
}

func (g *SVNDriver) AutoGeneratedFilePatterns(dir string) []string {
func (g *SVNDriver) AutoGeneratedFiles(dir string) []string {
return []string{}
}
4 changes: 2 additions & 2 deletions vcs/vcs.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@ type Driver interface {
// Return a list of special filenames that should not be indexed.
SpecialFiles() []string

// Return a list of file path patterns that are marked as auto-generated.
AutoGeneratedFilePatterns(dir string) []string
// Return a list of filenames that are marked as auto-generated.
AutoGeneratedFiles(dir string) []string

}

Expand Down

0 comments on commit 417edcc

Please sign in to comment.