Skip to content

Commit

Permalink
Add alternate ctags parser and language map (#581)
Browse files Browse the repository at this point in the history
* Add alternate ctags parser and language map

* Fix minor issues

* Lint protobuf

* Fix nil access

* Remove debug statements

* Small fixes

* ctagsAddSymbols -> ctagsAddSymbolsParser

* Split out parser logic

* Fix reviews
  • Loading branch information
SuperAuguste committed Jun 1, 2023
1 parent 70af112 commit ffc7feb
Show file tree
Hide file tree
Showing 10 changed files with 562 additions and 228 deletions.
43 changes: 33 additions & 10 deletions build/builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,9 @@ type Options struct {
// if a valid binary couldn't be found.
CTagsPath string

// ScipCTagsPath is the path to the scip-ctags binary; it plays the same role
// as CTagsPath does for universal-ctags.
ScipCTagsPath string

// If set, ctags must succeed.
CTagsMustSucceed bool

Expand Down Expand Up @@ -113,6 +116,8 @@ type Options struct {
// since the last indexing job for this repository. These files will be tombstoned
// in the older shards for this repository.
changedOrRemovedFiles []string

LanguageMap ctags.LanguageMap
}

// HashOptions contains only the options in Options that upon modification leads to IndexState of IndexStateMismatch during the next index building.
Expand Down Expand Up @@ -244,7 +249,7 @@ type Builder struct {
todo []*zoekt.Document
size int

parser ctags.Parser
parserMap ctags.ParserMap

building sync.WaitGroup

Expand Down Expand Up @@ -282,12 +287,28 @@ func checkCTags() string {
return ""
}

// checkScipCTags locates the scip-ctags binary.
//
// A non-empty SCIP_CTAGS_COMMAND environment variable takes precedence and is
// returned verbatim. Otherwise "scip-ctags" is looked up via exec.LookPath.
// The empty string is returned when neither source yields a result.
func checkScipCTags() string {
	fromEnv := os.Getenv("SCIP_CTAGS_COMMAND")
	if fromEnv != "" {
		return fromEnv
	}

	resolved, err := exec.LookPath("scip-ctags")
	if err != nil {
		return ""
	}
	return resolved
}

// SetDefaults sets reasonable default options.
func (o *Options) SetDefaults() {
if o.CTagsPath == "" && !o.DisableCTags {
o.CTagsPath = checkCTags()
}

if o.ScipCTagsPath == "" && !o.DisableCTags {
o.ScipCTagsPath = checkScipCTags()
}

if o.Parallelism == 0 {
o.Parallelism = 4
}
Expand Down Expand Up @@ -547,15 +568,17 @@ func NewBuilder(opts Options) (*Builder, error) {
return nil, fmt.Errorf("ctags binary not found, but CTagsMustSucceed set")
}

if opts.CTagsPath != "" {
parser, err := ctags.NewParser(opts.CTagsPath)
if err != nil && opts.CTagsMustSucceed {
return nil, fmt.Errorf("ctags.NewParser: %v", err)
}
parserMap, err := ctags.NewParserMap(ctags.ParserBinMap{
ctags.UniversalCTags: b.opts.CTagsPath,
ctags.ScipCTags: b.opts.ScipCTagsPath,
}, b.opts.CTagsMustSucceed)

b.parser = parser
if err != nil {
return nil, err
}

b.parserMap = parserMap

b.shardLogger = &lumberjack.Logger{
Filename: filepath.Join(opts.IndexDir, "zoekt-builder-shard-log.tsv"),
MaxSize: 100, // Megabyte
Expand Down Expand Up @@ -994,13 +1017,13 @@ func sortDocuments(todo []*zoekt.Document) {
}

func (b *Builder) buildShard(todo []*zoekt.Document, nextShardNum int) (*finishedShard, error) {
if !b.opts.DisableCTags && b.opts.CTagsPath != "" {
err := ctagsAddSymbolsParser(todo, b.parser)
if !b.opts.DisableCTags && (b.opts.CTagsPath != "" || b.opts.ScipCTagsPath != "") {
err := ctagsAddSymbolsParserMap(todo, b.opts.LanguageMap, b.parserMap)
if b.opts.CTagsMustSucceed && err != nil {
return nil, err
}
if err != nil {
log.Printf("ignoring %s error: %v", b.opts.CTagsPath, err)
log.Printf("ignoring universal:%s or scip:%s error: %v", b.opts.CTagsPath, b.opts.ScipCTagsPath, err)
}
}

Expand Down
14 changes: 13 additions & 1 deletion build/ctags.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,12 +22,24 @@ import (
"github.com/sourcegraph/zoekt/ctags"
)

func ctagsAddSymbolsParser(todo []*zoekt.Document, parser ctags.Parser) error {
func ctagsAddSymbolsParserMap(todo []*zoekt.Document, languageMap ctags.LanguageMap, parserMap ctags.ParserMap) error {
for _, doc := range todo {
if doc.Symbols != nil {
continue
}

zoekt.DetermineLanguageIfUnknown(doc)

parserKind := languageMap[doc.Language]
if parserKind == ctags.NoCTags {
continue
}

parser := parserMap[parserKind]
if parser == nil {
parser = parserMap[ctags.UniversalCTags]
}

es, err := parser.Parse(doc.Name, doc.Content)
if err != nil {
return err
Expand Down
11 changes: 11 additions & 0 deletions cmd/zoekt-git-index/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ import (
"go.uber.org/automaxprocs/maxprocs"

"github.com/sourcegraph/zoekt/cmd"
"github.com/sourcegraph/zoekt/ctags"
"github.com/sourcegraph/zoekt/gitindex"
)

Expand All @@ -44,6 +45,7 @@ func run() int {
deltaShardNumberFallbackThreshold := flag.Uint64("delta_threshold", 0, "upper limit on the number of preexisting shards that can exist before attempting a delta build (0 to disable fallback behavior)")
offlineRanking := flag.String("offline_ranking", "", "the name of the file that contains the ranking info.")
offlineRankingVersion := flag.String("offline_ranking_version", "", "a version string identifying the contents in offline_ranking.")
languageMap := flag.String("language_map", "", "a mapping between a language and its ctags processor (a:0,b:3).")
flag.Parse()

// Tune GOMAXPROCS to match Linux container CPU quota.
Expand Down Expand Up @@ -96,6 +98,15 @@ func run() int {
gitRepos[repoDir] = name
}

opts.LanguageMap = make(ctags.LanguageMap)
for _, mapping := range strings.Split(*languageMap, ",") {
m := strings.Split(mapping, ":")
if len(m) != 2 {
continue
}
opts.LanguageMap[m[0]] = ctags.StringToParser(m[1])
}

exitStatus := 0
for dir, name := range gitRepos {
opts.RepositoryDescription.Name = name
Expand Down
14 changes: 14 additions & 0 deletions cmd/zoekt-sourcegraph-indexserver/index.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ import (

"github.com/sourcegraph/zoekt"
"github.com/sourcegraph/zoekt/build"
"github.com/sourcegraph/zoekt/ctags"

sglog "github.com/sourcegraph/log"
)
Expand Down Expand Up @@ -67,6 +68,9 @@ type IndexOptions struct {

// Archived is true if the repository is archived.
Archived bool

// LanguageMap maps each language to the ctags parser used for it:
// scip-ctags, universal-ctags, or neither.
LanguageMap ctags.LanguageMap
}

// indexArgs represents the arguments we pass to zoekt-git-index
Expand Down Expand Up @@ -123,6 +127,8 @@ func (o *indexArgs) BuildOptions() *build.Options {
IsDelta: o.UseDelta,

DocumentRanksVersion: o.DocumentRanksVersion,

LanguageMap: o.LanguageMap,
}
}

Expand Down Expand Up @@ -389,6 +395,14 @@ func gitIndex(c gitIndexConfig, o *indexArgs, sourcegraph Sourcegraph, l sglog.L
args = append(args, "-delta_threshold", strconv.FormatUint(o.DeltaShardNumberFallbackThreshold, 10))
}

if len(o.LanguageMap) > 0 {
var languageMap []string
for language, parser := range o.LanguageMap {
languageMap = append(languageMap, language+":"+ctags.ParserToString(parser))
}
args = append(args, "-language_map", strings.Join(languageMap, ","))
}

args = append(args, buildOptions.Args()...)
args = append(args, gitDir)

Expand Down
Loading

0 comments on commit ffc7feb

Please sign in to comment.