Skip to content

Commit

Permalink
exec: initial commit of execgen tool
Browse files Browse the repository at this point in the history
Execgen will be our tool for generating templated code necessary for
columnarized execution. So far it only generates the
EncDatumRowsToColVec function, which is used by the columnarizer to
convert a RowSource into a columnarized Operator.

Release note: None
  • Loading branch information
solongordon committed Oct 18, 2018
1 parent 2a53922 commit 3d2038d
Show file tree
Hide file tree
Showing 9 changed files with 453 additions and 116 deletions.
14 changes: 11 additions & 3 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,8 @@ PROTOBUF_TARGETS := bin/.go_protobuf_sources bin/.gw_protobuf_sources bin/.cpp_p

DOCGEN_TARGETS := bin/.docgen_bnfs bin/.docgen_functions

EXECGEN_TARGETS = pkg/sql/exec/rowstovec.og.go

OPTGEN_TARGETS = \
pkg/sql/opt/memo/expr.og.go \
pkg/sql/opt/operator.og.go \
Expand Down Expand Up @@ -735,7 +737,7 @@ BUILDINFO = .buildinfo/tag .buildinfo/rev
BUILD_TAGGED_RELEASE =

$(go-targets): bin/.bootstrap $(BUILDINFO) $(CGO_FLAGS_FILES) $(PROTOBUF_TARGETS)
$(go-targets): $(SQLPARSER_TARGETS) $(OPTGEN_TARGETS)
$(go-targets): $(SQLPARSER_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS)
$(go-targets): override LINKFLAGS += \
-X "github.com/cockroachdb/cockroach/pkg/build.tag=$(shell cat .buildinfo/tag)" \
-X "github.com/cockroachdb/cockroach/pkg/build.rev=$(shell cat .buildinfo/rev)" \
Expand Down Expand Up @@ -914,7 +916,7 @@ dupl: bin/.bootstrap

.PHONY: generate
generate: ## Regenerate generated code.
generate: protobuf $(DOCGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen
generate: protobuf $(DOCGEN_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(SQLPARSER_TARGETS) $(SETTINGS_DOC_PAGE) bin/langgen
$(GO) generate $(GOFLAGS) -tags '$(TAGS)' -ldflags '$(LINKFLAGS)' $(PKG)

.PHONY: lint
Expand Down Expand Up @@ -962,6 +964,7 @@ $(ARCHIVE): $(ARCHIVE).tmp
ARCHIVE_EXTRAS = \
$(BUILDINFO) \
$(SQLPARSER_TARGETS) \
$(EXECGEN_TARGETS) \
$(OPTGEN_TARGETS) \
pkg/ui/distccl/bindata.go pkg/ui/distoss/bindata.go

Expand Down Expand Up @@ -1307,6 +1310,9 @@ optgen-defs := pkg/sql/opt/ops/*.opt
optgen-norm-rules := pkg/sql/opt/norm/rules/*.opt
optgen-xform-rules := pkg/sql/opt/xform/rules/*.opt

pkg/sql/exec/rowstovec.og.go: bin/execgen
execgen -out $@ rowstovec

pkg/sql/opt/memo/expr.og.go: $(optgen-defs) bin/optgen
optgen -out $@ exprs $(optgen-defs)

Expand Down Expand Up @@ -1360,7 +1366,7 @@ clean: clean-c-deps
.PHONY: maintainer-clean
maintainer-clean: ## Like clean, but also remove some auto-generated source code.
maintainer-clean: clean ui-maintainer-clean
rm -f $(SQLPARSER_TARGETS) $(OPTGEN_TARGETS) $(UI_PROTOS_OSS) $(UI_PROTOS_CCL)
rm -f $(SQLPARSER_TARGETS) $(EXECGEN_TARGETS) $(OPTGEN_TARGETS) $(UI_PROTOS_OSS) $(UI_PROTOS_CCL)

.PHONY: unsafe-clean
unsafe-clean: ## Like maintainer-clean, but also remove ALL untracked/ignored files.
Expand All @@ -1378,6 +1384,7 @@ bins = \
bin/cockroach-oss \
bin/cockroach-short \
bin/docgen \
bin/execgen \
bin/generate-binary \
bin/github-post \
bin/github-pull-request-make \
Expand All @@ -1401,6 +1408,7 @@ testbins = \
bin/logictestccl

# Mappings for binaries that don't live in pkg/cmd.
execgen-package = ./pkg/sql/exec/execgen/cmd/execgen
langgen-package = ./pkg/sql/opt/optgen/cmd/langgen
optgen-package = ./pkg/sql/opt/optgen/cmd/optgen
logictest-package = ./pkg/sql/logictest
Expand Down
2 changes: 2 additions & 0 deletions build/variables.mk
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ define VALID_VARS
C_LIBS_OSS
DOCGEN_TARGETS
DUPLFLAGS
EXECGEN_TARGETS
EXTRA_XCMAKE_FLAGS
EXTRA_XCONFIGURE_FLAGS
FILES
Expand Down Expand Up @@ -156,6 +157,7 @@ define VALID_VARS
cmake-flags
configure-flags
cyan
execgen-package
go-targets
go-targets-ccl
have-defs
Expand Down
116 changes: 3 additions & 113 deletions pkg/sql/distsqlrun/columnarizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,9 @@
package distsqlrun

import (
"fmt"

"github.com/cockroachdb/cockroach/pkg/sql/exec"
"github.com/cockroachdb/cockroach/pkg/sql/exec/types"
"github.com/cockroachdb/cockroach/pkg/sql/sem/tree"
"github.com/cockroachdb/cockroach/pkg/sql/sqlbase"
"github.com/cockroachdb/cockroach/pkg/util/encoding"
)

// columnarizer turns a RowSource input into an exec.Operator output, by reading
Expand Down Expand Up @@ -91,115 +87,9 @@ func (c *columnarizer) Next() exec.ColBatch {

// Write each column into the output batch.
for idx, ct := range columnTypes {
vec := c.batch.ColVec(idx)
switch ct.SemanticType {
// TODO(solon): these should be autogenerated from a template.
case sqlbase.ColumnType_BOOL:
col := vec.Bool()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, bool(*ed.Datum.(*tree.DBool)))
}
case sqlbase.ColumnType_INT:
switch ct.Width {
case 8:
col := vec.Int8()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int8(*ed.Datum.(*tree.DInt))
}
case 16:
col := vec.Int16()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int16(*ed.Datum.(*tree.DInt))
}
case 32:
col := vec.Int32()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int32(*ed.Datum.(*tree.DInt))
}
case 0, 64:
col := vec.Int64()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = int64(*c.buffered[i][idx].Datum.(*tree.DInt))
}
default:
panic(fmt.Sprintf("integer with unknown width %d", ct.Width))
}
case sqlbase.ColumnType_FLOAT:
col := vec.Float64()
for i := uint16(0); i < nRows; i++ {
ed := c.buffered[i][idx]
if err := ed.EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
if ed.Datum == tree.DNull {
vec.SetNull(i)
}
col[i] = float64(*ed.Datum.(*tree.DFloat))
}
case sqlbase.ColumnType_BYTES:
col := vec.Bytes()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, encoding.UnsafeConvertStringToBytes(string(*c.buffered[i][idx].Datum.(*tree.DBytes))))
}
case sqlbase.ColumnType_STRING:
col := vec.Bytes()
for i := uint16(0); i < nRows; i++ {
if c.buffered[i][idx].Datum == nil {
if err := c.buffered[i][idx].EnsureDecoded(&ct, &c.da); err != nil {
panic(err)
}
}
if c.buffered[i][idx].Datum == tree.DNull {
vec.SetNull(i)
}
col.Set(i, encoding.UnsafeConvertStringToBytes(string(*c.buffered[i][idx].Datum.(*tree.DString))))
}
default:
panic(fmt.Sprintf("Unsupported column type %s", ct.SQLString()))
err := exec.EncDatumRowsToColVec(c.buffered[:nRows], c.batch.ColVec(idx), idx, &ct, &c.da)
if err != nil {
panic(err)
}
}
return c.batch
Expand Down
1 change: 1 addition & 0 deletions pkg/sql/exec/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
*.og.go
157 changes: 157 additions & 0 deletions pkg/sql/exec/execgen/cmd/execgen/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// Copyright 2018 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

package main

import (
"bytes"
"flag"
"fmt"
"go/format"
"io"
"os"

"github.com/pkg/errors"
)

var (
errInvalidArgCount = errors.New("invalid number of arguments")
errUnrecognizedCommand = errors.New("unrecognized command")
)

// main runs the execgen tool against the process arguments, formatting the
// generated code and reporting problems on os.Stderr. The process exits with
// status 2 when the run does not succeed.
func main() {
	tool := execgen{stdErr: os.Stderr, useGoFmt: true}
	succeeded := tool.run(os.Args[1:]...)
	if succeeded {
		return
	}
	os.Exit(2)
}

// execgen holds the configuration and command-line state for one invocation
// of the code generation tool.
type execgen struct {
// useGoFmt, when true, causes generated code to be formatted with the
// go/format package before it is written out.
useGoFmt bool

// stdErr is the writer that receives usage text and error messages. It also
// receives the generated code when no "out" file name is given.
stdErr io.Writer

// cmdLine stores the set of flags used to invoke the Execgen tool. It is
// populated by run, and its "out" flag is looked up when writing output.
cmdLine *flag.FlagSet
}

// generator is a function that writes generated Go source to the given
// writer, returning an error if the source could not be produced.
type generator func(io.Writer) error

// generators maps each execgen command name to the generator that implements
// it. run uses the first non-flag argument as the key.
var generators = map[string]generator{
"rowstovec": genRowsToVec,
}

// run parses the given command-line arguments, selects the generator named by
// the first non-flag argument, and executes it. Problems are reported on
// g.stdErr. It returns true only when code generation completed without
// error.
func (g *execgen) run(args ...string) bool {
	// Build the flag set first and publish it on g, because the usage
	// callback reads g.cmdLine and may fire during parsing.
	flags := flag.NewFlagSet("execgen", flag.ContinueOnError)
	g.cmdLine = flags
	flags.SetOutput(g.stdErr)
	flags.Usage = g.usage
	flags.String("out", "", "output file name of generated code")
	if parseErr := flags.Parse(args); parseErr != nil {
		return false
	}

	// Whatever follows the flags must start with a command name.
	positional := flags.Args()
	if len(positional) == 0 {
		flags.Usage()
		g.reportError(errInvalidArgCount)
		return false
	}

	genFunc := generators[positional[0]]
	if genFunc == nil {
		g.reportError(errUnrecognizedCommand)
		return false
	}

	genErr := g.generate(genFunc)
	if genErr == nil {
		return true
	}
	g.reportError(genErr)
	return false
}

// generate runs genFunc to produce source code, prefixed with the standard
// "Code generated" header, and hands the result to writeOutputFile. When
// g.useGoFmt is set the source is formatted first; if formatting fails the
// unformatted source is still written out and a formatting error is returned.
func (g *execgen) generate(genFunc generator) error {
	var src bytes.Buffer
	src.WriteString("// Code generated by execgen; DO NOT EDIT.\n")
	if genErr := genFunc(&src); genErr != nil {
		return genErr
	}

	if !g.useGoFmt {
		return g.writeOutputFile(src.Bytes())
	}

	formatted, fmtErr := format.Source(src.Bytes())
	if fmtErr == nil {
		return g.writeOutputFile(formatted)
	}

	// Write out incorrect source for easier debugging. Any write error is
	// deliberately dropped because the formatting error takes precedence.
	outName := g.cmdLine.Lookup("out").Value.String()
	_ = g.writeOutputFile(src.Bytes())
	return fmt.Errorf("Code formatting failed with Go parse error\n%s:%s", outName, fmtErr)
}

// writeOutputFile writes the generated code in b to the file named by the
// "out" flag, creating the file or truncating an existing one. If no output
// file name was given, the code is written to g.stdErr instead. It returns
// the first error encountered while creating, writing, or closing the file.
func (g *execgen) writeOutputFile(b []byte) error {
	out := g.cmdLine.Lookup("out").Value.String()
	if out == "" {
		// No output file requested: emit the code on the tool's error stream.
		_, err := g.stdErr.Write(b)
		return err
	}

	file, err := os.Create(out)
	if err != nil {
		return err
	}
	if _, err := file.Write(b); err != nil {
		// The write error is the one worth reporting; closing is best effort.
		_ = file.Close()
		return err
	}
	// Close can surface a delayed write failure, so its error must not be
	// dropped: a truncated generated file would otherwise look like success.
	return file.Close()
}

// usage prints the tool's help text to g.stdErr: a short description, the
// invocation syntax, the supported commands, and the flag defaults. It is
// installed as the Usage callback of the flag set in place of the flag
// package's default.
func (g *execgen) usage() {
	const preamble = "Execgen is a tool for generating templated code related to " +
		"columnarized execution.\n\n" +
		"Usage:\n" +
		"\texecgen [flags] command...\n\n" +
		"The commands are:\n" +
		"\trowstovec generate the EncDatumRowsToColVec function\n\n" +
		"Flags:\n"

	fmt.Fprint(g.stdErr, preamble)
	// The flag descriptions are printed by the flag set itself, between the
	// "Flags:" heading and the trailing blank line.
	g.cmdLine.PrintDefaults()
	fmt.Fprint(g.stdErr, "\n")
}

// reportError writes err to g.stdErr as a single line prefixed with "ERROR: ".
func (g *execgen) reportError(err error) {
	const layout = "ERROR: %v\n"
	// Failure to write a diagnostic cannot be reported anywhere else, so the
	// result of the write is intentionally discarded.
	_, _ = fmt.Fprintf(g.stdErr, layout, err)
}
Loading

0 comments on commit 3d2038d

Please sign in to comment.