Skip to content

Commit

Permalink
util/log: don't panic
Browse files Browse the repository at this point in the history
Previously, log.outputLogEntry could panic while holding the log mutex.
This would deadlock any goroutine that logged while recovering from the
panic, which is approximately all of the recover routines. Most
annoyingly, the crash reporter would deadlock, swallowing the cause of
the panic.

Avoid panicking while holding the log mutex and use l.exit instead,
which exists for this very purpose. In the process, enforce the
invariant that l.mu is held when l.exit is called. (The previous
behavior was, in fact, incorrect, as l.flushAll should not be called
without holding l.mu.)

Also add a Tcl test to ensure this doesn't break in the future.
  • Loading branch information
benesch authored and bdarnell committed Feb 10, 2018
1 parent d908364 commit 3421966
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 14 deletions.
15 changes: 15 additions & 0 deletions pkg/cli/interactive_tests/test_missing_log_output.tcl
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,21 @@ eexpect "255"
eexpect ":/# "
end_test

start_test "Check that a broken stderr prints a message to the log files."
send "$argv start -s=path=logs/db --insecure --logtostderr --verbosity=1 2>&1 | cat\r"
eexpect "CockroachDB node starting"
system "killall cat"
eexpect ":/# "
system "grep -F 'log: exiting because of error: write /dev/stderr: broken pipe' logs/db/logs/cockroach.log"
end_test

start_test "Check that a broken log file prints a message to stderr."
send "$argv start -s=path=/dev/null --insecure --logtostderr\r"
eexpect "log: exiting because of error: log: cannot create log: open"
eexpect "not a directory"
eexpect ":/# "
end_test

start_test "Check that a server started with only in-memory stores and no --log-dir automatically logs to stderr."
send "$argv start --insecure --store=type=mem,size=1GiB\r"
eexpect "CockroachDB node starting"
Expand Down
36 changes: 22 additions & 14 deletions pkg/util/log/clog.go
Original file line number Diff line number Diff line change
Expand Up @@ -742,8 +742,8 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string)
if err := l.createFile(); err != nil {
// Make sure the message appears somewhere.
l.outputToStderr(entry, stacks)
l.exitLocked(err)
l.mu.Unlock()
l.exit(err)
return
}
}
Expand All @@ -752,7 +752,9 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string)
data := buf.Bytes()

if _, err := l.file.Write(data); err != nil {
panic(err)
l.exitLocked(err)
l.mu.Unlock()
return
}
if l.syncWrites {
_ = l.file.Flush()
Expand All @@ -778,7 +780,7 @@ func (l *loggingT) outputLogEntry(s Severity, file string, line int, msg string)
func (l *loggingT) outputToStderr(entry Entry, stacks []byte) {
buf := l.processForStderr(entry, stacks)
if _, err := OrigStderr.Write(buf.Bytes()); err != nil {
panic(err)
l.exitLocked(err)
}
l.putBuffer(buf)
}
Expand Down Expand Up @@ -872,21 +874,27 @@ func getStacks(all bool) []byte {
// would make its use clumsier.
var logExitFunc func(error)

// exit is called if there is trouble creating or writing log files.
// It flushes the logs and exits the program; there's no point in hanging around.
// l.mu is held.
func (l *loggingT) exit(err error) {
fmt.Fprintf(OrigStderr, "log: exiting because of error: %s\n", err)
// exitLocked is called if there is trouble creating or writing log files, or
// writing to stderr. It flushes the logs and exits the program; there's no
// point in hanging around. l.mu is held.
func (l *loggingT) exitLocked(err error) {
l.mu.AssertHeld()
// Either stderr or our log file is broken. Try writing the error to both
// streams in the hope that one still works or else the user will have no idea
// why we crashed.
for _, w := range []io.Writer{OrigStderr, l.file} {
if w == nil {
continue
}
fmt.Fprintf(w, "log: exiting because of error: %s\n", err)
}
// If logExitFunc is set, we do that instead of exiting.
if logExitFunc != nil {
logExitFunc(err)
return
}
l.flushAll()
l.mu.Lock()
exitFunc := l.exitFunc
l.mu.Unlock()
exitFunc(2)
l.exitFunc(2)
}

// syncBuffer joins a bufio.Writer to its underlying file, providing access to the
Expand All @@ -908,13 +916,13 @@ func (sb *syncBuffer) Sync() error {
func (sb *syncBuffer) Write(p []byte) (n int, err error) {
if sb.nbytes+int64(len(p)) >= atomic.LoadInt64(&LogFileMaxSize) {
if err := sb.rotateFile(time.Now()); err != nil {
sb.logger.exit(err)
sb.logger.exitLocked(err)
}
}
n, err = sb.Writer.Write(p)
sb.nbytes += int64(n)
if err != nil {
sb.logger.exit(err)
sb.logger.exitLocked(err)
}
return
}
Expand Down

0 comments on commit 3421966

Please sign in to comment.