runtime: shrink time histogram buckets
There are lots of useless buckets with too much precision. Introduce a
minimum level of precision with a minimum bucket bit. This cuts down on
the size of a time histogram dramatically (~3x). Also, pick a smaller
sub-bucket count; we don't need 6% precision.

Also, rename super-buckets to buckets to more closely line up with HDR
histogram literature.

Change-Id: I199449650e4b34f2a6dca3cf1d8edb071c6655c0
Reviewed-on: https://go-review.googlesource.com/c/go/+/427615
Run-TryBot: Michael Knyszek <mknyszek@google.com>
Auto-Submit: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Reviewed-by: Michael Pratt <mpratt@google.com>
mknyszek authored and gopherbot committed Sep 16, 2022
1 parent 1fc8369 commit 87eda2a
Showing 4 changed files with 159 additions and 128 deletions.
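
Before the diff, a brief orientation: the sketch below mirrors the bucketing scheme the commit message describes, using the constants this change introduces (timeHistMinBucketBits = 9, timeHistMaxBucketBits = 48, timeHistSubBucketBits = 2). It is an editorial illustration rather than code from the change, and it uses math/bits in place of the runtime-internal sys package.

package main

import (
	"fmt"
	"math/bits"
)

// The constants introduced by this change (see src/runtime/histogram.go below).
const (
	timeHistMinBucketBits = 9
	timeHistMaxBucketBits = 48 // exclusive
	timeHistSubBucketBits = 2
	timeHistNumSubBuckets = 1 << timeHistSubBucketBits
	timeHistNumBuckets    = timeHistMaxBucketBits - timeHistMinBucketBits + 1
)

// bucketOf mirrors the indexing idea in timeHistogram.record: the bucket is
// chosen by the highest set bit (clamped to the minimum bucket bit), and the
// sub-bucket by the next timeHistSubBucketBits bits below it. Negative
// durations (which the runtime counts in a separate underflow counter) are
// not handled here.
func bucketOf(durationNanos int64) (bucket, subBucket int, overflow bool) {
	bucketBit := bits.Len64(uint64(durationNanos))
	if bucketBit < timeHistMinBucketBits {
		bucketBit = timeHistMinBucketBits
		bucket = 0
	} else {
		bucket = bucketBit - timeHistMinBucketBits + 1
	}
	if bucket >= timeHistNumBuckets {
		return 0, 0, true
	}
	subBucket = int(uint64(durationNanos)>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
	return bucket, subBucket, false
}

func main() {
	for _, d := range []int64{100, 300, 123456, 1000000000} {
		b, s, of := bucketOf(d)
		fmt.Printf("%d ns -> bucket %d, sub-bucket %d (overflow %v)\n", d, b, s, of)
	}
}
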
20 changes: 13 additions & 7 deletions src/runtime/export_test.go
@@ -1228,22 +1228,28 @@ func MSpanCountAlloc(ms *MSpan, bits []byte) int {
}

const (
TimeHistSubBucketBits = timeHistSubBucketBits
TimeHistNumSubBuckets = timeHistNumSubBuckets
TimeHistNumSuperBuckets = timeHistNumSuperBuckets
TimeHistSubBucketBits = timeHistSubBucketBits
TimeHistNumSubBuckets = timeHistNumSubBuckets
TimeHistNumBuckets = timeHistNumBuckets
TimeHistMinBucketBits = timeHistMinBucketBits
TimeHistMaxBucketBits = timeHistMaxBucketBits
)

type TimeHistogram timeHistogram

// Counts returns the counts for the given bucket, subBucket indices.
// Returns true if the bucket was valid, otherwise returns the counts
// for the underflow bucket and false.
func (th *TimeHistogram) Count(bucket, subBucket uint) (uint64, bool) {
// for the overflow bucket if bucket > 0 or the underflow bucket if
// bucket < 0, and false.
func (th *TimeHistogram) Count(bucket, subBucket int) (uint64, bool) {
t := (*timeHistogram)(th)
i := bucket*TimeHistNumSubBuckets + subBucket
if i >= uint(len(t.counts)) {
if bucket < 0 {
return t.underflow.Load(), false
}
i := bucket*TimeHistNumSubBuckets + subBucket
if i >= len(t.counts) {
return t.overflow.Load(), false
}
return t.counts[i].Load(), true
}
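
To illustrate the new Count signature above (a signed bucket index, with a separate overflow read), here is a hedged sketch of a helper that a test in the runtime's own test suite (package runtime_test, where the export_test.go names above are visible) might use to dump a TimeHistogram; dumpTimeHistogram itself is a made-up name, not something this change adds.

package runtime_test

import (
	"runtime"
	"testing"
)

// dumpTimeHistogram logs every count in th, plus the underflow and overflow
// counters, using the Count convention from export_test.go: a negative bucket
// reads underflow, and an out-of-range bucket reads overflow (both return false).
func dumpTimeHistogram(t *testing.T, th *runtime.TimeHistogram) {
	if c, ok := th.Count(-1, 0); !ok {
		t.Logf("underflow: %d", c)
	}
	for b := 0; b < runtime.TimeHistNumBuckets; b++ {
		for s := 0; s < runtime.TimeHistNumSubBuckets; s++ {
			c, _ := th.Count(b, s)
			t.Logf("bucket %d, sub-bucket %d: %d", b, s, c)
		}
	}
	if c, ok := th.Count(runtime.TimeHistNumBuckets, 0); !ok {
		t.Logf("overflow: %d", c)
	}
}
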

169 changes: 95 additions & 74 deletions src/runtime/histogram.go
@@ -12,70 +12,88 @@ import (

const (
// For the time histogram type, we use an HDR histogram.
// Values are placed in super-buckets based solely on the most
// significant set bit. Thus, super-buckets are power-of-2 sized.
// Values are placed in buckets based solely on the most
// significant set bit. Thus, buckets are power-of-2 sized.
// Values are then placed into sub-buckets based on the value of
// the next timeHistSubBucketBits most significant bits. Thus,
// sub-buckets are linear within a super-bucket.
// sub-buckets are linear within a bucket.
//
// Therefore, the number of sub-buckets (timeHistNumSubBuckets)
// defines the error. This error may be computed as
// 1/timeHistNumSubBuckets*100%. For example, for 16 sub-buckets
// per super-bucket the error is approximately 6%.
// per bucket the error is approximately 6%.
//
// The number of super-buckets (timeHistNumSuperBuckets), on the
// other hand, defines the range. To reserve room for sub-buckets,
// bit timeHistSubBucketBits is the first bit considered for
// super-buckets, so super-bucket indices are adjusted accordingly.
// The number of buckets (timeHistNumBuckets), on the
// other hand, defines the range. To avoid producing a large number
// of buckets that are close together, especially for small numbers
// (e.g. 1, 2, 3, 4, 5 ns) that aren't very useful, timeHistNumBuckets
// is defined in terms of the least significant bit (timeHistMinBucketBits)
// that needs to be set before we start bucketing and the most
// significant bit (timeHistMaxBucketBits) that we bucket before we just
// dump it into a catch-all bucket.
//
// As an example, consider 45 super-buckets with 16 sub-buckets.
// As an example, consider the configuration:
//
// 00110
// ^----
// │ ^
// │ └---- Lowest 4 bits -> sub-bucket 6
// └------- Bit 4 unset -> super-bucket 0
// timeHistMinBucketBits = 9
// timeHistMaxBucketBits = 48
// timeHistSubBucketBits = 2
//
// 10110
// ^----
// │ ^
// │ └---- Next 4 bits -> sub-bucket 6
// └------- Bit 4 set -> super-bucket 1
// 100010
// ^----^
// │ ^ └-- Lower bits ignored
// │ └---- Next 4 bits -> sub-bucket 1
// └------- Bit 5 set -> super-bucket 2
// Then:
//
// Following this pattern, super-bucket 44 will have the bit 47 set. We don't
// have any buckets for higher values, so the highest sub-bucket will
// contain values of 2^48-1 nanoseconds or approx. 3 days. This range is
// more than enough to handle durations produced by the runtime.
timeHistSubBucketBits = 4
timeHistNumSubBuckets = 1 << timeHistSubBucketBits
timeHistNumSuperBuckets = 45
timeHistTotalBuckets = timeHistNumSuperBuckets*timeHistNumSubBuckets + 1
// 011000001
// ^--
// │ ^
// │ └---- Next 2 bits -> sub-bucket 3
// └------- Bit 9 unset -> bucket 0
//
// 110000001
// ^--
// │ ^
// │ └---- Next 2 bits -> sub-bucket 2
// └------- Bit 9 set -> bucket 1
//
// 1000000010
// ^-- ^
// │ ^ └-- Lower bits ignored
// │ └---- Next 2 bits -> sub-bucket 0
// └------- Bit 10 set -> bucket 2
//
// Following this pattern, bucket 38 will have the bit 46 set. We don't
// have any buckets for higher values, so we spill the rest into an overflow
// bucket containing values of 2^47-1 nanoseconds or approx. 1 day or more.
// This range is more than enough to handle durations produced by the runtime.
timeHistMinBucketBits = 9
timeHistMaxBucketBits = 48 // Note that this is exclusive; 1 higher than the actual range.
timeHistSubBucketBits = 2
timeHistNumSubBuckets = 1 << timeHistSubBucketBits
timeHistNumBuckets = timeHistMaxBucketBits - timeHistMinBucketBits + 1
// Two extra buckets, one for underflow, one for overflow.
timeHistTotalBuckets = timeHistNumBuckets*timeHistNumSubBuckets + 2
)

// timeHistogram represents a distribution of durations in
// nanoseconds.
//
// The accuracy and range of the histogram is defined by the
// timeHistSubBucketBits and timeHistNumSuperBuckets constants.
// timeHistSubBucketBits and timeHistNumBuckets constants.
//
// It is an HDR histogram with exponentially-distributed
// buckets and linearly distributed sub-buckets.
//
// The histogram is safe for concurrent reads and writes.
type timeHistogram struct {
counts [timeHistNumSuperBuckets * timeHistNumSubBuckets]atomic.Uint64
counts [timeHistNumBuckets * timeHistNumSubBuckets]atomic.Uint64

// underflow counts all the times we got a negative duration
// sample. Because of how time works on some platforms, it's
// possible to measure negative durations. We could ignore them,
// but we record them anyway because it's better to have some
// signal that it's happening than just missing samples.
underflow atomic.Uint64

// overflow counts all the times we got a duration that exceeded
// the range counts represents.
overflow atomic.Uint64
}

// record adds the given duration to the distribution.
@@ -85,36 +103,35 @@ type timeHistogram struct {
//
//go:nosplit
func (h *timeHistogram) record(duration int64) {
// If the duration is negative, capture that in underflow.
if duration < 0 {
h.underflow.Add(1)
return
}
// The index of the exponential bucket is just the index
// of the highest set bit adjusted for how many bits we
// use for the subbucket. Note that it's timeHistSubBucketsBits-1
// because we use the 0th bucket to hold values < timeHistNumSubBuckets.
var superBucket, subBucket uint
if duration >= timeHistNumSubBuckets {
// At this point, we know the duration value will always be
// at least timeHistSubBucketsBits long.
superBucket = uint(sys.Len64(uint64(duration))) - timeHistSubBucketBits
if superBucket*timeHistNumSubBuckets >= uint(len(h.counts)) {
// The bucket index we got is larger than what we support, so
// include this count in the highest bucket, which extends to
// infinity.
superBucket = timeHistNumSuperBuckets - 1
subBucket = timeHistNumSubBuckets - 1
} else {
// The linear subbucket index is just the timeHistSubBucketsBits
// bits after the top bit. To extract that value, shift down
// the duration such that we leave the top bit and the next bits
// intact, then extract the index.
subBucket = uint((duration >> (superBucket - 1)) % timeHistNumSubBuckets)
}
// bucketBit is the target bit for the bucket which is usually the
// highest 1 bit, but if we're less than the minimum, is the highest
// 1 bit of the minimum (which will be zero in the duration).
//
// bucket is the bucket index, which is the bucketBit minus the
// highest bit of the minimum, plus one to leave room for the catch-all
// bucket for samples lower than the minimum.
var bucketBit, bucket uint
if l := sys.Len64(uint64(duration)); l < timeHistMinBucketBits {
bucketBit = timeHistMinBucketBits
bucket = 0 // bucketBit - timeHistMinBucketBits
} else {
subBucket = uint(duration)
bucketBit = uint(l)
bucket = bucketBit - timeHistMinBucketBits + 1
}
// If the bucket we computed is greater than the number of buckets,
// count that in overflow.
if bucket >= timeHistNumBuckets {
h.overflow.Add(1)
return
}
h.counts[superBucket*timeHistNumSubBuckets+subBucket].Add(1)
// The sub-bucket index is just next timeHistSubBucketBits after the bucketBit.
subBucket := uint(duration>>(bucketBit-1-timeHistSubBucketBits)) % timeHistNumSubBuckets
h.counts[bucket*timeHistNumSubBuckets+subBucket].Add(1)
}
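
As a quick worked example of the indexing in record above (an editorial trace, not part of the file), take a duration of 123456 ns:

// duration = 123456 ns = 0b1_1110_0010_0100_0000, so sys.Len64 returns 17.
// 17 >= timeHistMinBucketBits, so bucketBit = 17 and bucket = 17 - 9 + 1 = 9.
// subBucket = (123456 >> (17 - 1 - 2)) % 4 = (123456 >> 14) % 4 = 7 % 4 = 3.
// The sample is therefore counted in h.counts[9*4+3] = h.counts[39].
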

const (
Expand All @@ -137,33 +154,37 @@ func float64NegInf() float64 {
// not nanoseconds like the timeHistogram represents durations.
func timeHistogramMetricsBuckets() []float64 {
b := make([]float64, timeHistTotalBuckets+1)
// Underflow bucket.
b[0] = float64NegInf()
// Super-bucket 0 has no bits above timeHistSubBucketBits
// set, so just iterate over each bucket and assign the
// incrementing bucket.
for i := 0; i < timeHistNumSubBuckets; i++ {
bucketNanos := uint64(i)
b[i+1] = float64(bucketNanos) / 1e9

for j := 0; j < timeHistNumSubBuckets; j++ {
// No bucket bit for the first few buckets. Just sub-bucket bits after the
// min bucket bit.
bucketNanos := uint64(j) << (timeHistMinBucketBits - 1 - timeHistSubBucketBits)
// Convert nanoseconds to seconds via a division.
// These values will all be exactly representable by a float64.
b[j+1] = float64(bucketNanos) / 1e9
}
// Generate the rest of the super-buckets. It's easier to reason
// about if we cut out the 0'th bucket, so subtract one since
// we just handled that bucket.
for i := 0; i < timeHistNumSuperBuckets-1; i++ {
// Generate the rest of the buckets. It's easier to reason
// about if we cut out the 0'th bucket.
for i := timeHistMinBucketBits; i < timeHistMaxBucketBits; i++ {
for j := 0; j < timeHistNumSubBuckets; j++ {
// Set the super-bucket bit.
bucketNanos := uint64(1) << (i + timeHistSubBucketBits)
// Set the bucket bit.
bucketNanos := uint64(1) << (i - 1)
// Set the sub-bucket bits.
bucketNanos |= uint64(j) << i
// The index for this bucket is going to be the (i+1)'th super bucket
// (note that we're starting from zero, but handled the first super-bucket
bucketNanos |= uint64(j) << (i - 1 - timeHistSubBucketBits)
// The index for this bucket is going to be the (i+1)'th bucket
// (note that we're starting from zero, but handled the first bucket
// earlier, so we need to compensate), and the j'th sub bucket.
// Add 1 because we left space for -Inf.
bucketIndex := (i+1)*timeHistNumSubBuckets + j + 1
bucketIndex := (i-timeHistMinBucketBits+1)*timeHistNumSubBuckets + j + 1
// Convert nanoseconds to seconds via a division.
// These values will all be exactly representable by a float64.
b[bucketIndex] = float64(bucketNanos) / 1e9
}
}
// Overflow bucket.
b[len(b)-2] = float64(uint64(1)<<(timeHistMaxBucketBits-1)) / 1e9
b[len(b)-1] = float64Inf()
return b
}
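
As a rough check on the boundary computation above (an editorial sketch using the new constants, not code from the change): the exported boundaries begin with -Inf for underflow, then 0, 64, 128, and 192 ns for the sub-minimum buckets, continue with 256, 320, 384, and 448 ns once bit 9 is reached, and end with 2^47 ns (roughly 1.6 days) followed by +Inf for overflow. The snippet below reproduces the first two groups.

package main

import "fmt"

func main() {
	const (
		minBucketBits = 9
		subBucketBits = 2
		numSubBuckets = 1 << subBucketBits
	)
	// Sub-minimum boundaries: only sub-bucket bits below the minimum bucket
	// bit are set (mirrors the first loop in timeHistogramMetricsBuckets).
	for j := 0; j < numSubBuckets; j++ {
		fmt.Printf("%d ns\n", uint64(j)<<(minBucketBits-1-subBucketBits)) // 0, 64, 128, 192
	}
	// First real bucket: bit 9 set, plus the sub-bucket bits below it.
	for j := 0; j < numSubBuckets; j++ {
		nanos := uint64(1)<<(minBucketBits-1) | uint64(j)<<(minBucketBits-1-subBucketBits)
		fmt.Printf("%d ns\n", nanos) // 256, 320, 384, 448
	}
}
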
