From d2d30eda3e195ed5ab05f63fd8389e2463a57662 Mon Sep 17 00:00:00 2001 From: Will Scott Date: Sat, 18 Feb 2023 19:30:58 -0800 Subject: [PATCH 1/2] add latency / size metrics for error cases --- pool.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/pool.go b/pool.go index 5ab9379..d0fe2d8 100644 --- a/pool.go +++ b/pool.go @@ -349,7 +349,7 @@ func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid) (b blocks.Bl requestId := uuid.NewString() goLogger.Debugw("doing fetch", "from", from, "of", c, "requestId", requestId) start := time.Now() - fb := time.Now() + fb := time.Unix(0, 0) code := 0 proto := "unknown" respReq := &http.Request{} @@ -357,8 +357,10 @@ func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid) (b blocks.Bl defer func() { goLogger.Debugw("fetch result", "from", from, "of", c, "status", code, "size", received, "ttfb", int(fb.Sub(start).Milliseconds()), "duration", time.Since(start).Seconds()) fetchResponseMetric.WithLabelValues(fmt.Sprintf("%d", code)).Add(1) - if e == nil { + if fb.After(start) { fetchLatencyMetric.Observe(float64(fb.Sub(start).Milliseconds())) + } + if received > 0 { fetchSpeedMetric.Observe(float64(received) / time.Since(start).Seconds()) fetchSizeMetric.Observe(float64(received)) } From 05bb6a725b3e0f0888f818c263daeb2000fb3188 Mon Sep 17 00:00:00 2001 From: Marcin Rataj Date: Mon, 20 Feb 2023 01:00:24 +0100 Subject: [PATCH 2/2] refactor: lower cost of metrics and logs --- pool.go | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/pool.go b/pool.go index d0fe2d8..198569f 100644 --- a/pool.go +++ b/pool.go @@ -240,10 +240,7 @@ func (p *pool) fetchWith(ctx context.Context, c cid.Cid, with string) (blk block } for i := 0; i < len(nodes); i++ { - blk, err = p.doFetch(ctx, nodes[i], c) - if err != nil { - goLogger.Debugw("fetch failed", "from", nodes[i], "of", c, "attempt", i, "error", err) - } + blk, err = p.doFetch(ctx, nodes[i], c, i) var idx int var nm *Member @@ -345,7 +342,7 @@ func (p *pool) updateWeightUnlocked(node string, failure bool) (index int, membe var saturnReqTmpl = "https://%s/ipfs/%s?format=raw" // doFetch attempts to fetch a block from a given Saturn endpoint. It sends the retrieval logs to the logging endpoint upon a successful or failed attempt. -func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid) (b blocks.Block, e error) { +func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid, attempt int) (b blocks.Block, e error) { requestId := uuid.NewString() goLogger.Debugw("doing fetch", "from", from, "of", c, "requestId", requestId) start := time.Now() @@ -355,13 +352,15 @@ func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid) (b blocks.Bl respReq := &http.Request{} received := 0 defer func() { - goLogger.Debugw("fetch result", "from", from, "of", c, "status", code, "size", received, "ttfb", int(fb.Sub(start).Milliseconds()), "duration", time.Since(start).Seconds()) + ttfbMs := fb.Sub(start).Milliseconds() + durationSecs := time.Since(start).Seconds() + goLogger.Debugw("fetch result", "from", from, "of", c, "status", code, "size", received, "ttfb", int(ttfbMs), "duration", durationSecs, "attempt", attempt, "error", e) fetchResponseMetric.WithLabelValues(fmt.Sprintf("%d", code)).Add(1) if fb.After(start) { - fetchLatencyMetric.Observe(float64(fb.Sub(start).Milliseconds())) + fetchLatencyMetric.Observe(float64(ttfbMs)) } if received > 0 { - fetchSpeedMetric.Observe(float64(received) / time.Since(start).Seconds()) + fetchSpeedMetric.Observe(float64(received) / durationSecs) fetchSizeMetric.Observe(float64(received)) } p.logger.queue <- log{ @@ -370,11 +369,11 @@ func (p *pool) doFetch(ctx context.Context, from string, c cid.Cid) (b blocks.Bl LocalTime: start, // TODO: does this include header sizes? NumBytesSent: received, - RequestDuration: time.Since(start).Seconds(), + RequestDuration: durationSecs, RequestID: requestId, HTTPStatusCode: code, HTTPProtocol: proto, - TTFBMS: int(fb.Sub(start).Milliseconds()), + TTFBMS: int(ttfbMs), // my address ClientAddress: "", Range: "",