Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add flag reporting-zipkin-sample-rate that sets the sample rate at which to sample Zipkin traces #7211

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion src/python/pants/reporting/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def register_options(cls, register):
help='The 64-bit ID for a parent span that invokes Pants. '
'zipkin-trace-id and zipkin-parent-id must both either be set or not set '
'when run Pants command')
register('--zipkin-sample-rate', advanced=True, default=100.0,
help='Rate at which to sample Zipkin traces. Value 0.0 - 100.0.')

def initialize(self, run_tracker, all_options, start_time=None):
"""Initialize with the given RunTracker.
Expand Down Expand Up @@ -100,6 +102,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
zipkin_endpoint = self.get_options().zipkin_endpoint
trace_id = self.get_options().zipkin_trace_id
parent_id = self.get_options().zipkin_parent_id
sample_rate = self.get_options().zipkin_sample_rate

if zipkin_endpoint is None and trace_id is not None and parent_id is not None:
raise ValueError(
Expand All @@ -113,7 +116,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
if zipkin_endpoint is not None:
zipkin_reporter_settings = ZipkinReporter.Settings(log_level=Report.INFO)
zipkin_reporter = ZipkinReporter(
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id, sample_rate
)
report.add_reporter('zipkin', zipkin_reporter)

Expand Down
13 changes: 8 additions & 5 deletions src/python/pants/reporting/zipkin_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from py_zipkin import Encoding
from py_zipkin.transport import BaseTransportHandler
from py_zipkin.util import generate_random_64bit_string
from py_zipkin.zipkin import ZipkinAttrs, zipkin_span
from py_zipkin.zipkin import ZipkinAttrs, create_attrs_for_span, zipkin_span

from pants.base.workunit import WorkUnitLabel
from pants.reporting.reporter import Reporter
Expand Down Expand Up @@ -42,7 +42,7 @@ class ZipkinReporter(Reporter):
Reporter that implements Zipkin tracing.
"""

def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id, sample_rate):
"""
When trace_id and parent_id are set a Zipkin trace will be created with given trace_id
and parent_id. If trace_id and parent_id are set to None, a trace_id will be randomly
Expand All @@ -53,6 +53,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
:param string endpoint: The full HTTP URL of a zipkin server to which traces should be posted.
:param string trace_id: The overall 64 or 128-bit ID of the trace. May be None.
:param string parent_id: The 64-bit ID for a parent span that invokes Pants. May be None.
:param float sample_rate: Rate at which to sample Zipkin traces. Value 0.0 - 100.0.
"""
super(ZipkinReporter, self).__init__(run_tracker, settings)
# We keep track of connection between workunits and spans
Expand All @@ -61,6 +62,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
self.handler = HTTPTransportHandler(endpoint)
self.trace_id = trace_id
self.parent_id = parent_id
self.sample_rate = float(sample_rate)

def start_workunit(self, workunit):
"""Implementation of Reporter callback."""
Expand All @@ -84,13 +86,14 @@ def start_workunit(self, workunit):
is_sampled=True,
)
else:
zipkin_attrs = None
zipkin_attrs = create_attrs_for_span(
sample_rate=self.sample_rate, # Value between 0.0 and 100.0
)

span = zipkin_span(
service_name=service_name,
span_name=workunit.name,
transport_handler=self.handler,
sample_rate=100.0, # Value between 0.0 and 100.0
encoding=Encoding.V1_THRIFT,
zipkin_attrs=zipkin_attrs
)
Expand All @@ -104,7 +107,7 @@ def start_workunit(self, workunit):
# Goals and tasks save their start time at the beginning of their run.
# This start time is passed to workunit, because the workunit may be created much later.
span.start_timestamp = workunit.start_time
if first_span:
if first_span and span.zipkin_attrs.is_sampled:
span.logging_context.start_timestamp = workunit.start_time

def end_workunit(self, workunit):
Expand Down
17 changes: 17 additions & 0 deletions tests/python/pants_test/reporting/test_reporting_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,23 @@ def test_zipkin_reporter_with_given_trace_id_parent_id(self):
self.assertTrue(main_children)
self.assertTrue(any(span['name'] == 'cloc' for span in main_children))

def test_zipkin_reporter_with_zero_sample_rate(self):
ZipkinHandler = zipkin_handler()
with http_server(ZipkinHandler) as port:
endpoint = "http://localhost:{}".format(port)
command = [
'--reporting-zipkin-endpoint={}'.format(endpoint),
'--reporting-zipkin-sample-rate=0.0',
'cloc',
'src/python/pants:version'
]

pants_run = self.run_pants(command)
self.assert_success(pants_run)

num_of_traces = len(ZipkinHandler.traces)
self.assertEqual(num_of_traces, 0)

@staticmethod
def find_spans_by_name(trace, name):
return [span for span in trace if span['name'] == name]
Expand Down