Skip to content

Commit

Permalink
Add flag reporting-zipkin-sample-rate (#7211)
Browse files Browse the repository at this point in the history
### Problem
In the current implementation, every time the pants command is run with zipkin tracing turned on all the zipkin traces will be collected. It is not very convenient when the number of runs is very big. 

### Solution
Possibility to set the sample rate will allow us to have the number of traces that fits the constraints of Zipkin server. 

### Result
A flag `reporting-zipkin-sample-rate` was added that sets the sample rate at which to sample Zipkin traces. If flags `reporting-zipkin-trace-id` and `reporting-zipkin-parent-id` are set the sample rate will always be 100.0 (no matter what is set in `reporting-zipkin-sample-rate` flag).
  • Loading branch information
cattibrie authored and illicitonion committed Feb 6, 2019
1 parent 95638d3 commit b08c1fd
Show file tree
Hide file tree
Showing 3 changed files with 29 additions and 6 deletions.
5 changes: 4 additions & 1 deletion src/python/pants/reporting/reporting.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,8 @@ def register_options(cls, register):
help='The 64-bit ID for a parent span that invokes Pants. '
'zipkin-trace-id and zipkin-parent-id must both either be set or not set '
'when run Pants command')
register('--zipkin-sample-rate', advanced=True, default=100.0,
help='Rate at which to sample Zipkin traces. Value 0.0 - 100.0.')

def initialize(self, run_tracker, all_options, start_time=None):
"""Initialize with the given RunTracker.
Expand Down Expand Up @@ -100,6 +102,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
zipkin_endpoint = self.get_options().zipkin_endpoint
trace_id = self.get_options().zipkin_trace_id
parent_id = self.get_options().zipkin_parent_id
sample_rate = self.get_options().zipkin_sample_rate

if zipkin_endpoint is None and trace_id is not None and parent_id is not None:
raise ValueError(
Expand All @@ -113,7 +116,7 @@ def initialize(self, run_tracker, all_options, start_time=None):
if zipkin_endpoint is not None:
zipkin_reporter_settings = ZipkinReporter.Settings(log_level=Report.INFO)
zipkin_reporter = ZipkinReporter(
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id
run_tracker, zipkin_reporter_settings, zipkin_endpoint, trace_id, parent_id, sample_rate
)
report.add_reporter('zipkin', zipkin_reporter)

Expand Down
13 changes: 8 additions & 5 deletions src/python/pants/reporting/zipkin_reporter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
from py_zipkin import Encoding
from py_zipkin.transport import BaseTransportHandler
from py_zipkin.util import generate_random_64bit_string
from py_zipkin.zipkin import ZipkinAttrs, zipkin_span
from py_zipkin.zipkin import ZipkinAttrs, create_attrs_for_span, zipkin_span

from pants.base.workunit import WorkUnitLabel
from pants.reporting.reporter import Reporter
Expand Down Expand Up @@ -42,7 +42,7 @@ class ZipkinReporter(Reporter):
Reporter that implements Zipkin tracing.
"""

def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id, sample_rate):
"""
When trace_id and parent_id are set a Zipkin trace will be created with given trace_id
and parent_id. If trace_id and parent_id are set to None, a trace_id will be randomly
Expand All @@ -53,6 +53,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
:param string endpoint: The full HTTP URL of a zipkin server to which traces should be posted.
:param string trace_id: The overall 64 or 128-bit ID of the trace. May be None.
:param string parent_id: The 64-bit ID for a parent span that invokes Pants. May be None.
:param float sample_rate: Rate at which to sample Zipkin traces. Value 0.0 - 100.0.
"""
super(ZipkinReporter, self).__init__(run_tracker, settings)
# We keep track of connection between workunits and spans
Expand All @@ -61,6 +62,7 @@ def __init__(self, run_tracker, settings, endpoint, trace_id, parent_id):
self.handler = HTTPTransportHandler(endpoint)
self.trace_id = trace_id
self.parent_id = parent_id
self.sample_rate = float(sample_rate)

def start_workunit(self, workunit):
"""Implementation of Reporter callback."""
Expand All @@ -84,13 +86,14 @@ def start_workunit(self, workunit):
is_sampled=True,
)
else:
zipkin_attrs = None
zipkin_attrs = create_attrs_for_span(
sample_rate=self.sample_rate, # Value between 0.0 and 100.0
)

span = zipkin_span(
service_name=service_name,
span_name=workunit.name,
transport_handler=self.handler,
sample_rate=100.0, # Value between 0.0 and 100.0
encoding=Encoding.V1_THRIFT,
zipkin_attrs=zipkin_attrs
)
Expand All @@ -104,7 +107,7 @@ def start_workunit(self, workunit):
# Goals and tasks save their start time at the beginning of their run.
# This start time is passed to workunit, because the workunit may be created much later.
span.start_timestamp = workunit.start_time
if first_span:
if first_span and span.zipkin_attrs.is_sampled:
span.logging_context.start_timestamp = workunit.start_time

def end_workunit(self, workunit):
Expand Down
17 changes: 17 additions & 0 deletions tests/python/pants_test/reporting/test_reporting_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -220,6 +220,23 @@ def test_zipkin_reporter_with_given_trace_id_parent_id(self):
self.assertTrue(main_children)
self.assertTrue(any(span['name'] == 'cloc' for span in main_children))

def test_zipkin_reporter_with_zero_sample_rate(self):
ZipkinHandler = zipkin_handler()
with http_server(ZipkinHandler) as port:
endpoint = "http://localhost:{}".format(port)
command = [
'--reporting-zipkin-endpoint={}'.format(endpoint),
'--reporting-zipkin-sample-rate=0.0',
'cloc',
'src/python/pants:version'
]

pants_run = self.run_pants(command)
self.assert_success(pants_run)

num_of_traces = len(ZipkinHandler.traces)
self.assertEqual(num_of_traces, 0)

@staticmethod
def find_spans_by_name(trace, name):
return [span for span in trace if span['name'] == name]
Expand Down

0 comments on commit b08c1fd

Please sign in to comment.