Skip to content

Commit

Permalink
SQLAlchemy: Use SQL operation and DB name as the Span name (open-tele…
Browse files Browse the repository at this point in the history
…metry#254)

Current instrumentation uses the entire SQL query as the operation name
which makes traces very hard to read and understand in addition to
introducing high-cardinality issues. This commit fixes the problem by
using only the SQL operation name and the DB name instead of the entire
query.
  • Loading branch information
owais committed Jan 20, 2021
1 parent d12f67f commit 8c8f278
Show file tree
Hide file tree
Showing 8 changed files with 59 additions and 41 deletions.
4 changes: 3 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Changed
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-wsgi` Return `None` for `CarrierGetter` if key not found
([#1374](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
([#233](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/233))
- `opentelemetry-instrumentation-grpc` Comply with updated spec, rework tests
([#236](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/236))
- `opentelemetry-instrumentation-asgi`, `opentelemetry-instrumentation-falcon`, `opentelemetry-instrumentation-flask`, `opentelemetry-instrumentation-pyramid`, `opentelemetry-instrumentation-wsgi` Renamed `host.port` attribute to `net.host.port`
Expand All @@ -57,6 +57,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-exporter-datadog` `opentelemetry-sdk-extension-aws` Fix reference to ids_generator in sdk
([#235](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/235))
- `opentelemetry-instrumentation-sqlalchemy` Use SQL operation and DB name as span name.
([#254](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/254))

## [0.16b1](https://github.com/open-telemetry/opentelemetry-python-contrib/releases/tag/v0.16b1) - 2020-11-26

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from sqlalchemy.event import listen
from sqlalchemy.event import listen # pylint: disable=no-name-in-module

from opentelemetry import trace
from opentelemetry.instrumentation.sqlalchemy.version import __version__
Expand Down Expand Up @@ -72,22 +72,38 @@ def __init__(self, tracer, engine):
listen(engine, "after_cursor_execute", self._after_cur_exec)
listen(engine, "handle_error", self._handle_error)

def _operation_name(self, db_name, statement):
parts = []
if isinstance(statement, str):
# otel spec recommends against parsing SQL queries. We are not trying to parse SQL
# but simply truncating the statement to the first word. This covers probably >95%
# use cases and uses the SQL statement in span name correctly as per the spec.
# For some very special cases it might not record the correct statement if the SQL
# dialect is too weird but in any case it shouldn't break anything.
parts.append(statement.split()[0])
if db_name:
parts.append(db_name)
if not parts:
return self.vendor
return " ".join(parts)

# pylint: disable=unused-argument
def _before_cur_exec(self, conn, cursor, statement, *args):
attrs, found = _get_attributes_from_url(conn.engine.url)
if not found:
attrs = _get_attributes_from_cursor(self.vendor, cursor, attrs)

db_name = attrs.get(_DB, "")
self.current_span = self.tracer.start_span(
statement, kind=trace.SpanKind.CLIENT
self._operation_name(db_name, statement),
kind=trace.SpanKind.CLIENT,
)
with self.tracer.use_span(self.current_span, end_on_exit=False):
if self.current_span.is_recording():
self.current_span.set_attribute(_STMT, statement)
self.current_span.set_attribute("db.system", self.vendor)

if not _set_attributes_from_url(
self.current_span, conn.engine.url
):
_set_attributes_from_cursor(
self.current_span, self.vendor, cursor
)
for key, value in attrs.items():
self.current_span.set_attribute(key, value)

# pylint: disable=unused-argument
def _after_cur_exec(self, conn, cursor, statement, *args):
Expand All @@ -108,25 +124,22 @@ def _handle_error(self, context):
self.current_span.end()


def _set_attributes_from_url(span: trace.Span, url):
def _get_attributes_from_url(url):
"""Set connection tags from the url. return true if successful."""
if span.is_recording():
if url.host:
span.set_attribute(_HOST, url.host)
if url.port:
span.set_attribute(_PORT, url.port)
if url.database:
span.set_attribute(_DB, url.database)
if url.username:
span.set_attribute(_USER, url.username)

return bool(url.host)


def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
attrs = {}
if url.host:
attrs[_HOST] = url.host
if url.port:
attrs[_PORT] = url.port
if url.database:
attrs[_DB] = url.database
if url.username:
attrs[_USER] = url.username
return attrs, bool(url.host)


def _get_attributes_from_cursor(vendor, cursor, attrs):
"""Attempt to set db connection attributes by introspecting the cursor."""
if not span.is_recording():
return
if vendor == "postgresql":
# pylint: disable=import-outside-toplevel
from psycopg2.extensions import parse_dsn
Expand All @@ -135,6 +148,7 @@ def _set_attributes_from_cursor(span: trace.Span, vendor, cursor):
dsn = getattr(cursor.connection, "dsn", None)
if dsn:
data = parse_dsn(dsn)
span.set_attribute(_DB, data.get("dbname"))
span.set_attribute(_HOST, data.get("host"))
span.set_attribute(_PORT, int(data.get("port")))
attrs[_DB] = data.get("dbname")
attrs[_HOST] = data.get("host")
attrs[_PORT] = int(data.get("port"))
return attrs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def test_trace_integration(self):
spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)

def test_not_recording(self):
Expand Down Expand Up @@ -68,5 +68,5 @@ def test_create_engine_wrapper(self):
spans = self.memory_exporter.get_finished_spans()

self.assertEqual(len(spans), 1)
self.assertEqual(spans[0].name, "SELECT 1 + 1;")
self.assertEqual(spans[0].name, "SELECT :memory:")
self.assertEqual(spans[0].kind, trace.SpanKind.CLIENT)
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,8 @@ def tearDown(self):
super().tearDown()

def _check_span(self, span, name):
if self.SQL_DB:
name = "{0} {1}".format(name, self.SQL_DB)
self.assertEqual(span.name, name)
self.assertEqual(span.attributes.get(_DB), self.SQL_DB)
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
Expand All @@ -129,7 +131,7 @@ def test_orm_insert(self):
stmt += "(?, ?)"
else:
stmt += "(%(id)s, %(name)s)"
self._check_span(span, stmt)
self._check_span(span, "INSERT")
self.assertIn("INSERT INTO players", span.attributes.get(_STMT))
self.check_meta(span)

Expand All @@ -146,7 +148,7 @@ def test_session_query(self):
stmt += "?"
else:
stmt += "%(name_1)s"
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertIn(
"SELECT players.id AS players_id, players.name AS players_name \nFROM players \nWHERE players.name",
span.attributes.get(_STMT),
Expand All @@ -163,7 +165,7 @@ def test_engine_connect_execute(self):
spans = self.memory_exporter.get_finished_spans()
self.assertEqual(len(spans), 1)
span = spans[0]
self._check_span(span, stmt)
self._check_span(span, "SELECT")
self.assertEqual(span.attributes.get(_STMT), "SELECT * FROM players")
self.check_meta(span)

Expand All @@ -188,4 +190,4 @@ def test_parent(self):
self.assertEqual(parent_span.name, "sqlalch_op")
self.assertEqual(parent_span.instrumentation_info.name, "sqlalch_svc")

self.assertEqual(child_span.name, stmt)
self.assertEqual(child_span.name, "SELECT " + self.SQL_DB)
Original file line number Diff line number Diff line change
Expand Up @@ -56,14 +56,14 @@ def tearDown(self):

def test_engine_traced(self):
# ensures that the engine is traced
rows = self.conn.execute("SELECT 1").fetchall()
rows = self.conn.execute("SELECT").fetchall()
self.assertEqual(len(rows), 1)

traces = self.memory_exporter.get_finished_spans()
# trace composition
self.assertEqual(len(traces), 1)
span = traces[0]
# check subset of span fields
self.assertEqual(span.name, "SELECT 1")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertIs(span.status.status_code, trace.status.StatusCode.UNSET)
self.assertGreater((span.end_time - span.start_time), 0)
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, "SELECT * FROM a_wrong_table")
self.assertEqual(span.name, "SELECT opentelemetry-tests")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def test_engine_execute_errors(self):
self.assertEqual(len(spans), 1)
span = spans[0]
# span fields
self.assertEqual(span.name, stmt)
self.assertEqual(span.name, "SELECT :memory:")
self.assertEqual(
span.attributes.get(_STMT), "SELECT * FROM a_wrong_table"
)
Expand Down

0 comments on commit 8c8f278

Please sign in to comment.