Skip to content

Commit

Permalink
Expose GRPC KeepAlive parameters to triton server (triton-inference-server#3089)

Browse files Browse the repository at this point in the history

* Add GRPC KeepAlive parameters to GRPC server class, constructor, and builder with documented default values

* Add GRPC KeepAlive parameters to tritonserver CLI

* Run clang-format-6.0, remove TODO

* Add keepalive parameters to GRPCServer::Create(); Add reference link for keepalive default values; Add default values to tritonserver option help strings.

* Address review comments, (1) encapsulate keepalive params in KeepAliveOptions struct, (2) remove client mention in server-side arg help strings, (3) update copyright headers
  • Loading branch information
rmccorm4 committed Jul 19, 2021
1 parent d98e3c3 commit 6cb7472
Show file tree
Hide file tree
Showing 3 changed files with 138 additions and 17 deletions.
53 changes: 46 additions & 7 deletions src/servers/grpc_server.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -3920,10 +3920,10 @@ ModelStreamInferHandler::StreamInferResponseComplete(
response->set_error_message(status.error_message());

// repopulate the id so that client knows which request failed.
const char *id;
const char* id;
LOG_TRITONSERVER_ERROR(
TRITONSERVER_InferenceResponseId(iresponse, &id),
"couldn't retrieve id for failed request");
TRITONSERVER_InferenceResponseId(iresponse, &id),
"couldn't retrieve id for failed request");
LOG_VERBOSE(1) << "Failed for ID: " << id << std::endl;
response->mutable_infer_response()->set_id(id);
}
Expand Down Expand Up @@ -3977,11 +3977,13 @@ GRPCServer::GRPCServer(
const std::shared_ptr<SharedMemoryManager>& shm_manager,
const std::string& server_addr, bool use_ssl, const SslOptions& ssl_options,
const int infer_allocation_pool_size,
grpc_compression_level compression_level)
grpc_compression_level compression_level,
const KeepAliveOptions& keepalive_options)
: server_(server), trace_manager_(trace_manager), shm_manager_(shm_manager),
server_addr_(server_addr), use_ssl_(use_ssl), ssl_options_(ssl_options),
infer_allocation_pool_size_(infer_allocation_pool_size),
compression_level_(compression_level), running_(false)
compression_level_(compression_level),
keepalive_options_(keepalive_options), running_(false)
{
}

Expand All @@ -3997,12 +3999,13 @@ GRPCServer::Create(
const std::shared_ptr<SharedMemoryManager>& shm_manager, int32_t port,
bool use_ssl, const SslOptions& ssl_options, int infer_allocation_pool_size,
grpc_compression_level compression_level,
const KeepAliveOptions& keepalive_options,
std::unique_ptr<GRPCServer>* grpc_server)
{
const std::string addr = "0.0.0.0:" + std::to_string(port);
grpc_server->reset(new GRPCServer(
server, trace_manager, shm_manager, addr, use_ssl, ssl_options,
infer_allocation_pool_size, compression_level));
infer_allocation_pool_size, compression_level, keepalive_options));

return nullptr; // success
}
Expand Down Expand Up @@ -4039,6 +4042,42 @@ GRPCServer::Start()
grpc_builder_.AddListeningPort(server_addr_, credentials);
grpc_builder_.SetMaxMessageSize(MAX_GRPC_MESSAGE_SIZE);
grpc_builder_.RegisterService(&service_);
// GRPC KeepAlive Docs: https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
// NOTE: In order to work properly, the client-side settings should
// be in agreement with server-side settings.
grpc_builder_.AddChannelArgument(
GRPC_ARG_KEEPALIVE_TIME_MS, keepalive_options_.keepalive_time_ms);
grpc_builder_.AddChannelArgument(
GRPC_ARG_KEEPALIVE_TIMEOUT_MS, keepalive_options_.keepalive_timeout_ms);
grpc_builder_.AddChannelArgument(
GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
keepalive_options_.keepalive_permit_without_calls);
grpc_builder_.AddChannelArgument(
GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
keepalive_options_.http2_max_pings_without_data);
grpc_builder_.AddChannelArgument(
GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
keepalive_options_.http2_min_recv_ping_interval_without_data_ms);
grpc_builder_.AddChannelArgument(
GRPC_ARG_HTTP2_MAX_PING_STRIKES,
keepalive_options_.http2_max_ping_strikes);

LOG_VERBOSE(1) << "=== GRPC KeepAlive Options ===";
LOG_VERBOSE(1) << "keepalive_time_ms: "
<< keepalive_options_.keepalive_time_ms;
LOG_VERBOSE(1) << "keepalive_timeout_ms: "
<< keepalive_options_.keepalive_timeout_ms;
LOG_VERBOSE(1) << "keepalive_permit_without_calls: "
<< keepalive_options_.keepalive_permit_without_calls;
LOG_VERBOSE(1) << "http2_max_pings_without_data: "
<< keepalive_options_.http2_max_pings_without_data;
LOG_VERBOSE(1)
<< "http2_min_recv_ping_interval_without_data_ms: "
<< keepalive_options_.http2_min_recv_ping_interval_without_data_ms;
LOG_VERBOSE(1) << "http2_max_ping_strikes: "
<< keepalive_options_.http2_max_ping_strikes;
LOG_VERBOSE(1) << "==============================";

common_cq_ = grpc_builder_.AddCompletionQueue();
model_infer_cq_ = grpc_builder_.AddCompletionQueue();
model_stream_infer_cq_ = grpc_builder_.AddCompletionQueue();
Expand Down
25 changes: 23 additions & 2 deletions src/servers/grpc_server.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2019-2020, NVIDIA CORPORATION. All rights reserved.
// Copyright 2019-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -45,6 +45,23 @@ struct SslOptions {
bool use_mutual_auth;
};

// Server-side GRPC KeepAlive settings. Semantics and default values follow
// the gRPC core documentation:
// https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
struct KeepAliveOptions {
  explicit KeepAliveOptions() = default;

  // Period (in ms) after which a keepalive ping is sent on the transport.
  // Default: 2 hours.
  int keepalive_time_ms{7200000};
  // Time (in ms) the sender of a keepalive ping waits for an acknowledgement
  // before closing the connection. Default: 20 seconds.
  int keepalive_timeout_ms{20000};
  // Whether keepalive pings may be sent when there are no calls in flight.
  // Default: false.
  bool keepalive_permit_without_calls{false};
  // Maximum number of pings sent while there is no data/header frame to
  // send; 0 removes the restriction. Default: 2.
  int http2_max_pings_without_data{2};
  // Minimum time (in ms) expected between successive pings received while
  // no data/header frames are being sent; faster pings count as ping
  // strikes. Default: 5 minutes.
  int http2_min_recv_ping_interval_without_data_ms{300000};
  // Number of bad pings tolerated before the server sends an HTTP2 GOAWAY
  // and closes the transport; 0 tolerates any number. Default: 2.
  int http2_max_ping_strikes{2};
};

class GRPCServer {
public:
static TRITONSERVER_Error* Create(
Expand All @@ -53,6 +70,7 @@ class GRPCServer {
const std::shared_ptr<SharedMemoryManager>& shm_manager, int32_t port,
bool use_ssl, const SslOptions& ssl_options,
int infer_allocation_pool_size, grpc_compression_level compression_level,
const KeepAliveOptions& keepalive_options,
std::unique_ptr<GRPCServer>* grpc_server);

~GRPCServer();
Expand Down Expand Up @@ -81,7 +99,8 @@ class GRPCServer {
const std::shared_ptr<SharedMemoryManager>& shm_manager,
const std::string& server_addr, bool use_ssl,
const SslOptions& ssl_options, const int infer_allocation_pool_size,
grpc_compression_level compression_level);
grpc_compression_level compression_level,
const KeepAliveOptions& keepalive_options);

std::shared_ptr<TRITONSERVER_Server> server_;
TraceManager* trace_manager_;
Expand All @@ -93,6 +112,8 @@ class GRPCServer {
const int infer_allocation_pool_size_;
grpc_compression_level compression_level_;

const KeepAliveOptions keepalive_options_;

std::unique_ptr<grpc::ServerCompletionQueue> common_cq_;
std::unique_ptr<grpc::ServerCompletionQueue> model_infer_cq_;
std::unique_ptr<grpc::ServerCompletionQueue> model_stream_infer_cq_;
Expand Down
77 changes: 69 additions & 8 deletions src/servers/main.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright (c) 2018-2021, NVIDIA CORPORATION. All rights reserved.
// Copyright 2018-2021, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -94,6 +94,8 @@ bool grpc_use_ssl_ = false;
nvidia::inferenceserver::SslOptions grpc_ssl_options_;
grpc_compression_level grpc_response_compression_level_ =
GRPC_COMPRESS_LEVEL_NONE;
// KeepAlive defaults: https://grpc.github.io/grpc/cpp/md_doc_keepalive.html
nvidia::inferenceserver::KeepAliveOptions grpc_keepalive_options_;
#endif // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -221,6 +223,12 @@ enum OptionId {
OPTION_GRPC_SERVER_KEY,
OPTION_GRPC_ROOT_CERT,
OPTION_GRPC_RESPONSE_COMPRESSION_LEVEL,
OPTION_GRPC_ARG_KEEPALIVE_TIME_MS,
OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS,
OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES,
#endif // TRITON_ENABLE_GRPC
#if defined(TRITON_ENABLE_SAGEMAKER)
OPTION_ALLOW_SAGEMAKER,
Expand Down Expand Up @@ -350,6 +358,38 @@ std::vector<Option> options_
"The compression level to be used while returning the infer response to "
"the peer. Allowed values are none, low, medium and high. By default, "
"compression level is selected as none."},
{OPTION_GRPC_ARG_KEEPALIVE_TIME_MS, "grpc-keepalive-time", Option::ArgInt,
"The period (in milliseconds) after which a keepalive ping is sent on "
"the transport. Default is 7200000 (2 hours)."},
{OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS, "grpc-keepalive-timeout",
Option::ArgInt,
"The period (in milliseconds) the sender of the keepalive ping waits "
"for an acknowledgement. If it does not receive an acknowledgment "
"within this time, it will close the connection. "
"Default is 20000 (20 seconds)."},
{OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS,
"grpc-keepalive-permit-without-calls", Option::ArgBool,
"Allows keepalive pings to be sent even if there are no calls in flight "
"(0 : false; 1 : true). Default is 0 (false)."},
{OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA,
"grpc-http2-max-pings-without-data", Option::ArgInt,
"The maximum number of pings that can be sent when there is no "
"data/header frame to be sent. gRPC Core will not continue sending "
"pings if we run over the limit. Setting it to 0 allows sending pings "
"without such a restriction. Default is 2."},
{OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS,
"grpc-http2-min-recv-ping-interval-without-data", Option::ArgInt,
"If there are no data/header frames being sent on the transport, this "
"channel argument on the server side controls the minimum time "
"(in milliseconds) that gRPC Core would expect between receiving "
"successive pings. If the time between successive pings is less than "
"this time, then the ping will be considered a bad ping from the peer. "
"Such a ping counts as a ‘ping strike’. Default is 300000 (5 minutes)."},
{OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES, "grpc-http2-max-ping-strikes",
Option::ArgInt,
"Maximum number of bad pings that the server will tolerate before "
"sending an HTTP2 GOAWAY frame and closing the transport. Setting it to "
"0 allows the server to accept any number of bad pings. Default is 2."},
#endif // TRITON_ENABLE_GRPC
#if defined(TRITON_ENABLE_SAGEMAKER)
{OPTION_ALLOW_SAGEMAKER, "allow-sagemaker", Option::ArgBool,
Expand Down Expand Up @@ -554,7 +594,7 @@ StartGrpcService(
TRITONSERVER_Error* err = nvidia::inferenceserver::GRPCServer::Create(
server, trace_manager, shm_manager, grpc_port_, grpc_use_ssl_,
grpc_ssl_options_, grpc_infer_allocation_pool_size_,
grpc_response_compression_level_, service);
grpc_response_compression_level_, grpc_keepalive_options_, service);
if (err == nullptr) {
err = (*service)->Start();
}
Expand Down Expand Up @@ -1048,7 +1088,7 @@ Parse(TRITONSERVER_ServerOptions** server_options, int argc, char** argv)
int32_t grpc_port = grpc_port_;
int32_t grpc_use_ssl = grpc_use_ssl_;
int32_t grpc_infer_allocation_pool_size = grpc_infer_allocation_pool_size_;
grpc_compression_level grpc_response_comression_level =
grpc_compression_level grpc_response_compression_level =
grpc_response_compression_level_;
#endif // TRITON_ENABLE_GRPC

Expand Down Expand Up @@ -1184,13 +1224,13 @@ Parse(TRITONSERVER_ServerOptions** server_options, int argc, char** argv)
std::transform(
mode_str.begin(), mode_str.end(), mode_str.begin(), ::tolower);
if (mode_str == "none") {
grpc_response_comression_level = GRPC_COMPRESS_LEVEL_NONE;
grpc_response_compression_level = GRPC_COMPRESS_LEVEL_NONE;
} else if (mode_str == "low") {
grpc_response_comression_level = GRPC_COMPRESS_LEVEL_LOW;
grpc_response_compression_level = GRPC_COMPRESS_LEVEL_LOW;
} else if (mode_str == "medium") {
grpc_response_comression_level = GRPC_COMPRESS_LEVEL_MED;
grpc_response_compression_level = GRPC_COMPRESS_LEVEL_MED;
} else if (mode_str == "high") {
grpc_response_comression_level = GRPC_COMPRESS_LEVEL_HIGH;
grpc_response_compression_level = GRPC_COMPRESS_LEVEL_HIGH;
} else {
std::cerr
<< "invalid argument for --grpc_infer_response_compression_level"
Expand All @@ -1200,6 +1240,27 @@ Parse(TRITONSERVER_ServerOptions** server_options, int argc, char** argv)
}
break;
}
case OPTION_GRPC_ARG_KEEPALIVE_TIME_MS:
grpc_keepalive_options_.keepalive_time_ms = ParseIntOption(optarg);
break;
case OPTION_GRPC_ARG_KEEPALIVE_TIMEOUT_MS:
grpc_keepalive_options_.keepalive_timeout_ms = ParseIntOption(optarg);
break;
case OPTION_GRPC_ARG_KEEPALIVE_PERMIT_WITHOUT_CALLS:
grpc_keepalive_options_.keepalive_permit_without_calls =
ParseBoolOption(optarg);
break;
case OPTION_GRPC_ARG_HTTP2_MAX_PINGS_WITHOUT_DATA:
grpc_keepalive_options_.http2_max_pings_without_data =
ParseIntOption(optarg);
break;
case OPTION_GRPC_ARG_HTTP2_MIN_RECV_PING_INTERVAL_WITHOUT_DATA_MS:
grpc_keepalive_options_.http2_min_recv_ping_interval_without_data_ms =
ParseIntOption(optarg);
break;
case OPTION_GRPC_ARG_HTTP2_MAX_PING_STRIKES:
grpc_keepalive_options_.http2_max_ping_strikes = ParseIntOption(optarg);
break;
#endif // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
Expand Down Expand Up @@ -1316,7 +1377,7 @@ Parse(TRITONSERVER_ServerOptions** server_options, int argc, char** argv)
grpc_port_ = grpc_port;
grpc_infer_allocation_pool_size_ = grpc_infer_allocation_pool_size;
grpc_use_ssl_ = grpc_use_ssl;
grpc_response_compression_level_ = grpc_response_comression_level;
grpc_response_compression_level_ = grpc_response_compression_level;
#endif // TRITON_ENABLE_GRPC

#ifdef TRITON_ENABLE_METRICS
Expand Down

0 comments on commit 6cb7472

Please sign in to comment.