API: support metrics for prometheus.(#2899) (#3189)

* API: support metrics for prometheus.

* Metrics: optimize metrics statistics info.

* Refine: remove redundant code.

* Refine: fix metrics srs_streams param.

* Metrics: add major param.

* Metrics: refine params and metric comments.

* For #2899: API: Support exporter for Prometheus. v5.0.67

Co-authored-by: winlin <winlin@vip.126.com>
pull/3195/head
chundonglinlin 2 years ago committed by GitHub
parent e31f3b0e64
commit 981cab40d3
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -657,6 +657,21 @@ tencentcloud_apm {
debug_logging off; debug_logging off;
} }
# Prometheus exporter config.
# See https://prometheus.io/docs/instrumenting/exporters
exporter {
# Whether exporter is enabled.
# Overwrite by env SRS_EXPORTER_ENABLED
# Default: off
enabled off;
# The logging label to category the cluster servers.
# Overwrite by env SRS_EXPORTER_LABEL
label cn-beijing;
# The logging tag to category the cluster servers.
# Overwrite by env SRS_EXPORTER_TAG
tag cn-edge;
}
############################################################################################# #############################################################################################
# heartbeat/stats sections # heartbeat/stats sections
############################################################################################# #############################################################################################

@ -7,6 +7,7 @@ The changelog for SRS.
## SRS 5.0 Changelog ## SRS 5.0 Changelog
* v5.0, 2022-09-27, For [#2899](https://github.com/ossrs/srs/issues/2899): API: Support exporter for Prometheus. v5.0.67
* v5.0, 2022-09-27, For [#3167](https://github.com/ossrs/srs/issues/3167): WebRTC: Refine sequence jitter algorithm. v5.0.66 * v5.0, 2022-09-27, For [#3167](https://github.com/ossrs/srs/issues/3167): WebRTC: Refine sequence jitter algorithm. v5.0.66
* v5.0, 2022-09-22, Fix [#3164](https://github.com/ossrs/srs/issues/3164): SRT: Choppy when audio ts gap is too large. v5.0.65 * v5.0, 2022-09-22, Fix [#3164](https://github.com/ossrs/srs/issues/3164): SRT: Choppy when audio ts gap is too large. v5.0.65
* v5.0, 2022-09-16, APM: Support distributed tracing by Tencent Cloud APM. v5.0.64 * v5.0, 2022-09-16, APM: Support distributed tracing by Tencent Cloud APM. v5.0.64

@ -2227,6 +2227,7 @@ srs_error_t SrsConfig::check_normal_config()
&& n != "inotify_auto_reload" && n != "auto_reload_for_docker" && n != "tcmalloc_release_rate" && n != "inotify_auto_reload" && n != "auto_reload_for_docker" && n != "tcmalloc_release_rate"
&& n != "query_latest_version" && n != "first_wait_for_qlv" && n != "threads" && n != "query_latest_version" && n != "first_wait_for_qlv" && n != "threads"
&& n != "circuit_breaker" && n != "is_full" && n != "in_docker" && n != "tencentcloud_cls" && n != "circuit_breaker" && n != "is_full" && n != "in_docker" && n != "tencentcloud_cls"
&& n != "exporter"
) { ) {
return srs_error_new(ERROR_SYSTEM_CONFIG_INVALID, "illegal directive %s", n.c_str()); return srs_error_new(ERROR_SYSTEM_CONFIG_INVALID, "illegal directive %s", n.c_str());
} }
@ -3563,6 +3564,63 @@ bool SrsConfig::get_tencentcloud_apm_debug_logging()
return SRS_CONF_PERFER_FALSE(conf->arg0()); return SRS_CONF_PERFER_FALSE(conf->arg0());
} }
bool SrsConfig::get_exporter_enabled()
{
SRS_OVERWRITE_BY_ENV_BOOL("SRS_EXPORTER_ENABLED");
static bool DEFAULT = false;
SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}
conf = conf->get("enabled");
if (!conf) {
return DEFAULT;
}
return SRS_CONF_PERFER_FALSE(conf->arg0());
}
string SrsConfig::get_exporter_label()
{
SRS_OVERWRITE_BY_ENV_STRING("SRS_EXPORTER_LABEL");
static string DEFAULT = "";
SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}
conf = conf->get("label");
if (!conf) {
return DEFAULT;
}
return conf->arg0();
}
string SrsConfig::get_exporter_tag()
{
SRS_OVERWRITE_BY_ENV_STRING("SRS_EXPORTER_TAG");
static string DEFAULT = "";
SrsConfDirective* conf = root->get("exporter");
if (!conf) {
return DEFAULT;
}
conf = conf->get("tag");
if (!conf) {
return DEFAULT;
}
return conf->arg0();
}
vector<SrsConfDirective*> SrsConfig::get_stream_casters() vector<SrsConfDirective*> SrsConfig::get_stream_casters()
{ {
srs_assert(root); srs_assert(root);

@ -1086,6 +1086,11 @@ public:
// The device name configed in args of directive. // The device name configed in args of directive.
// @return the disk device name to stat. NULL if not configed. // @return the disk device name to stat. NULL if not configed.
virtual SrsConfDirective* get_stats_disk_device(); virtual SrsConfDirective* get_stats_disk_device();
public:
// Get Prometheus exporter config.
virtual bool get_exporter_enabled();
virtual std::string get_exporter_label();
virtual std::string get_exporter_tag();
}; };
#endif #endif

@ -1062,3 +1062,96 @@ srs_error_t SrsGoApiTcmalloc::serve_http(ISrsHttpResponseWriter* w, ISrsHttpMess
} }
#endif #endif
SrsGoApiMetrics::SrsGoApiMetrics()
{
enabled_ = _srs_config->get_exporter_enabled();
label_ = _srs_config->get_exporter_label();
tag_ = _srs_config->get_exporter_tag();
}
SrsGoApiMetrics::~SrsGoApiMetrics()
{
}
srs_error_t SrsGoApiMetrics::serve_http(ISrsHttpResponseWriter* w, ISrsHttpMessage* r)
{
// whether enabled the HTTP Metrics API.
if (!enabled_) {
return srs_api_response_code(w, r, ERROR_EXPORTER_DISABLED);
}
/*
* build_info gauge
* send_bytes_total counter
* receive_bytes_total counter
* streams gauge
* clients gauge
* clients_total counter
* error counter
*/
SrsStatistic* stat = SrsStatistic::instance();
std::stringstream ss;
// Build info from Config.
ss << "# HELP srs_build_info A metric with a constant '1' value labeled by build_date, version from which SRS was built.\n"
<< "# TYPE srs_build_info gauge\n"
<< "srs_build_info{"
<< "build_date=\"" << SRS_BUILD_DATE << "\","
<< "major=\"" << VERSION_MAJOR << "\","
<< "version=\"" << RTMP_SIG_SRS_VERSION << "\","
<< "code=\"" << RTMP_SIG_SRS_CODE<< "\"";
if (!label_.empty()) ss << ",label=\"" << label_ << "\"";
if (!tag_.empty()) ss << ",tag=\"" << tag_ << "\"";
ss << "} 1\n";
// Dump metrics by statistic.
int64_t send_bytes, recv_bytes, nstreams, nclients, total_nclients, nerrs;
stat->dumps_metrics(send_bytes, recv_bytes, nstreams, nclients, total_nclients, nerrs);
// The total of bytes sent.
ss << "# HELP srs_send_bytes_total SRS total sent bytes.\n"
<< "# TYPE srs_send_bytes_total counter\n"
<< "srs_send_bytes_total "
<< send_bytes
<< "\n";
// The total of bytes received.
ss << "# HELP srs_receive_bytes_total SRS total received bytes.\n"
<< "# TYPE srs_receive_bytes_total counter\n"
<< "srs_receive_bytes_total "
<< recv_bytes
<< "\n";
// Current number of online streams.
ss << "# HELP srs_streams The number of SRS concurrent streams.\n"
<< "# TYPE srs_streams gauge\n"
<< "srs_streams "
<< nstreams
<< "\n";
// Current number of online clients.
ss << "# HELP srs_clients The number of SRS concurrent clients.\n"
<< "# TYPE srs_clients gauge\n"
<< "srs_clients "
<< nclients
<< "\n";
// The total of clients connections.
ss << "# HELP srs_clients_total The total counts of SRS clients.\n"
<< "# TYPE srs_clients_total counter\n"
<< "srs_clients_total "
<< total_nclients
<< "\n";
// The total of clients errors.
ss << "# HELP srs_clients_errs_total The total errors of SRS clients.\n"
<< "# TYPE srs_clients_errs_total counter\n"
<< "srs_clients_errs_total "
<< nerrs
<< "\n";
return srs_api_response(w, r, ss.str());
}

@ -216,5 +216,18 @@ public:
}; };
#endif #endif
class SrsGoApiMetrics : public ISrsHttpHandler
{
private:
bool enabled_;
std::string label_;
std::string tag_;
public:
SrsGoApiMetrics();
virtual ~SrsGoApiMetrics();
public:
virtual srs_error_t serve_http(ISrsHttpResponseWriter* w, ISrsHttpMessage* r);
};
#endif #endif

@ -406,7 +406,7 @@ srs_error_t SrsHttpxConn::on_conn_done(srs_error_t r0)
{ {
// Only stat the HTTP streaming clients, ignore all API clients. // Only stat the HTTP streaming clients, ignore all API clients.
if (enable_stat_) { if (enable_stat_) {
SrsStatistic::instance()->on_disconnect(get_id().c_str()); SrsStatistic::instance()->on_disconnect(get_id().c_str(), r0);
SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), conn->delta()); SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), conn->delta());
} }

@ -331,7 +331,8 @@ srs_error_t SrsHlsStream::on_timer(srs_utime_t interval)
http_hooks_on_stop(info->req); http_hooks_on_stop(info->req);
SrsStatistic* stat = SrsStatistic::instance(); SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(ctx); // TODO: FIXME: Should finger out the err.
stat->on_disconnect(ctx, srs_success);
map_ctx_info_.erase(it); map_ctx_info_.erase(it);
srs_freep(info); srs_freep(info);

@ -458,7 +458,8 @@ SrsRtcPlayStream::~SrsRtcPlayStream()
// update the statistic when client coveried. // update the statistic when client coveried.
SrsStatistic* stat = SrsStatistic::instance(); SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(cid_.c_str()); // TODO: FIXME: Should finger out the err.
stat->on_disconnect(cid_.c_str(), srs_success);
} }
srs_error_t SrsRtcPlayStream::initialize(SrsRequest* req, std::map<uint32_t, SrsRtcTrackDescription*> sub_relations) srs_error_t SrsRtcPlayStream::initialize(SrsRequest* req, std::map<uint32_t, SrsRtcTrackDescription*> sub_relations)
@ -1108,7 +1109,8 @@ SrsRtcPublishStream::~SrsRtcPublishStream()
// update the statistic when client coveried. // update the statistic when client coveried.
SrsStatistic* stat = SrsStatistic::instance(); SrsStatistic* stat = SrsStatistic::instance();
stat->on_disconnect(cid_.c_str()); // TODO: FIXME: Should finger out the err.
stat->on_disconnect(cid_.c_str(), srs_success);
} }
srs_error_t SrsRtcPublishStream::initialize(SrsRequest* r, SrsRtcSourceDescription* stream_desc) srs_error_t SrsRtcPublishStream::initialize(SrsRequest* r, SrsRtcSourceDescription* stream_desc)

@ -749,7 +749,7 @@ srs_error_t SrsRtcTcpConn::cycle()
srs_error_t err = do_cycle(); srs_error_t err = do_cycle();
// Only stat the HTTP streaming clients, ignore all API clients. // Only stat the HTTP streaming clients, ignore all API clients.
SrsStatistic::instance()->on_disconnect(get_id().c_str()); SrsStatistic::instance()->on_disconnect(get_id().c_str(), err);
SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), delta_); SrsStatistic::instance()->kbps_add_delta(get_id().c_str(), delta_);
// Because we use manager to manage this object, not the http connection object, so we must remove it here. // Because we use manager to manage this object, not the http connection object, so we must remove it here.

@ -1551,7 +1551,7 @@ srs_error_t SrsRtmpConn::cycle()
// Update statistic when done. // Update statistic when done.
SrsStatistic* stat = SrsStatistic::instance(); SrsStatistic* stat = SrsStatistic::instance();
stat->kbps_add_delta(get_id().c_str(), delta_); stat->kbps_add_delta(get_id().c_str(), delta_);
stat->on_disconnect(get_id().c_str()); stat->on_disconnect(get_id().c_str(), err);
// Notify manager to remove it. // Notify manager to remove it.
// Note that we create this object, so we use manager to remove it. // Note that we create this object, so we use manager to remove it.

@ -887,6 +887,10 @@ srs_error_t SrsServer::http_handle()
return srs_error_wrap(err, "handle tests errors"); return srs_error_wrap(err, "handle tests errors");
} }
#endif #endif
// metrics by prometheus
if ((err = http_api_mux->handle("/metrics", new SrsGoApiMetrics())) != srs_success) {
return srs_error_wrap(err, "handle tests errors");
}
// TODO: FIXME: for console. // TODO: FIXME: for console.
// TODO: FIXME: support reload. // TODO: FIXME: support reload.

@ -228,7 +228,7 @@ srs_error_t SrsMpegtsSrtConn::cycle()
// Update statistic when done. // Update statistic when done.
SrsStatistic* stat = SrsStatistic::instance(); SrsStatistic* stat = SrsStatistic::instance();
stat->kbps_add_delta(get_id().c_str(), delta_); stat->kbps_add_delta(get_id().c_str(), delta_);
stat->on_disconnect(get_id().c_str()); stat->on_disconnect(get_id().c_str(), err);
// Notify manager to remove it. // Notify manager to remove it.
// Note that we create this object, so we use manager to remove it. // Note that we create this object, so we use manager to remove it.

@ -241,6 +241,9 @@ SrsStatistic* SrsStatistic::_instance = NULL;
SrsStatistic::SrsStatistic() SrsStatistic::SrsStatistic()
{ {
kbps = new SrsKbps(); kbps = new SrsKbps();
nb_clients_ = 0;
nb_errs_ = 0;
} }
SrsStatistic::~SrsStatistic() SrsStatistic::~SrsStatistic()
@ -421,11 +424,13 @@ srs_error_t SrsStatistic::on_client(std::string id, SrsRequest* req, ISrsExpire*
// @see https://github.com/ossrs/srs/issues/2311 // @see https://github.com/ossrs/srs/issues/2311
srs_freep(client->req); srs_freep(client->req);
client->req = req->copy(); client->req = req->copy();
nb_clients_++;
return err; return err;
} }
void SrsStatistic::on_disconnect(std::string id) void SrsStatistic::on_disconnect(std::string id, srs_error_t err)
{ {
std::map<std::string, SrsStatisticClient*>::iterator it = clients.find(id); std::map<std::string, SrsStatisticClient*>::iterator it = clients.find(id);
if (it == clients.end()) return; if (it == clients.end()) return;
@ -440,6 +445,10 @@ void SrsStatistic::on_disconnect(std::string id)
stream->nb_clients--; stream->nb_clients--;
vhost->nb_clients--; vhost->nb_clients--;
if (srs_error_code(err) != ERROR_SUCCESS) {
nb_errs_++;
}
cleanup_stream(stream); cleanup_stream(stream);
} }
@ -721,3 +730,19 @@ SrsStatisticStream* SrsStatistic::create_stream(SrsStatisticVhost* vhost, SrsReq
return stream; return stream;
} }
srs_error_t SrsStatistic::dumps_metrics(int64_t& send_bytes, int64_t& recv_bytes, int64_t& nstreams, int64_t& nclients, int64_t& total_nclients, int64_t& nerrs)
{
srs_error_t err = srs_success;
send_bytes = kbps->get_send_bytes();
recv_bytes = kbps->get_recv_bytes();
nstreams = streams.size();
nclients = clients.size();
total_nclients = nb_clients_;
nerrs = nb_errs_;
return err;
}

@ -139,6 +139,11 @@ private:
std::map<std::string, SrsStatisticClient*> clients; std::map<std::string, SrsStatisticClient*> clients;
// The server total kbps. // The server total kbps.
SrsKbps* kbps; SrsKbps* kbps;
private:
// The total of clients connections.
int64_t nb_clients_;
// The total of clients errors.
int64_t nb_errs_;
private: private:
SrsStatistic(); SrsStatistic();
virtual ~SrsStatistic(); virtual ~SrsStatistic();
@ -177,7 +182,7 @@ public:
// @remark the on_disconnect always call, while the on_client is call when // @remark the on_disconnect always call, while the on_client is call when
// only got the request object, so the client specified by id maybe not // only got the request object, so the client specified by id maybe not
// exists in stat. // exists in stat.
virtual void on_disconnect(std::string id); virtual void on_disconnect(std::string id, srs_error_t err);
private: private:
// Cleanup the stream if stream is not active and for the last client. // Cleanup the stream if stream is not active and for the last client.
void cleanup_stream(SrsStatisticStream* stream); void cleanup_stream(SrsStatisticStream* stream);
@ -210,6 +215,9 @@ public:
private: private:
virtual SrsStatisticVhost* create_vhost(SrsRequest* req); virtual SrsStatisticVhost* create_vhost(SrsRequest* req);
virtual SrsStatisticStream* create_stream(SrsStatisticVhost* vhost, SrsRequest* req); virtual SrsStatisticStream* create_stream(SrsStatisticVhost* vhost, SrsRequest* req);
public:
// Dumps exporter metrics.
virtual srs_error_t dumps_metrics(int64_t& send_bytes, int64_t& recv_bytes, int64_t& nstreams, int64_t& nclients, int64_t& total_nclients, int64_t& nerrs);
}; };
// Generate a random string id, with constant prefix. // Generate a random string id, with constant prefix.

@ -9,6 +9,6 @@
#define VERSION_MAJOR 5 #define VERSION_MAJOR 5
#define VERSION_MINOR 0 #define VERSION_MINOR 0
#define VERSION_REVISION 66 #define VERSION_REVISION 67
#endif #endif

@ -100,6 +100,7 @@
XX(ERROR_APM_EXCEED_SIZE , 1087, "ApmExceedSize", "APM logs exceed max size 5MB") \ XX(ERROR_APM_EXCEED_SIZE , 1087, "ApmExceedSize", "APM logs exceed max size 5MB") \
XX(ERROR_APM_ENDPOINT , 1088, "ApmEndpoint", "APM endpoint is invalid") \ XX(ERROR_APM_ENDPOINT , 1088, "ApmEndpoint", "APM endpoint is invalid") \
XX(ERROR_APM_AUTH , 1089, "ApmAuth", "APM team or token is invalid") \ XX(ERROR_APM_AUTH , 1089, "ApmAuth", "APM team or token is invalid") \
XX(ERROR_EXPORTER_DISABLED , 1090, "ExporterDisable", "Prometheus exporter is disabled") \
/**************************************************/ /**************************************************/
/* RTMP protocol error. */ /* RTMP protocol error. */

Loading…
Cancel
Save