lws-metrics.h (11543B)
1 /* 2 * libwebsockets - small server side websockets and web server implementation 3 * 4 * Copyright (C) 2010 - 2021 Andy Green <andy@warmcat.com> 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a copy 7 * of this software and associated documentation files (the "Software"), to 8 * deal in the Software without restriction, including without limitation the 9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or 10 * sell copies of the Software, and to permit persons to whom the Software is 11 * furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included in 14 * all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 22 * IN THE SOFTWARE. 23 * 24 * Public apis related to metric collection and reporting 25 */ 26 27/* lws_metrics public part */ 28 29typedef uint64_t u_mt_t; 30 31enum { 32 LWSMTFL_REPORT_OUTLIERS = (1 << 0), 33 /**< track outliers and report them internally */ 34 LWSMTFL_REPORT_OOB = (1 << 1), 35 /**< report events as they happen */ 36 LWSMTFL_REPORT_INACTIVITY_AT_PERIODIC = (1 << 2), 37 /**< explicitly externally report no activity at periodic cb, by 38 * default no events in the period is just not reported */ 39 LWSMTFL_REPORT_MEAN = (1 << 3), 40 /**< average/min/max is meaningful, else only sum is meaningful */ 41 LWSMTFL_REPORT_ONLY_GO = (1 << 4), 42 /**< no-go pieces invalid */ 43 LWSMTFL_REPORT_DUTY_WALLCLOCK_US = (1 << 5), 44 /**< aggregate compares to wallclock us for duty cycle */ 45 LWSMTFL_REPORT_HIST = (1 << 6), 46 /**< our type is histogram (otherwise, sum / mean aggregation) */ 47}; 48 49/* 50 * lws_metrics_tag allows your object to accumulate OpenMetrics-style 51 * descriptive tags before accounting for it with a metrics object at the end. 52 * 53 * Tags should represent low entropy information that is likely to repeat 54 * identically, so, eg, http method name, not eg, latency in us which is 55 * unlikely to be seen the same twice. 56 * 57 * Tags are just a list of name=value pairs, used for qualifying the final 58 * metrics entry with decorations in additional dimensions. For example, 59 * rather than keep individual metrics on methods, scheme, mountpoint, result 60 * code, you can keep metrics on http transactions only, and qualify the 61 * transaction metrics entries with tags that can be queried on the metrics 62 * backend to get the finer-grained information. 63 * 64 * http_srv{code="404",mount="/",method="GET",scheme="http"} 3 65 * 66 * For OpenMetrics the tags are converted to a { list } and appended to the base 67 * metrics name before using with actual metrics objects, the same set of tags 68 * on different transactions resolve to the same qualification string. 69 */ 70 71typedef struct lws_metrics_tag { 72 lws_dll2_t list; 73 74 const char *name; /* tag, intended to be in .rodata, not copied */ 75 /* overallocated value */ 76} lws_metrics_tag_t; 77 78LWS_EXTERN LWS_VISIBLE int 79lws_metrics_tag_add(lws_dll2_owner_t *owner, const char *name, const char *val); 80 81#if defined(LWS_WITH_SYS_METRICS) 82/* 83 * wsi-specific version that also appends the tag value to the lifecycle tag 84 * used for logging the wsi identity 85 */ 86LWS_EXTERN LWS_VISIBLE int 87lws_metrics_tag_wsi_add(struct lws *wsi, const char *name, const char *val); 88#else 89#define lws_metrics_tag_wsi_add(_a, _b, _c) 90#endif 91 92#if defined(LWS_WITH_SECURE_STREAMS) 93/* 94 * ss-specific version that also appends the tag value to the lifecycle tag 95 * used for logging the ss identity 96 */ 97#if defined(LWS_WITH_SYS_METRICS) 98LWS_EXTERN LWS_VISIBLE int 99lws_metrics_tag_ss_add(struct lws_ss_handle *ss, const char *name, const char *val); 100#else 101#define lws_metrics_tag_ss_add(_a, _b, _c) 102#endif 103#endif 104 105LWS_EXTERN LWS_VISIBLE void 106lws_metrics_tags_destroy(lws_dll2_owner_t *owner); 107 108LWS_EXTERN LWS_VISIBLE size_t 109lws_metrics_tags_serialize(lws_dll2_owner_t *owner, char *buf, size_t len); 110 111LWS_EXTERN LWS_VISIBLE const char * 112lws_metrics_tag_get(lws_dll2_owner_t *owner, const char *name); 113 114/* histogram bucket */ 115 116typedef struct lws_metric_bucket { 117 struct lws_metric_bucket *next; 118 uint64_t count; 119 120 /* name + NUL is overallocated */ 121} lws_metric_bucket_t; 122 123/* get overallocated name of bucket from bucket pointer */ 124#define lws_metric_bucket_name_len(_b) (*((uint8_t *)&(_b)[1])) 125#define lws_metric_bucket_name(_b) (((const char *)&(_b)[1]) + 1) 126 127/* 128 * These represent persistent local event measurements. They may aggregate 129 * a large number of events inbetween external dumping of summaries of the 130 * period covered, in two different ways 131 * 132 * 1) aggregation by sum or mean, to absorb multiple scalar readings 133 * 134 * - go / no-go ratio counting 135 * - mean averaging for, eg, latencies 136 * - min / max for averaged values 137 * - period the stats covers 138 * 139 * 2) aggregation by histogram, to absorb a range of outcomes that may occur 140 * multiple times 141 * 142 * - add named buckets to histogram 143 * - bucket has a 64-bit count 144 * - bumping a bucket just increments the count if already exists, else adds 145 * a new one with count set to 1 146 * 147 * The same type with a union covers both cases. 148 * 149 * The lws_system ops api that hooks lws_metrics up to a metrics backend is 150 * given a pointer to these according to the related policy, eg, hourly, or 151 * every event passed straight through. 152 */ 153 154typedef struct lws_metric_pub { 155 const char *name; 156 /**< eg, "n.cn.dns", "vh.myendpoint" */ 157 void *backend_opaque; 158 /**< ignored by lws, backend handler completely owns it */ 159 160 lws_usec_t us_first; 161 /**< us time metric started collecting, reset to us_dumped at dump */ 162 lws_usec_t us_last; 163 /**< 0, or us time last event, reset to 0 at last dump */ 164 lws_usec_t us_dumped; 165 /**< 0 if never, else us time of last dump to external api */ 166 167 /* scope of data in .u is "since last dump" --> */ 168 169 union { 170 /* aggregation, by sum or mean */ 171 172 struct { 173 u_mt_t sum[2]; 174 /**< go, no-go summed for mean or plan sum */ 175 u_mt_t min; 176 /**< smallest individual measurement */ 177 u_mt_t max; 178 /**< largest individual measurement */ 179 180 uint32_t count[2]; 181 /**< go, no-go count of measurements in sum */ 182 } agg; 183 184 /* histogram with dynamic named buckets */ 185 186 struct { 187 lws_metric_bucket_t *head; 188 /**< first bucket in our bucket list */ 189 190 uint64_t total_count; 191 /**< total count in all of our buckets */ 192 uint32_t list_size; 193 /**< number of buckets in our bucket list */ 194 } hist; 195 } u; 196 197 uint8_t flags; 198 199} lws_metric_pub_t; 200 201LWS_EXTERN LWS_VISIBLE void 202lws_metrics_hist_bump_priv_tagged(lws_metric_pub_t *mt, lws_dll2_owner_t *tow, 203 lws_dll2_owner_t *tow2); 204 205 206/* 207 * Calipers are a helper struct for implementing "hanging latency" detection, 208 * where setting the start time and finding the end time may happen in more than 209 * one place. 210 * 211 * There are convenience wrappers to eliminate caliper definitions and code 212 * cleanly if WITH_SYS_METRICS is disabled for the build. 213 */ 214 215struct lws_metric; 216 217typedef struct lws_metric_caliper { 218 struct lws_dll2_owner mtags_owner; /**< collect tags here during 219 * caliper lifetime */ 220 struct lws_metric *mt; /**< NULL == inactive */ 221 lws_usec_t us_start; 222} lws_metric_caliper_t; 223 224#if defined(LWS_WITH_SYS_METRICS) 225#define lws_metrics_caliper_compose(_name) \ 226 lws_metric_caliper_t _name; 227#define lws_metrics_caliper_bind(_name, _mt) \ 228 { if (_name.mt) { \ 229 lwsl_err("caliper: overwrite %s\n", \ 230 lws_metrics_priv_to_pub(_name.mt)->name); \ 231 assert(0); } \ 232 _name.mt = _mt; _name.us_start = lws_now_usecs(); } 233#define lws_metrics_caliper_declare(_name, _mt) \ 234 lws_metric_caliper_t _name = { .mt = _mt, .us_start = lws_now_usecs() } 235#define lws_metrics_caliper_report(_name, _go_nogo) \ 236 { if (_name.us_start) { lws_metric_event(_name.mt, _go_nogo, \ 237 (u_mt_t)(lws_now_usecs() - \ 238 _name.us_start)); \ 239 } lws_metrics_caliper_done(_name); } 240#define lws_metrics_caliper_report_hist(_name, pwsi) if (_name.mt) { \ 241 lws_metrics_hist_bump_priv_tagged(lws_metrics_priv_to_pub(_name.mt), \ 242 &_name.mtags_owner, \ 243 pwsi ? &((pwsi)->cal_conn.mtags_owner) : NULL); \ 244 lws_metrics_caliper_done(_name); } 245 246#define lws_metrics_caliper_cancel(_name) { lws_metrics_caliper_done(_name); } 247#define lws_metrics_hist_bump(_mt, _name) \ 248 lws_metrics_hist_bump_(_mt, _name) 249#define lws_metrics_hist_bump_priv(_mt, _name) \ 250 lws_metrics_hist_bump_(lws_metrics_priv_to_pub(_mt), _name) 251#define lws_metrics_caliper_done(_name) { \ 252 _name.us_start = 0; _name.mt = NULL; \ 253 lws_metrics_tags_destroy(&_name.mtags_owner); } 254#else 255#define lws_metrics_caliper_compose(_name) 256#define lws_metrics_caliper_bind(_name, _mt) 257#define lws_metrics_caliper_declare(_name, _mp) 258#define lws_metrics_caliper_report(_name, _go_nogo) 259#define lws_metrics_caliper_report_hist(_name, pwsiconn) 260#define lws_metrics_caliper_cancel(_name) 261#define lws_metrics_hist_bump(_mt, _name) 262#define lws_metrics_hist_bump_priv(_mt, _name) 263#define lws_metrics_caliper_done(_name) 264#endif 265 266/** 267 * lws_metrics_format() - helper to format a metrics object for logging 268 * 269 * \param pub: public part of metrics object 270 * \param buf: output buffer to place string in 271 * \param len: available length of \p buf 272 * 273 * Helper for describing the state of a metrics object as a human-readable 274 * string, accounting for how its flags indicate what it contains. This is not 275 * how you would report metrics, but during development it can be useful to 276 * log them inbetween possibily long report intervals. 277 * 278 * It uses the metric's flags to adapt the format shown appropriately, eg, 279 * as a histogram if LWSMTFL_REPORT_HIST etc 280 */ 281LWS_EXTERN LWS_VISIBLE int 282lws_metrics_format(lws_metric_pub_t *pub, lws_metric_bucket_t **sub, 283 char *buf, size_t len); 284 285/** 286 * lws_metrics_hist_bump() - add or increment histogram bucket 287 * 288 * \param pub: public part of metrics object 289 * \param name: bucket name to increment 290 * 291 * Either increment the count of an existing bucket of the right name in the 292 * metrics object, or add a new bucket of the given name and set its count to 1. 293 * 294 * The metrics object must have been created with flag LWSMTFL_REPORT_HIST 295 * 296 * Normally, you will actually use the preprocessor wrapper 297 * lws_metrics_hist_bump() defined above, since this automatically takes care of 298 * removing itself from the build if WITH_SYS_METRICS is not defined, without 299 * needing any preprocessor conditionals. 300 */ 301LWS_EXTERN LWS_VISIBLE int 302lws_metrics_hist_bump_(lws_metric_pub_t *pub, const char *name); 303 304LWS_VISIBLE LWS_EXTERN int 305lws_metrics_foreach(struct lws_context *ctx, void *user, 306 int (*cb)(lws_metric_pub_t *pub, void *user)); 307 308LWS_VISIBLE LWS_EXTERN int 309lws_metrics_hist_bump_describe_wsi(struct lws *wsi, lws_metric_pub_t *pub, 310 const char *name); 311 312enum { 313 LMT_NORMAL = 0, /* related to successful events */ 314 LMT_OUTLIER, /* related to successful events outside of bounds */ 315 316 LMT_FAIL, /* related to failed events */ 317 318 LMT_COUNT, 319}; 320 321typedef enum lws_metric_rpt { 322 LMR_PERIODIC = 0, /* we are reporting on a schedule */ 323 LMR_OUTLIER, /* we are reporting the last outlier */ 324} lws_metric_rpt_kind_t; 325 326#define METRES_GO 0 327#define METRES_NOGO 1 328 329