lws-tokenize.h - cscg24-guacamole - CSCG 2024 Challenge 'Guacamole Mashup'

	cscg24-guacamole CSCG 2024 Challenge 'Guacamole Mashup'
	git clone https://git.sinitax.com/sinitax/cscg24-guacamole
	Log \| Files \| Refs \| sfeed.txt
lws-tokenize.h (10780B)
      1/*
      2 * libwebsockets - small server side websockets and web server implementation
      3 *
      4 * Copyright (C) 2010 - 2019 Andy Green <andy@warmcat.com>
      5 *
      6 * Permission is hereby granted, free of charge, to any person obtaining a copy
      7 * of this software and associated documentation files (the "Software"), to
      8 * deal in the Software without restriction, including without limitation the
      9 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
     10 * sell copies of the Software, and to permit persons to whom the Software is
     11 * furnished to do so, subject to the following conditions:
     12 *
     13 * The above copyright notice and this permission notice shall be included in
     14 * all copies or substantial portions of the Software.
     15 *
     16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
     19 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     22 * IN THE SOFTWARE.
     23 */
     24
       /* Option flags for lws_tokenize.flags / lws_tokenize_init(); each is one bit */
     25/* Do not treat - as a terminal character, so "my-token" is one token */
     26#define LWS_TOKENIZE_F_MINUS_NONTERM	(1 << 0)
     27/* Separately report aggregate colon-delimited tokens */
     28#define LWS_TOKENIZE_F_AGG_COLON	(1 << 1)
     29/* Enforce sequencing for a simple token , token , token ... list */
     30#define LWS_TOKENIZE_F_COMMA_SEP_LIST	(1 << 2)
     31/* Allow more characters in the tokens and fewer delimiters... default is
     32 * only alphanumeric + underscore in tokens */
     33#define LWS_TOKENIZE_F_RFC7230_DELIMS	(1 << 3)
     34/* Do not treat . as a terminal character, so "warmcat.com" is one token */
     35#define LWS_TOKENIZE_F_DOT_NONTERM	(1 << 4)
     36/* If something starts looking like a float, like 1.2, force to be string token.
     37 * This lets you receive dotted-quads like 192.168.0.1 as string tokens, and
     38 * avoids illegal float format detection like 1.myserver.com */
     39#define LWS_TOKENIZE_F_NO_FLOATS	(1 << 5)
     40/* Instead of LWS_TOKZE_INTEGER, report integers as any other string token */
     41#define LWS_TOKENIZE_F_NO_INTEGERS	(1 << 6)
     42/* # makes the rest of the line a comment */
     43#define LWS_TOKENIZE_F_HASH_COMMENT	(1 << 7)
     44/* Do not treat / as a terminal character, so "multipart/related" is one token */
     45#define LWS_TOKENIZE_F_SLASH_NONTERM	(1 << 8)
     46/* Do not treat * as a terminal character, so "myfile*" is one token */
     47#define LWS_TOKENIZE_F_ASTERISK_NONTERM	(1 << 9)
     48/* Do not treat = as a terminal character, so "x=y" is one token */
     49#define LWS_TOKENIZE_F_EQUALS_NONTERM	(1 << 10)
     50
     51typedef enum {
     52	/* values returned by lws_tokenize() */
     53	LWS_TOKZE_ERRS			=  5, /* the number of errors defined */
     54
     55	LWS_TOKZE_ERR_BROKEN_UTF8	= -5,	/* malformed or partial utf8 */
     56	LWS_TOKZE_ERR_UNTERM_STRING	= -4,	/* ended while we were in "" */
     57	LWS_TOKZE_ERR_MALFORMED_FLOAT	= -3,	/* like 0..1 or 0.1.1 */
     58	LWS_TOKZE_ERR_NUM_ON_LHS	= -2,	/* like 123= or 0.1= */
     59	LWS_TOKZE_ERR_COMMA_LIST	= -1,	/* like ",tok", or, "tok,," */
     60
     61	LWS_TOKZE_ENDED = 0,		/* no more content */
     62
     63	/* Note: results have ordinal 1+, EOT is 0 and errors are < 0 */
     64
     65	LWS_TOKZE_DELIMITER,		/* a delimiter appeared */
     66	LWS_TOKZE_TOKEN,		/* a token appeared */
     67	LWS_TOKZE_INTEGER,		/* an integer appeared */
     68	LWS_TOKZE_FLOAT,		/* a float appeared */
     69	LWS_TOKZE_TOKEN_NAME_EQUALS,	/* token [whitespace] = */
     70	LWS_TOKZE_TOKEN_NAME_COLON,	/* token [whitespace] : (only with
     71					   LWS_TOKENIZE_F_AGG_COLON flag) */
     72	LWS_TOKZE_QUOTED_STRING,	/* "*", where * may have any char */
     73
     74} lws_tokenize_elem;
     75
     76/*
     77 * helper enums to allow caller to enforce legal delimiter sequencing, eg
     78 * disallow "token,,token", "token,", and ",token"
     79 */
     80
     81enum lws_tokenize_delimiter_tracking {
     82	LWSTZ_DT_NEED_FIRST_CONTENT, /* lws_tokenize_init() sets .delim to this */
     83	LWSTZ_DT_NEED_DELIM, /* presumably: content seen, delimiter expected next -- confirm */
     84	LWSTZ_DT_NEED_NEXT_CONTENT, /* presumably: delimiter seen, content must follow -- confirm */
     85};
     86
     87typedef struct lws_tokenize {
     88	const char *start; /**< set to the start of the string to tokenize */
     89	const char *token; /**< the start of an identified token or delimiter */
     90	size_t len;	/**< set to the length of the string to tokenize */
     91	size_t token_len;	/**< the length of the identified token or delimiter */
     92
     93	uint16_t flags;	/**< optional LWS_TOKENIZE_F_ flags, or 0 */
     94	uint8_t delim; /**< delimiter sequencing state; lws_tokenize_init() sets it to LWSTZ_DT_NEED_FIRST_CONTENT */
     95
     96	int8_t e; /**< convenient for storing lws_tokenize return */
     97} lws_tokenize_t;
     98
     99/**
    100 * lws_tokenize_init() - initialize a tokenize struct for a given string
    101 *
    102 * \param ts: the lws_tokenize struct to init
    103 * \param start: the string to tokenize
    104 * \param flags: LWS_TOKENIZE_F_ option flags
    105 *
    106 * This initializes the tokenize struct to point to the given string, and
    107 * sets the length to 2GiB - 1 (so there must be a terminating NUL)... you can
    108 * override this requirement by setting ts.len yourself before using it.
    109 *
    110 * .delim is also initialized to LWSTZ_DT_NEED_FIRST_CONTENT.
    111 */
    112
    113LWS_VISIBLE LWS_EXTERN void
    114lws_tokenize_init(struct lws_tokenize *ts, const char *start, int flags);
    115
    116/**
    117 * lws_tokenize() - breaks down a string into tokens and delimiters in-place
    118 *
    119 * \param ts: the lws_tokenize struct with information and state on what to do
    120 *
    121 * The \p ts struct should have its start, len and flags members initialized to
    122 * reflect the string to be tokenized and any options.
    123 *
    124 * Then `lws_tokenize()` may be called repeatedly on the struct, returning one
    125 * of `lws_tokenize_elem` each time, and with the struct's `token` and
    126 * `token_len` members set to describe the content of the delimiter or token
    127 * payload each time.
    128 *
    129 * There are no allocations during the process.
    130 *
    131 * returns lws_tokenize_elem that was identified (LWS_TOKZE_ENDED means reached
    132 * the end of the string; negative LWS_TOKZE_ERR_ values are errors).
    133 */
    134
    135LWS_VISIBLE LWS_EXTERN lws_tokenize_elem
    136lws_tokenize(struct lws_tokenize *ts);
    137
    138/**
    139 * lws_tokenize_cstr() - copy token string to NUL-terminated buffer
    140 *
    141 * \param ts: pointer to lws_tokenize struct to operate on
    142 * \param str: destination buffer
    143 * \param max: bytes in destination buffer
    144 *
    145 * returns 0 if OK or nonzero if the string + NUL won't fit.
    146 */
    147
    148LWS_VISIBLE LWS_EXTERN int
    149lws_tokenize_cstr(struct lws_tokenize *ts, char *str, size_t max);
    150
    151
    152/*
    153 * lws_strexp: flexible string expansion helper api
    154 *
    155 * This stateful helper can handle multiple separate input chunks and multiple
    156 * output buffer loads with arbitrary boundaries between literals and expanded
    157 * symbols.  This allows it to handle fragmented input as well as arbitrarily
    158 * long symbol expansions that are bigger than the output buffer itself.
    159 *
    160 * A user callback is used to convert symbol names to the symbol value.
    161 *
    162 * A single byte buffer for input and another for output can process any
    163 * length substitution then.  The state object is around 64 bytes on a 64-bit
    164 * system and it only uses 8 bytes stack.
    165 */
    166
    167
       /*
        * User callback that expands the symbol \p name.  \p priv is the user
        * pointer given to lws_strexp_init().  \p out may be NULL (length-only
        * mode), in which case the callback must not produce output.  If the
        * expansion fills the output buffer the callback returns LSTRX_FILLED_OUT
        * and is called again to continue, with \p *exp_ofs set appropriately.
        * NOTE(review): the exact update rules for *pos and *exp_ofs are not
        * visible in this header -- confirm against the implementation.
        */
    168typedef int (*lws_strexp_expand_cb)(void *priv, const char *name, char *out,
    169				    size_t *pos, size_t olen, size_t *exp_ofs);
    170
    171typedef struct lws_strexp {
    172	char			name[32]; /* presumably the symbol name being collected -- confirm */
    173	lws_strexp_expand_cb	cb; /* callback to expand named objects */
    174	void			*priv; /* user's object pointer to pass to callback */
    175	char			*out; /* start of the output buffer, or NULL for length only */
    176	size_t			olen; /* length of the output buffer in bytes */
    177	size_t			pos; /* presumably the next write position in out -- confirm */
    178
    179	size_t			exp_ofs; /* presumably the offset into an expansion in progress -- confirm */
    180
    181	uint8_t			name_pos; /* presumably the fill level of name[] -- confirm */
    182	char			state; /* parser state; values not visible in this header */
    183} lws_strexp_t;
    184
    185enum {
    186	LSTRX_DONE,			/* it completed OK */
    187	LSTRX_FILLED_OUT,		/* out buf filled and needs resetting */
    188	LSTRX_FATAL_NAME_TOO_LONG = -1,	/* fatal */
    189	LSTRX_FATAL_NAME_UNKNOWN  = -2,	/* fatal; presumably the name could not be expanded -- confirm */
    190};
    191
    192
    193/**
    194 * lws_strexp_init() - initialize an lws_strexp_t for use
    195 *
    196 * \param exp: the exp object to init
    197 * \param priv: the user's object pointer to pass to callback
    198 * \param cb: the callback to expand named objects
    199 * \param out: the start of the output buffer, or NULL just to get the length
    200 * \param olen: the length of the output buffer in bytes
    201 *
    202 * Prepares an lws_strexp_t for use and sets the initial output buffer
    203 *
    204 * If \p out is NULL, substitution proceeds normally, but no output is produced,
    205 * only the length is returned.  \p olen should be set to the largest feasible
    206 * overall length.  To use this mode, the substitution callback must also check
    207 * for NULL \p out and avoid producing the output.
    208 */
    209LWS_VISIBLE LWS_EXTERN void
    210lws_strexp_init(lws_strexp_t *exp, void *priv, lws_strexp_expand_cb cb,
    211		char *out, size_t olen);
    212
    213/**
    214 * lws_strexp_reset_out() - reset the output buffer on an existing strexp
    215 *
    216 * \param exp: the exp object whose output buffer is being replaced
    217 * \param out: the start of the output buffer, or NULL to just get length
    218 * \param olen: the length of the output buffer in bytes
    219 *
    220 * Provides a new output buffer for lws_strexp_expand() to continue to write
    221 * into.  It can be the same as the old one if it has been copied out or used.
    222 * The position of the next write will be reset to the start of the given buf.
    223 *
    224 * If \p out is NULL, substitution proceeds normally, but no output is produced,
    225 * only the length is returned.  \p olen should be set to the largest feasible
    226 * overall length.  To use this mode, the substitution callback must also check
    227 * for NULL \p out and avoid producing the output.
    228 */
    229LWS_VISIBLE LWS_EXTERN void
    230lws_strexp_reset_out(lws_strexp_t *exp, char *out, size_t olen);
    231
    232/**
    233 * lws_strexp_expand() - copy / expand a string into the output buffer
    234 *
    235 * \param exp: the exp object for the copy / expansion
    236 * \param in: the start of the next input data
    237 * \param len: the length of the input data
    238 * \param pused_in: pointer to write the amount of input used
    239 * \param pused_out: pointer to write the amount of output used
    240 *
    241 * Copies in to the output buffer set in exp, expanding any ${name} tokens using
    242 * the callback.  \p *pused_in is set to the number of input chars used and
    243 * \p *pused_out the number of output characters used.
    244 *
    245 * May return LSTRX_FILLED_OUT early with *pused_in < len if the output buffer is
    246 * filled.  Handle the output buffer and reset it with lws_strexp_reset_out()
    247 * before calling again with adjusted in / len to continue.
    248 *
    249 * In the case of large expansions, the expansion itself may fill the output
    250 * buffer, in which case the expansion callback returns the LSTRX_FILLED_OUT
    251 * and will be called again to continue with its *exp_ofs parameter set
    252 * appropriately.
    253 */
    254LWS_VISIBLE LWS_EXTERN int
    255lws_strexp_expand(lws_strexp_t *exp, const char *in, size_t len,
    256		  size_t *pused_in, size_t *pused_out);
    257
    258/**
    259 * lws_strcmp_wildcard() - strcmp but the first arg can have wildcards
    260 *
    261 * \param wildcard: a string that may contain zero to three *, and may lack a NUL
    262 * \param wlen: length of the wildcard string
    263 * \param check: string to test to see if it matches wildcard
    264 * \param clen: length of check string
    265 *
    266 * Like strcmp, but supports patterns like "a*", "a*b", "a*b*" etc
    267 * where a and b are arbitrary substrings.  Both the wc and check strings need
    268 * not be NUL terminated, but are specified by lengths.
        *
        * NOTE(review): the return convention is not stated here; "like strcmp"
        * suggests 0 on a match and nonzero otherwise -- confirm.
    269 */
    270LWS_VISIBLE LWS_EXTERN int
    271lws_strcmp_wildcard(const char *wildcard, size_t wlen, const char *check,
    272		    size_t clen);