cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

demangle-rust.c (6620B)


      1// SPDX-License-Identifier: GPL-2.0
      2#include <string.h>
      3#include "debug.h"
      4
      5#include "demangle-rust.h"
      6
      7/*
      8 * Mangled Rust symbols look like this:
      9 *
     10 *     _$LT$std..sys..fd..FileDesc$u20$as$u20$core..ops..Drop$GT$::drop::hc68340e1baa4987a
     11 *
     12 * The original symbol is:
     13 *
     14 *     <std::sys::fd::FileDesc as core::ops::Drop>::drop
     15 *
     16 * The last component of the path is a 64-bit hash in lowercase hex, prefixed
     17 * with "h". Rust does not have a global namespace between crates, an illusion
     18 * which Rust maintains by using the hash to distinguish things that would
     19 * otherwise have the same symbol.
     20 *
     21 * Any path component not starting with a XID_Start character is prefixed with
     22 * "_".
     23 *
     24 * The following escape sequences are used:
     25 *
     26 *     ","  =>  $C$
     27 *     "@"  =>  $SP$
     28 *     "*"  =>  $BP$
     29 *     "&"  =>  $RF$
     30 *     "<"  =>  $LT$
     31 *     ">"  =>  $GT$
     32 *     "("  =>  $LP$
     33 *     ")"  =>  $RP$
     34 *     " "  =>  $u20$
     35 *     "'"  =>  $u27$
     36 *     "["  =>  $u5b$
     37 *     "]"  =>  $u5d$
     38 *     "~"  =>  $u7e$
     39 *
     40 * A double ".." means "::" and a single "." means "-".
     41 *
     42 * The only characters allowed in the mangled symbol are a-zA-Z0-9 and _.:$
     43 */
     44
     45static const char *hash_prefix = "::h";
     46static const size_t hash_prefix_len = 3;
     47static const size_t hash_len = 16;
     48
     49static bool is_prefixed_hash(const char *start);
     50static bool looks_like_rust(const char *sym, size_t len);
     51static bool unescape(const char **in, char **out, const char *seq, char value);
     52
     53/*
     54 * INPUT:
     55 *     sym: symbol that has been through BFD-demangling
     56 *
     57 * This function looks for the following indicators:
     58 *
     59 *  1. The hash must consist of "h" followed by 16 lowercase hex digits.
     60 *
     61 *  2. As a sanity check, the hash must use between 5 and 15 of the 16 possible
     62 *     hex digits. This is true of 99.9998% of hashes so once in your life you
     63 *     may see a false negative. The point is to notice path components that
     64 *     could be Rust hashes but are probably not, like "haaaaaaaaaaaaaaaa". In
     65 *     this case a false positive (non-Rust symbol has an important path
     66 *     component removed because it looks like a Rust hash) is worse than a
     67 *     false negative (the rare Rust symbol is not demangled) so this sets the
     68 *     balance in favor of false negatives.
     69 *
     70 *  3. There must be no characters other than a-zA-Z0-9 and _.:$
     71 *
     72 *  4. There must be no unrecognized $-sign sequences.
     73 *
     74 *  5. There must be no sequence of three or more dots in a row ("...").
     75 */
     76bool
     77rust_is_mangled(const char *sym)
     78{
     79	size_t len, len_without_hash;
     80
     81	if (!sym)
     82		return false;
     83
     84	len = strlen(sym);
     85	if (len <= hash_prefix_len + hash_len)
     86		/* Not long enough to contain "::h" + hash + something else */
     87		return false;
     88
     89	len_without_hash = len - (hash_prefix_len + hash_len);
     90	if (!is_prefixed_hash(sym + len_without_hash))
     91		return false;
     92
     93	return looks_like_rust(sym, len_without_hash);
     94}
     95
     96/*
     97 * A hash is the prefix "::h" followed by 16 lowercase hex digits. The hex
     98 * digits must comprise between 5 and 15 (inclusive) distinct digits.
     99 */
    100static bool is_prefixed_hash(const char *str)
    101{
    102	const char *end;
    103	bool seen[16];
    104	size_t i;
    105	int count;
    106
    107	if (strncmp(str, hash_prefix, hash_prefix_len))
    108		return false;
    109	str += hash_prefix_len;
    110
    111	memset(seen, false, sizeof(seen));
    112	for (end = str + hash_len; str < end; str++)
    113		if (*str >= '0' && *str <= '9')
    114			seen[*str - '0'] = true;
    115		else if (*str >= 'a' && *str <= 'f')
    116			seen[*str - 'a' + 10] = true;
    117		else
    118			return false;
    119
    120	/* Count how many distinct digits seen */
    121	count = 0;
    122	for (i = 0; i < 16; i++)
    123		if (seen[i])
    124			count++;
    125
    126	return count >= 5 && count <= 15;
    127}
    128
    129static bool looks_like_rust(const char *str, size_t len)
    130{
    131	const char *end = str + len;
    132
    133	while (str < end)
    134		switch (*str) {
    135		case '$':
    136			if (!strncmp(str, "$C$", 3))
    137				str += 3;
    138			else if (!strncmp(str, "$SP$", 4)
    139					|| !strncmp(str, "$BP$", 4)
    140					|| !strncmp(str, "$RF$", 4)
    141					|| !strncmp(str, "$LT$", 4)
    142					|| !strncmp(str, "$GT$", 4)
    143					|| !strncmp(str, "$LP$", 4)
    144					|| !strncmp(str, "$RP$", 4))
    145				str += 4;
    146			else if (!strncmp(str, "$u20$", 5)
    147					|| !strncmp(str, "$u27$", 5)
    148					|| !strncmp(str, "$u5b$", 5)
    149					|| !strncmp(str, "$u5d$", 5)
    150					|| !strncmp(str, "$u7e$", 5))
    151				str += 5;
    152			else
    153				return false;
    154			break;
    155		case '.':
    156			/* Do not allow three or more consecutive dots */
    157			if (!strncmp(str, "...", 3))
    158				return false;
    159			/* Fall through */
    160		case 'a' ... 'z':
    161		case 'A' ... 'Z':
    162		case '0' ... '9':
    163		case '_':
    164		case ':':
    165			str++;
    166			break;
    167		default:
    168			return false;
    169		}
    170
    171	return true;
    172}
    173
    174/*
    175 * INPUT:
    176 *     sym: symbol for which rust_is_mangled(sym) returns true
    177 *
    178 * The input is demangled in-place because the mangled name is always longer
    179 * than the demangled one.
    180 */
    181void
    182rust_demangle_sym(char *sym)
    183{
    184	const char *in;
    185	char *out;
    186	const char *end;
    187
    188	if (!sym)
    189		return;
    190
    191	in = sym;
    192	out = sym;
    193	end = sym + strlen(sym) - (hash_prefix_len + hash_len);
    194
    195	while (in < end)
    196		switch (*in) {
    197		case '$':
    198			if (!(unescape(&in, &out, "$C$", ',')
    199					|| unescape(&in, &out, "$SP$", '@')
    200					|| unescape(&in, &out, "$BP$", '*')
    201					|| unescape(&in, &out, "$RF$", '&')
    202					|| unescape(&in, &out, "$LT$", '<')
    203					|| unescape(&in, &out, "$GT$", '>')
    204					|| unescape(&in, &out, "$LP$", '(')
    205					|| unescape(&in, &out, "$RP$", ')')
    206					|| unescape(&in, &out, "$u20$", ' ')
    207					|| unescape(&in, &out, "$u27$", '\'')
    208					|| unescape(&in, &out, "$u5b$", '[')
    209					|| unescape(&in, &out, "$u5d$", ']')
    210					|| unescape(&in, &out, "$u7e$", '~'))) {
    211				pr_err("demangle-rust: unexpected escape sequence");
    212				goto done;
    213			}
    214			break;
    215		case '_':
    216			/*
    217			 * If this is the start of a path component and the next
    218			 * character is an escape sequence, ignore the
    219			 * underscore. The mangler inserts an underscore to make
    220			 * sure the path component begins with a XID_Start
    221			 * character.
    222			 */
    223			if ((in == sym || in[-1] == ':') && in[1] == '$')
    224				in++;
    225			else
    226				*out++ = *in++;
    227			break;
    228		case '.':
    229			if (in[1] == '.') {
    230				/* ".." becomes "::" */
    231				*out++ = ':';
    232				*out++ = ':';
    233				in += 2;
    234			} else {
    235				/* "." becomes "-" */
    236				*out++ = '-';
    237				in++;
    238			}
    239			break;
    240		case 'a' ... 'z':
    241		case 'A' ... 'Z':
    242		case '0' ... '9':
    243		case ':':
    244			*out++ = *in++;
    245			break;
    246		default:
    247			pr_err("demangle-rust: unexpected character '%c' in symbol\n",
    248				*in);
    249			goto done;
    250		}
    251
    252done:
    253	*out = '\0';
    254}
    255
    256static bool unescape(const char **in, char **out, const char *seq, char value)
    257{
    258	size_t len = strlen(seq);
    259
    260	if (strncmp(*in, seq, len))
    261		return false;
    262
    263	**out = value;
    264
    265	*in += len;
    266	*out += 1;
    267
    268	return true;
    269}