unicode.c - cachepc-linux - Fork of AMDESE/linux with modifications for CachePC side-channel attack

	cachepc-linux Fork of AMDESE/linux with modifications for CachePC side-channel attack
	git clone https://git.sinitax.com/sinitax/cachepc-linux
	Log \| Files \| Refs \| README \| LICENSE \| sfeed.txt
unicode.c (10456B)
      1// SPDX-License-Identifier: GPL-2.0-or-later
      2/*
      3 *   Some of the source code in this file came from fs/cifs/cifs_unicode.c
      4 *
      5 *   Copyright (c) International Business Machines  Corp., 2000,2009
      6 *   Modified by Steve French (sfrench@us.ibm.com)
      7 *   Modified by Namjae Jeon (linkinjeon@kernel.org)
      8 */
      9#include <linux/fs.h>
     10#include <linux/slab.h>
     11#include <asm/unaligned.h>
     12#include "glob.h"
     13#include "unicode.h"
     14#include "uniupr.h"
     15#include "smb_common.h"
     16
     17/*
     18 * smb_utf16_bytes() - how long will a string be after conversion?
     19 * @from:	pointer to input string
     20 * @maxbytes:	don't go past this many bytes of input string
     21 * @codepage:	destination codepage
     22 *
     23 * Walk a utf16le string and return the number of bytes that the string will
     24 * be after being converted to the given charset, not including any null
     25 * termination required. Don't walk past maxbytes in the source buffer.
     26 *
     27 * Return:	string length after conversion
     28 */
     29static int smb_utf16_bytes(const __le16 *from, int maxbytes,
     30			   const struct nls_table *codepage)
     31{
     32	int i;
     33	int charlen, outlen = 0;
     34	int maxwords = maxbytes / 2;
     35	char tmp[NLS_MAX_CHARSET_SIZE];
     36	__u16 ftmp;
     37
     38	for (i = 0; i < maxwords; i++) {
     39		ftmp = get_unaligned_le16(&from[i]);
     40		if (ftmp == 0)
     41			break;
     42
     43		charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE);
     44		if (charlen > 0)
     45			outlen += charlen;
     46		else
     47			outlen++;
     48	}
     49
     50	return outlen;
     51}
     52
     53/*
     54 * cifs_mapchar() - convert a host-endian char to proper char in codepage
     55 * @target:	where converted character should be copied
     56 * @src_char:	2 byte host-endian source character
     57 * @cp:		codepage to which character should be converted
     58 * @mapchar:	should character be mapped according to mapchars mount option?
     59 *
     60 * This function handles the conversion of a single character. It is the
     61 * responsibility of the caller to ensure that the target buffer is large
     62 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE).
     63 *
     64 * Return:	string length after conversion
     65 */
     66static int
     67cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp,
     68	     bool mapchar)
     69{
     70	int len = 1;
     71
     72	if (!mapchar)
     73		goto cp_convert;
     74
     75	/*
     76	 * BB: Cannot handle remapping UNI_SLASH until all the calls to
     77	 *     build_path_from_dentry are modified, as they use slash as
     78	 *     separator.
     79	 */
     80	switch (src_char) {
     81	case UNI_COLON:
     82		*target = ':';
     83		break;
     84	case UNI_ASTERISK:
     85		*target = '*';
     86		break;
     87	case UNI_QUESTION:
     88		*target = '?';
     89		break;
     90	case UNI_PIPE:
     91		*target = '|';
     92		break;
     93	case UNI_GRTRTHAN:
     94		*target = '>';
     95		break;
     96	case UNI_LESSTHAN:
     97		*target = '<';
     98		break;
     99	default:
    100		goto cp_convert;
    101	}
    102
    103out:
    104	return len;
    105
    106cp_convert:
    107	len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE);
    108	if (len <= 0) {
    109		*target = '?';
    110		len = 1;
    111	}
    112
    113	goto out;
    114}
    115
    116/*
    117 * is_char_allowed() - check for valid character
    118 * @ch:		input character to be checked
    119 *
    120 * Return:	1 if char is allowed, otherwise 0
    121 */
    122static inline int is_char_allowed(char *ch)
    123{
    124	/* check for control chars, wildcards etc. */
    125	if (!(*ch & 0x80) &&
    126	    (*ch <= 0x1f ||
    127	     *ch == '?' || *ch == '"' || *ch == '<' ||
    128	     *ch == '>' || *ch == '|'))
    129		return 0;
    130
    131	return 1;
    132}
    133
    134/*
    135 * smb_from_utf16() - convert utf16le string to local charset
    136 * @to:		destination buffer
    137 * @from:	source buffer
    138 * @tolen:	destination buffer size (in bytes)
    139 * @fromlen:	source buffer size (in bytes)
    140 * @codepage:	codepage to which characters should be converted
    141 * @mapchar:	should characters be remapped according to the mapchars option?
    142 *
    143 * Convert a little-endian utf16le string (as sent by the server) to a string
    144 * in the provided codepage. The tolen and fromlen parameters are to ensure
    145 * that the code doesn't walk off of the end of the buffer (which is always
    146 * a danger if the alignment of the source buffer is off). The destination
    147 * string is always properly null terminated and fits in the destination
    148 * buffer. Returns the length of the destination string in bytes (including
    149 * null terminator).
    150 *
    151 * Note that some windows versions actually send multiword UTF-16 characters
    152 * instead of straight UTF16-2. The linux nls routines however aren't able to
    153 * deal with those characters properly. In the event that we get some of
    154 * those characters, they won't be translated properly.
    155 *
    156 * Return:	string length after conversion
    157 */
    158static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen,
    159			  const struct nls_table *codepage, bool mapchar)
    160{
    161	int i, charlen, safelen;
    162	int outlen = 0;
    163	int nullsize = nls_nullsize(codepage);
    164	int fromwords = fromlen / 2;
    165	char tmp[NLS_MAX_CHARSET_SIZE];
    166	__u16 ftmp;
    167
    168	/*
    169	 * because the chars can be of varying widths, we need to take care
    170	 * not to overflow the destination buffer when we get close to the
    171	 * end of it. Until we get to this offset, we don't need to check
    172	 * for overflow however.
    173	 */
    174	safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize);
    175
    176	for (i = 0; i < fromwords; i++) {
    177		ftmp = get_unaligned_le16(&from[i]);
    178		if (ftmp == 0)
    179			break;
    180
    181		/*
    182		 * check to see if converting this character might make the
    183		 * conversion bleed into the null terminator
    184		 */
    185		if (outlen >= safelen) {
    186			charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar);
    187			if ((outlen + charlen) > (tolen - nullsize))
    188				break;
    189		}
    190
    191		/* put converted char into 'to' buffer */
    192		charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar);
    193		outlen += charlen;
    194	}
    195
    196	/* properly null-terminate string */
    197	for (i = 0; i < nullsize; i++)
    198		to[outlen++] = 0;
    199
    200	return outlen;
    201}
    202
    203/*
    204 * smb_strtoUTF16() - Convert character string to unicode string
    205 * @to:		destination buffer
    206 * @from:	source buffer
    207 * @len:	destination buffer size (in bytes)
    208 * @codepage:	codepage to which characters should be converted
    209 *
    210 * Return:	string length after conversion
    211 */
    212int smb_strtoUTF16(__le16 *to, const char *from, int len,
    213		   const struct nls_table *codepage)
    214{
    215	int charlen;
    216	int i;
    217	wchar_t wchar_to; /* needed to quiet sparse */
    218
    219	/* special case for utf8 to handle no plane0 chars */
    220	if (!strcmp(codepage->charset, "utf8")) {
    221		/*
    222		 * convert utf8 -> utf16, we assume we have enough space
    223		 * as caller should have assumed conversion does not overflow
    224		 * in destination len is length in wchar_t units (16bits)
    225		 */
    226		i  = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN,
    227				     (wchar_t *)to, len);
    228
    229		/* if success terminate and exit */
    230		if (i >= 0)
    231			goto success;
    232		/*
    233		 * if fails fall back to UCS encoding as this
    234		 * function should not return negative values
    235		 * currently can fail only if source contains
    236		 * invalid encoded characters
    237		 */
    238	}
    239
    240	for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) {
    241		charlen = codepage->char2uni(from, len, &wchar_to);
    242		if (charlen < 1) {
    243			/* A question mark */
    244			wchar_to = 0x003f;
    245			charlen = 1;
    246		}
    247		put_unaligned_le16(wchar_to, &to[i]);
    248	}
    249
    250success:
    251	put_unaligned_le16(0, &to[i]);
    252	return i;
    253}
    254
    255/*
    256 * smb_strndup_from_utf16() - copy a string from wire format to the local
    257 *		codepage
    258 * @src:	source string
    259 * @maxlen:	don't walk past this many bytes in the source string
    260 * @is_unicode:	is this a unicode string?
    261 * @codepage:	destination codepage
    262 *
    263 * Take a string given by the server, convert it to the local codepage and
    264 * put it in a new buffer. Returns a pointer to the new string or NULL on
    265 * error.
    266 *
    267 * Return:	destination string buffer or error ptr
    268 */
    269char *smb_strndup_from_utf16(const char *src, const int maxlen,
    270			     const bool is_unicode,
    271			     const struct nls_table *codepage)
    272{
    273	int len, ret;
    274	char *dst;
    275
    276	if (is_unicode) {
    277		len = smb_utf16_bytes((__le16 *)src, maxlen, codepage);
    278		len += nls_nullsize(codepage);
    279		dst = kmalloc(len, GFP_KERNEL);
    280		if (!dst)
    281			return ERR_PTR(-ENOMEM);
    282		ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage,
    283				     false);
    284		if (ret < 0) {
    285			kfree(dst);
    286			return ERR_PTR(-EINVAL);
    287		}
    288	} else {
    289		len = strnlen(src, maxlen);
    290		len++;
    291		dst = kmalloc(len, GFP_KERNEL);
    292		if (!dst)
    293			return ERR_PTR(-ENOMEM);
    294		strscpy(dst, src, len);
    295	}
    296
    297	return dst;
    298}
    299
    300/*
    301 * Convert 16 bit Unicode pathname to wire format from string in current code
    302 * page. Conversion may involve remapping up the six characters that are
    303 * only legal in POSIX-like OS (if they are present in the string). Path
    304 * names are little endian 16 bit Unicode on the wire
    305 */
    306/*
    307 * smbConvertToUTF16() - convert string from local charset to utf16
    308 * @target:	destination buffer
    309 * @source:	source buffer
    310 * @srclen:	source buffer size (in bytes)
    311 * @cp:		codepage to which characters should be converted
    312 * @mapchar:	should characters be remapped according to the mapchars option?
    313 *
    314 * Convert 16 bit Unicode pathname to wire format from string in current code
    315 * page. Conversion may involve remapping up the six characters that are
    316 * only legal in POSIX-like OS (if they are present in the string). Path
    317 * names are little endian 16 bit Unicode on the wire
    318 *
    319 * Return:	char length after conversion
    320 */
    321int smbConvertToUTF16(__le16 *target, const char *source, int srclen,
    322		      const struct nls_table *cp, int mapchars)
    323{
    324	int i, j, charlen;
    325	char src_char;
    326	__le16 dst_char;
    327	wchar_t tmp;
    328
    329	if (!mapchars)
    330		return smb_strtoUTF16(target, source, srclen, cp);
    331
    332	for (i = 0, j = 0; i < srclen; j++) {
    333		src_char = source[i];
    334		charlen = 1;
    335		switch (src_char) {
    336		case 0:
    337			put_unaligned(0, &target[j]);
    338			return j;
    339		case ':':
    340			dst_char = cpu_to_le16(UNI_COLON);
    341			break;
    342		case '*':
    343			dst_char = cpu_to_le16(UNI_ASTERISK);
    344			break;
    345		case '?':
    346			dst_char = cpu_to_le16(UNI_QUESTION);
    347			break;
    348		case '<':
    349			dst_char = cpu_to_le16(UNI_LESSTHAN);
    350			break;
    351		case '>':
    352			dst_char = cpu_to_le16(UNI_GRTRTHAN);
    353			break;
    354		case '|':
    355			dst_char = cpu_to_le16(UNI_PIPE);
    356			break;
    357		/*
    358		 * FIXME: We can not handle remapping backslash (UNI_SLASH)
    359		 * until all the calls to build_path_from_dentry are modified,
    360		 * as they use backslash as separator.
    361		 */
    362		default:
    363			charlen = cp->char2uni(source + i, srclen - i, &tmp);
    364			dst_char = cpu_to_le16(tmp);
    365
    366			/*
    367			 * if no match, use question mark, which at least in
    368			 * some cases serves as wild card
    369			 */
    370			if (charlen < 1) {
    371				dst_char = cpu_to_le16(0x003f);
    372				charlen = 1;
    373			}
    374		}
    375		/*
    376		 * character may take more than one byte in the source string,
    377		 * but will take exactly two bytes in the target string
    378		 */
    379		i += charlen;
    380		put_unaligned(dst_char, &target[j]);
    381	}
    382
    383	return j;
    384}