cachepc-linux

Fork of AMDESE/linux with modifications for CachePC side-channel attack
git clone https://git.sinitax.com/sinitax/cachepc-linux
Log | Files | Refs | README | LICENSE | sfeed.txt

unicode.c (9406B)


      1/*
      2 * unicode.c
      3 *
      4 * PURPOSE
      5 *	Routines for converting between UTF-8 and OSTA Compressed Unicode.
      6 *      Also handles filename mangling
      7 *
      8 * DESCRIPTION
      9 *	OSTA Compressed Unicode is explained in the OSTA UDF specification.
     10 *		http://www.osta.org/
 *	UTF-8 is explained in IETF RFC 3629.
 *		https://www.rfc-editor.org/rfc/rfc3629
     13 *
     14 * COPYRIGHT
     15 *	This file is distributed under the terms of the GNU General Public
     16 *	License (GPL). Copies of the GPL can be obtained from:
     17 *		ftp://prep.ai.mit.edu/pub/gnu/GPL
     18 *	Each contributing author retains all rights to their own work.
     19 */
     20
     21#include "udfdecl.h"
     22
     23#include <linux/kernel.h>
     24#include <linux/string.h>	/* for memset */
     25#include <linux/nls.h>
     26#include <linux/crc-itu-t.h>
     27#include <linux/slab.h>
     28
     29#include "udf_sb.h"
     30
/* Offset added to / subtracted from characters coded as a surrogate pair */
#define PLANE_SIZE 0x10000
/* Largest accepted character; UNICODE_MAX + 1 is used to flag an invalid one */
#define UNICODE_MAX 0x10ffff
/* (c & SURROGATE_MASK) == SURROGATE_PAIR identifies a UTF-16 surrogate unit */
#define SURROGATE_MASK 0xfffff800
#define SURROGATE_PAIR 0x0000d800
/* Bit set in the low (second) surrogate of a pair, clear in the high one */
#define SURROGATE_LOW  0x00000400
/* Number of payload bits carried by each surrogate and the matching mask */
#define SURROGATE_CHAR_BITS 10
#define SURROGATE_CHAR_MASK ((1 << SURROGATE_CHAR_BITS) - 1)

/* Replacement for characters that cannot be converted */
#define ILLEGAL_CHAR_MARK	'_'
/* Separator between a file name and its extension */
#define EXT_MARK		'.'
/* First character of the CRC suffix added to mangled names */
#define CRC_MARK		'#'
/* Maximum number of trailing characters searched for an extension */
#define EXT_SIZE		5
/* Number of chars we need to store generated CRC to make filename unique */
#define CRC_LEN			5
     45
     46static unicode_t get_utf16_char(const uint8_t *str_i, int str_i_max_len,
     47				int str_i_idx, int u_ch, unicode_t *ret)
     48{
     49	unicode_t c;
     50	int start_idx = str_i_idx;
     51
     52	/* Expand OSTA compressed Unicode to Unicode */
     53	c = str_i[str_i_idx++];
     54	if (u_ch > 1)
     55		c = (c << 8) | str_i[str_i_idx++];
     56	if ((c & SURROGATE_MASK) == SURROGATE_PAIR) {
     57		unicode_t next;
     58
     59		/* Trailing surrogate char */
     60		if (str_i_idx >= str_i_max_len) {
     61			c = UNICODE_MAX + 1;
     62			goto out;
     63		}
     64
     65		/* Low surrogate must follow the high one... */
     66		if (c & SURROGATE_LOW) {
     67			c = UNICODE_MAX + 1;
     68			goto out;
     69		}
     70
     71		WARN_ON_ONCE(u_ch != 2);
     72		next = str_i[str_i_idx++] << 8;
     73		next |= str_i[str_i_idx++];
     74		if ((next & SURROGATE_MASK) != SURROGATE_PAIR ||
     75		    !(next & SURROGATE_LOW)) {
     76			c = UNICODE_MAX + 1;
     77			goto out;
     78		}
     79
     80		c = PLANE_SIZE +
     81		    ((c & SURROGATE_CHAR_MASK) << SURROGATE_CHAR_BITS) +
     82		    (next & SURROGATE_CHAR_MASK);
     83	}
     84out:
     85	*ret = c;
     86	return str_i_idx - start_idx;
     87}
     88
     89
     90static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
     91			      int *str_o_idx,
     92			      const uint8_t *str_i, int str_i_max_len,
     93			      int *str_i_idx,
     94			      int u_ch, int *needsCRC,
     95			      int (*conv_f)(wchar_t, unsigned char *, int),
     96			      int translate)
     97{
     98	unicode_t c;
     99	int illChar = 0;
    100	int len, gotch = 0;
    101
    102	while (!gotch && *str_i_idx < str_i_max_len) {
    103		if (*str_o_idx >= str_o_max_len) {
    104			*needsCRC = 1;
    105			return gotch;
    106		}
    107
    108		len = get_utf16_char(str_i, str_i_max_len, *str_i_idx, u_ch,
    109				     &c);
    110		/* These chars cannot be converted. Replace them. */
    111		if (c == 0 || c > UNICODE_MAX || (conv_f && c > MAX_WCHAR_T) ||
    112		    (translate && c == '/')) {
    113			illChar = 1;
    114			if (!translate)
    115				gotch = 1;
    116		} else if (illChar)
    117			break;
    118		else
    119			gotch = 1;
    120		*str_i_idx += len;
    121	}
    122	if (illChar) {
    123		*needsCRC = 1;
    124		c = ILLEGAL_CHAR_MARK;
    125		gotch = 1;
    126	}
    127	if (gotch) {
    128		if (conv_f) {
    129			len = conv_f(c, &str_o[*str_o_idx],
    130				     str_o_max_len - *str_o_idx);
    131		} else {
    132			len = utf32_to_utf8(c, &str_o[*str_o_idx],
    133					    str_o_max_len - *str_o_idx);
    134			if (len < 0)
    135				len = -ENAMETOOLONG;
    136		}
    137		/* Valid character? */
    138		if (len >= 0)
    139			*str_o_idx += len;
    140		else if (len == -ENAMETOOLONG) {
    141			*needsCRC = 1;
    142			gotch = 0;
    143		} else {
    144			str_o[(*str_o_idx)++] = ILLEGAL_CHAR_MARK;
    145			*needsCRC = 1;
    146		}
    147	}
    148	return gotch;
    149}
    150
    151static int udf_name_from_CS0(struct super_block *sb,
    152			     uint8_t *str_o, int str_max_len,
    153			     const uint8_t *ocu, int ocu_len,
    154			     int translate)
    155{
    156	uint32_t c;
    157	uint8_t cmp_id;
    158	int idx, len;
    159	int u_ch;
    160	int needsCRC = 0;
    161	int ext_i_len, ext_max_len;
    162	int str_o_len = 0;	/* Length of resulting output */
    163	int ext_o_len = 0;	/* Extension output length */
    164	int ext_crc_len = 0;	/* Extension output length if used with CRC */
    165	int i_ext = -1;		/* Extension position in input buffer */
    166	int o_crc = 0;		/* Rightmost possible output pos for CRC+ext */
    167	unsigned short valueCRC;
    168	uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
    169	uint8_t crc[CRC_LEN];
    170	int (*conv_f)(wchar_t, unsigned char *, int);
    171
    172	if (str_max_len <= 0)
    173		return 0;
    174
    175	if (ocu_len == 0) {
    176		memset(str_o, 0, str_max_len);
    177		return 0;
    178	}
    179
    180	if (UDF_SB(sb)->s_nls_map)
    181		conv_f = UDF_SB(sb)->s_nls_map->uni2char;
    182	else
    183		conv_f = NULL;
    184
    185	cmp_id = ocu[0];
    186	if (cmp_id != 8 && cmp_id != 16) {
    187		memset(str_o, 0, str_max_len);
    188		pr_err("unknown compression code (%u)\n", cmp_id);
    189		return -EINVAL;
    190	}
    191	u_ch = cmp_id >> 3;
    192
    193	ocu++;
    194	ocu_len--;
    195
    196	if (ocu_len % u_ch) {
    197		pr_err("incorrect filename length (%d)\n", ocu_len + 1);
    198		return -EINVAL;
    199	}
    200
    201	if (translate) {
    202		/* Look for extension */
    203		for (idx = ocu_len - u_ch, ext_i_len = 0;
    204		     (idx >= 0) && (ext_i_len < EXT_SIZE);
    205		     idx -= u_ch, ext_i_len++) {
    206			c = ocu[idx];
    207			if (u_ch > 1)
    208				c = (c << 8) | ocu[idx + 1];
    209
    210			if (c == EXT_MARK) {
    211				if (ext_i_len)
    212					i_ext = idx;
    213				break;
    214			}
    215		}
    216		if (i_ext >= 0) {
    217			/* Convert extension */
    218			ext_max_len = min_t(int, sizeof(ext), str_max_len);
    219			ext[ext_o_len++] = EXT_MARK;
    220			idx = i_ext + u_ch;
    221			while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
    222						  ocu, ocu_len, &idx,
    223						  u_ch, &needsCRC,
    224						  conv_f, translate)) {
    225				if ((ext_o_len + CRC_LEN) < str_max_len)
    226					ext_crc_len = ext_o_len;
    227			}
    228		}
    229	}
    230
    231	idx = 0;
    232	while (1) {
    233		if (translate && (idx == i_ext)) {
    234			if (str_o_len > (str_max_len - ext_o_len))
    235				needsCRC = 1;
    236			break;
    237		}
    238
    239		if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
    240					ocu, ocu_len, &idx,
    241					u_ch, &needsCRC, conv_f, translate))
    242			break;
    243
    244		if (translate &&
    245		    (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
    246			o_crc = str_o_len;
    247	}
    248
    249	if (translate) {
    250		if (str_o_len <= 2 && str_o[0] == '.' &&
    251		    (str_o_len == 1 || str_o[1] == '.'))
    252			needsCRC = 1;
    253		if (needsCRC) {
    254			str_o_len = o_crc;
    255			valueCRC = crc_itu_t(0, ocu, ocu_len);
    256			crc[0] = CRC_MARK;
    257			crc[1] = hex_asc_upper_hi(valueCRC >> 8);
    258			crc[2] = hex_asc_upper_lo(valueCRC >> 8);
    259			crc[3] = hex_asc_upper_hi(valueCRC);
    260			crc[4] = hex_asc_upper_lo(valueCRC);
    261			len = min_t(int, CRC_LEN, str_max_len - str_o_len);
    262			memcpy(&str_o[str_o_len], crc, len);
    263			str_o_len += len;
    264			ext_o_len = ext_crc_len;
    265		}
    266		if (ext_o_len > 0) {
    267			memcpy(&str_o[str_o_len], ext, ext_o_len);
    268			str_o_len += ext_o_len;
    269		}
    270	}
    271
    272	return str_o_len;
    273}
    274
    275static int udf_name_to_CS0(struct super_block *sb,
    276			   uint8_t *ocu, int ocu_max_len,
    277			   const uint8_t *str_i, int str_len)
    278{
    279	int i, len;
    280	unsigned int max_val;
    281	int u_len, u_ch;
    282	unicode_t uni_char;
    283	int (*conv_f)(const unsigned char *, int, wchar_t *);
    284
    285	if (ocu_max_len <= 0)
    286		return 0;
    287
    288	if (UDF_SB(sb)->s_nls_map)
    289		conv_f = UDF_SB(sb)->s_nls_map->char2uni;
    290	else
    291		conv_f = NULL;
    292
    293	memset(ocu, 0, ocu_max_len);
    294	ocu[0] = 8;
    295	max_val = 0xff;
    296	u_ch = 1;
    297
    298try_again:
    299	u_len = 1;
    300	for (i = 0; i < str_len; i += len) {
    301		/* Name didn't fit? */
    302		if (u_len + u_ch > ocu_max_len)
    303			return 0;
    304		if (conv_f) {
    305			wchar_t wchar;
    306
    307			len = conv_f(&str_i[i], str_len - i, &wchar);
    308			if (len > 0)
    309				uni_char = wchar;
    310		} else {
    311			len = utf8_to_utf32(&str_i[i], str_len - i,
    312					    &uni_char);
    313		}
    314		/* Invalid character, deal with it */
    315		if (len <= 0 || uni_char > UNICODE_MAX) {
    316			len = 1;
    317			uni_char = '?';
    318		}
    319
    320		if (uni_char > max_val) {
    321			unicode_t c;
    322
    323			if (max_val == 0xff) {
    324				max_val = 0xffff;
    325				ocu[0] = 0x10;
    326				u_ch = 2;
    327				goto try_again;
    328			}
    329			/*
    330			 * Use UTF-16 encoding for chars outside we
    331			 * cannot encode directly.
    332			 */
    333			if (u_len + 2 * u_ch > ocu_max_len)
    334				return 0;
    335
    336			uni_char -= PLANE_SIZE;
    337			c = SURROGATE_PAIR |
    338			    ((uni_char >> SURROGATE_CHAR_BITS) &
    339			     SURROGATE_CHAR_MASK);
    340			ocu[u_len++] = (uint8_t)(c >> 8);
    341			ocu[u_len++] = (uint8_t)(c & 0xff);
    342			uni_char = SURROGATE_PAIR | SURROGATE_LOW |
    343					(uni_char & SURROGATE_CHAR_MASK);
    344		}
    345
    346		if (max_val == 0xffff)
    347			ocu[u_len++] = (uint8_t)(uni_char >> 8);
    348		ocu[u_len++] = (uint8_t)(uni_char & 0xff);
    349	}
    350
    351	return u_len;
    352}
    353
    354/*
    355 * Convert CS0 dstring to output charset. Warning: This function may truncate
    356 * input string if it is too long as it is used for informational strings only
    357 * and it is better to truncate the string than to refuse mounting a media.
    358 */
    359int udf_dstrCS0toChar(struct super_block *sb, uint8_t *utf_o, int o_len,
    360		      const uint8_t *ocu_i, int i_len)
    361{
    362	int s_len = 0;
    363
    364	if (i_len > 0) {
    365		s_len = ocu_i[i_len - 1];
    366		if (s_len >= i_len) {
    367			pr_warn("incorrect dstring lengths (%d/%d),"
    368				" truncating\n", s_len, i_len);
    369			s_len = i_len - 1;
    370			/* 2-byte encoding? Need to round properly... */
    371			if (ocu_i[0] == 16)
    372				s_len -= (s_len - 1) & 2;
    373		}
    374	}
    375
    376	return udf_name_from_CS0(sb, utf_o, o_len, ocu_i, s_len, 0);
    377}
    378
    379int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
    380		     uint8_t *dname, int dlen)
    381{
    382	int ret;
    383
    384	if (!slen)
    385		return -EIO;
    386
    387	if (dlen <= 0)
    388		return 0;
    389
    390	ret = udf_name_from_CS0(sb, dname, dlen, sname, slen, 1);
    391	/* Zero length filename isn't valid... */
    392	if (ret == 0)
    393		ret = -EINVAL;
    394	return ret;
    395}
    396
    397int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
    398		     uint8_t *dname, int dlen)
    399{
    400	return udf_name_to_CS0(sb, dname, dlen, sname, slen);
    401}
    402