unicode.c (10456B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * Some of the source code in this file came from fs/cifs/cifs_unicode.c 4 * 5 * Copyright (c) International Business Machines Corp., 2000,2009 6 * Modified by Steve French (sfrench@us.ibm.com) 7 * Modified by Namjae Jeon (linkinjeon@kernel.org) 8 */ 9#include <linux/fs.h> 10#include <linux/slab.h> 11#include <asm/unaligned.h> 12#include "glob.h" 13#include "unicode.h" 14#include "uniupr.h" 15#include "smb_common.h" 16 17/* 18 * smb_utf16_bytes() - how long will a string be after conversion? 19 * @from: pointer to input string 20 * @maxbytes: don't go past this many bytes of input string 21 * @codepage: destination codepage 22 * 23 * Walk a utf16le string and return the number of bytes that the string will 24 * be after being converted to the given charset, not including any null 25 * termination required. Don't walk past maxbytes in the source buffer. 26 * 27 * Return: string length after conversion 28 */ 29static int smb_utf16_bytes(const __le16 *from, int maxbytes, 30 const struct nls_table *codepage) 31{ 32 int i; 33 int charlen, outlen = 0; 34 int maxwords = maxbytes / 2; 35 char tmp[NLS_MAX_CHARSET_SIZE]; 36 __u16 ftmp; 37 38 for (i = 0; i < maxwords; i++) { 39 ftmp = get_unaligned_le16(&from[i]); 40 if (ftmp == 0) 41 break; 42 43 charlen = codepage->uni2char(ftmp, tmp, NLS_MAX_CHARSET_SIZE); 44 if (charlen > 0) 45 outlen += charlen; 46 else 47 outlen++; 48 } 49 50 return outlen; 51} 52 53/* 54 * cifs_mapchar() - convert a host-endian char to proper char in codepage 55 * @target: where converted character should be copied 56 * @src_char: 2 byte host-endian source character 57 * @cp: codepage to which character should be converted 58 * @mapchar: should character be mapped according to mapchars mount option? 59 * 60 * This function handles the conversion of a single character. It is the 61 * responsibility of the caller to ensure that the target buffer is large 62 * enough to hold the result of the conversion (at least NLS_MAX_CHARSET_SIZE). 63 * 64 * Return: string length after conversion 65 */ 66static int 67cifs_mapchar(char *target, const __u16 src_char, const struct nls_table *cp, 68 bool mapchar) 69{ 70 int len = 1; 71 72 if (!mapchar) 73 goto cp_convert; 74 75 /* 76 * BB: Cannot handle remapping UNI_SLASH until all the calls to 77 * build_path_from_dentry are modified, as they use slash as 78 * separator. 79 */ 80 switch (src_char) { 81 case UNI_COLON: 82 *target = ':'; 83 break; 84 case UNI_ASTERISK: 85 *target = '*'; 86 break; 87 case UNI_QUESTION: 88 *target = '?'; 89 break; 90 case UNI_PIPE: 91 *target = '|'; 92 break; 93 case UNI_GRTRTHAN: 94 *target = '>'; 95 break; 96 case UNI_LESSTHAN: 97 *target = '<'; 98 break; 99 default: 100 goto cp_convert; 101 } 102 103out: 104 return len; 105 106cp_convert: 107 len = cp->uni2char(src_char, target, NLS_MAX_CHARSET_SIZE); 108 if (len <= 0) { 109 *target = '?'; 110 len = 1; 111 } 112 113 goto out; 114} 115 116/* 117 * is_char_allowed() - check for valid character 118 * @ch: input character to be checked 119 * 120 * Return: 1 if char is allowed, otherwise 0 121 */ 122static inline int is_char_allowed(char *ch) 123{ 124 /* check for control chars, wildcards etc. */ 125 if (!(*ch & 0x80) && 126 (*ch <= 0x1f || 127 *ch == '?' || *ch == '"' || *ch == '<' || 128 *ch == '>' || *ch == '|')) 129 return 0; 130 131 return 1; 132} 133 134/* 135 * smb_from_utf16() - convert utf16le string to local charset 136 * @to: destination buffer 137 * @from: source buffer 138 * @tolen: destination buffer size (in bytes) 139 * @fromlen: source buffer size (in bytes) 140 * @codepage: codepage to which characters should be converted 141 * @mapchar: should characters be remapped according to the mapchars option? 142 * 143 * Convert a little-endian utf16le string (as sent by the server) to a string 144 * in the provided codepage. The tolen and fromlen parameters are to ensure 145 * that the code doesn't walk off of the end of the buffer (which is always 146 * a danger if the alignment of the source buffer is off). The destination 147 * string is always properly null terminated and fits in the destination 148 * buffer. Returns the length of the destination string in bytes (including 149 * null terminator). 150 * 151 * Note that some windows versions actually send multiword UTF-16 characters 152 * instead of straight UTF16-2. The linux nls routines however aren't able to 153 * deal with those characters properly. In the event that we get some of 154 * those characters, they won't be translated properly. 155 * 156 * Return: string length after conversion 157 */ 158static int smb_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 159 const struct nls_table *codepage, bool mapchar) 160{ 161 int i, charlen, safelen; 162 int outlen = 0; 163 int nullsize = nls_nullsize(codepage); 164 int fromwords = fromlen / 2; 165 char tmp[NLS_MAX_CHARSET_SIZE]; 166 __u16 ftmp; 167 168 /* 169 * because the chars can be of varying widths, we need to take care 170 * not to overflow the destination buffer when we get close to the 171 * end of it. Until we get to this offset, we don't need to check 172 * for overflow however. 173 */ 174 safelen = tolen - (NLS_MAX_CHARSET_SIZE + nullsize); 175 176 for (i = 0; i < fromwords; i++) { 177 ftmp = get_unaligned_le16(&from[i]); 178 if (ftmp == 0) 179 break; 180 181 /* 182 * check to see if converting this character might make the 183 * conversion bleed into the null terminator 184 */ 185 if (outlen >= safelen) { 186 charlen = cifs_mapchar(tmp, ftmp, codepage, mapchar); 187 if ((outlen + charlen) > (tolen - nullsize)) 188 break; 189 } 190 191 /* put converted char into 'to' buffer */ 192 charlen = cifs_mapchar(&to[outlen], ftmp, codepage, mapchar); 193 outlen += charlen; 194 } 195 196 /* properly null-terminate string */ 197 for (i = 0; i < nullsize; i++) 198 to[outlen++] = 0; 199 200 return outlen; 201} 202 203/* 204 * smb_strtoUTF16() - Convert character string to unicode string 205 * @to: destination buffer 206 * @from: source buffer 207 * @len: destination buffer size (in bytes) 208 * @codepage: codepage to which characters should be converted 209 * 210 * Return: string length after conversion 211 */ 212int smb_strtoUTF16(__le16 *to, const char *from, int len, 213 const struct nls_table *codepage) 214{ 215 int charlen; 216 int i; 217 wchar_t wchar_to; /* needed to quiet sparse */ 218 219 /* special case for utf8 to handle no plane0 chars */ 220 if (!strcmp(codepage->charset, "utf8")) { 221 /* 222 * convert utf8 -> utf16, we assume we have enough space 223 * as caller should have assumed conversion does not overflow 224 * in destination len is length in wchar_t units (16bits) 225 */ 226 i = utf8s_to_utf16s(from, len, UTF16_LITTLE_ENDIAN, 227 (wchar_t *)to, len); 228 229 /* if success terminate and exit */ 230 if (i >= 0) 231 goto success; 232 /* 233 * if fails fall back to UCS encoding as this 234 * function should not return negative values 235 * currently can fail only if source contains 236 * invalid encoded characters 237 */ 238 } 239 240 for (i = 0; len > 0 && *from; i++, from += charlen, len -= charlen) { 241 charlen = codepage->char2uni(from, len, &wchar_to); 242 if (charlen < 1) { 243 /* A question mark */ 244 wchar_to = 0x003f; 245 charlen = 1; 246 } 247 put_unaligned_le16(wchar_to, &to[i]); 248 } 249 250success: 251 put_unaligned_le16(0, &to[i]); 252 return i; 253} 254 255/* 256 * smb_strndup_from_utf16() - copy a string from wire format to the local 257 * codepage 258 * @src: source string 259 * @maxlen: don't walk past this many bytes in the source string 260 * @is_unicode: is this a unicode string? 261 * @codepage: destination codepage 262 * 263 * Take a string given by the server, convert it to the local codepage and 264 * put it in a new buffer. Returns a pointer to the new string or NULL on 265 * error. 266 * 267 * Return: destination string buffer or error ptr 268 */ 269char *smb_strndup_from_utf16(const char *src, const int maxlen, 270 const bool is_unicode, 271 const struct nls_table *codepage) 272{ 273 int len, ret; 274 char *dst; 275 276 if (is_unicode) { 277 len = smb_utf16_bytes((__le16 *)src, maxlen, codepage); 278 len += nls_nullsize(codepage); 279 dst = kmalloc(len, GFP_KERNEL); 280 if (!dst) 281 return ERR_PTR(-ENOMEM); 282 ret = smb_from_utf16(dst, (__le16 *)src, len, maxlen, codepage, 283 false); 284 if (ret < 0) { 285 kfree(dst); 286 return ERR_PTR(-EINVAL); 287 } 288 } else { 289 len = strnlen(src, maxlen); 290 len++; 291 dst = kmalloc(len, GFP_KERNEL); 292 if (!dst) 293 return ERR_PTR(-ENOMEM); 294 strscpy(dst, src, len); 295 } 296 297 return dst; 298} 299 300/* 301 * Convert 16 bit Unicode pathname to wire format from string in current code 302 * page. Conversion may involve remapping up the six characters that are 303 * only legal in POSIX-like OS (if they are present in the string). Path 304 * names are little endian 16 bit Unicode on the wire 305 */ 306/* 307 * smbConvertToUTF16() - convert string from local charset to utf16 308 * @target: destination buffer 309 * @source: source buffer 310 * @srclen: source buffer size (in bytes) 311 * @cp: codepage to which characters should be converted 312 * @mapchar: should characters be remapped according to the mapchars option? 313 * 314 * Convert 16 bit Unicode pathname to wire format from string in current code 315 * page. Conversion may involve remapping up the six characters that are 316 * only legal in POSIX-like OS (if they are present in the string). Path 317 * names are little endian 16 bit Unicode on the wire 318 * 319 * Return: char length after conversion 320 */ 321int smbConvertToUTF16(__le16 *target, const char *source, int srclen, 322 const struct nls_table *cp, int mapchars) 323{ 324 int i, j, charlen; 325 char src_char; 326 __le16 dst_char; 327 wchar_t tmp; 328 329 if (!mapchars) 330 return smb_strtoUTF16(target, source, srclen, cp); 331 332 for (i = 0, j = 0; i < srclen; j++) { 333 src_char = source[i]; 334 charlen = 1; 335 switch (src_char) { 336 case 0: 337 put_unaligned(0, &target[j]); 338 return j; 339 case ':': 340 dst_char = cpu_to_le16(UNI_COLON); 341 break; 342 case '*': 343 dst_char = cpu_to_le16(UNI_ASTERISK); 344 break; 345 case '?': 346 dst_char = cpu_to_le16(UNI_QUESTION); 347 break; 348 case '<': 349 dst_char = cpu_to_le16(UNI_LESSTHAN); 350 break; 351 case '>': 352 dst_char = cpu_to_le16(UNI_GRTRTHAN); 353 break; 354 case '|': 355 dst_char = cpu_to_le16(UNI_PIPE); 356 break; 357 /* 358 * FIXME: We can not handle remapping backslash (UNI_SLASH) 359 * until all the calls to build_path_from_dentry are modified, 360 * as they use backslash as separator. 361 */ 362 default: 363 charlen = cp->char2uni(source + i, srclen - i, &tmp); 364 dst_char = cpu_to_le16(tmp); 365 366 /* 367 * if no match, use question mark, which at least in 368 * some cases serves as wild card 369 */ 370 if (charlen < 1) { 371 dst_char = cpu_to_le16(0x003f); 372 charlen = 1; 373 } 374 } 375 /* 376 * character may take more than one byte in the source string, 377 * but will take exactly two bytes in the target string 378 */ 379 i += charlen; 380 put_unaligned(dst_char, &target[j]); 381 } 382 383 return j; 384}