unistr.c (11671B)
1// SPDX-License-Identifier: GPL-2.0-or-later 2/* 3 * unistr.c - NTFS Unicode string handling. Part of the Linux-NTFS project. 4 * 5 * Copyright (c) 2001-2006 Anton Altaparmakov 6 */ 7 8#include <linux/slab.h> 9 10#include "types.h" 11#include "debug.h" 12#include "ntfs.h" 13 14/* 15 * IMPORTANT 16 * ========= 17 * 18 * All these routines assume that the Unicode characters are in little endian 19 * encoding inside the strings!!! 20 */ 21 22/* 23 * This is used by the name collation functions to quickly determine what 24 * characters are (in)valid. 25 */ 26static const u8 legal_ansi_char_array[0x40] = { 27 0x00, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 28 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 29 30 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 31 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 32 33 0x17, 0x07, 0x18, 0x17, 0x17, 0x17, 0x17, 0x17, 34 0x17, 0x17, 0x18, 0x16, 0x16, 0x17, 0x07, 0x00, 35 36 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 0x17, 37 0x17, 0x17, 0x04, 0x16, 0x18, 0x16, 0x18, 0x18, 38}; 39 40/** 41 * ntfs_are_names_equal - compare two Unicode names for equality 42 * @s1: name to compare to @s2 43 * @s1_len: length in Unicode characters of @s1 44 * @s2: name to compare to @s1 45 * @s2_len: length in Unicode characters of @s2 46 * @ic: ignore case bool 47 * @upcase: upcase table (only if @ic == IGNORE_CASE) 48 * @upcase_size: length in Unicode characters of @upcase (if present) 49 * 50 * Compare the names @s1 and @s2 and return 'true' (1) if the names are 51 * identical, or 'false' (0) if they are not identical. If @ic is IGNORE_CASE, 52 * the @upcase table is used to performa a case insensitive comparison. 53 */ 54bool ntfs_are_names_equal(const ntfschar *s1, size_t s1_len, 55 const ntfschar *s2, size_t s2_len, const IGNORE_CASE_BOOL ic, 56 const ntfschar *upcase, const u32 upcase_size) 57{ 58 if (s1_len != s2_len) 59 return false; 60 if (ic == CASE_SENSITIVE) 61 return !ntfs_ucsncmp(s1, s2, s1_len); 62 return !ntfs_ucsncasecmp(s1, s2, s1_len, upcase, upcase_size); 63} 64 65/** 66 * ntfs_collate_names - collate two Unicode names 67 * @name1: first Unicode name to compare 68 * @name2: second Unicode name to compare 69 * @err_val: if @name1 contains an invalid character return this value 70 * @ic: either CASE_SENSITIVE or IGNORE_CASE 71 * @upcase: upcase table (ignored if @ic is CASE_SENSITIVE) 72 * @upcase_len: upcase table size (ignored if @ic is CASE_SENSITIVE) 73 * 74 * ntfs_collate_names collates two Unicode names and returns: 75 * 76 * -1 if the first name collates before the second one, 77 * 0 if the names match, 78 * 1 if the second name collates before the first one, or 79 * @err_val if an invalid character is found in @name1 during the comparison. 80 * 81 * The following characters are considered invalid: '"', '*', '<', '>' and '?'. 82 */ 83int ntfs_collate_names(const ntfschar *name1, const u32 name1_len, 84 const ntfschar *name2, const u32 name2_len, 85 const int err_val, const IGNORE_CASE_BOOL ic, 86 const ntfschar *upcase, const u32 upcase_len) 87{ 88 u32 cnt, min_len; 89 u16 c1, c2; 90 91 min_len = name1_len; 92 if (name1_len > name2_len) 93 min_len = name2_len; 94 for (cnt = 0; cnt < min_len; ++cnt) { 95 c1 = le16_to_cpu(*name1++); 96 c2 = le16_to_cpu(*name2++); 97 if (ic) { 98 if (c1 < upcase_len) 99 c1 = le16_to_cpu(upcase[c1]); 100 if (c2 < upcase_len) 101 c2 = le16_to_cpu(upcase[c2]); 102 } 103 if (c1 < 64 && legal_ansi_char_array[c1] & 8) 104 return err_val; 105 if (c1 < c2) 106 return -1; 107 if (c1 > c2) 108 return 1; 109 } 110 if (name1_len < name2_len) 111 return -1; 112 if (name1_len == name2_len) 113 return 0; 114 /* name1_len > name2_len */ 115 c1 = le16_to_cpu(*name1); 116 if (c1 < 64 && legal_ansi_char_array[c1] & 8) 117 return err_val; 118 return 1; 119} 120 121/** 122 * ntfs_ucsncmp - compare two little endian Unicode strings 123 * @s1: first string 124 * @s2: second string 125 * @n: maximum unicode characters to compare 126 * 127 * Compare the first @n characters of the Unicode strings @s1 and @s2, 128 * The strings in little endian format and appropriate le16_to_cpu() 129 * conversion is performed on non-little endian machines. 130 * 131 * The function returns an integer less than, equal to, or greater than zero 132 * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 133 * to be less than, to match, or be greater than @s2. 134 */ 135int ntfs_ucsncmp(const ntfschar *s1, const ntfschar *s2, size_t n) 136{ 137 u16 c1, c2; 138 size_t i; 139 140 for (i = 0; i < n; ++i) { 141 c1 = le16_to_cpu(s1[i]); 142 c2 = le16_to_cpu(s2[i]); 143 if (c1 < c2) 144 return -1; 145 if (c1 > c2) 146 return 1; 147 if (!c1) 148 break; 149 } 150 return 0; 151} 152 153/** 154 * ntfs_ucsncasecmp - compare two little endian Unicode strings, ignoring case 155 * @s1: first string 156 * @s2: second string 157 * @n: maximum unicode characters to compare 158 * @upcase: upcase table 159 * @upcase_size: upcase table size in Unicode characters 160 * 161 * Compare the first @n characters of the Unicode strings @s1 and @s2, 162 * ignoring case. The strings in little endian format and appropriate 163 * le16_to_cpu() conversion is performed on non-little endian machines. 164 * 165 * Each character is uppercased using the @upcase table before the comparison. 166 * 167 * The function returns an integer less than, equal to, or greater than zero 168 * if @s1 (or the first @n Unicode characters thereof) is found, respectively, 169 * to be less than, to match, or be greater than @s2. 170 */ 171int ntfs_ucsncasecmp(const ntfschar *s1, const ntfschar *s2, size_t n, 172 const ntfschar *upcase, const u32 upcase_size) 173{ 174 size_t i; 175 u16 c1, c2; 176 177 for (i = 0; i < n; ++i) { 178 if ((c1 = le16_to_cpu(s1[i])) < upcase_size) 179 c1 = le16_to_cpu(upcase[c1]); 180 if ((c2 = le16_to_cpu(s2[i])) < upcase_size) 181 c2 = le16_to_cpu(upcase[c2]); 182 if (c1 < c2) 183 return -1; 184 if (c1 > c2) 185 return 1; 186 if (!c1) 187 break; 188 } 189 return 0; 190} 191 192void ntfs_upcase_name(ntfschar *name, u32 name_len, const ntfschar *upcase, 193 const u32 upcase_len) 194{ 195 u32 i; 196 u16 u; 197 198 for (i = 0; i < name_len; i++) 199 if ((u = le16_to_cpu(name[i])) < upcase_len) 200 name[i] = upcase[u]; 201} 202 203void ntfs_file_upcase_value(FILE_NAME_ATTR *file_name_attr, 204 const ntfschar *upcase, const u32 upcase_len) 205{ 206 ntfs_upcase_name((ntfschar*)&file_name_attr->file_name, 207 file_name_attr->file_name_length, upcase, upcase_len); 208} 209 210int ntfs_file_compare_values(FILE_NAME_ATTR *file_name_attr1, 211 FILE_NAME_ATTR *file_name_attr2, 212 const int err_val, const IGNORE_CASE_BOOL ic, 213 const ntfschar *upcase, const u32 upcase_len) 214{ 215 return ntfs_collate_names((ntfschar*)&file_name_attr1->file_name, 216 file_name_attr1->file_name_length, 217 (ntfschar*)&file_name_attr2->file_name, 218 file_name_attr2->file_name_length, 219 err_val, ic, upcase, upcase_len); 220} 221 222/** 223 * ntfs_nlstoucs - convert NLS string to little endian Unicode string 224 * @vol: ntfs volume which we are working with 225 * @ins: input NLS string buffer 226 * @ins_len: length of input string in bytes 227 * @outs: on return contains the allocated output Unicode string buffer 228 * 229 * Convert the input string @ins, which is in whatever format the loaded NLS 230 * map dictates, into a little endian, 2-byte Unicode string. 231 * 232 * This function allocates the string and the caller is responsible for 233 * calling kmem_cache_free(ntfs_name_cache, *@outs); when finished with it. 234 * 235 * On success the function returns the number of Unicode characters written to 236 * the output string *@outs (>= 0), not counting the terminating Unicode NULL 237 * character. *@outs is set to the allocated output string buffer. 238 * 239 * On error, a negative number corresponding to the error code is returned. In 240 * that case the output string is not allocated. Both *@outs and *@outs_len 241 * are then undefined. 242 * 243 * This might look a bit odd due to fast path optimization... 244 */ 245int ntfs_nlstoucs(const ntfs_volume *vol, const char *ins, 246 const int ins_len, ntfschar **outs) 247{ 248 struct nls_table *nls = vol->nls_map; 249 ntfschar *ucs; 250 wchar_t wc; 251 int i, o, wc_len; 252 253 /* We do not trust outside sources. */ 254 if (likely(ins)) { 255 ucs = kmem_cache_alloc(ntfs_name_cache, GFP_NOFS); 256 if (likely(ucs)) { 257 for (i = o = 0; i < ins_len; i += wc_len) { 258 wc_len = nls->char2uni(ins + i, ins_len - i, 259 &wc); 260 if (likely(wc_len >= 0 && 261 o < NTFS_MAX_NAME_LEN)) { 262 if (likely(wc)) { 263 ucs[o++] = cpu_to_le16(wc); 264 continue; 265 } /* else if (!wc) */ 266 break; 267 } /* else if (wc_len < 0 || 268 o >= NTFS_MAX_NAME_LEN) */ 269 goto name_err; 270 } 271 ucs[o] = 0; 272 *outs = ucs; 273 return o; 274 } /* else if (!ucs) */ 275 ntfs_error(vol->sb, "Failed to allocate buffer for converted " 276 "name from ntfs_name_cache."); 277 return -ENOMEM; 278 } /* else if (!ins) */ 279 ntfs_error(vol->sb, "Received NULL pointer."); 280 return -EINVAL; 281name_err: 282 kmem_cache_free(ntfs_name_cache, ucs); 283 if (wc_len < 0) { 284 ntfs_error(vol->sb, "Name using character set %s contains " 285 "characters that cannot be converted to " 286 "Unicode.", nls->charset); 287 i = -EILSEQ; 288 } else /* if (o >= NTFS_MAX_NAME_LEN) */ { 289 ntfs_error(vol->sb, "Name is too long (maximum length for a " 290 "name on NTFS is %d Unicode characters.", 291 NTFS_MAX_NAME_LEN); 292 i = -ENAMETOOLONG; 293 } 294 return i; 295} 296 297/** 298 * ntfs_ucstonls - convert little endian Unicode string to NLS string 299 * @vol: ntfs volume which we are working with 300 * @ins: input Unicode string buffer 301 * @ins_len: length of input string in Unicode characters 302 * @outs: on return contains the (allocated) output NLS string buffer 303 * @outs_len: length of output string buffer in bytes 304 * 305 * Convert the input little endian, 2-byte Unicode string @ins, of length 306 * @ins_len into the string format dictated by the loaded NLS. 307 * 308 * If *@outs is NULL, this function allocates the string and the caller is 309 * responsible for calling kfree(*@outs); when finished with it. In this case 310 * @outs_len is ignored and can be 0. 311 * 312 * On success the function returns the number of bytes written to the output 313 * string *@outs (>= 0), not counting the terminating NULL byte. If the output 314 * string buffer was allocated, *@outs is set to it. 315 * 316 * On error, a negative number corresponding to the error code is returned. In 317 * that case the output string is not allocated. The contents of *@outs are 318 * then undefined. 319 * 320 * This might look a bit odd due to fast path optimization... 321 */ 322int ntfs_ucstonls(const ntfs_volume *vol, const ntfschar *ins, 323 const int ins_len, unsigned char **outs, int outs_len) 324{ 325 struct nls_table *nls = vol->nls_map; 326 unsigned char *ns; 327 int i, o, ns_len, wc; 328 329 /* We don't trust outside sources. */ 330 if (ins) { 331 ns = *outs; 332 ns_len = outs_len; 333 if (ns && !ns_len) { 334 wc = -ENAMETOOLONG; 335 goto conversion_err; 336 } 337 if (!ns) { 338 ns_len = ins_len * NLS_MAX_CHARSET_SIZE; 339 ns = kmalloc(ns_len + 1, GFP_NOFS); 340 if (!ns) 341 goto mem_err_out; 342 } 343 for (i = o = 0; i < ins_len; i++) { 344retry: wc = nls->uni2char(le16_to_cpu(ins[i]), ns + o, 345 ns_len - o); 346 if (wc > 0) { 347 o += wc; 348 continue; 349 } else if (!wc) 350 break; 351 else if (wc == -ENAMETOOLONG && ns != *outs) { 352 unsigned char *tc; 353 /* Grow in multiples of 64 bytes. */ 354 tc = kmalloc((ns_len + 64) & 355 ~63, GFP_NOFS); 356 if (tc) { 357 memcpy(tc, ns, ns_len); 358 ns_len = ((ns_len + 64) & ~63) - 1; 359 kfree(ns); 360 ns = tc; 361 goto retry; 362 } /* No memory so goto conversion_error; */ 363 } /* wc < 0, real error. */ 364 goto conversion_err; 365 } 366 ns[o] = 0; 367 *outs = ns; 368 return o; 369 } /* else (!ins) */ 370 ntfs_error(vol->sb, "Received NULL pointer."); 371 return -EINVAL; 372conversion_err: 373 ntfs_error(vol->sb, "Unicode name contains characters that cannot be " 374 "converted to character set %s. You might want to " 375 "try to use the mount option nls=utf8.", nls->charset); 376 if (ns != *outs) 377 kfree(ns); 378 if (wc != -ENAMETOOLONG) 379 wc = -EILSEQ; 380 return wc; 381mem_err_out: 382 ntfs_error(vol->sb, "Failed to allocate name!"); 383 return -ENOMEM; 384}