unicode.c (10742B)
1// SPDX-License-Identifier: GPL-2.0 2/* 3 * linux/fs/hfsplus/unicode.c 4 * 5 * Copyright (C) 2001 6 * Brad Boyer (flar@allandria.com) 7 * (C) 2003 Ardis Technologies <roman@ardistech.com> 8 * 9 * Handler routines for unicode strings 10 */ 11 12#include <linux/types.h> 13#include <linux/nls.h> 14#include "hfsplus_fs.h" 15#include "hfsplus_raw.h" 16 17/* Fold the case of a unicode char, given the 16 bit value */ 18/* Returns folded char, or 0 if ignorable */ 19static inline u16 case_fold(u16 c) 20{ 21 u16 tmp; 22 23 tmp = hfsplus_case_fold_table[c >> 8]; 24 if (tmp) 25 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)]; 26 else 27 tmp = c; 28 return tmp; 29} 30 31/* Compare unicode strings, return values like normal strcmp */ 32int hfsplus_strcasecmp(const struct hfsplus_unistr *s1, 33 const struct hfsplus_unistr *s2) 34{ 35 u16 len1, len2, c1, c2; 36 const hfsplus_unichr *p1, *p2; 37 38 len1 = be16_to_cpu(s1->length); 39 len2 = be16_to_cpu(s2->length); 40 p1 = s1->unicode; 41 p2 = s2->unicode; 42 43 while (1) { 44 c1 = c2 = 0; 45 46 while (len1 && !c1) { 47 c1 = case_fold(be16_to_cpu(*p1)); 48 p1++; 49 len1--; 50 } 51 while (len2 && !c2) { 52 c2 = case_fold(be16_to_cpu(*p2)); 53 p2++; 54 len2--; 55 } 56 57 if (c1 != c2) 58 return (c1 < c2) ? -1 : 1; 59 if (!c1 && !c2) 60 return 0; 61 } 62} 63 64/* Compare names as a sequence of 16-bit unsigned integers */ 65int hfsplus_strcmp(const struct hfsplus_unistr *s1, 66 const struct hfsplus_unistr *s2) 67{ 68 u16 len1, len2, c1, c2; 69 const hfsplus_unichr *p1, *p2; 70 int len; 71 72 len1 = be16_to_cpu(s1->length); 73 len2 = be16_to_cpu(s2->length); 74 p1 = s1->unicode; 75 p2 = s2->unicode; 76 77 for (len = min(len1, len2); len > 0; len--) { 78 c1 = be16_to_cpu(*p1); 79 c2 = be16_to_cpu(*p2); 80 if (c1 != c2) 81 return c1 < c2 ? -1 : 1; 82 p1++; 83 p2++; 84 } 85 86 return len1 < len2 ? -1 : 87 len1 > len2 ? 1 : 0; 88} 89 90 91#define Hangul_SBase 0xac00 92#define Hangul_LBase 0x1100 93#define Hangul_VBase 0x1161 94#define Hangul_TBase 0x11a7 95#define Hangul_SCount 11172 96#define Hangul_LCount 19 97#define Hangul_VCount 21 98#define Hangul_TCount 28 99#define Hangul_NCount (Hangul_VCount * Hangul_TCount) 100 101 102static u16 *hfsplus_compose_lookup(u16 *p, u16 cc) 103{ 104 int i, s, e; 105 106 s = 1; 107 e = p[1]; 108 if (!e || cc < p[s * 2] || cc > p[e * 2]) 109 return NULL; 110 do { 111 i = (s + e) / 2; 112 if (cc > p[i * 2]) 113 s = i + 1; 114 else if (cc < p[i * 2]) 115 e = i - 1; 116 else 117 return hfsplus_compose_table + p[i * 2 + 1]; 118 } while (s <= e); 119 return NULL; 120} 121 122int hfsplus_uni2asc(struct super_block *sb, 123 const struct hfsplus_unistr *ustr, 124 char *astr, int *len_p) 125{ 126 const hfsplus_unichr *ip; 127 struct nls_table *nls = HFSPLUS_SB(sb)->nls; 128 u8 *op; 129 u16 cc, c0, c1; 130 u16 *ce1, *ce2; 131 int i, len, ustrlen, res, compose; 132 133 op = astr; 134 ip = ustr->unicode; 135 ustrlen = be16_to_cpu(ustr->length); 136 len = *len_p; 137 ce1 = NULL; 138 compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 139 140 while (ustrlen > 0) { 141 c0 = be16_to_cpu(*ip++); 142 ustrlen--; 143 /* search for single decomposed char */ 144 if (likely(compose)) 145 ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0); 146 if (ce1) 147 cc = ce1[0]; 148 else 149 cc = 0; 150 if (cc) { 151 /* start of a possibly decomposed Hangul char */ 152 if (cc != 0xffff) 153 goto done; 154 if (!ustrlen) 155 goto same; 156 c1 = be16_to_cpu(*ip) - Hangul_VBase; 157 if (c1 < Hangul_VCount) { 158 /* compose the Hangul char */ 159 cc = (c0 - Hangul_LBase) * Hangul_VCount; 160 cc = (cc + c1) * Hangul_TCount; 161 cc += Hangul_SBase; 162 ip++; 163 ustrlen--; 164 if (!ustrlen) 165 goto done; 166 c1 = be16_to_cpu(*ip) - Hangul_TBase; 167 if (c1 > 0 && c1 < Hangul_TCount) { 168 cc += c1; 169 ip++; 170 ustrlen--; 171 } 172 goto done; 173 } 174 } 175 while (1) { 176 /* main loop for common case of not composed chars */ 177 if (!ustrlen) 178 goto same; 179 c1 = be16_to_cpu(*ip); 180 if (likely(compose)) 181 ce1 = hfsplus_compose_lookup( 182 hfsplus_compose_table, c1); 183 if (ce1) 184 break; 185 switch (c0) { 186 case 0: 187 c0 = 0x2400; 188 break; 189 case '/': 190 c0 = ':'; 191 break; 192 } 193 res = nls->uni2char(c0, op, len); 194 if (res < 0) { 195 if (res == -ENAMETOOLONG) 196 goto out; 197 *op = '?'; 198 res = 1; 199 } 200 op += res; 201 len -= res; 202 c0 = c1; 203 ip++; 204 ustrlen--; 205 } 206 ce2 = hfsplus_compose_lookup(ce1, c0); 207 if (ce2) { 208 i = 1; 209 while (i < ustrlen) { 210 ce1 = hfsplus_compose_lookup(ce2, 211 be16_to_cpu(ip[i])); 212 if (!ce1) 213 break; 214 i++; 215 ce2 = ce1; 216 } 217 cc = ce2[0]; 218 if (cc) { 219 ip += i; 220 ustrlen -= i; 221 goto done; 222 } 223 } 224same: 225 switch (c0) { 226 case 0: 227 cc = 0x2400; 228 break; 229 case '/': 230 cc = ':'; 231 break; 232 default: 233 cc = c0; 234 } 235done: 236 res = nls->uni2char(cc, op, len); 237 if (res < 0) { 238 if (res == -ENAMETOOLONG) 239 goto out; 240 *op = '?'; 241 res = 1; 242 } 243 op += res; 244 len -= res; 245 } 246 res = 0; 247out: 248 *len_p = (char *)op - astr; 249 return res; 250} 251 252/* 253 * Convert one or more ASCII characters into a single unicode character. 254 * Returns the number of ASCII characters corresponding to the unicode char. 255 */ 256static inline int asc2unichar(struct super_block *sb, const char *astr, int len, 257 wchar_t *uc) 258{ 259 int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc); 260 if (size <= 0) { 261 *uc = '?'; 262 size = 1; 263 } 264 switch (*uc) { 265 case 0x2400: 266 *uc = 0; 267 break; 268 case ':': 269 *uc = '/'; 270 break; 271 } 272 return size; 273} 274 275/* Decomposes a non-Hangul unicode character. */ 276static u16 *hfsplus_decompose_nonhangul(wchar_t uc, int *size) 277{ 278 int off; 279 280 off = hfsplus_decompose_table[(uc >> 12) & 0xf]; 281 if (off == 0 || off == 0xffff) 282 return NULL; 283 284 off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)]; 285 if (!off) 286 return NULL; 287 288 off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)]; 289 if (!off) 290 return NULL; 291 292 off = hfsplus_decompose_table[off + (uc & 0xf)]; 293 *size = off & 3; 294 if (*size == 0) 295 return NULL; 296 return hfsplus_decompose_table + (off / 4); 297} 298 299/* 300 * Try to decompose a unicode character as Hangul. Return 0 if @uc is not 301 * precomposed Hangul, otherwise return the length of the decomposition. 302 * 303 * This function was adapted from sample code from the Unicode Standard 304 * Annex #15: Unicode Normalization Forms, version 3.2.0. 305 * 306 * Copyright (C) 1991-2018 Unicode, Inc. All rights reserved. Distributed 307 * under the Terms of Use in http://www.unicode.org/copyright.html. 308 */ 309static int hfsplus_try_decompose_hangul(wchar_t uc, u16 *result) 310{ 311 int index; 312 int l, v, t; 313 314 index = uc - Hangul_SBase; 315 if (index < 0 || index >= Hangul_SCount) 316 return 0; 317 318 l = Hangul_LBase + index / Hangul_NCount; 319 v = Hangul_VBase + (index % Hangul_NCount) / Hangul_TCount; 320 t = Hangul_TBase + index % Hangul_TCount; 321 322 result[0] = l; 323 result[1] = v; 324 if (t != Hangul_TBase) { 325 result[2] = t; 326 return 3; 327 } 328 return 2; 329} 330 331/* Decomposes a single unicode character. */ 332static u16 *decompose_unichar(wchar_t uc, int *size, u16 *hangul_buffer) 333{ 334 u16 *result; 335 336 /* Hangul is handled separately */ 337 result = hangul_buffer; 338 *size = hfsplus_try_decompose_hangul(uc, result); 339 if (*size == 0) 340 result = hfsplus_decompose_nonhangul(uc, size); 341 return result; 342} 343 344int hfsplus_asc2uni(struct super_block *sb, 345 struct hfsplus_unistr *ustr, int max_unistr_len, 346 const char *astr, int len) 347{ 348 int size, dsize, decompose; 349 u16 *dstr, outlen = 0; 350 wchar_t c; 351 u16 dhangul[3]; 352 353 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 354 while (outlen < max_unistr_len && len > 0) { 355 size = asc2unichar(sb, astr, len, &c); 356 357 if (decompose) 358 dstr = decompose_unichar(c, &dsize, dhangul); 359 else 360 dstr = NULL; 361 if (dstr) { 362 if (outlen + dsize > max_unistr_len) 363 break; 364 do { 365 ustr->unicode[outlen++] = cpu_to_be16(*dstr++); 366 } while (--dsize > 0); 367 } else 368 ustr->unicode[outlen++] = cpu_to_be16(c); 369 370 astr += size; 371 len -= size; 372 } 373 ustr->length = cpu_to_be16(outlen); 374 if (len > 0) 375 return -ENAMETOOLONG; 376 return 0; 377} 378 379/* 380 * Hash a string to an integer as appropriate for the HFS+ filesystem. 381 * Composed unicode characters are decomposed and case-folding is performed 382 * if the appropriate bits are (un)set on the superblock. 383 */ 384int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str) 385{ 386 struct super_block *sb = dentry->d_sb; 387 const char *astr; 388 const u16 *dstr; 389 int casefold, decompose, size, len; 390 unsigned long hash; 391 wchar_t c; 392 u16 c2; 393 u16 dhangul[3]; 394 395 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 396 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 397 hash = init_name_hash(dentry); 398 astr = str->name; 399 len = str->len; 400 while (len > 0) { 401 int dsize; 402 size = asc2unichar(sb, astr, len, &c); 403 astr += size; 404 len -= size; 405 406 if (decompose) 407 dstr = decompose_unichar(c, &dsize, dhangul); 408 else 409 dstr = NULL; 410 if (dstr) { 411 do { 412 c2 = *dstr++; 413 if (casefold) 414 c2 = case_fold(c2); 415 if (!casefold || c2) 416 hash = partial_name_hash(c2, hash); 417 } while (--dsize > 0); 418 } else { 419 c2 = c; 420 if (casefold) 421 c2 = case_fold(c2); 422 if (!casefold || c2) 423 hash = partial_name_hash(c2, hash); 424 } 425 } 426 str->hash = end_name_hash(hash); 427 428 return 0; 429} 430 431/* 432 * Compare strings with HFS+ filename ordering. 433 * Composed unicode characters are decomposed and case-folding is performed 434 * if the appropriate bits are (un)set on the superblock. 435 */ 436int hfsplus_compare_dentry(const struct dentry *dentry, 437 unsigned int len, const char *str, const struct qstr *name) 438{ 439 struct super_block *sb = dentry->d_sb; 440 int casefold, decompose, size; 441 int dsize1, dsize2, len1, len2; 442 const u16 *dstr1, *dstr2; 443 const char *astr1, *astr2; 444 u16 c1, c2; 445 wchar_t c; 446 u16 dhangul_1[3], dhangul_2[3]; 447 448 casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags); 449 decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags); 450 astr1 = str; 451 len1 = len; 452 astr2 = name->name; 453 len2 = name->len; 454 dsize1 = dsize2 = 0; 455 dstr1 = dstr2 = NULL; 456 457 while (len1 > 0 && len2 > 0) { 458 if (!dsize1) { 459 size = asc2unichar(sb, astr1, len1, &c); 460 astr1 += size; 461 len1 -= size; 462 463 if (decompose) 464 dstr1 = decompose_unichar(c, &dsize1, 465 dhangul_1); 466 if (!decompose || !dstr1) { 467 c1 = c; 468 dstr1 = &c1; 469 dsize1 = 1; 470 } 471 } 472 473 if (!dsize2) { 474 size = asc2unichar(sb, astr2, len2, &c); 475 astr2 += size; 476 len2 -= size; 477 478 if (decompose) 479 dstr2 = decompose_unichar(c, &dsize2, 480 dhangul_2); 481 if (!decompose || !dstr2) { 482 c2 = c; 483 dstr2 = &c2; 484 dsize2 = 1; 485 } 486 } 487 488 c1 = *dstr1; 489 c2 = *dstr2; 490 if (casefold) { 491 c1 = case_fold(c1); 492 if (!c1) { 493 dstr1++; 494 dsize1--; 495 continue; 496 } 497 c2 = case_fold(c2); 498 if (!c2) { 499 dstr2++; 500 dsize2--; 501 continue; 502 } 503 } 504 if (c1 < c2) 505 return -1; 506 else if (c1 > c2) 507 return 1; 508 509 dstr1++; 510 dsize1--; 511 dstr2++; 512 dsize2--; 513 } 514 515 if (len1 < len2) 516 return -1; 517 if (len1 > len2) 518 return 1; 519 return 0; 520}