cifs_unicode.h (9794B)
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * cifs_unicode: Unicode kernel case support 4 * 5 * Function: 6 * Convert a unicode character to upper or lower case using 7 * compressed tables. 8 * 9 * Copyright (c) International Business Machines Corp., 2000,2009 10 * 11 * Notes: 12 * These APIs are based on the C library functions. The semantics 13 * should match the C functions but with expanded size operands. 14 * 15 * The upper/lower functions are based on a table created by mkupr. 16 * This is a compressed table of upper and lower case conversion. 17 */ 18#ifndef _CIFS_UNICODE_H 19#define _CIFS_UNICODE_H 20 21#include <asm/byteorder.h> 22#include <linux/types.h> 23#include <linux/nls.h> 24 25#define UNIUPR_NOLOWER /* Example to not expand lower case tables */ 26 27/* 28 * Windows maps these to the user defined 16 bit Unicode range since they are 29 * reserved symbols (along with \ and /), otherwise illegal to store 30 * in filenames in NTFS 31 */ 32#define UNI_ASTERISK (__u16) ('*' + 0xF000) 33#define UNI_QUESTION (__u16) ('?' + 0xF000) 34#define UNI_COLON (__u16) (':' + 0xF000) 35#define UNI_GRTRTHAN (__u16) ('>' + 0xF000) 36#define UNI_LESSTHAN (__u16) ('<' + 0xF000) 37#define UNI_PIPE (__u16) ('|' + 0xF000) 38#define UNI_SLASH (__u16) ('\\' + 0xF000) 39 40/* 41 * Macs use an older "SFM" mapping of the symbols above. Fortunately it does 42 * not conflict (although almost does) with the mapping above. 43 */ 44 45#define SFM_DOUBLEQUOTE ((__u16) 0xF020) 46#define SFM_ASTERISK ((__u16) 0xF021) 47#define SFM_QUESTION ((__u16) 0xF025) 48#define SFM_COLON ((__u16) 0xF022) 49#define SFM_GRTRTHAN ((__u16) 0xF024) 50#define SFM_LESSTHAN ((__u16) 0xF023) 51#define SFM_PIPE ((__u16) 0xF027) 52#define SFM_SLASH ((__u16) 0xF026) 53#define SFM_SPACE ((__u16) 0xF028) 54#define SFM_PERIOD ((__u16) 0xF029) 55 56/* 57 * Mapping mechanism to use when one of the seven reserved characters is 58 * encountered. We can only map using one of the mechanisms at a time 59 * since otherwise readdir could return directory entries which we would 60 * not be able to open 61 * 62 * NO_MAP_UNI_RSVD = do not perform any remapping of the character 63 * SFM_MAP_UNI_RSVD = map reserved characters using SFM scheme (MAC compatible) 64 * SFU_MAP_UNI_RSVD = map reserved characters ala SFU ("mapchars" option) 65 * 66 */ 67#define NO_MAP_UNI_RSVD 0 68#define SFM_MAP_UNI_RSVD 1 69#define SFU_MAP_UNI_RSVD 2 70 71/* Just define what we want from uniupr.h. We don't want to define the tables 72 * in each source file. 73 */ 74#ifndef UNICASERANGE_DEFINED 75struct UniCaseRange { 76 wchar_t start; 77 wchar_t end; 78 signed char *table; 79}; 80#endif /* UNICASERANGE_DEFINED */ 81 82#ifndef UNIUPR_NOUPPER 83extern signed char CifsUniUpperTable[512]; 84extern const struct UniCaseRange CifsUniUpperRange[]; 85#endif /* UNIUPR_NOUPPER */ 86 87#ifndef UNIUPR_NOLOWER 88extern signed char CifsUniLowerTable[512]; 89extern const struct UniCaseRange CifsUniLowerRange[]; 90#endif /* UNIUPR_NOLOWER */ 91 92#ifdef __KERNEL__ 93int cifs_from_utf16(char *to, const __le16 *from, int tolen, int fromlen, 94 const struct nls_table *cp, int map_type); 95int cifs_utf16_bytes(const __le16 *from, int maxbytes, 96 const struct nls_table *codepage); 97int cifs_strtoUTF16(__le16 *, const char *, int, const struct nls_table *); 98char *cifs_strndup_from_utf16(const char *src, const int maxlen, 99 const bool is_unicode, 100 const struct nls_table *codepage); 101extern int cifsConvertToUTF16(__le16 *target, const char *source, int maxlen, 102 const struct nls_table *cp, int mapChars); 103extern int cifs_remap(struct cifs_sb_info *cifs_sb); 104extern __le16 *cifs_strndup_to_utf16(const char *src, const int maxlen, 105 int *utf16_len, const struct nls_table *cp, 106 int remap); 107#endif 108 109wchar_t cifs_toupper(wchar_t in); 110 111/* 112 * UniStrcat: Concatenate the second string to the first 113 * 114 * Returns: 115 * Address of the first string 116 */ 117static inline __le16 * 118UniStrcat(__le16 *ucs1, const __le16 *ucs2) 119{ 120 __le16 *anchor = ucs1; /* save a pointer to start of ucs1 */ 121 122 while (*ucs1++) ; /* To end of first string */ 123 ucs1--; /* Return to the null */ 124 while ((*ucs1++ = *ucs2++)) ; /* copy string 2 over */ 125 return anchor; 126} 127 128/* 129 * UniStrchr: Find a character in a string 130 * 131 * Returns: 132 * Address of first occurrence of character in string 133 * or NULL if the character is not in the string 134 */ 135static inline wchar_t * 136UniStrchr(const wchar_t *ucs, wchar_t uc) 137{ 138 while ((*ucs != uc) && *ucs) 139 ucs++; 140 141 if (*ucs == uc) 142 return (wchar_t *) ucs; 143 return NULL; 144} 145 146/* 147 * UniStrcmp: Compare two strings 148 * 149 * Returns: 150 * < 0: First string is less than second 151 * = 0: Strings are equal 152 * > 0: First string is greater than second 153 */ 154static inline int 155UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2) 156{ 157 while ((*ucs1 == *ucs2) && *ucs1) { 158 ucs1++; 159 ucs2++; 160 } 161 return (int) *ucs1 - (int) *ucs2; 162} 163 164/* 165 * UniStrcpy: Copy a string 166 */ 167static inline wchar_t * 168UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2) 169{ 170 wchar_t *anchor = ucs1; /* save the start of result string */ 171 172 while ((*ucs1++ = *ucs2++)) ; 173 return anchor; 174} 175 176/* 177 * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes) 178 */ 179static inline size_t 180UniStrlen(const wchar_t *ucs1) 181{ 182 int i = 0; 183 184 while (*ucs1++) 185 i++; 186 return i; 187} 188 189/* 190 * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a 191 * string (length limited) 192 */ 193static inline size_t 194UniStrnlen(const wchar_t *ucs1, int maxlen) 195{ 196 int i = 0; 197 198 while (*ucs1++) { 199 i++; 200 if (i >= maxlen) 201 break; 202 } 203 return i; 204} 205 206/* 207 * UniStrncat: Concatenate length limited string 208 */ 209static inline wchar_t * 210UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 211{ 212 wchar_t *anchor = ucs1; /* save pointer to string 1 */ 213 214 while (*ucs1++) ; 215 ucs1--; /* point to null terminator of s1 */ 216 while (n-- && (*ucs1 = *ucs2)) { /* copy s2 after s1 */ 217 ucs1++; 218 ucs2++; 219 } 220 *ucs1 = 0; /* Null terminate the result */ 221 return (anchor); 222} 223 224/* 225 * UniStrncmp: Compare length limited string 226 */ 227static inline int 228UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 229{ 230 if (!n) 231 return 0; /* Null strings are equal */ 232 while ((*ucs1 == *ucs2) && *ucs1 && --n) { 233 ucs1++; 234 ucs2++; 235 } 236 return (int) *ucs1 - (int) *ucs2; 237} 238 239/* 240 * UniStrncmp_le: Compare length limited string - native to little-endian 241 */ 242static inline int 243UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 244{ 245 if (!n) 246 return 0; /* Null strings are equal */ 247 while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { 248 ucs1++; 249 ucs2++; 250 } 251 return (int) *ucs1 - (int) __le16_to_cpu(*ucs2); 252} 253 254/* 255 * UniStrncpy: Copy length limited string with pad 256 */ 257static inline wchar_t * 258UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 259{ 260 wchar_t *anchor = ucs1; 261 262 while (n-- && *ucs2) /* Copy the strings */ 263 *ucs1++ = *ucs2++; 264 265 n++; 266 while (n--) /* Pad with nulls */ 267 *ucs1++ = 0; 268 return anchor; 269} 270 271/* 272 * UniStrncpy_le: Copy length limited string with pad to little-endian 273 */ 274static inline wchar_t * 275UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 276{ 277 wchar_t *anchor = ucs1; 278 279 while (n-- && *ucs2) /* Copy the strings */ 280 *ucs1++ = __le16_to_cpu(*ucs2++); 281 282 n++; 283 while (n--) /* Pad with nulls */ 284 *ucs1++ = 0; 285 return anchor; 286} 287 288/* 289 * UniStrstr: Find a string in a string 290 * 291 * Returns: 292 * Address of first match found 293 * NULL if no matching string is found 294 */ 295static inline wchar_t * 296UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2) 297{ 298 const wchar_t *anchor1 = ucs1; 299 const wchar_t *anchor2 = ucs2; 300 301 while (*ucs1) { 302 if (*ucs1 == *ucs2) { 303 /* Partial match found */ 304 ucs1++; 305 ucs2++; 306 } else { 307 if (!*ucs2) /* Match found */ 308 return (wchar_t *) anchor1; 309 ucs1 = ++anchor1; /* No match */ 310 ucs2 = anchor2; 311 } 312 } 313 314 if (!*ucs2) /* Both end together */ 315 return (wchar_t *) anchor1; /* Match found */ 316 return NULL; /* No match */ 317} 318 319#ifndef UNIUPR_NOUPPER 320/* 321 * UniToupper: Convert a unicode character to upper case 322 */ 323static inline wchar_t 324UniToupper(register wchar_t uc) 325{ 326 register const struct UniCaseRange *rp; 327 328 if (uc < sizeof(CifsUniUpperTable)) { 329 /* Latin characters */ 330 return uc + CifsUniUpperTable[uc]; /* Use base tables */ 331 } else { 332 rp = CifsUniUpperRange; /* Use range tables */ 333 while (rp->start) { 334 if (uc < rp->start) /* Before start of range */ 335 return uc; /* Uppercase = input */ 336 if (uc <= rp->end) /* In range */ 337 return uc + rp->table[uc - rp->start]; 338 rp++; /* Try next range */ 339 } 340 } 341 return uc; /* Past last range */ 342} 343 344/* 345 * UniStrupr: Upper case a unicode string 346 */ 347static inline __le16 * 348UniStrupr(register __le16 *upin) 349{ 350 register __le16 *up; 351 352 up = upin; 353 while (*up) { /* For all characters */ 354 *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); 355 up++; 356 } 357 return upin; /* Return input pointer */ 358} 359#endif /* UNIUPR_NOUPPER */ 360 361#ifndef UNIUPR_NOLOWER 362/* 363 * UniTolower: Convert a unicode character to lower case 364 */ 365static inline wchar_t 366UniTolower(register wchar_t uc) 367{ 368 register const struct UniCaseRange *rp; 369 370 if (uc < sizeof(CifsUniLowerTable)) { 371 /* Latin characters */ 372 return uc + CifsUniLowerTable[uc]; /* Use base tables */ 373 } else { 374 rp = CifsUniLowerRange; /* Use range tables */ 375 while (rp->start) { 376 if (uc < rp->start) /* Before start of range */ 377 return uc; /* Uppercase = input */ 378 if (uc <= rp->end) /* In range */ 379 return uc + rp->table[uc - rp->start]; 380 rp++; /* Try next range */ 381 } 382 } 383 return uc; /* Past last range */ 384} 385 386/* 387 * UniStrlwr: Lower case a unicode string 388 */ 389static inline wchar_t * 390UniStrlwr(register wchar_t *upin) 391{ 392 register wchar_t *up; 393 394 up = upin; 395 while (*up) { /* For all characters */ 396 *up = UniTolower(*up); 397 up++; 398 } 399 return upin; /* Return input pointer */ 400} 401 402#endif 403 404#endif /* _CIFS_UNICODE_H */