unicode.h (8359B)
1/* SPDX-License-Identifier: GPL-2.0-or-later */ 2/* 3 * Some of the source code in this file came from fs/cifs/cifs_unicode.c 4 * cifs_unicode: Unicode kernel case support 5 * 6 * Function: 7 * Convert a unicode character to upper or lower case using 8 * compressed tables. 9 * 10 * Copyright (c) International Business Machines Corp., 2000,2009 11 * 12 * 13 * Notes: 14 * These APIs are based on the C library functions. The semantics 15 * should match the C functions but with expanded size operands. 16 * 17 * The upper/lower functions are based on a table created by mkupr. 18 * This is a compressed table of upper and lower case conversion. 19 * 20 */ 21#ifndef _CIFS_UNICODE_H 22#define _CIFS_UNICODE_H 23 24#include <asm/byteorder.h> 25#include <linux/types.h> 26#include <linux/nls.h> 27 28#define UNIUPR_NOLOWER /* Example to not expand lower case tables */ 29 30/* 31 * Windows maps these to the user defined 16 bit Unicode range since they are 32 * reserved symbols (along with \ and /), otherwise illegal to store 33 * in filenames in NTFS 34 */ 35#define UNI_ASTERISK ((__u16)('*' + 0xF000)) 36#define UNI_QUESTION ((__u16)('?' + 0xF000)) 37#define UNI_COLON ((__u16)(':' + 0xF000)) 38#define UNI_GRTRTHAN ((__u16)('>' + 0xF000)) 39#define UNI_LESSTHAN ((__u16)('<' + 0xF000)) 40#define UNI_PIPE ((__u16)('|' + 0xF000)) 41#define UNI_SLASH ((__u16)('\\' + 0xF000)) 42 43/* Just define what we want from uniupr.h. We don't want to define the tables 44 * in each source file. 45 */ 46#ifndef UNICASERANGE_DEFINED 47struct UniCaseRange { 48 wchar_t start; 49 wchar_t end; 50 signed char *table; 51}; 52#endif /* UNICASERANGE_DEFINED */ 53 54#ifndef UNIUPR_NOUPPER 55extern signed char SmbUniUpperTable[512]; 56extern const struct UniCaseRange SmbUniUpperRange[]; 57#endif /* UNIUPR_NOUPPER */ 58 59#ifndef UNIUPR_NOLOWER 60extern signed char CifsUniLowerTable[512]; 61extern const struct UniCaseRange CifsUniLowerRange[]; 62#endif /* UNIUPR_NOLOWER */ 63 64#ifdef __KERNEL__ 65int smb_strtoUTF16(__le16 *to, const char *from, int len, 66 const struct nls_table *codepage); 67char *smb_strndup_from_utf16(const char *src, const int maxlen, 68 const bool is_unicode, 69 const struct nls_table *codepage); 70int smbConvertToUTF16(__le16 *target, const char *source, int srclen, 71 const struct nls_table *cp, int mapchars); 72char *ksmbd_extract_sharename(char *treename); 73#endif 74 75/* 76 * UniStrcat: Concatenate the second string to the first 77 * 78 * Returns: 79 * Address of the first string 80 */ 81static inline wchar_t *UniStrcat(wchar_t *ucs1, const wchar_t *ucs2) 82{ 83 wchar_t *anchor = ucs1; /* save a pointer to start of ucs1 */ 84 85 while (*ucs1++) 86 /*NULL*/; /* To end of first string */ 87 ucs1--; /* Return to the null */ 88 while ((*ucs1++ = *ucs2++)) 89 /*NULL*/; /* copy string 2 over */ 90 return anchor; 91} 92 93/* 94 * UniStrchr: Find a character in a string 95 * 96 * Returns: 97 * Address of first occurrence of character in string 98 * or NULL if the character is not in the string 99 */ 100static inline wchar_t *UniStrchr(const wchar_t *ucs, wchar_t uc) 101{ 102 while ((*ucs != uc) && *ucs) 103 ucs++; 104 105 if (*ucs == uc) 106 return (wchar_t *)ucs; 107 return NULL; 108} 109 110/* 111 * UniStrcmp: Compare two strings 112 * 113 * Returns: 114 * < 0: First string is less than second 115 * = 0: Strings are equal 116 * > 0: First string is greater than second 117 */ 118static inline int UniStrcmp(const wchar_t *ucs1, const wchar_t *ucs2) 119{ 120 while ((*ucs1 == *ucs2) && *ucs1) { 121 ucs1++; 122 ucs2++; 123 } 124 return (int)*ucs1 - (int)*ucs2; 125} 126 127/* 128 * UniStrcpy: Copy a string 129 */ 130static inline wchar_t *UniStrcpy(wchar_t *ucs1, const wchar_t *ucs2) 131{ 132 wchar_t *anchor = ucs1; /* save the start of result string */ 133 134 while ((*ucs1++ = *ucs2++)) 135 /*NULL*/; 136 return anchor; 137} 138 139/* 140 * UniStrlen: Return the length of a string (in 16 bit Unicode chars not bytes) 141 */ 142static inline size_t UniStrlen(const wchar_t *ucs1) 143{ 144 int i = 0; 145 146 while (*ucs1++) 147 i++; 148 return i; 149} 150 151/* 152 * UniStrnlen: Return the length (in 16 bit Unicode chars not bytes) of a 153 * string (length limited) 154 */ 155static inline size_t UniStrnlen(const wchar_t *ucs1, int maxlen) 156{ 157 int i = 0; 158 159 while (*ucs1++) { 160 i++; 161 if (i >= maxlen) 162 break; 163 } 164 return i; 165} 166 167/* 168 * UniStrncat: Concatenate length limited string 169 */ 170static inline wchar_t *UniStrncat(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 171{ 172 wchar_t *anchor = ucs1; /* save pointer to string 1 */ 173 174 while (*ucs1++) 175 /*NULL*/; 176 ucs1--; /* point to null terminator of s1 */ 177 while (n-- && (*ucs1 = *ucs2)) { /* copy s2 after s1 */ 178 ucs1++; 179 ucs2++; 180 } 181 *ucs1 = 0; /* Null terminate the result */ 182 return anchor; 183} 184 185/* 186 * UniStrncmp: Compare length limited string 187 */ 188static inline int UniStrncmp(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 189{ 190 if (!n) 191 return 0; /* Null strings are equal */ 192 while ((*ucs1 == *ucs2) && *ucs1 && --n) { 193 ucs1++; 194 ucs2++; 195 } 196 return (int)*ucs1 - (int)*ucs2; 197} 198 199/* 200 * UniStrncmp_le: Compare length limited string - native to little-endian 201 */ 202static inline int 203UniStrncmp_le(const wchar_t *ucs1, const wchar_t *ucs2, size_t n) 204{ 205 if (!n) 206 return 0; /* Null strings are equal */ 207 while ((*ucs1 == __le16_to_cpu(*ucs2)) && *ucs1 && --n) { 208 ucs1++; 209 ucs2++; 210 } 211 return (int)*ucs1 - (int)__le16_to_cpu(*ucs2); 212} 213 214/* 215 * UniStrncpy: Copy length limited string with pad 216 */ 217static inline wchar_t *UniStrncpy(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 218{ 219 wchar_t *anchor = ucs1; 220 221 while (n-- && *ucs2) /* Copy the strings */ 222 *ucs1++ = *ucs2++; 223 224 n++; 225 while (n--) /* Pad with nulls */ 226 *ucs1++ = 0; 227 return anchor; 228} 229 230/* 231 * UniStrncpy_le: Copy length limited string with pad to little-endian 232 */ 233static inline wchar_t *UniStrncpy_le(wchar_t *ucs1, const wchar_t *ucs2, size_t n) 234{ 235 wchar_t *anchor = ucs1; 236 237 while (n-- && *ucs2) /* Copy the strings */ 238 *ucs1++ = __le16_to_cpu(*ucs2++); 239 240 n++; 241 while (n--) /* Pad with nulls */ 242 *ucs1++ = 0; 243 return anchor; 244} 245 246/* 247 * UniStrstr: Find a string in a string 248 * 249 * Returns: 250 * Address of first match found 251 * NULL if no matching string is found 252 */ 253static inline wchar_t *UniStrstr(const wchar_t *ucs1, const wchar_t *ucs2) 254{ 255 const wchar_t *anchor1 = ucs1; 256 const wchar_t *anchor2 = ucs2; 257 258 while (*ucs1) { 259 if (*ucs1 == *ucs2) { 260 /* Partial match found */ 261 ucs1++; 262 ucs2++; 263 } else { 264 if (!*ucs2) /* Match found */ 265 return (wchar_t *)anchor1; 266 ucs1 = ++anchor1; /* No match */ 267 ucs2 = anchor2; 268 } 269 } 270 271 if (!*ucs2) /* Both end together */ 272 return (wchar_t *)anchor1; /* Match found */ 273 return NULL; /* No match */ 274} 275 276#ifndef UNIUPR_NOUPPER 277/* 278 * UniToupper: Convert a unicode character to upper case 279 */ 280static inline wchar_t UniToupper(register wchar_t uc) 281{ 282 register const struct UniCaseRange *rp; 283 284 if (uc < sizeof(SmbUniUpperTable)) { 285 /* Latin characters */ 286 return uc + SmbUniUpperTable[uc]; /* Use base tables */ 287 } 288 289 rp = SmbUniUpperRange; /* Use range tables */ 290 while (rp->start) { 291 if (uc < rp->start) /* Before start of range */ 292 return uc; /* Uppercase = input */ 293 if (uc <= rp->end) /* In range */ 294 return uc + rp->table[uc - rp->start]; 295 rp++; /* Try next range */ 296 } 297 return uc; /* Past last range */ 298} 299 300/* 301 * UniStrupr: Upper case a unicode string 302 */ 303static inline __le16 *UniStrupr(register __le16 *upin) 304{ 305 register __le16 *up; 306 307 up = upin; 308 while (*up) { /* For all characters */ 309 *up = cpu_to_le16(UniToupper(le16_to_cpu(*up))); 310 up++; 311 } 312 return upin; /* Return input pointer */ 313} 314#endif /* UNIUPR_NOUPPER */ 315 316#ifndef UNIUPR_NOLOWER 317/* 318 * UniTolower: Convert a unicode character to lower case 319 */ 320static inline wchar_t UniTolower(register wchar_t uc) 321{ 322 register const struct UniCaseRange *rp; 323 324 if (uc < sizeof(CifsUniLowerTable)) { 325 /* Latin characters */ 326 return uc + CifsUniLowerTable[uc]; /* Use base tables */ 327 } 328 329 rp = CifsUniLowerRange; /* Use range tables */ 330 while (rp->start) { 331 if (uc < rp->start) /* Before start of range */ 332 return uc; /* Uppercase = input */ 333 if (uc <= rp->end) /* In range */ 334 return uc + rp->table[uc - rp->start]; 335 rp++; /* Try next range */ 336 } 337 return uc; /* Past last range */ 338} 339 340/* 341 * UniStrlwr: Lower case a unicode string 342 */ 343static inline wchar_t *UniStrlwr(register wchar_t *upin) 344{ 345 register wchar_t *up; 346 347 up = upin; 348 while (*up) { /* For all characters */ 349 *up = UniTolower(*up); 350 up++; 351 } 352 return upin; /* Return input pointer */ 353} 354 355#endif 356 357#endif /* _CIFS_UNICODE_H */