SDL_iconv.c (29065B)
/*
  Simple DirectMedia Layer
  Copyright (C) 1997-2014 Sam Lantinga <slouken@libsdl.org>

  This software is provided 'as-is', without any express or implied
  warranty.  In no event will the authors be held liable for any damages
  arising from the use of this software.

  Permission is granted to anyone to use this software for any purpose,
  including commercial applications, and to alter it and redistribute it
  freely, subject to the following restrictions:

  1. The origin of this software must not be misrepresented; you must not
     claim that you wrote the original software. If you use this software
     in a product, an acknowledgment in the product documentation would be
     appreciated but is not required.
  2. Altered source versions must be plainly marked as such, and must not be
     misrepresented as being the original software.
  3. This notice may not be removed or altered from any source distribution.
*/
#include "../SDL_internal.h"

/* This file contains portable iconv functions for SDL */

#include "SDL_stdinc.h"
#include "SDL_endian.h"

#ifdef HAVE_ICONV

/* Depending on which standard the iconv() was implemented with,
   iconv() may or may not use const char ** for the inbuf param.
   If we get this wrong, it's just a warning, so no big deal.
*/
#if defined(_XGP6) || defined(__APPLE__) || \
    (defined(__GLIBC__) && ((__GLIBC__ > 2) || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 2)) || \
    (defined(_NEWLIB_VERSION)))
#define ICONV_INBUF_NONCONST
#endif

#include <errno.h>

/* The native iconv_t handle is stored inside an SDL_iconv_t (via a size_t
   round-trip in the functions below), so it must fit. */
SDL_COMPILE_TIME_ASSERT(iconv_t, sizeof (iconv_t) <= sizeof (SDL_iconv_t));

/* Open a conversion descriptor by forwarding to the system iconv_open().
   The returned iconv_t is cast through size_t into an SDL_iconv_t. */
SDL_iconv_t
SDL_iconv_open(const char *tocode, const char *fromcode)
{
    return (SDL_iconv_t) ((size_t) iconv_open(tocode, fromcode));
}

/* Close a descriptor obtained from SDL_iconv_open() by forwarding to the
   system iconv_close(); its return value is passed through unchanged. */
int
SDL_iconv_close(SDL_iconv_t cd)
{
    return iconv_close((iconv_t) ((size_t) cd));
}

/* Convert data by forwarding to the system iconv().

   All arguments are handed to iconv() as-is (inbuf is cast to char ** when
   ICONV_INBUF_NONCONST is defined).  When iconv() reports failure with
   (size_t)-1, errno is translated: E2BIG, EILSEQ and EINVAL become the
   matching SDL_ICONV_* codes and any other errno becomes SDL_ICONV_ERROR.
   Otherwise iconv()'s own return value is returned. */
size_t
SDL_iconv(SDL_iconv_t cd,
          const char **inbuf, size_t * inbytesleft,
          char **outbuf, size_t * outbytesleft)
{
    size_t retCode;
#ifdef ICONV_INBUF_NONCONST
    retCode = iconv((iconv_t) ((size_t) cd), (char **) inbuf, inbytesleft, outbuf, outbytesleft);
#else
    retCode = iconv((iconv_t) ((size_t) cd), inbuf, inbytesleft, outbuf, outbytesleft);
#endif
    if (retCode == (size_t) - 1) {
        /* Map the errno set by iconv() onto SDL's portable error codes */
        switch (errno) {
        case E2BIG:
            return SDL_ICONV_E2BIG;
        case EILSEQ:
            return SDL_ICONV_EILSEQ;
        case EINVAL:
            return SDL_ICONV_EINVAL;
        default:
            return SDL_ICONV_ERROR;
        }
    }
    return retCode;
}

#else

/* Lots of useful information on Unicode at:
	http://www.cl.cam.ac.uk/~mgk25/unicode.html
*/

/* Code point of the byte order mark */
#define UNICODE_BOM     0xFEFF

/* Substitute written when a character cannot be represented in ASCII/Latin-1 */
#define UNKNOWN_ASCII   '?'
91#define UNKNOWN_UNICODE 0xFFFD 92 93enum 94{ 95 ENCODING_UNKNOWN, 96 ENCODING_ASCII, 97 ENCODING_LATIN1, 98 ENCODING_UTF8, 99 ENCODING_UTF16, /* Needs byte order marker */ 100 ENCODING_UTF16BE, 101 ENCODING_UTF16LE, 102 ENCODING_UTF32, /* Needs byte order marker */ 103 ENCODING_UTF32BE, 104 ENCODING_UTF32LE, 105 ENCODING_UCS2BE, 106 ENCODING_UCS2LE, 107 ENCODING_UCS4BE, 108 ENCODING_UCS4LE, 109}; 110#if SDL_BYTEORDER == SDL_BIG_ENDIAN 111#define ENCODING_UTF16NATIVE ENCODING_UTF16BE 112#define ENCODING_UTF32NATIVE ENCODING_UTF32BE 113#define ENCODING_UCS2NATIVE ENCODING_UCS2BE 114#define ENCODING_UCS4NATIVE ENCODING_UCS4BE 115#else 116#define ENCODING_UTF16NATIVE ENCODING_UTF16LE 117#define ENCODING_UTF32NATIVE ENCODING_UTF32LE 118#define ENCODING_UCS2NATIVE ENCODING_UCS2LE 119#define ENCODING_UCS4NATIVE ENCODING_UCS4LE 120#endif 121 122struct _SDL_iconv_t 123{ 124 int src_fmt; 125 int dst_fmt; 126}; 127 128static struct 129{ 130 const char *name; 131 int format; 132} encodings[] = { 133/* *INDENT-OFF* */ 134 { "ASCII", ENCODING_ASCII }, 135 { "US-ASCII", ENCODING_ASCII }, 136 { "8859-1", ENCODING_LATIN1 }, 137 { "ISO-8859-1", ENCODING_LATIN1 }, 138 { "UTF8", ENCODING_UTF8 }, 139 { "UTF-8", ENCODING_UTF8 }, 140 { "UTF16", ENCODING_UTF16 }, 141 { "UTF-16", ENCODING_UTF16 }, 142 { "UTF16BE", ENCODING_UTF16BE }, 143 { "UTF-16BE", ENCODING_UTF16BE }, 144 { "UTF16LE", ENCODING_UTF16LE }, 145 { "UTF-16LE", ENCODING_UTF16LE }, 146 { "UTF32", ENCODING_UTF32 }, 147 { "UTF-32", ENCODING_UTF32 }, 148 { "UTF32BE", ENCODING_UTF32BE }, 149 { "UTF-32BE", ENCODING_UTF32BE }, 150 { "UTF32LE", ENCODING_UTF32LE }, 151 { "UTF-32LE", ENCODING_UTF32LE }, 152 { "UCS2", ENCODING_UCS2BE }, 153 { "UCS-2", ENCODING_UCS2BE }, 154 { "UCS-2LE", ENCODING_UCS2LE }, 155 { "UCS-2BE", ENCODING_UCS2BE }, 156 { "UCS-2-INTERNAL", ENCODING_UCS2NATIVE }, 157 { "UCS4", ENCODING_UCS4BE }, 158 { "UCS-4", ENCODING_UCS4BE }, 159 { "UCS-4LE", ENCODING_UCS4LE }, 160 { "UCS-4BE", ENCODING_UCS4BE }, 161 { 
"UCS-4-INTERNAL", ENCODING_UCS4NATIVE }, 162/* *INDENT-ON* */ 163}; 164 165static const char * 166getlocale(char *buffer, size_t bufsize) 167{ 168 const char *lang; 169 char *ptr; 170 171 lang = SDL_getenv("LC_ALL"); 172 if (!lang) { 173 lang = SDL_getenv("LC_CTYPE"); 174 } 175 if (!lang) { 176 lang = SDL_getenv("LC_MESSAGES"); 177 } 178 if (!lang) { 179 lang = SDL_getenv("LANG"); 180 } 181 if (!lang || !*lang || SDL_strcmp(lang, "C") == 0) { 182 lang = "ASCII"; 183 } 184 185 /* We need to trim down strings like "en_US.UTF-8@blah" to "UTF-8" */ 186 ptr = SDL_strchr(lang, '.'); 187 if (ptr != NULL) { 188 lang = ptr + 1; 189 } 190 191 SDL_strlcpy(buffer, lang, bufsize); 192 ptr = SDL_strchr(buffer, '@'); 193 if (ptr != NULL) { 194 *ptr = '\0'; /* chop end of string. */ 195 } 196 197 return buffer; 198} 199 200SDL_iconv_t 201SDL_iconv_open(const char *tocode, const char *fromcode) 202{ 203 int src_fmt = ENCODING_UNKNOWN; 204 int dst_fmt = ENCODING_UNKNOWN; 205 int i; 206 char fromcode_buffer[64]; 207 char tocode_buffer[64]; 208 209 if (!fromcode || !*fromcode) { 210 fromcode = getlocale(fromcode_buffer, sizeof(fromcode_buffer)); 211 } 212 if (!tocode || !*tocode) { 213 tocode = getlocale(tocode_buffer, sizeof(tocode_buffer)); 214 } 215 for (i = 0; i < SDL_arraysize(encodings); ++i) { 216 if (SDL_strcasecmp(fromcode, encodings[i].name) == 0) { 217 src_fmt = encodings[i].format; 218 if (dst_fmt != ENCODING_UNKNOWN) { 219 break; 220 } 221 } 222 if (SDL_strcasecmp(tocode, encodings[i].name) == 0) { 223 dst_fmt = encodings[i].format; 224 if (src_fmt != ENCODING_UNKNOWN) { 225 break; 226 } 227 } 228 } 229 if (src_fmt != ENCODING_UNKNOWN && dst_fmt != ENCODING_UNKNOWN) { 230 SDL_iconv_t cd = (SDL_iconv_t) SDL_malloc(sizeof(*cd)); 231 if (cd) { 232 cd->src_fmt = src_fmt; 233 cd->dst_fmt = dst_fmt; 234 return cd; 235 } 236 } 237 return (SDL_iconv_t) - 1; 238} 239 240size_t 241SDL_iconv(SDL_iconv_t cd, 242 const char **inbuf, size_t * inbytesleft, 243 char **outbuf, size_t * 
outbytesleft) 244{ 245 /* For simplicity, we'll convert everything to and from UCS-4 */ 246 const char *src; 247 char *dst; 248 size_t srclen, dstlen; 249 Uint32 ch = 0; 250 size_t total; 251 252 if (!inbuf || !*inbuf) { 253 /* Reset the context */ 254 return 0; 255 } 256 if (!outbuf || !*outbuf || !outbytesleft || !*outbytesleft) { 257 return SDL_ICONV_E2BIG; 258 } 259 src = *inbuf; 260 srclen = (inbytesleft ? *inbytesleft : 0); 261 dst = *outbuf; 262 dstlen = *outbytesleft; 263 264 switch (cd->src_fmt) { 265 case ENCODING_UTF16: 266 /* Scan for a byte order marker */ 267 { 268 Uint8 *p = (Uint8 *) src; 269 size_t n = srclen / 2; 270 while (n) { 271 if (p[0] == 0xFF && p[1] == 0xFE) { 272 cd->src_fmt = ENCODING_UTF16BE; 273 break; 274 } else if (p[0] == 0xFE && p[1] == 0xFF) { 275 cd->src_fmt = ENCODING_UTF16LE; 276 break; 277 } 278 p += 2; 279 --n; 280 } 281 if (n == 0) { 282 /* We can't tell, default to host order */ 283 cd->src_fmt = ENCODING_UTF16NATIVE; 284 } 285 } 286 break; 287 case ENCODING_UTF32: 288 /* Scan for a byte order marker */ 289 { 290 Uint8 *p = (Uint8 *) src; 291 size_t n = srclen / 4; 292 while (n) { 293 if (p[0] == 0xFF && p[1] == 0xFE && 294 p[2] == 0x00 && p[3] == 0x00) { 295 cd->src_fmt = ENCODING_UTF32BE; 296 break; 297 } else if (p[0] == 0x00 && p[1] == 0x00 && 298 p[2] == 0xFE && p[3] == 0xFF) { 299 cd->src_fmt = ENCODING_UTF32LE; 300 break; 301 } 302 p += 4; 303 --n; 304 } 305 if (n == 0) { 306 /* We can't tell, default to host order */ 307 cd->src_fmt = ENCODING_UTF32NATIVE; 308 } 309 } 310 break; 311 } 312 313 switch (cd->dst_fmt) { 314 case ENCODING_UTF16: 315 /* Default to host order, need to add byte order marker */ 316 if (dstlen < 2) { 317 return SDL_ICONV_E2BIG; 318 } 319 *(Uint16 *) dst = UNICODE_BOM; 320 dst += 2; 321 dstlen -= 2; 322 cd->dst_fmt = ENCODING_UTF16NATIVE; 323 break; 324 case ENCODING_UTF32: 325 /* Default to host order, need to add byte order marker */ 326 if (dstlen < 4) { 327 return SDL_ICONV_E2BIG; 328 } 329 
*(Uint32 *) dst = UNICODE_BOM; 330 dst += 4; 331 dstlen -= 4; 332 cd->dst_fmt = ENCODING_UTF32NATIVE; 333 break; 334 } 335 336 total = 0; 337 while (srclen > 0) { 338 /* Decode a character */ 339 switch (cd->src_fmt) { 340 case ENCODING_ASCII: 341 { 342 Uint8 *p = (Uint8 *) src; 343 ch = (Uint32) (p[0] & 0x7F); 344 ++src; 345 --srclen; 346 } 347 break; 348 case ENCODING_LATIN1: 349 { 350 Uint8 *p = (Uint8 *) src; 351 ch = (Uint32) p[0]; 352 ++src; 353 --srclen; 354 } 355 break; 356 case ENCODING_UTF8: /* RFC 3629 */ 357 { 358 Uint8 *p = (Uint8 *) src; 359 size_t left = 0; 360 SDL_bool overlong = SDL_FALSE; 361 if (p[0] >= 0xFC) { 362 if ((p[0] & 0xFE) != 0xFC) { 363 /* Skip illegal sequences 364 return SDL_ICONV_EILSEQ; 365 */ 366 ch = UNKNOWN_UNICODE; 367 } else { 368 if (p[0] == 0xFC && srclen > 1 && (p[1] & 0xFC) == 0x80) { 369 overlong = SDL_TRUE; 370 } 371 ch = (Uint32) (p[0] & 0x01); 372 left = 5; 373 } 374 } else if (p[0] >= 0xF8) { 375 if ((p[0] & 0xFC) != 0xF8) { 376 /* Skip illegal sequences 377 return SDL_ICONV_EILSEQ; 378 */ 379 ch = UNKNOWN_UNICODE; 380 } else { 381 if (p[0] == 0xF8 && srclen > 1 && (p[1] & 0xF8) == 0x80) { 382 overlong = SDL_TRUE; 383 } 384 ch = (Uint32) (p[0] & 0x03); 385 left = 4; 386 } 387 } else if (p[0] >= 0xF0) { 388 if ((p[0] & 0xF8) != 0xF0) { 389 /* Skip illegal sequences 390 return SDL_ICONV_EILSEQ; 391 */ 392 ch = UNKNOWN_UNICODE; 393 } else { 394 if (p[0] == 0xF0 && srclen > 1 && (p[1] & 0xF0) == 0x80) { 395 overlong = SDL_TRUE; 396 } 397 ch = (Uint32) (p[0] & 0x07); 398 left = 3; 399 } 400 } else if (p[0] >= 0xE0) { 401 if ((p[0] & 0xF0) != 0xE0) { 402 /* Skip illegal sequences 403 return SDL_ICONV_EILSEQ; 404 */ 405 ch = UNKNOWN_UNICODE; 406 } else { 407 if (p[0] == 0xE0 && srclen > 1 && (p[1] & 0xE0) == 0x80) { 408 overlong = SDL_TRUE; 409 } 410 ch = (Uint32) (p[0] & 0x0F); 411 left = 2; 412 } 413 } else if (p[0] >= 0xC0) { 414 if ((p[0] & 0xE0) != 0xC0) { 415 /* Skip illegal sequences 416 return SDL_ICONV_EILSEQ; 417 
*/ 418 ch = UNKNOWN_UNICODE; 419 } else { 420 if ((p[0] & 0xDE) == 0xC0) { 421 overlong = SDL_TRUE; 422 } 423 ch = (Uint32) (p[0] & 0x1F); 424 left = 1; 425 } 426 } else { 427 if ((p[0] & 0x80) != 0x00) { 428 /* Skip illegal sequences 429 return SDL_ICONV_EILSEQ; 430 */ 431 ch = UNKNOWN_UNICODE; 432 } else { 433 ch = (Uint32) p[0]; 434 } 435 } 436 ++src; 437 --srclen; 438 if (srclen < left) { 439 return SDL_ICONV_EINVAL; 440 } 441 while (left--) { 442 ++p; 443 if ((p[0] & 0xC0) != 0x80) { 444 /* Skip illegal sequences 445 return SDL_ICONV_EILSEQ; 446 */ 447 ch = UNKNOWN_UNICODE; 448 break; 449 } 450 ch <<= 6; 451 ch |= (p[0] & 0x3F); 452 ++src; 453 --srclen; 454 } 455 if (overlong) { 456 /* Potential security risk 457 return SDL_ICONV_EILSEQ; 458 */ 459 ch = UNKNOWN_UNICODE; 460 } 461 if ((ch >= 0xD800 && ch <= 0xDFFF) || 462 (ch == 0xFFFE || ch == 0xFFFF) || ch > 0x10FFFF) { 463 /* Skip illegal sequences 464 return SDL_ICONV_EILSEQ; 465 */ 466 ch = UNKNOWN_UNICODE; 467 } 468 } 469 break; 470 case ENCODING_UTF16BE: /* RFC 2781 */ 471 { 472 Uint8 *p = (Uint8 *) src; 473 Uint16 W1, W2; 474 if (srclen < 2) { 475 return SDL_ICONV_EINVAL; 476 } 477 W1 = ((Uint16) p[0] << 8) | (Uint16) p[1]; 478 src += 2; 479 srclen -= 2; 480 if (W1 < 0xD800 || W1 > 0xDFFF) { 481 ch = (Uint32) W1; 482 break; 483 } 484 if (W1 > 0xDBFF) { 485 /* Skip illegal sequences 486 return SDL_ICONV_EILSEQ; 487 */ 488 ch = UNKNOWN_UNICODE; 489 break; 490 } 491 if (srclen < 2) { 492 return SDL_ICONV_EINVAL; 493 } 494 p = (Uint8 *) src; 495 W2 = ((Uint16) p[0] << 8) | (Uint16) p[1]; 496 src += 2; 497 srclen -= 2; 498 if (W2 < 0xDC00 || W2 > 0xDFFF) { 499 /* Skip illegal sequences 500 return SDL_ICONV_EILSEQ; 501 */ 502 ch = UNKNOWN_UNICODE; 503 break; 504 } 505 ch = (((Uint32) (W1 & 0x3FF) << 10) | 506 (Uint32) (W2 & 0x3FF)) + 0x10000; 507 } 508 break; 509 case ENCODING_UTF16LE: /* RFC 2781 */ 510 { 511 Uint8 *p = (Uint8 *) src; 512 Uint16 W1, W2; 513 if (srclen < 2) { 514 return SDL_ICONV_EINVAL; 515 
} 516 W1 = ((Uint16) p[1] << 8) | (Uint16) p[0]; 517 src += 2; 518 srclen -= 2; 519 if (W1 < 0xD800 || W1 > 0xDFFF) { 520 ch = (Uint32) W1; 521 break; 522 } 523 if (W1 > 0xDBFF) { 524 /* Skip illegal sequences 525 return SDL_ICONV_EILSEQ; 526 */ 527 ch = UNKNOWN_UNICODE; 528 break; 529 } 530 if (srclen < 2) { 531 return SDL_ICONV_EINVAL; 532 } 533 p = (Uint8 *) src; 534 W2 = ((Uint16) p[1] << 8) | (Uint16) p[0]; 535 src += 2; 536 srclen -= 2; 537 if (W2 < 0xDC00 || W2 > 0xDFFF) { 538 /* Skip illegal sequences 539 return SDL_ICONV_EILSEQ; 540 */ 541 ch = UNKNOWN_UNICODE; 542 break; 543 } 544 ch = (((Uint32) (W1 & 0x3FF) << 10) | 545 (Uint32) (W2 & 0x3FF)) + 0x10000; 546 } 547 break; 548 case ENCODING_UCS2LE: 549 { 550 Uint8 *p = (Uint8 *) src; 551 if (srclen < 2) { 552 return SDL_ICONV_EINVAL; 553 } 554 ch = ((Uint32) p[1] << 8) | (Uint32) p[0]; 555 src += 2; 556 srclen -= 2; 557 } 558 break; 559 case ENCODING_UCS2BE: 560 { 561 Uint8 *p = (Uint8 *) src; 562 if (srclen < 2) { 563 return SDL_ICONV_EINVAL; 564 } 565 ch = ((Uint32) p[0] << 8) | (Uint32) p[1]; 566 src += 2; 567 srclen -= 2; 568 } 569 break; 570 case ENCODING_UCS4BE: 571 case ENCODING_UTF32BE: 572 { 573 Uint8 *p = (Uint8 *) src; 574 if (srclen < 4) { 575 return SDL_ICONV_EINVAL; 576 } 577 ch = ((Uint32) p[0] << 24) | 578 ((Uint32) p[1] << 16) | 579 ((Uint32) p[2] << 8) | (Uint32) p[3]; 580 src += 4; 581 srclen -= 4; 582 } 583 break; 584 case ENCODING_UCS4LE: 585 case ENCODING_UTF32LE: 586 { 587 Uint8 *p = (Uint8 *) src; 588 if (srclen < 4) { 589 return SDL_ICONV_EINVAL; 590 } 591 ch = ((Uint32) p[3] << 24) | 592 ((Uint32) p[2] << 16) | 593 ((Uint32) p[1] << 8) | (Uint32) p[0]; 594 src += 4; 595 srclen -= 4; 596 } 597 break; 598 } 599 600 /* Encode a character */ 601 switch (cd->dst_fmt) { 602 case ENCODING_ASCII: 603 { 604 Uint8 *p = (Uint8 *) dst; 605 if (dstlen < 1) { 606 return SDL_ICONV_E2BIG; 607 } 608 if (ch > 0x7F) { 609 *p = UNKNOWN_ASCII; 610 } else { 611 *p = (Uint8) ch; 612 } 613 ++dst; 614 
--dstlen; 615 } 616 break; 617 case ENCODING_LATIN1: 618 { 619 Uint8 *p = (Uint8 *) dst; 620 if (dstlen < 1) { 621 return SDL_ICONV_E2BIG; 622 } 623 if (ch > 0xFF) { 624 *p = UNKNOWN_ASCII; 625 } else { 626 *p = (Uint8) ch; 627 } 628 ++dst; 629 --dstlen; 630 } 631 break; 632 case ENCODING_UTF8: /* RFC 3629 */ 633 { 634 Uint8 *p = (Uint8 *) dst; 635 if (ch > 0x10FFFF) { 636 ch = UNKNOWN_UNICODE; 637 } 638 if (ch <= 0x7F) { 639 if (dstlen < 1) { 640 return SDL_ICONV_E2BIG; 641 } 642 *p = (Uint8) ch; 643 ++dst; 644 --dstlen; 645 } else if (ch <= 0x7FF) { 646 if (dstlen < 2) { 647 return SDL_ICONV_E2BIG; 648 } 649 p[0] = 0xC0 | (Uint8) ((ch >> 6) & 0x1F); 650 p[1] = 0x80 | (Uint8) (ch & 0x3F); 651 dst += 2; 652 dstlen -= 2; 653 } else if (ch <= 0xFFFF) { 654 if (dstlen < 3) { 655 return SDL_ICONV_E2BIG; 656 } 657 p[0] = 0xE0 | (Uint8) ((ch >> 12) & 0x0F); 658 p[1] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); 659 p[2] = 0x80 | (Uint8) (ch & 0x3F); 660 dst += 3; 661 dstlen -= 3; 662 } else if (ch <= 0x1FFFFF) { 663 if (dstlen < 4) { 664 return SDL_ICONV_E2BIG; 665 } 666 p[0] = 0xF0 | (Uint8) ((ch >> 18) & 0x07); 667 p[1] = 0x80 | (Uint8) ((ch >> 12) & 0x3F); 668 p[2] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); 669 p[3] = 0x80 | (Uint8) (ch & 0x3F); 670 dst += 4; 671 dstlen -= 4; 672 } else if (ch <= 0x3FFFFFF) { 673 if (dstlen < 5) { 674 return SDL_ICONV_E2BIG; 675 } 676 p[0] = 0xF8 | (Uint8) ((ch >> 24) & 0x03); 677 p[1] = 0x80 | (Uint8) ((ch >> 18) & 0x3F); 678 p[2] = 0x80 | (Uint8) ((ch >> 12) & 0x3F); 679 p[3] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); 680 p[4] = 0x80 | (Uint8) (ch & 0x3F); 681 dst += 5; 682 dstlen -= 5; 683 } else { 684 if (dstlen < 6) { 685 return SDL_ICONV_E2BIG; 686 } 687 p[0] = 0xFC | (Uint8) ((ch >> 30) & 0x01); 688 p[1] = 0x80 | (Uint8) ((ch >> 24) & 0x3F); 689 p[2] = 0x80 | (Uint8) ((ch >> 18) & 0x3F); 690 p[3] = 0x80 | (Uint8) ((ch >> 12) & 0x3F); 691 p[4] = 0x80 | (Uint8) ((ch >> 6) & 0x3F); 692 p[5] = 0x80 | (Uint8) (ch & 0x3F); 693 dst += 6; 694 dstlen -= 6; 
695 } 696 } 697 break; 698 case ENCODING_UTF16BE: /* RFC 2781 */ 699 { 700 Uint8 *p = (Uint8 *) dst; 701 if (ch > 0x10FFFF) { 702 ch = UNKNOWN_UNICODE; 703 } 704 if (ch < 0x10000) { 705 if (dstlen < 2) { 706 return SDL_ICONV_E2BIG; 707 } 708 p[0] = (Uint8) (ch >> 8); 709 p[1] = (Uint8) ch; 710 dst += 2; 711 dstlen -= 2; 712 } else { 713 Uint16 W1, W2; 714 if (dstlen < 4) { 715 return SDL_ICONV_E2BIG; 716 } 717 ch = ch - 0x10000; 718 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF); 719 W2 = 0xDC00 | (Uint16) (ch & 0x3FF); 720 p[0] = (Uint8) (W1 >> 8); 721 p[1] = (Uint8) W1; 722 p[2] = (Uint8) (W2 >> 8); 723 p[3] = (Uint8) W2; 724 dst += 4; 725 dstlen -= 4; 726 } 727 } 728 break; 729 case ENCODING_UTF16LE: /* RFC 2781 */ 730 { 731 Uint8 *p = (Uint8 *) dst; 732 if (ch > 0x10FFFF) { 733 ch = UNKNOWN_UNICODE; 734 } 735 if (ch < 0x10000) { 736 if (dstlen < 2) { 737 return SDL_ICONV_E2BIG; 738 } 739 p[1] = (Uint8) (ch >> 8); 740 p[0] = (Uint8) ch; 741 dst += 2; 742 dstlen -= 2; 743 } else { 744 Uint16 W1, W2; 745 if (dstlen < 4) { 746 return SDL_ICONV_E2BIG; 747 } 748 ch = ch - 0x10000; 749 W1 = 0xD800 | (Uint16) ((ch >> 10) & 0x3FF); 750 W2 = 0xDC00 | (Uint16) (ch & 0x3FF); 751 p[1] = (Uint8) (W1 >> 8); 752 p[0] = (Uint8) W1; 753 p[3] = (Uint8) (W2 >> 8); 754 p[2] = (Uint8) W2; 755 dst += 4; 756 dstlen -= 4; 757 } 758 } 759 break; 760 case ENCODING_UCS2BE: 761 { 762 Uint8 *p = (Uint8 *) dst; 763 if (ch > 0xFFFF) { 764 ch = UNKNOWN_UNICODE; 765 } 766 if (dstlen < 2) { 767 return SDL_ICONV_E2BIG; 768 } 769 p[0] = (Uint8) (ch >> 8); 770 p[1] = (Uint8) ch; 771 dst += 2; 772 dstlen -= 2; 773 } 774 break; 775 case ENCODING_UCS2LE: 776 { 777 Uint8 *p = (Uint8 *) dst; 778 if (ch > 0xFFFF) { 779 ch = UNKNOWN_UNICODE; 780 } 781 if (dstlen < 2) { 782 return SDL_ICONV_E2BIG; 783 } 784 p[1] = (Uint8) (ch >> 8); 785 p[0] = (Uint8) ch; 786 dst += 2; 787 dstlen -= 2; 788 } 789 break; 790 case ENCODING_UTF32BE: 791 if (ch > 0x10FFFF) { 792 ch = UNKNOWN_UNICODE; 793 } 794 case 
ENCODING_UCS4BE: 795 if (ch > 0x7FFFFFFF) { 796 ch = UNKNOWN_UNICODE; 797 } 798 { 799 Uint8 *p = (Uint8 *) dst; 800 if (dstlen < 4) { 801 return SDL_ICONV_E2BIG; 802 } 803 p[0] = (Uint8) (ch >> 24); 804 p[1] = (Uint8) (ch >> 16); 805 p[2] = (Uint8) (ch >> 8); 806 p[3] = (Uint8) ch; 807 dst += 4; 808 dstlen -= 4; 809 } 810 break; 811 case ENCODING_UTF32LE: 812 if (ch > 0x10FFFF) { 813 ch = UNKNOWN_UNICODE; 814 } 815 case ENCODING_UCS4LE: 816 if (ch > 0x7FFFFFFF) { 817 ch = UNKNOWN_UNICODE; 818 } 819 { 820 Uint8 *p = (Uint8 *) dst; 821 if (dstlen < 4) { 822 return SDL_ICONV_E2BIG; 823 } 824 p[3] = (Uint8) (ch >> 24); 825 p[2] = (Uint8) (ch >> 16); 826 p[1] = (Uint8) (ch >> 8); 827 p[0] = (Uint8) ch; 828 dst += 4; 829 dstlen -= 4; 830 } 831 break; 832 } 833 834 /* Update state */ 835 *inbuf = src; 836 *inbytesleft = srclen; 837 *outbuf = dst; 838 *outbytesleft = dstlen; 839 ++total; 840 } 841 return total; 842} 843 844int 845SDL_iconv_close(SDL_iconv_t cd) 846{ 847 if (cd != (SDL_iconv_t)-1) { 848 SDL_free(cd); 849 } 850 return 0; 851} 852 853#endif /* !HAVE_ICONV */ 854 855char * 856SDL_iconv_string(const char *tocode, const char *fromcode, const char *inbuf, 857 size_t inbytesleft) 858{ 859 SDL_iconv_t cd; 860 char *string; 861 size_t stringsize; 862 char *outbuf; 863 size_t outbytesleft; 864 size_t retCode = 0; 865 866 cd = SDL_iconv_open(tocode, fromcode); 867 if (cd == (SDL_iconv_t) - 1) { 868 /* See if we can recover here (fixes iconv on Solaris 11) */ 869 if (!tocode || !*tocode) { 870 tocode = "UTF-8"; 871 } 872 if (!fromcode || !*fromcode) { 873 fromcode = "UTF-8"; 874 } 875 cd = SDL_iconv_open(tocode, fromcode); 876 } 877 if (cd == (SDL_iconv_t) - 1) { 878 return NULL; 879 } 880 881 stringsize = inbytesleft > 4 ? 
inbytesleft : 4; 882 string = SDL_malloc(stringsize); 883 if (!string) { 884 SDL_iconv_close(cd); 885 return NULL; 886 } 887 outbuf = string; 888 outbytesleft = stringsize; 889 SDL_memset(outbuf, 0, 4); 890 891 while (inbytesleft > 0) { 892 retCode = SDL_iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft); 893 switch (retCode) { 894 case SDL_ICONV_E2BIG: 895 { 896 char *oldstring = string; 897 stringsize *= 2; 898 string = SDL_realloc(string, stringsize); 899 if (!string) { 900 SDL_iconv_close(cd); 901 return NULL; 902 } 903 outbuf = string + (outbuf - oldstring); 904 outbytesleft = stringsize - (outbuf - string); 905 SDL_memset(outbuf, 0, 4); 906 } 907 break; 908 case SDL_ICONV_EILSEQ: 909 /* Try skipping some input data - not perfect, but... */ 910 ++inbuf; 911 --inbytesleft; 912 break; 913 case SDL_ICONV_EINVAL: 914 case SDL_ICONV_ERROR: 915 /* We can't continue... */ 916 inbytesleft = 0; 917 break; 918 } 919 } 920 SDL_iconv_close(cd); 921 922 return string; 923} 924 925/* vi: set ts=4 sw=4 expandtab: */