iconv.c (8178B)
1/* 2 * Licensed to the Apache Software Foundation (ASF) under one 3 * or more contributor license agreements. See the NOTICE file 4 * distributed with this work for additional information 5 * regarding copyright ownership. The ASF licenses this file 6 * to you under the Apache License, Version 2.0 (the 7 * "License"); you may not use this file except in compliance 8 * with the License. You may obtain a copy of the License at 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, 13 * software distributed under the License is distributed on an 14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY 15 * KIND, either express or implied. See the License for the 16 * specific language governing permissions and limitations 17 * under the License. 18 */ 19 20#include "config.h" 21#include "common/iconv.h" 22 23#include <guacamole/unicode.h> 24#include <stdint.h> 25 26/** 27 * Lookup table for Unicode code points, indexed by CP-1252 codepoint. 28 */ 29const static int __GUAC_RDP_CP1252_CODEPOINT[32] = { 30 0x20AC, /* 0x80 */ 31 0xFFFD, /* 0x81 */ 32 0x201A, /* 0x82 */ 33 0x0192, /* 0x83 */ 34 0x201E, /* 0x84 */ 35 0x2026, /* 0x85 */ 36 0x2020, /* 0x86 */ 37 0x2021, /* 0x87 */ 38 0x02C6, /* 0x88 */ 39 0x2030, /* 0x89 */ 40 0x0160, /* 0x8A */ 41 0x2039, /* 0x8B */ 42 0x0152, /* 0x8C */ 43 0xFFFD, /* 0x8D */ 44 0x017D, /* 0x8E */ 45 0xFFFD, /* 0x8F */ 46 0xFFFD, /* 0x90 */ 47 0x2018, /* 0x91 */ 48 0x2019, /* 0x92 */ 49 0x201C, /* 0x93 */ 50 0x201D, /* 0x94 */ 51 0x2022, /* 0x95 */ 52 0x2013, /* 0x96 */ 53 0x2014, /* 0x97 */ 54 0x02DC, /* 0x98 */ 55 0x2122, /* 0x99 */ 56 0x0161, /* 0x9A */ 57 0x203A, /* 0x9B */ 58 0x0153, /* 0x9C */ 59 0xFFFD, /* 0x9D */ 60 0x017E, /* 0x9E */ 61 0x0178, /* 0x9F */ 62}; 63 64int guac_iconv(guac_iconv_read* reader, const char** input, int in_remaining, 65 guac_iconv_write* writer, char** output, int out_remaining) { 66 67 while (in_remaining > 0 && out_remaining > 0) { 68 69 int value; 70 const char* read_start; 71 char* write_start; 72 73 /* Read character */ 74 read_start = *input; 75 value = reader(input, in_remaining); 76 in_remaining -= *input - read_start; 77 78 /* Write character */ 79 write_start = *output; 80 writer(output, out_remaining, value); 81 out_remaining -= *output - write_start; 82 83 /* Stop if null terminator reached */ 84 if (value == 0) 85 return 1; 86 87 } 88 89 /* Null terminator not reached */ 90 return 0; 91 92} 93 94int GUAC_READ_UTF8(const char** input, int remaining) { 95 96 int value; 97 98 *input += guac_utf8_read(*input, remaining, &value); 99 return value; 100 101} 102 103int GUAC_READ_UTF16(const char** input, int remaining) { 104 105 int value; 106 107 /* Bail if not enough data */ 108 if (remaining < 2) 109 return 0; 110 111 /* Read two bytes as integer */ 112 value = *((uint16_t*) *input); 113 *input += 2; 114 115 return value; 116 117} 118 119int GUAC_READ_CP1252(const char** input, int remaining) { 120 121 int value = *((unsigned char*) *input); 122 123 /* Replace value with exception if not identical to ISO-8859-1 */ 124 if (value >= 0x80 && value <= 0x9F) 125 value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80]; 126 127 (*input)++; 128 return value; 129 130} 131 132int GUAC_READ_ISO8859_1(const char** input, int remaining) { 133 134 int value = *((unsigned char*) *input); 135 136 (*input)++; 137 return value; 138 139} 140 141/** 142 * Invokes the given reader function, automatically normalizing newline 143 * sequences as Unix-style newline characters ('\n'). All other charaters are 144 * read verbatim. 145 * 146 * @param reader 147 * The reader to use to read the given character. 148 * 149 * @param input 150 * Pointer to the location within the input buffer that the next character 151 * should be read from. 152 * 153 * @param remaining 154 * The number of bytes remaining in the input buffer. 155 * 156 * @return 157 * The codepoint that was read, or zero if the end of the input string has 158 * been reached. 159 */ 160static int guac_iconv_read_normalized(guac_iconv_read* reader, 161 const char** input, int remaining) { 162 163 /* Read requested character */ 164 const char* input_start = *input; 165 int value = reader(input, remaining); 166 167 /* Automatically translate CRLF pairs to simple newlines */ 168 if (value == '\r') { 169 170 /* Peek ahead by one character, adjusting remaining bytes relative to 171 * last read */ 172 int peek_remaining = remaining - (*input - input_start); 173 const char* peek_input = *input; 174 int peek_value = reader(&peek_input, peek_remaining); 175 176 /* Consider read value to be a newline if we have encountered a "\r\n" 177 * (CRLF) pair */ 178 if (peek_value == '\n') { 179 value = '\n'; 180 *input = peek_input; 181 } 182 183 } 184 185 return value; 186 187} 188 189int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) { 190 return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining); 191} 192 193int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) { 194 return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining); 195} 196 197int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) { 198 return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining); 199} 200 201int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) { 202 return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining); 203} 204 205void GUAC_WRITE_UTF8(char** output, int remaining, int value) { 206 *output += guac_utf8_write(value, *output, remaining); 207} 208 209void GUAC_WRITE_UTF16(char** output, int remaining, int value) { 210 211 /* Bail if not enough data */ 212 if (remaining < 2) 213 return; 214 215 /* Write two bytes as integer */ 216 *((uint16_t*) *output) = value; 217 *output += 2; 218 219} 220 221void GUAC_WRITE_CP1252(char** output, int remaining, int value) { 222 223 /* If not in ISO-8859-1 part of CP1252, check lookup table */ 224 if ((value >= 0x80 && value <= 0x9F) || value > 0xFF) { 225 226 int i; 227 int replacement_value = '?'; 228 const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT; 229 230 /* Search lookup table for value */ 231 for (i=0x80; i<=0x9F; i++, codepoint++) { 232 if (*codepoint == value) { 233 replacement_value = i; 234 break; 235 } 236 } 237 238 /* Replace value with discovered value (or question mark) */ 239 value = replacement_value; 240 241 } 242 243 *((unsigned char*) *output) = (unsigned char) value; 244 (*output)++; 245} 246 247void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) { 248 249 /* Translate to question mark if out of range */ 250 if (value > 0xFF) 251 value = '?'; 252 253 *((unsigned char*) *output) = (unsigned char) value; 254 (*output)++; 255} 256 257/** 258 * Invokes the given writer function, automatically writing newline characters 259 * ('\n') as CRLF ("\r\n"). All other charaters are written verbatim. 260 * 261 * @param writer 262 * The writer to use to write the given character. 263 * 264 * @param output 265 * Pointer to the location within the output buffer that the next character 266 * should be written. 267 * 268 * @param remaining 269 * The number of bytes remaining in the output buffer. 270 * 271 * @param value 272 * The codepoint of the character to write. 273 */ 274static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output, 275 int remaining, int value) { 276 277 if (value != '\n') { 278 writer(output, remaining, value); 279 return; 280 } 281 282 char* output_start = *output; 283 writer(output, remaining, '\r'); 284 285 remaining -= *output - output_start; 286 if (remaining > 0) 287 writer(output, remaining, '\n'); 288 289} 290 291void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) { 292 guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value); 293} 294 295void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) { 296 guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value); 297} 298 299void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) { 300 guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value); 301} 302 303void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) { 304 guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value); 305} 306