cscg24-guacamole

CSCG 2024 Challenge 'Guacamole Mashup'
git clone https://git.sinitax.com/sinitax/cscg24-guacamole
Log | Files | Refs | sfeed.txt

iconv.c (8178B)


      1/*
      2 * Licensed to the Apache Software Foundation (ASF) under one
      3 * or more contributor license agreements.  See the NOTICE file
      4 * distributed with this work for additional information
      5 * regarding copyright ownership.  The ASF licenses this file
      6 * to you under the Apache License, Version 2.0 (the
      7 * "License"); you may not use this file except in compliance
      8 * with the License.  You may obtain a copy of the License at
      9 *
     10 *   http://www.apache.org/licenses/LICENSE-2.0
     11 *
     12 * Unless required by applicable law or agreed to in writing,
     13 * software distributed under the License is distributed on an
     14 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
     15 * KIND, either express or implied.  See the License for the
     16 * specific language governing permissions and limitations
     17 * under the License.
     18 */
     19
#include "config.h"
#include "common/iconv.h"

#include <guacamole/unicode.h>
#include <stdint.h>
#include <string.h>
     25
     26/**
     27 * Lookup table for Unicode code points, indexed by CP-1252 codepoint.
     28 */
     29const static int __GUAC_RDP_CP1252_CODEPOINT[32] = {
     30    0x20AC, /* 0x80 */
     31    0xFFFD, /* 0x81 */
     32    0x201A, /* 0x82 */
     33    0x0192, /* 0x83 */
     34    0x201E, /* 0x84 */
     35    0x2026, /* 0x85 */
     36    0x2020, /* 0x86 */
     37    0x2021, /* 0x87 */
     38    0x02C6, /* 0x88 */
     39    0x2030, /* 0x89 */
     40    0x0160, /* 0x8A */
     41    0x2039, /* 0x8B */
     42    0x0152, /* 0x8C */
     43    0xFFFD, /* 0x8D */
     44    0x017D, /* 0x8E */
     45    0xFFFD, /* 0x8F */
     46    0xFFFD, /* 0x90 */
     47    0x2018, /* 0x91 */
     48    0x2019, /* 0x92 */
     49    0x201C, /* 0x93 */
     50    0x201D, /* 0x94 */
     51    0x2022, /* 0x95 */
     52    0x2013, /* 0x96 */
     53    0x2014, /* 0x97 */
     54    0x02DC, /* 0x98 */
     55    0x2122, /* 0x99 */
     56    0x0161, /* 0x9A */
     57    0x203A, /* 0x9B */
     58    0x0153, /* 0x9C */
     59    0xFFFD, /* 0x9D */
     60    0x017E, /* 0x9E */
     61    0x0178, /* 0x9F */
     62};
     63
     64int guac_iconv(guac_iconv_read* reader, const char** input, int in_remaining,
     65               guac_iconv_write* writer, char** output, int out_remaining) {
     66
     67    while (in_remaining > 0 && out_remaining > 0) {
     68
     69        int value;
     70        const char* read_start;
     71        char* write_start;
     72
     73        /* Read character */
     74        read_start = *input;
     75        value = reader(input, in_remaining);
     76        in_remaining -= *input - read_start;
     77
     78        /* Write character */
     79        write_start = *output;
     80        writer(output, out_remaining, value);
     81        out_remaining -= *output - write_start;
     82
     83        /* Stop if null terminator reached */
     84        if (value == 0)
     85            return 1;
     86
     87    }
     88
     89    /* Null terminator not reached */
     90    return 0;
     91
     92}
     93
     94int GUAC_READ_UTF8(const char** input, int remaining) {
     95
     96    int value;
     97
     98    *input += guac_utf8_read(*input, remaining, &value);
     99    return value;
    100
    101}
    102
    103int GUAC_READ_UTF16(const char** input, int remaining) {
    104
    105    int value;
    106
    107    /* Bail if not enough data */
    108    if (remaining < 2)
    109        return 0;
    110
    111    /* Read two bytes as integer */
    112    value = *((uint16_t*) *input);
    113    *input += 2;
    114
    115    return value;
    116
    117}
    118
    119int GUAC_READ_CP1252(const char** input, int remaining) {
    120
    121    int value = *((unsigned char*) *input);
    122
    123    /* Replace value with exception if not identical to ISO-8859-1 */
    124    if (value >= 0x80 && value <= 0x9F)
    125        value = __GUAC_RDP_CP1252_CODEPOINT[value - 0x80];
    126
    127    (*input)++;
    128    return value;
    129
    130}
    131
    132int GUAC_READ_ISO8859_1(const char** input, int remaining) {
    133
    134    int value = *((unsigned char*) *input);
    135
    136    (*input)++;
    137    return value;
    138
    139}
    140
    141/**
    142 * Invokes the given reader function, automatically normalizing newline
    143 * sequences as Unix-style newline characters ('\n').  All other charaters are
    144 * read verbatim.
    145 *
    146 * @param reader
    147 *     The reader to use to read the given character.
    148 *
    149 * @param input
    150 *     Pointer to the location within the input buffer that the next character
    151 *     should be read from.
    152 *
    153 * @param remaining
    154 *     The number of bytes remaining in the input buffer.
    155 *
    156 * @return
    157 *     The codepoint that was read, or zero if the end of the input string has
    158 *     been reached.
    159 */
    160static int guac_iconv_read_normalized(guac_iconv_read* reader,
    161        const char** input, int remaining) {
    162
    163    /* Read requested character */
    164    const char* input_start = *input;
    165    int value = reader(input, remaining);
    166
    167    /* Automatically translate CRLF pairs to simple newlines */
    168    if (value == '\r') {
    169
    170        /* Peek ahead by one character, adjusting remaining bytes relative to
    171         * last read */
    172        int peek_remaining = remaining - (*input - input_start);
    173        const char* peek_input = *input;
    174        int peek_value = reader(&peek_input, peek_remaining);
    175
    176        /* Consider read value to be a newline if we have encountered a "\r\n"
    177         * (CRLF) pair */
    178        if (peek_value == '\n') {
    179            value = '\n';
    180            *input = peek_input;
    181        }
    182
    183    }
    184
    185    return value;
    186
    187}
    188
    189int GUAC_READ_UTF8_NORMALIZED(const char** input, int remaining) {
    190    return guac_iconv_read_normalized(GUAC_READ_UTF8, input, remaining);
    191}
    192
    193int GUAC_READ_UTF16_NORMALIZED(const char** input, int remaining) {
    194    return guac_iconv_read_normalized(GUAC_READ_UTF16, input, remaining);
    195}
    196
    197int GUAC_READ_CP1252_NORMALIZED(const char** input, int remaining) {
    198    return guac_iconv_read_normalized(GUAC_READ_CP1252, input, remaining);
    199}
    200
    201int GUAC_READ_ISO8859_1_NORMALIZED(const char** input, int remaining) {
    202    return guac_iconv_read_normalized(GUAC_READ_ISO8859_1, input, remaining);
    203}
    204
    205void GUAC_WRITE_UTF8(char** output, int remaining, int value) {
    206    *output += guac_utf8_write(value, *output, remaining);
    207}
    208
    209void GUAC_WRITE_UTF16(char** output, int remaining, int value) {
    210
    211    /* Bail if not enough data */
    212    if (remaining < 2)
    213        return;
    214
    215    /* Write two bytes as integer */
    216    *((uint16_t*) *output) = value;
    217    *output += 2;
    218
    219}
    220
    221void GUAC_WRITE_CP1252(char** output, int remaining, int value) {
    222
    223    /* If not in ISO-8859-1 part of CP1252, check lookup table */
    224    if ((value >= 0x80 && value <= 0x9F) || value > 0xFF) {
    225
    226        int i;
    227        int replacement_value = '?';
    228        const int* codepoint = __GUAC_RDP_CP1252_CODEPOINT;
    229
    230        /* Search lookup table for value */
    231        for (i=0x80; i<=0x9F; i++, codepoint++) {
    232            if (*codepoint == value) {
    233                replacement_value = i;
    234                break;
    235            }
    236        }
    237
    238        /* Replace value with discovered value (or question mark) */
    239        value = replacement_value;
    240
    241    }
    242
    243    *((unsigned char*) *output) = (unsigned char) value;
    244    (*output)++;
    245}
    246
    247void GUAC_WRITE_ISO8859_1(char** output, int remaining, int value) {
    248
    249    /* Translate to question mark if out of range */
    250    if (value > 0xFF)
    251        value = '?';
    252
    253    *((unsigned char*) *output) = (unsigned char) value;
    254    (*output)++;
    255}
    256
    257/**
    258 * Invokes the given writer function, automatically writing newline characters
    259 * ('\n') as CRLF ("\r\n"). All other charaters are written verbatim.
    260 *
    261 * @param writer
    262 *     The writer to use to write the given character.
    263 *
    264 * @param output
    265 *     Pointer to the location within the output buffer that the next character
    266 *     should be written.
    267 *
    268 * @param remaining
    269 *     The number of bytes remaining in the output buffer.
    270 *
    271 * @param value
    272 *     The codepoint of the character to write.
    273 */
    274static void guac_iconv_write_crlf(guac_iconv_write* writer, char** output,
    275        int remaining, int value) {
    276
    277    if (value != '\n') {
    278        writer(output, remaining, value);
    279        return;
    280    }
    281
    282    char* output_start = *output;
    283    writer(output, remaining, '\r');
    284
    285    remaining -= *output - output_start;
    286    if (remaining > 0)
    287        writer(output, remaining, '\n');
    288
    289}
    290
    291void GUAC_WRITE_UTF8_CRLF(char** output, int remaining, int value) {
    292    guac_iconv_write_crlf(GUAC_WRITE_UTF8, output, remaining, value);
    293}
    294
    295void GUAC_WRITE_UTF16_CRLF(char** output, int remaining, int value) {
    296    guac_iconv_write_crlf(GUAC_WRITE_UTF16, output, remaining, value);
    297}
    298
    299void GUAC_WRITE_CP1252_CRLF(char** output, int remaining, int value) {
    300    guac_iconv_write_crlf(GUAC_WRITE_CP1252, output, remaining, value);
    301}
    302
    303void GUAC_WRITE_ISO8859_1_CRLF(char** output, int remaining, int value) {
    304    guac_iconv_write_crlf(GUAC_WRITE_ISO8859_1, output, remaining, value);
    305}
    306