/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


#ifndef _GUAC_UNICODE_H
#define _GUAC_UNICODE_H

/**
 * Provides functions for manipulating Unicode strings.
 *
 * @file unicode.h
 */

#include <stddef.h>

/**
 * Given the initial byte of a single UTF-8 character, returns the overall
 * byte size of the entire character.
 *
 * @param c
 *     The initial byte of the character to check.
 *
 * @return
 *     The number of bytes in the given character overall.
 */
size_t guac_utf8_charsize(unsigned char c);

/**
 * Given a UTF-8-encoded string, returns the length of the string in
 * characters (not bytes).
 *
 * @param str
 *     The UTF-8 string to calculate the length of.
 *
 * @return
 *     The length in characters of the given UTF-8 string.
 */
size_t guac_utf8_strlen(const char* str);

/**
 * Given a destination buffer and its length, writes the given codepoint as
 * UTF-8 to the buffer, returning the number of bytes written. If there is not
 * enough space in the buffer to write the character, no bytes are written at
 * all.
 *
 * @param codepoint
 *     The Unicode codepoint to write to the buffer.
 *
 * @param utf8
 *     The buffer to write to.
 *
 * @param length
 *     The length of the buffer, in bytes.
 *
 * @return
 *     The number of bytes written, which may be zero if there is not enough
 *     space in the buffer to write the UTF-8 character.
 */
int guac_utf8_write(int codepoint, char* utf8, int length);

/**
 * Given a buffer containing UTF-8 characters, reads the first codepoint in
 * the buffer, returning the length of the codepoint in bytes. If no codepoint
 * could be read, zero is returned.
 *
 * @param utf8
 *     A buffer containing UTF-8 characters.
 *
 * @param length
 *     The length of the buffer, in bytes.
 *
 * @param codepoint
 *     A pointer to an integer which will contain the codepoint read, if any.
 *     If no character can be read, the integer will be left untouched.
 *
 * @return
 *     The number of bytes read, which may be zero if there is not enough
 *     space in the buffer to read a character.
 */
int guac_utf8_read(const char* utf8, int length, int* codepoint);

#endif