utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit ad277229234b77ac3d334860f6857921060dcb37
parent 498ecbddd87f2555a730e90810db7744cf416b82
Author: Tony Kelman <tony@kelman.net>
Date:   Sat,  4 Apr 2015 21:17:13 -0700

Use a new typedef utf8proc_ssize_t to avoid define collisions with MSVC

Diffstat:
M test/graphemetest.c | 2 +-
M utf8proc.c | 46 +++++++++++++++++++++++-----------------------
M utf8proc.h | 27 ++++++++++++++-------------
3 files changed, 38 insertions(+), 37 deletions(-)

diff --git a/test/graphemetest.c b/test/graphemetest.c @@ -41,7 +41,7 @@ int main(int argc, char **argv) if (si) { uint8_t utf8[1024]; /* copy src without 0xff grapheme separators */ size_t i = 0, j = 0; - ssize_t glen; + utf8proc_ssize_t glen; uint8_t *g; /* utf8proc_map grapheme results */ while (i < si) { if (src[i] != '/') diff --git a/utf8proc.c b/utf8proc.c @@ -91,7 +91,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void) { return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) ""; } -UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) { +UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode) { switch (errcode) { case UTF8PROC_ERROR_NOMEM: return "Memory for processing UTF-8 data could not be allocated."; @@ -108,8 +108,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) { } } -UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate( - const uint8_t *str, ssize_t strlen, int32_t *dst +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate( + const uint8_t *str, utf8proc_ssize_t strlen, int32_t *dst ) { int length; int i; @@ -155,7 +155,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) { else return true; } -UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) { +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) { if (uc < 0x00) { return 0; } else if (uc < 0x80) { @@ -250,7 +250,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) { return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ options & ~UTF8PROC_LUMP, last_boundclass) -UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { const utf8proc_property_t 
*property; utf8proc_propval_t category; int32_t hangul_sindex; @@ -313,7 +313,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi if (options & UTF8PROC_CASEFOLD) { if (property->casefold_mapping) { const int32_t *casefold_entry; - ssize_t written = 0; + utf8proc_ssize_t written = 0; for (casefold_entry = property->casefold_mapping; *casefold_entry >= 0; casefold_entry++) { written += utf8proc_decompose_char(*casefold_entry, dst+written, @@ -328,7 +328,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi if (property->decomp_mapping && (!property->decomp_type || (options & UTF8PROC_COMPAT))) { const int32_t *decomp_entry; - ssize_t written = 0; + utf8proc_ssize_t written = 0; for (decomp_entry = property->decomp_mapping; *decomp_entry >= 0; decomp_entry++) { written += utf8proc_decompose_char(*decomp_entry, dst+written, @@ -354,12 +354,12 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssi return 1; } -UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( - const uint8_t *str, ssize_t strlen, - int32_t *buffer, ssize_t bufsize, utf8proc_option_t options +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( + const uint8_t *str, utf8proc_ssize_t strlen, + int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ) { /* strlen will be ignored, if UTF8PROC_NULLTERM is set in options */ - ssize_t wpos = 0; + utf8proc_ssize_t wpos = 0; if ((options & UTF8PROC_COMPOSE) && (options & UTF8PROC_DECOMPOSE)) return UTF8PROC_ERROR_INVALIDOPTS; if ((options & UTF8PROC_STRIPMARK) && @@ -367,8 +367,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( return UTF8PROC_ERROR_INVALIDOPTS; { int32_t uc; - ssize_t rpos = 0; - ssize_t decomp_result; + utf8proc_ssize_t rpos = 0; + utf8proc_ssize_t decomp_result; int boundclass = UTF8PROC_BOUNDCLASS_START; while (1) { if (options & UTF8PROC_NULLTERM) { @@ -395,7 +395,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( } } if ((options 
& (UTF8PROC_COMPOSE|UTF8PROC_DECOMPOSE)) && bufsize >= wpos) { - ssize_t pos = 0; + utf8proc_ssize_t pos = 0; while (pos < wpos-1) { int32_t uc1, uc2; const utf8proc_property_t *property1, *property2; @@ -416,12 +416,12 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( return wpos; } -UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options) { +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options) { /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored ASSERT: 'buffer' has one spare byte of free space at the end! */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { - ssize_t rpos; - ssize_t wpos = 0; + utf8proc_ssize_t rpos; + utf8proc_ssize_t wpos = 0; int32_t uc; for (rpos = 0; rpos < length; rpos++) { uc = buffer[rpos]; @@ -455,8 +455,8 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut int32_t current_char; const utf8proc_property_t *starter_property = NULL, *current_property; utf8proc_propval_t max_combining_class = -1; - ssize_t rpos; - ssize_t wpos = 0; + utf8proc_ssize_t rpos; + utf8proc_ssize_t wpos = 0; int32_t composition; for (rpos = 0; rpos < length; rpos++) { current_char = buffer[rpos]; @@ -520,7 +520,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut length = wpos; } { - ssize_t rpos, wpos = 0; + utf8proc_ssize_t rpos, wpos = 0; int32_t uc; for (rpos = 0; rpos < length; rpos++) { uc = buffer[rpos]; @@ -531,11 +531,11 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, ut } } -UTF8PROC_DLLEXPORT ssize_t utf8proc_map( - const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( + const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options ) { int32_t *buffer; - ssize_t result; + utf8proc_ssize_t result; 
*dstptr = NULL; result = utf8proc_decompose(str, strlen, NULL, 0, options); if (result < 0) return result; diff --git a/utf8proc.h b/utf8proc.h @@ -83,9 +83,9 @@ typedef short int16_t; typedef unsigned short uint16_t; typedef int int32_t; # ifdef _WIN64 -# define ssize_t __int64 +typedef __int64 utf8proc_ssize_t; # else -# define ssize_t int +typedef int utf8proc_ssize_t; # endif # ifndef __cplusplus typedef unsigned char bool; @@ -94,6 +94,7 @@ enum {false, true}; #else # include <stdbool.h> # include <inttypes.h> +typedef ssize_t utf8proc_ssize_t; #endif #include <limits.h> @@ -364,7 +365,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_version(void); * Returns an informative error string for the given utf8proc error code * (e.g. the error codes returned by @ref utf8proc_map). */ -UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode); +UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(utf8proc_ssize_t errcode); /** * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. @@ -376,7 +377,7 @@ UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode); * In case of success, the number of bytes read is returned; otherwise, a * negative error code is returned. */ -UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *codepoint_ref); +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_iterate(const uint8_t *str, utf8proc_ssize_t strlen, int32_t *codepoint_ref); /** * Check if a codepoint is valid (regardless of whether it has been @@ -395,7 +396,7 @@ UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint); * * This function does not check whether `codepoint` is valid Unicode. */ -UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst); +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst); /** * Look up the properties for a given codepoint. 
@@ -438,8 +439,8 @@ UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t code * required buffer size is returned, while the buffer will be overwritten with * undefined data. */ -UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char( - int32_t codepoint, int32_t *dst, ssize_t bufsize, +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose_char( + int32_t codepoint, int32_t *dst, utf8proc_ssize_t bufsize, utf8proc_option_t options, int *last_boundclass ); @@ -459,9 +460,9 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char( * required buffer size is returned, while the buffer will be overwritten with * undefined data. */ -UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( - const uint8_t *str, ssize_t strlen, - int32_t *buffer, ssize_t bufsize, utf8proc_option_t options +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_decompose( + const uint8_t *str, utf8proc_ssize_t strlen, + int32_t *buffer, utf8proc_ssize_t bufsize, utf8proc_option_t options ); /** @@ -489,7 +490,7 @@ UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( * entries of the array pointed to by `str` have to be in the * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! */ -UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options); +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_reencode(int32_t *buffer, utf8proc_ssize_t length, utf8proc_option_t options); /** * Given a pair of consecutive codepoints, return whether a grapheme break is @@ -537,8 +538,8 @@ UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint); * @note The memory of the new UTF-8 string will have been allocated * with `malloc`, and should therefore be deallocated with `free`. 
*/ -UTF8PROC_DLLEXPORT ssize_t utf8proc_map( - const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options +UTF8PROC_DLLEXPORT utf8proc_ssize_t utf8proc_map( + const uint8_t *str, utf8proc_ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options ); /** @name Unicode normalization