utf8proc

A clean C library for processing UTF-8 Unicode data
git clone https://git.sinitax.com/juliastrings/utf8proc
Log | Files | Refs | README | LICENSE | sfeed.txt

commit a1c429a45b17c05abb6c67d959350fae054ea952
parent 0528e9cda649de9cb14f95c9fa37fc8cc929f856
Author: Steven G. Johnson <stevenj@mit.edu>
Date:   Mon, 30 Mar 2015 11:05:51 -0400

rename DLLEXPORT to UTF8PROC_DLLEXPORT to prevent conflicts with other header files that define DLLEXPORT

Diffstat:
M utf8proc.c | 38 +++++++++++++++++++-------------------
M utf8proc.h | 46 +++++++++++++++++++++++-----------------------
2 files changed, 42 insertions(+), 42 deletions(-)

diff --git a/utf8proc.c b/utf8proc.c @@ -44,7 +44,7 @@ #include "utf8proc_data.c" -DLLEXPORT const int8_t utf8proc_utf8class[256] = { +UTF8PROC_DLLEXPORT const int8_t utf8proc_utf8class[256] = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, @@ -87,11 +87,11 @@ DLLEXPORT const int8_t utf8proc_utf8class[256] = { be different, being based on ABI compatibility.): */ #define STRINGIZEx(x) #x #define STRINGIZE(x) STRINGIZEx(x) -DLLEXPORT const char *utf8proc_version(void) { +UTF8PROC_DLLEXPORT const char *utf8proc_version(void) { return STRINGIZE(UTF8PROC_VERSION_MAJOR) "." STRINGIZE(UTF8PROC_VERSION_MINOR) "." STRINGIZE(UTF8PROC_VERSION_PATCH) ""; } -DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) { +UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) { switch (errcode) { case UTF8PROC_ERROR_NOMEM: return "Memory for processing UTF-8 data could not be allocated."; @@ -108,7 +108,7 @@ DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode) { } } -DLLEXPORT ssize_t utf8proc_iterate( +UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate( const uint8_t *str, ssize_t strlen, int32_t *dst ) { int length; @@ -148,14 +148,14 @@ DLLEXPORT ssize_t utf8proc_iterate( return length; } -DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) { +UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t uc) { if (uc < 0 || uc >= 0x110000 || ((uc & 0xFFFF) >= 0xFFFE) || (uc >= 0xD800 && uc < 0xE000) || (uc >= 0xFDD0 && uc < 0xFDF0)) return false; else return true; } -DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) { +UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t uc, uint8_t *dst) { if (uc < 0x00) { return 0; } else if (uc < 0x80) { @@ -195,7 +195,7 @@ static const utf8proc_property_t *get_property(int32_t uc) { ); } -DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t uc) { +UTF8PROC_DLLEXPORT const utf8proc_property_t 
*utf8proc_get_property(int32_t uc) { return uc < 0 || uc >= 0x110000 ? utf8proc_properties : get_property(uc); } @@ -226,22 +226,22 @@ static bool grapheme_break(int lbc, int tbc) { } /* return whether there is a grapheme break between codepoints c1 and c2 */ -DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2) { +UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t c1, int32_t c2) { return grapheme_break(utf8proc_get_property(c1)->boundclass, utf8proc_get_property(c2)->boundclass); } /* return a character width analogous to wcwidth (except portable and hopefully less buggy than most system wcwidth functions). */ -DLLEXPORT int utf8proc_charwidth(int32_t c) { +UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t c) { return utf8proc_get_property(c)->charwidth; } -DLLEXPORT utf8proc_category_t utf8proc_category(int32_t c) { +UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t c) { return utf8proc_get_property(c)->category; } -DLLEXPORT const char *utf8proc_category_string(int32_t c) { +UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t c) { static const char s[][3] = {"Cn","Lu","Ll","Lt","Lm","Lo","Mn","Mc","Me","Nd","Nl","No","Pc","Pd","Ps","Pe","Pi","Pf","Po","Sm","Sc","Sk","So","Zs","Zl","Zp","Cc","Cf","Cs","Co"}; return s[utf8proc_category(c)]; } @@ -250,7 +250,7 @@ DLLEXPORT const char *utf8proc_category_string(int32_t c) { return utf8proc_decompose_char((replacement_uc), dst, bufsize, \ options & ~UTF8PROC_LUMP, last_boundclass) -DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { +UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass) { const utf8proc_property_t *property; utf8proc_propval_t category; int32_t hangul_sindex; @@ -354,7 +354,7 @@ DLLEXPORT ssize_t utf8proc_decompose_char(int32_t uc, int32_t *dst, ssize_t bufs return 1; } -DLLEXPORT ssize_t 
utf8proc_decompose( +UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( const uint8_t *str, ssize_t strlen, int32_t *buffer, ssize_t bufsize, utf8proc_option_t options ) { @@ -416,7 +416,7 @@ DLLEXPORT ssize_t utf8proc_decompose( return wpos; } -DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options) { +UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options) { /* UTF8PROC_NULLTERM option will be ignored, 'length' is never ignored ASSERT: 'buffer' has one spare byte of free space at the end! */ if (options & (UTF8PROC_NLF2LS | UTF8PROC_NLF2PS | UTF8PROC_STRIPCC)) { @@ -531,7 +531,7 @@ DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_op } } -DLLEXPORT ssize_t utf8proc_map( +UTF8PROC_DLLEXPORT ssize_t utf8proc_map( const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options ) { int32_t *buffer; @@ -560,28 +560,28 @@ DLLEXPORT ssize_t utf8proc_map( return result; } -DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str) { +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str) { uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE); return retval; } -DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str) { +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str) { uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE); return retval; } -DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str) { +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str) { uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_DECOMPOSE | UTF8PROC_COMPAT); return retval; } -DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str) { +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str) { uint8_t *retval; utf8proc_map(str, 0, &retval, UTF8PROC_NULLTERM | UTF8PROC_STABLE | UTF8PROC_COMPOSE | UTF8PROC_COMPAT); diff 
--git a/utf8proc.h b/utf8proc.h @@ -99,14 +99,14 @@ enum {false, true}; #ifdef _WIN32 # ifdef UTF8PROC_EXPORTS -# define DLLEXPORT __declspec(dllexport) +# define UTF8PROC_DLLEXPORT __declspec(dllexport) # else -# define DLLEXPORT __declspec(dllimport) +# define UTF8PROC_DLLEXPORT __declspec(dllimport) # endif #elif __GNUC__ >= 4 -# define DLLEXPORT __attribute__ ((visibility("default"))) +# define UTF8PROC_DLLEXPORT __attribute__ ((visibility("default"))) #else -# define DLLEXPORT +# define UTF8PROC_DLLEXPORT #endif #ifdef __cplusplus @@ -351,20 +351,20 @@ typedef enum { * Array containing the byte lengths of a UTF-8 encoded codepoint based * on the first byte. */ -DLLEXPORT extern const int8_t utf8proc_utf8class[256]; +UTF8PROC_DLLEXPORT extern const int8_t utf8proc_utf8class[256]; /** * Returns the utf8proc API version as a string MAJOR.MINOR.PATCH * (http://semver.org format), possibly with a "-dev" suffix for * development versions. */ -DLLEXPORT const char *utf8proc_version(void); +UTF8PROC_DLLEXPORT const char *utf8proc_version(void); /** * Returns an informative error string for the given utf8proc error code * (e.g. the error codes returned by @ref utf8proc_map). */ -DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode); +UTF8PROC_DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode); /** * Reads a single codepoint from the UTF-8 sequence being pointed to by `str`. @@ -376,7 +376,7 @@ DLLEXPORT const char *utf8proc_errmsg(ssize_t errcode); * In case of success, the number of bytes read is returned; otherwise, a * negative error code is returned. 
*/ -DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *codepoint_ref); +UTF8PROC_DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t *codepoint_ref); /** * Check if a codepoint is valid (regardless of whether it has been @@ -384,7 +384,7 @@ DLLEXPORT ssize_t utf8proc_iterate(const uint8_t *str, ssize_t strlen, int32_t * * * @return 1 if the given `codepoint` is valid and otherwise return 0. */ -DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint); +UTF8PROC_DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint); /** * Encodes the codepoint as an UTF-8 string in the byte array pointed @@ -395,7 +395,7 @@ DLLEXPORT bool utf8proc_codepoint_valid(int32_t codepoint); * * This function does not check whether `codepoint` is valid Unicode. */ -DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst); +UTF8PROC_DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst); /** * Look up the properties for a given codepoint. @@ -409,7 +409,7 @@ DLLEXPORT ssize_t utf8proc_encode_char(int32_t codepoint, uint8_t *dst); * If the codepoint is unassigned or invalid, a pointer to a special struct is * returned in which `category` is 0 (@ref UTF8PROC_CATEGORY_CN). */ -DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t codepoint); +UTF8PROC_DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t codepoint); /** Decompose a codepoint into an array of codepoints. * @@ -438,7 +438,7 @@ DLLEXPORT const utf8proc_property_t *utf8proc_get_property(int32_t codepoint); * required buffer size is returned, while the buffer will be overwritten with * undefined data. 
*/ -DLLEXPORT ssize_t utf8proc_decompose_char( +UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose_char( int32_t codepoint, int32_t *dst, ssize_t bufsize, utf8proc_option_t options, int *last_boundclass ); @@ -459,7 +459,7 @@ DLLEXPORT ssize_t utf8proc_decompose_char( * required buffer size is returned, while the buffer will be overwritten with * undefined data. */ -DLLEXPORT ssize_t utf8proc_decompose( +UTF8PROC_DLLEXPORT ssize_t utf8proc_decompose( const uint8_t *str, ssize_t strlen, int32_t *buffer, ssize_t bufsize, utf8proc_option_t options ); @@ -489,13 +489,13 @@ DLLEXPORT ssize_t utf8proc_decompose( * entries of the array pointed to by `str` have to be in the * range `0x0000` to `0x10FFFF`. Otherwise, the program might crash! */ -DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options); +UTF8PROC_DLLEXPORT ssize_t utf8proc_reencode(int32_t *buffer, ssize_t length, utf8proc_option_t options); /** * Given a pair of consecutive codepoints, return whether a grapheme break is * permitted between them (as defined by the extended grapheme clusters in UAX#29). */ -DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t codepoint2); +UTF8PROC_DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t codepoint2); /** * Given a codepoint, return a character width analogous to `wcwidth(codepoint)`, @@ -505,19 +505,19 @@ DLLEXPORT bool utf8proc_grapheme_break(int32_t codepoint1, int32_t codepoint2); * @note * If you want to check for particular types of non-printable characters, * (analogous to `isprint` or `iscntrl`), use @ref utf8proc_category. */ -DLLEXPORT int utf8proc_charwidth(int32_t codepoint); +UTF8PROC_DLLEXPORT int utf8proc_charwidth(int32_t codepoint); /** * Return the Unicode category for the codepoint (one of the * @ref utf8proc_category_t constants.) 
*/ -DLLEXPORT utf8proc_category_t utf8proc_category(int32_t codepoint); +UTF8PROC_DLLEXPORT utf8proc_category_t utf8proc_category(int32_t codepoint); /** * Return the two-letter (nul-terminated) Unicode category string for * the codepoint (e.g. `"Lu"` or `"Co"`). */ -DLLEXPORT const char *utf8proc_category_string(int32_t codepoint); +UTF8PROC_DLLEXPORT const char *utf8proc_category_string(int32_t codepoint); /** * Maps the given UTF-8 string pointed to by `str` to a new UTF-8 @@ -537,7 +537,7 @@ DLLEXPORT const char *utf8proc_category_string(int32_t codepoint); * @note The memory of the new UTF-8 string will have been allocated * with `malloc`, and should therefore be deallocated with `free`. */ -DLLEXPORT ssize_t utf8proc_map( +UTF8PROC_DLLEXPORT ssize_t utf8proc_map( const uint8_t *str, ssize_t strlen, uint8_t **dstptr, utf8proc_option_t options ); @@ -550,13 +550,13 @@ DLLEXPORT ssize_t utf8proc_map( */ /** @{ */ /** NFD normalization (@ref UTF8PROC_DECOMPOSE). */ -DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str); +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFD(const uint8_t *str); /** NFC normalization (@ref UTF8PROC_COMPOSE). */ -DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str); +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFC(const uint8_t *str); /** NFD normalization (@ref UTF8PROC_DECOMPOSE and @ref UTF8PROC_COMPAT). */ -DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str); +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKD(const uint8_t *str); /** NFD normalization (@ref UTF8PROC_COMPOSE and @ref UTF8PROC_COMPAT). */ -DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str); +UTF8PROC_DLLEXPORT uint8_t *utf8proc_NFKC(const uint8_t *str); /** @} */ #ifdef __cplusplus