Public interface for UTF-8 functions. More...
Go to the source code of this file.
Macros | |
#define | UTF8_VERSION_MAKE(_major, _minor, _bugfix) ((_major) * 10000) + ((_minor) * 100) + (_bugfix) |
Macro for creating a version number from a major, minor and bugfix number. More... | |
#define | UTF8_VERSION_MAJOR 1 |
The major version number of this release. More... | |
#define | UTF8_VERSION_MINOR 5 |
The minor version number of this release. More... | |
#define | UTF8_VERSION_BUGFIX 0 |
The bugfix version number of this release. More... | |
#define | UTF8_VERSION UTF8_VERSION_MAKE(UTF8_VERSION_MAJOR, UTF8_VERSION_MINOR, UTF8_VERSION_BUGFIX) |
The version number as an integer. More... | |
#define | UTF8_VERSION_STRING "1.5.0" |
The version number as a string. More... | |
#define | UTF8_VERSION_GUARD(_major, _minor, _bugfix) (UTF8_VERSION >= UTF8_VERSION_MAKE(_major, _minor, _bugfix)) |
Check if feature is supported by the current release. More... | |
#define | UTF8_ERR_NONE (0) |
No errors. More... | |
#define | UTF8_ERR_INVALID_DATA (-1) |
Input data is invalid. More... | |
#define | UTF8_ERR_INVALID_FLAG (-2) |
Input flag is invalid. More... | |
#define | UTF8_ERR_NOT_ENOUGH_SPACE (-3) |
Not enough space in buffer to store result. More... | |
#define | UTF8_ERR_OVERLAPPING_PARAMETERS (-4) |
Input and output buffers overlap in memory. More... | |
#define | UTF8_ERR_INVALID_LOCALE (-5) |
Invalid locale specified. More... | |
#define | UTF8_LOCALE_DEFAULT 0 |
Used for text unaffected by changes in locale. More... | |
#define | UTF8_LOCALE_LITHUANIAN 1 |
Changes behavior of the case mapping implementation when processing specific code points. For more information, see here: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt. More... | |
#define | UTF8_LOCALE_TURKISH_AND_AZERI_LATIN 2 |
Changes behavior of the case mapping implementation when processing specific code points. For more information, see here: ftp://ftp.unicode.org/Public/UNIDATA/SpecialCasing.txt. More... | |
#define | UTF8_LOCALE_MAXIMUM 3 |
Terminal value for locales. Valid locales do not exceed this value. More... | |
#define | UTF8_NORMALIZE_COMPOSE 0x00000001 |
Normalize input to Normalization Form C (NFC). More... | |
#define | UTF8_NORMALIZE_DECOMPOSE 0x00000002 |
Normalize input to Normalization Form D (NFD). More... | |
#define | UTF8_NORMALIZE_COMPATIBILITY 0x00000004 |
Changes Normalization Form from NFC to NFKC or from NFD to NFKD. More... | |
#define | UTF8_NORMALIZATION_RESULT_YES (0) |
Text is stable and does not need to be normalized. More... | |
#define | UTF8_NORMALIZATION_RESULT_MAYBE (1) |
Text is unstable, but normalization may be skipped. More... | |
#define | UTF8_NORMALIZATION_RESULT_NO (2) |
Text is unstable and must be normalized. More... | |
#define | UTF8_CATEGORY_LETTER_UPPERCASE 0x00000001 |
Uppercase letter code points, Lu in the Unicode database. More... | |
#define | UTF8_CATEGORY_LETTER_LOWERCASE 0x00000002 |
Lowercase letter code points, Ll in the Unicode database. More... | |
#define | UTF8_CATEGORY_LETTER_TITLECASE 0x00000004 |
Titlecase letter code points, Lt in the Unicode database. More... | |
#define | UTF8_CATEGORY_LETTER_MODIFIER 0x00000008 |
Modifier letter code points, Lm in the Unicode database. More... | |
#define | UTF8_CATEGORY_LETTER_OTHER 0x00000010 |
Other letter code points, Lo in the Unicode database. More... | |
#define | UTF8_CATEGORY_LETTER |
Combined flag for all letter categories. More... | |
#define | UTF8_CATEGORY_CASE_MAPPED |
Combined flag for all letter categories with case mapping. More... | |
#define | UTF8_CATEGORY_MARK_NON_SPACING 0x00000020 |
Non-spacing mark code points, Mn in the Unicode database. More... | |
#define | UTF8_CATEGORY_MARK_SPACING 0x00000040 |
Spacing mark code points, Mc in the Unicode database. More... | |
#define | UTF8_CATEGORY_MARK_ENCLOSING 0x00000080 |
Enclosing mark code points, Me in the Unicode database. More... | |
#define | UTF8_CATEGORY_MARK |
Combined flag for all mark categories. More... | |
#define | UTF8_CATEGORY_NUMBER_DECIMAL 0x00000100 |
Decimal number code points, Nd in the Unicode database. More... | |
#define | UTF8_CATEGORY_NUMBER_LETTER 0x00000200 |
Letter number code points, Nl in the Unicode database. More... | |
#define | UTF8_CATEGORY_NUMBER_OTHER 0x00000400 |
Other number code points, No in the Unicode database. More... | |
#define | UTF8_CATEGORY_NUMBER |
Combined flag for all number categories. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_CONNECTOR 0x00000800 |
Connector punctuation category, Pc in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_DASH 0x00001000 |
Dash punctuation category, Pd in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_OPEN 0x00002000 |
Open punctuation category, Ps in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_CLOSE 0x00004000 |
Close punctuation category, Pe in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_INITIAL 0x00008000 |
Initial punctuation category, Pi in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_FINAL 0x00010000 |
Final punctuation category, Pf in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION_OTHER 0x00020000 |
Other punctuation category, Po in the Unicode database. More... | |
#define | UTF8_CATEGORY_PUNCTUATION |
Combined flag for all punctuation categories. More... | |
#define | UTF8_CATEGORY_SYMBOL_MATH 0x00040000 |
Math symbol category, Sm in the Unicode database. More... | |
#define | UTF8_CATEGORY_SYMBOL_CURRENCY 0x00080000 |
Currency symbol category, Sc in the Unicode database. More... | |
#define | UTF8_CATEGORY_SYMBOL_MODIFIER 0x00100000 |
Modifier symbol category, Sk in the Unicode database. More... | |
#define | UTF8_CATEGORY_SYMBOL_OTHER 0x00200000 |
Other symbol category, So in the Unicode database. More... | |
#define | UTF8_CATEGORY_SYMBOL |
Combined flag for all symbol categories. More... | |
#define | UTF8_CATEGORY_SEPARATOR_SPACE 0x00400000 |
Space separator category, Zs in the Unicode database. More... | |
#define | UTF8_CATEGORY_SEPARATOR_LINE 0x00800000 |
Line separator category, Zl in the Unicode database. More... | |
#define | UTF8_CATEGORY_SEPARATOR_PARAGRAPH 0x01000000 |
Paragraph separator category, Zp in the Unicode database. More... | |
#define | UTF8_CATEGORY_SEPARATOR |
Combined flag for all separator categories. More... | |
#define | UTF8_CATEGORY_CONTROL 0x02000000 |
Control category, Cc in the Unicode database. More... | |
#define | UTF8_CATEGORY_FORMAT 0x04000000 |
Format category, Cf in the Unicode database. More... | |
#define | UTF8_CATEGORY_SURROGATE 0x08000000 |
Surrogate category, Cs in the Unicode database. More... | |
#define | UTF8_CATEGORY_PRIVATE_USE 0x10000000 |
Private use category, Co in the Unicode database. More... | |
#define | UTF8_CATEGORY_UNASSIGNED 0x20000000 |
Unassigned category, Cn in the Unicode database. More... | |
#define | UTF8_CATEGORY_COMPATIBILITY 0x40000000 |
Flag used for maintaining backwards compatibility with POSIX functions, not found in the Unicode database. More... | |
#define | UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER 0x80000000 |
Flag used for checking only the general category of code points at the start of a grapheme cluster. More... | |
#define | UTF8_CATEGORY_ISCNTRL |
Flag used for maintaining backwards compatibility with POSIX iscntrl function. More... | |
#define | UTF8_CATEGORY_ISPRINT |
Flag used for maintaining backwards compatibility with POSIX isprint function. More... | |
#define | UTF8_CATEGORY_ISSPACE |
Flag used for maintaining backwards compatibility with POSIX isspace function. More... | |
#define | UTF8_CATEGORY_ISBLANK |
Flag used for maintaining backwards compatibility with POSIX isblank function. More... | |
#define | UTF8_CATEGORY_ISGRAPH |
Flag used for maintaining backwards compatibility with POSIX isgraph function. More... | |
#define | UTF8_CATEGORY_ISPUNCT |
Flag used for maintaining backwards compatibility with POSIX ispunct function. More... | |
#define | UTF8_CATEGORY_ISALNUM |
Flag used for maintaining backwards compatibility with POSIX isalnum function. More... | |
#define | UTF8_CATEGORY_ISALPHA |
Flag used for maintaining backwards compatibility with POSIX isalpha function. More... | |
#define | UTF8_CATEGORY_ISUPPER |
Flag used for maintaining backwards compatibility with POSIX isupper function. More... | |
#define | UTF8_CATEGORY_ISLOWER |
Flag used for maintaining backwards compatibility with POSIX islower function. More... | |
#define | UTF8_CATEGORY_ISDIGIT |
Flag used for maintaining backwards compatibility with POSIX isdigit function. More... | |
#define | UTF8_CATEGORY_ISXDIGIT |
Flag used for maintaining backwards compatibility with POSIX isxdigit function. More... | |
#define | UTF8_WCHAR_SIZE (2) |
Specifies the size of the wchar_t type. On Windows this is two bytes; on POSIX systems it is four. If not specified on the command line, the compiler tries to automatically determine the size of the wchar_t type based on the environment. More... | |
#define | UTF8_WCHAR_UTF16 (1) |
The wchar_t type is treated as UTF-16 (two-byte, variable-length encoding). More... | |
#define | UTF8_API |
Calling convention for public functions. More... | |
Typedefs | |
typedef uint16_t | utf16_t |
UTF-16 encoded code point. More... | |
typedef uint32_t | unicode_t |
UTF-32 encoded code point. More... | |
Functions | |
UTF8_API size_t | utf8len (const char *text) |
Get the length in code points of a UTF-8 encoded string. More... | |
UTF8_API size_t | utf16toutf8 (const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors) |
Convert a UTF-16 encoded string to a UTF-8 encoded string. More... | |
UTF8_API size_t | utf32toutf8 (const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors) |
Convert a UTF-32 encoded string to a UTF-8 encoded string. More... | |
UTF8_API size_t | widetoutf8 (const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors) |
Convert a wide string to a UTF-8 encoded string. More... | |
UTF8_API size_t | utf8toutf16 (const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors) |
Convert a UTF-8 encoded string to a UTF-16 encoded string. More... | |
UTF8_API size_t | utf8toutf32 (const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors) |
Convert a UTF-8 encoded string to a UTF-32 encoded string. More... | |
UTF8_API size_t | utf8towide (const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors) |
Convert a UTF-8 encoded string to a wide string. More... | |
UTF8_API const char * | utf8seek (const char *text, size_t textSize, const char *textStart, off_t offset, int direction) |
Seek into a UTF-8 encoded string. More... | |
UTF8_API size_t | utf8envlocale () |
Returns the environment's locale as an enum value. More... | |
UTF8_API size_t | utf8toupper (const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors) |
Convert UTF-8 encoded text to uppercase. More... | |
UTF8_API size_t | utf8tolower (const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors) |
Convert UTF-8 encoded text to lowercase. More... | |
UTF8_API size_t | utf8totitle (const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors) |
Convert UTF-8 encoded text to titlecase. More... | |
UTF8_API size_t | utf8casefold (const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors) |
Remove case distinction from UTF-8 encoded text. More... | |
UTF8_API uint8_t | utf8isnormalized (const char *input, size_t inputSize, size_t flags, size_t *offset) |
Check if a string is stable in the specified Unicode Normalization Form. More... | |
UTF8_API size_t | utf8normalize (const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors) |
Normalize a string to the specified Unicode Normalization Form. More... | |
UTF8_API size_t | utf8iscategory (const char *input, size_t inputSize, size_t flags) |
Check if the input string conforms to the category specified by the flags. More... | |
Public interface for UTF-8 functions.
utf8rewind is a system library written in C, designed to extend the default string-handling functions with support for UTF-8 encoded text.