26 #ifndef _UTF8REWIND_H_ 27 #define _UTF8REWIND_H_ 82 #define UTF8_VERSION_MAKE(_major, _minor, _bugfix) \ 83 ((_major) * 10000) + ((_minor) * 100) + (_bugfix) 89 #define UTF8_VERSION_MAJOR 1 95 #define UTF8_VERSION_MINOR 5 101 #define UTF8_VERSION_BUGFIX 1 107 #define UTF8_VERSION \ 108 UTF8_VERSION_MAKE(UTF8_VERSION_MAJOR, UTF8_VERSION_MINOR, UTF8_VERSION_BUGFIX) 114 #define UTF8_VERSION_STRING "1.5.1" 120 #define UTF8_VERSION_GUARD(_major, _minor, _bugfix) \ 121 (UTF8_VERSION >= UTF8_VERSION_MAKE(_major, _minor, _bugfix)) 136 #define UTF8_ERR_NONE (0) 142 #define UTF8_ERR_INVALID_DATA (-1) 148 #define UTF8_ERR_INVALID_FLAG (-2) 154 #define UTF8_ERR_NOT_ENOUGH_SPACE (-3) 160 #define UTF8_ERR_OVERLAPPING_PARAMETERS (-4) 166 #define UTF8_ERR_INVALID_LOCALE (-5) 181 #define UTF8_LOCALE_DEFAULT 0 189 #define UTF8_LOCALE_LITHUANIAN 1 197 #define UTF8_LOCALE_TURKISH_AND_AZERI_LATIN 2 203 #define UTF8_LOCALE_MAXIMUM 3 218 #define UTF8_NORMALIZE_COMPOSE 0x00000001 224 #define UTF8_NORMALIZE_DECOMPOSE 0x00000002 230 #define UTF8_NORMALIZE_COMPATIBILITY 0x00000004 236 #define UTF8_NORMALIZATION_RESULT_YES (0) 242 #define UTF8_NORMALIZATION_RESULT_MAYBE (1) 248 #define UTF8_NORMALIZATION_RESULT_NO (2) 263 #define UTF8_CATEGORY_LETTER_UPPERCASE 0x00000001 269 #define UTF8_CATEGORY_LETTER_LOWERCASE 0x00000002 275 #define UTF8_CATEGORY_LETTER_TITLECASE 0x00000004 281 #define UTF8_CATEGORY_LETTER_MODIFIER 0x00000008 287 #define UTF8_CATEGORY_LETTER_OTHER 0x00000010 293 #define UTF8_CATEGORY_LETTER \ 294 (UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \ 295 UTF8_CATEGORY_LETTER_TITLECASE | UTF8_CATEGORY_LETTER_MODIFIER | \ 296 UTF8_CATEGORY_LETTER_OTHER) 302 #define UTF8_CATEGORY_CASE_MAPPED \ 303 (UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \ 304 UTF8_CATEGORY_LETTER_TITLECASE) 310 #define UTF8_CATEGORY_MARK_NON_SPACING 0x00000020 316 #define UTF8_CATEGORY_MARK_SPACING 0x00000040 322 #define UTF8_CATEGORY_MARK_ENCLOSING 0x00000080 328 #define UTF8_CATEGORY_MARK \ 329 (UTF8_CATEGORY_MARK_NON_SPACING | UTF8_CATEGORY_MARK_SPACING | \ 330 UTF8_CATEGORY_MARK_ENCLOSING) 336 #define UTF8_CATEGORY_NUMBER_DECIMAL 0x00000100 342 #define UTF8_CATEGORY_NUMBER_LETTER 0x00000200 348 #define UTF8_CATEGORY_NUMBER_OTHER 0x00000400 354 #define UTF8_CATEGORY_NUMBER \ 355 (UTF8_CATEGORY_NUMBER_DECIMAL | UTF8_CATEGORY_NUMBER_LETTER | \ 356 UTF8_CATEGORY_NUMBER_OTHER) 362 #define UTF8_CATEGORY_PUNCTUATION_CONNECTOR 0x00000800 368 #define UTF8_CATEGORY_PUNCTUATION_DASH 0x00001000 374 #define UTF8_CATEGORY_PUNCTUATION_OPEN 0x00002000 380 #define UTF8_CATEGORY_PUNCTUATION_CLOSE 0x00004000 386 #define UTF8_CATEGORY_PUNCTUATION_INITIAL 0x00008000 392 #define UTF8_CATEGORY_PUNCTUATION_FINAL 0x00010000 398 #define UTF8_CATEGORY_PUNCTUATION_OTHER 0x00020000 404 #define UTF8_CATEGORY_PUNCTUATION \ 405 (UTF8_CATEGORY_PUNCTUATION_CONNECTOR | UTF8_CATEGORY_PUNCTUATION_DASH | \ 406 UTF8_CATEGORY_PUNCTUATION_OPEN | UTF8_CATEGORY_PUNCTUATION_CLOSE | \ 407 UTF8_CATEGORY_PUNCTUATION_INITIAL | UTF8_CATEGORY_PUNCTUATION_FINAL | \ 408 UTF8_CATEGORY_PUNCTUATION_OTHER) 414 #define UTF8_CATEGORY_SYMBOL_MATH 0x00040000 420 #define UTF8_CATEGORY_SYMBOL_CURRENCY 0x00080000 426 #define UTF8_CATEGORY_SYMBOL_MODIFIER 0x00100000 432 #define UTF8_CATEGORY_SYMBOL_OTHER 0x00200000 438 #define UTF8_CATEGORY_SYMBOL \ 439 (UTF8_CATEGORY_SYMBOL_MATH | UTF8_CATEGORY_SYMBOL_CURRENCY | \ 440 UTF8_CATEGORY_SYMBOL_MODIFIER | UTF8_CATEGORY_SYMBOL_OTHER) 446 #define UTF8_CATEGORY_SEPARATOR_SPACE 0x00400000 452 #define UTF8_CATEGORY_SEPARATOR_LINE 0x00800000 458 #define UTF8_CATEGORY_SEPARATOR_PARAGRAPH 0x01000000 464 #define UTF8_CATEGORY_SEPARATOR \ 465 (UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_SEPARATOR_LINE | \ 466 UTF8_CATEGORY_SEPARATOR_PARAGRAPH) 472 #define UTF8_CATEGORY_CONTROL 0x02000000 478 #define UTF8_CATEGORY_FORMAT 0x04000000 484 #define UTF8_CATEGORY_SURROGATE 0x08000000 490 #define UTF8_CATEGORY_PRIVATE_USE 0x10000000 496 #define UTF8_CATEGORY_UNASSIGNED 0x20000000 503 #define UTF8_CATEGORY_COMPATIBILITY 0x40000000 510 #define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER 0x80000000 517 #define UTF8_CATEGORY_ISCNTRL \ 518 (UTF8_CATEGORY_COMPATIBILITY | \ 519 UTF8_CATEGORY_CONTROL) 526 #define UTF8_CATEGORY_ISPRINT \ 527 (UTF8_CATEGORY_COMPATIBILITY | \ 528 UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \ 529 UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \ 530 UTF8_CATEGORY_SEPARATOR) 537 #define UTF8_CATEGORY_ISSPACE \ 538 (UTF8_CATEGORY_COMPATIBILITY | \ 539 UTF8_CATEGORY_SEPARATOR_SPACE) 546 #define UTF8_CATEGORY_ISBLANK \ 547 (UTF8_CATEGORY_COMPATIBILITY | \ 548 UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_PRIVATE_USE) 555 #define UTF8_CATEGORY_ISGRAPH \ 556 (UTF8_CATEGORY_COMPATIBILITY | \ 557 UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \ 558 UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL) 565 #define UTF8_CATEGORY_ISPUNCT \ 566 (UTF8_CATEGORY_COMPATIBILITY | \ 567 UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL) 574 #define UTF8_CATEGORY_ISALNUM \ 575 (UTF8_CATEGORY_COMPATIBILITY | \ 576 UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER) 583 #define UTF8_CATEGORY_ISALPHA \ 584 (UTF8_CATEGORY_COMPATIBILITY | \ 585 UTF8_CATEGORY_LETTER) 592 #define UTF8_CATEGORY_ISUPPER \ 593 (UTF8_CATEGORY_COMPATIBILITY | \ 594 UTF8_CATEGORY_LETTER_UPPERCASE) 601 #define UTF8_CATEGORY_ISLOWER \ 602 (UTF8_CATEGORY_COMPATIBILITY | \ 603 UTF8_CATEGORY_LETTER_LOWERCASE) 610 #define UTF8_CATEGORY_ISDIGIT \ 611 (UTF8_CATEGORY_COMPATIBILITY | \ 612 UTF8_CATEGORY_NUMBER) 619 #define UTF8_CATEGORY_ISXDIGIT \ 620 (UTF8_CATEGORY_COMPATIBILITY | \ 621 UTF8_CATEGORY_NUMBER | UTF8_CATEGORY_PRIVATE_USE) 640 #ifndef UTF8_WCHAR_SIZE 641 #if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX) 642 #define UTF8_WCHAR_SIZE (4) 644 #define UTF8_WCHAR_SIZE (2) 648 #if (UTF8_WCHAR_SIZE == 4) 654 #define UTF8_WCHAR_UTF32 (1) 655 #elif (UTF8_WCHAR_SIZE == 2) 661 #define UTF8_WCHAR_UTF16 (1) 663 #error Invalid size for wchar_t type. 673 #define UTF8_API extern "C" 893 UTF8_API size_t widetoutf8(
const wchar_t* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
1064 UTF8_API size_t utf8towide(
const char* input,
size_t inputSize,
wchar_t* target,
size_t targetSize, int32_t* errors);
1119 UTF8_API const char*
utf8seek(
const char* text,
size_t textSize,
const char* textStart, off_t offset,
int direction);
1267 UTF8_API size_t utf8toupper(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1363 UTF8_API size_t utf8tolower(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1446 UTF8_API size_t utf8totitle(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1551 UTF8_API size_t utf8casefold(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1769 UTF8_API size_t utf8normalize(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t flags, int32_t* errors);
UTF8_API size_t utf8totitle(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to titlecase.
UTF8_API size_t utf8iscategory(const char *input, size_t inputSize, size_t flags)
Check if the input string conforms to the category specified by the flags.
UTF8_API const char * utf8seek(const char *text, size_t textSize, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.
UTF8_API size_t utf8envlocale()
Returns the environment's locale as an enum value.
UTF8_API size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
#define UTF8_API
Calling convention for public functions.
Definition: utf8rewind.h:675
UTF8_API size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
uint16_t utf16_t
UTF-16 encoded code point.
Definition: utf8rewind.h:692
UTF8_API size_t utf8toupper(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to uppercase.
UTF8_API size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8len(const char *text)
Get the length in code points of a UTF-8 encoded string.
UTF8_API size_t utf8tolower(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to lowercase.
UTF8_API size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.
UTF8_API size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8casefold(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Remove case distinction from UTF-8 encoded text.
UTF8_API uint8_t utf8isnormalized(const char *input, size_t inputSize, size_t flags, size_t *offset)
Check if a string is stable in the specified Unicode Normalization Form.
UTF8_API size_t utf8normalize(const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors)
Normalize a string to the specified Unicode Normalization Form.
UTF8_API size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:698