26 #ifndef _UTF8REWIND_H_
27 #define _UTF8REWIND_H_
/**
 * Pack a three-part version number into one comparable integer.
 *
 * The result is `major * 10000 + minor * 100 + bugfix`, so 1.5.0 becomes
 * 10500. The expansion is a constant expression and may be used in `#if`.
 *
 * The whole replacement list is parenthesized so the macro behaves as a
 * single operand in any surrounding expression, e.g.
 * `2 * UTF8_VERSION_MAKE(1, 5, 0)` or `UTF8_VERSION_MAKE(1, 5, 0) % 100`.
 *
 * @param _major  Major version component.
 * @param _minor  Minor version component.
 * @param _bugfix Bugfix version component.
 */
#define UTF8_VERSION_MAKE(_major, _minor, _bugfix) \
	(((_major) * 10000) + ((_minor) * 100) + (_bugfix))
/** Major component of the library version. */
#define UTF8_VERSION_MAJOR   1

/** Minor component of the library version. */
#define UTF8_VERSION_MINOR   5

/** Bugfix component of the library version. */
#define UTF8_VERSION_BUGFIX  0

/** Library version packed into one integer via UTF8_VERSION_MAKE. */
#define UTF8_VERSION \
	UTF8_VERSION_MAKE( \
		UTF8_VERSION_MAJOR, \
		UTF8_VERSION_MINOR, \
		UTF8_VERSION_BUGFIX)

/** Library version as a string literal, "major.minor.bugfix". */
#define UTF8_VERSION_STRING  "1.5.0"

/**
 * Non-zero when the library version is at least the version given.
 *
 * @param _major  Minimum major version.
 * @param _minor  Minimum minor version.
 * @param _bugfix Minimum bugfix version.
 */
#define UTF8_VERSION_GUARD(_major, _minor, _bugfix) \
	(UTF8_VERSION_MAKE(_major, _minor, _bugfix) <= UTF8_VERSION)
/*
 * Error codes: zero means no error, every failure is a distinct negative
 * value.
 * NOTE(review): presumably these are what the functions below store through
 * their `int32_t* errors` parameter - confirm against the implementation.
 */
#define UTF8_ERR_NONE                    (0)   /* no error */
#define UTF8_ERR_INVALID_DATA            (-1)  /* invalid data */
#define UTF8_ERR_INVALID_FLAG            (-2)  /* invalid flag */
#define UTF8_ERR_NOT_ENOUGH_SPACE        (-3)  /* not enough space */
#define UTF8_ERR_OVERLAPPING_PARAMETERS  (-4)  /* overlapping parameters */
#define UTF8_ERR_INVALID_LOCALE          (-5)  /* invalid locale */
/*
 * Locale identifiers.
 * NOTE(review): presumably the values accepted by the `locale` parameter of
 * the case-mapping functions and returned by utf8envlocale() - confirm.
 */
#define UTF8_LOCALE_DEFAULT                  0  /* default locale */
#define UTF8_LOCALE_LITHUANIAN               1  /* Lithuanian */
#define UTF8_LOCALE_TURKISH_AND_AZERI_LATIN  2  /* Turkish and Azeri (Latin) */

/* One greater than the highest locale identifier defined above. */
#define UTF8_LOCALE_MAXIMUM                  3
/*
 * Normalization flags. Each flag occupies its own bit so they can be
 * combined with bitwise OR.
 * NOTE(review): presumably passed as the `flags` argument of utf8normalize()
 * and utf8isnormalized() - confirm.
 */
#define UTF8_NORMALIZE_COMPOSE        0x00000001  /* bit 0 */
#define UTF8_NORMALIZE_DECOMPOSE      0x00000002  /* bit 1 */
#define UTF8_NORMALIZE_COMPATIBILITY  0x00000004  /* bit 2 */

/*
 * Three-valued normalization check results.
 * NOTE(review): presumably the return values of utf8isnormalized() - confirm.
 */
#define UTF8_NORMALIZATION_RESULT_YES    (0)
#define UTF8_NORMALIZATION_RESULT_MAYBE  (1)
#define UTF8_NORMALIZATION_RESULT_NO     (2)
/* Letter categories: one bit each, bits 0 through 4. */
#define UTF8_CATEGORY_LETTER_UPPERCASE  0x00000001
#define UTF8_CATEGORY_LETTER_LOWERCASE  0x00000002
#define UTF8_CATEGORY_LETTER_TITLECASE  0x00000004
#define UTF8_CATEGORY_LETTER_MODIFIER   0x00000008
#define UTF8_CATEGORY_LETTER_OTHER      0x00000010

/* Union of all five letter categories. */
#define UTF8_CATEGORY_LETTER \
	(UTF8_CATEGORY_LETTER_UPPERCASE | \
	UTF8_CATEGORY_LETTER_LOWERCASE | \
	UTF8_CATEGORY_LETTER_TITLECASE | \
	UTF8_CATEGORY_LETTER_MODIFIER | \
	UTF8_CATEGORY_LETTER_OTHER)

/* Union of the uppercase, lowercase and titlecase letter categories. */
#define UTF8_CATEGORY_CASE_MAPPED \
	(UTF8_CATEGORY_LETTER_UPPERCASE | \
	UTF8_CATEGORY_LETTER_LOWERCASE | \
	UTF8_CATEGORY_LETTER_TITLECASE)
/* Mark categories: one bit each, bits 5 through 7. */
#define UTF8_CATEGORY_MARK_NON_SPACING  0x00000020
#define UTF8_CATEGORY_MARK_SPACING      0x00000040
#define UTF8_CATEGORY_MARK_ENCLOSING    0x00000080

/* Union of all three mark categories. */
#define UTF8_CATEGORY_MARK \
	(UTF8_CATEGORY_MARK_NON_SPACING | \
	UTF8_CATEGORY_MARK_SPACING | \
	UTF8_CATEGORY_MARK_ENCLOSING)
/* Number categories: one bit each, bits 8 through 10. */
#define UTF8_CATEGORY_NUMBER_DECIMAL  0x00000100
#define UTF8_CATEGORY_NUMBER_LETTER   0x00000200
#define UTF8_CATEGORY_NUMBER_OTHER    0x00000400

/* Union of all three number categories. */
#define UTF8_CATEGORY_NUMBER \
	(UTF8_CATEGORY_NUMBER_DECIMAL | \
	UTF8_CATEGORY_NUMBER_LETTER | \
	UTF8_CATEGORY_NUMBER_OTHER)
/* Punctuation categories: one bit each, bits 11 through 17. */
#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR  0x00000800
#define UTF8_CATEGORY_PUNCTUATION_DASH       0x00001000
#define UTF8_CATEGORY_PUNCTUATION_OPEN       0x00002000
#define UTF8_CATEGORY_PUNCTUATION_CLOSE      0x00004000
#define UTF8_CATEGORY_PUNCTUATION_INITIAL    0x00008000
#define UTF8_CATEGORY_PUNCTUATION_FINAL      0x00010000
#define UTF8_CATEGORY_PUNCTUATION_OTHER      0x00020000

/* Union of all seven punctuation categories. */
#define UTF8_CATEGORY_PUNCTUATION \
	(UTF8_CATEGORY_PUNCTUATION_CONNECTOR | \
	UTF8_CATEGORY_PUNCTUATION_DASH | \
	UTF8_CATEGORY_PUNCTUATION_OPEN | \
	UTF8_CATEGORY_PUNCTUATION_CLOSE | \
	UTF8_CATEGORY_PUNCTUATION_INITIAL | \
	UTF8_CATEGORY_PUNCTUATION_FINAL | \
	UTF8_CATEGORY_PUNCTUATION_OTHER)
/* Symbol categories: one bit each, bits 18 through 21. */
#define UTF8_CATEGORY_SYMBOL_MATH      0x00040000
#define UTF8_CATEGORY_SYMBOL_CURRENCY  0x00080000
#define UTF8_CATEGORY_SYMBOL_MODIFIER  0x00100000
#define UTF8_CATEGORY_SYMBOL_OTHER     0x00200000

/* Union of all four symbol categories. */
#define UTF8_CATEGORY_SYMBOL \
	(UTF8_CATEGORY_SYMBOL_MATH | \
	UTF8_CATEGORY_SYMBOL_CURRENCY | \
	UTF8_CATEGORY_SYMBOL_MODIFIER | \
	UTF8_CATEGORY_SYMBOL_OTHER)
/* Separator categories: one bit each, bits 22 through 24. */
#define UTF8_CATEGORY_SEPARATOR_SPACE      0x00400000
#define UTF8_CATEGORY_SEPARATOR_LINE       0x00800000
#define UTF8_CATEGORY_SEPARATOR_PARAGRAPH  0x01000000

/* Union of all three separator categories. */
#define UTF8_CATEGORY_SEPARATOR \
	(UTF8_CATEGORY_SEPARATOR_SPACE | \
	UTF8_CATEGORY_SEPARATOR_LINE | \
	UTF8_CATEGORY_SEPARATOR_PARAGRAPH)
/* Remaining single-bit categories, bits 25 through 29. */
#define UTF8_CATEGORY_CONTROL      0x02000000
#define UTF8_CATEGORY_FORMAT       0x04000000
#define UTF8_CATEGORY_SURROGATE    0x08000000
#define UTF8_CATEGORY_PRIVATE_USE  0x10000000
#define UTF8_CATEGORY_UNASSIGNED   0x20000000

/*
 * Modifier bits 30 and 31. They share the flag word with the categories
 * above; UTF8_CATEGORY_COMPATIBILITY is OR-ed into every UTF8_CATEGORY_IS*
 * mask in this header.
 * NOTE(review): how the checking function interprets these two bits is not
 * visible in this excerpt - confirm against the implementation.
 */
#define UTF8_CATEGORY_COMPATIBILITY            0x40000000
#define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER  0x80000000
/*
 * Composite category masks. Every mask below is UTF8_CATEGORY_COMPATIBILITY
 * OR-ed with one or more category bits.
 * NOTE(review): the names suggest these mirror the <ctype.h> is*()
 * classification functions - confirm against the implementation.
 */

/* Control. */
#define UTF8_CATEGORY_ISCNTRL \
	(UTF8_CATEGORY_CONTROL | UTF8_CATEGORY_COMPATIBILITY)

/* Letters, numbers, punctuation, symbols and separators. */
#define UTF8_CATEGORY_ISPRINT \
	(UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \
	UTF8_CATEGORY_SEPARATOR | \
	UTF8_CATEGORY_COMPATIBILITY)

/* Space separators. */
#define UTF8_CATEGORY_ISSPACE \
	(UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_COMPATIBILITY)

/*
 * Space separators plus the private-use bit.
 * NOTE(review): the private-use bit presumably serves to tell this mask
 * apart from UTF8_CATEGORY_ISSPACE - confirm.
 */
#define UTF8_CATEGORY_ISBLANK \
	(UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_PRIVATE_USE | \
	UTF8_CATEGORY_COMPATIBILITY)

/* Letters, numbers, punctuation and symbols. */
#define UTF8_CATEGORY_ISGRAPH \
	(UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \
	UTF8_CATEGORY_COMPATIBILITY)

/* Punctuation and symbols. */
#define UTF8_CATEGORY_ISPUNCT \
	(UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \
	UTF8_CATEGORY_COMPATIBILITY)

/* Letters and numbers. */
#define UTF8_CATEGORY_ISALNUM \
	(UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
	UTF8_CATEGORY_COMPATIBILITY)

/* Letters. */
#define UTF8_CATEGORY_ISALPHA \
	(UTF8_CATEGORY_LETTER | UTF8_CATEGORY_COMPATIBILITY)

/* Uppercase letters. */
#define UTF8_CATEGORY_ISUPPER \
	(UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_COMPATIBILITY)

/* Lowercase letters. */
#define UTF8_CATEGORY_ISLOWER \
	(UTF8_CATEGORY_LETTER_LOWERCASE | UTF8_CATEGORY_COMPATIBILITY)

/* Numbers (all three number categories). */
#define UTF8_CATEGORY_ISDIGIT \
	(UTF8_CATEGORY_NUMBER | UTF8_CATEGORY_COMPATIBILITY)

/*
 * Numbers plus the private-use bit.
 * NOTE(review): the private-use bit presumably serves to tell this mask
 * apart from UTF8_CATEGORY_ISDIGIT - confirm.
 */
#define UTF8_CATEGORY_ISXDIGIT \
	(UTF8_CATEGORY_NUMBER | UTF8_CATEGORY_PRIVATE_USE | \
	UTF8_CATEGORY_COMPATIBILITY)
/*
 * UTF8_WCHAR_SIZE: size of wchar_t in bytes, either 4 or 2.
 *
 * A definition supplied before this point (for example on the compiler
 * command line) is kept. Otherwise the size is detected from the compiler's
 * predefined macros and from WCHAR_MAX; identifiers that are not defined
 * evaluate to 0 inside #if, so the missing alternatives are harmless.
 * WCHAR_MAX and UINT16_MAX come from <wchar.h> and <stdint.h>.
 */
#ifndef UTF8_WCHAR_SIZE
	#if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX)
		#define UTF8_WCHAR_SIZE (4)
	#else
		#define UTF8_WCHAR_SIZE (2)
	#endif
#endif

/*
 * Exactly one of UTF8_WCHAR_UTF32 / UTF8_WCHAR_UTF16 is defined, matching
 * UTF8_WCHAR_SIZE. Any other size stops compilation.
 */
#if (UTF8_WCHAR_SIZE == 4)
	#define UTF8_WCHAR_UTF32 (1)
#elif (UTF8_WCHAR_SIZE == 2)
	#define UTF8_WCHAR_UTF16 (1)
#else
	#error Invalid size for wchar_t type.
#endif
/*
 * UTF8_API: linkage prefix placed on every public function declaration.
 *
 * `extern "C"` is only valid C++ syntax, so it is applied when the header is
 * compiled as C++ and the macro expands to nothing when compiled as C.
 */
#if defined(__cplusplus)
	#define UTF8_API extern "C"
#else
	#define UTF8_API
#endif
893 UTF8_API size_t widetoutf8(
const wchar_t* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
1064 UTF8_API size_t utf8towide(
const char* input,
size_t inputSize,
wchar_t* target,
size_t targetSize, int32_t* errors);
1119 UTF8_API const char*
utf8seek(
const char* text,
size_t textSize,
const char* textStart, off_t offset,
int direction);
1267 UTF8_API size_t utf8toupper(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1363 UTF8_API size_t utf8tolower(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1446 UTF8_API size_t utf8totitle(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1551 UTF8_API size_t utf8casefold(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t locale, int32_t* errors);
1769 UTF8_API size_t utf8normalize(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t flags, int32_t* errors);
UTF8_API size_t utf8totitle(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to titlecase.
UTF8_API size_t utf8iscategory(const char *input, size_t inputSize, size_t flags)
Check if the input string conforms to the category specified by the flags.
UTF8_API const char * utf8seek(const char *text, size_t textSize, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.
UTF8_API size_t utf8envlocale()
Returns the environment's locale as an enum value.
UTF8_API size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
#define UTF8_API
Calling convention for public functions.
Definition: utf8rewind.h:675
UTF8_API size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
uint16_t utf16_t
UTF-16 encoded code point.
Definition: utf8rewind.h:692
UTF8_API size_t utf8toupper(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to uppercase.
UTF8_API size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8len(const char *text)
Get the length in code points of a UTF-8 encoded string.
UTF8_API size_t utf8tolower(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Convert UTF-8 encoded text to lowercase.
UTF8_API size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.
UTF8_API size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8casefold(const char *input, size_t inputSize, char *target, size_t targetSize, size_t locale, int32_t *errors)
Remove case distinction from UTF-8 encoded text.
UTF8_API uint8_t utf8isnormalized(const char *input, size_t inputSize, size_t flags, size_t *offset)
Check if a string is stable in the specified Unicode Normalization Form.
UTF8_API size_t utf8normalize(const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors)
Normalize a string to the specified Unicode Normalization Form.
UTF8_API size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:698