26 #ifndef _UTF8REWIND_H_
27 #define _UTF8REWIND_H_
/**
 * @brief Pack a version triple into one comparable integer.
 *
 * The value is `major * 10000 + minor * 100 + bugfix`, so 1.4.1 becomes 10401.
 * The whole expansion is parenthesized so that the macro keeps its value when
 * it is embedded in a larger expression (e.g. `UTF8_VERSION * 2`).
 */
#define UTF8_VERSION_MAKE(_major, _minor, _bugfix) \
    (((_major) * 10000) + ((_minor) * 100) + (_bugfix))

/** @brief Major component of the library version. */
#define UTF8_VERSION_MAJOR 1

/** @brief Minor component of the library version. */
#define UTF8_VERSION_MINOR 4

/** @brief Bugfix component of the library version. */
#define UTF8_VERSION_BUGFIX 1

/** @brief Library version packed with UTF8_VERSION_MAKE. */
#define UTF8_VERSION \
    UTF8_VERSION_MAKE(UTF8_VERSION_MAJOR, UTF8_VERSION_MINOR, UTF8_VERSION_BUGFIX)

/** @brief Library version as a string; must be kept in sync with the three numbers above. */
#define UTF8_VERSION_STRING "1.4.1"

/**
 * @brief Evaluate to non-zero when the library version is at least the
 *        requested major.minor.bugfix.
 *
 * Usable both in `#if` directives and in ordinary expressions.
 */
#define UTF8_VERSION_GUARD(_major, _minor, _bugfix) \
    (UTF8_VERSION >= UTF8_VERSION_MAKE(_major, _minor, _bugfix))
/*
 * Error codes. Zero means success, every failure is a distinct negative value.
 * NOTE(review): presumably these are the values written through the
 * `int32_t* errors` out-parameter of the conversion functions - confirm
 * against the implementation.
 */

/** @brief No error. */
#define UTF8_ERR_NONE (0)

/** @brief Invalid data. */
#define UTF8_ERR_INVALID_DATA (-1)

/** @brief Invalid flag. */
#define UTF8_ERR_INVALID_FLAG (-2)

/** @brief Not enough space. */
#define UTF8_ERR_NOT_ENOUGH_SPACE (-3)

/** @brief Overlapping parameters. */
#define UTF8_ERR_OVERLAPPING_PARAMETERS (-4)
/*
 * Size of wchar_t in bytes. A value supplied by the build (for example
 * -DUTF8_WCHAR_SIZE=2) wins; otherwise it is detected from the compiler's
 * predefined macros and from WCHAR_MAX. Identifiers that are not defined
 * evaluate to 0 inside #if, so each probe is simply skipped on toolchains
 * that lack it.
 */
#ifndef UTF8_WCHAR_SIZE
    #if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX)
        #define UTF8_WCHAR_SIZE (4)
    #else
        #define UTF8_WCHAR_SIZE (2)
    #endif
#endif

/*
 * Exactly one of UTF8_WCHAR_UTF32 / UTF8_WCHAR_UTF16 is defined, selecting
 * the encoding assumed for wide strings; any other size is rejected at
 * compile time.
 */
#if (UTF8_WCHAR_SIZE == 4)
    #define UTF8_WCHAR_UTF32 (1)
#elif (UTF8_WCHAR_SIZE == 2)
    #define UTF8_WCHAR_UTF16 (1)
#else
    #error Invalid size for wchar_t type.
#endif
/*
 * Calling convention for public functions.
 *
 * C++ translation units need `extern "C"` so the declarations bind to the
 * unmangled C symbols; in C the linkage specifier is a syntax error, so the
 * macro must expand to nothing there.
 */
#if defined(__cplusplus)
    #define UTF8_API extern "C"
#else
    #define UTF8_API
#endif
392 UTF8_API size_t widetoutf8(
const wchar_t* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
560 UTF8_API size_t utf8towide(
const char* input,
size_t inputSize,
wchar_t* target,
size_t targetSize, int32_t* errors);
615 UTF8_API const char*
utf8seek(
const char* text,
size_t textSize,
const char* textStart, off_t offset,
int direction);
704 UTF8_API size_t utf8toupper(
const char* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
798 UTF8_API size_t utf8tolower(
const char* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
886 UTF8_API size_t utf8totitle(
const char* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
996 UTF8_API size_t utf8casefold(
const char* input,
size_t inputSize,
char* target,
size_t targetSize, int32_t* errors);
/*
 * Normalization flags. Each is a distinct bit so they can be OR-ed together.
 * NOTE(review): presumably passed as the `flags` argument of utf8normalize
 * and utf8isnormalized - confirm which combinations are accepted.
 */

/** @brief Request a composed form. */
#define UTF8_NORMALIZE_COMPOSE 0x00000001

/** @brief Request a decomposed form. */
#define UTF8_NORMALIZE_DECOMPOSE 0x00000002

/** @brief Request compatibility normalization. */
#define UTF8_NORMALIZE_COMPATIBILITY 0x00000004

/*
 * Normalization check results.
 * NOTE(review): presumably the values returned by utf8isnormalized - confirm.
 */

/** @brief The text is normalized. */
#define UTF8_NORMALIZATION_RESULT_YES (0)

/** @brief The text may or may not be normalized. */
#define UTF8_NORMALIZATION_RESULT_MAYBE (1)

/** @brief The text is not normalized. */
#define UTF8_NORMALIZATION_RESULT_NO (2)
1260 UTF8_API size_t utf8normalize(
const char* input,
size_t inputSize,
char* target,
size_t targetSize,
size_t flags, int32_t* errors);
/*
 * Category flags for letters. Each flag is a single bit; the group macros
 * below are plain bitwise ORs of the individual flags.
 * NOTE(review): presumably used as the `flags` argument of utf8iscategory -
 * confirm.
 */

/** @brief Uppercase letter. */
#define UTF8_CATEGORY_LETTER_UPPERCASE 0x00000001

/** @brief Lowercase letter. */
#define UTF8_CATEGORY_LETTER_LOWERCASE 0x00000002

/** @brief Titlecase letter. */
#define UTF8_CATEGORY_LETTER_TITLECASE 0x00000004

/** @brief Modifier letter. */
#define UTF8_CATEGORY_LETTER_MODIFIER 0x00000008

/** @brief Other letter. */
#define UTF8_CATEGORY_LETTER_OTHER 0x00000010

/** @brief All five letter flags combined. */
#define UTF8_CATEGORY_LETTER \
    (UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \
    UTF8_CATEGORY_LETTER_TITLECASE | UTF8_CATEGORY_LETTER_MODIFIER | \
    UTF8_CATEGORY_LETTER_OTHER)

/** @brief The uppercase, lowercase and titlecase letter flags combined. */
#define UTF8_CATEGORY_CASE_MAPPED \
    (UTF8_CATEGORY_LETTER_UPPERCASE | UTF8_CATEGORY_LETTER_LOWERCASE | \
    UTF8_CATEGORY_LETTER_TITLECASE)
/* Category flags for marks; one bit each. */

/** @brief Non-spacing mark. */
#define UTF8_CATEGORY_MARK_NON_SPACING 0x00000020

/** @brief Spacing mark. */
#define UTF8_CATEGORY_MARK_SPACING 0x00000040

/** @brief Enclosing mark. */
#define UTF8_CATEGORY_MARK_ENCLOSING 0x00000080

/** @brief All three mark flags combined. */
#define UTF8_CATEGORY_MARK \
    (UTF8_CATEGORY_MARK_NON_SPACING | UTF8_CATEGORY_MARK_SPACING | \
    UTF8_CATEGORY_MARK_ENCLOSING)
/* Category flags for numbers; one bit each. */

/** @brief Decimal number. */
#define UTF8_CATEGORY_NUMBER_DECIMAL 0x00000100

/** @brief Letter number. */
#define UTF8_CATEGORY_NUMBER_LETTER 0x00000200

/** @brief Other number. */
#define UTF8_CATEGORY_NUMBER_OTHER 0x00000400

/** @brief All three number flags combined. */
#define UTF8_CATEGORY_NUMBER \
    (UTF8_CATEGORY_NUMBER_DECIMAL | UTF8_CATEGORY_NUMBER_LETTER | \
    UTF8_CATEGORY_NUMBER_OTHER)
/* Category flags for punctuation; one bit each. */

/** @brief Connector punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_CONNECTOR 0x00000800

/** @brief Dash punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_DASH 0x00001000

/** @brief Open punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_OPEN 0x00002000

/** @brief Close punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_CLOSE 0x00004000

/** @brief Initial punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_INITIAL 0x00008000

/** @brief Final punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_FINAL 0x00010000

/** @brief Other punctuation. */
#define UTF8_CATEGORY_PUNCTUATION_OTHER 0x00020000

/** @brief All seven punctuation flags combined. */
#define UTF8_CATEGORY_PUNCTUATION \
    (UTF8_CATEGORY_PUNCTUATION_CONNECTOR | UTF8_CATEGORY_PUNCTUATION_DASH | \
    UTF8_CATEGORY_PUNCTUATION_OPEN | UTF8_CATEGORY_PUNCTUATION_CLOSE | \
    UTF8_CATEGORY_PUNCTUATION_INITIAL | UTF8_CATEGORY_PUNCTUATION_FINAL | \
    UTF8_CATEGORY_PUNCTUATION_OTHER)
/* Category flags for symbols; one bit each. */

/** @brief Math symbol. */
#define UTF8_CATEGORY_SYMBOL_MATH 0x00040000

/** @brief Currency symbol. */
#define UTF8_CATEGORY_SYMBOL_CURRENCY 0x00080000

/** @brief Modifier symbol. */
#define UTF8_CATEGORY_SYMBOL_MODIFIER 0x00100000

/** @brief Other symbol. */
#define UTF8_CATEGORY_SYMBOL_OTHER 0x00200000

/** @brief All four symbol flags combined. */
#define UTF8_CATEGORY_SYMBOL \
    (UTF8_CATEGORY_SYMBOL_MATH | UTF8_CATEGORY_SYMBOL_CURRENCY | \
    UTF8_CATEGORY_SYMBOL_MODIFIER | UTF8_CATEGORY_SYMBOL_OTHER)
/* Category flags for separators; one bit each. */

/** @brief Space separator. */
#define UTF8_CATEGORY_SEPARATOR_SPACE 0x00400000

/** @brief Line separator. */
#define UTF8_CATEGORY_SEPARATOR_LINE 0x00800000

/** @brief Paragraph separator. */
#define UTF8_CATEGORY_SEPARATOR_PARAGRAPH 0x01000000

/** @brief All three separator flags combined. */
#define UTF8_CATEGORY_SEPARATOR \
    (UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_SEPARATOR_LINE | \
    UTF8_CATEGORY_SEPARATOR_PARAGRAPH)
/* Stand-alone category flags; they occupy the seven highest bits of a 32-bit mask. */

/** @brief Control. */
#define UTF8_CATEGORY_CONTROL 0x02000000

/** @brief Format. */
#define UTF8_CATEGORY_FORMAT 0x04000000

/** @brief Surrogate. */
#define UTF8_CATEGORY_SURROGATE 0x08000000

/** @brief Private use. */
#define UTF8_CATEGORY_PRIVATE_USE 0x10000000

/** @brief Unassigned. */
#define UTF8_CATEGORY_UNASSIGNED 0x20000000

/**
 * @brief Compatibility flag; set in every UTF8_CATEGORY_IS* composite.
 *
 * NOTE(review): presumably switches utf8iscategory to C-library-compatible
 * matching - confirm against the implementation.
 */
#define UTF8_CATEGORY_COMPATIBILITY 0x40000000

/**
 * @brief Ignore-grapheme-cluster flag.
 *
 * This is the top bit of a 32-bit mask, so the constant has an unsigned type.
 */
#define UTF8_CATEGORY_IGNORE_GRAPHEME_CLUSTER 0x80000000
/*
 * Composite masks named after the <ctype.h> classification functions.
 * Every one of them sets UTF8_CATEGORY_COMPATIBILITY in addition to the
 * category flags it selects.
 *
 * NOTE(review): ISBLANK and ISXDIGIT additionally set
 * UTF8_CATEGORY_PRIVATE_USE. That looks deliberate (it is the only thing that
 * tells ISBLANK apart from ISSPACE and ISXDIGIT apart from ISDIGIT), but
 * confirm against the utf8iscategory implementation before changing it.
 */

/** @brief Counterpart of iscntrl(): control. */
#define UTF8_CATEGORY_ISCNTRL \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_CONTROL)

/** @brief Counterpart of isprint(): letters, numbers, punctuation, symbols and separators. */
#define UTF8_CATEGORY_ISPRINT \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
    UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL | \
    UTF8_CATEGORY_SEPARATOR)

/** @brief Counterpart of isspace(): space separators. */
#define UTF8_CATEGORY_ISSPACE \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_SEPARATOR_SPACE)

/** @brief Counterpart of isblank(): space separators plus the private-use marker bit. */
#define UTF8_CATEGORY_ISBLANK \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_SEPARATOR_SPACE | UTF8_CATEGORY_PRIVATE_USE)

/** @brief Counterpart of isgraph(): letters, numbers, punctuation and symbols. */
#define UTF8_CATEGORY_ISGRAPH \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER | \
    UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL)

/** @brief Counterpart of ispunct(): punctuation and symbols. */
#define UTF8_CATEGORY_ISPUNCT \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_PUNCTUATION | UTF8_CATEGORY_SYMBOL)

/** @brief Counterpart of isalnum(): letters and numbers. */
#define UTF8_CATEGORY_ISALNUM \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER | UTF8_CATEGORY_NUMBER)

/** @brief Counterpart of isalpha(): letters. */
#define UTF8_CATEGORY_ISALPHA \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER)

/** @brief Counterpart of isupper(): uppercase letters. */
#define UTF8_CATEGORY_ISUPPER \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER_UPPERCASE)

/** @brief Counterpart of islower(): lowercase letters. */
#define UTF8_CATEGORY_ISLOWER \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_LETTER_LOWERCASE)

/** @brief Counterpart of isdigit(): numbers. */
#define UTF8_CATEGORY_ISDIGIT \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_NUMBER)

/** @brief Counterpart of isxdigit(): numbers plus the private-use marker bit. */
#define UTF8_CATEGORY_ISXDIGIT \
    (UTF8_CATEGORY_COMPATIBILITY | \
    UTF8_CATEGORY_NUMBER | UTF8_CATEGORY_PRIVATE_USE)
uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:203
UTF8_API size_t utf8totitle(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to titlecase.
#define UTF8_API
Calling convention for public functions.
Definition: utf8rewind.h:185
UTF8_API size_t utf8normalize(const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors)
Normalize a string to the specified Unicode Normalization Form.
UTF8_API size_t utf8iscategory(const char *input, size_t inputSize, size_t flags)
Check if the input string conforms to the category specified by the flags.
UTF8_API size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
UTF8_API size_t utf8toupper(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to uppercase.
uint16_t utf16_t
UTF-16 encoded code point.
Definition: utf8rewind.h:197
UTF8_API size_t utf8casefold(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Remove case distinction from UTF-8 encoded text.
UTF8_API size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
UTF8_API size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
UTF8_API uint8_t utf8isnormalized(const char *input, size_t inputSize, size_t flags, size_t *offset)
Check if a string is stable in the specified Unicode Normalization Form.
UTF8_API size_t utf8tolower(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to lowercase.
UTF8_API size_t utf8len(const char *text)
Get the length in code points of a UTF-8 encoded string.
UTF8_API size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
UTF8_API size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.
UTF8_API const char * utf8seek(const char *text, size_t textSize, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.