utf8rewind  1.2.0
Cross-platform library for UTF-8 encoded text
utf8rewind.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014-2015 Quinten Lansu
3 
4  Permission is hereby granted, free of charge, to any person
5  obtaining a copy of this software and associated documentation
6  files (the "Software"), to deal in the Software without
7  restriction, including without limitation the rights to use,
8  copy, modify, merge, publish, distribute, sublicense, and/or
9  sell copies of the Software, and to permit persons to whom the
10  Software is furnished to do so, subject to the following
11  conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 #ifndef _UTF8REWIND_H_
27 #define _UTF8REWIND_H_
28 
34 #include <stddef.h>
35 #include <stdio.h>
36 #include <stdint.h>
37 #include <string.h>
38 #include <wchar.h>
39 
/* Error codes. Zero means "no error"; every failure is a distinct negative
   value. Presumably these are the values stored through the
   `int32_t* errors` out-parameter of the functions declared in this header
   -- TODO confirm against the implementation. */
49 #define UTF8_ERR_NONE (0)
50 
/* Judging by the name: the input could not be interpreted as valid data. */
55 #define UTF8_ERR_INVALID_DATA (-1)
56 
/* Judging by the name: an unsupported `flags` value was passed. */
61 #define UTF8_ERR_INVALID_FLAG (-2)
62 
/* Judging by the name: the target buffer was too small for the result. */
67 #define UTF8_ERR_NOT_ENOUGH_SPACE (-3)
68 
/* Judging by the name: input and target buffers overlap in memory. */
73 #define UTF8_ERR_OVERLAPPING_PARAMETERS (-4)
74 
/* Normalization flags. Each value is a distinct single bit, so they can be
   combined with bitwise OR. Presumably passed as the `flags` argument of
   utf8normalize() and utf8isnormalized() -- TODO confirm which combinations
   are accepted. */
/* Judging by the name: request a composed normalization form. */
83 #define UTF8_NORMALIZE_COMPOSE 0x00000001
84 
/* Judging by the name: request a decomposed normalization form. */
89 #define UTF8_NORMALIZE_DECOMPOSE 0x00000002
90 
/* Judging by the name: use compatibility (rather than canonical) mappings;
   presumably a modifier OR-ed with one of the two flags above -- confirm. */
95 #define UTF8_NORMALIZE_COMPATIBILITY 0x00000004
96 
/* Normalization check results. The names mirror the Yes / Maybe / No values
   of the Unicode normalization quick-check; presumably these are the values
   returned by utf8isnormalized() -- TODO confirm. */
101 #define UTF8_NORMALIZATION_RESULT_YES (0)
102 
107 #define UTF8_NORMALIZATION_RESULT_MAYBE (1)
108 
113 #define UTF8_NORMALIZATION_RESULT_NO (2)
114 
/* Size of wchar_t in bytes, as assumed by this header. A build may predefine
   UTF8_WCHAR_SIZE to skip the detection below. */
127 #ifndef UTF8_WCHAR_SIZE
/* Pick 4 if any of the compiler/library macros reports a wchar_t wider than
   16 bits. Macros that are not defined evaluate to 0 inside #if, so an
   unknown toolchain falls through to 2. */
128  #if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX)
129  #define UTF8_WCHAR_SIZE (4)
130  #else
131  #define UTF8_WCHAR_SIZE (2)
132  #endif
133 #endif
134 
/* Exactly one of UTF8_WCHAR_UTF32 / UTF8_WCHAR_UTF16 is defined, chosen from
   UTF8_WCHAR_SIZE; any size other than 4 or 2 stops compilation. */
135 #if (UTF8_WCHAR_SIZE == 4)
136 
/* Defined when wchar_t is 4 bytes; by name, wide strings are then handled as
   UTF-32 -- confirm against the wide-string conversion functions. */
140  #define UTF8_WCHAR_UTF32 (1)
141 #elif (UTF8_WCHAR_SIZE == 2)
142 
/* Defined when wchar_t is 2 bytes; by name, wide strings are then handled as
   UTF-16 -- confirm against the wide-string conversion functions. */
146  #define UTF8_WCHAR_UTF16 (1)
147 #else
148  #error Invalid size for wchar_t type.
149 #endif
150 
/* Calling convention for public functions. May be predefined by the build;
   otherwise it expands to `extern "C"` when compiled as C++ (so the
   declarations get C linkage) and to nothing when compiled as C. */
156 #ifndef UTF8_API
157  #ifdef __cplusplus
158  #define UTF8_API extern "C"
159  #else
160  #define UTF8_API
161  #endif
162 #endif
163 
/* UTF-16 encoded codepoint (one 16-bit unit). */
172 typedef uint16_t utf16_t;
173 
/* Unicode codepoint, stored in 32 bits. */
178 typedef uint32_t unicode_t;
179 
/* Get the length in codepoints of a UTF-8 encoded string.
   Takes no size argument, so `text` is presumably expected to be
   NUL-terminated -- TODO confirm. */
197 UTF8_API size_t utf8len(const char* text);
198 
/* Convert a UTF-16 encoded string to a UTF-8 encoded string.
   Reads from `input`, writes to `target`; `errors` is an optional-looking
   out-parameter, presumably receiving a UTF8_ERR_* code. Units of the size
   arguments and of the return value are not visible here -- TODO confirm. */
245 UTF8_API size_t utf16toutf8(const utf16_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
246 
/* Convert a UTF-32 encoded string to a UTF-8 encoded string.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Units of the size arguments and of the return value are
   not visible here -- TODO confirm. */
305 UTF8_API size_t utf32toutf8(const unicode_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
306 
/* Convert a wide string to a UTF-8 encoded string.
   The encoding assumed for wchar_t presumably follows UTF8_WCHAR_SIZE
   (UTF-32 when 4, UTF-16 when 2) -- TODO confirm. `errors` presumably
   receives a UTF8_ERR_* code. */
370 UTF8_API size_t widetoutf8(const wchar_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
371 
/* Convert a UTF-8 encoded string to a UTF-16 encoded string.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Units of the size arguments and of the return value are
   not visible here -- TODO confirm. */
417 UTF8_API size_t utf8toutf16(const char* input, size_t inputSize, utf16_t* target, size_t targetSize, int32_t* errors);
418 
/* Convert a UTF-8 encoded string to a UTF-32 encoded string.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Units of the size arguments and of the return value are
   not visible here -- TODO confirm. */
462 UTF8_API size_t utf8toutf32(const char* input, size_t inputSize, unicode_t* target, size_t targetSize, int32_t* errors);
463 
/* Convert a UTF-8 encoded string to a wide string.
   The encoding produced for wchar_t presumably follows UTF8_WCHAR_SIZE
   (UTF-32 when 4, UTF-16 when 2) -- TODO confirm. `errors` presumably
   receives a UTF8_ERR_* code. */
535 UTF8_API size_t utf8towide(const char* input, size_t inputSize, wchar_t* target, size_t targetSize, int32_t* errors);
536 
/* Seek into a UTF-8 encoded string.
   Returns a pointer into the string; the accepted values of `direction` and
   the unit of `offset` are not visible here -- TODO confirm.
   NOTE(review): `off_t` is a POSIX type, not ISO C. None of the standard
   headers included by this file is required by ISO C to declare it, so
   non-POSIX toolchains may need <sys/types.h> -- verify on all targets. */
587 UTF8_API const char* utf8seek(const char* text, const char* textStart, off_t offset, int direction);
588 
/* Convert UTF-8 encoded text to uppercase.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Meaning of the return value is not visible here --
   TODO confirm. */
665 UTF8_API size_t utf8toupper(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
666 
/* Convert UTF-8 encoded text to lowercase.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Meaning of the return value is not visible here --
   TODO confirm. */
748 UTF8_API size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
749 
/* Convert UTF-8 encoded text to titlecase.
   Reads from `input`, writes to `target`; `errors` presumably receives a
   UTF8_ERR_* code. Meaning of the return value is not visible here --
   TODO confirm. */
825 UTF8_API size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
826 
/* Check if a string is stable in the specified Unicode Normalization Form.
   `flags` presumably takes UTF8_NORMALIZE_* bits and the return value is
   presumably one of UTF8_NORMALIZATION_RESULT_* -- TODO confirm. `offset`
   is an out-parameter whose exact meaning is not visible here. */
910 UTF8_API uint8_t utf8isnormalized(const char* input, size_t inputSize, size_t flags, size_t* offset);
911 
/* Normalize a string to the specified Unicode Normalization Form.
   Reads from `input`, writes to `target`; `flags` presumably takes
   UTF8_NORMALIZE_* bits and `errors` presumably receives a UTF8_ERR_* code
   -- TODO confirm. */
1043 UTF8_API size_t utf8normalize(const char* input, size_t inputSize, char* target, size_t targetSize, size_t flags, int32_t* errors);
1044 
1045 #endif /* _UTF8REWIND_H_ */
uint32_t unicode_t
Unicode codepoint.
Definition: utf8rewind.h:178
UTF8_API size_t utf8totitle(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to titlecase.
UTF8_API size_t utf8normalize(const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors)
Normalize a string to the specified Unicode Normalization Form.
#define UTF8_API
Calling convention for public functions.
Definition: utf8rewind.h:160
UTF8_API size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
UTF8_API size_t utf8toupper(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to uppercase.
uint16_t utf16_t
UTF-16 encoded codepoint.
Definition: utf8rewind.h:172
UTF8_API size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
UTF8_API size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
UTF8_API uint8_t utf8isnormalized(const char *input, size_t inputSize, size_t flags, size_t *offset)
Check if a string is stable in the specified Unicode Normalization Form.
UTF8_API size_t utf8tolower(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to lowercase.
UTF8_API const char * utf8seek(const char *text, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.
UTF8_API size_t utf8len(const char *text)
Get the length in codepoints of a UTF-8 encoded string.
UTF8_API size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
UTF8_API size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.