utf8rewind  1.3.0
System library for processing UTF-8 encoded text
utf8rewind.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014-2015 Quinten Lansu
3 
4  Permission is hereby granted, free of charge, to any person
5  obtaining a copy of this software and associated documentation
6  files (the "Software"), to deal in the Software without
7  restriction, including without limitation the rights to use,
8  copy, modify, merge, publish, distribute, sublicense, and/or
9  sell copies of the Software, and to permit persons to whom the
10  Software is furnished to do so, subject to the following
11  conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 #ifndef _UTF8REWIND_H_
27 #define _UTF8REWIND_H_
28 
34 #include <locale.h>
35 #include <stddef.h>
36 #include <stdio.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <wchar.h>
40 
/**
 * Fold a three-part version number into one comparable integer.
 *
 * The result is major * 10000 + minor * 100 + bugfix, so 1.3.0 becomes
 * 10300. The whole expansion is wrapped in parentheses so that it acts as
 * a single operand when used inside a larger expression (for example
 * `UTF8_VERSION_MAKE(1, 2, 3) * 2` or `x - UTF8_VERSION_MAKE(1, 2, 3)`).
 *
 * @param _major   Major version component.
 * @param _minor   Minor version component.
 * @param _bugfix  Bugfix version component.
 */
#define UTF8_VERSION_MAKE(_major, _minor, _bugfix) \
    (((_major) * 10000) + ((_minor) * 100) + (_bugfix))
53 
/** Major component of the library version. */
#define UTF8_VERSION_MAJOR 1

/** Minor component of the library version. */
#define UTF8_VERSION_MINOR 3

/** Bugfix component of the library version. */
#define UTF8_VERSION_BUGFIX 0

/**
 * The library version as one integer, built from the three components
 * above with UTF8_VERSION_MAKE so it can be compared numerically.
 */
#define UTF8_VERSION \
    UTF8_VERSION_MAKE( \
        UTF8_VERSION_MAJOR, \
        UTF8_VERSION_MINOR, \
        UTF8_VERSION_BUGFIX)

/**
 * The library version as a string literal. This is written out by hand,
 * so it has to be kept in step with the three numeric components.
 */
#define UTF8_VERSION_STRING "1.3.0"

/**
 * Non-zero when the library version is at least _major._minor._bugfix,
 * zero otherwise.
 */
#define UTF8_VERSION_GUARD(_major, _minor, _bugfix) \
    (UTF8_VERSION_MAKE(_major, _minor, _bugfix) <= UTF8_VERSION)
91 
/*
 * Error codes. Success is zero and every failure is a distinct negative
 * value. Presumably these are the values the functions in this header
 * store through their `errors` out-parameter -- confirm against the
 * implementation.
 */

/** No error (0). */
#define UTF8_ERR_NONE                    (0)

/** Going by the name: the input contained invalid data. */
#define UTF8_ERR_INVALID_DATA            (-1)

/** Going by the name: an unsupported flag value was passed. */
#define UTF8_ERR_INVALID_FLAG            (-2)

/** Going by the name: the target buffer was too small for the result. */
#define UTF8_ERR_NOT_ENOUGH_SPACE        (-3)

/** Going by the name: the input and target ranges overlap. */
#define UTF8_ERR_OVERLAPPING_PARAMETERS  (-4)
130 
/*
 * Normalization flags. Each one occupies its own bit, so they can be
 * combined with bitwise OR. Presumably these are the values accepted by
 * the `flags` parameter of utf8isnormalized() and utf8normalize() --
 * confirm against the implementation.
 */

/** Bit 0: request composition. */
#define UTF8_NORMALIZE_COMPOSE        0x00000001

/** Bit 1: request decomposition. */
#define UTF8_NORMALIZE_DECOMPOSE      0x00000002

/** Bit 2: request the compatibility variant. */
#define UTF8_NORMALIZE_COMPATIBILITY  0x00000004
152 
/*
 * Normalization check results. Presumably these are the values returned
 * by utf8isnormalized() -- confirm against the implementation.
 */

/** Result 0: "yes". */
#define UTF8_NORMALIZATION_RESULT_YES    (0)

/** Result 1: "maybe". */
#define UTF8_NORMALIZATION_RESULT_MAYBE  (1)

/** Result 2: "no". */
#define UTF8_NORMALIZATION_RESULT_NO     (2)
170 
/*
 * UTF8_WCHAR_SIZE: width of wchar_t, either 2 or 4.
 *
 * A build may define UTF8_WCHAR_SIZE beforehand to skip the detection.
 * Otherwise the size is taken to be 4 when the compiler reports a 4-byte
 * wchar_t or when the largest wchar_t value does not fit in 16 bits, and
 * 2 in every other case. Identifiers that the toolchain does not define
 * evaluate to 0 inside #if, so the unsupported probes simply drop out.
 */
#if !defined(UTF8_WCHAR_SIZE)
    #if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX)
        #define UTF8_WCHAR_SIZE (4)
    #else
        #define UTF8_WCHAR_SIZE (2)
    #endif
#endif

/*
 * Exactly one of UTF8_WCHAR_UTF32 / UTF8_WCHAR_UTF16 is defined, chosen
 * by UTF8_WCHAR_SIZE. Any other size stops the build.
 */
#if UTF8_WCHAR_SIZE == 4
    /** Defined as 1 when wchar_t is 4 wide. */
    #define UTF8_WCHAR_UTF32 (1)
#elif UTF8_WCHAR_SIZE == 2
    /** Defined as 1 when wchar_t is 2 wide. */
    #define UTF8_WCHAR_UTF16 (1)
#else
    #error Invalid size for wchar_t type.
#endif
207 
/**
 * Prefix placed in front of every public function declaration.
 *
 * Unless the build has already defined it, it expands to `extern "C"`
 * under a C++ compiler and to nothing under a C compiler.
 */
#if !defined(UTF8_API)
    #if defined(__cplusplus)
        #define UTF8_API extern "C"
    #else
        #define UTF8_API
    #endif
#endif
220 
/**
 * One UTF-16 code unit: an unsigned 16-bit value.
 */
typedef uint16_t utf16_t;

/**
 * One UTF-32 encoded code point: an unsigned 32-bit value.
 */
typedef uint32_t unicode_t;
236 
254 UTF8_API size_t utf8len(const char* text);
255 
301 UTF8_API size_t utf16toutf8(const utf16_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
302 
360 UTF8_API size_t utf32toutf8(const unicode_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
361 
424 UTF8_API size_t widetoutf8(const wchar_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
425 
472 UTF8_API size_t utf8toutf16(const char* input, size_t inputSize, utf16_t* target, size_t targetSize, int32_t* errors);
473 
518 UTF8_API size_t utf8toutf32(const char* input, size_t inputSize, unicode_t* target, size_t targetSize, int32_t* errors);
519 
592 UTF8_API size_t utf8towide(const char* input, size_t inputSize, wchar_t* target, size_t targetSize, int32_t* errors);
593 
645 UTF8_API const char* utf8seek(const char* text, size_t textSize, const char* textStart, off_t offset, int direction);
646 
733 UTF8_API size_t utf8toupper(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
734 
826 UTF8_API size_t utf8tolower(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
827 
913 UTF8_API size_t utf8totitle(const char* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
914 
998 UTF8_API uint8_t utf8isnormalized(const char* input, size_t inputSize, size_t flags, size_t* offset);
999 
1131 UTF8_API size_t utf8normalize(const char* input, size_t inputSize, char* target, size_t targetSize, size_t flags, int32_t* errors);
1132 
1133 #endif /* _UTF8REWIND_H_ */
uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:235
UTF8_API size_t utf8totitle(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to titlecase.
UTF8_API size_t utf8normalize(const char *input, size_t inputSize, char *target, size_t targetSize, size_t flags, int32_t *errors)
Normalize a string to the specified Unicode Normalization Form.
#define UTF8_API
Calling convention for public functions.
Definition: utf8rewind.h:217
UTF8_API size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
UTF8_API size_t utf8toupper(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to uppercase.
uint16_t utf16_t
UTF-16 code unit. A code point above U+FFFF takes two of them (a surrogate pair).
Definition: utf8rewind.h:229
UTF8_API size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
UTF8_API size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
UTF8_API size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
UTF8_API uint8_t utf8isnormalized(const char *input, size_t inputSize, size_t flags, size_t *offset)
Check if a string is stable in the specified Unicode Normalization Form.
UTF8_API size_t utf8tolower(const char *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert UTF-8 encoded text to lowercase.
UTF8_API size_t utf8len(const char *text)
Get the length in code points of a UTF-8 encoded string.
UTF8_API size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
UTF8_API size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.
UTF8_API const char * utf8seek(const char *text, size_t textSize, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.