utf8rewind
 All Files Functions Typedefs Macros Groups Pages
utf8rewind.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014 Quinten Lansu
3 
4  Permission is hereby granted, free of charge, to any person
5  obtaining a copy of this software and associated documentation
6  files (the "Software"), to deal in the Software without
7  restriction, including without limitation the rights to use,
8  copy, modify, merge, publish, distribute, sublicense, and/or
9  sell copies of the Software, and to permit persons to whom the
10  Software is furnished to do so, subject to the following
11  conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
31 #ifndef _UTF8REWIND_H_
32 #define _UTF8REWIND_H_
33 
35 #include <stddef.h>
36 #include <stdio.h>
37 #include <stdint.h>
38 #include <string.h>
39 #include <wchar.h>
41 
/* Error codes: distinct negative integer constants (-1 .. -7).
   NOTE(review): presumably these are the values written through the
   `int32_t* errors` out-parameter of the conversion functions declared
   in this header - confirm against the implementation. */
42 #define UTF8_ERR_INVALID_CHARACTER (-1)
43 #define UTF8_ERR_INVALID_DATA (-2)
44 #define UTF8_ERR_NOT_ENOUGH_SPACE (-3)
45 #define UTF8_ERR_OUT_OF_RANGE (-4)
46 #define UTF8_ERR_UNHANDLED_SURROGATE_PAIR (-5)
47 #define UTF8_ERR_UNMATCHED_HIGH_SURROGATE_PAIR (-6)
48 #define UTF8_ERR_UNMATCHED_LOW_SURROGATE_PAIR (-7)
49 
52 
/* UTF8_WCHAR_SIZE: assumed width of wchar_t in bytes (4 or 2).
   The build may predefine it; otherwise it is set to 4 when any of the
   three tests below holds and to 2 in every other case.
   Identifiers that are not defined as macros evaluate to 0 inside #if,
   so a test that relies on a compiler-specific macro (__SIZEOF_WCHAR_T__,
   __WCHAR_MAX__) is simply false on toolchains that do not provide it. */
53 #ifndef UTF8_WCHAR_SIZE
54  #if (__SIZEOF_WCHAR_T__ == 4) || (WCHAR_MAX > UINT16_MAX) || (__WCHAR_MAX__ > UINT16_MAX)
55  #define UTF8_WCHAR_SIZE (4)
56  #else
57  #define UTF8_WCHAR_SIZE (2)
58  #endif
59 #endif
60 
/* Derive an encoding flag from UTF8_WCHAR_SIZE: UTF8_WCHAR_UTF32 is defined
   for a size of 4, UTF8_WCHAR_UTF16 for a size of 2. Any other value
   (e.g. a bad user-supplied UTF8_WCHAR_SIZE) stops compilation.
   NOTE(review): the code that consumes these flags is not part of this
   header - presumably the wide-string conversion implementation. */
61 #if (UTF8_WCHAR_SIZE == 4)
62  #define UTF8_WCHAR_UTF32 (1)
63 #elif (UTF8_WCHAR_SIZE == 2)
64  #define UTF8_WCHAR_UTF16 (1)
65 #else
66  #error Invalid size for wchar_t type.
67 #endif
68 
70 
71 #if defined(__cplusplus)
72 extern "C" {
73 #endif
74 
/* Fixed-width element types used by the declarations in this header.
   unicode_t: 32-bit unit; element type of the UTF-32 strings taken by
              utf32toutf8() and produced by utf8toutf32().
   ucs2_t:    16-bit unit; not referenced by any declaration in this header
              (NOTE(review): presumably a UCS-2 code unit - confirm usage).
   utf16_t:   16-bit unit; element type of the UTF-16 strings taken by
              utf16toutf8() and produced by utf8toutf16(). */
75 typedef uint32_t unicode_t;
76 typedef uint16_t ucs2_t;
77 typedef uint16_t utf16_t;
79 
/* Check if a character is valid according to UTF-8 encoding.
   encodedCharacter: the single byte to examine.
   NOTE(review): the meaning of the int8_t result (which values denote
   valid / invalid) is not visible in this header - confirm in the
   implementation. */
85 int8_t utf8charvalid(char encodedCharacter);
86 
88 
/* Returns the length in bytes of the encoded character.
   encodedCharacter: a single byte; presumably the first byte of an
   encoded sequence, from which the total length is derived - TODO confirm.
   NOTE(review): the result for a byte that cannot start a sequence is not
   visible in this header. */
100 size_t utf8charlen(char encodedCharacter);
101 
103 
/* Get the length in code points of a UTF-8 encoded string.
   text: the UTF-8 encoded string to measure. No size parameter is taken,
   so the end of the string is presumably marked by a NUL terminator -
   TODO confirm, along with the behavior for a NULL pointer or invalid data. */
118 size_t utf8len(const char* text);
119 
121 
/* Convert a UTF-16 encoded string to a UTF-8 encoded string.
   input / inputSize:   UTF-16 source buffer and its size.
   target / targetSize: UTF-8 destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): whether the sizes and the returned size_t are counted in
   bytes or in elements, and whether target/errors may be NULL, is not
   visible in this header. */
165 size_t utf16toutf8(const utf16_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
166 
168 
/* Convert a UTF-32 encoded string to a UTF-8 encoded string.
   input / inputSize:   UTF-32 source buffer and its size.
   target / targetSize: UTF-8 destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): whether the sizes and the returned size_t are counted in
   bytes or in elements, and whether target/errors may be NULL, is not
   visible in this header. */
230 size_t utf32toutf8(const unicode_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
231 
233 
/* Convert a wide string to a UTF-8 encoded string.
   input / inputSize:   wchar_t source buffer and its size.
   target / targetSize: UTF-8 destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): the wide encoding is presumably selected by
   UTF8_WCHAR_UTF16 / UTF8_WCHAR_UTF32 - confirm in the implementation.
   Size units (bytes vs. elements) are not visible in this header. */
294 size_t widetoutf8(const wchar_t* input, size_t inputSize, char* target, size_t targetSize, int32_t* errors);
295 
297 
/* Convert a UTF-8 encoded string to a UTF-16 encoded string.
   input / inputSize:   UTF-8 source buffer and its size.
   target / targetSize: UTF-16 destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): whether the sizes and the returned size_t are counted in
   bytes or in elements, and whether target/errors may be NULL, is not
   visible in this header. */
337 size_t utf8toutf16(const char* input, size_t inputSize, utf16_t* target, size_t targetSize, int32_t* errors);
338 
340 
/* Convert a UTF-8 encoded string to a UTF-32 encoded string.
   input / inputSize:   UTF-8 source buffer and its size.
   target / targetSize: UTF-32 destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): whether the sizes and the returned size_t are counted in
   bytes or in elements, and whether target/errors may be NULL, is not
   visible in this header. */
380 size_t utf8toutf32(const char* input, size_t inputSize, unicode_t* target, size_t targetSize, int32_t* errors);
381 
383 
/* Convert a UTF-8 encoded string to a wide string.
   input / inputSize:   UTF-8 source buffer and its size.
   target / targetSize: wchar_t destination buffer and its size.
   errors:              out-parameter for an error status; presumably one of
                        the UTF8_ERR_* codes - TODO confirm.
   NOTE(review): the wide encoding is presumably selected by
   UTF8_WCHAR_UTF16 / UTF8_WCHAR_UTF32 - confirm in the implementation.
   Size units (bytes vs. elements) are not visible in this header. */
450 size_t utf8towide(const char* input, size_t inputSize, wchar_t* target, size_t targetSize, int32_t* errors);
451 
453 
/* Seek into a UTF-8 encoded string.
   text:      pointer into the string.
   textStart: pointer to the start of the string.
   offset:    seek distance; presumably counted in code points - TODO confirm.
   direction: seek mode; presumably SEEK_SET / SEEK_CUR / SEEK_END style
              values from <stdio.h> - TODO confirm against the implementation.
   Returns a pointer into the string (exact position semantics are not
   visible in this header).
   NOTE(review): off_t is a POSIX type rather than an ISO C one; confirm
   that the headers included by this file declare it on every target
   platform (POSIX specifies it in <sys/types.h>). */
501 const char* utf8seek(const char* text, const char* textStart, off_t offset, int direction);
502 
503 #if defined(__cplusplus)
504 }
505 #endif
506 
507 #endif
size_t utf8towide(const char *input, size_t inputSize, wchar_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a wide string.
size_t utf8toutf16(const char *input, size_t inputSize, utf16_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-16 encoded string.
size_t utf32toutf8(const unicode_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-32 encoded string to a UTF-8 encoded string.
size_t utf8charlen(char encodedCharacter)
Returns the length in bytes of the encoded character.
size_t utf8len(const char *text)
Get the length in codepoints of a UTF-8 encoded string.
int8_t utf8charvalid(char encodedCharacter)
Check if a character is valid according to UTF-8 encoding.
const char * utf8seek(const char *text, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.
size_t utf16toutf8(const utf16_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a UTF-16 encoded string to a UTF-8 encoded string.
uint16_t ucs2_t
Definition: utf8rewind.h:76
size_t utf8toutf32(const char *input, size_t inputSize, unicode_t *target, size_t targetSize, int32_t *errors)
Convert a UTF-8 encoded string to a UTF-32 encoded string.
uint16_t utf16_t
Definition: utf8rewind.h:77
uint32_t unicode_t
Definition: utf8rewind.h:75
size_t widetoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize, int32_t *errors)
Convert a wide string to a UTF-8 encoded string.