utf8rewind
 All Files Functions Typedefs Macros Pages
utf8rewind.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014 Quinten Lansu
3 
4  Permission is hereby granted, free of charge, to any person
5  obtaining a copy of this software and associated documentation
6  files (the "Software"), to deal in the Software without
7  restriction, including without limitation the rights to use,
8  copy, modify, merge, publish, distribute, sublicense, and/or
9  sell copies of the Software, and to permit persons to whom the
10  Software is furnished to do so, subject to the following
11  conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
/*
 Include guard for this header.

 NOTE(review): identifiers that begin with an underscore followed by
 an uppercase letter are reserved for the implementation in C.
 Consider renaming the guard (e.g. UTF8REWIND_H) after confirming
 that no other file tests this macro.
*/
31 #ifndef _UTF8REWIND_H_
32 #define _UTF8REWIND_H_
33 
35 #include <stdio.h>
36 #include <string.h>
37 #include <wchar.h>
39 
/*
 Error codes. Every value is negative and parenthesized so it can be
 used safely inside larger expressions.

 NOTE(review): presumably these are returned by the int-returning
 functions declared in this header; the exact condition that produces
 each code is not visible in this listing - confirm against the
 implementation.
*/
40 #define UTF8_ERR_INVALID_CHARACTER (-1)
41 #define UTF8_ERR_INVALID_DATA (-2)
42 #define UTF8_ERR_NOT_ENOUGH_SPACE (-3)
43 #define UTF8_ERR_OUT_OF_RANGE (-4)
44 #define UTF8_ERR_UNHANDLED_SURROGATE_PAIR (-5)
45 #define UTF8_ERR_UNMATCHED_HIGH_SURROGATE_PAIR (-6)
46 #define UTF8_ERR_UNMATCHED_LOW_SURROGATE_PAIR (-7)
47 
48 #if defined(__cplusplus)
49 extern "C" {
50 #endif
51 
/*
 Integer types used by the API.

 unicode_t: holds a Unicode codepoint (see utf8encode and utf8decode).
 ucs2_t:    holds a UCS-2 codepoint (see utf8convertucs2).
 utf16_t:   not referenced by any declaration visible in this listing;
            presumably a UTF-16 code unit - TODO confirm.

 NOTE(review): unsigned int and unsigned short do not have fixed
 widths in C; this assumes at least 32 and 16 bits respectively -
 confirm for the target platforms.
*/
52 typedef unsigned int unicode_t;
53 typedef unsigned short ucs2_t;
54 typedef unsigned short utf16_t;
56 
/*
 Check if a character is valid according to UTF-8 encoding.

 encodedCharacter: the single byte to check.

 NOTE(review): the return-value convention (which values mean valid
 or invalid) is not visible in this listing - confirm against the
 implementation.
*/
62 int utf8charvalid(char encodedCharacter);
63 
65 
/*
 Returns the length in bytes of the encoded character.

 encodedCharacter: a single byte; presumably the first byte of a
 UTF-8 sequence, from which the sequence length is derived - TODO
 confirm.

 NOTE(review): the value returned for an invalid byte is not visible
 in this listing.
*/
78 int utf8charlen(char encodedCharacter);
79 
81 
/*
 Get the length in codepoints of a UTF-8 encoded string.

 text: the UTF-8 encoded string; no size is passed, so it is
 presumably NUL-terminated - TODO confirm.

 NOTE(review): behavior for a NULL pointer or malformed input is not
 visible in this listing.
*/
97 int utf8len(const char* text);
98 
100 
/*
 Encode a Unicode codepoint to UTF-8.

 codepoint:  the Unicode codepoint to encode.
 target:     output buffer that receives the encoded bytes.
 targetSize: size of the output buffer, presumably in bytes - TODO
             confirm.

 NOTE(review): presumably returns the number of bytes written, or one
 of the negative UTF8_ERR_* codes on failure - confirm against the
 implementation.
*/
128 int utf8encode(unicode_t codepoint, char* target, size_t targetSize);
129 
131 
/*
 Convert a UCS-2 codepoint to UTF-8.

 codepoint:  the UCS-2 codepoint to convert.
 target:     output buffer that receives the encoded bytes.
 targetSize: size of the output buffer, presumably in bytes - TODO
             confirm.

 NOTE(review): presumably returns the number of bytes written, or one
 of the negative UTF8_ERR_* codes on failure - confirm against the
 implementation.
*/
178 int utf8convertucs2(ucs2_t codepoint, char* target, size_t targetSize);
179 
181 
/*
 Convert a UTF-16 encoded string to UTF-8.

 input:      the wide-character string to convert.
 inputSize:  size of the input; whether this counts bytes or wchar_t
             elements is not visible in this listing - TODO confirm.
 target:     output buffer that receives the UTF-8 bytes.
 targetSize: size of the output buffer, presumably in bytes - TODO
             confirm.

 NOTE(review): the width of wchar_t is platform-dependent, so the
 input is only UTF-16 where wchar_t is 16 bits wide; how wider
 wchar_t values are handled is not visible here. The return value is
 presumably a byte count or a negative UTF8_ERR_* code - confirm.
*/
231 int wctoutf8(const wchar_t* input, size_t inputSize, char* target, size_t targetSize);
232 
234 
/*
 Decode a UTF-8 encoded codepoint to a Unicode codepoint.

 text:   pointer to the UTF-8 encoded bytes to decode.
 result: receives the decoded Unicode codepoint.

 NOTE(review): presumably returns the number of bytes consumed, or
 one of the negative UTF8_ERR_* codes on failure - confirm against
 the implementation.
*/
276 int utf8decode(const char* text, unicode_t* result);
277 
279 
/*
 Convert a UTF-8 encoded string to UTF-16.

 input:      the UTF-8 encoded bytes to convert.
 inputSize:  size of the input, presumably in bytes - TODO confirm.
 target:     output buffer that receives the wide characters.
 targetSize: size of the output buffer; whether this counts bytes or
             wchar_t elements is not visible in this listing - TODO
             confirm.

 NOTE(review): the width of wchar_t is platform-dependent, so the
 output is only UTF-16 where wchar_t is 16 bits wide. The return
 value is presumably a size or a negative UTF8_ERR_* code - confirm.
*/
308 int utf8towc(const char* input, size_t inputSize, wchar_t* target, size_t targetSize);
309 
311 
/*
 Seek into a UTF-8 encoded string.

 text:      current position inside the string.
 textStart: start of the string; presumably used as the lower bound
            when seeking backwards - TODO confirm.
 offset:    distance to seek, presumably counted in codepoints - TODO
            confirm.
 direction: selects how the offset is applied; the accepted values
            are not visible in this listing - TODO confirm.

 Returns a pointer into the string.

 NOTE(review): off_t is a POSIX type, not an ISO C one, and none of
 <stdio.h>, <string.h> or <wchar.h> is required by ISO C to declare
 it. Confirm this compiles on every target, or include
 <sys/types.h> explicitly.
*/
344 const char* utf8seek(const char* text, const char* textStart, off_t offset, int direction);
345 
346 #if defined(__cplusplus)
347 }
348 #endif
349 
350 #endif
int utf8decode(const char *text, unicode_t *result)
Decode a UTF-8 encoded codepoint to a Unicode codepoint.
int utf8charvalid(char encodedCharacter)
Check if a character is valid according to UTF-8 encoding.
int utf8convertucs2(ucs2_t codepoint, char *target, size_t targetSize)
Convert a UCS-2 codepoint to UTF-8.
unsigned int unicode_t
Definition: utf8rewind.h:52
int utf8len(const char *text)
Get the length in codepoints of a UTF-8 encoded string.
int wctoutf8(const wchar_t *input, size_t inputSize, char *target, size_t targetSize)
Convert a UTF-16 encoded string to UTF-8.
const char * utf8seek(const char *text, const char *textStart, off_t offset, int direction)
Seek into a UTF-8 encoded string.
int utf8charlen(char encodedCharacter)
Returns the length in bytes of the encoded character.
int utf8towc(const char *input, size_t inputSize, wchar_t *target, size_t targetSize)
Convert a UTF-8 encoded string to UTF-16.
unsigned short ucs2_t
Definition: utf8rewind.h:53
unsigned short utf16_t
Definition: utf8rewind.h:54
int utf8encode(unicode_t codepoint, char *target, size_t targetSize)
Encode a Unicode codepoint to UTF-8.