/* Include guard; the matching #endif lies outside this excerpt. */
26 #ifndef _UTF8REWIND_INTERNAL_CODEPOINT_H_
27 #define _UTF8REWIND_INTERNAL_CODEPOINT_H_
/* Last code point of the Basic Latin block (U+0000..U+007F, i.e. 7-bit ASCII). */
47 #define MAX_BASIC_LATIN 0x007F
/* Last code point of the Latin-1 Supplement block (U+0080..U+00FF). */
53 #define MAX_LATIN_1 0x00FF
/* Last code point of the Basic Multilingual Plane (plane 0). */
59 #define MAX_BASIC_MULTILINGUAL_PLANE 0xFFFF
/* Highest code point defined by Unicode (end of plane 16). */
65 #define MAX_LEGAL_UNICODE 0x10FFFF
/* U+FFFD REPLACEMENT CHARACTER. */
71 #define REPLACEMENT_CHARACTER 0xFFFD
/* UTF-8 encoding of U+FFFD (bytes EF BF BD). */
77 #define REPLACEMENT_CHARACTER_STRING "\xEF\xBF\xBD"
/* Byte length of REPLACEMENT_CHARACTER_STRING, not counting the NUL terminator. */
83 #define REPLACEMENT_CHARACTER_STRING_LENGTH 3
/*
 * UTF-16 surrogate code point ranges (both bounds inclusive):
 * high (leading) surrogates U+D800..U+DBFF,
 * low (trailing) surrogates U+DC00..U+DFFF.
 */
89 #define SURROGATE_HIGH_START 0xD800
95 #define SURROGATE_HIGH_END 0xDBFF
101 #define SURROGATE_LOW_START 0xDC00
107 #define SURROGATE_LOW_END 0xDFFF
/*
 * Hangul constants. The values match the LBase/VBase/TBase/SBase and
 * LCount/VCount/TCount/NCount/SCount constants of the Unicode Standard's
 * Hangul syllable composition algorithm. All FIRST/LAST pairs are inclusive.
 */
/* Bounds of the Hangul Jamo block (U+1100..U+11FF). */
113 #define HANGUL_JAMO_FIRST 0x1100
119 #define HANGUL_JAMO_LAST 0x11FF
/* Leading consonants (L): 0x1112 - 0x1100 + 1 = 19 code points. */
126 #define HANGUL_L_FIRST 0x1100
133 #define HANGUL_L_LAST 0x1112
139 #define HANGUL_L_COUNT 19
/* Vowels (V): 0x1175 - 0x1161 + 1 = 21 code points. */
146 #define HANGUL_V_FIRST 0x1161
153 #define HANGUL_V_LAST 0x1175
159 #define HANGUL_V_COUNT 21
/*
 * Trailing consonants (T): 0x11C2 - 0x11A7 + 1 = 28 values.
 * The range starts at U+11A7, one below the first trailing consonant
 * U+11A8; in the Unicode algorithm that base value stands for
 * "no trailing consonant", which is why the count is 28 rather than 27.
 */
166 #define HANGUL_T_FIRST 0x11A7
173 #define HANGUL_T_LAST 0x11C2
179 #define HANGUL_T_COUNT 28
/* Equals HANGUL_V_COUNT * HANGUL_T_COUNT (21 * 28). */
185 #define HANGUL_N_COUNT 588
/* Precomposed Hangul syllables (U+AC00..U+D7A3). */
191 #define HANGUL_S_FIRST 0xAC00
197 #define HANGUL_S_LAST 0xD7A3
/* Equals HANGUL_L_COUNT * HANGUL_N_COUNT (19 * 588); also S_LAST - S_FIRST + 1. */
203 #define HANGUL_S_COUNT 11172
/*
 * Named constants for individual code points; each macro name follows the
 * Unicode character name of its value.
 * NOTE(review): these are presumably the characters needed for
 * language-sensitive case mapping (dotted/dotless i, Greek sigma and
 * iota subscript) -- confirm against the call sites.
 */
205 #define CP_LATIN_CAPITAL_LETTER_I 0x0049
206 #define CP_LATIN_CAPITAL_LETTER_J 0x004A
207 #define CP_LATIN_SMALL_LETTER_I 0x0069
208 #define CP_LATIN_SMALL_LETTER_J 0x006A
209 #define CP_LATIN_CAPITAL_LETTER_I_WITH_GRAVE 0x00CC
210 #define CP_LATIN_CAPITAL_LETTER_I_WITH_ACUTE 0x00CD
211 #define CP_LATIN_CAPITAL_LETTER_I_WITH_TILDE 0x0128
212 #define CP_LATIN_CAPITAL_LETTER_I_WITH_OGONEK 0x012E
213 #define CP_LATIN_SMALL_LETTER_I_WITH_OGONEK 0x012F
214 #define CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
215 #define CP_LATIN_SMALL_LETTER_DOTLESS_I 0x0131
216 #define CP_COMBINING_GRAVE_ACCENT 0x0300
217 #define CP_COMBINING_ACUTE_ACCENT 0x0301
/* U+0303; the official Unicode name is COMBINING TILDE. */
218 #define CP_COMBINING_TILDE_ACCENT 0x0303
219 #define CP_COMBINING_DOT_ABOVE 0x0307
220 #define CP_COMBINING_GREEK_YPOGEGRAMMENI 0x0345
221 #define CP_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
/*
 * Read-only table with 256 entries, i.e. one per possible uint8_t value;
 * defined in another translation unit.
 * NOTE(review): presumably indexed by the first byte of a UTF-8 sequence
 * and yielding the length of that sequence -- confirm against the
 * table's definition.
 */
230 extern const uint8_t codepoint_decoded_length[256];
/*
 * Declaration only; the implementation is not part of this excerpt.
 *
 * encoded    - code point value to write.
 * target     - pointer to the output cursor; passed by address, so the
 *              callee is able to move the cursor.
 * targetSize - pointer to a byte count for the output; passed by address,
 *              so the callee is able to update it.
 *
 * NOTE(review): presumably writes the UTF-8 form of `encoded` to *target,
 * advances *target, reduces *targetSize, and returns the number of bytes
 * written (0 on failure) -- confirm against the implementation.
 */
243 uint8_t codepoint_write(
unicode_t encoded,
char** target,
size_t* targetSize);
/*
 * Declaration only; the implementation is not part of this excerpt.
 *
 * input     - bytes to read from; not modified through this pointer (const).
 * inputSize - size associated with `input` (presumably its length in
 *             bytes -- TODO confirm).
 * decoded   - output parameter that receives a unicode_t value.
 *
 * NOTE(review): presumably decodes one UTF-8 sequence from `input` into
 * *decoded and returns the number of bytes consumed -- confirm against
 * the implementation, including what is stored for invalid input.
 */
254 uint8_t codepoint_read(
const char* input,
size_t inputSize,
unicode_t* decoded);
Referenced type: uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:203
utf8rewind.h: Public interface for UTF-8 functions.