utf8rewind  1.4.1
System library for processing UTF-8 encoded text
codepoint.h
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014-2016 Quinten Lansu
3 
4  Permission is hereby granted, free of charge, to any person
5  obtaining a copy of this software and associated documentation
6  files (the "Software"), to deal in the Software without
7  restriction, including without limitation the rights to use,
8  copy, modify, merge, publish, distribute, sublicense, and/or
9  sell copies of the Software, and to permit persons to whom the
10  Software is furnished to do so, subject to the following
11  conditions:
12 
13  The above copyright notice and this permission notice shall be
14  included in all copies or substantial portions of the Software.
15 
16  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
18  OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19  NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
20  HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
21  WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
23  OTHER DEALINGS IN THE SOFTWARE.
24 */
25 
26 #ifndef _UTF8REWIND_INTERNAL_CODEPOINT_H_
27 #define _UTF8REWIND_INTERNAL_CODEPOINT_H_
28 
36 #include "utf8rewind.h"
37 
/* Upper bounds of common code point ranges, plus the Unicode replacement character in code point and UTF-8 string form. */
47 #define MAX_BASIC_LATIN 0x007F /* highest Basic Latin (ASCII) code point, U+007F */
48 
53 #define MAX_LATIN_1 0x00FF /* highest Latin-1 code point, U+00FF */
54 
59 #define MAX_BASIC_MULTILINGUAL_PLANE 0xFFFF /* highest code point in the Basic Multilingual Plane, U+FFFF */
60 
65 #define MAX_LEGAL_UNICODE 0x10FFFF /* highest legal Unicode code point, U+10FFFF */
66 
71 #define REPLACEMENT_CHARACTER 0xFFFD /* U+FFFD REPLACEMENT CHARACTER */
72 
77 #define REPLACEMENT_CHARACTER_STRING "\xEF\xBF\xBD" /* U+FFFD encoded as UTF-8 (EF BF BD) */
78 
83 #define REPLACEMENT_CHARACTER_STRING_LENGTH 3 /* number of bytes in REPLACEMENT_CHARACTER_STRING, not counting the terminating NUL */
84 
/* Inclusive bounds of the UTF-16 surrogate ranges: high surrogates U+D800..U+DBFF, low surrogates U+DC00..U+DFFF. */
89 #define SURROGATE_HIGH_START 0xD800 /* first high (leading) surrogate */
90 
95 #define SURROGATE_HIGH_END 0xDBFF /* last high (leading) surrogate */
96 
101 #define SURROGATE_LOW_START 0xDC00 /* first low (trailing) surrogate */
102 
107 #define SURROGATE_LOW_END 0xDFFF /* last low (trailing) surrogate */
108 
/*
 * Constants for Hangul syllable composition/decomposition. The L/V/T/N/S
 * names and values line up with LBase/VBase/TBase and LCount/VCount/TCount/
 * NCount/SCount in the Unicode Standard's Hangul syllable algorithm.
 * Each *_COUNT equals *_LAST - *_FIRST + 1 for its range.
 */
113 #define HANGUL_JAMO_FIRST 0x1100 /* first code point of the Hangul Jamo block */
114 
119 #define HANGUL_JAMO_LAST 0x11FF /* last code point of the Hangul Jamo block */
120 
126 #define HANGUL_L_FIRST 0x1100 /* first leading consonant (L) */
127 
133 #define HANGUL_L_LAST 0x1112 /* last leading consonant (L) */
134 
139 #define HANGUL_L_COUNT 19 /* 0x1112 - 0x1100 + 1 */
140 
146 #define HANGUL_V_FIRST 0x1161 /* first vowel (V) */
147 
153 #define HANGUL_V_LAST 0x1175 /* last vowel (V) */
154 
159 #define HANGUL_V_COUNT 21 /* 0x1175 - 0x1161 + 1 */
160 
166 #define HANGUL_T_FIRST 0x11A7 /* trailing consonant (T) base; one below U+11A8, matching Unicode's TBase */
167 
173 #define HANGUL_T_LAST 0x11C2 /* last trailing consonant (T) */
174 
179 #define HANGUL_T_COUNT 28 /* 0x11C2 - 0x11A7 + 1; the base slot stands for "no trailing consonant" in the Unicode algorithm */
180 
185 #define HANGUL_N_COUNT 588 /* HANGUL_V_COUNT * HANGUL_T_COUNT (21 * 28) */
186 
191 #define HANGUL_S_FIRST 0xAC00 /* first precomposed Hangul syllable */
192 
197 #define HANGUL_S_LAST 0xD7A3 /* last precomposed Hangul syllable (0xAC00 + 11172 - 1) */
198 
203 #define HANGUL_S_COUNT 11172 /* HANGUL_L_COUNT * HANGUL_N_COUNT (19 * 588) */
204 
/*
 * Named constants for individual Unicode code points: Latin I/J and
 * accented I variants, combining marks, and Greek capital sigma.
 * NOTE(review): presumably the code points that need special handling
 * during case mapping -- confirm against the callers.
 */
205 #define CP_LATIN_CAPITAL_LETTER_I 0x0049
206 #define CP_LATIN_CAPITAL_LETTER_J 0x004A
207 #define CP_LATIN_SMALL_LETTER_I 0x0069
208 #define CP_LATIN_SMALL_LETTER_J 0x006A
209 #define CP_LATIN_CAPITAL_LETTER_I_WITH_GRAVE 0x00CC
210 #define CP_LATIN_CAPITAL_LETTER_I_WITH_ACUTE 0x00CD
211 #define CP_LATIN_CAPITAL_LETTER_I_WITH_TILDE 0x0128
212 #define CP_LATIN_CAPITAL_LETTER_I_WITH_OGONEK 0x012E
213 #define CP_LATIN_SMALL_LETTER_I_WITH_OGONEK 0x012F
214 #define CP_LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
215 #define CP_LATIN_SMALL_LETTER_DOTLESS_I 0x0131
216 #define CP_COMBINING_GRAVE_ACCENT 0x0300
217 #define CP_COMBINING_ACUTE_ACCENT 0x0301
218 #define CP_COMBINING_TILDE_ACCENT 0x0303 /* Unicode character name: COMBINING TILDE */
219 #define CP_COMBINING_DOT_ABOVE 0x0307
220 #define CP_COMBINING_GREEK_YPOGEGRAMMENI 0x0345
221 #define CP_COMBINING_GRAPHEME_JOINER 0x034F
222 #define CP_GREEK_CAPITAL_LETTER_SIGMA 0x03A3
223 
/*
 * Combining class constants. The values coincide with the Unicode
 * Canonical_Combining_Class (ccc) property values of the same names.
 */
224 #define CCC_NOT_REORDERED 0 /* ccc 0: not reordered */
225 #define CCC_OVERLAY 1
226 #define CCC_NUKTA 7
227 #define CCC_KANA_VOICING 8
228 #define CCC_VIRAMA 9
229 #define CCC_FIXED_POSITION_START 10 /* first of the fixed-position classes */
230 #define CCC_FIXED_POSITION_END 199 /* last of the fixed-position classes */
231 #define CCC_ATTACHED_BELOW_LEFT 200
232 #define CCC_ATTACHED_BELOW 202
233 #define CCC_ATTACHED_BOTTOM_RIGHT 204
234 #define CCC_ATTACHED_LEFT 208
235 #define CCC_ATTACHED_RIGHT 210
236 #define CCC_ATTACHED_TOP_LEFT 212
237 #define CCC_ATTACHED_ABOVE 214
238 #define CCC_ATTACHED_ABOVE_RIGHT 216
239 #define CCC_BELOW_LEFT 218
240 #define CCC_BELOW 220
241 #define CCC_BELOW_RIGHT 222
242 #define CCC_LEFT 224
243 #define CCC_RIGHT 226
244 #define CCC_ABOVE_LEFT 228
245 #define CCC_ABOVE 230
246 #define CCC_ABOVE_RIGHT 232
247 #define CCC_DOUBLE_BELOW 233
248 #define CCC_DOUBLE_ABOVE 234
249 #define CCC_IOTA_SUBSCRIPT 240
250 #define CCC_INVALID 255 /* NOTE(review): presumably a library-internal sentinel for "no valid class" -- confirm against users */
251 
/*
 * Read-only lookup table of 256 uint8_t entries; declared here, defined
 * in another translation unit.
 * NOTE(review): presumably indexed by the first byte of a UTF-8 sequence
 * and yielding that sequence's length in bytes -- confirm against the
 * definition.
 */
259 extern const uint8_t codepoint_decoded_length[256];
260 
/*
 * Writes a code point to an output buffer.
 *
 * encoded     Code point to write (unicode_t).
 * target      Pointer to the output cursor.
 * targetSize  Pointer to the remaining size of the output buffer.
 *
 * NOTE(review): the implementation is not visible from this header.
 * Presumably the code point is encoded as UTF-8, *target is advanced and
 * *targetSize reduced by the number of bytes written, and that byte count
 * is returned -- confirm, including the behaviour when the buffer is too
 * small.
 */
272 uint8_t codepoint_write(unicode_t encoded, char** target, size_t* targetSize);
273 
/*
 * Reads a code point from an input buffer.
 *
 * input      Start of the input bytes; not modified (const).
 * inputSize  Size of the input.
 * decoded    Output location for the decoded code point (unicode_t).
 *
 * NOTE(review): the implementation is not visible from this header.
 * Presumably decodes one UTF-8 sequence from input and returns the number
 * of bytes consumed -- confirm, including how invalid or truncated
 * sequences are reported.
 */
283 uint8_t codepoint_read(const char* input, size_t inputSize, unicode_t* decoded);
284 
291 #endif /* _UTF8REWIND_INTERNAL_CODEPOINT_H_ */
typedef uint32_t unicode_t
UTF-32 encoded code point.
Definition: utf8rewind.h:203
utf8rewind.h: Public interface for UTF-8 functions.