--  File : unicode.adb
-----------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2001 --
-- ACT-Europe --
-- Author: Emmanuel Briot --
-- --
-- This library is free software; you can redistribute it and/or --
-- modify it under the terms of the GNU General Public --
-- License as published by the Free Software Foundation; either --
-- version 2 of the License, or (at your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public --
-- License along with this library; if not, write to the --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330, --
-- Boston, MA 02111-1307, USA. --
-- --
-- As a special exception, if other files instantiate generics from --
-- this unit, or you link this unit with other files to produce an --
-- executable, this unit does not by itself cause the resulting --
-- executable to be covered by the GNU General Public License. This --
-- exception does not however invalidate any other reasons why the --
-- executable file might be covered by the GNU Public License. --
-----------------------------------------------------------------------
with Unicode.Names.Basic_Latin; use Unicode.Names.Basic_Latin;
with Unicode.Names.Latin_Extended_A; use Unicode.Names.Latin_Extended_A;
package body Unicode is
--------------------
-- Is_White_Space --
--------------------
function Is_White_Space (Char : Unicode_Char) return Boolean is
   --  Return True when Char is one of the four characters compared
   --  against below: Space, Horizontal_Tabulation, Line_Feed or
   --  Carriage_Return. Any other character yields False.
begin
   if Char = Space then
      return True;
   elsif Char = Horizontal_Tabulation then
      return True;
   elsif Char = Line_Feed then
      return True;
   else
      return Char = Carriage_Return;
   end if;
end Is_White_Space;
---------------
-- Is_Letter --
---------------
function Is_Letter (Char : Unicode_Char) return Boolean is
   --  A character is a letter when either Is_Base_Char or
   --  Is_Ideographic accepts it. Is_Ideographic is only consulted
   --  when Is_Base_Char has already answered False.
begin
   if Is_Base_Char (Char) then
      return True;
   end if;

   return Is_Ideographic (Char);
end Is_Letter;
------------------
-- Is_Base_Char --
------------------
function Is_Base_Char (Char : Unicode_Char) return Boolean is
   --  Return True when Char belongs to the BaseChar character class of
   --  the XML 1.0 recommendation (Appendix B, production [85]).
   --
   --  The outer case statement first selects a coarse bucket of code
   --  points; only the ranges that fall inside that bucket are then
   --  tested, so that a lookup does not have to walk the whole table.
   --  Within each bucket the ranges are listed in increasing order, in
   --  the same order as in the BaseChar production.
begin
   case Char is
      when Nul .. 16#00FF# =>
         return Char in Latin_Capital_Letter_A .. Latin_Capital_Letter_Z
           or else Char in Latin_Small_Letter_A .. Latin_Small_Letter_Z
           or else Char in 16#00C0# .. 16#00D6#
           or else Char in 16#00D8# .. 16#00F6#
           or else Char in 16#00F8# .. 16#00FF#;

      when Latin_Capital_Letter_A_With_Macron .. 16#0217# =>
         return Char in Latin_Capital_Letter_A_With_Macron
                          .. Latin_Small_Letter_Dotless_I
           or else Char in Latin_Capital_Letter_J_With_Circumflex
                             .. Latin_Small_Letter_L_With_Caron
           or else Char in Latin_Capital_Letter_L_With_Stroke
                             .. Latin_Small_Letter_N_With_Caron
           or else Char in Latin_Capital_Letter_Eng
                             .. Latin_Small_Letter_Z_With_Caron
           or else Char in 16#0180# .. 16#01C3#
           or else Char in 16#01CD# .. 16#01F0#
           or else Char in 16#01F4# .. 16#01F5#
           or else Char in 16#01FA# .. 16#0217#;

      when 16#0218# .. 16#03FF# =>
         return Char in 16#0250# .. 16#02A8#
           or else Char in 16#02BB# .. 16#02C1#
           or else Char = 16#0386#
           or else Char in 16#0388# .. 16#038A#
           or else Char = 16#038C#
           or else Char in 16#038E# .. 16#03A1#
           or else Char in 16#03A3# .. 16#03CE#
           or else Char in 16#03D0# .. 16#03D6#
           or else Char = 16#03DA#
           or else Char = 16#03DC#
           or else Char = 16#03DE#
           or else Char = 16#03E0#
           or else Char in 16#03E2# .. 16#03F3#;

      when 16#0400# .. 16#04FF# =>
         return Char in 16#0401# .. 16#040C#
           or else Char in 16#040E# .. 16#044F#
           or else Char in 16#0451# .. 16#045C#
           or else Char in 16#045E# .. 16#0481#
           or else Char in 16#0490# .. 16#04C4#
           or else Char in 16#04C7# .. 16#04C8#
           or else Char in 16#04CB# .. 16#04CC#
           or else Char in 16#04D0# .. 16#04EB#
           or else Char in 16#04EE# .. 16#04F5#
           or else Char in 16#04F8# .. 16#04F9#;

      when 16#0500# .. 16#05FF# =>
         return Char in 16#0531# .. 16#0556#
           or else Char = 16#0559#
           or else Char in 16#0561# .. 16#0586#
           or else Char in 16#05D0# .. 16#05EA#
           or else Char in 16#05F0# .. 16#05F2#;

      when 16#0600# .. 16#06FF# =>
         return Char in 16#0621# .. 16#063A#
           or else Char in 16#0641# .. 16#064A#
           or else Char in 16#0671# .. 16#06B7#
           or else Char in 16#06BA# .. 16#06BE#
           or else Char in 16#06C0# .. 16#06CE#
           or else Char in 16#06D0# .. 16#06D3#
           or else Char = 16#06D5#
           or else Char in 16#06E5# .. 16#06E6#;

      when 16#0700# .. 16#09FF# =>
         return Char in 16#0905# .. 16#0939#
           or else Char = 16#093D#
           or else Char in 16#0958# .. 16#0961#
           or else Char in 16#0985# .. 16#098C#
           or else Char in 16#098F# .. 16#0990#
           or else Char in 16#0993# .. 16#09A8#
           or else Char in 16#09AA# .. 16#09B0#
           or else Char = 16#09B2#
           or else Char in 16#09B6# .. 16#09B9#
           or else Char in 16#09DC# .. 16#09DD#
           or else Char in 16#09DF# .. 16#09E1#
           or else Char in 16#09F0# .. 16#09F1#;

      when 16#0A00# .. 16#0AFF# =>
         return Char in 16#0A05# .. 16#0A0A#
           or else Char in 16#0A0F# .. 16#0A10#
           or else Char in 16#0A13# .. 16#0A28#
           or else Char in 16#0A2A# .. 16#0A30#
           or else Char in 16#0A32# .. 16#0A33#
           or else Char in 16#0A35# .. 16#0A36#
           or else Char in 16#0A38# .. 16#0A39#
           or else Char in 16#0A59# .. 16#0A5C#
           or else Char = 16#0A5E#
           or else Char in 16#0A72# .. 16#0A74#
           or else Char in 16#0A85# .. 16#0A8B#
           or else Char = 16#0A8D#
           or else Char in 16#0A8F# .. 16#0A91#
           or else Char in 16#0A93# .. 16#0AA8#
           or else Char in 16#0AAA# .. 16#0AB0#
           or else Char in 16#0AB2# .. 16#0AB3#
           or else Char in 16#0AB5# .. 16#0AB9#
           or else Char = 16#0ABD#
           or else Char = 16#0AE0#;

      when 16#0B00# .. 16#0BFF# =>
         return Char in 16#0B05# .. 16#0B0C#
           or else Char in 16#0B0F# .. 16#0B10#
           or else Char in 16#0B13# .. 16#0B28#
           or else Char in 16#0B2A# .. 16#0B30#
           or else Char in 16#0B32# .. 16#0B33#
           or else Char in 16#0B36# .. 16#0B39#
           or else Char = 16#0B3D#
           or else Char in 16#0B5C# .. 16#0B5D#
           or else Char in 16#0B5F# .. 16#0B61#
           or else Char in 16#0B85# .. 16#0B8A#
           or else Char in 16#0B8E# .. 16#0B90#
           or else Char in 16#0B92# .. 16#0B95#
           or else Char in 16#0B99# .. 16#0B9A#
           or else Char = 16#0B9C#
           or else Char in 16#0B9E# .. 16#0B9F#
           or else Char in 16#0BA3# .. 16#0BA4#
           or else Char in 16#0BA8# .. 16#0BAA#
           or else Char in 16#0BAE# .. 16#0BB5#
           or else Char in 16#0BB7# .. 16#0BB9#;

      when 16#0C00# .. 16#0CFF# =>
         return Char in 16#0C05# .. 16#0C0C#
           or else Char in 16#0C0E# .. 16#0C10#
           or else Char in 16#0C12# .. 16#0C28#
           or else Char in 16#0C2A# .. 16#0C33#
           or else Char in 16#0C35# .. 16#0C39#
           or else Char in 16#0C60# .. 16#0C61#
           or else Char in 16#0C85# .. 16#0C8C#
           or else Char in 16#0C8E# .. 16#0C90#
           or else Char in 16#0C92# .. 16#0CA8#
           or else Char in 16#0CAA# .. 16#0CB3#
           or else Char in 16#0CB5# .. 16#0CB9#
           or else Char = 16#0CDE#
           or else Char in 16#0CE0# .. 16#0CE1#;

      when 16#0D00# .. 16#0DFF# =>
         return Char in 16#0D05# .. 16#0D0C#
           or else Char in 16#0D0E# .. 16#0D10#
           or else Char in 16#0D12# .. 16#0D28#
           or else Char in 16#0D2A# .. 16#0D39#
           or else Char in 16#0D60# .. 16#0D61#;

      when 16#0E00# .. 16#0EFF# =>
         return Char in 16#0E01# .. 16#0E2E#
           or else Char = 16#0E30#
           or else Char in 16#0E32# .. 16#0E33#
           or else Char in 16#0E40# .. 16#0E45#
           or else Char in 16#0E81# .. 16#0E82#
           or else Char = 16#0E84#
           or else Char in 16#0E87# .. 16#0E88#
           or else Char = 16#0E8A#
           or else Char = 16#0E8D#
           or else Char in 16#0E94# .. 16#0E97#
           or else Char in 16#0E99# .. 16#0E9F#
           or else Char in 16#0EA1# .. 16#0EA3#
           or else Char = 16#0EA5#
           or else Char = 16#0EA7#
           or else Char in 16#0EAA# .. 16#0EAB#
           or else Char in 16#0EAD# .. 16#0EAE#
           or else Char = 16#0EB0#
           or else Char in 16#0EB2# .. 16#0EB3#
           or else Char = 16#0EBD#
           or else Char in 16#0EC0# .. 16#0EC4#;

      when 16#0F00# .. 16#0FFF# =>
         return Char in 16#0F40# .. 16#0F47#
           or else Char in 16#0F49# .. 16#0F69#;

      when 16#1000# .. 16#11FF# =>
         return Char in 16#10A0# .. 16#10C5#
           or else Char in 16#10D0# .. 16#10F6#
           or else Char = 16#1100#
           or else Char in 16#1102# .. 16#1103#
           or else Char in 16#1105# .. 16#1107#
           or else Char = 16#1109#
           or else Char in 16#110B# .. 16#110C#
           or else Char in 16#110E# .. 16#1112#
           or else Char = 16#113C#
           or else Char = 16#113E#
           or else Char = 16#1140#
           or else Char = 16#114C#
           or else Char = 16#114E#
           or else Char = 16#1150#
           or else Char in 16#1154# .. 16#1155#
           or else Char = 16#1159#
           or else Char in 16#115F# .. 16#1161#
           or else Char = 16#1163#
           or else Char = 16#1165#
           or else Char = 16#1167#
           or else Char = 16#1169#
           or else Char in 16#116D# .. 16#116E#
           or else Char in 16#1172# .. 16#1173#
           or else Char = 16#1175#
           or else Char = 16#119E#
           or else Char = 16#11A8#
           or else Char = 16#11AB#
           or else Char in 16#11AE# .. 16#11AF#
           or else Char in 16#11B7# .. 16#11B8#
           or else Char = 16#11BA#
           or else Char in 16#11BC# .. 16#11C2#
           or else Char = 16#11EB#
           or else Char = 16#11F0#
           or else Char = 16#11F9#;

      when 16#1200# .. 16#1FFF# =>
         return Char in 16#1E00# .. 16#1E9B#
           or else Char in 16#1EA0# .. 16#1EF9#
           or else Char in 16#1F00# .. 16#1F15#
           or else Char in 16#1F18# .. 16#1F1D#
           or else Char in 16#1F20# .. 16#1F45#
           or else Char in 16#1F48# .. 16#1F4D#
           or else Char in 16#1F50# .. 16#1F57#
           or else Char = 16#1F59#
           or else Char = 16#1F5B#
           or else Char = 16#1F5D#
           or else Char in 16#1F5F# .. 16#1F7D#
           or else Char in 16#1F80# .. 16#1FB4#
           or else Char in 16#1FB6# .. 16#1FBC#
           or else Char = 16#1FBE#
           or else Char in 16#1FC2# .. 16#1FC4#
           or else Char in 16#1FC6# .. 16#1FCC#
           or else Char in 16#1FD0# .. 16#1FD3#
           or else Char in 16#1FD6# .. 16#1FDB#
           or else Char in 16#1FE0# .. 16#1FEC#
           or else Char in 16#1FF2# .. 16#1FF4#
           or else Char in 16#1FF6# .. 16#1FFC#;

      when others =>
         --  Everything from 16#2000# upwards. The last two ranges close
         --  the BaseChar production; without them the characters in
         --  16#3105# .. 16#312C# and 16#AC00# .. 16#D7A3# would be
         --  rejected here, and therefore by Is_Letter as well.
         return Char = 16#2126#
           or else Char in 16#212A# .. 16#212B#
           or else Char = 16#212E#
           or else Char in 16#2180# .. 16#2182#
           or else Char in 16#3041# .. 16#3094#
           or else Char in 16#30A1# .. 16#30FA#
           or else Char in 16#3105# .. 16#312C#
           or else Char in 16#AC00# .. 16#D7A3#;
   end case;
end Is_Base_Char;
--------------
-- Is_Digit --
--------------
function Is_Digit (Char : Unicode_Char) return Boolean is
   --  Return True when Char is in Digit_Zero .. Digit_Nine, or is
   --  above 16#FF# and falls in one of the ranges listed in the case
   --  statement below.
begin
   if Char in Digit_Zero .. Digit_Nine then
      return True;
   elsif Char <= 16#FF# then
      --  None of the remaining ranges is considered for codes up to
      --  16#FF#.
      return False;
   end if;

   case Char is
      when 16#0660# .. 16#0669#
         | 16#06F0# .. 16#06F9#
         | 16#0966# .. 16#096F#
         | 16#09E6# .. 16#09EF#
         | 16#0A66# .. 16#0A6F#
         | 16#0AE6# .. 16#0AEF#
         | 16#0B66# .. 16#0B6F#
         | 16#0BE7# .. 16#0BEF#
         | 16#0C66# .. 16#0C6F#
         | 16#0CE6# .. 16#0CEF#
         | 16#0D66# .. 16#0D6F#
         | 16#0E50# .. 16#0E59#
         | 16#0ED0# .. 16#0ED9#
         | 16#0F20# .. 16#0F29# =>
         return True;

      when others =>
         return False;
   end case;
end Is_Digit;
-----------------------
-- Is_Combining_Char --
-----------------------
function Is_Combining_Char (Char : Unicode_Char) return Boolean is
   --  Return True when Char is one of the code points enumerated
   --  below, False otherwise. Nothing below 16#0300# is accepted.
   --
   --  The choices are grouped by the code point interval they belong
   --  to, each group being introduced by a comment giving that
   --  interval; every group simply returns True.
begin
   case Char is
      --  16#0300# .. 16#05FF#
      when 16#0300# .. 16#0345#
         | 16#0360# .. 16#0361#
         | 16#0483# .. 16#0486#
         | 16#0591# .. 16#05A1#
         | 16#05A3# .. 16#05B9#
         | 16#05BB# .. 16#05BD#
         | 16#05BF#
         | 16#05C1# .. 16#05C2#
         | 16#05C4# =>
         return True;

      --  16#0600# .. 16#06FF#
      when 16#064B# .. 16#0652#
         | 16#0670#
         | 16#06D6# .. 16#06DC#
         | 16#06DD# .. 16#06DF#
         | 16#06E0# .. 16#06E4#
         | 16#06E7# .. 16#06E8#
         | 16#06EA# .. 16#06ED# =>
         return True;

      --  16#0700# .. 16#09FF#
      when 16#0901# .. 16#0903#
         | 16#093C#
         | 16#093E# .. 16#094C#
         | 16#094D#
         | 16#0951# .. 16#0954#
         | 16#0962# .. 16#0963#
         | 16#0981# .. 16#0983#
         | 16#09BC#
         | 16#09BE#
         | 16#09BF#
         | 16#09C0# .. 16#09C4#
         | 16#09C7# .. 16#09C8#
         | 16#09CB# .. 16#09CD#
         | 16#09D7#
         | 16#09E2# .. 16#09E3# =>
         return True;

      --  16#0A00# .. 16#0AFF#
      when 16#0A02#
         | 16#0A3C#
         | 16#0A3E#
         | 16#0A3F#
         | 16#0A40# .. 16#0A42#
         | 16#0A47# .. 16#0A48#
         | 16#0A4B# .. 16#0A4D#
         | 16#0A70# .. 16#0A71#
         | 16#0A81# .. 16#0A83#
         | 16#0ABC#
         | 16#0ABE# .. 16#0AC5#
         | 16#0AC7# .. 16#0AC9#
         | 16#0ACB# .. 16#0ACD# =>
         return True;

      --  16#0B00# .. 16#0BFF#
      when 16#0B01# .. 16#0B03#
         | 16#0B3C#
         | 16#0B3E# .. 16#0B43#
         | 16#0B47# .. 16#0B48#
         | 16#0B4B# .. 16#0B4D#
         | 16#0B56# .. 16#0B57#
         | 16#0B82# .. 16#0B83#
         | 16#0BBE# .. 16#0BC2#
         | 16#0BC6# .. 16#0BC8#
         | 16#0BCA# .. 16#0BCD#
         | 16#0BD7# =>
         return True;

      --  16#0C00# .. 16#0CFF#
      when 16#0C01# .. 16#0C03#
         | 16#0C3E# .. 16#0C44#
         | 16#0C46# .. 16#0C48#
         | 16#0C4A# .. 16#0C4D#
         | 16#0C55# .. 16#0C56#
         | 16#0C82# .. 16#0C83#
         | 16#0CBE# .. 16#0CC4#
         | 16#0CC6# .. 16#0CC8#
         | 16#0CCA# .. 16#0CCD#
         | 16#0CD5# .. 16#0CD6# =>
         return True;

      --  16#0D00# .. 16#0DFF#
      when 16#0D02# .. 16#0D03#
         | 16#0D3E# .. 16#0D43#
         | 16#0D46# .. 16#0D48#
         | 16#0D4A# .. 16#0D4D#
         | 16#0D57# =>
         return True;

      --  16#0E00# .. 16#0EFF#
      when 16#0E31#
         | 16#0E34# .. 16#0E3A#
         | 16#0E47# .. 16#0E4E#
         | 16#0EB1#
         | 16#0EB4# .. 16#0EB9#
         | 16#0EBB# .. 16#0EBC#
         | 16#0EC8# .. 16#0ECD# =>
         return True;

      --  16#0F00# .. 16#0FFF#
      when 16#0F18# .. 16#0F19#
         | 16#0F35#
         | 16#0F37#
         | 16#0F39#
         | 16#0F3E#
         | 16#0F3F#
         | 16#0F71# .. 16#0F84#
         | 16#0F86# .. 16#0F8B#
         | 16#0F90# .. 16#0F95#
         | 16#0F97#
         | 16#0F99# .. 16#0FAD#
         | 16#0FB1# .. 16#0FB7#
         | 16#0FB9# =>
         return True;

      --  16#1000# and above
      when 16#20D0# .. 16#20DC#
         | 16#20E1#
         | 16#302A# .. 16#302F#
         | 16#3099#
         | 16#309A# =>
         return True;

      when others =>
         return False;
   end case;
end Is_Combining_Char;
-----------------
-- Is_Extender --
-----------------
function Is_Extender (Char : Unicode_Char) return Boolean is
   --  Return True when Char is one of the code points listed in the
   --  case statement below, False otherwise. 16#00B7# is the only
   --  accepted code that is not above 16#FF#.
begin
   case Char is
      when 16#00B7#
         | 16#02D0#
         | 16#02D1#
         | 16#0387#
         | 16#0640#
         | 16#0E46#
         | 16#0EC6#
         | 16#3005#
         | 16#3031# .. 16#3035#
         | 16#309D# .. 16#309E#
         | 16#30FC# .. 16#30FE# =>
         return True;

      when others =>
         return False;
   end case;
end Is_Extender;
--------------------
-- Is_Ideographic --
--------------------
function Is_Ideographic (Char : Unicode_Char) return Boolean is
   --  Return True when Char is 16#3007#, or lies in either
   --  16#3021# .. 16#3029# or 16#4E00# .. 16#9FA5#.
begin
   case Char is
      when 16#3007#
         | 16#3021# .. 16#3029#
         | 16#4E00# .. 16#9FA5# =>
         return True;

      when others =>
         return False;
   end case;
end Is_Ideographic;
----------------
-- To_Unicode --
----------------
function To_Unicode (C : Character) return Unicode_Char is
   --  Return the Unicode_Char whose value is the position number of C
   --  in type Character.
   Code : constant Unicode_Char := Character'Pos (C);
begin
   return Code;
end To_Unicode;
end Unicode;