File : unicode-ces-basic_8bit.ads
-----------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2001 --
-- ACT-Europe --
-- Author: Emmanuel Briot --
-- --
-- This library is free software; you can redistribute it and/or --
-- modify it under the terms of the GNU General Public --
-- License as published by the Free Software Foundation; either --
-- version 2 of the License, or (at your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public --
-- License along with this library; if not, write to the --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330, --
-- Boston, MA 02111-1307, USA. --
-- --
-- As a special exception, if other files instantiate generics from --
-- this unit, or you link this unit with other files to produce an --
-- executable, this unit does not by itself cause the resulting --
-- executable to be covered by the GNU General Public License. This --
-- exception does not however invalidate any other reasons why the --
-- executable file might be covered by the GNU Public License. --
-----------------------------------------------------------------------
-- This package implements a basic 8bit encoding.
-- Only code points from 16#00# to 16#FF# can be encoded in such strings.
-- These are the standard Ada Strings.
--
-- However, then can be used to read files that contain accented characters,
-- in combination with Unicode.CCS.Iso_8859_1 for instance
with Unicode.CES.Utf32;
with Unicode.CCS;
with Unchecked_Deallocation;
package Unicode.CES.Basic_8bit is
-----------
-- Types --
-----------
subtype Basic_8bit_String is String;
type Basic_8bit_String_Access is access Basic_8bit_String;
-- A heigh bit string, undefined byte-order
-------------------------------------------
-- Conversion to and from byte sequences --
-------------------------------------------
function Encode (Char : Unicode_Char) return Basic_8bit_String;
-- Return the byte sequence representing Char in the 8bit character
-- encoding form
-- Invalid_Encoding is raised if Char can not be converted.
function Read (Str : Basic_8bit_String; Index : Positive)
return Unicode_Char;
-- Return the character starting at location Index in Str
function Width (Char : Unicode_Char) return Natural;
-- Return the number of bytes occupied by the 8bit representation of Char
function Length (Str : Basic_8bit_String) return Natural;
-- Return the number of characters in Str
------------------------------------------
-- Conversion to and from 8bit-encoding --
------------------------------------------
function From_Utf32
(Str : Unicode.CES.Utf32.Utf32_LE_String)
return Basic_8bit_String;
-- Return a new string, from a utf32-encoded string.
function To_Utf32
(Str : Basic_8bit_String)
return Unicode.CES.Utf32.Utf32_LE_String;
-- Return a new utf32-encoded string, from a standard Ada string.
---------------------------------------------
-- Byte order and character set conversion --
---------------------------------------------
function To_Unicode_LE
(Str : Basic_8bit_String;
Cs : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
-- Convert Str to a Unicode string, assuming it contains code points from
-- the character set CS.
-- Byte-order is irrelevant for 8bit strings, but is kept for interface
-- compatibility with other similar functions
function To_CS
(Str : Basic_8bit_String;
Cs : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
Order : Byte_Order := Default_Byte_Order) return Basic_8bit_String;
-- Convert Str to the character set Cs, assuming it contains Unicode
-- characters.
---------------------
-- Encoding Scheme --
---------------------
Basic_8bit_Encoding : constant Encoding_Scheme :=
(Read => Read'Access,
Width => Width'Access,
Encode => Encode_Function' (Encode'Access),
Length => Length'Access);
------------------
-- Deallocation --
------------------
procedure Free is new Unchecked_Deallocation
(Basic_8bit_String, Basic_8bit_String_Access);
private
pragma Inline (Width);
end Unicode.CES.Basic_8bit;