-- File : input_sources.ads
-----------------------------------------------------------------------
-- XML/Ada - An XML suite for Ada95 --
-- --
-- Copyright (C) 2001 --
-- ACT-Europe --
-- Author: Emmanuel Briot --
-- --
-- This library is free software; you can redistribute it and/or --
-- modify it under the terms of the GNU General Public --
-- License as published by the Free Software Foundation; either --
-- version 2 of the License, or (at your option) any later version. --
-- --
-- This library is distributed in the hope that it will be useful, --
-- but WITHOUT ANY WARRANTY; without even the implied warranty of --
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU --
-- General Public License for more details. --
-- --
-- You should have received a copy of the GNU General Public --
-- License along with this library; if not, write to the --
-- Free Software Foundation, Inc., 59 Temple Place - Suite 330, --
-- Boston, MA 02111-1307, USA. --
-- --
-- As a special exception, if other files instantiate generics from --
-- this unit, or you link this unit with other files to produce an --
-- executable, this unit does not by itself cause the resulting --
-- executable to be covered by the GNU General Public License. This --
-- exception does not however invalidate any other reasons why the --
-- executable file might be covered by the GNU Public License. --
-----------------------------------------------------------------------
-- <description>
-- This package provides a hierarchy of objects that return characters
-- that can then be used for different tasks.
-- It is not possible to go backward or to return to previous characters.
-- This interface is intentionally kept minimal, so that it can easily be
-- used with files, sockets, ...
-- </description>
with Unicode;
with Unicode.CES;
with Unicode.CES.Basic_8bit;
with Unicode.CCS;
package Input_Sources is

   type Input_Source is abstract tagged limited private;
   --  Root of the hierarchy: a general object from which characters are
   --  read one at a time.

   type Input_Source_Access is access all Input_Source'Class;
   --  Access type able to designate any object of the Input_Source
   --  hierarchy.

   procedure Next_Char
     (From : in out Input_Source;
      C    : out Unicode.Unicode_Char) is abstract;
   --  Read a single character from From and return it in C.
   --  The internal index is incremented as well, so that the next call to
   --  this procedure returns the next character in the stream.

   function Eof (From : Input_Source) return Boolean is abstract;
   --  Return True when there are no more characters to read on the stream.

   function Prolog_Size (From : Input_Source) return Natural;
   --  Return the number of characters that were ignored at the beginning of
   --  the stream (for instance because they indicated the encoding used in
   --  the file).

   procedure Set_Encoding
     (Input : in out Input_Source;
      Es    : Unicode.CES.Encoding_Scheme);
   --  Set the encoding associated with the input stream.
   --  This makes it possible to convert from any encoding of the byte
   --  sequence (Utf8, Utf16, ...) and any character set (Latin-1,
   --  Unicode, ...) to unicode characters.
   --  Input_Sources are encouraged to guess the encoding whenever possible,
   --  but that default can be overridden at any time through this procedure.

   function Get_Encoding
     (Input : Input_Source) return Unicode.CES.Encoding_Scheme;
   --  Return the encoding scheme associated with the input.

   procedure Set_Character_Set
     (Input : in out Input_Source;
      Cs    : Unicode.CCS.Character_Set);
   --  Set the character set associated with the stream.
   --  The character set cannot be detected automatically for a stream; as a
   --  result, the default one is always considered to be Unicode.

   function Get_Character_Set
     (Input : Input_Source) return Unicode.CCS.Character_Set;
   --  Return the character set associated with the input.

   procedure Set_System_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the system ID associated with the input source.
   --  This is optional, but still useful since the system ID can be used to
   --  resolve relative URI's from documents.

   function Get_System_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  Return the system Id.

   procedure Set_Public_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the public ID associated with the input source.
   --  When given, it is provided as part of the location information.

   function Get_Public_Id
     (Input : Input_Source) return Unicode.CES.Byte_Sequence;
   --  Return the public Id.

   procedure Close (Input : in out Input_Source);
   --  Free the memory allocated in the input.

private

   type Input_Source is abstract tagged limited record
      Prolog_Size : Natural := 0;
      --  Initially 0. See the function Prolog_Size above.

      Es          : Unicode.CES.Encoding_Scheme :=
        Unicode.CES.Basic_8bit.Basic_8bit_Encoding;
      --  Encoding scheme; basic 8bit encoding unless changed.

      Cs          : Unicode.CCS.Character_Set :=
        Unicode.CCS.Unicode_Character_Set;
      --  Character set; Unicode unless changed.

      Public_Id   : Unicode.CES.Byte_Sequence_Access;
      System_Id   : Unicode.CES.Byte_Sequence_Access;
      --  Storage for the public and system identifiers; no explicit default
      --  is given here.
   end record;

end Input_Sources;