--  File : input_sources.ads


-----------------------------------------------------------------------

--                XML/Ada - An XML suite for Ada95                   --

--                                                                   --

--                       Copyright (C) 2001                          --

--                            ACT-Europe                             --

--                       Author: Emmanuel Briot                      --

--                                                                   --

-- This library is free software; you can redistribute it and/or     --

-- modify it under the terms of the GNU General Public               --

-- License as published by the Free Software Foundation; either      --

-- version 2 of the License, or (at your option) any later version.  --

--                                                                   --

-- This library is distributed in the hope that it will be useful,   --

-- but WITHOUT ANY WARRANTY; without even the implied warranty of    --

-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU --

-- General Public License for more details.                          --

--                                                                   --

-- You should have received a copy of the GNU General Public         --

-- License along with this library; if not, write to the             --

-- Free Software Foundation, Inc., 59 Temple Place - Suite 330,      --

-- Boston, MA 02111-1307, USA.                                       --

--                                                                   --

-- As a special exception, if other files instantiate generics from  --

-- this unit, or you link this unit with other files to produce an   --

-- executable, this  unit  does not  by itself cause  the resulting  --

-- executable to be covered by the GNU General Public License. This  --

-- exception does not however invalidate any other reasons why the   --

-- executable file  might be covered by the  GNU Public License.     --

-----------------------------------------------------------------------


--  <description>
--  This package provides a hierarchy of objects that return characters
--  that can then be used for different tasks.
--  Characters are read strictly in forward order: it is not possible to go
--  backward in the stream, nor to re-read previous characters. This
--  interface is intentionally kept minimal, so that it can easily be used
--  with files, sockets, ...
--  </description>


with Unicode;
with Unicode.CES;
with Unicode.CES.Basic_8bit;
with Unicode.CCS;

package Input_Sources is

   type Input_Source is abstract tagged limited private;
   --  General object for reading characters, one at a time.
   --  This is the root of the hierarchy: the type is abstract, so concrete
   --  sources are derived from it and must override the abstract primitives
   --  Next_Char and Eof declared below.
   --  The type is limited, so objects cannot be copied or assigned.

   type Input_Source_Access is access all Input_Source'Class;
   --  Class-wide access type, designating an object of any type derived
   --  from Input_Source.

   procedure Next_Char
     (From : in out Input_Source;
      C    : out Unicode.Unicode_Char) is abstract;
   --  Return a single character from From, in C.
   --  This also increments the internal index, so that the next time this
   --  procedure is called the next character in the stream is returned.
   --  NOTE(review): the behavior when Eof (From) is already True is not
   --  specified in this spec; check the concrete implementations.

   function Eof (From : Input_Source) return Boolean is abstract;
   --  Return True if there are no more characters to read on the stream.

   function Prolog_Size (From : Input_Source) return Natural;
   --  Return the number of characters that were ignored at the beginning
   --  of the stream (for instance because they indicated the encoding used
   --  in the file).
   --  The corresponding record component defaults to 0.

   procedure Set_Encoding
     (Input : in out Input_Source;
      Es    : Unicode.CES.Encoding_Scheme);
   --  Set the encoding associated with the input stream.
   --  This can be used to convert from any type of encoding for the byte
   --  sequence (Utf8, Utf16, ..) and any character set (Latin-1, Unicode,..)
   --  to unicode characters.
   --  Input_Sources are encouraged to guess the encoding whenever possible,
   --  but you can override that default at any time.

   function Get_Encoding (Input : Input_Source)
      return Unicode.CES.Encoding_Scheme;
   --  Return the encoding scheme associated with the input.
   --  The corresponding record component is initialized to
   --  Unicode.CES.Basic_8bit.Basic_8bit_Encoding.

   procedure Set_Character_Set
     (Input : in out Input_Source;
      Cs    : Unicode.CCS.Character_Set);
   --  Set the character set associated with the stream.
   --  It isn't possible to get the character set automatically for a stream.
   --  As a result, the default one is always considered to be Unicode
   --  (the record component is initialized to
   --  Unicode.CCS.Unicode_Character_Set).

   function Get_Character_Set (Input : Input_Source)
      return Unicode.CCS.Character_Set;
   --  Return the character set associated with the input.

   procedure Set_System_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the system ID associated with the input source.
   --  Although this is optional, it is still useful since it can be used to
   --  resolve relative URIs from documents.

   function Get_System_Id (Input : Input_Source)
      return Unicode.CES.Byte_Sequence;
   --  Return the system Id.
   --  NOTE(review): the result when no system Id was ever set is not
   --  specified here; confirm against the package body.

   procedure Set_Public_Id
     (Input : in out Input_Source;
      Id    : Unicode.CES.Byte_Sequence);
   --  Set the public ID associated with the input source.
   --  This will be provided as part of the location information, if it is
   --  given.

   function Get_Public_Id (Input : Input_Source)
      return Unicode.CES.Byte_Sequence;
   --  Return the public Id.
   --  NOTE(review): the result when no public Id was ever set is not
   --  specified here; confirm against the package body.

   procedure Close (Input : in out Input_Source);
   --  Free the memory allocated in the input.
   --  NOTE(review): presumably this releases the Public_Id and System_Id
   --  sequences held in the record below; confirm against the package body.
   --  Whether Input may still be used after this call is not specified here.

private
   --  Full view of Input_Source: state shared by every concrete source.
   type Input_Source is abstract tagged limited record
      --  Value reported by the Prolog_Size function; 0 by default
      Prolog_Size : Natural := 0;
      --  Encoding scheme of the byte sequence; basic 8-bit by default
      Es          : Unicode.CES.Encoding_Scheme :=
        Unicode.CES.Basic_8bit.Basic_8bit_Encoding;
      --  Character set of the stream; Unicode by default
      Cs          : Unicode.CCS.Character_Set :=
        Unicode.CCS.Unicode_Character_Set;
      --  Identifiers held through Byte_Sequence_Access values, with no
      --  explicit initial value (presumably null until the matching Set_*
      --  procedure is called; verify in the package body).
      Public_Id   : Unicode.CES.Byte_Sequence_Access;
      System_Id   : Unicode.CES.Byte_Sequence_Access;
   end record;
end Input_Sources;