File : adagio-unicode.adb


------------------------------------------------------------------------------

--                         ADAGIO - ADALID - AENEA.                         --

--                                                                          --

--                            Copyright (C) 2003                            --

--                                 A. Mosteo.                               --

--                                                                          --

--  Authors: A. Mosteo. (adagio@mosteo.com)                                 --

--                                                                          --

--  If you have any questions in regard to this software, please address    --

--  them to the above email.                                                --

--                                                                          --

--  This program is free software; you can redistribute it and/or modify    --

--  it under the terms of the GNU General Public License as published by    --

--  the Free Software Foundation; either version 2 of the License, or (at   --

--  your option) any later version.                                         --

--                                                                          --

--  This program is distributed in the hope that it will be useful, but     --

--  WITHOUT ANY WARRANTY; without even the implied warranty of              --

--  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU       --

--  General Public License for more details.                                --

--                                                                          --

--  You should have received a copy of the GNU General Public License       --

--  along with this library; if not, write to the Free Software Foundation, --

--  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.          --

--                                                                          --

--  You are not allowed to use any part of this code to develop a program   --

--  whose output would be used to harass or prosecute other users of the    --

--  networks Adagio connects with. All data collected with Adagio or a tool --

--  containing Adagio code about other network users must remain            --

--  confidential and cannot be made public by any mean, nor be used to      --

--  harass or legally prosecute these users.                                --

------------------------------------------------------------------------------

--  $Id: adagio-unicode.adb,v 1.4 2004/01/21 21:05:42 Jano Exp $


--  Helper functions to deal with unicode strings


with Adagio.Network.Endian;

with Unicode.CCS.Iso_8859_1;  use Unicode.CCS;
with Unicode.CES.Utf8;        use Unicode.CES;
with Unicode.CES.Basic_8bit;

package body Adagio.Unicode is

   -- Decode an utf8 string, ensuring its Latin1:

   function From_utf8 (this : in String) return String is
   begin
      return
         Basic_8bit.From_utf32 (
            utf8.To_Utf32 (
               utf8.To_unicode_LE (
                  this, 
                  Cs => Iso_8859_1.Iso_8859_1_character_set)));
   exception
      when Invalid_code | Invalid_encoding =>
         raise Invalid_encoding;
   end From_utf8;

   -- Decode a raw 16-bit unicode string: 

   -- As we support only latin1, a high byte /= 0 raises Constraint_error

   function From_unicode16 (this : in String; Big_endian : in Boolean) 
      return String is
      Pos    : Integer := this'First;
      Result : Ustring;
   begin
      while Pos < this'Last loop
         if Big_endian then
            if Character'Pos (this (Pos)) /= 0 then
               raise Constraint_error;
            end if;
            Pos := Pos + 1;
         end if;

         ASU.Append (Result, this (Pos));
         Pos := Pos + 1;

         if not Big_endian then
            if Character'Pos (this (Pos)) /= 0 then
               raise Constraint_error;
            end if;
            Pos := Pos + 1;
         end if;
      end loop;

      return S (Result);
   end From_unicode16;

   -- Returns a Latin1 string from a G2 encoded string

   -- It can be a UTF8 encoded string or a

   -- 16 bit (endianness applies then) raw unicode character string.

   -- The 16#ff# selector must be the first character in that case.

   -- May raise exception if Latin1 can't hold the resulting string.

   function G2_to_string (this : in String; Big_endian : in Boolean) 
      return String is
   begin
      if This'Length = 0 then
         return "";
      elsif this (this'First) = Character'Val (16#ff#) then
         return From_unicode16 (
            this (this'First + 1 .. this'Last), Big_endian);
      else
         return From_utf8 (this);
      end if;
   end G2_to_string;

   -- Returns a Utf8 encoded string from Latin1 (Ada default)

   function To_utf8 (this : in String) return String is
      Result : Ustring;
   begin
      for N in this'Range loop
         -- We can do that directly because Latin1 maps directly into unicode.

         ASU.Append (Result, Utf8.Encode (Character'Pos (this (N))));
      end loop;

      return S (Result);
   end To_utf8;

end Adagio.Unicode;