File : unicode-ces-utf32.adb


-----------------------------------------------------------------------

--                XML/Ada - An XML suite for Ada95                   --

--                                                                   --

--                       Copyright (C) 2001                          --

--                            ACT-Europe                             --

--                       Author: Emmanuel Briot                      --

--                                                                   --

-- This library is free software; you can redistribute it and/or     --

-- modify it under the terms of the GNU General Public               --

-- License as published by the Free Software Foundation; either      --

-- version 2 of the License, or (at your option) any later version.  --

--                                                                   --

-- This library is distributed in the hope that it will be useful,   --

-- but WITHOUT ANY WARRANTY; without even the implied warranty of    --

-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU --

-- General Public License for more details.                          --

--                                                                   --

-- You should have received a copy of the GNU General Public         --

-- License along with this library; if not, write to the             --

-- Free Software Foundation, Inc., 59 Temple Place - Suite 330,      --

-- Boston, MA 02111-1307, USA.                                       --

--                                                                   --

-- As a special exception, if other files instantiate generics from  --

-- this unit, or you link this unit with other files to produce an   --

-- executable, this  unit  does not  by itself cause  the resulting  --

-- executable to be covered by the GNU General Public License. This  --

-- exception does not however invalidate any other reasons why the   --

-- executable file  might be covered by the  GNU Public License.     --

-----------------------------------------------------------------------


with Unicode.Names.Basic_Latin;  use Unicode.Names.Basic_Latin;
with Unicode.CCS;                use Unicode.CCS;

package body Unicode.CES.Utf32 is

   ------------

   -- Encode --

   ------------


   function Encode (Char : Unicode_Char) return Utf32_LE_String is
   begin
      return Character'Val (Char and 16#000000FF#)
        & Character'Val ((Char and 16#0000FF00#) / (2 ** 8))
        & Character'Val ((Char and 16#00FF0000#) / (2 ** 16))
        & Character'Val ((Char and 16#FF000000#) / (2 ** 24));
   end Encode;

   ---------------

   -- Encode_BE --

   ---------------


   function Encode_BE (Char : Unicode_Char) return Utf32_LE_String is
   begin
      return Character'Val ((Char and 16#FF000000#) / (2 ** 24))
        & Character'Val ((Char and 16#00FF0000#) / (2 ** 16))
        & Character'Val ((Char and 16#0000FF00#) / (2 ** 8))
        &  Character'Val (Char and 16#000000FF#);
   end Encode_BE;

   ----------

   -- Read --

   ----------


   function Read (Str : Utf32_LE_String; Index : Positive)
      return Unicode_Char is
   begin
      if Index > Str'Last - 3 then
         return Nul;
      end if;
      return Character'Pos (Str (Index))
        + Character'Pos (Str (Index + 1)) * (2 ** 8)
        + Character'Pos (Str (Index + 2)) * (2 ** 16)
        + Character'Pos (Str (Index + 3)) * (2 ** 24);
   end Read;

   -------------

   -- Read_BE --

   -------------


   function Read_BE (Str : Utf32_BE_String; Index : Positive)
      return Unicode_Char is
   begin
      if Index > Str'Last - 3 then
         return Nul;
      end if;
      return Character'Pos (Str (Index + 3))
        + Character'Pos (Str (Index + 2)) * (2 ** 8)
        + Character'Pos (Str (Index + 1)) * (2 ** 16)
        + Character'Pos (Str (Index)) * (2 ** 24);
   end Read_BE;

   -----------

   -- Width --

   -----------


   function Width (Char : Unicode_Char) return Natural is
   begin
      return 4;
   end Width;

   ------------

   -- Length --

   ------------


   function Length (Str : Utf32_String) return Natural is
   begin
      return Str'Length / 4;
   end Length;

   -----------------

   -- To_Utf32_LE --

   -----------------


   function To_Unicode_LE
     (Str   : Utf32_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Utf32_LE_String
   is
      Offset : Natural;
      O : Byte_Order := Order;
      J : Positive := Str'First;
      S : Utf32_LE_String (1 .. Str'Length);
      C : Unicode_Char;
      BOM : Bom_Type;

   begin
      Read_Bom (Str, Offset, BOM);

      case BOM is
         when Utf32_LE => O := Low_Byte_First;
         when Utf32_BE => O := High_Byte_First;
         when Unknown  => null;
         when others   => raise Invalid_Encoding;
      end case;

      if O = Low_Byte_First then
         if Cs.To_Unicode = Identity'Access then
            return Str (Str'First + Offset .. Str'Last);
         else
            J := J + Offset;
            while J <= Str'Last loop
               C := Cs.To_Unicode (Read (Str, J));
               S (J .. J + 3) := Encode (C);
               J := J + 4;
            end loop;
            return S (S'First + Offset .. S'Last);
         end if;
      else
         J := J + Offset;
         if Cs.To_Unicode = Identity'Access then
            while J <= Str'Last loop
               S (J + 3) := Str (J);
               S (J + 2) := Str (J + 1);
               S (J + 1) := Str (J + 2);
               S (J)     := Str (J + 3);
               J := J + 4;
            end loop;
         else
            while J <= Str'Last loop
               C := Cs.To_Unicode (Read_BE (Str, J));
               S (J .. J + 3) := Encode (C);
               J := J + 4;
            end loop;
         end if;
         return S (S'First + Offset .. S'Last);
      end if;
   end To_Unicode_LE;

   -----------

   -- To_CS --

   -----------


   function To_CS
     (Str   : Utf32_LE_String;
      Cs    : Unicode.CCS.Character_Set := Unicode.CCS.Unicode_Character_Set;
      Order : Byte_Order := Default_Byte_Order) return Utf32_String
   is
      J : Positive := Str'First;
      S : Utf32_LE_String (1 .. Str'Length);
      C : Unicode_Char;

   begin
      if Order = Low_Byte_First then
         if Cs.To_CS = Identity'Access then
            return Str;
         else
            while J <= Str'Last loop
               C := Cs.To_CS (Read (Str, J));
               S (J .. J + 3) := Encode (C);
               J := J + 4;
            end loop;
            return S;
         end if;
      else
         if Cs.To_CS = Identity'Access then
            while J <= Str'Last loop
               S (J + 3) := Str (J);
               S (J + 2) := Str (J + 1);
               S (J + 1) := Str (J + 2);
               S (J)     := Str (J + 3);
               J := J + 4;
            end loop;
         else
            while J <= Str'Last loop
               C := Cs.To_CS (Read_BE (Str, J));
               S (J .. J + 3) := Encode_BE (C);
               J := J + 4;
            end loop;
         end if;
         return S;
      end if;
   end To_CS;

end Unicode.CES.Utf32;