1. with Ada.Streams;                       use Ada.Streams; 
  2. with Ada.Streams.Stream_IO;             use Ada.Streams.Stream_IO; 
  3. with Interfaces;                        use Interfaces; 
  4.  
  5. private with Ada.Containers.Doubly_Linked_Lists; 
  6. private with Ada.Unchecked_Deallocation; 
  7.  
  8. package Tokens.Tokenizers is 
  9.  
  10.     -- A Tokenizer reads from an input stream and splits the text using a set of 
  11.     -- token-delimiter characters to return a series of Token objects. If the 
  12.     -- stream contains character sequences that are not recognizable tokens, or 
  13.     -- are malformed tokens, Token_Exception will be raised and the erroneous 
  14.     -- text discarded. 
  15.     type Tokenizer is new Limited_Object with private; 
  16.     type A_Tokenizer is access all Tokenizer'Class; 
  17.  
  18.     -- Creates a new Tokenizer. Use Delete to dispose of it when finished. 
  19.     function Create_Tokenizer return A_Tokenizer; 
  20.  
  21.     -- Returns the tokenizer's current location in the input stream (or the 
  22.     -- location of the next token to be returned, if tokens have been pushed 
  23.     -- back). 
  24.     function Get_Location( this : not null access Tokenizer'Class ) return Token_Location; 
  25.  
  26.     -- Returns the next Token read from the input stream. If tokens were 
  27.     -- previously pushed back to the tokenizer, the most recent will be returned 
  28.     -- now instead. Token_Exception will be raised if the input stream contents 
  29.     -- can't be recognized as a token or the token is malformed. 
  30.     function Get_Next( this : not null access Tokenizer'Class ) return A_Token; 
  31.  
  32.     -- Gives 'token', the Token most recently received from the Tokenizer, back 
  33.     -- to it; the next call to Get_Next will return it instead of reading input. 
  34.     -- Multiple tokens can be returned to the Tokenizer, in the order in which 
  35.     -- they were originally received by the caller. 'token' is consumed (nulled). 
  36.     procedure Push_Back( this  : not null access Tokenizer'Class; 
  37.                          token : in out A_Token ); 
  38.     pragma Precondition( token /= null );     -- caller must pass a real token 
  39.     pragma Postcondition( token = null );     -- caller's access value is cleared 
  40.  
  41.     -- Sets the input stream for reading characters. The state of the Tokenizer 
  42.     -- will also be reset. If 'stream' is null, the Tokenizer's input will be 
  43.     -- cleared. 
  44.     procedure Set_Input( this   : not null access Tokenizer'Class; 
  45.                          stream : Stream_Access ); 
  46.  
  47.     -- Deletes the Tokenizer and sets 'this' to null. 
  48.     procedure Delete( this : in out A_Tokenizer ); 
  49.     pragma Postcondition( this = null ); 
  50.  
  51.     -- Raised when a token is malformed or unrecognized. 
  52.     Token_Exception : exception; 
  53.  
  54. private 
  55.  
  56.     -- Raised (see Read) when the input stream is empty. 
  57.     End_Error : exception renames Ada.Streams.Stream_IO.End_Error; 
  58.  
  59.     CT_ANY        : constant := 0;     -- default class of every charTypes entry 
  60.     CT_WHITESPACE : constant := 1;     -- presumably what Is_Whitespace tests for; TODO confirm 
  61.     CT_DELIMITER  : constant := 2;     -- presumably what Is_Delimiter tests for; TODO confirm 
  62.  
  63.     type Character_Type_Array is array(Character) of Unsigned_8;     -- one entry per Character value 
  64.  
  65.     -- charTypes is populated at elaboration time and constant thereafter 
  66.     charTypes : Character_Type_Array := Character_Type_Array'(others => CT_ANY); 
  67.  
  68.     -- Returns True if 'c' is a token delimiter character. 
  69.     function Is_Delimiter( c : Character ) return Boolean; 
  70.  
  71.     -- Returns True if 'c' is a whitespace character. 
  72.     function Is_Whitespace( c : Character ) return Boolean; 
  73.  
  74.     ---------------------------------------------------------------------------- 
  75.     -- List of A_Token values; the type of the Tokenizer's returnedTokens field. 
  76.     package Token_Lists is new Ada.Containers.Doubly_Linked_Lists( A_Token, "=" ); 
  77.     -- NOTE(review): the two types below are not referenced elsewhere in this spec; confirm use in the body. 
  78.     type Character_Array is array(Integer range <>) of Character; 
  79.     type A_Character_Array is access all Character_Array; 
  80.     -- Access to a String; the type of the Tokenizer's tokenBuf field. 
  81.     type A_String is access all String; 
  82.     -- Deallocates the String designated by an A_String and nulls the access value. 
  83.     procedure Delete is new Ada.Unchecked_Deallocation( String, A_String ); 
  84.  
  85.     ---------------------------------------------------------------------------- 
  86.  
  87.     type Tokenizer is new Limited_Object with 
  88.         record 
  89.             stream         : Stream_Access := null;                       -- input stream (see Set_Input) 
  90.             loc            : Token_Location := (line => 1, col => 1);     -- starts at line 1, column 1 
  91.             tokenLoc       : Token_Location;                              -- presumably set by Mark_Token_Location; TODO confirm 
  92.             peeked         : Boolean := False;                            -- presumably True while peekChar is valid; TODO confirm 
  93.             peekChar       : Character := ASCII.NUL;                      -- presumably a one-character lookahead; TODO confirm 
  94.             returnedTokens : Token_Lists.List;                            -- presumably tokens given back via Push_Back; TODO confirm 
  95.             tokenBuf       : A_String := new String(1..32);               -- token text buffer, initially 32 characters (see Grow_Buffer) 
  96.         end record; 
  97.     -- NOTE(review): presumably releases what the Tokenizer owns (e.g. tokenBuf); confirm in the body. 
  98.     procedure Delete( this : in out Tokenizer ); 
  99.  
  100.     -- Enlarges the token string buffer by a factor of 2. 
  101.     procedure Grow_Buffer( this : not null access Tokenizer'Class ); 
  102.  
  103.     -- Marks the current parsing location. 
  104.     procedure Mark_Token_Location( this : not null access Tokenizer'Class ); 
  105.  
  106.     -- Returns the next character from the input stream. Raises End_Error if the 
  107.     -- stream is empty. 
  108.     function Read( this : not null access Tokenizer'Class ) return Character; 
  109.  
  110. end Tokens.Tokenizers;