logo

class

compiler::Tokenizer

sys::Obj
  compiler::CompilerSupport
    compiler::Tokenizer
   1  //
   2  // Copyright (c) 2006, Brian Frank and Andy Frank
   3  // Licensed under the Academic Free License version 3.0
   4  //
   5  // History:
   6  //    3 Sep 05  Brian Frank  Creation
   7  //   18 May 06  Brian Frank  Ported from Java to Fan
   8  //
   9  
  10  **
  11  ** Tokenizer inputs a Str and output a list of Tokens
  12  **
  13  class Tokenizer : CompilerSupport
  14  {
  15  
  16  //////////////////////////////////////////////////////////////////////////
  17  // Constructor
  18  //////////////////////////////////////////////////////////////////////////
  19  
  **
  ** Construct with characters of source file.  The buffer
  ** passed must be normalized in that all newlines must be
  ** represented strictly as \n and not \r or \r\n (see
  ** File.readAllStr).  If isDoc is false, we skip all star-star
  ** Fandoc comments.
  **
  new make(Compiler compiler, Location location, Str buf, Bool isDoc)
    : super(compiler)
  {
    this.buf      = buf
    this.filename = location.file
    this.isDoc    = isDoc
    this.tokens   = TokenVal[,]
    this.inStrLiteral = false

    // initialize cur and peek; pos indexes cur in buf
    cur = peek = ' '
    if (buf.size > 0) cur  = buf[0]
    if (buf.size > 1) peek = buf[1]
    pos = 0

    // if first line starts with #, then treat it like an end of
    // line, so that Unix guys can specify the executable to run
    // (shebang line); consume through the newline or to end of buffer
    if (cur === '#')
    {
      while (true)
      {
        if (cur === '\n') { consume; break }
        if (cur === 0) break
        consume
      }
    }
  }
  54  
  55  //////////////////////////////////////////////////////////////////////////
  56  // Access
  57  //////////////////////////////////////////////////////////////////////////
  58  
  **
  ** Tokenize the entire input into a list of tokens.
  ** The returned list always ends with the eof token.
  **
  TokenVal[] tokenize()
  {
    // keep pulling tokens until the eof token has been appended
    tok := next
    tokens.add(tok)
    while (tok.kind !== Token.eof)
    {
      tok = next
      tokens.add(tok)
    }
    return tokens
  }
  72  
  **
  ** Return the next token in the buffer.
  **
  private TokenVal next()
  {
    // find() returns null for whitespace and skipped comments,
    // so loop until it produces a real token
    while (true)
    {
      // save current line/col before find() consumes characters,
      // so the token is stamped with its starting position
      line := this.line
      col  := this.col

      // find next token
      TokenVal tok := find
      if (tok == null) continue

      // fill in token's location
      tok.file = filename
      tok.line = line
      tok.col  = col
      tok.newline = lastLine < line  // true if first token on its line

      // save last line
      lastLine = line

      return tok
    }
    return null // TODO - shouldn't need this (unreachable)
  }
 101  
  **
  ** Find the next token or return null.
  **
  ** NOTE: dispatch order matters -- the raw string check must
  ** precede the word check so r"..." is not lexed as identifier 'r',
  ** and comment checks must precede the symbol fallback.
  **
  private TokenVal find()
  {
    // skip whitespace
    if (cur.isSpace) { consume; return null }

    // raw string literal r"c:\dir\foo.txt"
    // (suppressed while inside a string literal being interpolated)
    if (cur === 'r' && peek === '"' && !inStrLiteral) return rawStr

    // alpha means keyword or identifier
    if (cur.isAlpha || cur === '_') return word

    // number or .number (note that + and - are handled as unary operator)
    if (cur.isDigit) return number
    if (cur === '.' && peek.isDigit) return number

    // str literal
    if (cur === '"')  return str
    if (cur === '`')  return uri
    if (cur === '\'') return ch

    // comments
    if (cur === '*' && peek === '*') return docComment
    if (cur === '/' && peek === '/') return skipCommentSL
    if (cur === '/' && peek === '*') return skipCommentML

    // symbols
    return symbol
  }
 133  
 134  //////////////////////////////////////////////////////////////////////////
 135  // Word
 136  //////////////////////////////////////////////////////////////////////////
 137  
 138    **
 139    ** Parse a word token: alpha (alpha|number)*
 140    ** Words are either keywords or identifiers
 141    **
 142    private TokenVal word()
 143    {
 144      // store starting position of word
 145      start := pos
 146  
 147      // find end of word to compute length
 148      while (cur.isAlphaNum || cur === '_') consume
 149  
 150      // create Str (gc note this string might now reference buf)
 151      word := buf[start...pos]
 152  
 153      // check keywords
 154      keyword := Token.keywords[word]
 155      if (keyword != null)
 156        return TokenVal.make(keyword)
 157  
 158      // otherwise this is a normal identifier
 159      return TokenVal.make(Token.identifier, word)
 160    }
 161  
 162  //////////////////////////////////////////////////////////////////////////
 163  // Number
 164  //////////////////////////////////////////////////////////////////////////
 165  
  **
  ** Parse a number literal token.  Handles int, float, hex,
  ** and duration literals (with ns/ms/sec/min/hr/day suffixes).
  **
  private TokenVal number()
  {
    // NOTE(review): neg is never assigned true in this method --
    // unary minus is tokenized separately (see find comment); confirm
    Bool neg := false
    Int whole := 0
    Int wholeCount := 0
    Float fraction := 0.0
    Int exp := 0
    Token tok := Token.intLiteral  // int or float literal

    // check for hex value
    if (cur === '0' && peek === 'x')
      return hex

    // read whole part; _ separators are skipped
    while (cur.isDigit)
    {
      whole = whole*10 + cur.fromDigit
      consume
      // it would be nice to actually trap max long value, but
      // as a simpler catch all we know decimal numbers must
      // never be bigger than 19 digits
      wholeCount++
      if (wholeCount > 19) throw err("Numeric literal too big")
      if (cur === '_') consume
    }

    // if dot then read fraction
    if (cur === '.' && peek.isDigit)
    {
      consume
      for (Float m := 0.1; cur.isDigit; m /= 10.0)
      {
        fraction = fraction + cur.fromDigit.toFloat*m
        consume
        if (cur === '_') consume
      }
      tok = Token.floatLiteral
    }

    // check for exponent
    if (cur === 'e' || cur === 'E')
    {
      consume
      Bool negExp := false
      if (cur === '-') { consume; negExp = true }
      else if (cur === '+') { consume }
      if (!cur.isDigit) throw err("Expected exponent digits")
      while (cur.isDigit)
      {
        exp = exp*10 + cur.fromDigit
        consume
        if (cur === '_') consume
      }
      if (negExp) exp = -exp
      tok = Token.floatLiteral
    }

    // check for F suffix; dur holds the per-unit multiplier
    // passed to Duration.make (presumably nanosecond ticks -- confirm)
    Int dur := null
    if (cur === 'f' || cur === 'F')
    {
      consume
      tok = Token.floatLiteral
    }
    // check if a duration; these checks are sequential ifs, but at
    // most one can match since each consumes its suffix characters
    else
    {
      if (cur === 'n' && peek === 's') { consume; consume; dur = 1 }
      if (cur === 'm' && peek === 's') { consume; consume; dur = 1000000 }
      if (cur === 's' && peek === 'e') { consume; consume; if (cur !== 'c') throw err("Expected 'sec' in Duration literal"); consume; dur = 1_000_000_000 }
      if (cur === 'm' && peek === 'i') { consume; consume; if (cur !== 'n') throw err("Expected 'min' in Duration literal"); consume; dur = 60_000_000_000 }
      if (cur === 'h' && peek === 'r') { consume; consume; dur = 3_600_000_000_000 }
      if (cur === 'd' && peek === 'a') { consume; consume; if (cur !== 'y') throw err("Expected 'day' in Duration literal"); consume; dur = 86_400_000_000_000 }
    }

    // int literal
    if (tok === Token.intLiteral)
    {
      if (fraction !== 0.0 || exp !== 0) throw err("Int literals may not have fractional or exponent components")
      Int val := whole
      if (neg) val = -val
      if (dur != null)
        return TokenVal.make(Token.durationLiteral, Duration.make(val*dur))
      else
        return TokenVal.make(tok, val)
    }

    // float literal
    if (tok === Token.floatLiteral)
    {
      Float val := whole.toFloat + fraction
      if (exp !== 0) val = val * 10f.pow(exp.toFloat)
      if (neg) val = -val
      if (dur != null)
        return TokenVal.make(Token.durationLiteral, Duration.make((val*dur.toFloat).toInt))
      else
        return TokenVal.make(tok, val)
    }

    throw err("Internal error")
  }
 270  
 271    **
 272    ** Process hex int/long literal starting with 0x
 273    **
 274    TokenVal hex()
 275    {
 276      consume // 0
 277      consume // x
 278  
 279      // read first hex
 280      Int val := cur.fromDigit(16)
 281      if (val == null) throw err("Expecting hex number")
 282      consume
 283      Int nibCount := 1
 284      while (true)
 285      {
 286        Int nib := cur.fromDigit(16)
 287        if (nib == null)
 288        {
 289          if (cur === '_') { consume; continue }
 290          break
 291        }
 292        nibCount++
 293        if (nibCount > 16) throw err("Hex literal too big")
 294        val = (val << 4) + nib;
 295        consume
 296      }
 297  
 298      return TokenVal.make(Token.intLiteral, val)
 299    }
 300  
 301  //////////////////////////////////////////////////////////////////////////
 302  // String
 303  //////////////////////////////////////////////////////////////////////////
 304  
 305    **
 306    ** Parse a raw string literal token.
 307    **
 308    private TokenVal rawStr()
 309    {
 310      // consume opening 'r' and quote
 311      consume
 312      consume
 313  
 314      // string contents
 315      s := StrBuf.make
 316      while (cur !==  '"')
 317      {
 318        if (cur <= 0) throw err("Unexpected end of string literal")
 319        s.addChar(cur)
 320        consume
 321      }
 322  
 323      // close quote
 324      consume
 325  
 326      return TokenVal.make(Token.strLiteral, s.toStr)
 327    }
 328  
  **
  ** Parse a string literal token.  For interpolated strings this
  ** method pushes virtual tokens directly onto the tokens list and
  ** returns null; for plain strings it returns a strLiteral token.
  **
  private TokenVal str()
  {
    // flag suppresses the r"" raw-string check in find() while we
    // recursively tokenize interpolated expressions
    inStrLiteral = true
    try
    {
      // consume opening quote
      consume

      // store starting position
      s := StrBuf.make

      // loop until we find end of string
      interpolated := false
      while (true)
      {
        if (cur === '"') { consume; break }
        if (cur === 0) throw err("Unexpected end of string")
        if (cur === '$')
        {
          // if we have detected an interpolated string, then
          // insert opening paren to treat whole string atomically
          if (!interpolated)
          {
            interpolated = true
            tokens.add(makeVirtualToken(Token.lparen))
          }

          // process interpolated string, it returns false
          // if at end of string literal
          if (!strInterpolation(s.toStr))
          {
            tokens.add(makeVirtualToken(Token.rparen))
            return null
          }

          s.clear
        }
        else if (cur === '\\')
        {
          s.add(escape.toChar)
        }
        else
        {
          s.addChar(cur)
          consume
        }
      }

      // if interpolated then we add rparen to treat whole atomically
      if (interpolated)
      {
        tokens.add(makeVirtualToken(Token.strLiteral, s.toStr))
        tokens.add(makeVirtualToken(Token.rparen))
        return null
      }
      else
      {
        return TokenVal.make(Token.strLiteral, s.toStr)
      }
    }
    finally
    {
      inStrLiteral = false
    }
  }
 397  
  **
  ** When we hit a $ inside a string it indicates an embedded
  ** expression.  We make this look like a stream of tokens
  ** such that:
  **   "a ${b} c" -> "a " + b + " c"
  ** Return true if more in the string literal.
  **
  private Bool strInterpolation(Str s)
  {
    consume // $

    // emit the literal text seen so far, plus the concat operator
    tokens.add(makeVirtualToken(Token.strLiteral, s))
    tokens.add(makeVirtualToken(Token.plus))

    // if { we allow an expression b/w {...}; wrap it in virtual
    // parens so it binds tighter than the surrounding + concat
    if (cur === '{')
    {
      tokens.add(makeVirtualToken(Token.lparen))
      consume
      while (true)
      {
        if (cur === '"') throw err("Unexpected end of string, missing }")
        tok := next
        if (tok.kind == Token.rbrace) break
        tokens.add(tok)
      }
      tokens.add(makeVirtualToken(Token.rparen))
    }

    // else also allow a single identifier with
    // dotted accessors x, x.y, x.y.z
    else
    {
      tok := next
      if (tok.kind != Token.identifier) throw err("Expected identifier after \$")
      tokens.add(tok)
      while (true)
      {
        if (cur !== '.') break
        tokens.add(next) // dot
        tok = next
        if (tok.kind !== Token.identifier) throw err("Expected identifier")
        tokens.add(tok)
      }
    }

    // if at end of string, all done
    if (cur === '\"')
    {
      consume
      return false
    }

    // add plus and return true to keep chugging
    tokens.add(makeVirtualToken(Token.plus))
    return true
  }
 454  
 455    **
 456    ** Create a virtual token for string interpolation.
 457    **
 458    private TokenVal makeVirtualToken(Token kind, Obj value := null)
 459    {
 460      tok := TokenVal.make(kind, value)
 461      tok.file  = filename
 462      tok.line  = line
 463      tok.col   = col
 464      return tok
 465    }
 466  
 467  //////////////////////////////////////////////////////////////////////////
 468  // Uri
 469  //////////////////////////////////////////////////////////////////////////
 470  
 471    **
 472    ** Parse a uri literal token.
 473    **
 474    private TokenVal uri()
 475    {
 476      // consume opening backtick
 477      consume
 478  
 479      // store starting position
 480      s := StrBuf.make
 481  
 482      // loop until we find end of string
 483      while (true)
 484      {
 485        ch := cur
 486        if (ch === '`') { consume; break }
 487        if (ch === 0 || ch === '\n') throw err("Unexpected end of uri")
 488        if (ch === '$') throw err("Uri interpolation not supported yet")
 489        if (ch === '\\') ch = escape
 490        else consume
 491        s.addChar(ch)
 492      }
 493  
 494      return TokenVal.make(Token.uriLiteral, s.toStr)
 495    }
 496  
 497  //////////////////////////////////////////////////////////////////////////
 498  // Char
 499  //////////////////////////////////////////////////////////////////////////
 500  
 501    **
 502    ** Parse a char literal token.
 503    **
 504    private TokenVal ch()
 505    {
 506      // consume opening quote
 507      consume
 508  
 509      // if \ then process as escape
 510      Int c
 511      if (cur === '\\')
 512      {
 513        c = escape
 514      }
 515      else
 516      {
 517        c = cur
 518        consume
 519      }
 520  
 521      // expecting ' quote
 522      if (cur !== '\'') throw err("Expecting ' close of char literal")
 523      consume
 524  
 525      return TokenVal.make(Token.intLiteral, c)
 526    }
 527  
  **
  ** Parse an escape sequence which starts with a \
  ** Returns the decoded character code; the backslash and all
  ** escape characters are consumed.
  **
  Int escape()
  {
    // consume slash
    if (cur !== '\\') throw err("Internal error")
    consume

    // check basics (single-character escapes)
    switch (cur)
    {
      case 'b':   consume; return '\b'
      case 'f':   consume; return '\f'
      case 'n':   consume; return '\n'
      case 'r':   consume; return '\r'
      case 't':   consume; return '\t'
      case '"':   consume; return '"'
      case '$':   consume; return '$'
      case '\'':  consume; return '\''
      case '`':   consume; return '`'
      case '\\':  consume; return '\\'
    }

    // check for uxxxx (4 hex digit unicode escape); all four chars
    // are consumed before validation so consumption is uniform
    if (cur === 'u')
    {
      consume
      n3 := cur.fromDigit(16); consume
      n2 := cur.fromDigit(16); consume
      n1 := cur.fromDigit(16); consume
      n0 := cur.fromDigit(16); consume
      if (n3 == null || n2 == null || n1 == null || n0 == null) throw err("Invalid hex value for \\uxxxx")
      return ((n3 << 12) | (n2 << 8) | (n1 << 4) | n0)
    }

    throw err("Invalid escape sequence")
  }
 566  
 567  //////////////////////////////////////////////////////////////////////////
 568  // Comments
 569  //////////////////////////////////////////////////////////////////////////
 570  
 571    **
 572    ** Skip a single line // comment
 573    **
 574    private TokenVal skipCommentSL()
 575    {
 576      consume  // first slash
 577      consume  // next slash
 578      while (true)
 579      {
 580        if (cur === '\n') { consume; break }
 581        if (cur === 0) break
 582        consume
 583      }
 584      return null
 585    }
 586  
 587    **
 588    ** Skip a multi line /* comment.  Note unlike C/Java,
 589    ** slash/star comments can be nested.
 590    **
 591    private TokenVal skipCommentML()
 592    {
 593      consume   // first slash
 594      consume   // next slash
 595      depth := 1
 596      while (true)
 597      {
 598        if (cur === '*' && peek === '/') { consume; consume; depth--; if (depth <= 0) break }
 599        if (cur === '/' && peek === '*') { consume; consume; depth++; continue }
 600        if (cur === 0) break
 601        consume
 602      }
 603      return null
 604    }
 605  
  **
  ** Parse a Javadoc style comment into a documentation comment token.
  ** The token value is the list of comment lines with leading stars
  ** and surrounding empty lines stripped.
  **
  private TokenVal docComment()
  {
    // if doc is off, then just skip the line and be done
    // (skipCommentSL just consumes two chars then runs to newline,
    // so it works on the leading ** as well as //)
    if (!isDoc) { skipCommentSL; return null }

    // strip leading stars and one optional space
    while (cur === '*') consume
    if (cur === ' ') consume

    // parse comment
    lines := Str[,]
    s := StrBuf.make
    while (cur > 0)
    {
      // add to buffer and advance
      c := cur
      consume

      // if not at newline, then loop
      if (c !== '\n')
      {
        s.addChar(c)
        continue
      }

      // add line and reset buffer (but don't add leading empty lines)
      line := s.toStr
      if (!lines.isEmpty || !line.trim.isEmpty) lines.add(line)
      s.clear

      // we at a newline, check for leading whitespace(0+)/star(2+)/whitespace(1);
      // anything else ends the doc comment
      while (cur === ' ' || cur === '\t') consume
      if (cur !== '*' || peek !== '*') break
      while (cur === '*') consume
      if (cur === ' ' || cur === '\t') consume
    }
    lines.add(s.toStr)

    // strip trailing empty lines
    while (!lines.isEmpty)
      if (lines.last.trim.isEmpty) lines.removeAt(-1)
      else break

    return TokenVal.make(Token.docComment, lines)
  }
 653  
 654  //////////////////////////////////////////////////////////////////////////
 655  // Symbol
 656  //////////////////////////////////////////////////////////////////////////
 657  
  **
  ** Parse a symbol token (typically into an operator).
  ** The first char is consumed up front; each case then peeks at
  ** cur to greedily match two- and three-char operators.
  **
  private TokenVal symbol()
  {
    c := cur
    consume
    switch (c)
    {
      case '\r':
        throw err("Carriage return \\r not allowed in source")
      case '!':
        if (cur === '=')
        {
          consume
          if (cur === '=') { consume; return TokenVal.make(Token.notSame) }
          return TokenVal.make(Token.notEq)
        }
        return TokenVal.make(Token.bang)
      case '#':
        return TokenVal.make(Token.pound)
      case '%':
        if (cur === '=') { consume; return TokenVal.make(Token.assignPercent) }
        return TokenVal.make(Token.percent)
      case '&':
        if (cur === '=') { consume; return TokenVal.make(Token.assignAmp) }
        if (cur === '&') { consume; return TokenVal.make(Token.doubleAmp) }
        return TokenVal.make(Token.amp)
      case '(':
        return TokenVal.make(Token.lparen)
      case ')':
        return TokenVal.make(Token.rparen)
      case '*':
        if (cur === '=') { consume; return TokenVal.make(Token.assignStar) }
        return TokenVal.make(Token.star)
      case '+':
        if (cur === '=') { consume; return TokenVal.make(Token.assignPlus) }
        if (cur === '+') { consume; return TokenVal.make(Token.increment) }
        return TokenVal.make(Token.plus)
      case ',':
        return TokenVal.make(Token.comma)
      case '-':
        if (cur === '>') { consume; return TokenVal.make(Token.arrow) }
        if (cur === '-') { consume; return TokenVal.make(Token.decrement) }
        if (cur === '=') { consume; return TokenVal.make(Token.assignMinus) }
        return TokenVal.make(Token.minus)
      case '.':
        if (cur === '.')
        {
          consume
          if (cur === '.') { consume; return TokenVal.make(Token.dotDotDot) }
          return TokenVal.make(Token.dotDot)
        }
        return TokenVal.make(Token.dot)
      case '/':
        if (cur === '=') { consume; return TokenVal.make(Token.assignSlash) }
        return TokenVal.make(Token.slash)
      case ':':
        if (cur === ':') { consume; return TokenVal.make(Token.doubleColon) }
        if (cur === '=') { consume; return TokenVal.make(Token.defAssign) }
        return TokenVal.make(Token.colon)
      case ';':
        return TokenVal.make(Token.semicolon)
      case '<':
        if (cur === '=')
        {
          consume
          if (cur === '>') { consume; return TokenVal.make(Token.cmp) }
          return TokenVal.make(Token.ltEq)
        }
        if (cur === '<')
        {
          consume
          if (cur === '=') { consume; return TokenVal.make(Token.assignLshift) }
          return TokenVal.make(Token.lshift)
        }
        return TokenVal.make(Token.lt)
      case '=':
        if (cur === '=')
        {
          consume
          if (cur === '=') { consume; return TokenVal.make(Token.same) }
          return TokenVal.make(Token.eq)
        }
        return TokenVal.make(Token.assign)
      case '>':
        if (cur === '=') { consume; return TokenVal.make(Token.gtEq) }
        if (cur === '>')
        {
          consume
          if (cur === '=') { consume; return TokenVal.make(Token.assignRshift) }
          return TokenVal.make(Token.rshift)
        }
        return TokenVal.make(Token.gt)
      case '?':
        return TokenVal.make(Token.question)
      case '@':
        return TokenVal.make(Token.at)
      case '[':
        return TokenVal.make(Token.lbracket)
      case ']':
        return TokenVal.make(Token.rbracket)
      case '^':
        if (cur === '=') { consume; return TokenVal.make(Token.assignCaret) }
        return TokenVal.make(Token.caret)
      case '{':
        return TokenVal.make(Token.lbrace)
      case '|':
        if (cur === '|') { consume; return TokenVal.make(Token.doublePipe) }
        if (cur === '=') { consume; return TokenVal.make(Token.assignPipe) }
        return TokenVal.make(Token.pipe)
      case '}':
        return TokenVal.make(Token.rbrace)
      case '~':
        return TokenVal.make(Token.tilde)
    }

    // zero char means we have read past end of buffer (see consume)
    if (c === 0)
      return TokenVal.make(Token.eof)

    throw err("Unexpected symbol: " + c.toChar + " (0x" + c.toHex + ")")
  }
 780  
 781  //////////////////////////////////////////////////////////////////////////
 782  // Utils
 783  //////////////////////////////////////////////////////////////////////////
 784  
 785    **
 786    ** Return a CompilerException for current location in source.
 787    **
 788    override CompilerErr err(Str msg, Location loc := null)
 789    {
 790      if (loc == null) loc = Location.make(filename, line, col)
 791      return super.err(msg, loc);
 792    }
 793  
 794  ////////////////////////////////////////////////////////////////
 795  // Consume
 796  ////////////////////////////////////////////////////////////////
 797  
  **
  ** Consume the cur char and advance to next char in buffer:
  **  - updates cur and peek fields
  **  - updates the line and col count
  **  - end of file, sets fields to 0
  **
  private Void consume()
  {
    // if cur is a line break, then advance line number,
    // because the char we are getting ready to make cur
    // is the first char on the next line
    if (cur === '\n')
    {
      line++
      col = 1
    }
    else
    {
      col++
    }

    // get the next character from the buffer, any
    // problems mean that we have read past the end
    // (invariant: pos indexes cur, peek is buf[pos+1] or 0)
    cur = peek
    pos++
    if (pos+1 < buf.size)
      peek = buf[pos+1] // next peek is cur+1
    else
      peek = 0
  }
 828  
 829  //////////////////////////////////////////////////////////////////////////
 830  // Test
 831  //////////////////////////////////////////////////////////////////////////
 832  
 833    static Void main()
 834    {
 835      t1 := Duration.now
 836      files := File.make(`/dev/fan/src/sysTest/fan/`).list
 837      files.each |File f|
 838      {
 839        tok := Tokenizer.make(null, Location.make(f.name), f.readAllStr, false).tokenize
 840        echo("-- " + f + " [" + tok.size + "]")
 841      }
 842      t2 := Duration.now
 843      echo("Time: " + (t2-t1).toMillis)
 844      echo("Time: " + (t2-t1))
 845    }
 846  
 847  //////////////////////////////////////////////////////////////////////////
 848  // Fields
 849  //////////////////////////////////////////////////////////////////////////
 850  
  private Str buf           // source characters (newlines normalized to \n)
  private Int pos           // index into buf for cur
  private Bool isDoc        // return documentation comments or if false ignore them
  private Str filename      // source file name
  private Int line := 1     // pos line number (1-based)
  private Int col := 1      // pos column number (1-based)
  private Int cur           // current char (0 once past end of buffer)
  private Int peek          // next char (0 once past end of buffer)
  private Int lastLine      // line number of last token returned from next()
  private TokenVal[] tokens // token accumulator
  private Bool inStrLiteral // true if inside a string literal token
 862  
 863  
 864  }