logo

class

fandoc::FandocParser

sys::Obj
  fandoc::FandocParser
   1  //
   2  // Copyright (c) 2007, Brian Frank and Andy Frank
   3  // Licensed under the Academic Free License version 3.0
   4  //
   5  // History:
   6  //   17 Feb 07  Brian Frank  Creation
   7  //
   8  
   9  **
  10  ** FandocParser translate fandoc text into an in-memory
  11  ** representation of the document.
  12  **
  13  class FandocParser
  14  {
  15  
  16  //////////////////////////////////////////////////////////////////////////
  17  // Parser
  18  //////////////////////////////////////////////////////////////////////////
  19  
  20    **
  21    ** Parse the document from the specified in stream into an in-memory
  22    ** tree structure.  If close is true, the stream is guaranteed to be closed.
  23    **
  24    Doc parse(Str filename, InStream in, Bool close := true)
  25    {
  26      this.filename = filename
  27      readLines(in, close)
  28  
  29      doc := Doc.make
  30      header(doc)
  31      while (curt !== LineType.eof)
  32        doc.children.add(topBlock)
  33  
  34      lines = null
  35      return doc
  36    }
  37  
  38  //////////////////////////////////////////////////////////////////////////
  39  // Header
  40  //////////////////////////////////////////////////////////////////////////
  41  
  42    private Void header(Doc doc)
  43    {
  44      skipBlankLines
  45      while (curt !== LineType.eof && cur.startsWith("**"))
  46      {
  47        colon := cur.index(":")
  48        if (colon != null)
  49        {
  50          key := cur[2...colon].trim
  51          val := cur[colon+1..-1].trim
  52          doc.meta[key] = val
  53        }
  54        else
  55        {
  56          if (!cur.startsWith("****")) break
  57        }
  58        consume
  59      }
  60      skipBlankLines
  61    }
  62  
  63  //////////////////////////////////////////////////////////////////////////
  64  // Block
  65  //////////////////////////////////////////////////////////////////////////
  66  
  67    private DocElem topBlock()
  68    {
  69      switch (peekt)
  70      {
  71        case LineType.h1:
  72        case LineType.h2:
  73        case LineType.h3:
  74          return heading
  75      }
  76  
  77      return block(0)
  78    }
  79  
  80    private DocElem heading()
  81    {
  82      level := peekt.headingLevel
  83      h := Heading.make(level)
  84      curStart = 0
  85      formattedText(h)
  86      consume
  87      skipBlankLines
  88      return h
  89    }
  90  
  91    private DocElem block(Int indent)
  92    {
  93      switch (curt)
  94      {
  95        case LineType.ol:
  96          return ol
  97        case LineType.ul:
  98          return ul
  99        case LineType.blockquote:
 100          return blockquote
 101        case LineType.normal:
 102          if (curIndent >= indent+2)
 103            return pre
 104          else
 105            return para
 106        default:
 107          throw Err.make(curt.toStr)
 108      }
 109    }
 110  
 111    private DocElem para()
 112    {
 113      para := Para.make
 114  
 115      // if the first word is all capitals followed
 116      // by a colon then it is a admonition such as NOTE:
 117      first := cur.trim.split(" ").first
 118      if (first[-1] == ':')
 119      {
 120        first = first[0..-2]
 121        if (first.all |Int ch->Bool| { return ch.isUpper })
 122        {
 123          para.admonition = first
 124          curStart = cur.index(":") + 1
 125        }
 126      }
 127  
 128      return formattedText(para)
 129    }
 130  
 131    private DocElem blockquote()
 132    {
 133      return formattedText(BlockQuote.make)
 134    }
 135  
 136    private DocElem pre()
 137    {
 138      // first line defines left margin
 139      indent := curIndent
 140      buf := StrBuf.make(256)
 141      buf.add(cur[indent..-1])
 142      consume
 143  
 144      while (true)
 145      {
 146        // read in preformatted lines of code
 147        while (curt === LineType.normal && curIndent >= indent)
 148        {
 149          buf.add("\n").add(cur[indent..-1])
 150          consume
 151        }
 152  
 153        // skip blanks but keep track of count
 154        blanks := 0
 155        while (curt === LineType.blank) { consume; blanks++ }
 156  
 157        // if more code, then add blank lines and continue
 158        if (curt === LineType.normal && curIndent >= indent)
 159          blanks.times |,| { buf.add("\n") }
 160        else
 161          break
 162      }
 163  
 164      pre := Pre.make
 165      pre.children.add(DocText.make(buf.toStr))
 166      return pre
 167    }
 168  
 169    private DocElem ol()
 170    {
 171      style := OrderedListStyle.fromFirstChar(cur.trim[0])
 172      return listItems(OrderedList.make(style), curt, curIndent)
 173    }
 174  
 175    private DocElem ul()
 176    {
 177      return listItems(UnorderedList.make, curt, curIndent)
 178    }
 179  
 180    private DocElem listItems(DocElem list, LineType listType, Int listIndent)
 181    {
 182      while (true)
 183      {
 184        // next item in my own list
 185        if (curt === listType && curIndent === listIndent)
 186        {
 187          list.children.add(formattedText(ListItem.make))
 188        }
 189  
 190        // otherwise if indent is same or greater, then
 191        // this is a continuation of the my last node
 192        else if (curIndent >= listIndent)
 193        {
 194          ((DocElem)list.children.last).children.add(block(listIndent))
 195        }
 196  
 197        // end of list
 198        else
 199        {
 200          break
 201        }
 202      }
 203      return list
 204    }
 205  
 206    private DocElem formattedText(DocElem elem)
 207    {
 208      startLineNum := this.lineIndex - 1
 209      startIndent  := curStart
 210      isBlockQuote := curt === LineType.blockquote
 211  
 212      buf := StrBuf.make(256)
 213      buf.add(cur[curStart..-1].trim)
 214      consume
 215  
 216      while (curStart <= startIndent &&
 217             (curt === LineType.normal || (isBlockQuote && curt == LineType.blockquote)))
 218      {
 219        buf.add(" ").add(cur[curStart..-1].trim)
 220        consume
 221      }
 222      endLineNum := this.lineIndex - 2
 223      skipBlankLines
 224  
 225      try
 226      {
 227        InlineParser.make(buf).parse(elem)
 228      }
 229      catch (Err err)
 230      {
 231        echo("ERROR: ${err.message} [${filename}:$startLineNum-$endLineNum]")
 232        err.trace
 233        elem.children.add(DocText.make(buf.toStr))
 234      }
 235  
 236      return elem
 237    }
 238  
 239  //////////////////////////////////////////////////////////////////////////
 240  // IO
 241  //////////////////////////////////////////////////////////////////////////
 242  
 243    **
 244    ** Read all the lines into memory and close stream if required.
 245    **
 246    private Void readLines(InStream in, Bool close)
 247    {
 248      try
 249      {
 250        lines = in.readAllLines
 251        numLines = lines.size
 252        lineIndex = 0
 253        consume
 254        consume
 255      }
 256      finally
 257      {
 258        if (close) in.close
 259      }
 260    }
 261  
 262  //////////////////////////////////////////////////////////////////////////
 263  // Utils
 264  //////////////////////////////////////////////////////////////////////////
 265  
 266    **
 267    ** Skip any blank lines
 268    **
 269    private Void skipBlankLines()
 270    {
 271      while (curt === LineType.blank) consume
 272    }
 273  
 274    **
 275    ** Return if line starting at index i is an ordered
 276    ** list item:
 277    **   number* "." sp  (digits)
 278    **   letter  "." sp  (a-z | A-Z single letter only)
 279    **   roman*  "." sp  (ivx | IVX combos)
 280    **
 281    private static Bool isOrderedListMark(Str line, Int i)
 282    {
 283      // check if first char is alpha numeric
 284      if (!line[i].isAlphaNum) return false
 285  
 286      // find dot space
 287      dot := line.index(". ", i)
 288      if (dot == null) return false
 289  
 290      mark := line[i...dot]
 291      if (mark[0].isDigit)
 292      {
 293        return mark.all |Int ch->Bool| { return ch.isDigit }
 294      }
 295      else
 296      {
 297        return mark.all |Int ch, Int index->Bool|
 298        {
 299          switch (ch)
 300          {
 301            case 'I':
 302            case 'V':
 303            case 'X':
 304            case 'i':
 305            case 'v':
 306            case 'x':
 307              return true
 308            default:
 309              return index == 0
 310          }
 311        }
 312      }
 313    }
 314  
 315    **
 316    ** Consume the current line and advance to the next line
 317    **
 318    private Void consume()
 319    {
 320      // advance cur to peek
 321      cur       = peek
 322      curt      = peekt
 323      curIndent = peekIndent
 324      curStart  = peekStart
 325  
 326      // update peek, peekIndent, and peekType
 327      peek = (lineIndex < numLines) ? lines[lineIndex++] : null
 328      peekIndent = peekStart = 0
 329      if (peek == null)                 peekt = LineType.eof
 330      else if (peek.isSpace)            peekt = LineType.blank
 331      else if (peek.startsWith("***"))  peekt = LineType.h1
 332      else if (peek.startsWith("==="))  peekt = LineType.h2
 333      else if (peek.startsWith("---"))  peekt = LineType.h3
 334      else
 335      {
 336        peekt = LineType.normal
 337        while (peek[peekIndent].isSpace) peekIndent++
 338        if (peekIndent+2 < peek.size)
 339        {
 340          if (peek[peekIndent] == '-' && peek[peekIndent+1].isSpace)
 341          {
 342            peekt = LineType.ul
 343            peekIndent += 2
 344            peekStart = peekIndent
 345          }
 346          if (isOrderedListMark(peek, peekIndent))
 347          {
 348            peekt = LineType.ol
 349            peekIndent += 2
 350            peekStart = peek.index(".") + 2
 351          }
 352          else if (peek[peekIndent] == '>' && peek[peekIndent+1].isSpace)
 353          {
 354            peekt = LineType.blockquote
 355            peekIndent += 2
 356            peekStart = peekIndent
 357          }
 358          else
 359          {
 360            peekStart = peekIndent
 361          }
 362        }
 363      }
 364    }
 365  
 366  //////////////////////////////////////////////////////////////////////////
 367  // Main
 368  //////////////////////////////////////////////////////////////////////////
 369  
 370    static Void main()
 371    {
 372      doc := make.parse(Sys.args[0], File.make(Sys.args[0].toUri).in)
 373      doc.dump
 374    }
 375  
 376  //////////////////////////////////////////////////////////////////////////
 377  // Fields
 378  //////////////////////////////////////////////////////////////////////////
 379  
 380    private Str filename        // filename for reporting errors
 381    private Str[] lines         // lines of document
 382    private Int numLines        // lines.size
 383    private Int lineIndex       // current index in lines
 384    private Str cur             // current line
 385    private Str peek            // next line
 386    private LineType curt       // current line type
 387    private LineType peekt      // peek line type
 388    private Int curIndent       // how many spaces is cur indented
 389    private Int peekIndent      // how many spaces is peek indented
 390    private Int curStart        // starting index of cur text
 391    private Int peekStart       // starting index of cur text
 392  }
 393  
 394  **************************************************************************
 395  ** LineType
 396  **************************************************************************
 397  
 398  internal enum LineType
 399  {
 400    eof,         // end of file
 401    blank,       // space*
 402    ul,          // space* "-" space*
 403    ol,          // space* (number|letter)* "." space*
 404    h1,          // ***
 405    h2,          // ===
 406    h3,          // ---
 407    blockquote,  // >
 408    normal       // anything else
 409  
 410    Bool isList() { return this === ul }
 411  
 412    Int headingLevel()
 413    {
 414      switch (this)
 415      {
 416        case h1: return 1
 417        case h2: return 2
 418        case h3: return 3
 419        default: throw Err.make(toStr)
 420      }
 421    }
 422  }
 423