aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorazidar2015-07-02 16:16:52 -0700
committerazidar2015-07-02 16:16:52 -0700
commita5727e677bbf12674da94526366535a0f820590d (patch)
treed8e48d2e8f813690d3c83982b216b0335ed45add /src
parent843af237df6a677b387149ad76c1d343dc7271e1 (diff)
Added firrtl-lexer
Diffstat (limited to 'src')
-rw-r--r--src/main/stanza/firrtl-lexer.stanza570
1 files changed, 570 insertions, 0 deletions
diff --git a/src/main/stanza/firrtl-lexer.stanza b/src/main/stanza/firrtl-lexer.stanza
new file mode 100644
index 00000000..f0673a72
--- /dev/null
+++ b/src/main/stanza/firrtl-lexer.stanza
@@ -0,0 +1,570 @@
+defpackage firrtl/lexer :
+ import core
+ import core/stringeater
+ import verse
+
+;=============== PUBLIC INTERFACE ===========================
+;Lex a string into a list of token forms. Positions in the resulting
+;FileInfos use the placeholder filename "NOFILE".
+public defn lex (text:String) -> List<Token> :
+  lex-all-forms(StringEater(text, "NOFILE"))
+
+;Read and lex an entire file; FileInfo positions carry the real filename.
+public defn lex-file (filename:String) -> List<Token> :
+  println-all(["Reading " filename])
+  val eater = StringEater(read-file(filename), filename)
+  lex-all-forms(eater)
+
+;Lex a single form from the eater: eat lexemes until all opened scopes
+;(parens/braces/brackets/angles) are balanced again or input ends, then
+;group and translate the accumulated lexemes, returning the first
+;resulting form. Accumulated errors are thrown as one LexerExceptions.
+public defn lex-form (eater:StringEater) :
+  init-lexer(eater)
+  eat-lexeme()
+  while (EATER[0] != false) and not empty?(SCOPES) :
+    eat-lexeme()
+  val form = head(lex-all(group-all()))
+  throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
+  form
+
+;Lex the entire input: eat every lexeme, group by brackets and
+;indentation, then translate the grouped structure into forms.
+;Accumulated errors are thrown as one LexerExceptions.
+public defn lex-all-forms (eater:StringEater) :
+  init-lexer(eater)
+  eat-all()
+  val grouped = group-all()
+  val form = lex-all(grouped)
+  throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
+  form
+
+;=============== TOKEN CLASSES ==============================
+;Marker token recording the indentation level (number of leading
+;spaces) at the start of a line.
+defstruct Indentation :
+  indent:Int
+defmethod print (o:OutputStream, i:Indentation) :
+  print-all(o, ["[Indentation " indent(i) "]"])
+
+;Marker token for an opening bracket symbol, e.g. ( [ { < and the
+;starred variants (*( etc.) emitted after a completed operand.
+defstruct OpenToken :
+  symbol:Symbol
+defmethod print (o:OutputStream, t:OpenToken) :
+  print-all(o, ["OPEN[" symbol(t) "]"])
+
+;Marker token for a closing bracket symbol, e.g. ) ] } >.
+defstruct CloseToken :
+  symbol:Symbol
+defmethod print (o:OutputStream, t:CloseToken) :
+  print-all(o, ["CLOSE[" symbol(t) "]"])
+
+;Marker token for punctuation lexemes (the backtick and the ? capture
+;marker).
+defstruct PuncToken :
+  symbol:Symbol
+defmethod print (o:OutputStream, t:PuncToken) :
+  print-all(o, ["PUNC[" symbol(t) "]"])
+
+;=============== LEXER STATE ================================
+;Global lexer state, reset by init-lexer before each run.
+;LEXEMES: tokens eaten so far, in order.
+var LEXEMES: Vector<Token>
+;SCOPES: stack of closing symbols expected for currently open brackets.
+var SCOPES: Vector<Symbol>
+;ERRORS: lexer errors accumulated during this run.
+var ERRORS: Vector<LexerException>
+;EATER: the input stream currently being consumed.
+var EATER: StringEater
+;STAR?: true when the previous lexeme could end an operand, making an
+;immediately following open bracket lex as a star (call/index) token.
+var STAR?: True|False
+
+;Reset all lexer state for a fresh run over the given eater.
+defn init-lexer (eater:StringEater) :
+  EATER = eater
+  LEXEMES = Vector<Token>()
+  SCOPES = Vector<Symbol>()
+  ERRORS = Vector<LexerException>()
+  STAR? = false
+
+;================= CHARACTER CLASSES ========================
+;Per-character classification table: CHAR-CLASSES[i] holds a bitmask of
+;the character classes that character code i belongs to.
+val CHAR-CLASSES = String(256, to-char(0))
+;True if c is a Char whose class bitmask has the given bit set.
+;Non-Char inputs (e.g. false at end of input) belong to no class.
+defn class? (c, bit:Int) -> True|False :
+  match(c) :
+    (c:Char) :
+      val mask = to-int(CHAR-CLASSES[to-int(c as Char)])
+      bit-set?(mask, bit)
+    (c) :
+      false
+
+;Mark every character in the given string as a member of class bit.
+defn tag-class (class:String, bit:Int) :
+  val tag = 1 << bit
+  for c in class do :
+    val i = to-int(c)
+    val mask = to-int(CHAR-CLASSES[i])
+    val c2 = to-char(mask | tag)
+    CHAR-CLASSES[i] = c2
+
+;Bit indices for each character class.
+val DIGIT-CHAR = 0
+val ALPHA-CHAR = 1
+val PUNC-CHAR = 2
+val OPEN-BRACE-CHAR = 3
+val CLOSE-BRACE-CHAR = 4
+val OPERATOR-CHAR = 5
+val SYMBOL-CHAR = 6
+val WHITESPACE-CHAR = 7
+
+;Populate CHAR-CLASSES at load time. A character may belong to several
+;classes: letters, digits, '_' and '?' and the "~!@#$%^*+-=/" operator
+;characters are all also SYMBOL characters, while ".:<&|" are operator
+;characters that may NOT appear inside symbols. Note ',' counts as
+;whitespace.
+let :
+  val digits = "0123456789"
+  val letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
+  tag-class(letters, ALPHA-CHAR)
+  tag-class("_?", ALPHA-CHAR)
+  tag-class(digits, DIGIT-CHAR)
+  tag-class("`", PUNC-CHAR)
+  tag-class(" ,", WHITESPACE-CHAR)
+  tag-class("([{<", OPEN-BRACE-CHAR)
+  tag-class(")]}>", CLOSE-BRACE-CHAR)
+  tag-class("~!@#$%^*+-=/", OPERATOR-CHAR)
+  tag-class("~!@#$%^*+-=/", SYMBOL-CHAR)
+  tag-class(".:<&|", OPERATOR-CHAR)
+  tag-class("_?", SYMBOL-CHAR)
+  tag-class(letters, SYMBOL-CHAR)
+  tag-class(digits, SYMBOL-CHAR)
+
+;================ LOW LEVEL PREDICATES =====================
+;Lexer Predicates
+;Thin predicates over the character-class table. Each accepts any value
+;(including false at end of input, which fails every class).
+defn whitespace? (c) : class?(c, WHITESPACE-CHAR)
+defn digit? (c) : class?(c, DIGIT-CHAR)
+defn alpha? (c) : class?(c, ALPHA-CHAR)
+defn punc? (c) : class?(c, PUNC-CHAR)
+defn open-brace? (c) : class?(c, OPEN-BRACE-CHAR)
+defn close-brace? (c) : class?(c, CLOSE-BRACE-CHAR)
+defn number-char? (c) : digit?(c) or (c == '.')
+defn symbol-char? (c) : class?(c, SYMBOL-CHAR)
+;'>' is special: it only counts as an operator character when it is
+;not currently expected as the closing token of an open '<' scope.
+defn operator-char? (c) :
+  if c == '>' :
+    empty?(SCOPES) or (peek(SCOPES) != `>)
+  else :
+    class?(c, OPERATOR-CHAR)
+
+;================ EATING FUNCTIONS =========================
+;Maintain the SCOPES stack for the open/close token symbol c seen at
+;position info. Opening symbols (plain and starred) push their expected
+;closing symbol; a closing symbol must match the top of the stack, else
+;an ExtraClosingToken/WrongClosingToken error is thrown.
+defn update-stack (info:FileInfo, c:Symbol) :
+  defn pop-stack () :
+    if empty?(SCOPES) :
+      throw(ExtraClosingToken(info, c))
+    else if peek(SCOPES) != c :
+      throw(WrongClosingToken(info, peek(SCOPES), c))
+    else :
+      pop(SCOPES)
+
+  switch {c == _} :
+    `\|<| : add(SCOPES, `\|>|)
+    `\|[| : add(SCOPES, `\|]|)
+    `\|{| : add(SCOPES, `\|}|)
+    `\|(| : add(SCOPES, `\|)|)
+    `\|*<| : add(SCOPES, `\|>|)
+    `\|*[| : add(SCOPES, `\|]|)
+    `\|*{| : add(SCOPES, `\|}|)
+    `\|*(| : add(SCOPES, `\|)|)
+    `\|>| : pop-stack()
+    `\|]| : pop-stack()
+    `\|}| : pop-stack()
+    `\|)| : pop-stack()
+
+;Record a freshly eaten token: append it to LEXEMES, update the scope
+;stack for open/close tokens, and recompute STAR?. STAR? becomes true
+;when the token could end an operand (a close bracket, a literal, or a
+;symbol containing an alphabetic character), so that an immediately
+;following open bracket is lexed as a star token by eat-star-token.
+;Always returns true so the eat-* functions can be chained with `or`.
+defn token-eaten (t:Token) :
+  ;Update Lexemes
+  add(LEXEMES, t)
+
+  ;Update the stack
+  match(item(t)) :
+    (x:OpenToken) : update-stack(info(t), symbol(x))
+    (x:CloseToken) : update-stack(info(t), symbol(x))
+    (x) : false
+
+  ;Update STAR?
+  STAR? =
+    match(item(t)) :
+      (x:CloseToken) : true
+      (x:Int|Float|Char|String) : true
+      (x:True|False) : true
+      (x:Symbol) : any?(alpha?, to-string(x))
+      (x) : false
+
+  true
+
+;Translate the character following a backslash into its escaped value.
+;Supported escapes: \n \\ \" \' \| (all but \n map to themselves).
+;Any other character raises InvalidEscapeChar. NOTE(review): \t is not
+;supported — confirm this is intentional.
+defn escape-char (c:Char) -> Char :
+  switch {c == _} :
+    'n' : '\n'
+    '\\' : c
+    '"' : c
+    '\'' : c
+    '|' : c
+    else : throw(InvalidEscapeChar(info(EATER), c))
+
+;Eat a delimiter-enclosed run of characters, processing backslash
+;escapes. EATER[0] must hold the delimiter; scanning starts at index 1
+;and stops at the next unescaped occurrence of the same delimiter.
+;On success, eats up to and including the closing delimiter and returns
+;the unescaped contents as a String. Returns false (consuming nothing)
+;when the input ends before the closing delimiter is found.
+defn eat-escaped-chars () :
+  val buf = StringBuffer()
+  val end-char = EATER[0]
+  val end = loop(1) where :
+    defn* loop (i:Int) :
+      val c1 = EATER[i]
+      val c2 = EATER[i + 1]
+      if c1 == false :
+        false
+      else if c1 == end-char :
+        i + 1
+      else if c1 == '\\' and c2 != false :
+        add(buf, escape-char(c2 as Char))
+        loop(i + 2)
+      else :
+        add(buf, c1 as Char)
+        loop(i + 1)
+  ;Bug fix: to-string(buf) was previously returned unconditionally, so
+  ;an unterminated literal returned its partial contents and callers'
+  ;(s:False) arms (Unclosed* errors) could never fire. Return the
+  ;String only on success; the if's implicit false signals failure.
+  if end != false :
+    eat(EATER, end as Int)
+    to-string(buf)
+
+;If positioned on ';', eat characters up to (but not including) the
+;newline and return true; otherwise return false without consuming.
+defn eat-comment () -> True|False :
+  if EATER[0] == ';' :
+    while (EATER[0] != false and EATER[0] != '\n') :
+      eat(EATER)
+    true
+
+;Eat a double-quoted string literal, emitting a String token.
+;Throws UnclosedString if the literal is never terminated.
+defn eat-string () :
+  val info = info(EATER)
+  if EATER[0] == '"' :
+    match(eat-escaped-chars()) :
+      (s:String) : token-eaten(Token(s, info))
+      (s:False) : throw(UnclosedString(info))
+
+;Eat a single-quoted character literal, emitting a Char token.
+;The quoted contents must be exactly one character after escaping.
+defn eat-char () :
+  val info = info(EATER)
+  if EATER[0] == '\'' :
+    match(eat-escaped-chars()) :
+      (s:String) :
+        if length(s) == 1 : token-eaten(Token(s[0], info))
+        else : throw(InvalidCharString(info))
+      (s:False) : throw(UnclosedCharString(info))
+
+;Eat an escaped symbol of the form \|...|, emitting a Symbol token.
+;The leading backslash is consumed first so '|' acts as the delimiter.
+defn eat-escaped-symbol () :
+  val info = info(EATER)
+  if EATER[0] == '\\' and EATER[1] == '|' :
+    eat(EATER)
+    match(eat-escaped-chars()) :
+      (s:String) : token-eaten(Token(to-symbol(s), info))
+      (s:False) : throw(UnclosedSymbol(info))
+
+;Scan the maximal run of symbol characters beginning at index start.
+;Returns the index one past the end of the run if it contains at least
+;one alphabetic character (i.e. it is a valid symbol), otherwise the
+;implicit false. Note: the inner helper shadows core's length.
+defn symbol-end (start:Int) -> False|Int :
+  defn length (a?:True|False, i:Int) :
+    if symbol-char?(EATER[i]) :
+      length(a? or alpha?(EATER[i]), i + 1)
+    else if a? :
+      i
+  length(false, start)
+
+;Eat a symbol if one starts at the current position. The literals
+;"true" and "false" lex as booleans; everything else becomes a Symbol.
+defn eat-symbol () :
+  match(symbol-end(0)) :
+    (len:Int) :
+      val info = info(EATER)
+      val str = eat(EATER, len)
+      switch {str == _} :
+        "true" : token-eaten(Token(true, info))
+        "false" : token-eaten(Token(false, info))
+        else : token-eaten(Token(to-symbol(str), info))
+    (len:False) :
+      false
+
+;Eat a maximal run of operator characters as a Symbol token. If the
+;run is immediately followed by an alphabetic character, trailing
+;characters that are also symbol characters are given back (look-back)
+;so they can lex as part of the following symbol.
+defn eat-operator () :
+  val len = look-forward(0) where :
+    defn* look-forward (i:Int) :
+      if operator-char?(EATER[i]) : look-forward(i + 1)
+      else if alpha?(EATER[i]) : look-back(i - 1)
+      else : i
+    defn* look-back (i:Int) :
+      if symbol-char?(EATER[i]) : look-back(i - 1)
+      else : i + 1
+  if len > 0 :
+    val info = info(EATER)
+    token-eaten(Token(to-symbol(eat(EATER, len)), info))
+
+;Eat the leading spaces of a line and emit an Indentation token with
+;the count. Comment-only and blank lines are skipped recursively, so
+;only the indentation of the next line with real content is emitted.
+defn* eat-indent () :
+  val info = info(EATER)
+  val len = find({EATER[_] != ' '}, 0 to length(EATER) + 1) as Int
+  eat(EATER, len)
+  val indent = Token(Indentation(len), info)
+  if eat-comment() :
+    eat-indent()
+  else if EATER[0] == '\n' :
+    eat(EATER)
+    eat-indent()
+  else :
+    token-eaten(indent)
+
+;Eat an integer or floating-point literal, with optional leading '-'.
+;Literals containing '.' lex as Float (throwing InvalidNumber when
+;malformed). Other literals lex as Int when they fit in the signed
+;32-bit range (inclusive), and as Long otherwise.
+defn eat-number () :
+  if digit?(EATER[0]) or
+    (EATER[0] == '-' and digit?(EATER[1])) :
+
+    val info = info(EATER)
+    ;Index 0 is known to start the number, so scan from index 1. The
+    ;range extends one past the end so find always succeeds (EATER
+    ;yields false there, which is not a number character).
+    val end = find({not number-char?(EATER[_])}, 1 to length(EATER) + 1) as Int
+    val str = eat(EATER, end)
+    if contains?(str, '.') :
+      match(to-float(str)) :
+        (f:Float) : token-eaten(Token(f, info))
+        (f:False) : throw(InvalidNumber(info))
+    else :
+      match(to-long(str)) :
+        (l:Long) :
+          ;Bug fix: use inclusive bounds. The previous strict < / >
+          ;comparisons wrongly lexed the boundary values 2147483647 and
+          ;-2147483648 — which fit in Int — as Long tokens.
+          if l <= (to-long("2147483647") as Long) and l >= (to-long("-2147483648") as Long) :
+            token-eaten(Token(to-int(str), info))
+          else :
+            token-eaten(Token(l, info))
+        (l:False) : token-eaten(Token(to-int(str), info))
+
+;Eat a here-string of the form \<tag>contents<tag>. After consuming
+;the backslash, the tag is everything up to and including the first
+;'>'; the string's contents run until the next occurrence of the same
+;tag. Emits the raw contents (no escape processing) as a String token.
+defn eat-here-string () :
+  if EATER[0] == '\\' and EATER[1] == '<' :
+    val info = info(EATER)
+    eat(EATER)
+    val tag-len =
+      match(find({EATER[_] == '>'}, 0 to length(EATER))) :
+        (i:Int) : i + 1
+        (n:False) : throw(InvalidTag(info))
+    ;True when the tag (still at indices 0 ..< tag-len, since nothing
+    ;has been eaten yet) recurs starting at index i.
+    defn tag? (i:Int) :
+      for j in 0 to tag-len all? :
+        EATER[i + j] == EATER[j]
+    val str-len =
+      match(find(tag?, tag-len to length(EATER))) :
+        (i:Int) : i - tag-len
+        (n:False) : throw(NoEndTagFound(info))
+    eat(EATER, tag-len)
+    val str = eat(EATER, str-len)
+    eat(EATER, tag-len)
+    token-eaten(Token(str, info))
+
+;Eat a single structural character: open brackets become OpenTokens,
+;close brackets become CloseTokens, and punctuation (backtick) becomes
+;a PuncToken.
+defn eat-structural-token () :
+  val info = info(EATER)
+  if open-brace?(EATER[0]) :
+    token-eaten(Token(OpenToken(to-symbol(eat(EATER))), info))
+  else if close-brace?(EATER[0]) :
+    token-eaten(Token(CloseToken(to-symbol(eat(EATER))), info))
+  else if punc?(EATER[0]) :
+    token-eaten(Token(PuncToken(to-symbol(eat(EATER))), info))
+
+;Called when STAR? is set: an open bracket directly following an
+;operand becomes a starred OpenToken (e.g. *( ), later translated to
+;call/index/type-application forms by lex-atom.
+defn eat-star-token () :
+  val info = info(EATER)
+  if open-brace?(EATER[0]) :
+    token-eaten(Token(OpenToken(symbol-join(["*" eat(EATER)])), info))
+
+;Eat a capture pattern ?name: emits a PuncToken for the '?' followed
+;by a Symbol token for the name. Returns false when '?' is not
+;followed by a valid symbol.
+defn eat-capture () :
+  if (EATER[0] == '?') :
+    match(symbol-end(1)) :
+      (end:Int) :
+        val pinfo = info(EATER)
+        token-eaten(Token(PuncToken(to-symbol(eat(EATER))), pinfo))
+        val info = info(EATER)
+        token-eaten(Token(to-symbol(eat(EATER, end - 1)), info))
+      (end:False) :
+        false
+
+;Eat exactly one lexeme by trying each eater in priority order; each
+;eater returns false without consuming when its guard fails, and
+;token-eaten returns true on success. Throws InvalidToken when nothing
+;matches. After a token that sets STAR?, an adjacent open bracket is
+;additionally eaten as a star token.
+defn eat-lexeme! () :
+  val ate? =
+    eat-capture() or
+    eat-here-string() or
+    eat-escaped-symbol() or
+    eat-char() or
+    eat-string() or
+    eat-number() or
+    eat-symbol() or
+    eat-operator() or
+    eat-structural-token()
+  if ate? :
+    eat-star-token() when STAR?
+  else : throw(InvalidToken(info(EATER)))
+
+;Skip spaces/commas. Whitespace separates operands, so STAR? is
+;cleared (a bracket after whitespace is not a star token).
+defn eat-whitespace () :
+  if whitespace?(EATER[0]) :
+    while whitespace?(EATER[0]) :
+      eat(EATER)
+    STAR? = false
+
+;Skip whitespace and comments, handle line breaks by emitting the next
+;line's Indentation token, and otherwise eat one real lexeme.
+defn eat-lexeme () :
+  eat-whitespace()
+  if EATER[0] != false :
+    if eat-comment() :
+      eat-lexeme()
+    else if EATER[0] == '\n' :
+      eat(EATER)
+      eat-indent()
+    else :
+      eat-lexeme!()
+
+;Eat lexemes until the input is exhausted.
+defn eat-all () :
+  while EATER[0] != false :
+    eat-lexeme()
+
+;================ GROUPING ==================================
+;Symbol constants for the grouping phase. Starred variants are the
+;call/index tokens emitted by eat-star-token.
+val OPEN-PAREN = `\|(|
+val STAR-PAREN = `\|*(|
+val CLOSE-PAREN = `\|)|
+val OPEN-BRACKET = `\|{|
+val STAR-BRACKET = `\|*{|
+val CLOSE-BRACKET = `\|}|
+val OPEN-BRACE = `\|[|
+val STAR-BRACE = `\|*[|
+val CLOSE-BRACE = `\|]|
+val STAR-ANGLE = `\|*<|
+val CLOSE-ANGLE = `\|>|
+val COLON = `:
+val QUESTION = `?
+val BACKTICK = `\|`|
+
+;Map an opening token symbol to the closing symbol that ends its
+;group. Errors on anything that is not a known opener.
+defn matching-end (s:Symbol) :
+  if s == OPEN-PAREN : CLOSE-PAREN
+  else if s == STAR-PAREN : CLOSE-PAREN
+  else if s == OPEN-BRACKET : CLOSE-BRACKET
+  else if s == STAR-BRACKET : CLOSE-BRACKET
+  else if s == OPEN-BRACE : CLOSE-BRACE
+  else if s == STAR-BRACE : CLOSE-BRACE
+  else if s == STAR-ANGLE : CLOSE-ANGLE
+  else : error("No matching end")
+
+;Position of the open token of the group currently being built (for
+;NoClosingToken errors).
+var START-INFO = false
+;LEXEMES reversed into a stack: peek/pop (which operate on the vector's
+;end) yield tokens in original source order.
+var TOKEN-STREAM : Vector<Token>
+;Group the flat LEXEMES stream into nested lists by bracket pairs and
+;colon-introduced indentation blocks.
+defn group-all () -> List :
+  TOKEN-STREAM = Vector<Token>(length(LEXEMES))
+  while not empty?(LEXEMES) :
+    add(TOKEN-STREAM, pop(LEXEMES))
+  group-rest(false)
+
+;Group tokens until the terminator `end` is reached. end is one of:
+;false (top level: group to end of stream), a Symbol (the closing
+;bracket that ends this group, which is consumed), or an Indentation
+;(the indent level of a colon block's body; the block ends, without
+;consuming, at the first line indented less than that, or at an
+;enclosing close bracket).
+defn group-rest (end) -> List :
+  if empty?(TOKEN-STREAM) :
+    match(end) :
+      (end:Symbol) :
+        throw(NoClosingToken(START-INFO as FileInfo, end))
+      (end) :
+        List()
+  else :
+    val x = peek(TOKEN-STREAM)
+    match(item(x)) :
+      (t:CloseToken) :
+        match(end) :
+          (end:Symbol) :
+            pop(TOKEN-STREAM)
+            List()
+          (end:Indentation) :
+            ;Leave the close token for the enclosing bracket group.
+            List()
+      (t:OpenToken) :
+        ;Recursively group the bracketed subrange, then continue.
+        pop(TOKEN-STREAM)
+        val old-info = START-INFO
+        START-INFO = info(x)
+        val g = group-rest(matching-end(symbol(t)))
+        START-INFO = old-info
+        List(List(x, g), group-rest(end))
+      (t:PuncToken) :
+        pop(TOKEN-STREAM)
+        List(x, group-rest(end))
+      (s:Symbol) :
+        pop(TOKEN-STREAM)
+        ;A colon followed by an Indentation opens an indented block
+        ;grouped until the indentation drops below the body's level.
+        if s == COLON :
+          match(item(peek(TOKEN-STREAM))) :
+            (i:Indentation) :
+              val y = pop(TOKEN-STREAM)
+              val g = group-rest(i)
+              List(x, List(y, g), group-rest(end))
+            (t) :
+              List(x, group-rest(end))
+        else :
+          List(x, group-rest(end))
+      (i:Indentation) :
+        ;A dedent below the block's indent ends the block (token is
+        ;left for the enclosing group); otherwise drop the marker.
+        if (end typeof Indentation) and
+           (indent(i) < indent(end as Indentation)) :
+          List()
+        else :
+          pop(TOKEN-STREAM)
+          group-rest(end)
+      (t) :
+        pop(TOKEN-STREAM)
+        List(x, group-rest(end))
+
+;============== ADDING SHORTCUTS ============================
+;True if x (possibly Token-wrapped) is an Indentation marker.
+defn indentation? (x) :
+  unwrap-token(x) typeof Indentation
+;True if x is an OpenToken with exactly the given symbol.
+defn opentoken? (x, s:Symbol) :
+  match(unwrap-token(x)) :
+    (x:OpenToken) : symbol(x) == s
+    (x) : false
+;True if x is an OpenToken whose symbol is any of the given symbols.
+defn opentoken? (x, s:Streamable<Symbol>) :
+  match(unwrap-token(x)) :
+    (x:OpenToken) : contains?(s, symbol(x))
+    (x) : false
+;True if x is a PuncToken with the given symbol.
+defn punctoken? (x, s:Symbol) :
+  match(unwrap-token(x)) :
+    (x:PuncToken) : symbol(x) == s
+    (x) : false
+;True if the next item in xs is a grouped form opened by a starred
+;bracket token.
+defn startoken-pending? (xs:List) :
+  if not empty?(xs) :
+    match(head(xs)) :
+      (x:FullList) : opentoken?(head(x), [STAR-PAREN, STAR-BRACE, STAR-BRACKET, STAR-ANGLE])
+      (x) : false
+
+;Translate a single grouped item into its final form. Bracket groups
+;map to tagged lists: indentation and plain parens pass through,
+;{...} -> (@tuple ...), [...] -> (@afn ...), and the starred variants
+;*( *{ *[ *< map to @do/@get/@do-afn/@of respectively. Token wrappers
+;are preserved by mapping over their contents; atoms pass through.
+defn lex-atom (x) -> ? :
+  match(x) :
+    (x:Token) :
+      map(lex-atom, x)
+    (x:FullList) :
+      if indentation?(head(x)) : lex-all(tail(x))
+      else if opentoken?(head(x), OPEN-PAREN) : lex-all(tail(x))
+      else if opentoken?(head(x), OPEN-BRACE) : List(`@tuple, lex-all(tail(x)))
+      else if opentoken?(head(x), OPEN-BRACKET) : List(`@afn, lex-all(tail(x)))
+      else if opentoken?(head(x), STAR-PAREN) : List(`@do, lex-all(tail(x)))
+      else if opentoken?(head(x), STAR-BRACE) : List(`@get, lex-all(tail(x)))
+      else if opentoken?(head(x), STAR-BRACKET) : List(`@do-afn, lex-all(tail(x)))
+      else if opentoken?(head(x), STAR-ANGLE) : List(`@of, lex-all(tail(x)))
+      else : error(string-join(["Invalid grouped form: " x]))
+    (x) : x
+
+;Translate every item in a grouped list. The '?' punctuation wraps the
+;following item as a capture form (@cap x), and the backtick quotes
+;the following translated form as (@quote x).
+defn lex-all (xs:List) -> List :
+  if empty?(xs) :
+    xs
+  else if punctoken?(head(xs), QUESTION) :
+    val capped = list(OpenToken(`\|(|), `@cap, xs[1])
+    lex-all(List(capped, tailn(xs, 2)))
+  else if punctoken?(head(xs), BACKTICK) :
+    if empty?(tail(xs)) :
+      `(@quote)
+    else :
+      val rest = lex-all(tail(xs))
+      List(list(`@quote, head(rest)), tail(rest))
+  else :
+    List(lex-atom(head(xs)), lex-all(tail(xs)))
+
+;============== LEXER ERRORS ================================
+;Base interface for all lexer errors.
+definterface LexerException <: Exception
+;Construct a LexerException that prints the given message.
+defn LexerException (s:String) :
+  new LexerException :
+    defmethod print (o:OutputStream, this) :
+      print(o, s)
+
+;Combine many lexer errors into one exception, one message per line.
+defn LexerExceptions (xs:Streamable<LexerException>) :
+  LexerException(string-join(xs, "\n"))
+
+;Constructors for the specific lexer error messages. Each formats the
+;source position followed by a human-readable description.
+defn NoClosingToken (info:FileInfo, end:Symbol) :
+  LexerException $ string-join $
+  [info ": No closing token found. Expecting " end "."]
+
+defn InvalidNumber (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Invalid number."]
+
+defn InvalidToken (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Invalid token."]
+
+defn InvalidEscapeChar (info:FileInfo, c:Char) :
+  LexerException $ string-join $
+  [info ": Invalid escape character: " c "."]
+
+defn UnclosedString (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Unclosed string. "]
+
+defn UnclosedCharString (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Unclosed character. "]
+
+defn UnclosedSymbol (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Unclosed symbol. "]
+
+defn InvalidCharString (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Invalid character string. Must have length 1."]
+
+defn WrongClosingToken (info:FileInfo, expected:Symbol, actual:Symbol) :
+  LexerException $ string-join $
+  [info ": Wrong closing parenthesis. Expecting " expected " but got " actual "."]
+
+defn ExtraClosingToken (info:FileInfo, c:Symbol) :
+  LexerException $ string-join $
+  [info ": Extra closing token found: " c "."]
+
+defn InvalidTag (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": Invalid tag for here string."]
+
+defn NoEndTagFound (info:FileInfo) :
+  LexerException $ string-join $
+  [info ": No ending tag found for here string."]