;Lexer for the firrtl/lexer package.
;Lexing happens in three passes over module-level state:
;  1. "eating"   - characters from EATER are turned into Tokens in LEXEMES,
;  2. "grouping" - the flat token stream is nested by braces and by
;                  colon + indentation blocks,
;  3. "shortcuts"- grouping markers are rewritten into tagged lists
;                  (@tuple, @afn, @do, @get, @do-afn, @of, @cap, @quote).
defpackage firrtl/lexer :
   import core
   import core/stringeater
   import verse
   import bigint2

;=============== PUBLIC INTERFACE ===========================
;Lex every form contained in `text`. The file name recorded for the
;input is "NOFILE".
public defn lex (text:String) -> List :
   lex-all-forms(StringEater(text, "NOFILE"))

;Read `filename`, announce it on standard output, and lex every form in it.
public defn lex-file (filename:String) -> List :
   println-all(["Reading " filename])
   val eater = StringEater(read-file(filename), filename)
   lex-all-forms(eater)

;Lex a single form from `eater`: eat one lexeme, then keep eating while
;input remains and a brace scope is still open. Returns the first form of
;the grouped result.
public defn lex-form (eater:StringEater) :
   init-lexer(eater)
   eat-lexeme()
   while (EATER[0] != false) and not empty?(SCOPES) :
      eat-lexeme()
   val form = head(lex-all(group-all()))
   throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
   form

;Lex all remaining input of `eater` and return the list of forms.
public defn lex-all-forms (eater:StringEater) :
   init-lexer(eater)
   eat-all()
   val grouped = group-all()
   val form = lex-all(grouped)
   throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
   form

;=============== TOKEN CLASSES ==============================
;Marker emitted after a newline; `indent` is the number of leading spaces.
defstruct Indentation :
   indent:Int
defmethod print (o:OutputStream, i:Indentation) :
   print-all(o, ["[Indentation " indent(i) "]"])

;Opening brace marker. `symbol` is the brace itself, or the brace prefixed
;with "*" when produced by eat-star-token.
defstruct OpenToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:OpenToken) :
   print-all(o, ["OPEN[" symbol(t) "]"])

;Closing brace marker.
defstruct CloseToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:CloseToken) :
   print-all(o, ["CLOSE[" symbol(t) "]"])

;Punctuation marker (backtick, or the `?` emitted by eat-capture).
defstruct PuncToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:PuncToken) :
   print-all(o, ["PUNC[" symbol(t) "]"])

;=============== LEXER STATE ================================
;NOTE(review): these Vector declarations carry no element type arguments.
;From usage below, LEXEMES receives Tokens and SCOPES receives the closing
;brace Symbols that are still expected - confirm whether type arguments
;were lost from this file.
var LEXEMES: Vector
var SCOPES: Vector
var ERRORS: Vector
var EATER: StringEater
;True when the token just eaten may be followed directly (no whitespace)
;by an opening brace that should become a "*" open token.
var STAR?: True|False

;Reset all lexer state and make `eater` the current input.
defn init-lexer (eater:StringEater) :
   EATER = eater
   LEXEMES = Vector()
   SCOPES = Vector()
   ERRORS = Vector()
   STAR? = false

;================= CHARACTER CLASSES ========================
;Lookup table indexed by character code. Each entry is a bit mask whose
;bit positions are the *-CHAR constants below.
val CHAR-CLASSES = String(256, to-char(0))

;True when `c` is a Char whose class mask has `bit` set. Anything that is
;not a Char (e.g. the `false` returned at end of input) yields false.
defn class? (c, bit:Int) -> True|False :
   match(c) :
      (c:Char) :
         val mask = to-int(CHAR-CLASSES[to-int(c as Char)])
         bit-set?(mask, bit)
      (c) :
         false

;Set class bit `bit` for every character in `class`.
defn tag-class (class:String, bit:Int) :
   val tag = 1 << bit
   for c in class do :
      val i = to-int(c)
      val mask = to-int(CHAR-CLASSES[i])
      val c2 = to-char(mask | tag)
      CHAR-CLASSES[i] = c2

;Bit positions within a CHAR-CLASSES entry.
val DIGIT-CHAR = 0
val ALPHA-CHAR = 1
val PUNC-CHAR = 2
val OPEN-BRACE-CHAR = 3
val CLOSE-BRACE-CHAR = 4
val OPERATOR-CHAR = 5
val SYMBOL-CHAR = 6
val WHITESPACE-CHAR = 7

;Populate the class table once at load time.
let :
   val digits = "0123456789"
   val letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
   tag-class(letters, ALPHA-CHAR)
   tag-class("_?", ALPHA-CHAR)
   tag-class(digits, DIGIT-CHAR)
   tag-class("`", PUNC-CHAR)
   tag-class(" ,", WHITESPACE-CHAR)
   tag-class("([{<", OPEN-BRACE-CHAR)
   tag-class(")]}>", CLOSE-BRACE-CHAR)
   tag-class("~!@#$%^*+-=/", OPERATOR-CHAR)
   tag-class("~!@#$%^*+-=/", SYMBOL-CHAR)
   tag-class(".:<&|", OPERATOR-CHAR)
   tag-class("_?", SYMBOL-CHAR)
   tag-class(letters, SYMBOL-CHAR)
   tag-class(digits, SYMBOL-CHAR)

;================ LOW LEVEL PREDICATES =====================
;Lexer Predicates
defn whitespace? (c) : class?(c, WHITESPACE-CHAR)
defn digit? (c) : class?(c, DIGIT-CHAR)
defn alpha? (c) : class?(c, ALPHA-CHAR)
defn punc? (c) : class?(c, PUNC-CHAR)
defn open-brace? (c) : class?(c, OPEN-BRACE-CHAR)
defn close-brace? (c) : class?(c, CLOSE-BRACE-CHAR)
defn number-char? (c) : digit?(c) or (c == '.')
defn symbol-char? (c) : class?(c, SYMBOL-CHAR)

;'>' is an operator character unless the innermost open scope is waiting
;for a closing '>'; every other character is looked up in the class table.
defn operator-char? (c) :
   if c == '>' :
      empty?(SCOPES) or (peek(SCOPES) != `>)
   else :
      class?(c, OPERATOR-CHAR)

;================ EATING FUNCTIONS =========================
;Maintain SCOPES for the brace symbol `c` found at `info`.
;Opening braces (plain or "*"-prefixed) push the closing symbol they expect.
;Closing braces pop it, throwing ExtraClosingToken when no scope is open and
;WrongClosingToken when a different closer is expected.
defn update-stack (info:FileInfo, c:Symbol) :
   defn pop-stack () :
      if empty?(SCOPES) :
         throw(ExtraClosingToken(info, c))
      else if peek(SCOPES) != c :
         throw(WrongClosingToken(info, peek(SCOPES), c))
      else :
         pop(SCOPES)
   switch {c == _} :
      `\|<| : add(SCOPES, `\|>|)
      `\|[| : add(SCOPES, `\|]|)
      `\|{| : add(SCOPES, `\|}|)
      `\|(| : add(SCOPES, `\|)|)
      `\|*<| : add(SCOPES, `\|>|)
      `\|*[| : add(SCOPES, `\|]|)
      `\|*{| : add(SCOPES, `\|}|)
      `\|*(| : add(SCOPES, `\|)|)
      `\|>| : pop-stack()
      `\|]| : pop-stack()
      `\|}| : pop-stack()
      `\|)| : pop-stack()

;Record a freshly lexed token. Always returns true so that the eat-*
;functions can be chained with `or` in eat-lexeme!.
defn token-eaten (t:Token) :
   ;Update Lexemes
   add(LEXEMES, t)
   ;Update the stack
   match(item(t)) :
      (x:OpenToken) : update-stack(info(t), symbol(x))
      (x:CloseToken) : update-stack(info(t), symbol(x))
      (x) : false
   ;Update STAR?
   STAR? = match(item(t)) :
      (x:CloseToken) : true
      (x:Int|Float|Char|String) : true
      (x:True|False) : true
      (x:Symbol) : any?(alpha?, to-string(x))
      (x) : false
   true

;Translate the character following a backslash. Throws InvalidEscapeChar
;for anything other than n, backslash, double quote, single quote or '|'.
defn escape-char (c:Char) -> Char :
   switch {c == _} :
      'n' : '\n'
      '\\' : c
      '"' : c
      '\'' : c
      '|' : c
      else : throw(InvalidEscapeChar(info(EATER), c))

;Read a delimited, backslash-escaped run of characters. The delimiter is
;the character currently at EATER[0]. On success the whole run (both
;delimiters included) is consumed and the unescaped contents are returned;
;when the closing delimiter is never found nothing is consumed and the
;result is false.
defn eat-escaped-chars () :
   val buf = StringBuffer()
   val end-char = EATER[0]
   val end = loop(1) where :
      defn* loop (i:Int) :
         val c1 = EATER[i]
         val c2 = EATER[i + 1]
         if c1 == false : false
         else if c1 == end-char : i + 1
         else if c1 == '\\' and c2 != false :
            add(buf, escape-char(c2 as Char))
            loop(i + 2)
         else :
            add(buf, c1 as Char)
            loop(i + 1)
   if end != false :
      eat(EATER, end as Int)
      to-string(buf)

;Consume a ';' comment up to (not including) the newline or end of input.
;Returns true when a comment was present.
defn eat-comment () -> True|False :
   if EATER[0] == ';' :
      while (EATER[0] != false and EATER[0] != '\n') :
         eat(EATER)
      true

;Lex a double-quoted string literal into a String token.
defn eat-string () :
   val info = info(EATER)
   if EATER[0] == '"' :
      match(eat-escaped-chars()) :
         (s:String) : token-eaten(Token(s, info))
         (s:False) : throw(UnclosedString(info))

;Lex a single-quoted character literal; its contents must have length 1.
defn eat-char () :
   val info = info(EATER)
   if EATER[0] == '\'' :
      match(eat-escaped-chars()) :
         (s:String) :
            if length(s) == 1 : token-eaten(Token(s[0], info))
            else : throw(InvalidCharString(info))
         (s:False) : throw(UnclosedCharString(info))

;Lex an escaped symbol written as \|...| into a Symbol token.
defn eat-escaped-symbol () :
   val info = info(EATER)
   if EATER[0] == '\\' and EATER[1] == '|' :
      ;Drop the backslash so that '|' becomes the delimiter.
      eat(EATER)
      match(eat-escaped-chars()) :
         (s:String) : token-eaten(Token(to-symbol(s), info))
         (s:False) : throw(UnclosedSymbol(info))

;Index one past the run of symbol characters starting at `start`, or false
;when that run contains no alpha character.
defn symbol-end (start:Int) -> False|Int :
   defn length (a?:True|False, i:Int) :
      if symbol-char?(EATER[i]) : length(a? or alpha?(EATER[i]), i + 1)
      else if a? : i
   length(false, start)

;Lex a symbol. The spellings "true" and "false" become boolean tokens.
defn eat-symbol () :
   match(symbol-end(0)) :
      (len:Int) :
         val info = info(EATER)
         val str = eat(EATER, len)
         switch {str == _} :
            "true" : token-eaten(Token(true, info))
            "false" : token-eaten(Token(false, info))
            else : token-eaten(Token(to-symbol(str), info))
      (len:False) :
         false

;Lex a run of operator characters into a Symbol token. When the run is
;immediately followed by an alpha character, look-back retreats over the
;trailing symbol characters of the run so they are left for the next lexeme.
defn eat-operator () :
   val len = look-forward(0) where :
      defn* look-forward (i:Int) :
         if operator-char?(EATER[i]) : look-forward(i + 1)
         else if alpha?(EATER[i]) : look-back(i - 1)
         else : i
      defn* look-back (i:Int) :
         if symbol-char?(EATER[i]) : look-back(i - 1)
         else : i + 1
   if len > 0 :
      val info = info(EATER)
      token-eaten(Token(to-symbol(eat(EATER, len)), info))

;Called just after a newline has been consumed. Counts the leading spaces
;and emits an Indentation token, restarting on the next line when the
;current one holds only a comment or is blank.
defn* eat-indent () :
   val info = info(EATER)
   val len = find({EATER[_] != ' '}, 0 to length(EATER) + 1) as Int
   eat(EATER, len)
   val indent = Token(Indentation(len), info)
   if eat-comment() :
      eat-indent()
   else if EATER[0] == '\n' :
      eat(EATER)
      eat-indent()
   else :
      token-eaten(indent)

;Lex a number: a digit, or '-' followed by a digit, then any run of digits
;and '.'. Text containing '.' is parsed as a Float (InvalidNumber on
;failure); everything else goes through to-int.
;NOTE(review): the to-int result is stored in the token unchecked. If
;to-int can return false for out-of-range literals - TODO confirm - such a
;literal would lex as the boolean false.
defn eat-number () :
   if digit?(EATER[0]) or (EATER[0] == '-' and digit?(EATER[1])) :
      val info = info(EATER)
      val end = find({not number-char?(EATER[_])}, 1 to length(EATER) + 1) as Int
      val str = eat(EATER, end)
      if contains?(str, '.') :
         match(to-float(str)) :
            (f:Float) : token-eaten(Token(f, info))
            (f:False) : throw(InvalidNumber(info))
      else : token-eaten(Token(to-int(str), info))
      ;else : token-eaten(Token(BigIntLit(str),info))
      ;else :
         ;match(to-long(str)) :
         ;   (l:Long) :
         ;      if l < (to-long("2147483647") as Long) and l > (to-long("-2147483648") as Long) : token-eaten(Token(to-int(str), info))
         ;      else : token-eaten(Token(l, info))
         ;   (l:False) : token-eaten(Token(to-int(str), info))

;Lex a here-string: a backslash, a tag running from '<' through the first
;'>', the raw contents, then the same tag again. The contents become a
;String token with no escape processing.
defn eat-here-string () :
   if EATER[0] == '\\' and EATER[1] == '<' :
      val info = info(EATER)
      eat(EATER)
      val tag-len = match(find({EATER[_] == '>'}, 0 to length(EATER))) :
         (i:Int) : i + 1
         (n:False) : throw(InvalidTag(info))
      ;True when the text at offset i repeats the opening tag.
      defn tag? (i:Int) :
         for j in 0 to tag-len all? :
            EATER[i + j] == EATER[j]
      val str-len = match(find(tag?, tag-len to length(EATER))) :
         (i:Int) : i - tag-len
         (n:False) : throw(NoEndTagFound(info))
      eat(EATER, tag-len)
      val str = eat(EATER, str-len)
      eat(EATER, tag-len)
      token-eaten(Token(str, info))

;Lex a single opening brace, closing brace or punctuation character.
defn eat-structural-token () :
   val info = info(EATER)
   if open-brace?(EATER[0]) :
      token-eaten(Token(OpenToken(to-symbol(eat(EATER))), info))
   else if close-brace?(EATER[0]) :
      token-eaten(Token(CloseToken(to-symbol(eat(EATER))), info))
   else if punc?(EATER[0]) :
      token-eaten(Token(PuncToken(to-symbol(eat(EATER))), info))

;Lex an opening brace that directly follows the previous token, recording
;it as an OpenToken whose symbol is the brace prefixed with "*".
defn eat-star-token () :
   val info = info(EATER)
   if open-brace?(EATER[0]) :
      token-eaten(Token(OpenToken(symbol-join(["*" eat(EATER)])), info))

;Lex a capture "?name": a PuncToken for '?' followed by a Symbol token for
;the name. Does nothing when no valid symbol follows the '?'.
defn eat-capture () :
   if (EATER[0] == '?') :
      match(symbol-end(1)) :
         (end:Int) :
            val pinfo = info(EATER)
            token-eaten(Token(PuncToken(to-symbol(eat(EATER))), pinfo))
            val info = info(EATER)
            token-eaten(Token(to-symbol(eat(EATER, end - 1)), info))
         (end:False) :
            false

;Lex exactly one lexeme, trying each lexeme kind in priority order, then
;an adjacent "*" opening brace when the lexeme allows one. Throws
;InvalidToken when nothing matches.
defn eat-lexeme! () :
   val ate? =
      eat-capture() or
      eat-here-string() or
      eat-escaped-symbol() or
      eat-char() or
      eat-string() or
      eat-number() or
      eat-symbol() or
      eat-operator() or
      eat-structural-token()
   if ate? : eat-star-token() when STAR?
   else : throw(InvalidToken(info(EATER)))

;Skip whitespace characters. Skipping any clears STAR?, so a brace that
;follows whitespace is lexed as a plain opening brace.
defn eat-whitespace () :
   if whitespace?(EATER[0]) :
      while whitespace?(EATER[0]) :
         eat(EATER)
      STAR? = false

;Lex the next lexeme, handling leading whitespace, comments and newlines.
defn eat-lexeme () :
   eat-whitespace()
   if EATER[0] != false :
      if eat-comment() :
         eat-lexeme()
      else if EATER[0] == '\n' :
         eat(EATER)
         eat-indent()
      else :
         eat-lexeme!()

;Lex until the input is exhausted.
defn eat-all () :
   while EATER[0] != false :
      eat-lexeme()

;================ GROUPING ==================================
val OPEN-PAREN = `\|(|
val STAR-PAREN = `\|*(|
val CLOSE-PAREN = `\|)|
val OPEN-BRACKET = `\|{|
val STAR-BRACKET = `\|*{|
val CLOSE-BRACKET = `\|}|
val OPEN-BRACE = `\|[|
val STAR-BRACE = `\|*[|
val CLOSE-BRACE = `\|]|
val STAR-ANGLE = `\|*<|
val CLOSE-ANGLE = `\|>|
val COLON = `:
val QUESTION = `?
val BACKTICK = `\|`|

;Closing symbol expected for the opening symbol `s`.
defn matching-end (s:Symbol) :
   if s == OPEN-PAREN : CLOSE-PAREN
   else if s == STAR-PAREN : CLOSE-PAREN
   else if s == OPEN-BRACKET : CLOSE-BRACKET
   else if s == STAR-BRACKET : CLOSE-BRACKET
   else if s == OPEN-BRACE : CLOSE-BRACE
   else if s == STAR-BRACE : CLOSE-BRACE
   else if s == STAR-ANGLE : CLOSE-ANGLE
   else : error("No matching end")

;File position of the innermost opening token being grouped; used when
;reporting NoClosingToken.
var START-INFO = false
;Tokens still to be grouped. Filled by popping LEXEMES, so the first
;lexeme ends up on top.
var TOKEN-STREAM : Vector

;Move every lexeme into TOKEN-STREAM (emptying LEXEMES) and group them.
defn group-all () -> List :
   TOKEN-STREAM = Vector(length(LEXEMES))
   while not empty?(LEXEMES) :
      add(TOKEN-STREAM, pop(LEXEMES))
   group-rest(false)

;Group tokens from TOKEN-STREAM until the current group ends.
;`end` describes what terminates the group:
;  false       - top level, runs to the end of the stream,
;  Symbol      - a brace group, terminated by a CloseToken,
;  Indentation - a colon block, terminated by a CloseToken (left in the
;                stream) or by a line indented less than the block.
;Throws NoClosingToken when the stream ends inside a brace group.
defn group-rest (end) -> List :
   if empty?(TOKEN-STREAM) :
      match(end) :
         (end:Symbol) : throw(NoClosingToken(START-INFO as FileInfo, end))
         (end) : List()
   else :
      val x = peek(TOKEN-STREAM)
      match(item(x)) :
         (t:CloseToken) :
            match(end) :
               (end:Symbol) :
                  pop(TOKEN-STREAM)
                  List()
               (end:Indentation) :
                  List()
         (t:OpenToken) :
            pop(TOKEN-STREAM)
            val old-info = START-INFO
            START-INFO = info(x)
            val g = group-rest(matching-end(symbol(t)))
            START-INFO = old-info
            List(List(x, g), group-rest(end))
         (t:PuncToken) :
            pop(TOKEN-STREAM)
            List(x, group-rest(end))
         (s:Symbol) :
            pop(TOKEN-STREAM)
            ;A colon only opens a block when an Indentation token follows.
            ;The stream may be empty here (colon is the last lexeme), so it
            ;must be checked before peeking.
            if s == COLON and not empty?(TOKEN-STREAM) :
               match(item(peek(TOKEN-STREAM))) :
                  (i:Indentation) :
                     val y = pop(TOKEN-STREAM)
                     val g = group-rest(i)
                     List(x, List(y, g), group-rest(end))
                  (t) :
                     List(x, group-rest(end))
            else :
               List(x, group-rest(end))
         (i:Indentation) :
            if (end typeof Indentation) and (indent(i) < indent(end as Indentation)) :
               ;Dedent: leave the token for the enclosing group.
               List()
            else :
               pop(TOKEN-STREAM)
               group-rest(end)
         (t) :
            pop(TOKEN-STREAM)
            List(x, group-rest(end))

;============== ADDING SHORTCUTS ============================
;True when `x` wraps an Indentation marker.
defn indentation? (x) :
   unwrap-token(x) typeof Indentation

;True when `x` wraps an OpenToken whose symbol is `s`.
defn opentoken? (x, s:Symbol) :
   match(unwrap-token(x)) :
      (x:OpenToken) : symbol(x) == s
      (x) : false

;True when `x` wraps an OpenToken whose symbol is contained in `s`.
defn opentoken? (x, s:Streamable) :
   match(unwrap-token(x)) :
      (x:OpenToken) : contains?(s, symbol(x))
      (x) : false

;True when `x` wraps a PuncToken whose symbol is `s`.
defn punctoken? (x, s:Symbol) :
   match(unwrap-token(x)) :
      (x:PuncToken) : symbol(x) == s
      (x) : false

;True when the first element of `xs` is a group opened by a "*" brace.
defn startoken-pending? (xs:List) :
   if not empty?(xs) :
      match(head(xs)) :
         (x:FullList) : opentoken?(head(x), [STAR-PAREN, STAR-BRACE, STAR-BRACKET, STAR-ANGLE])
         (x) : false

;Rewrite one grouped element. Groups are replaced according to their
;leading marker: indentation blocks and (...) become plain lists, the other
;braces become lists tagged with @tuple, @afn, @do, @get, @do-afn or @of.
;Tokens are rewritten through map; anything else is returned unchanged.
defn lex-atom (x) -> ? :
   match(x) :
      (x:Token) :
         map(lex-atom, x)
      (x:FullList) :
         if indentation?(head(x)) :
            lex-all(tail(x))
         else if opentoken?(head(x), OPEN-PAREN) :
            lex-all(tail(x))
         else if opentoken?(head(x), OPEN-BRACE) :
            List(`@tuple, lex-all(tail(x)))
         else if opentoken?(head(x), OPEN-BRACKET) :
            List(`@afn, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-PAREN) :
            List(`@do, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-BRACE) :
            List(`@get, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-BRACKET) :
            List(`@do-afn, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-ANGLE) :
            List(`@of, lex-all(tail(x)))
         else :
            error(string-join(["Invalid grouped form: " x]))
      (x) :
         x

;Rewrite a sequence of grouped elements. A '?' marker wraps the element
;after it as (@cap x); a backtick wraps the next rewritten element as
;(@quote x), or yields (@quote) when nothing follows.
defn lex-all (xs:List) -> List :
   if empty?(xs) :
      xs
   else if punctoken?(head(xs), QUESTION) :
      ;Build a parenthesised group so that lex-atom unwraps it to (@cap x).
      val capped = list(OpenToken(`\|(|), `@cap, xs[1])
      lex-all(List(capped, tailn(xs, 2)))
   else if punctoken?(head(xs), BACKTICK) :
      if empty?(tail(xs)) :
         `(@quote)
      else :
         val rest = lex-all(tail(xs))
         List(list(`@quote, head(rest)), tail(rest))
   else :
      List(lex-atom(head(xs)), lex-all(tail(xs)))

;============== LEXER ERRORS ================================
;Exception type for every lexer failure; prints as the message `s`.
definterface LexerException <: Exception
defn LexerException (s:String) :
   new LexerException :
      defmethod print (o:OutputStream, this) :
         print(o, s)

;Combine several errors into one exception, one message per line.
defn LexerExceptions (xs:Streamable) :
   LexerException(string-join(xs, "\n"))

defn NoClosingToken (info:FileInfo, end:Symbol) :
   LexerException $ string-join $
      [info ": No closing token found. Expecting " end "."]

defn InvalidNumber (info:FileInfo) :
   LexerException $ string-join $
      [info ": Invalid number."]

defn InvalidToken (info:FileInfo) :
   LexerException $ string-join $
      [info ": Invalid token."]

defn InvalidEscapeChar (info:FileInfo, c:Char) :
   LexerException $ string-join $
      [info ": Invalid escape character: " c "."]

defn UnclosedString (info:FileInfo) :
   LexerException $ string-join $
      [info ": Unclosed string. "]

defn UnclosedCharString (info:FileInfo) :
   LexerException $ string-join $
      [info ": Unclosed character. "]

defn UnclosedSymbol (info:FileInfo) :
   LexerException $ string-join $
      [info ": Unclosed symbol. "]

defn InvalidCharString (info:FileInfo) :
   LexerException $ string-join $
      [info ": Invalid character string. Must have length 1."]

defn WrongClosingToken (info:FileInfo, expected:Symbol, actual:Symbol) :
   LexerException $ string-join $
      [info ": Wrong closing parenthesis. Expecting " expected " but got " actual "."]

defn ExtraClosingToken (info:FileInfo, c:Symbol) :
   LexerException $ string-join $
      [info ": Extra closing token found: " c "."]

defn InvalidTag (info:FileInfo) :
   LexerException $ string-join $
      [info ": Invalid tag for here string."]

defn NoEndTagFound (info:FileInfo) :
   LexerException $ string-join $
      [info ": No ending tag found for here string."]