diff options
| author | azidar | 2015-07-02 16:16:52 -0700 |
|---|---|---|
| committer | azidar | 2015-07-02 16:16:52 -0700 |
| commit | a5727e677bbf12674da94526366535a0f820590d (patch) | |
| tree | d8e48d2e8f813690d3c83982b216b0335ed45add /src | |
| parent | 843af237df6a677b387149ad76c1d343dc7271e1 (diff) | |
Added firrtl-lexer
Diffstat (limited to 'src')
| -rw-r--r-- | src/main/stanza/firrtl-lexer.stanza | 570 |
1 files changed, 570 insertions, 0 deletions
;File: src/main/stanza/firrtl-lexer.stanza
;
;Lexer for FIRRTL source text. Lexing happens in three phases:
;  1. EATING   : characters are consumed from a StringEater and appended
;                to LEXEMES as Tokens (eat-* functions).
;  2. GROUPING : the flat lexeme stream is nested according to brackets
;                and colon/indentation structure (group-all, group-rest).
;  3. SHORTCUTS: grouped forms are rewritten into tagged lists such as
;                (@tuple ...), (@do ...), (@quote ...) (lex-atom, lex-all).
defpackage firrtl/lexer :
   import core
   import core/stringeater
   import verse

;=============== PUBLIC INTERFACE ===========================
;Lex every form in `text`. Positions are reported against "NOFILE".
public defn lex (text:String) -> List<Token> :
   lex-all-forms(StringEater(text, "NOFILE"))

;Read `filename`, announce it on standard output, and lex every form in it.
public defn lex-file (filename:String) -> List<Token> :
   println-all(["Reading " filename])
   val eater = StringEater(read-file(filename), filename)
   lex-all-forms(eater)

;Lex a single form from `eater`: eat one lexeme, then keep eating while
;input remains and a bracket scope is still open. Returns the first
;resulting form.
public defn lex-form (eater:StringEater) :
   init-lexer(eater)
   eat-lexeme()
   while (EATER[0] != false) and not empty?(SCOPES) :
      eat-lexeme()
   val form = head(lex-all(group-all()))
   throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
   form

;Lex all remaining input of `eater` and return the list of forms.
public defn lex-all-forms (eater:StringEater) :
   init-lexer(eater)
   eat-all()
   val grouped = group-all()
   val form = lex-all(grouped)
   throw(LexerExceptions(ERRORS)) when not empty?(ERRORS)
   form

;=============== TOKEN CLASSES ==============================
;Marks the start of a line; `indent` is the number of leading spaces.
defstruct Indentation :
   indent:Int
defmethod print (o:OutputStream, i:Indentation) :
   print-all(o, ["[Indentation " indent(i) "]"])

;Opening bracket. `symbol` is the bracket, prefixed with * for the
;"star" form produced by eat-star-token.
defstruct OpenToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:OpenToken) :
   print-all(o, ["OPEN[" symbol(t) "]"])

;Closing bracket.
defstruct CloseToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:CloseToken) :
   print-all(o, ["CLOSE[" symbol(t) "]"])

;Punctuation: the backtick, and the ? emitted by eat-capture.
defstruct PuncToken :
   symbol:Symbol
defmethod print (o:OutputStream, t:PuncToken) :
   print-all(o, ["PUNC[" symbol(t) "]"])

;=============== LEXER STATE ================================
;All state is global and reset by init-lexer.
var LEXEMES: Vector<Token>           ;Tokens eaten so far
var SCOPES: Vector<Symbol>           ;Stack of expected closing brackets
var ERRORS: Vector<LexerException>   ;NOTE(review): nothing visible here adds to this
var EATER: StringEater               ;Current input
var STAR?: True|False                ;May the next open bracket be a star bracket?

;Reset all lexer state to read from `eater`.
defn init-lexer (eater:StringEater) :
   EATER = eater
   LEXEMES = Vector<Token>()
   SCOPES = Vector<Symbol>()
   ERRORS = Vector<LexerException>()
   STAR? = false

;================= CHARACTER CLASSES ========================
;Lookup table indexed by character code. Each entry is a bit mask
;whose bit positions are the *-CHAR constants below.
val CHAR-CLASSES = String(256, to-char(0))

;True if `c` is a Char whose class mask has `bit` set. Any non-Char
;value (e.g. the false returned at end of input) is in no class.
defn class? (c, bit:Int) -> True|False :
   match(c) :
      (c:Char) :
         val mask = to-int(CHAR-CLASSES[to-int(c as Char)])
         bit-set?(mask, bit)
      (c) :
         false

;Set class bit `bit` for every character in `class`.
defn tag-class (class:String, bit:Int) :
   val tag = 1 << bit
   for c in class do :
      val i = to-int(c)
      val mask = to-int(CHAR-CLASSES[i])
      val c2 = to-char(mask | tag)
      CHAR-CLASSES[i] = c2

;Bit positions within a CHAR-CLASSES mask.
val DIGIT-CHAR = 0
val ALPHA-CHAR = 1
val PUNC-CHAR = 2
val OPEN-BRACE-CHAR = 3
val CLOSE-BRACE-CHAR = 4
val OPERATOR-CHAR = 5
val SYMBOL-CHAR = 6
val WHITESPACE-CHAR = 7

;Populate the table. Note that comma counts as whitespace, and that
;'<' is both an open brace and an operator character.
let :
   val digits = "0123456789"
   val letters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ"
   tag-class(letters, ALPHA-CHAR)
   tag-class("_?", ALPHA-CHAR)
   tag-class(digits, DIGIT-CHAR)
   tag-class("`", PUNC-CHAR)
   tag-class(" ,", WHITESPACE-CHAR)
   tag-class("([{<", OPEN-BRACE-CHAR)
   tag-class(")]}>", CLOSE-BRACE-CHAR)
   tag-class("~!@#$%^*+-=/", OPERATOR-CHAR)
   tag-class("~!@#$%^*+-=/", SYMBOL-CHAR)
   tag-class(".:<&|", OPERATOR-CHAR)
   tag-class("_?", SYMBOL-CHAR)
   tag-class(letters, SYMBOL-CHAR)
   tag-class(digits, SYMBOL-CHAR)

;================ LOW LEVEL PREDICATES =====================
;Lexer Predicates
defn whitespace? (c) : class?(c, WHITESPACE-CHAR)
defn digit? (c) : class?(c, DIGIT-CHAR)
defn alpha? (c) : class?(c, ALPHA-CHAR)
defn punc? (c) : class?(c, PUNC-CHAR)
defn open-brace? (c) : class?(c, OPEN-BRACE-CHAR)
defn close-brace? (c) : class?(c, CLOSE-BRACE-CHAR)
defn number-char? (c) : digit?(c) or (c == '.')
defn symbol-char? (c) : class?(c, SYMBOL-CHAR)
;'>' is an operator character unless the innermost open scope is
;waiting for '>' as its closing bracket.
defn operator-char? (c) :
   if c == '>' :
      empty?(SCOPES) or (peek(SCOPES) != `>)
   else :
      class?(c, OPERATOR-CHAR)

;================ EATING FUNCTIONS =========================
;Maintain SCOPES for bracket symbol `c`: an opening bracket pushes the
;closing bracket it expects; a closing bracket must equal the top of
;the stack and pops it.
defn update-stack (info:FileInfo, c:Symbol) :
   defn pop-stack () :
      if empty?(SCOPES) :
         throw(ExtraClosingToken(info, c))
      else if peek(SCOPES) != c :
         throw(WrongClosingToken(info, peek(SCOPES), c))
      else :
         pop(SCOPES)

   switch {c == _} :
      `\|<| : add(SCOPES, `\|>|)
      `\|[| : add(SCOPES, `\|]|)
      `\|{| : add(SCOPES, `\|}|)
      `\|(| : add(SCOPES, `\|)|)
      `\|*<| : add(SCOPES, `\|>|)
      `\|*[| : add(SCOPES, `\|]|)
      `\|*{| : add(SCOPES, `\|}|)
      `\|*(| : add(SCOPES, `\|)|)
      `\|>| : pop-stack()
      `\|]| : pop-stack()
      `\|}| : pop-stack()
      `\|)| : pop-stack()

;Record token `t`, update the bracket stack, and recompute STAR?.
;Always returns true so that eat-* functions can be chained with `or`.
defn token-eaten (t:Token) :
   ;Update Lexemes
   add(LEXEMES, t)

   ;Update the stack
   match(item(t)) :
      (x:OpenToken) : update-stack(info(t), symbol(x))
      (x:CloseToken) : update-stack(info(t), symbol(x))
      (x) : false

   ;Update STAR?
   ;An open bracket directly after a closing bracket, a literal, or a
   ;symbol containing an alpha character is treated as a star bracket.
   STAR? =
      match(item(t)) :
         (x:CloseToken) : true
         (x:Int|Float|Char|String) : true
         (x:True|False) : true
         (x:Symbol) : any?(alpha?, to-string(x))
         (x) : false

   true

;Translate the character following a backslash into the character it
;denotes. Throws InvalidEscapeChar for anything unrecognized.
defn escape-char (c:Char) -> Char :
   switch {c == _} :
      'n' : '\n'
      '\\' : c
      '"' : c
      '\'' : c
      '|' : c
      else : throw(InvalidEscapeChar(info(EATER), c))

;Eat a delimited run of characters. EATER[0] is taken as the delimiter;
;the run extends to the next unescaped occurrence of it. Returns the
;unescaped contents, or false (eating nothing) if input ends first.
defn eat-escaped-chars () :
   val buf = StringBuffer()
   val end-char = EATER[0]
   val end = loop(1) where :
      defn* loop (i:Int) :
         val c1 = EATER[i]
         val c2 = EATER[i + 1]
         if c1 == false :
            false
         else if c1 == end-char :
            i + 1
         else if c1 == '\\' and c2 != false :
            add(buf, escape-char(c2 as Char))
            loop(i + 2)
         else :
            add(buf, c1 as Char)
            loop(i + 1)
   if end != false :
      eat(EATER, end as Int)
      to-string(buf)

;If positioned at ';', eat up to (not including) the newline or end of
;input and return true.
defn eat-comment () -> True|False :
   if EATER[0] == ';' :
      while (EATER[0] != false and EATER[0] != '\n') :
         eat(EATER)
      true

;Eat a double-quoted string literal.
defn eat-string () :
   val info = info(EATER)
   if EATER[0] == '"' :
      match(eat-escaped-chars()) :
         (s:String) : token-eaten(Token(s, info))
         (s:False) : throw(UnclosedString(info))

;Eat a single-quoted character literal; its contents must have length 1.
defn eat-char () :
   val info = info(EATER)
   if EATER[0] == '\'' :
      match(eat-escaped-chars()) :
         (s:String) :
            if length(s) == 1 : token-eaten(Token(s[0], info))
            else : throw(InvalidCharString(info))
         (s:False) : throw(UnclosedCharString(info))

;Eat an escaped symbol of the form \|...|.
defn eat-escaped-symbol () :
   val info = info(EATER)
   if EATER[0] == '\\' and EATER[1] == '|' :
      eat(EATER)   ;Drop the backslash; '|' becomes the delimiter
      match(eat-escaped-chars()) :
         (s:String) : token-eaten(Token(to-symbol(s), info))
         (s:False) : throw(UnclosedSymbol(info))

;Scan symbol characters beginning at offset `start`. Returns the offset
;just past the run if the run contains at least one alpha character,
;otherwise false.
defn symbol-end (start:Int) -> False|Int :
   defn length (a?:True|False, i:Int) :
      if symbol-char?(EATER[i]) :
         length(a? or alpha?(EATER[i]), i + 1)
      else if a? :
         i
   length(false, start)

;Eat a symbol. The texts "true" and "false" become boolean tokens.
defn eat-symbol () :
   match(symbol-end(0)) :
      (len:Int) :
         val info = info(EATER)
         val str = eat(EATER, len)
         switch {str == _} :
            "true" : token-eaten(Token(true, info))
            "false" : token-eaten(Token(false, info))
            else : token-eaten(Token(to-symbol(str), info))
      (len:False) :
         false

;Eat a run of operator characters as a symbol. If the run is followed
;by an alpha character, back up over trailing symbol characters so that
;they are left for the symbol that follows.
defn eat-operator () :
   val len = look-forward(0) where :
      defn* look-forward (i:Int) :
         if operator-char?(EATER[i]) : look-forward(i + 1)
         else if alpha?(EATER[i]) : look-back(i - 1)
         else : i
      defn* look-back (i:Int) :
         if symbol-char?(EATER[i]) : look-back(i - 1)
         else : i + 1
   if len > 0 :
      val info = info(EATER)
      token-eaten(Token(to-symbol(eat(EATER, len)), info))

;Eat leading spaces at the start of a line and emit an Indentation
;token. Lines that hold only a comment or nothing at all are skipped
;without emitting one.
defn* eat-indent () :
   val info = info(EATER)
   val len = find({EATER[_] != ' '}, 0 to length(EATER) + 1) as Int
   eat(EATER, len)
   val indent = Token(Indentation(len), info)
   if eat-comment() :
      eat-indent()
   else if EATER[0] == '\n' :
      eat(EATER)
      eat-indent()
   else :
      token-eaten(indent)

;Eat a numeric literal, optionally preceded by '-'. A literal with a
;'.' becomes a Float. Otherwise it becomes an Int when the value lies
;within [-2147483648, 2147483647] and a Long when it does not.
defn eat-number () :
   if digit?(EATER[0]) or
      (EATER[0] == '-' and digit?(EATER[1])) :

      val info = info(EATER)
      val end = find({not number-char?(EATER[_])}, 1 to length(EATER) + 1) as Int
      val str = eat(EATER, end)
      if contains?(str, '.') :
         match(to-float(str)) :
            (f:Float) : token-eaten(Token(f, info))
            (f:False) : throw(InvalidNumber(info))
      else :
         match(to-long(str)) :
            (l:Long) :
               ;Bounds are inclusive: both endpoints are representable as Int.
               if l <= (to-long("2147483647") as Long) and l >= (to-long("-2147483648") as Long) : token-eaten(Token(to-int(str), info))
               else : token-eaten(Token(l, info))
            ;NOTE(review): when to-long fails we still fall back to to-int;
            ;confirm what to-int yields for such text.
            (l:False) : token-eaten(Token(to-int(str), info))
      ;else : token-eaten(Token(to-int(str), info)) <- ADAM CHANGE, FROM THIS

;Eat a here-string of the form \<TAG>contents<TAG>. The opening tag runs
;from '<' through the first '>'; the contents run until the next
;occurrence of exactly that tag text.
defn eat-here-string () :
   if EATER[0] == '\\' and EATER[1] == '<' :
      val info = info(EATER)
      eat(EATER)   ;Drop the backslash
      val tag-len =
         match(find({EATER[_] == '>'}, 0 to length(EATER))) :
            (i:Int) : i + 1
            (n:False) : throw(InvalidTag(info))
      ;Does the text at offset i repeat the opening tag?
      defn tag? (i:Int) :
         for j in 0 to tag-len all? :
            EATER[i + j] == EATER[j]
      val str-len =
         match(find(tag?, tag-len to length(EATER))) :
            (i:Int) : i - tag-len
            (n:False) : throw(NoEndTagFound(info))
      eat(EATER, tag-len)
      val str = eat(EATER, str-len)
      eat(EATER, tag-len)
      token-eaten(Token(str, info))

;Eat a single open bracket, close bracket, or punctuation character.
defn eat-structural-token () :
   val info = info(EATER)
   if open-brace?(EATER[0]) :
      token-eaten(Token(OpenToken(to-symbol(eat(EATER))), info))
   else if close-brace?(EATER[0]) :
      token-eaten(Token(CloseToken(to-symbol(eat(EATER))), info))
   else if punc?(EATER[0]) :
      token-eaten(Token(PuncToken(to-symbol(eat(EATER))), info))

;Eat an open bracket as its star variant (symbol prefixed with "*").
;Called only when STAR? is set, i.e. directly after a qualifying token.
defn eat-star-token () :
   val info = info(EATER)
   if open-brace?(EATER[0]) :
      token-eaten(Token(OpenToken(symbol-join(["*" eat(EATER)])), info))

;Eat a capture of the form ?name as two tokens: a PuncToken for the '?'
;followed by the symbol `name`.
defn eat-capture () :
   if (EATER[0] == '?') :
      match(symbol-end(1)) :
         (end:Int) :
            val pinfo = info(EATER)
            token-eaten(Token(PuncToken(to-symbol(eat(EATER))), pinfo))
            val info = info(EATER)
            token-eaten(Token(to-symbol(eat(EATER, end - 1)), info))
         (end:False) :
            false

;Eat exactly one lexeme, trying each eater in priority order. Throws
;InvalidToken if none of them applies.
defn eat-lexeme! () :
   val ate? =
      eat-capture() or
      eat-here-string() or
      eat-escaped-symbol() or
      eat-char() or
      eat-string() or
      eat-number() or
      eat-symbol() or
      eat-operator() or
      eat-structural-token()
   if ate? :
      eat-star-token() when STAR?
   else : throw(InvalidToken(info(EATER)))

;Skip whitespace characters. Whitespace cancels a pending star bracket.
defn eat-whitespace () :
   if whitespace?(EATER[0]) :
      while whitespace?(EATER[0]) :
         eat(EATER)
      STAR? = false

;Skip whitespace and comments, then eat either a newline followed by
;its indentation, or one lexeme. Does nothing at end of input.
defn eat-lexeme () :
   eat-whitespace()
   if EATER[0] != false :
      if eat-comment() :
         eat-lexeme()
      else if EATER[0] == '\n' :
         eat(EATER)
         eat-indent()
      else :
         eat-lexeme!()

;Eat lexemes until the input is exhausted.
defn eat-all () :
   while EATER[0] != false :
      eat-lexeme()

;================ GROUPING ==================================
val OPEN-PAREN = `\|(|
val STAR-PAREN = `\|*(|
val CLOSE-PAREN = `\|)|
val OPEN-BRACKET = `\|{|
val STAR-BRACKET = `\|*{|
val CLOSE-BRACKET = `\|}|
val OPEN-BRACE = `\|[|
val STAR-BRACE = `\|*[|
val CLOSE-BRACE = `\|]|
val STAR-ANGLE = `\|*<|
val CLOSE-ANGLE = `\|>|
val COLON = `:
val QUESTION = `?
val BACKTICK = `\|`|

;Closing bracket symbol that terminates the group opened by `s`.
defn matching-end (s:Symbol) :
   if s == OPEN-PAREN : CLOSE-PAREN
   else if s == STAR-PAREN : CLOSE-PAREN
   else if s == OPEN-BRACKET : CLOSE-BRACKET
   else if s == STAR-BRACKET : CLOSE-BRACKET
   else if s == OPEN-BRACE : CLOSE-BRACE
   else if s == STAR-BRACE : CLOSE-BRACE
   else if s == STAR-ANGLE : CLOSE-ANGLE
   else : error("No matching end")

var START-INFO = false                ;Position of the innermost open bracket
var TOKEN-STREAM : Vector<Token>      ;Lexemes in reverse, so pop yields source order

;Move LEXEMES into TOKEN-STREAM (reversing them) and group the stream.
defn group-all () -> List :
   TOKEN-STREAM = Vector<Token>(length(LEXEMES))
   while not empty?(LEXEMES) :
      add(TOKEN-STREAM, pop(LEXEMES))
   group-rest(false)

;Group tokens until the current group ends. `end` describes what ends it:
;  Symbol      - the closing bracket of an enclosing bracket group,
;  Indentation - the indentation that opened a colon block; the block
;                ends at the first line indented less than this,
;  false       - top level; ends at end of input.
defn group-rest (end) -> List :
   if empty?(TOKEN-STREAM) :
      match(end) :
         (end:Symbol) :
            throw(NoClosingToken(START-INFO as FileInfo, end))
         (end) :
            List()
   else :
      val x = peek(TOKEN-STREAM)
      match(item(x)) :
         (t:CloseToken) :
            match(end) :
               (end:Symbol) :
                  pop(TOKEN-STREAM)
                  List()
               ;A close bracket also ends an indented block, but is left
               ;in the stream for the enclosing bracket group to consume.
               (end:Indentation) :
                  List()
         (t:OpenToken) :
            pop(TOKEN-STREAM)
            val old-info = START-INFO
            START-INFO = info(x)
            val g = group-rest(matching-end(symbol(t)))
            START-INFO = old-info
            List(List(x, g), group-rest(end))
         (t:PuncToken) :
            pop(TOKEN-STREAM)
            List(x, group-rest(end))
         (s:Symbol) :
            pop(TOKEN-STREAM)
            ;A colon followed by a line break opens an indented block.
            ;The emptiness check keeps a colon that is the very last
            ;token from peeking into an empty stream.
            if s == COLON and not empty?(TOKEN-STREAM) :
               match(item(peek(TOKEN-STREAM))) :
                  (i:Indentation) :
                     val y = pop(TOKEN-STREAM)
                     val g = group-rest(i)
                     List(x, List(y, g), group-rest(end))
                  (t) :
                     List(x, group-rest(end))
            else :
               List(x, group-rest(end))
         (i:Indentation) :
            if (end typeof Indentation) and
               (indent(i) < indent(end as Indentation)) :
               List()
            else :
               pop(TOKEN-STREAM)
               group-rest(end)
         (t) :
            pop(TOKEN-STREAM)
            List(x, group-rest(end))

;============== ADDING SHORTCUTS ============================
;Predicates over possibly token-wrapped items.
defn indentation? (x) :
   unwrap-token(x) typeof Indentation
defn opentoken? (x, s:Symbol) :
   match(unwrap-token(x)) :
      (x:OpenToken) : symbol(x) == s
      (x) : false
defn opentoken? (x, s:Streamable<Symbol>) :
   match(unwrap-token(x)) :
      (x:OpenToken) : contains?(s, symbol(x))
      (x) : false
defn punctoken? (x, s:Symbol) :
   match(unwrap-token(x)) :
      (x:PuncToken) : symbol(x) == s
      (x) : false
;Does `xs` begin with a group opened by a star bracket?
defn startoken-pending? (xs:List) :
   if not empty?(xs) :
      match(head(xs)) :
         (x:FullList) : opentoken?(head(x), [STAR-PAREN, STAR-BRACE, STAR-BRACKET, STAR-ANGLE])
         (x) : false

;Rewrite one grouped item. A group's head (its open bracket or
;indentation marker) is replaced by the tag for that kind of group;
;plain parentheses and indented blocks get no tag.
defn lex-atom (x) -> ? :
   match(x) :
      (x:Token) :
         map(lex-atom, x)
      (x:FullList) :
         if indentation?(head(x)) : lex-all(tail(x))
         else if opentoken?(head(x), OPEN-PAREN) : lex-all(tail(x))
         else if opentoken?(head(x), OPEN-BRACE) : List(`@tuple, lex-all(tail(x)))
         else if opentoken?(head(x), OPEN-BRACKET) : List(`@afn, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-PAREN) : List(`@do, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-BRACE) : List(`@get, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-BRACKET) : List(`@do-afn, lex-all(tail(x)))
         else if opentoken?(head(x), STAR-ANGLE) : List(`@of, lex-all(tail(x)))
         else : error(string-join(["Invalid grouped form: " x]))
      (x) : x

;Rewrite a list of grouped items. `?x` becomes (@cap x) and a backtick
;followed by a form becomes (@quote form).
defn lex-all (xs:List) -> List :
   if empty?(xs) :
      xs
   else if punctoken?(head(xs), QUESTION) :
      ;Wrap as a parenthesized group so lex-atom treats it as a plain list.
      val capped = list(OpenToken(`\|(|), `@cap, xs[1])
      lex-all(List(capped, tailn(xs, 2)))
   else if punctoken?(head(xs), BACKTICK) :
      if empty?(tail(xs)) :
         `(@quote)
      else :
         val rest = lex-all(tail(xs))
         List(list(`@quote, head(rest)), tail(rest))
   else :
      List(lex-atom(head(xs)), lex-all(tail(xs)))

;============== LEXER ERRORS ================================
definterface LexerException <: Exception
;A LexerException that prints as the message `s`.
defn LexerException (s:String) :
   new LexerException :
      defmethod print (o:OutputStream, this) :
         print(o, s)

;Combine several exceptions into one, one message per line.
defn LexerExceptions (xs:Streamable<LexerException>) :
   LexerException(string-join(xs, "\n"))

defn NoClosingToken (info:FileInfo, end:Symbol) :
   LexerException $ string-join $
   [info ": No closing token found. Expecting " end "."]

defn InvalidNumber (info:FileInfo) :
   LexerException $ string-join $
   [info ": Invalid number."]

defn InvalidToken (info:FileInfo) :
   LexerException $ string-join $
   [info ": Invalid token."]

defn InvalidEscapeChar (info:FileInfo, c:Char) :
   LexerException $ string-join $
   [info ": Invalid escape character: " c "."]

defn UnclosedString (info:FileInfo) :
   LexerException $ string-join $
   [info ": Unclosed string. "]

defn UnclosedCharString (info:FileInfo) :
   LexerException $ string-join $
   [info ": Unclosed character. "]

defn UnclosedSymbol (info:FileInfo) :
   LexerException $ string-join $
   [info ": Unclosed symbol. "]

defn InvalidCharString (info:FileInfo) :
   LexerException $ string-join $
   [info ": Invalid character string. Must have length 1."]

defn WrongClosingToken (info:FileInfo, expected:Symbol, actual:Symbol) :
   LexerException $ string-join $
   [info ": Wrong closing parenthesis. Expecting " expected " but got " actual "."]

defn ExtraClosingToken (info:FileInfo, c:Symbol) :
   LexerException $ string-join $
   [info ": Extra closing token found: " c "."]

defn InvalidTag (info:FileInfo) :
   LexerException $ string-join $
   [info ": Invalid tag for here string."]

defn NoEndTagFound (info:FileInfo) :
   LexerException $ string-join $
   [info ": No ending tag found for here string."]
