diff options
Diffstat (limited to 'language')
| -rw-r--r-- | language/jib.ott (renamed from language/bytecode.ott) | 136 |
1 files changed, 94 insertions, 42 deletions
diff --git a/language/bytecode.ott b/language/jib.ott index cc329e02..e54e2ea5 100644 --- a/language/bytecode.ott +++ b/language/jib.ott @@ -49,6 +49,7 @@ grammar % Fragments are small pure snippets of (abstract) C code, mostly % expressions, used by the aval and cval types. + fragment :: 'F_' ::= | id :: :: id | '&' id :: :: ref @@ -62,45 +63,67 @@ fragment :: 'F_' ::= | string :: :: raw | poly fragment :: :: poly -% init / clear -> create / kill +% Note that init / clear are sometimes refered to as create / kill + +%%% IR types ctyp :: 'CT_' ::= {{ com C type }} - | mpz_t :: :: lint -% Arbitrary precision GMP integer, mpz_t in C. - | bv_t ( bool ) :: :: lbits -% Variable length bitvector - flag represents direction, true - dec or false - inc - | sbv_t ( bool ) :: :: sbits -% Small variable length bitvector - less than 64 bits - | 'uint64_t' ( nat , bool ) :: :: fbits -% Fixed length bitvector that fits within a 64-bit word. - int -% represents length, and flag is the same as CT_bv. - | 'int64_t' nat :: :: fint -% Used for (signed) integers that fit within 64-bits. - | unit_t :: :: unit -% unit is a value in sail, so we represent it as a one element type -% here too for clarity but we actually compile it to an int which is -% always 0. +% Integer types +% +% lint is a large (l) arbitrary precision integer, mpz_t in C. +% fint(n) is a fixed precision signed integer that is representable in exactly n bits + | lint :: :: lint + | fint nat :: :: fint + +% Bitvector types - flag represents bit indexing direction, true - dec or false - inc +% +% lbits is a large (l) arbitrary precision bitvector +% sbits is a small (s) bitvector, such that sbits(n, _) is guaranteed to have a length of at most n. +% fbits is a fixed (f) bitvector, such that fbits(n, _) has a length of exactly n bits + | lbits ( bool ) :: :: lbits + | sbits ( nat , bool ) :: :: sbits + | fbits ( nat , bool ) :: :: fbits + +% Other Sail types + | unit :: :: unit | bool_t :: :: bool - | real_t :: :: real - | bit_t :: :: bit -% The real type in sail. Abstract here, but implemented using either -% GMP rationals or high-precision floating point. - | ( ctyp0 , ... , ctypn ) :: :: tup + | bit :: :: bit | string_t :: :: string - | enum id ( id0 , ... , idn ) :: :: enum - | struct id ( id0 * ctyp0 , ... , idn * ctypn ) :: :: struct - | variant id ( id0 * ctyp0 , ... , idn * ctypn ) :: :: variant + +% The real type in sail. Abstract here, so the code generator can +% choose to implement it using either GMP rationals or high-precision +% floating point. + | real :: :: real + + | ( ctyp0 , ... , ctypn ) :: :: tup + % Abstractly represent how all the Sail user defined types get mapped % into C. We don't fully worry about precise implementation details at % this point, as C doesn't have variants or tuples natively, but these % need to be encoded. + | enum id ( id0 , ... , idn ) :: :: enum + | struct id ( id0 * ctyp0 , ... , idn * ctypn ) :: :: struct + | variant id ( id0 * ctyp0 , ... , idn * ctypn ) :: :: variant + +% A vector type for non-bit vectors, and a (linked) list type. | vector ( bool , ctyp ) :: :: vector | list ( ctyp ) :: :: list -% A vector type for non-bit vectors, and a list type. + | ref ( ctyp ) :: :: ref + +% We can still have a very limited amount of polymorphism in this IR +% representation, as variants can have polymorphic constructors. The +% reason is we can put more precise types into constructors and then +% consume them as more general types meaning the underlying +% representation (rather than the high-level sail types) are what we +% need to specialise constructors, e.g. Some(0xFF) would be a Some +% constructor containing a fbits(8, true), but this could be pattern +% matched as Some(x) where the matching context expects x to have type +% lbits, and this must work without compiling to type incorrect C. | poly :: :: poly + cval :: 'CV_' ::= {{ ocaml fragment * ctyp }} {{ lem fragment * ctyp }} @@ -112,6 +135,8 @@ clexp :: 'CL_' ::= | clexp . nat :: :: tuple | current_exception : ctyp :: :: current_exception | have_exception :: :: have_exception + | return : ctyp :: :: return + | void :: :: void ctype_def :: 'CTD_' ::= {{ com C type definition }} @@ -125,45 +150,72 @@ iannot :: 'IA_' ::= instr :: 'I_' ::= {{ aux _ iannot }} +% The following are the minimal set of instructions output by +% Jib_compile.ml. | ctyp id :: :: decl | ctyp id = cval :: :: init - | if ( cval ) { instr0 ; ... ; instrn } - else { instr0 ; ... ; instrm } : ctyp :: :: if | jump ( cval ) string :: :: jump + | goto string :: :: goto + | string : :: :: label | clexp = bool id ( cval0 , ... , cvaln ) :: :: funcall | clexp = cval :: :: copy - | alias clexp = cval :: :: alias | clear ctyp id :: :: clear - | return cval :: :: return + | undefined ctyp :: :: undefined + | match_failure :: :: match_failure + | end :: :: end + +% All instructions containing nested instructions can be flattened +% away. try and throw only exist for internal use within +% Jib_compile.ml, as exceptional control flow is handled by a separate +% Jib->Jib pass. + | if ( cval ) { instr0 ; ... ; instrn } + else { instr0 ; ... ; instrm } : ctyp :: :: if | { instr0 ; ... ; instrn } :: :: block | try { instr0 ; ... ; instrn } :: :: try_block | throw cval :: :: throw + +% We can embed either comments or pass raw-strings through to the +% code-generator. The first is useful for annotating generated source, +% the second for inserting instrumention. I_raw should be side-effect +% free. | '//' string :: :: comment - | C string :: :: raw % only used for GCC attributes - | string : :: :: label - | goto string :: :: goto - | undefined ctyp :: :: undefined - | match_failure :: :: match_failure + | C string :: :: raw + +% Jib_compile.ml will represent all returns as assigments to the clexp +% CL_return, followed by end to signify the end of the +% function. + | return cval :: :: return -% For optimising away allocations. - | reset ctyp id :: :: reset - | ctyp id = cval :: :: reinit +% For optimising away allocations and copying. + | reset ctyp id :: :: reset + | ctyp id = cval :: :: reinit + | alias clexp = cval :: :: alias cdef :: 'CDEF_' ::= | register id : ctyp = { instr0 ; ... ; instrn } :: :: reg_dec - | ctype_def :: :: type + | ctype_def :: :: type + +% The first list of instructions sets up the global letbinding, while +% the second clears it. | let nat ( id0 : ctyp0 , ... , idn : ctypn ) = { instr0 ; ... ; instrm } :: :: let -% The first list of instructions creates up the global letbinding, the -% second kills it. - | val id ( ctyp0 , ... , ctypn ) -> ctyp - :: :: spec + + | val id ( ctyp0 , ... , ctypn ) -> ctyp :: :: spec + +% If mid = Some id this indicates that the caller should allocate the +% return type and passes a pointer to it as an extra argument id for +% the function to fill in. This is only done via Jib->Jib rewrites +% used when compiling to C. | function id mid ( id0 , ... , idn ) { instr0 ; ... ; instrm } :: :: fundef + +% Each function can have custom global state. In CDEF_startup and +% CDEF_finish all I_decl and I_init nodes are treated as global and no +% nested-instructions (if/block) are allowed. | startup id { instr0 ; ... ; instrn } :: :: startup |
