From 40625c16f7573398252ccf040ef49d398d64d5bd Mon Sep 17 00:00:00 2001 From: Alasdair Date: Thu, 13 Aug 2020 11:27:31 +0100 Subject: Preserve file structure through initial check Insert $file_start and $file_end pragmas in the AST, as well as $include_start and $include_end pragmas so we can reconstruct the original file structure later if needed, provided nothing like topological sorting has been done. Have the Lexer produce a list of comments whenever it parses a file, which can the be attached to the nearest nodes in the abstract syntax tree. --- src/ast_util.ml | 54 ++++++++++++++++++++++++++++++++++++++++++++++++ src/ast_util.mli | 4 ++++ src/initial_check.ml | 25 ++++++++++++++++------ src/initial_check.mli | 2 +- src/isail.ml | 2 +- src/lexer.mll | 37 ++++++++++++++++++++------------- src/parse_ast.ml | 2 +- src/parser.mly | 12 ++++------- src/pretty_print_sail.ml | 6 +++++- src/process_file.ml | 44 +++++++++++++++++++++++---------------- src/process_file.mli | 4 ++-- src/splice.ml | 4 ++-- 12 files changed, 142 insertions(+), 54 deletions(-) diff --git a/src/ast_util.ml b/src/ast_util.ml index d0af5da3..3123d447 100644 --- a/src/ast_util.ml +++ b/src/ast_util.ml @@ -2228,3 +2228,57 @@ let rec simple_string_of_loc = function | Parse_ast.Generated l -> "Generated(" ^ simple_string_of_loc l ^ ")" | Parse_ast.Range (lx1,lx2) -> "Range(" ^ string_of_lx lx1 ^ "->" ^ string_of_lx lx2 ^ ")" | Parse_ast.Documented (_,l) -> "Documented(_," ^ simple_string_of_loc l ^ ")" + +let attach_comments comments defs = + let open Lexing in + let module IntMap = Map.Make(struct type t = int let compare = compare end) in + let high_scores = ref IntMap.empty in + let uid = ref (-1) in + + let comment_text (Lexer.Comment (_, _, _, text)) = text in + + let loc_distance p1 p2 = + abs (p1.pos_lnum - p2.pos_lnum) * 1000 + abs (p1.pos_cnum - p2.pos_cnum) in + + let comment_distance (Lexer.Comment (_, c1, c2, _)) l = + match Reporting.simp_loc l with + | None -> None + | Some (p1, p2) -> Some (min (loc_distance c1 p2) (loc_distance c2 p1)) in + + let loc_width l = + match Reporting.simp_loc l with + | None -> -1 + | Some (p1, p2) -> p1.pos_cnum - p2.pos_cnum in + + let score_annot l n comment = + incr uid; + begin match comment_distance comment l with + | Some dist -> + begin match IntMap.find_opt n !high_scores with + | Some (best_dist, best_l, _) -> + if dist < best_dist || (dist = best_dist && loc_width l > loc_width best_l) then + high_scores := IntMap.add n (dist, l, !uid) !high_scores + else + () + | None -> + high_scores := IntMap.add n (dist, l, !uid) !high_scores + end + | None -> () + end; + !uid + in + + let defs = List.map (map_def_annot (fun (l, annot) -> (l, (List.mapi (score_annot l) comments, annot)))) defs in + + let attach_comment (l, (uids, annot)) = + let l = + IntMap.fold (fun n (_, _, uid) l -> + if List.mem uid uids then + Parse_ast.Documented (List.nth comments n |> comment_text, l) + else + l + ) !high_scores l + in + (l, annot) + in + List.map (map_def_annot attach_comment) defs diff --git a/src/ast_util.mli b/src/ast_util.mli index 1449a390..6ac29123 100644 --- a/src/ast_util.mli +++ b/src/ast_util.mli @@ -527,3 +527,7 @@ val quant_item_subst_kid : kid -> kid -> quant_item -> quant_item val typquant_subst_kid : kid -> kid -> typquant -> typquant val simple_string_of_loc : Parse_ast.l -> string + +(** Attach comments produced by the lexer into their nearest nodes in + the abstract syntax tree *) +val attach_comments : Lexer.comment list -> 'a def list -> 'a def list diff --git a/src/initial_check.ml b/src/initial_check.ml index f808af49..828d1b5e 100644 --- a/src/initial_check.ml +++ b/src/initial_check.ml @@ -828,12 +828,25 @@ let rec remove_mutrec = function | def :: defs -> def :: remove_mutrec defs -let to_ast ctx (P.Defs(defs)) = - let defs = remove_mutrec defs in +let to_ast ctx (P.Defs files) = + let to_ast_defs ctx (_, defs) = + let defs = remove_mutrec defs in + let defs, ctx = + List.fold_left (fun (defs, ctx) def -> let def, ctx = to_ast_def ctx def in (def @ defs, ctx)) ([], ctx) defs + in + List.rev defs, ctx + in + let wrap_file file defs = + [DEF_pragma ("file_start", file, P.Unknown)] + @ defs + @ [DEF_pragma ("file_end", file, P.Unknown)] + in let defs, ctx = - List.fold_left (fun (defs, ctx) def -> let def, ctx = to_ast_def ctx def in (def @ defs, ctx)) ([], ctx) defs + List.fold_left (fun (defs, ctx) file -> + let defs', ctx = to_ast_defs ctx file in (defs @ wrap_file (fst file) defs', ctx) + ) ([], ctx) files in - Defs (List.rev defs), ctx + Defs defs, ctx let initial_ctx = { type_constructors = @@ -1107,8 +1120,8 @@ let process_ast ?generate:(generate=true) defs = let ast_of_def_string_with f str = let def = Parser.def_eof Lexer.token (Lexing.from_string str) in - process_ast (f (P.Defs [def])) + process_ast (P.Defs [("", f [def])]) let ast_of_def_string str = let def = Parser.def_eof Lexer.token (Lexing.from_string str) in - process_ast (P.Defs [def]) + process_ast (P.Defs [("", [def])]) diff --git a/src/initial_check.mli b/src/initial_check.mli index 59c8f0b6..5a352eee 100644 --- a/src/initial_check.mli +++ b/src/initial_check.mli @@ -103,7 +103,7 @@ val process_ast : ?generate:bool -> Parse_ast.defs -> unit defs val extern_of_string : id -> string -> unit def val val_spec_of_string : id -> string -> unit def val ast_of_def_string : string -> unit defs -val ast_of_def_string_with : (Parse_ast.defs -> Parse_ast.defs) -> string -> unit defs +val ast_of_def_string_with : (Parse_ast.def list -> Parse_ast.def list) -> string -> unit defs val exp_of_string : string -> unit exp val typ_of_string : string -> typ val constraint_of_string : string -> n_constraint diff --git a/src/isail.ml b/src/isail.ml index dc7858c9..e7864751 100644 --- a/src/isail.ml +++ b/src/isail.ml @@ -597,7 +597,7 @@ let handle_input' input = | _ -> print_endline "Invalid arguments for :let" end | ":def" -> - let ast = Initial_check.ast_of_def_string_with (Process_file.preprocess_ast options) arg in + let ast = Initial_check.ast_of_def_string_with (Process_file.preprocess options) arg in let ast, env = Type_check.check !Interactive.env ast in Interactive.ast := append_ast !Interactive.ast ast; Interactive.env := env; diff --git a/src/lexer.mll b/src/lexer.mll index 40f4b470..d2901bdb 100644 --- a/src/lexer.mll +++ b/src/lexer.mll @@ -186,7 +186,12 @@ let kw_table = ("internal_return", (fun _ -> InternalReturn)); ] +type comment_type = Comment_block | Comment_line +type comment = + | Comment of comment_type * Lexing.position * Lexing.position * string + +let comments = ref [] } @@ -256,8 +261,8 @@ rule token = parse | "=>" { EqGt(r "=>") } | "<=" { (LtEq(r"<=")) } | "/*!" { Doc (doc_comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) 0 lexbuf) } - | "//" { line_comment (Lexing.lexeme_start_p lexbuf) lexbuf; token lexbuf } - | "/*" { comment (Lexing.lexeme_start_p lexbuf) 0 lexbuf; token lexbuf } + | "//" { line_comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf; token lexbuf } + | "/*" { comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) 0 lexbuf; token lexbuf } | "*/" { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) } | "$" (ident+ as i) (lchar* as f) "\n" { Lexing.new_line lexbuf; Pragma(i, String.trim f) } @@ -297,9 +302,10 @@ rule token = parse Printf.sprintf "Unexpected character: %c" c, Lexing.lexeme_start_p lexbuf)) } -and line_comment pos = parse - | "\n" { Lexing.new_line lexbuf; () } - | _ { line_comment pos lexbuf } +and line_comment pos b = parse + | "\n" { Lexing.new_line lexbuf; + comments := Comment (Comment_line, pos, Lexing.lexeme_end_p lexbuf, Buffer.contents b) :: !comments } + | _ as c { Buffer.add_string b (String.make 1 c); line_comment pos b lexbuf } | eof { raise (LexError("File ended before newline in comment", pos)) } and doc_comment pos b depth = parse @@ -312,16 +318,19 @@ and doc_comment pos b depth = parse | "\n" { Buffer.add_string b "\n"; Lexing.new_line lexbuf; doc_comment pos b depth lexbuf } | _ as c { Buffer.add_string b (String.make 1 c); doc_comment pos b depth lexbuf } -and comment pos depth = parse - | "/*" { comment pos (depth+1) lexbuf } - | "*/" { if depth = 0 then () - else if depth > 0 then comment pos (depth-1) lexbuf - else assert false } - | "\n" { Lexing.new_line lexbuf; - comment pos depth lexbuf } +and comment pos b depth = parse + | "/*" { comment pos b (depth + 1) lexbuf } + | "*/" { if depth = 0 then ( + comments := Comment (Comment_block, pos, Lexing.lexeme_end_p lexbuf, Buffer.contents b) :: !comments + ) else ( + comment pos b (depth-1) lexbuf + ) } + | "\n" { Buffer.add_string b "\n"; + Lexing.new_line lexbuf; + comment pos b depth lexbuf } | '"' { ignore(string (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf); - comment pos depth lexbuf } - | _ { comment pos depth lexbuf } + comment pos b depth lexbuf } + | _ as c { Buffer.add_string b (String.make 1 c); comment pos b depth lexbuf } | eof { raise (LexError("Unbalanced comment", pos)) } and string pos b = parse diff --git a/src/parse_ast.ml b/src/parse_ast.ml index 6cb3f84d..c7b01ba5 100644 --- a/src/parse_ast.ml +++ b/src/parse_ast.ml @@ -531,4 +531,4 @@ and lexp = type defs = (* Definition sequence *) - Defs of (def) list + Defs of (string * def list) list diff --git a/src/parser.mly b/src/parser.mly index 70f6f225..6c90e463 100644 --- a/src/parser.mly +++ b/src/parser.mly @@ -224,7 +224,7 @@ let rec desugar_rchain chain s e = %type typ_eof %type exp_eof %type def_eof -%type file +%type file %% @@ -1480,18 +1480,14 @@ defs_list: | def { [$1] } | def defs_list - { $1::$2 } + { $1 :: $2 } def_eof: | def Eof { $1 } -defs: - | defs_list - { (Defs $1) } - file: - | defs Eof + | defs_list Eof { $1 } | Eof - { (Defs []) } + { [] } diff --git a/src/pretty_print_sail.ml b/src/pretty_print_sail.ml index 7e98f4e3..10a15245 100644 --- a/src/pretty_print_sail.ml +++ b/src/pretty_print_sail.ml @@ -717,6 +717,10 @@ let rec doc_scattered (SD_aux (sd_aux, _)) = | SD_unioncl (id, tu) -> separate space [string "union clause"; doc_id id; equals; doc_union tu] +let doc_filter = function + | DEF_pragma ("file_start", _, _) | DEF_pragma ("file_end", _, _) -> false + | _ -> true + let rec doc_def def = group (match def with | DEF_default df -> doc_default df | DEF_spec v_spec -> doc_spec v_spec @@ -744,7 +748,7 @@ let rec doc_def def = group (match def with ) ^^ hardline let doc_defs (Defs(defs)) = - separate_map hardline doc_def defs + separate_map hardline doc_def (List.filter doc_filter defs) let pp_defs f d = ToChannel.pretty 1. 80 f (doc_defs d) diff --git a/src/process_file.ml b/src/process_file.ml index 74b4d05b..d1325e00 100644 --- a/src/process_file.ml +++ b/src/process_file.ml @@ -72,13 +72,15 @@ let get_lexbuf f = Lexing.pos_cnum = 0; }; lexbuf, in_chan -let parse_file ?loc:(l=Parse_ast.Unknown) (f : string) : Parse_ast.defs = +let parse_file ?loc:(l=Parse_ast.Unknown) (f : string) : (Lexer.comment list * Parse_ast.def list) = try let lexbuf, in_chan = get_lexbuf f in begin try - let ast = Parser.file Lexer.token lexbuf in - close_in in_chan; ast + Lexer.comments := []; + let defs = Parser.file Lexer.token lexbuf in + close_in in_chan; + (!Lexer.comments, defs) with | Parser.Error -> let pos = Lexing.lexeme_start_p lexbuf in @@ -155,8 +157,15 @@ let all_pragmas = "property"; "counterexample"; "suppress_warnings"; + "include_start"; + "include_end" ] +let wrap_include l file defs = + [Parse_ast.DEF_pragma ("include_start", file, l)] + @ defs + @ [Parse_ast.DEF_pragma ("include_end", file, l)] + let rec preprocess opts = function | [] -> [] | Parse_ast.DEF_pragma ("define", symbol, _) :: defs -> @@ -197,9 +206,9 @@ let rec preprocess opts = function | _ -> failwith "Couldn't figure out relative path for $include. This really shouldn't ever happen." in let file = String.sub file 1 (len - 2) in - let (Parse_ast.Defs include_defs) = parse_file ~loc:l (Filename.concat relative file) in - let include_defs = preprocess opts include_defs in - include_defs @ preprocess opts defs + let include_file = Filename.concat relative file in + let include_defs = parse_file ~loc:l (Filename.concat relative file) |> snd |> preprocess opts in + wrap_include l include_file include_defs @ preprocess opts defs else if file.[0] = '<' && file.[len - 1] = '>' then let file = String.sub file 1 (len - 2) in let sail_dir = @@ -212,9 +221,8 @@ let rec preprocess opts = function (failwith ("Library directory " ^ share_dir ^ " does not exist. Make sure sail is installed or try setting environment variable SAIL_DIR so that I can find $include " ^ file)) in let file = Filename.concat sail_dir ("lib/" ^ file) in - let (Parse_ast.Defs include_defs) = parse_file ~loc:l file in - let include_defs = preprocess opts include_defs in - include_defs @ preprocess opts defs + let include_defs = parse_file ~loc:l file |> snd |> preprocess opts in + wrap_include l file include_defs @ preprocess opts defs else let help = "Make sure the filename is surrounded by quotes or angle brackets" in (Reporting.warn "" l ("Skipping bad $include " ^ file ^ ". " ^ help); preprocess opts defs) @@ -226,6 +234,12 @@ let rec preprocess opts = function end; preprocess opts defs + (* Filter file_start and file_end out of the AST so when we + round-trip files through the compiler we don't end up with + incorrect start/end annotations *) + | (Parse_ast.DEF_pragma ("file_start", _, _) | Parse_ast.DEF_pragma ("file_end", _, _)) :: defs -> + preprocess opts defs + | Parse_ast.DEF_pragma (p, arg, l) :: defs -> if not (StringSet.mem p all_pragmas) then Reporting.warn "" l ("Unrecognised directive: " ^ p); @@ -240,8 +254,6 @@ let rec preprocess opts = function | def :: defs -> def :: preprocess opts defs -let preprocess_ast opts (Parse_ast.Defs defs) = Parse_ast.Defs (preprocess opts defs) - let opt_just_check = ref false let opt_ddump_tc_ast = ref false let opt_ddump_rewrite_ast = ref None @@ -258,11 +270,7 @@ let load_files ?check:(check=false) options type_envs files = if !opt_memo_z3 then Constraint.load_digests () else (); let t = Profile.start () in - let parsed = List.map (fun f -> (f, parse_file f)) files in - let ast = - List.fold_right (fun (_, Parse_ast.Defs ast_nodes) (Parse_ast.Defs later_nodes) - -> Parse_ast.Defs (ast_nodes@later_nodes)) parsed (Parse_ast.Defs []) in - let ast = preprocess_ast options ast in + let ast = Parse_ast.Defs (List.map (fun f -> (f, parse_file f |> snd |> preprocess options)) files) in let ast = Initial_check.process_ast ~generate:(not check) ast in (* The separate loop measures declarations would be awkward to type check, so move them into the definitions beforehand. *) @@ -276,8 +284,8 @@ let load_files ?check:(check=false) options type_envs files = if !opt_memo_z3 then Constraint.save_digests () else (); let out_name = match !opt_file_out with - | None when parsed = [] -> "out.sail" - | None -> fst (List.hd parsed) + | None when files = [] -> "out.sail" + | None -> List.hd files | Some f -> f ^ ".sail" in (out_name, ast, type_envs) diff --git a/src/process_file.mli b/src/process_file.mli index c9eb5e9e..3b23b1bd 100644 --- a/src/process_file.mli +++ b/src/process_file.mli @@ -50,13 +50,13 @@ (** Parse a file. The optional loc argument is the location of the $include directive that is importing the file, if applicable. *) -val parse_file : ?loc:Parse_ast.l -> string -> Parse_ast.defs +val parse_file : ?loc:Parse_ast.l -> string -> Lexer.comment list * Parse_ast.def list val clear_symbols : unit -> unit val have_symbol : string -> bool val add_symbol : string -> unit -val preprocess_ast : (Arg.key * Arg.spec * Arg.doc) list -> Parse_ast.defs -> Parse_ast.defs +val preprocess : (Arg.key * Arg.spec * Arg.doc) list -> Parse_ast.def list -> Parse_ast.def list val check_ast : Type_check.Env.t -> unit Ast.defs -> Type_check.tannot Ast.defs * Type_check.Env.t val rewrite_ast_initial : Type_check.Env.t -> Type_check.tannot Ast.defs -> Type_check.tannot Ast.defs * Type_check.Env.t val rewrite_ast_target : string -> Type_check.Env.t -> Type_check.tannot Ast.defs -> Type_check.tannot Ast.defs * Type_check.Env.t diff --git a/src/splice.ml b/src/splice.ml index 90488c0a..8ebd5793 100644 --- a/src/splice.ml +++ b/src/splice.ml @@ -40,8 +40,8 @@ let filter_replacements spec_found (Defs defs) = in List.filter not_found defs let splice ast file = - let parsed_ast = Process_file.parse_file file in - let repl_ast = Initial_check.process_ast ~generate:false parsed_ast in + let parsed_ast = Process_file.parse_file file |> snd in + let repl_ast = Initial_check.process_ast ~generate:false (Parse_ast.Defs [(file, parsed_ast)]) in let repl_ast = Rewrites.move_loop_measures repl_ast in let repl_ast = map_defs_annot (fun (l,_) -> l,Type_check.empty_tannot) repl_ast in let repl_ids, repl_specs = scan_defs repl_ast in -- cgit v1.2.3