From 43612633c7df7a3c96ae463e402d9a0e2c6e121a Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Sun, 20 Sep 2020 20:21:38 +0100
Subject: Improve parsing of saildoc comments

These were only parsed for val specs and scattered clauses, but many
other constructs can be meaningfully documented. Moreover, attaching the
documentation to the SD_aux rather than the FCL_aux etc inside it is
unhelpful since the latter is what the LaTeX backend sees.

Instead, push the documentation down into the non-scattered entity
within the SD_aux (i.e. the FCL_aux / Tu_aux / MCL_aux) when possible,
only leaving it on the SD_aux when they are more like a val spec. This
means that the saildoc for scattered function clauses is now emitted,
without any changes needed to the LaTeX backend.

Also support saildoc on a wider variety of non-scattered constructs, and
slightly simplify aspects of the grammar whilst here.
---
 src/parser.mly | 108 ++++++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 73 insertions(+), 35 deletions(-)

diff --git a/src/parser.mly b/src/parser.mly
index 6c90e463..538fe3e8 100644
--- a/src/parser.mly
+++ b/src/parser.mly
@@ -107,7 +107,6 @@ let mk_typschm_opt ts n m = TypSchm_opt_aux (
 let mk_typschm_opt_none = TypSchm_opt_aux (TypSchm_opt_none, Unknown)
 
 let mk_sd s n m = SD_aux (s, loc n m)
-let mk_sd_doc s str n m = SD_aux (s, Documented (str, loc n m))
 let mk_ir r n m = BF_aux (r, loc n m)
 
 let mk_funcl f n m = FCL_aux (f, loc n m)
@@ -124,7 +123,30 @@ let mk_forwards_mapcl mpexp exp n m = MCL_aux (MCL_forwards (mpexp, exp), loc n
 let mk_backwards_mapcl mpexp exp n m = MCL_aux (MCL_backwards (mpexp, exp), loc n m)
 let mk_map id tannot mapcls n m = MD_aux (MD_mapping (id, tannot, mapcls), loc n m)
 
-let doc_vs doc (VS_aux (v, l)) = VS_aux (v, Documented (doc, l))
+let doc_loc doc l =
+  match l with
+  | Documented _ -> l
+  | _            -> Documented (doc, l)
+
+let doc_funcl doc (FCL_aux (f, l)) = FCL_aux (f, doc_loc doc l)
+let doc_fun doc (FD_aux (fn, l)) = FD_aux (fn, doc_loc doc l)
+let doc_td doc (TD_aux (t, l)) = TD_aux (t, doc_loc doc l)
+let doc_vs doc (VS_aux (v, l)) = VS_aux (v, doc_loc doc l)
+let doc_reg_dec doc (DEC_aux (d, l)) = DEC_aux (d, doc_loc doc l)
+let doc_mapcl doc (MCL_aux (d, l)) = MCL_aux (d, doc_loc doc l)
+let doc_map doc (MD_aux (m, l)) = MD_aux (m, doc_loc doc l)
+let doc_tu doc (Tu_aux (tu, l)) = Tu_aux (tu, doc_loc doc l)
+
+let doc_sd doc (SD_aux (sd, l)) =
+  match sd with
+  | SD_funcl fcl -> SD_aux (SD_funcl (doc_funcl doc fcl), l)
+  | SD_unioncl (id, tu) -> SD_aux (SD_unioncl (id, doc_tu doc tu), l)
+  | SD_mapcl (id, mcl) -> SD_aux (SD_mapcl (id, doc_mapcl doc mcl), l)
+
+  | SD_function _
+  | SD_variant _
+  | SD_mapping _
+  | SD_end _ -> SD_aux (sd, doc_loc doc l)
 
 let qi_id_of_kopt (KOpt_aux (kopt_aux, l) as kopt) = QI_aux (QI_id kopt, l)
 
@@ -1132,11 +1154,17 @@ funcl:
   | id funcl_patexp
     { mk_funcl (FCL_Funcl ($1, $2)) $startpos $endpos }
 
+funcl_doc:
+  | Doc funcl_doc
+    { doc_funcl $1 $2 }
+  | funcl
+    { $1 }
+
 funcls2:
-  | id funcl_patexp
-    { [mk_funcl (FCL_Funcl ($1, $2)) $startpos $endpos] }
-  | id funcl_patexp And funcls2
-    { mk_funcl (FCL_Funcl ($1, $2)) $startpos $endpos($2) :: $4 }
+  | funcl_doc
+    { [$1] }
+  | funcl_doc And funcls2
+    { $1 :: $3 }
 
 funcls:
   | id funcl_patexp_typ
@@ -1188,6 +1216,8 @@ typaram:
     { mk_typq $2 [] $startpos $endpos }
 
 type_def:
+  | Doc type_def
+    { doc_td $1 $2 }
   | Typedef id typaram Eq typ
     { mk_td (TD_abbrev ($2, $3, K_aux (K_type, Parse_ast.Unknown), $5)) $startpos $endpos }
   | Typedef id Eq typ
@@ -1240,6 +1270,8 @@ struct_fields:
     { $1 :: $3 }
 
 type_union:
+  | Doc type_union
+    { doc_tu $1 $2 }
   | id Colon typ
     { Tu_aux (Tu_ty_id ($3, $1), loc $startpos $endpos) }
   | id Colon typ MinusGt typ
@@ -1260,10 +1292,10 @@ rec_measure:
     { mk_recr (Rec_measure ($2, $4)) $startpos $endpos }
 
 fun_def:
-  | Function_ funcls
-    { let funcls, tannot = $2 in mk_fun (FD_function (mk_recn, tannot, mk_eannotn, funcls)) $startpos $endpos }
-  | Function_ rec_measure funcls
-    { let funcls, tannot = $3 in mk_fun (FD_function ($2, tannot, mk_eannotn, funcls)) $startpos $endpos }
+  | Doc fun_def
+    { doc_fun $1 $2 }
+  | Function_ rec_measure? funcls
+    { let funcls, tannot = $3 in mk_fun (FD_function (default_opt mk_recn $2, tannot, mk_eannotn, funcls)) $startpos $endpos }
 
 fun_def_list:
   | fun_def
@@ -1344,13 +1376,21 @@ mapcl:
    * | exp LtMinus pat If_ exp
    *   { mk_backwards_mapcl (mk_pexp (Pat_when ($3, $5, $1)) $startpos $endpos) $startpos $endpos } *)
 
-mapcl_list:
+mapcl_doc:
+  | Doc mapcl_doc
+    { doc_mapcl $1 $2 }
   | mapcl
+    { $1 }
+
+mapcl_list:
+  | mapcl_doc
     { [$1] }
-  | mapcl Comma mapcl_list
+  | mapcl_doc Comma mapcl_list
     { $1 :: $3 }
 
 map_def:
+  | Doc map_def
+    { doc_map $1 $2 }
   | Mapping id Eq Lcurly mapcl_list Rcurly
     { mk_map $2 mk_typschm_opt_none $5 $startpos $endpos }
   | Mapping id Colon typschm Eq Lcurly mapcl_list Rcurly
@@ -1389,6 +1429,8 @@ val_spec_def:
     { mk_vs (VS_val_spec ($9, $3, $6, true)) $startpos $endpos }
 
 register_def:
+  | Doc register_def
+    { doc_reg_dec $1 $2 }
   | Register id Colon typ
     { let rreg = mk_typ (ATyp_set [mk_effect BE_rreg $startpos($1) $endpos($1)]) $startpos($1) $endpos($1) in
       let wreg = mk_typ (ATyp_set [mk_effect BE_wreg $startpos($1) $endpos($1)]) $startpos($1) $endpos($1) in
@@ -1405,22 +1447,26 @@ default_def:
     { mk_default (DT_order ($2, mk_typ ATyp_dec $startpos($3) $endpos)) $startpos $endpos }
 
 scattered_def:
-  | Union id typaram
-    { mk_sd (SD_variant($2, $3)) $startpos $endpos }
-  | Union id
-    { mk_sd (SD_variant($2, mk_typqn)) $startpos $endpos }
-  | Function_ id
-    { mk_sd (SD_function(mk_recn, mk_tannotn, mk_eannotn, $2)) $startpos $endpos }
-  | Mapping id
-    { mk_sd (SD_mapping ($2, mk_tannotn)) $startpos $endpos }
-  | Mapping id Colon funcl_typ
-    { mk_sd (SD_mapping ($2, $4)) $startpos $endpos }
-
-scattered_clause:
-  | Doc Function_ Clause funcl
-    { mk_sd_doc (SD_funcl $4) $1 $startpos($2) $endpos }
+  | Doc scattered_def
+    { doc_sd $1 $2 }
+  | Scattered Union id typaram
+    { mk_sd (SD_variant($3, $4)) $startpos $endpos }
+  | Scattered Union id
+    { mk_sd (SD_variant($3, mk_typqn)) $startpos $endpos }
+  | Scattered Function_ id
+    { mk_sd (SD_function(mk_recn, mk_tannotn, mk_eannotn, $3)) $startpos $endpos }
+  | Scattered Mapping id
+    { mk_sd (SD_mapping ($3, mk_tannotn)) $startpos $endpos }
+  | Scattered Mapping id Colon funcl_typ
+    { mk_sd (SD_mapping ($3, $5)) $startpos $endpos }
   | Function_ Clause funcl
     { mk_sd (SD_funcl $3) $startpos $endpos }
+  | Union Clause id Eq type_union
+    { mk_sd (SD_unioncl ($3, $5)) $startpos $endpos }
+  | Mapping Clause id Eq mapcl
+    { mk_sd (SD_mapcl ($3, $5)) $startpos $endpos }
+  | End id
+    { mk_sd (SD_end $2) $startpos $endpos }
 
 loop_measure:
   | Until exp
@@ -1455,16 +1501,8 @@ def:
     { DEF_overload ($2, $5) }
   | Overload id Eq enum_bar
     { DEF_overload ($2, $4) }
-  | Scattered scattered_def
-    { DEF_scattered $2 }
-  | scattered_clause
+  | scattered_def
     { DEF_scattered $1 }
-  | Union Clause id Eq type_union
-    { DEF_scattered (mk_sd (SD_unioncl ($3, $5)) $startpos $endpos) }
-  | Mapping Clause id Eq mapcl
-    { DEF_scattered (mk_sd (SD_mapcl ($3, $5)) $startpos $endpos) }
-  | End id
-    { DEF_scattered (mk_sd (SD_end $2) $startpos $endpos) }
   | default_def
     { DEF_default $1 }
   | Mutual Lcurly fun_def_list Rcurly
-- 
cgit v1.2.3


From 1c14f5e373059fb00fce46f92ec75522f562aca1 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Mon, 21 Sep 2020 03:13:19 +0100
Subject: Strip leading *'s from saildoc

We now parse

/*!
 * Paragraph
 */

and

/*!
 *Paragraph
 */

the same as

/*!
Paragraph
*/

since the first form is prettier, and similar to what Doxygen, Javadoc
and other such tools allow. This can cause mild confusion, as if the
start of a line in the final form happens to have a * then it will
unexpectedly remove it, but this is a problem shared by those tools too
and the intent is that everyone just use the first form and never need
to worry about it.
---
 src/lexer.mll | 24 +++++++++++++++---------
 1 file changed, 15 insertions(+), 9 deletions(-)

diff --git a/src/lexer.mll b/src/lexer.mll
index d2901bdb..153467c1 100644
--- a/src/lexer.mll
+++ b/src/lexer.mll
@@ -195,7 +195,8 @@ let comments = ref []
 
 }
 
-let ws = [' ''\t']+
+let wsc = [' ''\t']
+let ws = wsc+
 let letter = ['a'-'z''A'-'Z''?']
 let digit = ['0'-'9']
 let binarydigit = ['0'-'1''_']
@@ -260,7 +261,7 @@ rule token = parse
   | "<-"                                { LtMinus }
   | "=>"                                { EqGt(r "=>") }
   | "<="				{ (LtEq(r"<=")) }
-  | "/*!"       { Doc (doc_comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) 0 lexbuf) }
+  | "/*!" wsc*  { Doc (doc_comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) 0 false lexbuf) }
   | "//"        { line_comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) lexbuf; token lexbuf }
   | "/*"        { comment (Lexing.lexeme_start_p lexbuf) (Buffer.create 10) 0 lexbuf; token lexbuf }
   | "*/"        { raise (LexError("Unbalanced comment", Lexing.lexeme_start_p lexbuf)) }
@@ -308,15 +309,20 @@ and line_comment pos b = parse
   | _ as c                              { Buffer.add_string b (String.make 1 c); line_comment pos b lexbuf }
   | eof                                 { raise (LexError("File ended before newline in comment", pos)) }
 
-and doc_comment pos b depth = parse
-  | "/*!"                               { doc_comment pos b  (depth + 1) lexbuf }
-  | "/*"                                { doc_comment pos b (depth + 1) lexbuf }
-  | "*/"                                { if depth = 0 then Buffer.contents b
-					  else if depth > 0 then doc_comment pos b (depth - 1) lexbuf
+and doc_comment pos b depth lstart = parse
+  | "/*!"                               { doc_comment pos b (depth + 1) false lexbuf }
+  | "/*"                                { doc_comment pos b (depth + 1) false lexbuf }
+  | wsc* "*/"                           { if depth = 0 then Buffer.contents b
+					  else if depth > 0 then doc_comment pos b (depth - 1) false lexbuf
 					  else assert false }
   | eof                                 { raise (LexError("Unbalanced comment", pos)) }
-  | "\n"                                { Buffer.add_string b "\n"; Lexing.new_line lexbuf; doc_comment pos b depth lexbuf }
-  | _ as c                              { Buffer.add_string b (String.make 1 c); doc_comment pos b depth lexbuf }
+  | "\n"                                { Buffer.add_string b "\n"; Lexing.new_line lexbuf; doc_comment pos b depth true lexbuf }
+  | wsc* "*" wsc? as prefix             { if lstart then (
+                                            doc_comment pos b depth false lexbuf
+                                          ) else (
+                                            Buffer.add_string b prefix; doc_comment pos b depth false lexbuf
+                                          ) }
+  | _ as c                              { Buffer.add_string b (String.make 1 c); doc_comment pos b depth false lexbuf }
 
 and comment pos b depth = parse
   | "/*"                                { comment pos b (depth + 1) lexbuf }
-- 
cgit v1.2.3


From 3bbf63a126c48ab9368c0754ff32f1661b184843 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Mon, 21 Sep 2020 03:42:58 +0100
Subject: latex: Dump out S-Expr of any unhandled markdown

Printing the text is only so helpful; the most important thing to know
is what kind of element it actually is, which is lost when extracting
the text. Instead, print the whole S-Expr.
---
 src/latex.ml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/latex.ml b/src/latex.ml
index 9f5979c1..d521a108 100644
--- a/src/latex.ml
+++ b/src/latex.ml
@@ -260,7 +260,7 @@ let latex_of_markdown str =
        ^ "\n\\end{itemize}"
     | Br -> "\n"
     | NL -> "\n"
-    | elem -> failwith ("Can't convert to latex: " ^ to_text [elem])
+    | elem -> failwith ("Can't convert to latex: " ^ Omd_backend.sexpr_of_md [elem])
 
   and format elems =
     String.concat "" (List.map format_elem elems)
-- 
cgit v1.2.3


From 41e3f0dcb15266aa8423486696fcf3326ff08ae2 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Mon, 21 Sep 2020 03:50:24 +0100
Subject: latex: Handle Ulp, Ol and Olp markdown constructs

---
 src/latex.ml | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/src/latex.ml b/src/latex.ml
index d521a108..128d0ec1 100644
--- a/src/latex.ml
+++ b/src/latex.ml
@@ -254,10 +254,14 @@ let latex_of_markdown str =
        output_string chan code;
        close_out chan;
        sprintf "\\lstinputlisting[language=%s]{%s/block%s.%s}" lang !opt_directory uid lang
-    | Ul list ->
+    | (Ul list | Ulp list) ->
        "\\begin{itemize}\n\\item "
        ^ Util.string_of_list "\n\\item " format list
        ^ "\n\\end{itemize}"
+    | (Ol list | Olp list) ->
+       "\\begin{enumerate}\n\\item "
+       ^ Util.string_of_list "\n\\item " format list
+       ^ "\n\\end{enumerate}"
     | Br -> "\n"
     | NL -> "\n"
     | elem -> failwith ("Can't convert to latex: " ^ Omd_backend.sexpr_of_md [elem])
-- 
cgit v1.2.3


From bd2e3a46c298367894b7ec145810ef4521ff808b Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Wed, 23 Sep 2020 14:35:23 +0100
Subject: doc: Fix RISC-V X overload

This seems to be the current name for the overload.
---
 doc/riscv.tex | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/doc/riscv.tex b/doc/riscv.tex
index ca2b9dfe..5c64b947 100644
--- a/doc/riscv.tex
+++ b/doc/riscv.tex
@@ -56,7 +56,7 @@ register Xs : vector(32, dec, xlenbits)
 
 \sailval{wX}
 \sailfn{wX}
-\sailoverloadUUX
+\sailoverloadXXX
 
 We also give a function \ll{MEMr} for reading memory, this function
 just points at a builtin we have defined elsewhere. Note that
-- 
cgit v1.2.3


From f702b004483720d0b3d135238779d68cf013aef0 Mon Sep 17 00:00:00 2001
From: Jessica Clarke
Date: Wed, 23 Sep 2020 14:35:56 +0100
Subject: doc: Fix building with pandoc 2.0 and above

---
 doc/manual.tex | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/doc/manual.tex b/doc/manual.tex
index f3578784..e6b88231 100644
--- a/doc/manual.tex
+++ b/doc/manual.tex
@@ -75,6 +75,9 @@
 %% \renewcommand{\ottrulehead}[3]{\multicolumn{9}{l}{$#1$\quad $#2$}}%
 %% \renewcommand{\ottprodline}[6]{\multicolumn{9}{l}{\quad$#1$\quad $#2$}}%
 
+% Pandoc 2.0 and above wrap lstinline with a dummy passthrough command for escaping purposes
+\newcommand{\passthrough}[1]{#1}
+
 \begin{document}
 
 \input{code_riscv}
-- 
cgit v1.2.3