diff options
Diffstat (limited to 'checker/analyze.ml')
| -rw-r--r-- | checker/analyze.ml | 458 |
1 files changed, 458 insertions, 0 deletions
diff --git a/checker/analyze.ml b/checker/analyze.ml new file mode 100644 index 0000000000..77e70318dd --- /dev/null +++ b/checker/analyze.ml @@ -0,0 +1,458 @@ +(** Headers *) + +let prefix_small_block = 0x80 +let prefix_small_int = 0x40 +let prefix_small_string = 0x20 + +[@@@ocaml.warning "-32"] +let code_int8 = 0x00 +let code_int16 = 0x01 +let code_int32 = 0x02 +let code_int64 = 0x03 +let code_shared8 = 0x04 +let code_shared16 = 0x05 +let code_shared32 = 0x06 +let code_double_array32_little = 0x07 +let code_block32 = 0x08 +let code_string8 = 0x09 +let code_string32 = 0x0A +let code_double_big = 0x0B +let code_double_little = 0x0C +let code_double_array8_big = 0x0D +let code_double_array8_little = 0x0E +let code_double_array32_big = 0x0F +let code_codepointer = 0x10 +let code_infixpointer = 0x11 +let code_custom = 0x12 +let code_block64 = 0x13 + +[@@@ocaml.warning "-37"] +type code_descr = +| CODE_INT8 +| CODE_INT16 +| CODE_INT32 +| CODE_INT64 +| CODE_SHARED8 +| CODE_SHARED16 +| CODE_SHARED32 +| CODE_DOUBLE_ARRAY32_LITTLE +| CODE_BLOCK32 +| CODE_STRING8 +| CODE_STRING32 +| CODE_DOUBLE_BIG +| CODE_DOUBLE_LITTLE +| CODE_DOUBLE_ARRAY8_BIG +| CODE_DOUBLE_ARRAY8_LITTLE +| CODE_DOUBLE_ARRAY32_BIG +| CODE_CODEPOINTER +| CODE_INFIXPOINTER +| CODE_CUSTOM +| CODE_BLOCK64 + +let code_max = 0x13 + +let magic_number = "\132\149\166\190" + +(** Memory reification *) + +module LargeArray : +sig + type 'a t + val empty : 'a t + val length : 'a t -> int + val make : int -> 'a -> 'a t + val get : 'a t -> int -> 'a + val set : 'a t -> int -> 'a -> unit +end = +struct + + let max_length = Sys.max_array_length + + type 'a t = 'a array array * 'a array + (** Invariants: + - All subarrays of the left array have length [max_length]. + - The right array has length < [max_length]. + *) + + let empty = [||], [||] + + let length (vl, vr) = + (max_length * Array.length vl) + Array.length vr + + let make n x = + let k = n / max_length in + let r = n mod max_length in + let vl = Array.init k (fun _ -> Array.make max_length x) in + let vr = Array.make r x in + (vl, vr) + + let get (vl, vr) n = + let k = n / max_length in + let r = n mod max_length in + let len = Array.length vl in + if k < len then vl.(k).(r) + else if k == len then vr.(r) + else invalid_arg "index out of bounds" + + let set (vl, vr) n x = + let k = n / max_length in + let r = n mod max_length in + let len = Array.length vl in + if k < len then vl.(k).(r) <- x + else if k == len then vr.(r) <- x + else invalid_arg "index out of bounds" + +end + +type repr = +| RInt of int +| RInt63 of Uint63.t +| RBlock of (int * int) (* tag × len *) +| RString of string +| RPointer of int +| RCode of int + +type data = +| Int of int (* value *) +| Ptr of int (* pointer *) +| Atm of int (* tag *) +| Fun of int (* address *) + +type obj = +| Struct of int * data array (* tag × data *) +| Int63 of Uint63.t (* Primitive integer *) +| String of string + +module type Input = +sig + type t + val input_byte : t -> int + val input_binary_int : t -> int +end + +module type S = +sig + type input + val parse : input -> (data * obj LargeArray.t) +end + +module Make(M : Input) = +struct + +open M + +type input = M.t + +let current_offset = ref 0 + +let input_byte chan = + let () = incr current_offset in + input_byte chan + +let input_binary_int chan = + let () = current_offset := !current_offset + 4 in + input_binary_int chan + +let input_char chan = Char.chr (input_byte chan) +let input_string len chan = String.init len (fun _ -> input_char chan) + +let parse_header chan = + let () = current_offset := 0 in + let magic = input_string 4 chan in + let length = input_binary_int chan in + let objects = input_binary_int chan in + let size32 = input_binary_int chan in + let size64 = input_binary_int chan in + (magic, length, size32, size64, objects) + +let input_int8s chan = + let i = input_byte chan in + if i land 0x80 = 0 + then i + else i lor ((-1) lsl 8) + +let input_int8u = input_byte + +let input_int16s chan = + let i = input_byte chan in + let j = input_byte chan in + let ans = (i lsl 8) lor j in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 16) + +let input_int16u chan = + let i = input_byte chan in + let j = input_byte chan in + (i lsl 8) lor j + +let input_int32s chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 31) + +let input_int32u chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l + +let input_int64s chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + let ans = + (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor + (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p + in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 63) + +let input_int64u chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor + (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor p + +let input_header32 chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let tag = l in + let len = (i lsl 14) lor (j lsl 6) lor (k lsr 2) in + (tag, len) + +let input_header64 chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + let tag = p in + let len = + (i lsl 46) lor (j lsl 38) lor (k lsl 30) lor (l lsl 22) lor + (m lsl 14) lor (n lsl 6) lor (o lsr 2) + in + (tag, len) + +let input_cstring chan : string = + let buff = Buffer.create 17 in + let rec loop () = + match input_char chan with + | '\o000' -> Buffer.contents buff + | c -> Buffer.add_char buff c |> loop + in loop () + +let input_intL chan : int64 = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let m = input_byte chan in + let n = input_byte chan in + let o = input_byte chan in + let p = input_byte chan in + let ( lsl ) x y = Int64.(shift_left (of_int x) y) in + let ( lor ) = Int64.logor in + (i lsl 56) lor (j lsl 48) lor (k lsl 40) lor (l lsl 32) lor + (m lsl 24) lor (n lsl 16) lor (o lsl 8) lor (Int64.of_int p) + +let parse_object chan = + let data = input_byte chan in + if prefix_small_block <= data then + let tag = data land 0x0F in + let len = (data lsr 4) land 0x07 in + RBlock (tag, len) + else if prefix_small_int <= data then + RInt (data land 0x3F) + else if prefix_small_string <= data then + let len = data land 0x1F in + RString (input_string len chan) + else if data > code_max then + assert false + else match (Obj.magic data) with + | CODE_INT8 -> + RInt (input_int8s chan) + | CODE_INT16 -> + RInt (input_int16s chan) + | CODE_INT32 -> + RInt (input_int32s chan) + | CODE_INT64 -> + RInt (input_int64s chan) + | CODE_SHARED8 -> + RPointer (input_int8u chan) + | CODE_SHARED16 -> + RPointer (input_int16u chan) + | CODE_SHARED32 -> + RPointer (input_int32u chan) + | CODE_BLOCK32 -> + RBlock (input_header32 chan) + | CODE_BLOCK64 -> + RBlock (input_header64 chan) + | CODE_STRING8 -> + let len = input_int8u chan in + RString (input_string len chan) + | CODE_STRING32 -> + let len = input_int32u chan in + RString (input_string len chan) + | CODE_CODEPOINTER -> + let addr = input_int32u chan in + for _i = 0 to 15 do ignore (input_byte chan); done; + RCode addr + | CODE_CUSTOM -> + begin match input_cstring chan with + | "_j" -> RInt63 (Uint63.of_int64 (input_intL chan)) + | s -> Printf.eprintf "Unhandled custom code: %s" s; assert false + end + | CODE_DOUBLE_ARRAY32_LITTLE + | CODE_DOUBLE_BIG + | CODE_DOUBLE_LITTLE + | CODE_DOUBLE_ARRAY8_BIG + | CODE_DOUBLE_ARRAY8_LITTLE + | CODE_DOUBLE_ARRAY32_BIG + | CODE_INFIXPOINTER + -> Printf.eprintf "Unhandled code %04x\n%!" data; assert false + +let parse chan = + let (magic, len, _, _, size) = parse_header chan in + let () = assert (magic = magic_number) in + let memory = LargeArray.make size (Struct ((-1), [||])) in + let current_object = ref 0 in + let fill_obj = function + | RPointer n -> + let data = Ptr (!current_object - n) in + data, None + | RInt n -> + let data = Int n in + data, None + | RString s -> + let data = Ptr !current_object in + let () = LargeArray.set memory !current_object (String s) in + let () = incr current_object in + data, None + | RBlock (tag, 0) -> + (* Atoms are never shared *) + let data = Atm tag in + data, None + | RBlock (tag, len) -> + let data = Ptr !current_object in + let nblock = Array.make len (Atm (-1)) in + let () = LargeArray.set memory !current_object (Struct (tag, nblock)) in + let () = incr current_object in + data, Some nblock + | RCode addr -> + let data = Fun addr in + data, None + | RInt63 i -> + let data = Ptr !current_object in + let () = LargeArray.set memory !current_object (Int63 i) in + let () = incr current_object in + data, None + in + + let rec fill block off accu = + if Array.length block = off then + match accu with + | [] -> () + | (block, off) :: accu -> fill block off accu + else + let data, nobj = fill_obj (parse_object chan) in + let () = block.(off) <- data in + let block, off, accu = match nobj with + | None -> block, succ off, accu + | Some nblock -> nblock, 0, ((block, succ off) :: accu) + in + fill block off accu + in + let ans = [|Atm (-1)|] in + let () = fill ans 0 [] in + (ans.(0), memory) + +end + +module IChannel = +struct + type t = in_channel + let input_byte = Pervasives.input_byte + let input_binary_int = Pervasives.input_binary_int +end + +module IString = +struct + type t = (string * int ref) + + let input_byte (s, off) = + let ans = Char.code (s.[!off]) in + let () = incr off in + ans + + let input_binary_int chan = + let i = input_byte chan in + let j = input_byte chan in + let k = input_byte chan in + let l = input_byte chan in + let ans = (i lsl 24) lor (j lsl 16) lor (k lsl 8) lor l in + if i land 0x80 = 0 + then ans + else ans lor ((-1) lsl 31) + +end + +module PChannel = Make(IChannel) +module PString = Make(IString) + +let parse_channel = PChannel.parse +let parse_string s = PString.parse (s, ref 0) + +let instantiate (p, mem) = + let len = LargeArray.length mem in + let ans = LargeArray.make len (Obj.repr 0) in + (* First pass: initialize the subobjects *) + for i = 0 to len - 1 do + let obj = match LargeArray.get mem i with + | Struct (tag, blk) -> Obj.new_block tag (Array.length blk) + | Int63 i -> Obj.repr i + | String str -> Obj.repr str + in + LargeArray.set ans i obj + done; + let get_data = function + | Int n -> Obj.repr n + | Ptr p -> LargeArray.get ans p + | Atm tag -> Obj.new_block tag 0 + | Fun _ -> assert false (* We shouldn't serialize closures *) + in + (* Second pass: set the pointers *) + for i = 0 to len - 1 do + match LargeArray.get mem i with + | Struct (_, blk) -> + let obj = LargeArray.get ans i in + for k = 0 to Array.length blk - 1 do + Obj.set_field obj k (get_data blk.(k)) + done + | Int63 _ + | String _ -> () + done; + get_data p |
