diff options
Diffstat (limited to 'lib/ocaml_rts/linksem/memory_image.ml')
| -rw-r--r-- | lib/ocaml_rts/linksem/memory_image.ml | 839 |
1 file changed, 839 insertions, 0 deletions
diff --git a/lib/ocaml_rts/linksem/memory_image.ml b/lib/ocaml_rts/linksem/memory_image.ml new file mode 100644 index 00000000..fa9d1535 --- /dev/null +++ b/lib/ocaml_rts/linksem/memory_image.ml @@ -0,0 +1,839 @@ +(*Generated by Lem from memory_image.lem.*) +open Lem_basic_classes +open Lem_function +open Lem_string +open Lem_tuple +open Lem_bool +open Lem_list +open Lem_sorting +open Lem_map +(*import Map_extra*) +open Lem_set +open Lem_set_extra +open Multimap +open Lem_num +open Lem_maybe +open Lem_assert_extra +open Show + +open Byte_sequence +open Elf_file +open Elf_header +open Elf_interpreted_segment +open Elf_interpreted_section +open Elf_program_header_table +open Elf_section_header_table +open Elf_symbol_table +open Elf_types_native_uint +open Elf_relocation + +open Missing_pervasives + +(* Now we can define memory images *) + +type byte_pattern_element = char option +type byte_pattern = byte_pattern_element list + +(* An element might have an address/offset, and it has some contents. *) +type element = { startpos : Nat_big_num.num option + ; length1 : Nat_big_num.num option + ; contents : byte_pattern + } + +(* HMM -- ideally I want to fold these into the memory image notion + * and the startpos thingy. *) +type allocated_symbols_map = (string, (Nat_big_num.num * Nat_big_num.num)) Pmap.map (* start, length *) + +(* Instead of modelling address calculations (in linker scripts) like so: + +type address_expr = natural -> allocated_symbols_map -> natural + ( pos -> environment -> result address ) + + ... we model it as expressions in terms of CursorPosition. HMM. +*) + +type expr_operand = Var of string + | CursorPosition (* only valid in certain expressions... 
HMM *) + | Constant of Nat_big_num.num + | UnOp of (expr_unary_operation * expr_operand) + | BinOp of (expr_binary_operation * expr_operand * expr_operand) +and +expr_unary_operation = Neg of expr_operand + | BitwiseInverse of expr_operand +and +expr_binary_operation = Add of (expr_operand * expr_operand) + | Sub of (expr_operand * expr_operand) + | BitwiseAnd of (expr_operand * expr_operand) + | BitwiseOr of (expr_operand * expr_operand) + +type expr_binary_relation = + Lt + | Lte + | Gt + | Gte + | Eq + | Neq + +type expr = + False + | True + | Not of expr + | And of (expr * expr) + | Or of (expr * expr) + | BinRel of (expr_binary_relation * expr_operand) (* LH operand is the expr's value *) + +(* +val cond_expr : expr -> expr -> expr -> expr +let cond_expr expr1 expr2 expr3 = (Or((And(expr1, expr2)), (And((Not(expr1)), expr3)))) +*) + +(* Memory image elements all have identities. For convenience + * we make the identities strings. The string contents are arbitrary, + * and only their equality is relevant, but choosing friendly names + * like "ELF header" is good practice.*) +type memory_image = (string, element) Pmap.map + +type range = Nat_big_num.num * Nat_big_num.num (* start, length *) + +type element_range = string * range + +(* An "element" of an ELF image, in the linking phase, is either a section, + * the ELF header, the section header table or the program header table. + * + * PROBLEM: We'd like to use section names as the identifiers + * for those elements that are sections. + * but we can't, because they are not guaranteed to be unique. + * + * SOLUTION: Names that are unique in the file are used as keys. + * If not unique, the sections are treated as anonymous and given + * gensym'd string ids (FIXME: implement this). + *) + +(* Currently, our elements have unique names, which are strings. + * We *don't* want to encode any meaning onto these strings. + * All meaning should be encoded into labelled ranges. 
+ * We want to be able to look up + * + * - elements + * - ranges within elements + * + * ... by their *labels* -- or sometimes just *part* of their labels. + *) + +(* ELF file features with which we can label ranges of the memory image. *) +type elf_file_feature = + ElfHeader of elf64_header + | ElfSectionHeaderTable of elf64_section_header_table (* do we want to expand these? *) + | ElfProgramHeaderTable of elf64_program_header_table + | ElfSection of (Nat_big_num.num * elf64_interpreted_section) (* SHT idx *) + | ElfSegment of (Nat_big_num.num * elf64_interpreted_segment) (* PHT idx *) + +type symbol_definition + = { def_symname : string + ; def_syment : elf64_symbol_table_entry (* definition's symtab entry *) + ; def_sym_scn : Nat_big_num.num (* symtab section index, to disamiguate dynsym *) + ; def_sym_idx : Nat_big_num.num (* index of symbol into the symtab *) + ; def_linkable_idx : Nat_big_num.num (* used to propagate origin linkable information to linked image *) + } + +let symDefCompare x1 x2:int= +(quintupleCompare compare elf64_symbol_table_entry_compare Nat_big_num.compare Nat_big_num.compare Nat_big_num.compare (x1.def_symname, x1.def_syment, x1.def_sym_scn, x1.def_sym_idx, x1.def_linkable_idx) + (x2.def_symname, x2.def_syment, x2.def_sym_scn, x2.def_sym_idx, x2.def_linkable_idx)) + +let instance_Basic_classes_Ord_Memory_image_symbol_definition_dict:(symbol_definition)ord_class= ({ + + compare_method = symDefCompare; + + isLess_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symDefCompare f1 f2) (-1)))); + + isLessEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symDefCompare f1 f2)(Pset.from_list compare [(-1); 0]))); + + isGreater_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symDefCompare f1 f2) 1))); + + isGreaterEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symDefCompare f1 f2)(Pset.from_list compare [1; 0])))}) + +type symbol_reference + = { ref_symname : string (* symbol name *) + ; ref_syment : elf64_symbol_table_entry (* 
likely-undefined (referencing) symbol *) + ; ref_sym_scn : Nat_big_num.num (* symtab section idx *) + ; ref_sym_idx : Nat_big_num.num (* index into symbol table *) + } + +let symRefCompare x1 x2:int= +(quadrupleCompare compare elf64_symbol_table_entry_compare Nat_big_num.compare Nat_big_num.compare (x1.ref_symname, x1.ref_syment, x1.ref_sym_scn, x1.ref_sym_idx) + (x2.ref_symname, x2.ref_syment, x2.ref_sym_scn, x2.ref_sym_idx)) + +let instance_Basic_classes_Ord_Memory_image_symbol_reference_dict:(symbol_reference)ord_class= ({ + + compare_method = symRefCompare; + + isLess_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symRefCompare f1 f2) (-1)))); + + isLessEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symRefCompare f1 f2)(Pset.from_list compare [(-1); 0]))); + + isGreater_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symRefCompare f1 f2) 1))); + + isGreaterEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symRefCompare f1 f2)(Pset.from_list compare [1; 0])))}) + +type reloc_site = { + ref_relent : elf64_relocation_a + ; ref_rel_scn : Nat_big_num.num (* the relocation section idx *) + ; ref_rel_idx : Nat_big_num.num (* the index of the relocation rec *) + ; ref_src_scn : Nat_big_num.num (* the section *from which* the reference logically comes *) +} + +let relocSiteCompare x1 x2:int= +(quadrupleCompare elf64_relocation_a_compare Nat_big_num.compare Nat_big_num.compare Nat_big_num.compare (x1.ref_relent, x1.ref_rel_scn, x1.ref_rel_idx, x1.ref_src_scn) + (x2.ref_relent, x2.ref_rel_scn, x2.ref_rel_idx, x2.ref_src_scn)) + +let instance_Basic_classes_Ord_Memory_image_reloc_site_dict:(reloc_site)ord_class= ({ + + compare_method = relocSiteCompare; + + isLess_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(relocSiteCompare f1 f2) (-1)))); + + isLessEqual_method = (fun f1 -> (fun f2 -> Pset.mem (relocSiteCompare f1 f2)(Pset.from_list compare [(-1); 0]))); + + isGreater_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(relocSiteCompare f1 f2) 1))); + + 
isGreaterEqual_method = (fun f1 -> (fun f2 -> Pset.mem (relocSiteCompare f1 f2)(Pset.from_list compare [1; 0])))}) + +type reloc_decision = LeaveReloc + | ApplyReloc + | ChangeRelocTo of (Nat_big_num.num * symbol_reference * reloc_site) + (* | MakePIC -- is now a kind of ChangeRelocTo *) + +let relocDecisionCompare x1 x2:int= + ((match (x1, x2) with + | (LeaveReloc, LeaveReloc) -> 0 + | (LeaveReloc, _) -> (-1) + | (ApplyReloc, ApplyReloc) -> 0 + | (ApplyReloc, ChangeRelocTo _) -> (-1) + | (ApplyReloc, LeaveReloc) -> 1 + | (ChangeRelocTo t1, ChangeRelocTo t2) -> (tripleCompare Nat_big_num.compare symRefCompare relocSiteCompare t1 t2) + | (ChangeRelocTo _, _) -> 1 + )) + +let instance_Basic_classes_Ord_Memory_image_reloc_decision_dict:(reloc_decision)ord_class= ({ + + compare_method = relocDecisionCompare; + + isLess_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(relocDecisionCompare f1 f2) (-1)))); + + isLessEqual_method = (fun f1 -> (fun f2 -> Pset.mem (relocDecisionCompare f1 f2)(Pset.from_list compare [(-1); 0]))); + + isGreater_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(relocDecisionCompare f1 f2) 1))); + + isGreaterEqual_method = (fun f1 -> (fun f2 -> Pset.mem (relocDecisionCompare f1 f2)(Pset.from_list compare [1; 0])))}) + +type symbol_reference_and_reloc_site = { + ref : symbol_reference + ; maybe_reloc : reloc_site option + ; maybe_def_bound_to : (reloc_decision * symbol_definition option)option + } + +let symRefAndRelocSiteCompare x1 x2:int= +(tripleCompare symRefCompare (maybeCompare relocSiteCompare) (maybeCompare (pairCompare relocDecisionCompare (maybeCompare symDefCompare))) (x1.ref, x1.maybe_reloc, x1.maybe_def_bound_to) + (x2.ref, x2.maybe_reloc, x2.maybe_def_bound_to)) + +let instance_Basic_classes_Ord_Memory_image_symbol_reference_and_reloc_site_dict:(symbol_reference_and_reloc_site)ord_class= ({ + + compare_method = symRefAndRelocSiteCompare; + + isLess_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symRefAndRelocSiteCompare f1 
f2) (-1)))); + + isLessEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symRefAndRelocSiteCompare f1 f2)(Pset.from_list compare [(-1); 0]))); + + isGreater_method = (fun f1 -> (fun f2 -> ( Lem.orderingEqual(symRefAndRelocSiteCompare f1 f2) 1))); + + isGreaterEqual_method = (fun f1 -> (fun f2 -> Pset.mem (symRefAndRelocSiteCompare f1 f2)(Pset.from_list compare [1; 0])))}) + +(* We can also annotate arbitrary ranges of bytes within an element + * with arbitrary metadata. + * + * Ideally we want to data-abstract this a bit. But it's hard to do + * so without baking in ELF-specific and/or (moreover) per-ABI concepts, + * like PLTs and GOTs. Ideally we would use something like polymorphic + * variants here. For now, this has to be the union of all the concepts + * that we find in the various ABIs we care about. To avoid ELFy things + * creeping in, we parameterise by 'a, and instantiate the 'a with the + * relevant ELFy thing when we use it. OH, but then 'a is different for + * every distinct ELF thing, which is no good. Can we define a mapping + * from an umbrella "ELF" type to the relevant types in each case? *) +type 'abifeature range_tag = (* forall 'abifeature . *) + ImageBase + | EntryPoint + | SymbolDef of symbol_definition + | SymbolRef of symbol_reference_and_reloc_site + | FileFeature of elf_file_feature (* file feature other than symdef and reloc *) + | AbiFeature of 'abifeature + +type 'abifeature annotated_memory_image = { + elements : memory_image + ; by_range : (( element_range option) * ( 'abifeature range_tag)) Pset.set + ; by_tag : (( 'abifeature range_tag), ( element_range option)) multimap +} + +(*val get_empty_memory_image : forall 'abifeature. 
unit -> annotated_memory_image 'abifeature*) +let get_empty_memory_image:unit ->'abifeature annotated_memory_image= (fun _ -> { + elements = (Pmap.empty compare) + ; by_range = (Pset.empty (pairCompare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))) compare)) + ; by_tag = (Pset.empty (pairCompare compare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))))) +}) + +(* Basic ELFy and ABI-y things. *) +(* "Special" sections are those that necessarily require special treatment by the + * linker. Examples include symbol tables and relocation tables. There are some + * grey areas, such as .eh_frame, debug info, and string tables. For us, the rule + * is that if we have special code to create them, i.e. that we don't rely on + * ordinary section concatenation during the linker script interpretation, they + * should be special -- it means strip_metadata_sections will remove them from + * the image, they won't be seen by the linker script, and that it's *our* job + * to reinstate them afterwards (as we do with symtab and strtab, for example). *) +(* FIXME: this shouldn't really be here, but needs to be in some low-lying module; + * keeping it out of elf_* for now to avoid duplication into elf64_, elf32_. *) +let elf_section_is_special s f:bool= (not (Nat_big_num.equal s.elf64_section_type sht_progbits) + && (not (Nat_big_num.equal s.elf64_section_type sht_nobits) + && (not (Nat_big_num.equal s.elf64_section_type sht_fini_array) + && not (Nat_big_num.equal s.elf64_section_type sht_init_array)))) + +(* This record collects things that ABIs may or must define. + * + * Since we want to put all ABIs in a list and select one at run time, + * we can't maintain a type-level distinction between ABIs; we have to + * use elf_memory_image any_abi_feature. To avoid a reference cycle, + * stay polymorphic in the ABI feature type until we define specific ABIs. 
+ * In practice we'll use only any_abi_feature, because we need to pull + * the ABI out of a list at run time. + *) +type null_abi_feature = unit + +(* The reloc calculation is complicated, so we split up the big function + * type into smaller ones. *) + +(* Q. Do we want "existing", or is it a kind of addend? + * A. We do want it -- modelling both separately is necessary, + * because we model relocations bytewise, but some arches + * do bitfield relocations (think ARM). *) +type reloc_calculate_fn = Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num (* symaddr -> addend -> existing -> relocated *) + +type 'abifeature reloc_apply_fn = 'abifeature + (* elf memory image: the context in which the relocation is being applied *) + annotated_memory_image -> + (* the site address *) + Nat_big_num.num -> + (* Typically there are two symbol table entries involved in a relocation. + * One is the reference, and is usually undefined. + * The other is the definition, and is defined (else absent, when we use 0). + * However, sometimes the reference is itself a defined symbol. + * Almost always, if so, *that* symbol *is* "the definition". + * However, copy relocs are an exception. + * + * In the case of copy relocations being fixed up by the dynamic + * linker, the dynamic linker must figure out which definition to + * copy from. This can't be as simple as "the first definition in + * link order", because *our* copy of that symbol is a definition + * (typically in bss). It could be as simple as "the first *after us* + * in link order". FIXME: find the glibc code that does this. + * + * Can we dig this stuff out of the memory image? If we pass the address + * being relocated, we can find the tags. But I don't want to pass + * the symbol address until the very end. It seems better to pass the symbol + * name, since that's the key that the dynamic linker uses to look for + * other definitions. + * + * Do we want to pass a whole symbol_reference? 
This has not only the + * symbol name but also syment, scn and idx. The syment is usually UND, + * but *could* be defined (and is for copy relocs). The scn and idx are + * not relevant, but it seems cleaner to pass the whole thing anyway. + *) + symbol_reference_and_reloc_site -> + (* Should we pass a symbol_definition too? Implicitly, we pass part of it + * by passing the symaddr argument (below). I'd prefer not to depend on + * others -- relocation calculations should look like "mostly address + * arithmetic", i.e. only the weird ones do something else. *) + (* How wide, in bytes, is the relocated field? this may depend on img + * and on the wider image (copy relocs), so it's returned *by* the reloc function. *) + (Nat_big_num.num (* width *) * reloc_calculate_fn) + +(* Some kinds of relocation necessarily give us back a R_*_RELATIVE reloc. + * We don't record this explicitly. Instead, the "bool" is a flag recording whether + * the field represents an absolute address. + * Similarly, some relocations can "fail" according to their ABI manuals. + * This just means that the result can't be represented in the field width. + * We detect this when actually applying the reloc in the memory image content + * (done elsewhere). *) +type 'abifeature reloc_fn = Nat_big_num.num -> (bool * 'abifeature reloc_apply_fn) + +(*val noop_reloc_calculate : natural -> integer -> natural -> natural*) +let noop_reloc_calculate symaddr addend existing:Nat_big_num.num= existing + +(*val noop_reloc_apply : forall 'abifeature. reloc_apply_fn 'abifeature*) +let noop_reloc_apply img2 site_addr ref1:Nat_big_num.num*(Nat_big_num.num ->Nat_big_num.num ->Nat_big_num.num ->Nat_big_num.num)= (Nat_big_num.of_int 0, noop_reloc_calculate) + +(*val noop_reloc : forall 'abifeature. 
natural -> (bool (* result is absolute addr *) * reloc_apply_fn 'abifeature)*) +let noop_reloc k:bool*('abifeature annotated_memory_image ->Nat_big_num.num ->symbol_reference_and_reloc_site ->Nat_big_num.num*reloc_calculate_fn)= (false, noop_reloc_apply) + +type 'abifeature abi = (* forall 'abifeature. *) + { is_valid_elf_header : elf64_header -> bool (* doesn't this generalise outrageously? is_valid_elf_file? *) + ; make_elf_header : Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num -> elf64_header + (* t entry shoff phoff phnum shnum shstrndx *) + ; reloc : 'abifeature reloc_fn + ; section_is_special : elf64_interpreted_section -> 'abifeature annotated_memory_image -> bool + ; section_is_large : elf64_interpreted_section -> 'abifeature annotated_memory_image -> bool + ; maxpagesize : Nat_big_num.num + ; minpagesize : Nat_big_num.num + ; commonpagesize : Nat_big_num.num + ; symbol_is_generated_by_linker : string -> bool + (*; link_inputs_tap : + ; link_output_sections_tap : + ; link_output_image_tap : *) + ; make_phdrs : Nat_big_num.num -> Nat_big_num.num -> Nat_big_num.num (* file type *) -> 'abifeature annotated_memory_image -> elf64_interpreted_section list -> elf64_program_header_table_entry list + ; max_phnum : Nat_big_num.num + ; guess_entry_point : 'abifeature annotated_memory_image -> Nat_big_num.num option + ; pad_data : Nat_big_num.num -> char list + ; pad_code : Nat_big_num.num -> char list + ; generate_support : (string * 'abifeature annotated_memory_image) (* list (list reloc_site_resolution) -> *)list -> 'abifeature annotated_memory_image + ; concretise_support : 'abifeature annotated_memory_image -> 'abifeature annotated_memory_image + ; get_reloc_symaddr : symbol_definition -> 'abifeature annotated_memory_image -> reloc_site option -> Nat_big_num.num + } + +(*val align_up_to : natural -> natural -> natural*) +let align_up_to align addr:Nat_big_num.num= + (let quot = 
(Nat_big_num.div addr align) + in + if Nat_big_num.equal (Nat_big_num.mul quot align) addr then addr else Nat_big_num.mul ( Nat_big_num.add quot(Nat_big_num.of_int 1)) align) + +(*val round_down_to : natural -> natural -> natural*) +let round_down_to align addr:Nat_big_num.num= + (let quot = (Nat_big_num.div addr align) + in Nat_big_num.mul + quot align) + +(*val uint32_max : natural*) +let uint32_max:Nat_big_num.num= (Nat_big_num.sub_nat ( Nat_big_num.pow_int(Nat_big_num.of_int 2)( 32))(Nat_big_num.of_int 1)) + +(*val uint64_max : natural*) +let uint64_max:Nat_big_num.num= (Nat_big_num.add (Nat_big_num.sub_nat (Nat_big_num.mul + (* HACK around Lem's inability to parse 18446744073709551615: + * the square of uint32_max is + * (2**32 - 1) (2**32 - 1) + * i.e. 2**64 - 2**32 - 2**32 + 1 + * So + * 2**64 - 1 = uint32_max * uint32_max + 2**32 + 2**32 - 2 + *) + uint32_max uint32_max)(Nat_big_num.of_int 2)) (Nat_big_num.pow_int(Nat_big_num.of_int 2)( 33))) + (* 18446744073709551615 *) (* i.e. 0x ffff ffff ffff ffff *) + (* HMM. This still overflows int64 *) + +(* The 2's complement of a value, at 64-bit width *) +(*val compl64 : natural -> natural*) +let compl64 v:Nat_big_num.num= (Nat_big_num.add(Nat_big_num.of_int 1) (Nat_big_num.bitwise_xor v uint64_max)) + +(*val gcd : natural -> natural -> natural*) +let rec gcd a b:Nat_big_num.num= + (if Nat_big_num.equal b(Nat_big_num.of_int 0) then a else gcd b ( Nat_big_num.modulus a b)) + +(*val lcm : natural -> natural -> natural*) +let lcm a b:Nat_big_num.num= (Nat_big_num.div + (* let _ = errln ("lcm of " ^ (show a) ^ " and " ^ (show b) ^ "?") + in *) + ( Nat_big_num.mul a b) (gcd a b)) + +(*val address_of_range : forall 'abifeature. 
element_range -> annotated_memory_image 'abifeature -> natural*) +let address_of_range el_range img2:Nat_big_num.num= + (let (el_name, (start, len)) = el_range + in + (match Pmap.lookup el_name img2.elements with + Some el -> + (match el.startpos with + Some addr -> Nat_big_num.add addr start + | None -> failwith "address_of_range called for element with no address" + ) + | None -> failwith "address_of_range called on nonexistent element" + )) + +(*val range_contains : (natural * natural) -> (natural * natural) -> bool*) +let range_contains (r1begin, r1len) (r2begin, r2len):bool= (Nat_big_num.greater_equal + (* r1 is at least as big as r2 *) + r2begin r1begin && Nat_big_num.less_equal ( Nat_big_num.add r2begin r2len) ( Nat_big_num.add r1begin r1len)) + +(*val range_overlaps : (natural * natural) -> (natural * natural) -> bool*) +let range_overlaps (r1begin, r1len) (r2begin, r2len):bool= + (( Nat_big_num.less r1begin ( Nat_big_num.add r2begin r2len) && Nat_big_num.greater ( Nat_big_num.add r1begin r1len) r2begin) + || ( Nat_big_num.less r2begin ( Nat_big_num.add r1begin r1len) && Nat_big_num.greater ( Nat_big_num.add r2begin r2len) r1begin)) + +(*val is_partition : list (natural * natural) -> list (natural * natural) -> bool*) +let is_partition rs ranges:bool= +( + (* 1. each element of the first list falls entirely within some element + * from the second list. *)let r_is_contained_by_some_range + = (fun r -> List.fold_left (||) false (Lem_list.map (fun range1 -> range_contains range1 r) ranges)) + in + List.for_all (fun r -> r_is_contained_by_some_range r) rs + && + (* 2. elements of the first list do not overlap *) + List.for_all (fun r -> List.for_all (fun r2 -> ( (Lem.pair_equal Nat_big_num.equal Nat_big_num.equal r (* should be "=="? 
*) r2)) || (not (range_overlaps r r2))) rs) rs) + +(*val nat_range : natural -> natural -> list natural*) +let rec nat_range base len:(Nat_big_num.num)list= + ( + if(Nat_big_num.equal len (Nat_big_num.of_int 0)) then ([]) else + (base :: + (nat_range ( Nat_big_num.add base (Nat_big_num.of_int 1)) + ( Nat_big_num.sub_nat len (Nat_big_num.of_int 1))))) + +(* Expand a sorted list of ranges into a list of bool, where the list contains + * true if its index is included in one or more ranges, else false. *) +(*val expand_sorted_ranges : list (natural * natural) -> natural -> list bool -> list bool*) +let rec expand_sorted_ranges sorted_ranges min_length accum:(bool)list= + ((match sorted_ranges with + [] -> List.rev_append (List.rev accum) ( + let pad_length = (Nat_big_num.max(Nat_big_num.of_int 0) ( Nat_big_num.sub_nat min_length (Missing_pervasives.length accum))) + in + (* let _ = Missing_pervasives.errln ( + "padding ranges cares list with " ^ (show pad_length) ^ + " cares (accumulated " ^ (show (Missing_pervasives.length accum)) ^ + ", min length " ^ (show min_length) ^ ")") + in *) + Missing_pervasives.replicate0 pad_length true) + | (base, len) :: more -> + (* pad the accum so that it reaches up to base *) + let up_to_base = (Missing_pervasives.replicate0 ( Nat_big_num.sub_nat base (Missing_pervasives.length accum)) true) + in + let up_to_end_of_range = (List.rev_append (List.rev up_to_base) (Missing_pervasives.replicate0 len false)) + in + expand_sorted_ranges more min_length ( List.rev_append (List.rev accum) up_to_end_of_range) + )) + +(*val expand_unsorted_ranges : list (natural * natural) -> natural -> list bool -> list bool*) +let rec expand_unsorted_ranges unsorted_ranges min_length accum:(bool)list= + (expand_sorted_ranges (insertSortBy (fun (base1, len1) -> (fun (base2, len2) -> Nat_big_num.less base1 base2)) unsorted_ranges) min_length accum) + +(*val make_byte_pattern_revacc : list (maybe byte) -> list byte -> list bool -> list (maybe byte)*) +let rec 
make_byte_pattern_revacc revacc bytes cares:((char)option)list= + ((match bytes with + [] -> List.rev revacc + | b :: bs -> (match cares with + care :: more -> make_byte_pattern_revacc ((if not care then None else Some b) :: revacc) bs more + | _ -> failwith "make_byte_pattern: unequal length" + ) + )) + +(*val make_byte_pattern : list byte -> list bool -> list (maybe byte)*) +let rec make_byte_pattern bytes cares:((char)option)list= + (make_byte_pattern_revacc [] bytes cares) + +(*val relax_byte_pattern_revacc : list (maybe byte) -> list (maybe byte) -> list bool -> list (maybe byte)*) +let rec relax_byte_pattern_revacc revacc bytes cares:((char)option)list= + ((match bytes with + [] -> List.rev revacc + | b :: bs -> (match cares with + care :: more -> relax_byte_pattern_revacc ((if not care then None else b) :: revacc) bs more + | _ -> failwith ("relax_byte_pattern: unequal length") + ) + )) + +(*val relax_byte_pattern : list (maybe byte) -> list bool -> list (maybe byte)*) +let rec relax_byte_pattern bytes cares:((char)option)list= + (relax_byte_pattern_revacc [] bytes cares) + +type pad_fn = Nat_big_num.num -> char list + +(*val concretise_byte_pattern : list byte -> natural -> list (maybe byte) -> pad_fn -> list byte*) +let rec concretise_byte_pattern rev_acc acc_pad bs pad:(char)list= + ((match bs with + [] -> + let padding_bytes = (if Nat_big_num.greater acc_pad(Nat_big_num.of_int 0) then pad acc_pad else []) + in List.rev ( List.rev_append (List.rev (List.rev padding_bytes)) rev_acc) + | Some(b) :: more -> + (* flush accumulated padding *) + let padding_bytes = (if Nat_big_num.greater acc_pad(Nat_big_num.of_int 0) then pad acc_pad else []) + in + concretise_byte_pattern (b :: ( List.rev_append (List.rev (List.rev padding_bytes)) rev_acc))(Nat_big_num.of_int 0) more pad + | None :: more -> + concretise_byte_pattern rev_acc (Nat_big_num.add acc_pad(Nat_big_num.of_int 1)) more pad + )) + +(*val byte_option_matches_byte : maybe byte -> byte -> bool*) +let 
byte_option_matches_byte optb b:bool= + ((match optb with + None -> true + | Some some -> some = b + )) + +(*val byte_list_matches_pattern : list (maybe byte) -> list byte -> bool*) +let rec byte_list_matches_pattern pattern bytes:bool= + ((match pattern with + [] -> true + | optbyte :: more -> (match bytes with + [] -> false + | abyte :: morebytes -> + byte_option_matches_byte optbyte abyte + && byte_list_matches_pattern more morebytes + ) + )) + +(*val append_to_byte_pattern_at_offset : natural -> list (maybe byte) -> list (maybe byte) -> list (maybe byte)*) +let append_to_byte_pattern_at_offset offset pat1 pat2:((char)option)list= + (let pad_length = (Nat_big_num.sub_nat offset (Missing_pervasives.length pat1)) + in + if Nat_big_num.less pad_length(Nat_big_num.of_int 0) then failwith "can't append at offset already used" + else List.rev_append (List.rev (List.rev_append (List.rev pat1) (Lem_list.replicate (Nat_big_num.to_int pad_length) None))) pat2) + +(*val accum_pattern_possible_starts_in_one_byte_sequence : list (maybe byte) -> nat -> list byte -> nat -> natural -> list natural -> list natural*) +let rec accum_pattern_possible_starts_in_one_byte_sequence pattern pattern_len seq seq_len offset accum:(Nat_big_num.num)list= +( + (* let _ = Missing_pervasives.errs ("At offset " ^ (show offset) ^ "... ") + in *)(match pattern with + [] -> (* let _ = Missing_pervasives.errs ("terminating with hit (empty pattern)\n") in *) + offset :: accum + | bpe :: more_bpes -> (* nonempty, so check for nonempty seq *) + (match seq with + [] -> (*let _ = Missing_pervasives.errs ("terminating with miss (empty pattern)\n") + in *) accum (* ran out of bytes in the sequence, so no match *) + | byte1 :: more_bytes -> let matched_this_byte = + (byte_option_matches_byte bpe byte1) + in + (* let _ = Missing_pervasives.errs ("Byte " ^ (show byte) ^ " matched " ^ (show byte_pattern) ^ "? 
" ^ (show matched_this_byte) ^ "; ") + in *) + let sequence_long_enough = (seq_len >= pattern_len) + in + (* let _ = Missing_pervasives.errs ("enough bytes remaining (" ^ (show seq_len) ^ ") to match rest of pattern (" ^ (show pattern_len) ^ ")? " ^ (show sequence_long_enough) ^ "; ") + in *) + let matched_here = (matched_this_byte && (sequence_long_enough && + byte_list_matches_pattern more_bpes more_bytes)) + in + (* let _ = Missing_pervasives.errs ("matched pattern anchored here? " ^ (show matched_this_byte) ^ "\n") + in *) + accum_pattern_possible_starts_in_one_byte_sequence + pattern pattern_len + more_bytes ( Nat_num.nat_monus seq_len( 1)) + ( Nat_big_num.add offset(Nat_big_num.of_int 1)) + (if matched_here then offset :: accum else accum) + ) + )) + +let swap_pairs dict_Basic_classes_SetType_a dict_Basic_classes_SetType_b s:('a*'b)Pset.set= (let x2 =(Pset.from_list (pairCompare + dict_Basic_classes_SetType_a.setElemCompare_method dict_Basic_classes_SetType_b.setElemCompare_method) []) in Pset.fold (fun(k, v) x2 -> if true then Pset.add (v, k) x2 else x2) s x2) + +let by_range_from_by_tag dict_Basic_classes_SetType_a dict_Basic_classes_SetType_b:('a*'b)Pset.set ->('b*'a)Pset.set= + (swap_pairs dict_Basic_classes_SetType_b dict_Basic_classes_SetType_a) + +let by_tag_from_by_range dict_Basic_classes_SetType_a dict_Basic_classes_SetType_b:('a*'b)Pset.set ->('b*'a)Pset.set= + (swap_pairs dict_Basic_classes_SetType_b dict_Basic_classes_SetType_a) + +(*val filter_elements : forall 'abifeature. 
((string * element) -> bool) ->
   annotated_memory_image 'abifeature -> annotated_memory_image 'abifeature*)
(* Keep only the elements of [img2] satisfying [pred], and prune the two
 * annotation views accordingly: by_range keeps an entry only when its element
 * still exists (entries with no range, i.e. None, are always kept), and
 * by_tag is rebuilt as the converse relation of the pruned by_range. *)
let filter_elements pred img2 : 'abifeature annotated_memory_image =
  (let new_elements =
     (Lem_map.fromList
        (instance_Map_MapKeyType_var_dict instance_Basic_classes_SetType_var_dict)
        (List.fold_right
           (fun (n, r) kept -> if pred (n, r) then (n, r) :: kept else kept)
           (Pset.elements
              ((Pmap.bindings (pairCompare compare compare) img2.elements)))
           []))
   in
   (* Drop range annotations whose element was discarded above. *)
   let new_by_range =
     (Pset.filter
        (fun (maybe_range, _tag) ->
           (match maybe_range with
              None -> true
            | Some (el_name, _el_range) ->
                Pset.mem el_name (Pmap.domain new_elements)))
        img2.by_range)
   in
   (* by_tag is kept as the exact converse of by_range. *)
   let new_by_tag =
     (Pset.fold
        (fun (k, v) acc -> Pset.add (v, k) acc)
        new_by_range
        (Pset.from_list
           (pairCompare compare
              (maybeCompare
                 (pairCompare compare
                    (pairCompare Nat_big_num.compare Nat_big_num.compare))))
           []))
   in
   { elements = new_elements
   ; by_range = new_by_range
   ; by_tag = new_by_tag
   })

(*val tag_image : forall 'abifeature. range_tag 'abifeature -> string -> natural -> natural -> annotated_memory_image 'abifeature
   -> annotated_memory_image 'abifeature*)
(* Attach tag [t] to the byte range [el_offset, el_offset + tag_len) of the
 * element named [el_name], updating both annotation views symmetrically.
 * The element map itself is untouched. *)
let tag_image t el_name el_offset tag_len img2 : 'abifeature annotated_memory_image =
  (let (k, v) = (Some (el_name, (el_offset, tag_len)), t)
   in
   { elements = (img2.elements)
   ; by_range = (Pset.add (k, v) img2.by_range)
   ; by_tag = (Pset.add (v, k) img2.by_tag)
   })

(*val address_to_element_and_offset : forall 'abifeature. natural -> annotated_memory_image 'abifeature -> maybe (string * natural)*)
(* Map an address back to (element name, offset within that element).
 * Strategy: scan all elements for the one with the highest startpos that is
 * still <= query_addr, then check the element actually spans far enough.
 * Elements with no assigned startpos are ignored. *)
let address_to_element_and_offset query_addr img2 : (string * Nat_big_num.num) option =
  (let (maybe_highest_le : (Nat_big_num.num * string * element) option) =
     (List.fold_left
        (fun maybe_current_max_le (el_name, el_rec) ->
           (match (maybe_current_max_le, el_rec.startpos) with
              (None, None) -> None
            | (None, Some this_element_pos) ->
                if Nat_big_num.less_equal this_element_pos query_addr
                then Some (this_element_pos, el_name, el_rec)
                else None
            | (Some (_cur_max_le, _cur_el_name, _cur_el_rec), None) ->
                maybe_current_max_le
            | (Some (cur_max_le, _cur_el_name, cur_el_rec), Some this_element_pos) ->
                (* Prefer a strictly higher start; on an exact tie, displace
                 * the current candidate only when it has zero length.
                 * NOTE(review): the header comment says "break ties on the
                 * bigger size", but this only tests length1 = Some 0 — a
                 * None-length current candidate is never displaced; confirm
                 * against the Lem spec. *)
                if Nat_big_num.less_equal this_element_pos query_addr
                   && (Nat_big_num.greater this_element_pos cur_max_le
                       || (Nat_big_num.equal this_element_pos cur_max_le
                           && ((Lem.option_equal Nat_big_num.equal
                                  cur_el_rec.length1 (Some (Nat_big_num.of_int 0))))))
                then Some (this_element_pos, el_name, el_rec)
                else maybe_current_max_le))
        None
        (Pmap.bindings_list img2.elements))
   in
   (match maybe_highest_le with
      Some (el_def_startpos, el_name, el_rec) ->
        (* Final sanity check: is the length definite, and if so, does the
         * element span far enough to contain query_addr?
         * NOTE(review): using >= accepts query_addr = startpos + length,
         * i.e. one byte past the element — looks like an off-by-one; TODO
         * confirm whether half-open ranges were intended. *)
        (match el_rec.length1 with
           Some l ->
             if Nat_big_num.greater_equal (Nat_big_num.add el_def_startpos l) query_addr
             then Some (el_name, Nat_big_num.sub_nat query_addr el_def_startpos)
             else None
         | None -> None)
    | None ->
        (* No element has a low enough assigned address. *)
        None))

(*val element_and_offset_to_address : forall 'abifeature. (string * natural) -> annotated_memory_image 'abifeature -> maybe natural*)
(* Forward direction of the mapping above: element name + offset -> absolute
 * address, or None if the element has no assigned start address.
 * Raises Failure if no element of that name exists at all. *)
let element_and_offset_to_address (el_name, el_off) img2 : (Nat_big_num.num) option =
  ((match Pmap.lookup el_name img2.elements with
      Some el ->
        (match el.startpos with
           Some addr -> Some (Nat_big_num.add addr el_off)
         | None -> None)
    | None -> failwith ("error: nonexistent element: " ^ el_name)))

(* Distinguished "null" values used as placeholders/defaults below. *)
let null_symbol_reference : symbol_reference =
  ({ ref_symname = ""
   ; ref_syment = elf64_null_symbol_table_entry
   ; ref_sym_scn = (Nat_big_num.of_int 0)
   ; ref_sym_idx = (Nat_big_num.of_int 0)
   })

let null_elf_relocation_a : elf64_relocation_a =
  ({ elf64_ra_offset = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0)))
   ; elf64_ra_info = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0)))
   ; elf64_ra_addend = (Nat_big_num.to_int64 (Nat_big_num.of_int 0))
   })

(* A null reference carrying a (null) relocation site and no bound definition. *)
let null_symbol_reference_and_reloc_site : symbol_reference_and_reloc_site =
  ({ ref = null_symbol_reference
   ; maybe_reloc =
       (Some { ref_relent = null_elf_relocation_a
             ; ref_rel_scn = (Nat_big_num.of_int 0)
             ; ref_rel_idx = (Nat_big_num.of_int 0)
             ; ref_src_scn = (Nat_big_num.of_int 0)
             })
   ; maybe_def_bound_to = None
   })

let null_symbol_definition : symbol_definition =
  ({ def_symname = ""
   ; def_syment = elf64_null_symbol_table_entry
   ; def_sym_scn = (Nat_big_num.of_int 0)
   ; def_sym_idx
=(Nat_big_num.of_int 0)
   ; def_linkable_idx = (Nat_big_num.of_int 0)
   })

(*val pattern_possible_starts_in_one_byte_sequence : list (maybe byte) -> list byte -> natural -> list natural*)
(* All offsets (relative to [offset]) at which [pattern] could match within
 * [seq]; a None pattern element matches any byte.  Thin wrapper that seeds
 * the accumulator-based worker with the precomputed lengths. *)
let pattern_possible_starts_in_one_byte_sequence pattern seq offset : (Nat_big_num.num) list =
  (accum_pattern_possible_starts_in_one_byte_sequence
     pattern (List.length pattern) seq (List.length seq) offset [])

(*val byte_pattern_of_byte_sequence : byte_sequence -> list (maybe byte)*)
(* A concrete byte sequence as a byte pattern: every byte is known (Some). *)
let byte_pattern_of_byte_sequence seq : ((char) option) list =
  ((match seq with
      Sequence(bs) -> Lem_list.map (fun b -> Some b) bs))

(*val compute_virtual_address_adjustment : natural -> natural -> natural -> natural*)
(* Misalignment of a segment's vaddr relative to its file offset, modulo the
 * maximum page size — the amount the mapping must be shifted to page-align. *)
let compute_virtual_address_adjustment max_page_size offset vaddr : Nat_big_num.num =
  (Nat_big_num.modulus (Nat_big_num.sub_nat vaddr offset) max_page_size)

(*val extract_natural_field : natural -> element -> natural -> natural*)
(* Read a [width]-byte field starting at [offset] in [element1]'s contents as
 * a natural number.  Unknown (None) bytes read as zero.
 * NOTE(review): the fold accumulates most-significant-byte first, i.e. a
 * big-endian read, while write_natural_field below emits little-endian —
 * the original FIXME already questions this; confirm intended endianness. *)
let extract_natural_field width element1 offset : Nat_big_num.num =
  (let maybe_bytes = (take0 width (drop0 offset element1.contents))
   in
   let bytes =
     (Lem_list.map
        (fun mb -> (match mb with None -> Char.chr 0 | Some mb -> mb))
        maybe_bytes)
   in
   List.fold_left
     (fun acc next_byte ->
        Nat_big_num.add
          (Nat_big_num.mul acc (Nat_big_num.of_int 256))
          (Nat_big_num.of_int (Char.code next_byte)))
     (Nat_big_num.of_int 0 : Nat_big_num.num)
     bytes)

(*val natural_to_le_byte_list : natural -> list byte*)
(* Little-endian byte decomposition of [n]; always yields at least one byte
 * (natural_to_le_byte_list 0 = ['\000']).  The quotient is computed once and
 * reused in the recursive call. *)
let rec natural_to_le_byte_list n : (char) list =
  (let d = Nat_big_num.div n (Nat_big_num.of_int 256) in
   (Char.chr (Nat_big_num.to_int (Nat_big_num.modulus n (Nat_big_num.of_int 256))))
   :: (if Nat_big_num.equal d (Nat_big_num.of_int 0) then []
       else natural_to_le_byte_list d))

(*val natural_to_le_byte_list_padded_to : natural -> natural -> list byte*)
(* Little-endian bytes of [n], zero-padded on the high end up to [width]
 * bytes.  NOTE(review): if n needs more than [width] bytes, sub_nat clamps
 * the padding to zero and the over-long list is returned unchanged. *)
let natural_to_le_byte_list_padded_to width n : (char) list =
  (let bytes = (natural_to_le_byte_list n)
   in
   bytes @ (replicate0 (Nat_big_num.sub_nat width (length bytes)) (Char.chr 0)))

(*val n2i : natural -> integer*)
(* Both Lem types erase to Nat_big_num.num in OCaml, so this is the identity. *)
let n2i : Nat_big_num.num -> Nat_big_num.num = (fun n -> n)

(*val i2n: integer -> natural*)
(* NOTE(review): abs silently maps negative integers to their magnitude
 * instead of failing — callers are presumably expected to pass naturals. *)
let i2n : Nat_big_num.num -> Nat_big_num.num = Nat_big_num.abs

(*val i2n_signed : nat -> integer -> natural*)
(* Encode signed integer [i] in [width] bits as its two's-complement bit
 * pattern, returned as a natural.  Fails with "overflow" when a non-negative
 * i does not fit in width-1 bits.
 * NOTE(review): there is no corresponding range check for negative i
 * (i < -2^(width-1) is encoded without complaint) — confirm intended. *)
let i2n_signed width i : Nat_big_num.num =
  (if Nat_big_num.greater_equal i (Nat_big_num.of_int 0) then
     if Nat_big_num.greater_equal i
          (Nat_big_num.pow_int (Nat_big_num.of_int 2) (Nat_num.nat_monus width 1))
     then failwith "overflow"
     else Nat_big_num.abs i
   else
     (* Manually build the 2's complement: invert the magnitude's bits within
      * a width-bit mask, then add one. *)
     let negated = (Nat_big_num.abs (Nat_big_num.sub (Nat_big_num.of_int 0) i)) in
     let (xormask : Nat_big_num.num) =
       (Nat_big_num.sub_nat
          (Nat_big_num.pow_int (Nat_big_num.of_int 2) width)
          (Nat_big_num.of_int 1))
     in
     Nat_big_num.add (Nat_big_num.of_int 1) (Nat_big_num.bitwise_xor negated xormask))

(*val to_le_signed_bytes : natural -> integer -> list byte*)
(* [i] as [bytewidth] little-endian bytes, two's-complement encoded. *)
let to_le_signed_bytes bytewidth i : (char) list =
  (natural_to_le_byte_list_padded_to bytewidth
     (i2n_signed
        (Nat_big_num.to_int (Nat_big_num.mul (Nat_big_num.of_int 8) bytewidth))
        i))

(*val to_le_unsigned_bytes : natural -> integer -> list byte*)
(* [i] as [bytewidth] little-endian bytes, unsigned (magnitude only). *)
let to_le_unsigned_bytes bytewidth i : (char) list =
  (natural_to_le_byte_list_padded_to bytewidth (Nat_big_num.abs i))

(*val write_natural_field : natural -> natural -> element -> natural -> element*)
(* Overwrite the [width] bytes at [offset] in [element1]'s contents with the
 * little-endian encoding of [new_field_value], zero-padding the high end.
 * Fails if the value needs more than [width] bytes.  The new contents are
 * pre @ value-bytes @ padding @ post (the original expressed this through an
 * equivalent chain of rev_append/rev calls). *)
let write_natural_field new_field_value width element1 offset : element =
  (let pre_bytes = (take0 offset element1.contents)
   in
   let post_bytes = (drop0 (Nat_big_num.add offset width) element1.contents)
   in
   (* FIXME: avoid hard-coding little-endian *)
   let field_bytes = (natural_to_le_byte_list new_field_value)
   in
   if Nat_big_num.greater (length field_bytes) width
   then failwith "internal error: relocation output unrepresentable"
   else
     let padding =
       replicate0 (Nat_big_num.sub_nat width (length field_bytes))
         (Some (Char.chr 0))
     in
     { contents =
         pre_bytes @ (List.map (fun b -> Some b) field_bytes) @ padding @ post_bytes
     ; startpos = (element1.startpos)
     ; length1 = (element1.length1)
     })
