diff options
| author | Alasdair Armstrong | 2017-09-07 16:54:20 +0100 |
|---|---|---|
| committer | Alasdair Armstrong | 2017-09-07 16:54:20 +0100 |
| commit | 842165c1171fde332bd42e7520338c59a797f76b (patch) | |
| tree | 75b61297b6d9b6e4810542390eb1371afc2f183f /lib/ocaml_rts/linksem/linker_script.ml | |
| parent | 8124c487b576661dfa7a0833415d07d0978bc43e (diff) | |
Add ocaml run-time and updates to sail for ocaml backend
Diffstat (limited to 'lib/ocaml_rts/linksem/linker_script.ml')
| -rw-r--r-- | lib/ocaml_rts/linksem/linker_script.ml | 2783 |
1 files changed, 2783 insertions, 0 deletions
diff --git a/lib/ocaml_rts/linksem/linker_script.ml b/lib/ocaml_rts/linksem/linker_script.ml new file mode 100644 index 00000000..535d9037 --- /dev/null +++ b/lib/ocaml_rts/linksem/linker_script.ml @@ -0,0 +1,2783 @@ +(*Generated by Lem from linker_script.lem.*) +open Lem_basic_classes +open Lem_function +open Lem_string +open Lem_tuple +open Lem_bool +open Lem_list +open Lem_sorting +open Lem_num +open Lem_maybe +open Lem_assert_extra +open Lem_set +(*import Map*) + +open Byte_sequence +open Default_printing +open Error +open Missing_pervasives +open Show + +open Elf_header +open Elf_file +open Elf_interpreted_section + +open Abis +open Command_line +open Input_list +open Linkable_list +open Memory_image +open Elf_memory_image (* HMM -- ideally we'd be ELF-agnostic in this file. + But Abstract_abi is now merged into Elf_memory_image, so never mind. *) +open Elf_memory_image_of_elf64_file +open Elf_relocation +open Elf_symbol_table +open Elf_section_header_table +open Elf_types_native_uint +open Memory_image_orderings + +(* We model two kinds of linker script: "implicit scripts", which are supplied + * on the command line as input objects, and "control scripts" of which there + * is exactly one per link job. The abstract syntax of each script comes from the + * same grammar. + * + * We define the control script as a bunch of functions, to allow for + * link jobs where we don't have an AST and the script behaviour is hard-coded. + *) + +(* Input sections come from individual (relocatable) ELF files. + * The name of this file is important! + * + * Each input "section" is always an identified section or common symbol + * *within* some ELF memory image. *) + +type input_section_rec = { + idx : Nat_big_num.num (* linkable idx *) +; fname : string +; img : elf_memory_image +; shndx : Nat_big_num.num +; secname: string +; isec : elf64_interpreted_section +} + +type input_spec + = Common of (Nat_big_num.num * string * elf_memory_image * symbol_definition) (* string is symbol name -- must be a COMMON symbol *) + | InputSection of input_section_rec + +(* A control script defines + * - output sections + * - a mapping from output sections to (ordered) input sections + * - extra symbols + * - output format etc. (skip this for now) + *) + +(* We will have to deal with merging etc. at some point, somewhere + * (maybe here, maybe not); for now we just produce an ordered list + * of sections. + *) + +(* We can't model linker scripts as plain Lem functions without writing + * them to a very different structure than that of scripts. The reason is that + * certain features of the script language necessitate multiple passes + * over the script structure. For example, to figure out how big an + * output section is, hence where to begin the next section, you need to + * know which of the input sections are marked for KEEP. For that, you need + * a def-use graph over input sections. But for that, you also need to account + * for *all* symbol definitions, and the script itself is allowed to add new + * ones (right in among its input sections). So we have to do one pass to + * enumerate the symbol additions, and another pass to eliminate sections + * that we don't want to KEEP. + * + * Other gotchas include: + * + * - symbol provision and address advancement can occur in among the input + * section queries, but also outside any output section. + * + * - semantics of DATA_SEGMENT_ALIGN depend on future script contents + * + * - ONLY_IF_RO and ONLY_IF_RW are tricky: need to evaluate the input section + * queries + * + * - semantics of empty sections are subtle (". = ." will force an empty section + * to be emitted, but ". = . + 0" will not do so). + * + * Our approach is to define an interpreter for (at present) most of the script + * language. + *) + +type symbol_def_policy = AlwaysDefine + | ProvideIfUsed + +type input_selector = input_spec list -> input_spec list + +type address_expr = Memory_image.expr + +type output_guard = AlwaysOutput + | OnlyIfRo + | OnlyIfRw + +type symbol_spec = (Nat_big_num.num * Uint32.uint32 * Uint32.uint32) (* size, info, other *) + +type retain_policy + = DefaultKeep + | KeepEvenWhenGC + +type address_expr_fn_ref = Nat_big_num.num +type 'a address_expr_fn_map = (address_expr_fn_ref, (Nat_big_num.num -> 'a -> Nat_big_num.num)) Pmap.map +(* 'a = allocated_sections_map *) + +type output_section_composition_element + = IncludeInputSection of (retain_policy * input_section_rec) + | IncludeCommonSymbol of (retain_policy * string (* file *) * Nat_big_num.num (* linkable_idx *) * symbol_definition * elf_memory_image) + | Hole of address_expr_fn (* compute the next addr to continue layout at *) + | ProvideSymbol of (symbol_def_policy * string * symbol_spec) +and +sort_policy + = DefaultSort (* Use command line sort option, else "seen" order *) + | SeenOrder (* Always use "seen" order *) + | ByName + | ByNameThenAlignment + | ByAlignment + | ByAlignmentThenName + | ByInitPriority +and +(* This mirrors the OutputSection constructor, except that the script elements have become + * output_section_composition_elements, and we might store the size here. *) +output_section_spec = + OutputSectionSpec of (output_guard * Nat_big_num.num option * string * ( output_section_composition_element list)) +and +allocated_sections_map = + AllocatedSectionsMap of (string, (output_section_spec (* OutputSection element idx *) * Nat_big_num.num)) Pmap.map +and +address_expr_fn + = AddressExprFn of address_expr_fn_ref + +type script_element = + DefineSymbol of (symbol_def_policy * string * symbol_spec) +| AdvanceAddress of address_expr_fn +| MarkAndAlignDataSegment of (Nat_big_num.num * Nat_big_num.num) (* maxpagesize, commonpagesize *) +| MarkDataSegmentEnd +| MarkDataSegmentRelroEnd (*of (allocated_sections_map -> (natural * (natural -> natural))) DPM: commented out because of positivity constrains in Isabelle *) +| OutputSection of (output_guard * ( (* address_expr *) address_expr_fn option) * string * script_element list) +| DiscardInput of input_selector + (* Input queries can only occur within an output section. + Output sections may not nest within other output sections. + (Ideally we would use something like polymorphic variants to encode this.) + *) +| InputQuery of (retain_policy * sort_policy * input_selector) + +(* A linker control script is a function from inputs to output elements. + * We can define them in syntax (using an interpreter) + * or in Lem directly (as functions). *) +type linker_control_script = script_element list +type labelled_linker_control_script = (script_element * Nat_big_num.num) list + +(*val all_suffixes : list char -> list (list char)*) +let rec all_suffixes chars:((char)list)list= + ((match chars with + [] -> [[]] + | c :: morecs -> chars :: (all_suffixes morecs) + )) + +(*val glob_match : list char -> list char -> bool*) +let rec glob_match pat str:bool= + ((match (pat, str) with + ([], []) -> true + | ('?':: morepat, _ :: morestr) -> glob_match morepat morestr + | ('*':: morepat, _) -> + (* if any suffix of the remaining string matches + * the remaining pattern, we've matched the pattern + * from '*' onwards. *) + let or_suffix_match = (fun matched -> (fun newlist -> + matched || glob_match morepat newlist)) + in + List.fold_left (or_suffix_match) false (all_suffixes str) + | (patc :: morepat, c :: morestr) -> (patc = c) && glob_match morepat morestr + | ([], _) -> (* ran out of pattern *) false + | (_, []) -> (* ran out of str *) false + )) + +(*val default_symbol_spec : symbol_spec*) +let default_symbol_spec:Nat_big_num.num*Uint32.uint32*Uint32.uint32= (Nat_big_num.of_int 0, Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0)), Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) +(*val hidden_symbol_spec : symbol_spec*) +let hidden_symbol_spec:Nat_big_num.num*Uint32.uint32*Uint32.uint32= (Nat_big_num.of_int 0, Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0)), Uint32.of_string (Nat_big_num.to_string stv_hidden)) + +(* These Lem functions replicate linker script functions or builtin behaviours. *) + +(*val only_sections : input_selector*) +let only_sections inputs:(input_spec)list= (Lem_list.mapMaybe + (fun i -> (match i with + | InputSection(_) -> Some(i) + | _ -> None + )) inputs) + +(*val filter_and_concat : (input_spec -> bool) -> input_selector*) (* a.k.a. list input_spec -> list input_spec *) +let filter_and_concat p inputs:(input_spec)list= (List.filter p inputs) + +(*val name_matches : string -> input_spec -> bool*) +let name_matches pat input:bool= + ((match input with + InputSection(inp) -> + (*let _ = errln ("Does section name `" ^ inp.secname ^ "' match glob pattern `" ^ pat ^ "'? ") in + let result = *)glob_match (Xstring.explode pat) (Xstring.explode inp.secname) (*in + let _ = errln (if result then "yes" else "no") + in result*) + | _ -> false + )) + +(*val file_matches : string -> input_spec -> bool*) +let file_matches pat input:bool= + ((match input with + InputSection(inp) -> glob_match (Xstring.explode pat) (Xstring.explode inp.fname) + | _ -> false + )) + +let compareInputSpecByNameThenAlignment i1 i2:int= + (let toPair = (fun is -> ((match is with + Common(idx1, fname1, img2, def) -> ("COMMON" (* FIXME: is this right? *), Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value) + | InputSection(isrec) -> (isrec.isec.elf64_section_name_as_string, isrec.isec.elf64_section_align) + ))) + in (pairCompare compare Nat_big_num.compare (toPair i1) (toPair i2))) + +let compareInputSpecByAlignment i1 i2:int= + (let toNatural = (fun is -> ((match is with + Common(idx1, fname1, img2, def) -> Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value + | InputSection(isrec) -> isrec.isec.elf64_section_align + ))) + in Nat_big_num.compare (toNatural i1) (toNatural i2)) + +let compareInputSpecByName i1 i2:int= + (let toString = (fun is -> ((match is with + Common(idx1, fname1, img2, def) -> "COMMON" + | InputSection(isrec) -> isrec.isec.elf64_section_name_as_string + ))) + in compare (toString i1) (toString i2)) + +let compareInputSpecByAlignmentThenName i1 i2:int= + (let toPair = (fun is -> ((match is with + Common(idx1, fname1, img2, def) -> (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value, + "COMMON" (* FIXME: is this right? *)) + | InputSection(isrec) -> (isrec.isec.elf64_section_align, isrec.isec.elf64_section_name_as_string) + ))) + in (pairCompare Nat_big_num.compare compare (toPair i1) (toPair i2))) + +let compareInputSpecByInitPriority i1 i2:int= 0 (* FIXME *) + +(* DATA_SEGMENT_ALIGN is defined by two formulae + * (over pos and commonpagesize/maxpagesize) + * "... depending on whether the latter uses fewer COMMONPAGESIZE sized + pages for the data segment (area between the result of this + expression and `DATA_SEGMENT_END') than the former or not. If the + latter form is used, it means COMMONPAGESIZE bytes of runtime + memory will be saved at the expense of up to COMMONPAGESIZE wasted + bytes in the on-disk file." + + So the amount of padding that gets inserted here depends on the location + of something that comes *later*, namely DATA_SEGMENT_END. + So, we can't model it as a function of the current position. + Instead, we add MarkDataSegmentEnd and friends + to the script_element ADT. + *) + +let has_writability:'a ->input_spec ->bool= (fun writable -> (fun input_sec -> ( + (match input_sec with + Common(_, _, _, _) + -> (* all common symbols are potentially writable *) true + | InputSection(inp) + -> let (flags : Nat_big_num.num) = ((match elf_memory_image_section_by_index inp.shndx inp.img with + Some x -> x.elf64_section_flags + | None -> failwith ("impossible: no such section" (*(index " ^ (show inp.shndx) ^ ")""*)) + )) + in + flag_is_set shf_write flags + ) +))) + +(* LARGE_COMMON seems to have been defined in this patch set: + https://sourceware.org/ml/binutils/2005-07/txt00014.txt + and at the time was "only for x86-64". It seems to be analogous + to ".lbss", i.e. "large bss". libbfd defines SHF_X86_64_LARGE. + The best comment seems to be in llvm's Support/ELF.h: + +0814 // If an object file section does not have this flag set, then it may not hold +0815 // more than 2GB and can be freely referred to in objects using smaller code +0816 // models. Otherwise, only objects using larger code models can refer to them. +0817 // For example, a medium code model object can refer to data in a section that +0818 // sets this flag besides being able to refer to data in a section that does +0819 // not set it; likewise, a small code model object can refer only to code in a +0820 // section that does not set this flag. + + *) + +(*val address_zero : natural -> address_expr_fn_map allocated_sections_map -> + (natural * address_expr_fn_map allocated_sections_map * address_expr_fn)*) +let address_zero fresh alloc_map:Nat_big_num.num*((Nat_big_num.num),(Nat_big_num.num ->allocated_sections_map ->Nat_big_num.num))Pmap.map*address_expr_fn= + (let alloc_map' = (Pmap.add fresh (fun pos -> (fun secs ->Nat_big_num.of_int 0)) alloc_map) in + let fresh' = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + (fresh', alloc_map', AddressExprFn fresh)) + +(* +val output_sec_composition_size : list output_section_composition_element -> natural +let output_sec_composition_size comp = List.foldl (+) 0 (List.map size_of_output_section_composition_element comp) +*) +(*val do_output_section_layout_starting_at_addr : natural -> allocated_sections_map -> list output_section_composition_element -> (natural * list natural)*) +let do_output_section_layout_starting_at_addr start_addr (AllocatedSectionsMap secs) comps:Nat_big_num.num*(Nat_big_num.num)list= +( + (* map out where we plumb in each section, accounting for their alignment *)List.fold_left (fun (next_free_addr, addr_list) -> (fun comp_el -> (match comp_el with + IncludeInputSection(retain_pol, irec (* fname, linkable_idx, shndx, isec, img *)) -> + let aligned_next_free = (align_up_to irec.isec.elf64_section_align next_free_addr) + in + (*let _ = errln ("Aligned start address up to 0x" ^ hex_string_of_natural aligned_next_free ^ + " (align 0x" ^ (hex_string_of_natural irec.isec.elf64_section_align) ^ + ") for included output section `" ^ + irec.isec.elf64_section_name_as_string ^ "' from file `" ^ irec.fname ^ "'") + in*) + ( Nat_big_num.add aligned_next_free irec.isec.elf64_section_size, List.rev_append (List.rev addr_list) [aligned_next_free]) + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + let aligned_next_free = (align_up_to (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value) next_free_addr) + in + ( Nat_big_num.add aligned_next_free (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_size), List.rev_append (List.rev addr_list) [aligned_next_free]) + (*| Hole(AddressExprFn f) -> (f next_free_addr secs, addr_list ++ [next_free_addr])*) + | ProvideSymbol(pol, name1, spec) -> (next_free_addr, List.rev_append (List.rev addr_list) [next_free_addr]) + ) + )) (start_addr, []) comps) + +(*val output_sec_composition_size_given_start_addr : natural -> allocated_sections_map -> list output_section_composition_element -> natural*) +let output_sec_composition_size_given_start_addr start_addr secs comp:Nat_big_num.num= + (let (end_addr, comp_addrs) = (do_output_section_layout_starting_at_addr start_addr secs comp) + in Nat_big_num.sub_nat + end_addr start_addr) + +(*val sizeof : string -> allocated_sections_map -> natural*) +let sizeof secname1 (AllocatedSectionsMap secs):Nat_big_num.num= + ((match Pmap.lookup secname1 secs with + Some(OutputSectionSpec (_, maybe_addr, _, comp), _) -> (match maybe_addr with + Some addr -> output_sec_composition_size_given_start_addr addr (AllocatedSectionsMap secs) comp + | None -> failwith ("error: sizeof applied to section without defined start address") + ) + | None -> failwith ("error: sizeof applied to non-existent section name " ^ secname1) + )) + +(*val alignof_output_section_composition_element : output_section_composition_element -> natural*) +let alignof_output_section_composition_element comp:Nat_big_num.num= + ((match comp with + IncludeInputSection(_, irec) -> irec.isec.elf64_section_align + | IncludeCommonSymbol(_, _, _, def, _) -> Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value + | _ ->Nat_big_num.of_int 1 (* CHECK *) + )) + +(*val alignof_output_section : list output_section_composition_element -> natural*) +let alignof_output_section comps:Nat_big_num.num= + (let aligns = (Lem_list.map alignof_output_section_composition_element comps) + in + List.fold_left (fun acc_lcm -> fun next -> lcm acc_lcm next)(Nat_big_num.of_int 1) aligns) + +(*val default_linker_control_script : natural -> address_expr_fn_map allocated_sections_map -> + abi any_abi_feature -> maybe natural -> maybe natural -> maybe natural -> + natural -> (natural * address_expr_fn_map allocated_sections_map * linker_control_script)*) +let default_linker_control_script fresh alloc_map a user_text_segment_start user_data_segment_start user_rodata_segment_start elf_headers_size:Nat_big_num.num*((Nat_big_num.num),(Nat_big_num.num ->allocated_sections_map ->Nat_big_num.num))Pmap.map*(script_element)list= + (let segment_start name1 default= ((match name1 with + "ldata-segment" -> (match user_data_segment_start with + None -> default + | Some addr -> (* fun _ -> *) addr + ) + | "text-segment" -> (match user_text_segment_start with + None -> default + | Some addr -> (* fun _ -> *) addr + ) + )) + in + let is_large_common = (fun inp -> (* FIXME: treat large commons separately *) false + ) + in + let is_common = (fun isec1 -> (match isec1 with + Common(idx1, fname1, img2, def) -> (*let _ = errln ("Common or large-common symbol: " ^ def.def_symname) in *) + not (is_large_common isec1) + | _ -> false + )) + in + let alloc_fn1 = (fun _ -> (fun _ -> Nat_big_num.add (segment_start "text-segment" ( Nat_big_num.mul(Nat_big_num.of_int 4)(Nat_big_num.of_int 1048576))) elf_headers_size)) in + let alloc_fn1_ref = fresh in + let alloc_map = (Pmap.add alloc_fn1_ref alloc_fn1 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn2 = (fun addr -> (fun _ -> Nat_big_num.sub_nat + (* (align_up_to a.maxpagesize addr) - (natural_land (a.maxpagesize - addr) (a.maxpagesize - 1)) *) + (* + FIXME: understand the intention of this assignment. + Evaluating a simple example of this (from true-static-uClibc) + + (ALIGN (0x200000) - ((0x200000 - .) & 0x1fffff)) + + starting from 0x00000000004017dc + means + 0x600000 - ((0x200000 - 0x4017dc) & 0x1fffff) + i.e. + 0x600000 - (((-0x2017dc)) & 0x1fffff) + i.e. + 0x600000 - ( -0x2017dc + & 0x1fffff ) + + which really does come to (according to bash) 0x4017dc + i.e. we subtract 0x1fe824 from 0x600000 + and end up back where we started. + + What does ANDing a negative number mean? + It doesn't seem to work for us. + Well, to take the negation we flip every bit and add one. + So if we don't want to do a subtraction that might go negative, + we can instead add the complement. + *) + (align_up_to a.maxpagesize addr) (Nat_big_num.bitwise_and ( Nat_big_num.add a.maxpagesize (compl64 addr)) ( Nat_big_num.sub_nat a.maxpagesize(Nat_big_num.of_int 1))))) in + let (fresh, alloc_map, (address_zero_fn : address_expr_fn)) = (address_zero fresh alloc_map) in + let alloc_fn2_ref = fresh in + let alloc_map = (Pmap.add alloc_fn2_ref alloc_fn2 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn3 = (fun pos -> (fun secs -> align_up_to (if Nat_big_num.equal pos(Nat_big_num.of_int 0) then (Nat_big_num.div(Nat_big_num.of_int 64)(Nat_big_num.of_int 8)) else Nat_big_num.of_int 1) pos)) in + let alloc_fn3_ref = fresh in + let alloc_map = (Pmap.add alloc_fn3_ref alloc_fn3 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn4 = (fun pos -> (fun secs -> align_up_to (Nat_big_num.div(Nat_big_num.of_int 64)(Nat_big_num.of_int 8)) pos)) in + let alloc_fn4_ref = fresh in + let alloc_map = (Pmap.add alloc_fn4_ref alloc_fn4 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn5 = (fun pos -> (fun secs -> segment_start "ldata-segment" pos)) in + let alloc_fn5_ref = fresh in + let alloc_map = (Pmap.add alloc_fn5_ref alloc_fn5 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn6 = (fun pos -> fun secs -> align_up_to ( Nat_big_num.add a.maxpagesize ( Nat_big_num.sub_nat(Nat_big_num.bitwise_and pos a.maxpagesize)(Nat_big_num.of_int 1))) pos) in + let alloc_fn6_ref = fresh in + let alloc_map = (Pmap.add alloc_fn6_ref alloc_fn6 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn7 = (fun pos -> (fun secs -> (if not (Nat_big_num.equal pos(Nat_big_num.of_int 0)) then Nat_big_num.div(Nat_big_num.of_int 64)(Nat_big_num.of_int 8) else Nat_big_num.of_int 1))) in + let alloc_fn7_ref = fresh in + let alloc_map = (Pmap.add alloc_fn7_ref alloc_fn7 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + let alloc_fn8 = (fun pos -> (fun secs -> align_up_to (Nat_big_num.div(Nat_big_num.of_int 64)(Nat_big_num.of_int 8)) pos)) in + let alloc_fn8_ref = fresh in + let alloc_map = (Pmap.add alloc_fn8_ref alloc_fn8 alloc_map) in + let fresh = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + (fresh, alloc_map, [ + (* For now, we base our script on the GNU bfd linker's scripts. + Here's the static -z combreloc one. + +/* Script for -z combreloc: combine and sort reloc sections */ +/* Copyright (C) 2014 Free Software Foundation, Inc. + Copying and distribution of this script, with or without modification, + are permitted in any medium without royalty provided the copyright + notice and this notice are preserved. */ +OUTPUT_FORMAT("elf64-x86-64", "elf64-x86-64", + "elf64-x86-64") +OUTPUT_ARCH(i386:x86-64) +ENTRY(_start) +SEARCH_DIR("=/usr/x86_64-linux-gnu/lib64"); SEARCH_DIR("=/usr/local/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/local/lib64"); SEARCH_DIR("=/lib/x86_64-linux-gnu"); SEARCH_DIR("=/lib64"); SEARCH_DIR("=/usr/lib/x86_64-linux-gnu"); SEARCH_DIR("=/usr/lib64"); SEARCH_DIR("=/usr/x86_64-linux-gnu/lib"); SEARCH_DIR("=/usr/local/lib"); SEARCH_DIR("=/lib"); SEARCH_DIR("=/usr/lib"); +SECTIONS +{ + /* Read-only sections, merged into text segment: */ + PROVIDE (__executable_start = SEGMENT_START("text-segment", 0x400000)); . = SEGMENT_START("text-segment", 0x400000) + SIZEOF_HEADERS; + .interp : { *(.interp) } + .note.gnu.build-id : { *(.note.gnu.build-id) } + .hash : { *(.hash) } + .gnu.hash : { *(.gnu.hash) } + .dynsym : { *(.dynsym) } + .dynstr : { *(.dynstr) } + .gnu.version : { *(.gnu.version) } + .gnu.version_d : { *(.gnu.version_d) } + .gnu.version_r : { *(.gnu.version_r) } + .rela.dyn : + { + *(.rela.init) + *(.rela.text .rela.text.* .rela.gnu.linkonce.t.* ) + *(.rela.fini) + *(.rela.rodata .rela.rodata.* .rela.gnu.linkonce.r.* ) + *(.rela.data .rela.data.* .rela.gnu.linkonce.d.* ) + *(.rela.tdata .rela.tdata.* .rela.gnu.linkonce.td.* ) + *(.rela.tbss .rela.tbss.* .rela.gnu.linkonce.tb.* ) + *(.rela.ctors) + *(.rela.dtors) + *(.rela.got) + *(.rela.bss .rela.bss.* .rela.gnu.linkonce.b.* ) + *(.rela.ldata .rela.ldata.* .rela.gnu.linkonce.l.* ) + *(.rela.lbss .rela.lbss.* .rela.gnu.linkonce.lb.* ) + *(.rela.lrodata .rela.lrodata.* .rela.gnu.linkonce.lr.* ) + *(.rela.ifunc) + } + .rela.plt : + { + *(.rela.plt) + PROVIDE_HIDDEN (__rela_iplt_start = .); + *(.rela.iplt) + PROVIDE_HIDDEN (__rela_iplt_end = .); + } + .init : + { + KEEP ( *(SORT_NONE(.init))) + } + .plt : { *(.plt) *(.iplt) } + .plt.bnd : { *(.plt.bnd) } + .text : + { + *(.text.unlikely .text.*_unlikely .text.unlikely.* ) + *(.text.exit .text.exit.* ) + *(.text.startup .text.startup.* ) + *(.text.hot .text.hot.* ) + *(.text .stub .text.* .gnu.linkonce.t.* ) + /* .gnu.warning sections are handled specially by elf32.em. */ + *(.gnu.warning) + } + .fini : + { + KEEP ( *(SORT_NONE(.fini))) + } + PROVIDE (__etext = .); + PROVIDE (_etext = .); + PROVIDE (etext = .); + .rodata : { *(.rodata .rodata.* .gnu.linkonce.r.* ) } + .rodata1 : { *(.rodata1) } + .eh_frame_hdr : { *(.eh_frame_hdr) } + .eh_frame : ONLY_IF_RO { KEEP ( *(.eh_frame)) } + .gcc_except_table : ONLY_IF_RO { *(.gcc_except_table + .gcc_except_table.* ) } + /* These sections are generated by the Sun/Oracle C++ compiler. */ + .exception_ranges : ONLY_IF_RO { *(.exception_ranges + .exception_ranges* ) } + /* Adjust the address for the data segment. We want to adjust up to + the same address within the page on the next page up. */ + . = ALIGN (CONSTANT (MAXPAGESIZE)) - ((CONSTANT (MAXPAGESIZE) - .) & (CONSTANT (MAXPAGESIZE) - 1)); . = DATA_SEGMENT_ALIGN (CONSTANT (MAXPAGESIZE), CONSTANT (COMMONPAGESIZE)); + /* Exception handling */ + .eh_frame : ONLY_IF_RW { KEEP ( *(.eh_frame)) } + .gcc_except_table : ONLY_IF_RW { *(.gcc_except_table .gcc_except_table.* ) } + .exception_ranges : ONLY_IF_RW { *(.exception_ranges .exception_ranges* ) } + /* Thread Local Storage sections */ + .tdata : { *(.tdata .tdata.* .gnu.linkonce.td.* ) } + .tbss : { *(.tbss .tbss.* .gnu.linkonce.tb.* ) *(.tcommon) } + .preinit_array : + { + PROVIDE_HIDDEN (__preinit_array_start = .); + KEEP ( *(.preinit_array)) + PROVIDE_HIDDEN (__preinit_array_end = .); + } + .init_array : + { + PROVIDE_HIDDEN (__init_array_start = .); + KEEP ( *(SORT_BY_INIT_PRIORITY(.init_array.* ) SORT_BY_INIT_PRIORITY(.ctors.* ))) + KEEP ( *(.init_array EXCLUDE_FILE ( *crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .ctors)) + PROVIDE_HIDDEN (__init_array_end = .); + } + .fini_array : + { + PROVIDE_HIDDEN (__fini_array_start = .); + KEEP ( *(SORT_BY_INIT_PRIORITY(.fini_array.* ) SORT_BY_INIT_PRIORITY(.dtors.* ))) + KEEP ( *(.fini_array EXCLUDE_FILE ( *crtbegin.o *crtbegin?.o *crtend.o *crtend?.o ) .dtors)) + PROVIDE_HIDDEN (__fini_array_end = .); + } + .ctors : + { + /* gcc uses crtbegin.o to find the start of + the constructors, so we make sure it is + first. Because this is a wildcard, it + doesn't matter if the user does not + actually link against crtbegin.o; the + linker won't look for a file to match a + wildcard. The wildcard also means that it + doesn't matter which directory crtbegin.o + is in. */ + KEEP ( *crtbegin.o(.ctors)) + KEEP ( *crtbegin?.o(.ctors)) + /* We don't want to include the .ctor section from + the crtend.o file until after the sorted ctors. + The .ctor section from the crtend file contains the + end of ctors marker and it must be last */ + KEEP ( *(EXCLUDE_FILE ( *crtend.o *crtend?.o ) .ctors)) + KEEP ( *(SORT(.ctors.* ))) + KEEP ( *(.ctors)) + } + .dtors : + { + KEEP ( *crtbegin.o(.dtors)) + KEEP ( *crtbegin?.o(.dtors)) + KEEP ( *(EXCLUDE_FILE ( *crtend.o *crtend?.o ) .dtors)) + KEEP ( *(SORT(.dtors.* ))) + KEEP ( *(.dtors)) + } + .jcr : { KEEP ( *(.jcr)) } + .data.rel.ro : { *(.data.rel.ro.local* .gnu.linkonce.d.rel.ro.local.* ) *(.data.rel.ro .data.rel.ro.* .gnu.linkonce.d.rel.ro.* ) } + .dynamic : { *(.dynamic) } + .got : { *(.got) *(.igot) } + . = DATA_SEGMENT_RELRO_END (SIZEOF (.got.plt) >= 24 ? 24 : 0, .); + .got.plt : { *(.got.plt) *(.igot.plt) } + .data : + { + *(.data .data.* .gnu.linkonce.d.* ) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + _edata = .; PROVIDE (edata = .); + . = .; + __bss_start = .; + .bss : + { + *(.dynbss) + *(.bss .bss.* .gnu.linkonce.b.* ) + *(COMMON) + /* Align here to ensure that the .bss section occupies space up to + _end. Align after .bss to ensure correct alignment even if the + .bss section disappears because there are no input sections. + FIXME: Why do we need it? When there is no .bss section, we don't + pad the .data section. */ + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + .lbss : + { + *(.dynlbss) + *(.lbss .lbss.* .gnu.linkonce.lb.* ) + *(LARGE_COMMON) + } + . = ALIGN(64 / 8); + . = SEGMENT_START("ldata-segment", .); + .lrodata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.lrodata .lrodata.* .gnu.linkonce.lr.* ) + } + .ldata ALIGN(CONSTANT (MAXPAGESIZE)) + (. & (CONSTANT (MAXPAGESIZE) - 1)) : + { + *(.ldata .ldata.* .gnu.linkonce.l.* ) + . = ALIGN(. != 0 ? 64 / 8 : 1); + } + . = ALIGN(64 / 8); + _end = .; PROVIDE (end = .); + . = DATA_SEGMENT_END (.); + /* Stabs debugging sections. */ + .stab 0 : { *(.stab) } + .stabstr 0 : { *(.stabstr) } + .stab.excl 0 : { *(.stab.excl) } + .stab.exclstr 0 : { *(.stab.exclstr) } + .stab.index 0 : { *(.stab.index) } + .stab.indexstr 0 : { *(.stab.indexstr) } + .comment 0 : { *(.comment) } + /* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. */ + /* DWARF 1 */ + .debug 0 : { *(.debug) } + .line 0 : { *(.line) } + /* GNU DWARF 1 extensions */ + .debug_srcinfo 0 : { *(.debug_srcinfo) } + .debug_sfnames 0 : { *(.debug_sfnames) } + /* DWARF 1.1 and DWARF 2 */ + .debug_aranges 0 : { *(.debug_aranges) } + .debug_pubnames 0 : { *(.debug_pubnames) } + /* DWARF 2 */ + .debug_info 0 : { *(.debug_info .gnu.linkonce.wi.* ) } + .debug_abbrev 0 : { *(.debug_abbrev) } + .debug_line 0 : { *(.debug_line .debug_line.* .debug_line_end ) } + .debug_frame 0 : { *(.debug_frame) } + .debug_str 0 : { *(.debug_str) } + .debug_loc 0 : { *(.debug_loc) } + .debug_macinfo 0 : { *(.debug_macinfo) } + /* SGI/MIPS DWARF 2 extensions */ + .debug_weaknames 0 : { *(.debug_weaknames) } + .debug_funcnames 0 : { *(.debug_funcnames) } + .debug_typenames 0 : { *(.debug_typenames) } + .debug_varnames 0 : { *(.debug_varnames) } + /* DWARF 3 */ + .debug_pubtypes 0 : { *(.debug_pubtypes) } + .debug_ranges 0 : { *(.debug_ranges) } + /* DWARF Extension. */ + .debug_macro 0 : { *(.debug_macro) } + .gnu.attributes 0 : { KEEP ( *(.gnu.attributes)) } + /DISCARD/ : { *(.note.GNU-stack) *(.gnu_debuglink) *(.gnu.lto_* ) } +} + *) + + (* function from + inputs and configuration + to + output sections-with-address-and-policy, output symbols-with-address-and-attributes, + discards, orphans + BUT + 1. policy is not a property of output sections, but of *inputs within outputs* + i.e. KEEP( *(.init)) + + what's helpful for writing such functions? + + e.g. only_if_ro (input_query) (output ): + + i.e. ++ only_if_ro OutputSection(AlwaysOutput, Nothing, ".eh_frame", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".eh_frame"))]) + + want to take a bunch of outputs + and return a bunch of outputs? + + if so, need to return a "current address" + + *) + (DefineSymbol(ProvideIfUsed, "__executable_start", default_symbol_spec)) + ; AdvanceAddress(AddressExprFn alloc_fn1_ref) + ; OutputSection(AlwaysOutput, None, ".interp", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".interp"))]) + ; OutputSection(AlwaysOutput, None, ".note.gnu.build-id", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".note.gnu.build-id"))]) + ; OutputSection(AlwaysOutput, None, ".hash", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".hash"))]) + ; OutputSection(AlwaysOutput, None, ".gnu.hash", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".gnu.hash"))]) + ; OutputSection(AlwaysOutput, None, ".dynsym", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynsym"))]) + ; OutputSection(AlwaysOutput, None, ".dynstr", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynstr"))]) + ; OutputSection(AlwaysOutput, None, ".gnu.version", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".gnu.version"))]) + ; OutputSection(AlwaysOutput, None, ".gnu.version_d", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".gnu.version_d"))]) + ; OutputSection(AlwaysOutput, None, ".gnu.version_r", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".gnu.version_r"))]) + ; OutputSection(AlwaysOutput, None, ".rela.dyn", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.init")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.text" s || (name_matches ".rela.text.*" s || name_matches ".rela.gnu.linkonce.t.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.rodata" s || (name_matches ".rela.rodata.*" s || name_matches ".rela.gnu.linkonce.r.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.data" s || (name_matches ".rela.data.*" s || name_matches ".rela.gnu.linkonce.d.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.tdata" s || (name_matches ".rela.tdata.*" s || name_matches ".rela.gnu.linkonce.td.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.tbss" s || (name_matches ".rela.tbss.*" s || name_matches ".rela.gnu.linkonce.tb.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.ctors")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.got")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.bss" s || (name_matches ".rela.bss.*" s || name_matches ".rela.gnu.linkonce.b.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.ldata" s || (name_matches ".rela.ldata.*" s || name_matches ".rela.gnu.linkonce.l.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rela.lbss" s || (name_matches ".rela.lbss.*" s || name_matches ".rela.gnu.linkonce.lb.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.ifunc")) + ]) + ; OutputSection(AlwaysOutput, None, ".rela.plt", [ + InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.plt")) + ; DefineSymbol(ProvideIfUsed, "__rela_iplt_start", (Nat_big_num.of_int 0, make_symbol_info stb_local stt_notype (* FIXME *), make_symbol_other stv_hidden)) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".rela.iplt")) + ; DefineSymbol(ProvideIfUsed, "__rela_iplt_end", (Nat_big_num.of_int 0, make_symbol_info stb_local stt_notype (* FIXME *), make_symbol_other stv_hidden)) + ]) + ; OutputSection(AlwaysOutput, None, ".init", [ + InputQuery(KeepEvenWhenGC, SeenOrder, filter_and_concat (name_matches ".init")) + ]) + ; OutputSection(AlwaysOutput, None, ".plt", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".plt")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".iplt")) + ]) + ; OutputSection(AlwaysOutput, None, ".plt.bnd", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".plt.bnd"))]) + ; OutputSection(AlwaysOutput, None, ".text", [ + InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".text.unlikely" s || (name_matches ".text.*_unlikely" s || name_matches ".text.unlikely.*" s) + )) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".text.exit" s || name_matches ".text.exit.*" s)) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".text.startup" s || name_matches ".text.startup.*" s)) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".text.hot" s || name_matches ".text.hot.*" s)) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".text" s || (name_matches ".stub" s || (name_matches ".text.*" s || name_matches ".gnu.linkonce.t.*" s)))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + (* ".gnu.warning sections are handled specially by elf32.em." + * GAH. That means that what we specify here is not (completely) what + * needs to happen with these sections. *) + fun s -> name_matches ".gnu_warning" s)) + ]) + ; OutputSection(AlwaysOutput, None, ".fini", [ + InputQuery(KeepEvenWhenGC, SeenOrder, filter_and_concat (name_matches ".fini")) + ]) + ; DefineSymbol(ProvideIfUsed, "__etext", default_symbol_spec) + ; DefineSymbol(ProvideIfUsed, "_etext", default_symbol_spec) + ; DefineSymbol(ProvideIfUsed, "etext", default_symbol_spec) + ; OutputSection(AlwaysOutput, None, ".rodata", [ + InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".rodata" s || (name_matches ".rodata.*" s || name_matches ".gnu.linkonce.r.*" s) + ))]) + ; OutputSection(AlwaysOutput, None, ".eh_frame_hdr", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".eh_frame_hdr")) ]) + ; OutputSection(OnlyIfRo, None, ".eh_frame", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".eh_frame"))]) + ; OutputSection(OnlyIfRo, None, ".gcc_except_table", [InputQuery(DefaultKeep, DefaultSort, + filter_and_concat (fun s -> name_matches ".gcc_except_table" s || name_matches ".gcc_except_table.*" s))]) + ; OutputSection(OnlyIfRo, None, ".exception_ranges", [InputQuery(DefaultKeep, DefaultSort, + filter_and_concat (fun s -> name_matches ".exception_ranges" s || name_matches ".exception_ranges*" s))]) + ; AdvanceAddress(AddressExprFn alloc_fn2_ref) + ; MarkAndAlignDataSegment( Nat_big_num.mul (Nat_big_num.mul(Nat_big_num.of_int (* a.maxpagesize *)2)(Nat_big_num.of_int 1024))(Nat_big_num.of_int 1024) (* <-- for some reason binutils assumes 2MB max page size, + even if ABI says smaller *), a.commonpagesize) + ; OutputSection(OnlyIfRw, None, ".eh_frame", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".eh_frame"))]) + ; OutputSection(OnlyIfRw, None, ".gcc_except_table", [InputQuery(DefaultKeep, DefaultSort, + filter_and_concat (fun s -> name_matches ".gcc_except_table" s || name_matches ".gcc_except_table.*" s))]) + ; OutputSection(OnlyIfRw, None, ".exception_ranges", [InputQuery(DefaultKeep, DefaultSort, + filter_and_concat (fun s -> name_matches ".exception_ranges" s || name_matches ".exception_ranges*" s))]) + ; OutputSection(AlwaysOutput, None, ".tdata", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat + (fun s -> name_matches ".tdata" s || (name_matches ".tdata.*" s || name_matches ".gnu.linkonce.td.*" s)))]) + ; OutputSection(AlwaysOutput, None, ".tbss", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat + (fun s -> name_matches ".tbss" s || (name_matches ".tbss.*" s || name_matches ".gnu.linkonce.tb.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".tcommon"))]) + ; OutputSection(AlwaysOutput, None, ".preinit_array", [ + DefineSymbol(ProvideIfUsed, "__preinit_array_start", default_symbol_spec) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (fun s -> name_matches ".preinit_array" s)) + ; DefineSymbol(ProvideIfUsed, "__preinit_array_end", default_symbol_spec) + ]) + ; OutputSection(AlwaysOutput, None, ".init_array", [ + DefineSymbol(ProvideIfUsed, "__init_array_start", default_symbol_spec) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat (fun s -> name_matches ".init_array.*" s)) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat (fun s -> name_matches ".ctors.*" s)) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat + (fun s -> name_matches ".init_array" s + || (name_matches ".ctors" s && not (file_matches "*crtbegin.o" s || (file_matches "*crtbegin?.o" s + || (file_matches "*crtend.o" s || file_matches "*crtend?.o " s))))) + ) + ; DefineSymbol(ProvideIfUsed, "__init_array_end", default_symbol_spec) + ]) + ; OutputSection(AlwaysOutput, None, ".fini_array", [ + DefineSymbol(ProvideIfUsed, "__fini_array_start", default_symbol_spec) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat (fun s -> name_matches ".fini_array.*" s)) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat (fun s -> name_matches ".dtors.*" s)) + ; InputQuery(KeepEvenWhenGC, ByInitPriority, filter_and_concat + (fun s -> name_matches ".fini_array" s + || (name_matches ".dtors" s && not (file_matches "*crtbegin.o" s || (file_matches "*crtbegin?.o" s + || (file_matches "*crtend.o" s || file_matches "*crtend?.o " s))))) + ) + ; DefineSymbol(ProvideIfUsed, "__fini_array_end", default_symbol_spec) + ]) + ; OutputSection(AlwaysOutput, None, ".ctors", [ + InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (fun s -> file_matches "*crtbegin.o" s && name_matches ".ctors" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (fun s -> file_matches "*crtbegin?.o" s && name_matches ".ctors" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat + (fun s -> not (file_matches "*crtend.o" s || file_matches "*crtend?.o" s) && name_matches ".ctors" s)) + ; InputQuery(KeepEvenWhenGC, ByName, filter_and_concat (fun s -> name_matches ".ctors.*" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat + (fun s -> (file_matches "*crtend.o" s || file_matches "*crtend?.o" s) && name_matches ".ctors" s)) + (* NOTE: this exclusion is implicit in the usual linker script, + * because it won't match an input section more than once. We should + * just replicate this behaviour, since other parts of the script might rely on it + * less obviously. *) + ]) + ; OutputSection(AlwaysOutput, None, ".dtors", [ + InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (fun s -> file_matches "*crtbegin.o" s && name_matches ".dtors" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (fun s -> file_matches "*crtbegin?.o" s && name_matches ".dtors" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat + (fun s -> not (file_matches "*crtend.o" s || file_matches "*crtend?.o" s) && name_matches ".dtors" s)) + ; InputQuery(KeepEvenWhenGC, ByName, filter_and_concat (fun s -> name_matches ".dtors.*" s)) + ; InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat + (fun s -> (file_matches "*crtend.o" s || file_matches "*crtend?.o" s) && name_matches ".dtors" s)) + ]) + ; OutputSection(AlwaysOutput, None, ".jcr", [InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (name_matches ".jcr"))]) + ; OutputSection(AlwaysOutput, None, ".data.rel.ro", [ + InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".data.rel.ro.local*" s || name_matches ".gnu.linkonce.d.rel.ro.local.*" s + )); + InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".data.rel.ro" s || (name_matches ".data.rel.ro.*" s || name_matches ".gnu.linkonce.d.rel.ro.*" s) + )) + ]) + ; OutputSection(AlwaysOutput, None, ".dynamic", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynamic"))]) + ; OutputSection(AlwaysOutput, None, ".got", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".got")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".igot")) + ]) + ; MarkDataSegmentRelroEnd (*(fun secs -> (if (sizeof ".got.plt" secs) >= 24 then 24 else 0, (fun pos -> pos)))*) + ; OutputSection(AlwaysOutput, None, ".got.plt", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".got.plt")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".igot.plt")) + ]) + ; OutputSection(AlwaysOutput, None, ".data", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".data" s || (name_matches ".data.*" s || name_matches ".gnu.linkonce.d.*" s))) + (* the script also has SORT(CONSTRUCTORS) here, but it has no effect for ELF (I think) *) + ]) + ; OutputSection(AlwaysOutput, None, ".data1", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".data1"))]) + ; DefineSymbol(AlwaysDefine, "_edata", default_symbol_spec) + ; DefineSymbol(ProvideIfUsed, "edata", default_symbol_spec) + ; (* . = .; <-- does this do anything? YES! It forces an output section to be emitted. + Since it occurs *outside* any output section, + it is assumed to start + *) + DefineSymbol(AlwaysDefine, "__bss_start", default_symbol_spec) + ; OutputSection(AlwaysOutput, None, ".bss", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynbss")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".bss" s || (name_matches ".bss.*" s || name_matches ".gnu.linkonce.b.*" s))) + ; InputQuery(DefaultKeep, DefaultSort, (fun inputlist -> + (*let _ = errln "Looking for commons" in *) + let result = (filter_and_concat is_common inputlist) + in + (*let _ = errln ("Got " ^ (show (length (result))) ^ " commons; sanity check: input list contains " ^ + (show (length inputlist)) ^ " of which " ^ + (show (length (List.filter (fun inp -> match inp with + Common _ -> true + | _ -> false + end) inputlist))) ^ " are commons." + ) + in*) result) + ) + ]) + ; AdvanceAddress(AddressExprFn alloc_fn3_ref) + ; OutputSection(AlwaysOutput, None, ".lbss", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynlbss")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".dynlbss")) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".lbss" s || (name_matches ".lbss.*" s || name_matches ".gnu.linkonce.lb.*" s) + )) + ; InputQuery(DefaultKeep, DefaultSort, filter_and_concat (is_large_common)) + ]) + ; AdvanceAddress(AddressExprFn alloc_fn4_ref) + ; AdvanceAddress(AddressExprFn alloc_fn5_ref) + ; OutputSection(AlwaysOutput, Some (AddressExprFn alloc_fn6_ref), + ".lrodata", + [InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".lrodata" s || (name_matches ".lrodata.*" s || name_matches ".gnu.linkonce.lr.*" s) + )) + ; AdvanceAddress(AddressExprFn alloc_fn7_ref) + ]) + ; AdvanceAddress(AddressExprFn alloc_fn8_ref) + ; DefineSymbol(AlwaysDefine, "_end", default_symbol_spec) + ; DefineSymbol(ProvideIfUsed, "end", default_symbol_spec) + ; MarkDataSegmentEnd + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stab", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stab"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stabstr", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stabstr"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stab.excl", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stab.excl"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stab.exclstr", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stab.exclstr"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stab.index", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stab.index"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".stab.indexstr", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".stab.indexstr"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".comment", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".comment"))]) + (* DWARF debug sections. + Symbols in the DWARF debugging sections are relative to the beginning + of the section so we begin them at 0. *) + (* DWARF 1 *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".line", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".line"))]) + (* GNU DWARF 1 extensions *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_srcinfo", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_srcinfo"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_sfnames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_sfname"))]) + (* DWARF 1.1 and DWARF 2 *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_aranges", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_aranges"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_pubnames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_pubnames"))]) + (* DWARF 2 *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_info", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".debug_info" s || name_matches ".gnu.linkonce.wi.*" s))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_abbrev", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_abbrev"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_line", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat ( + fun s -> name_matches ".debug_line" s || (name_matches ".debug_line.*" s || name_matches ".debug_line_end" s)))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_frame", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_frame"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_str", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_str"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_loc", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_loc"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_macinfo", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_macinfo"))]) + (* SGI/MIPS DWARF 2 extensions *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_weaknames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_weaknames"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_funcnames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_funcnames"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_typenames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_typenames"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_varnames", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_varnames"))]) + (* DWARF 3 *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_pubtypes", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_pubtypes"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_ranges", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_ranges"))]) + (* DWARF Extension. *) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".debug_macro", [InputQuery(DefaultKeep, DefaultSort, filter_and_concat (name_matches ".debug_macro"))]) + ; OutputSection(AlwaysOutput, Some address_zero_fn, ".gnu.attributes", [InputQuery(KeepEvenWhenGC, DefaultSort, filter_and_concat (name_matches ".gnu.attributes"))]) + ; DiscardInput(filter_and_concat (fun s -> name_matches ".note.GNU-stack" s || (name_matches ".gnu_debuglink" s || name_matches ".gnu.lto_*" s))) + (* NOTE: orphan sections are dealt with in the core linking logic, + not the script. *) + ])) + +let interpret_guard guard comp name1:bool= + ((match guard with + always0 -> true + | OnlyIfRo -> + let v = (List.for_all (fun comp_el -> (match comp_el with + IncludeInputSection(retainpol, (* fname, linkable_idx, shndx, isec, img *) irec) -> Nat_big_num.equal(Nat_big_num.of_int + (* is this section read-only? if it doesn't have shf_write, yes *)0) (Nat_big_num.bitwise_and irec.isec.elf64_section_flags shf_write) + | _ -> (* holes, common symbols and provided symbols shouldn't prevent ONLY_IF_RO *) true + )) comp) + in (*let _ = errln ("only_if_ro evaluated " ^ (show v) ^ " for output section " ^ name) + in*) v + | OnlyIfRw -> + let v = (List.for_all (fun comp_el -> (match comp_el with + IncludeInputSection(retainpol, (* fname, linkable_idx, shndx, isec, img *) irec) -> not (Nat_big_num.equal(Nat_big_num.of_int + (* is this section read-only? if it doesn't have shf_write, yes *)0) (Nat_big_num.bitwise_and irec.isec.elf64_section_flags shf_write)) + | _ -> (* holes etc. shouldn't prevent ONLY_IF_RW *) true + )) comp) + in (*let _ = errln ("only_if_rw evaluated " ^ (show v) ^ " for output section " ^ name) + in *)v + )) + +(* Passes over the script: + * + * 1. assign input sections to output sections (or discard) and define symbols. + * + * 2. compute def-use and optionally GC, removing unwanted sections and symbols + * + * 3. build image, assigning addresses as we go. + * + * Some passes require matching/retrieving what a previous pass on the same node did. + * So we give each script element a natural "idx" label. + *) +(*val label_script_aux : natural -> linker_control_script -> labelled_linker_control_script*) +let label_script_aux start script1:(script_element*Nat_big_num.num)list= + (mapi (fun i -> fun el -> (el, ( Nat_big_num.add start (Nat_big_num.of_int i)))) script1) + +(*val label_script : linker_control_script -> labelled_linker_control_script*) +let label_script script1:(script_element*Nat_big_num.num)list= (label_script_aux(Nat_big_num.of_int 0) script1) + +type input_output_assignment = ( input_spec list * (output_section_spec * Nat_big_num.num) list) + +(*val assign_inputs_to_output_sections : + input_output_assignment -> (* accumulator: list of discards, list of output compositions (these include symbols) *) + set (natural * natural) -> (* used sections *) + set (natural * natural * natural) -> (* used commons *) + list input_spec -> (* remaining inputs *) + maybe (output_section_spec * natural) -> (* cur_sec -- the current output section spec and its OutputSection script item idx *) + maybe input_spec -> (* last input section to be output -- might not have one *) + (input_spec -> input_spec -> Basic_classes.ordering) (* "seen ordering" *) -> + labelled_linker_control_script -> + input_output_assignment*) (* accumulated result *) +let rec assign_inputs_to_output_sections acc used_sections used_commons inputs (cur_output_sec : (output_section_spec * Nat_big_num.num)option) last_input_sec seen_ordering script1:(input_spec)list*(output_section_spec*Nat_big_num.num)list= + (let (rev_discards, rev_outputs) = acc in + let flush_output_sec + = (fun maybe_output_sec_and_idx -> (match (maybe_output_sec_and_idx : (output_section_spec * Nat_big_num.num)option) with + Some (OutputSectionSpec (guard, addr, name1, comp), script_idx) -> + (*let _ = errln ("Guardedly flushing output section named " ^ name ^ " with " ^ ( + match addr with Nothing -> "no address yet" | Just a -> "address 0x" ^ (hex_string_of_natural a) end + ) ^ " and composed of " ^ (show (length comp)) ^ " constituents.") + in*) + (* evaluate the guard *) + if interpret_guard guard comp name1 + then (* do it *) (rev_discards, (((OutputSectionSpec (guard, addr, name1, comp)), script_idx) :: rev_outputs)) + else (* ignore it *) acc + | None -> (* for convenience, make this a no-op rather than error *) + (* failwith "internal error: flushing output section with no current output section" *) + acc + )) + in + (match script1 with + [] -> flush_output_sec cur_output_sec + | (element1, idx1) :: more_elements_and_idx -> + let do_nothing = (acc, used_sections, used_commons, cur_output_sec, last_input_sec) + in + let (new_acc, new_used_sections, new_used_commons, (new_cur_output_sec : (output_section_spec * Nat_big_num.num)option), new_last_input_sec) + = ((match element1 with + DefineSymbol(symdefpol, name1, (symsize, syminfo, symother)) -> + (* Label the current section in the image + * with a new symbol definition. If there isn't + * a current section, use the ABS section (what is that labelling?). *) + (acc, + used_sections, + used_commons, + (match (cur_output_sec : (output_section_spec * Nat_big_num.num)option) with + None -> (*let _ = errln ("FIXME: for defining `" ^ name ^ "': ABS symbol defs not yet supported") in*) None + | Some ((OutputSectionSpec (guard, maybe_addr, secname1, comp)), output_script_idx) -> + (*let _ = errln ("Including a symbol named `" ^ name ^ " in composition of output section `" ^ secname ^ "'") in*) + Some ((OutputSectionSpec (guard, maybe_addr, secname1, + List.rev_append (List.rev comp) [ProvideSymbol(symdefpol, name1, (symsize, syminfo, symother))])) + , output_script_idx) + ), + last_input_sec) + | AdvanceAddress(AddressExprFn advance_fn) -> + (* If we're inside a section, insert a hole, + * else just update the logical address *) + (*let _ = errln ("Advancing location counter") in*) + (match cur_output_sec with + None -> do_nothing + (* This assignment is setting a new LMA. *) + (* (acc, *) + | Some (sec, idx1) -> do_nothing + ) + | MarkAndAlignDataSegment(maxpagesize1, commonpagesize1) -> + (* The "data segment end" is a distinguished label, + * so we can encode the whole thing into a conditional. *) + (*let _ = errln ("Mark/aligning data segment") in*) + do_nothing + | MarkDataSegmentEnd -> + (*let _ = errln ("Marking data segment end") in*) + do_nothing + | MarkDataSegmentRelroEnd(*(fun_from_secs_to_something)*) -> + (*let _ = errln ("Marking data segment relro end") in*) + do_nothing + | OutputSection(outputguard, maybe_expr, name1, sub_elements) -> + (* If we have a current output section, finish it and add it to the image. + * Q. Where do guards ("ONLY_IF_RO" etc) get evaluated? + * A. Inside flush_output_sec. *) + (*let _ = errln ("Recursively composing a new output section `" ^ name ^ "'...") in*) + let acc_with_output_sec = (flush_output_sec cur_output_sec) + in + let new_cur_output_sec = (Some((OutputSectionSpec(outputguard, (* maybe_expr pos secs *) None, name1, [])), idx1)) + in + (* Recurse down the list of input queries, assigning them to this output sec + * Note that output sections may not nest within other output sections. + * At the end of the list of sub_elements, we will flush the section we built up. + *) + let final_acc + = (assign_inputs_to_output_sections acc used_sections used_commons inputs new_cur_output_sec last_input_sec seen_ordering (label_script sub_elements)) + in + (* NOTE that this sub-accumulation will never add a new output section + * because output sections can't nest. *) + (final_acc, used_sections, used_commons, (* cur_output_sec *) None, last_input_sec) + | DiscardInput(selector) -> + let selected = (selector inputs) + in + let (rev_discards, rev_outputs) = acc in + (*let _ = Missing_pervasives.errln ("Processing discard rule; selected " ^ (show (length selected)) + ^ " inputs.") + in*) + (( List.rev_append (List.rev (List.rev (let x2 = + ([]) in List.fold_right (fun i x2 -> if true then i :: x2 else x2) selected x2))) rev_discards, rev_outputs), used_sections, used_commons, cur_output_sec, last_input_sec) + | InputQuery(retainpol, sortpol, selector) -> + (* Input queries can only occur within an output section. *) + (match cur_output_sec with + None -> failwith "linker script error: input query without output section" + | Some ((OutputSectionSpec (output_guard1, output_sec_addr, output_sec_name, output_composition)), output_script_idx) -> + (* Add them to the current output spec. We have to be careful about ordering: + * according to the GNU ld manual (and observed behaviour), by default + * "the linker will place files and sections matched by wildcards in the order + * in which they are seen during the link". For .o files on the command line, + * this means the command line order. But for members of archives, it means + * the order in which they were "pulled in" during input enumeration. We + * actually don't compute this here; it is passed in from our caller in link.lem. *) + let sortfun = ((match sortpol with + DefaultSort -> List.sort seen_ordering (* FIXME: pay attention to command line *) + | SeenOrder -> List.sort seen_ordering + | ByName -> List.sort compareInputSpecByName + | ByNameThenAlignment -> List.sort compareInputSpecByNameThenAlignment + | ByAlignment -> List.sort compareInputSpecByAlignment + | ByAlignmentThenName -> List.sort compareInputSpecByAlignmentThenName + | ByInitPriority -> List.sort compareInputSpecByInitPriority + )) + in + let selected = (selector inputs) + in + let selected_deduplicated = (List.filter (fun inp -> (match inp with + InputSection(irec) -> not ( Pset.mem(irec.idx, irec.shndx) used_sections) + | Common(idx1, fname1, img2, def) -> not ( Pset.mem(idx1, def.def_sym_scn, def.def_sym_idx) used_commons) + )) selected) + in + (*let _ = errln ("Evaluated an input query, yielding " ^ + (show (length selected)) ^ " undeduplicated and " ^ + (show (length selected_deduplicated)) ^ + " deduplicated results, to be added to composition currently of " ^ + (show (length output_composition)) ^ " items.") in*) + (* Search input memory images for matching sections. *) + let sorted_selected_inputs = (sortfun selected_deduplicated) + in + let (sectionMatchList : input_section_rec list) = (Lem_list.mapMaybe (fun inp -> + (match inp with + InputSection(x) -> + (*let _ = errln ("Matched an input section named " ^ x.isec.elf64_section_name_as_string ^ + " in a file " ^ x.fname ^ " with first 20 bytes " ^ (show (take 20 + (let maybe_elname = elf_memory_image_element_coextensive_with_section x.shndx x.img + in + match maybe_elname with + Nothing -> failwith ("impossible: no such element (matching shndx " ^ (show x.shndx) ^ ")") + | Just idstr -> + match Map.lookup idstr x.img.elements with + Just el -> el.contents + | Nothing -> failwith "no such element" + end + end + )))) + in*) + Some x + | _ -> None + )) sorted_selected_inputs) + in + let commonMatchList = (Lem_list.mapMaybe (fun inp -> + (match inp with + | Common(idx1, fname1, img2, def) -> Some(idx1, fname1, img2, def) + | _ -> None + )) sorted_selected_inputs) + in + + (acc, + Pset.(union) used_sections (let x2 =(Pset.from_list (pairCompare Nat_big_num.compare Nat_big_num.compare) + []) in List.fold_right + (fun irec x2 -> if true then Pset.add (irec.idx, irec.shndx) x2 else x2) + sectionMatchList x2), + Pset.(union) used_commons (let x2 =(Pset.from_list (tripleCompare Nat_big_num.compare Nat_big_num.compare Nat_big_num.compare) + []) in List.fold_right + (fun(idx1, fname1, img2, def) x2 -> + if true then Pset.add (idx1, def.def_sym_scn, def.def_sym_idx) x2 else x2) + commonMatchList x2), + (* new_cur_output_spec *) Some ( + (OutputSectionSpec(output_guard1, output_sec_addr, output_sec_name, + List.rev_append (List.rev (List.rev_append (List.rev output_composition) (let x2 = + ([]) in List.fold_right + (fun input_sec x2 -> + if true then + IncludeInputSection + (retainpol, (* input_sec.fname, input_sec.idx, input_sec.shndx, input_sec.isec, input_sec.img *) input_sec) + :: x2 else x2) sectionMatchList x2))) (let x2 = ([]) in List.fold_right + (fun(idx1, fname1, img2, def) x2 -> + if true then + IncludeCommonSymbol (DefaultKeep, fname1, idx1, def, img2) :: x2 else + x2) commonMatchList x2) + )), output_script_idx), + last_input_sec + ) + ) + )) + in + (*let _ = match new_cur_output_sec with + Just (OutputSectionSpec (guard, addr, name, comp), script_idx) -> + errln ("Now output section `" ^ name ^ "' is composed of " ^ (show (length comp)) ^ " elements.") + | Nothing -> () + end in*) + assign_inputs_to_output_sections new_acc new_used_sections new_used_commons + (inputs : input_spec list) + (new_cur_output_sec) + (new_last_input_sec : input_spec option) + seen_ordering + (more_elements_and_idx : labelled_linker_control_script) + )) + +(* NOTE: this is also responsible for deleting any PROVIDEd symbols that + * were not actually referenced. BUT HOW, if we haven't built the image and + * hence haven't added the symbols yet? Symbols affect reachability, so + * we're going to have to figure this out. Really we want a memory image that + * does not yet have addresses assigned, but does have the symbols inserted. + * BUT even that is not right, because we want to be able to remove some + * sections (GC them). So the section composition is not yet fixed. So we have + * a problem. + * + * Note that the only symbols we have to remove are ones that were PROVIDEd + * in our output composition. So doing the GC on output compositions seems + * sane. We can get the graph's edge list by inspecting the constituent memory + * images from which each output section composition element is drawn. + * Collecting sections and collecting symbols seems fair. Note that symbols + * can never be placed mid-section (I don't think?? they can use arbitrary + * expressions, but not that depend on whether an input section is included + * or not) so removing a section should never imply the removal of a symbol. + * + * So that implies we need not yet build a memory image. + *) +(*val compute_def_use_and_gc : allocated_sections_map -> allocated_sections_map*) +let compute_def_use_and_gc outputs_by_name:allocated_sections_map= outputs_by_name (* FIXME: implement GC *) + +let output_section_type comp:Nat_big_num.num= +( + (* are we composed entirely of nobits sections and common symbols? *)let all_nobits = (List.for_all (fun comp_el -> + (match comp_el with + IncludeInputSection(retain_pol,(* fname, linkable_idx, shndx, isec, img *) irec) -> Nat_big_num.equal + irec.isec.elf64_section_type sht_nobits + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> true + | _ -> (* padding and symdefs can be nobits *) true + )) comp) + in + if all_nobits then sht_nobits else sht_progbits) + +let output_section_flags comp:Nat_big_num.num= + (let writable = (List.exists (fun comp_el -> + (match comp_el with + IncludeInputSection(retain_pol, (* fname, linkable_idx, shndx, isec, img *) irec) -> + flag_is_set shf_write irec.isec.elf64_section_flags + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + (* assume common symbols are writable *) true + | _ -> (* padding and symdefs do not make a section writable *) false + )) comp) + in + let executable = (List.exists (fun comp_el -> + (match comp_el with + IncludeInputSection(retain_pol,(* fname, linkable_idx, shndx, isec, img *) irec) -> + flag_is_set shf_execinstr irec.isec.elf64_section_flags + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + (* assume common symbols are not executable, since they're zeroed *) false + | _ -> (* padding and symdefs do not make a section executable -- HMM *) false + )) comp) + in + let alloc = (List.exists (fun comp_el -> + (match comp_el with + IncludeInputSection(retain_pol, (* fname, linkable_idx, shndx, isec, img *) irec) -> + flag_is_set shf_alloc irec.isec.elf64_section_flags + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + (* common symbols are allocatable *) true + | ProvideSymbol(pol, name1, spec) -> + (* symbols make a section allocatable? HMM *) true + | _ -> (* padding does not make a section allocatable *) false + )) comp) + in + let is_thread_local_yesnomaybe = (fun comp_el -> + (match comp_el with + IncludeInputSection(retain_pol, (* fname, linkable_idx, shndx, isec, img *) irec) -> + Some(flag_is_set shf_tls irec.isec.elf64_section_flags) + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + (* FIXME: support tcommon *) Some(false) + | ProvideSymbol(pol, name1, spec) -> + (* linker script symbols shouldn't be defined here, unless they can be declared thread-local (FIXME: can they?) *) + Some false + | _ -> (* padding does not make a section thread-local, or non-. *) None + ) + ) + in + let thread_local = ( + (* Is any element positively thread-local? *) + let v = (List.fold_left (fun acc_ynm -> fun comp_el -> + let new_ynm = (is_thread_local_yesnomaybe comp_el) + in + (match (acc_ynm, new_ynm) with + (None, None) -> None + | (None, Some x) -> Some x + | (Some x, None) -> Some x + | (Some true, Some false) -> Some true + | (Some true, Some true) -> Some true + | (Some false, Some false) -> Some false + | (Some true, Some false) -> Some true + )) None comp) + in + if (Lem.option_equal (=) v (Some(true))) && not ( (Lem.option_equal (=)(Some(true)) (* are *all* either don't-care or positively thread-local? *) + (List.fold_left (fun acc_ynm -> fun comp_el -> + let new_ynm = (is_thread_local_yesnomaybe comp_el) + in + (match (acc_ynm, new_ynm) with + (None, None) -> None + | (None, Some x) -> Some x + | (Some x, None) -> Some x + | (Some true, Some false) -> Some false + | (Some true, Some true) -> Some true + | (Some false, Some false) -> Some false + | (Some true, Some false) -> Some false + )) None comp))) then failwith "error: section mixes thread-local and non-thread-local inputs" + else (match v with + None -> false + | Some x -> x + ) + ) + in + Nat_big_num.bitwise_or + (if thread_local then shf_tls else Nat_big_num.of_int 0) + (Nat_big_num.bitwise_or + (if executable then shf_execinstr else Nat_big_num.of_int 0) + (Nat_big_num.bitwise_or + (if writable then shf_write else Nat_big_num.of_int 0) + (if alloc then shf_alloc else Nat_big_num.of_int 0) + ) + )) + +let symbol_def_for_provide_symbol name1 size2 info other control_script_linkable_idx:symbol_definition= + ({ + def_symname = (*let _ = errln ("Linker script is defining symbol called `" ^ name ^ "'") in*) name1 + ; def_syment = ({ + elf64_st_name = (Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) (* ignored *) + ; elf64_st_info = info + ; elf64_st_other = other + ; elf64_st_shndx = (Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) + ; elf64_st_value = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) (* ignored *) + ; elf64_st_size = (Uint64.of_string (Nat_big_num.to_string size2)) + }) + ; def_sym_scn =(Nat_big_num.of_int 0) + ; def_sym_idx =(Nat_big_num.of_int 0) + ; def_linkable_idx = control_script_linkable_idx + }) + +(*val assign_dot_to_itself : natural -> address_expr_fn_map allocated_sections_map -> (natural * address_expr_fn_map allocated_sections_map * address_expr_fn)*) +let assign_dot_to_itself fresh alloc_map:Nat_big_num.num*((Nat_big_num.num),(Nat_big_num.num ->allocated_sections_map ->Nat_big_num.num))Pmap.map*address_expr_fn= + (let fn = (fun dot -> fun _ -> dot) in + let alloc_map' = (Pmap.add fresh fn alloc_map) in + let fresh' = (Nat_big_num.add(Nat_big_num.of_int 1) fresh) in + (fresh', alloc_map', AddressExprFn fresh)) + +(*val build_image : + address_expr_fn_map allocated_sections_map -> (* global dictionary of address_expr_fn_ref -> address_expr_fn *) + elf_memory_image -> (* accumulator *) + natural -> (* location counter *) + allocated_sections_map -> (* outputs constructed earlier *) + (Map.map string (list (natural * binding))) -> (* bindings_by_name *) + labelled_linker_control_script -> + natural -> (* control_script_linkable_idx *) + (Map.map string (list symbol_definition)) -> (* linker_defs_by_name *) + (elf_memory_image * allocated_sections_map)*) (* accumulated result *) +let rec build_image alloc_map acc pos (AllocatedSectionsMap outputs_by_name) bindings_by_name script1 control_script_linkable_idx linker_defs_by_name:(any_abi_feature)annotated_memory_image*allocated_sections_map= + (let (add_output_section : (Nat_big_num.num * elf_memory_image) -> output_section_spec -> (Nat_big_num.num * elf_memory_image * Nat_big_num.num * output_section_spec)) + = (fun ((*scn_idx, *)pos, acc_img) -> + (fun (OutputSectionSpec (guard, addr, secname1, comp)) -> + (*let _ = errln ("Computing composition of output section `" ^ secname ^ "' from " ^ (show (length comp)) ^ " elements") + in*) + let unaligned_start_addr = ((match addr with + Some a -> failwith ("internal error: section " ^ (secname1 ^ ": did not expect address to be assigned yet")) + | None -> pos + )) + in + let align = (alignof_output_section comp) + in + (*let _ = errln ("Aligning start of output section " ^ secname ^ " up to a " ^ (show align) ^ "-byte address boundary") + in*) + let output_section_start_addr = (align_up_to align unaligned_start_addr) + in + let (end_addr, comp_addrs) = (do_output_section_layout_starting_at_addr output_section_start_addr (AllocatedSectionsMap outputs_by_name) comp) + in + let size2 = (Nat_big_num.sub_nat end_addr output_section_start_addr) + in + (*let _ = Missing_pervasives.outln ( + if List.null comp then secname else ( + ((space_padded_and_maybe_newline 16 secname) ^ + ("0x" ^ (left_zero_padded_to 16 (hex_string_of_natural output_section_start_addr))) ^ " " ^ + (left_space_padded_to 10 ("0x" ^ (hex_string_of_natural size)))) + ) + ) + in*) + let (concatenated_content, final_addr, new_range_tag_pairs) = (List.fold_left (fun (accum_pat, accum_current_addr, accum_meta) -> (fun (comp_el, comp_addr) -> + (*let _ = errln ("Adding an element to composition of output section `" ^ secname ^ "', current address 0x" ^ (hex_string_of_natural accum_current_addr)) + in*) + let make_line = (fun namestr -> (fun addrstr -> (fun szstr -> (fun rhs -> ( + (space_padded_and_maybe_newline(Nat_big_num.of_int 16) (" " ^ namestr)) ^ + (("0x" ^ (left_zero_padded_to(Nat_big_num.of_int 16) addrstr)) ^ (" " ^ + ((left_space_padded_to(Nat_big_num.of_int 10) ("0x" ^ szstr)) ^ (" " ^ rhs)))) + ))))) + in + let (sz, comp_el_pat, this_el_meta) = ((match comp_el with + | IncludeInputSection(retainpolicy, (* fname, linkable_idx, shndx, isec, img *) irec) -> + (* We want to get the input section as a byte pattern *) + (*let _ = errln ("Processing inclusion of input section `" ^ irec.isec.elf64_section_name_as_string + ^ "' from file `" ^ irec.fname + ^ "' into output section `" ^ secname + ^ "'") + in*) + let maybe_secname = (elf_memory_image_element_coextensive_with_section irec.shndx irec.img) + in + (match maybe_secname with + None -> failwith ("impossible: no such section" (*(matching irec.shndx " ^ (show irec.shndx) ^ ")""*)) + | Some idstr -> + (*let _ = errln ("Found element named " ^ idstr ^ " coextensive with section named " ^ + irec.isec.elf64_section_name_as_string ^ " in file " ^ irec.fname) + in*) + (match Pmap.lookup idstr irec.img.elements with + Some el -> + (*let _ = Missing_pervasives.outln (make_line irec.isec.elf64_section_name_as_string + (hex_string_of_natural comp_addr) (hex_string_of_natural irec.isec.elf64_section_size) + irec.fname) + in*) + let section_el_name = (get_unique_name_for_section_from_index irec.shndx irec.isec irec.img) + in + (*let _ = errln ("Copying metadata for output section `" ^ section_el_name ^ "'") in*) + let range_or_sym_is_in_this_sec = (fun maybe_range -> (fun tag -> + (* is it within the section we're outputting? + * first we needs its element name. *) + (* filter out ones that don't overlap *) + (match maybe_range with + Some(el_name, (start, len)) -> + (* img and shndx came as a unit, so they're definitely + * talking about the same file *) + (* shndx = sym_shndx *) + section_el_name = el_name + | None -> + (* ABS symbols have this property *) + (match tag with + SymbolDef(def) -> + (* don't match section symbols, or we'll be inundated *) + let sym_shndx = (Nat_big_num.of_string (Uint32.to_string def.def_syment.elf64_st_shndx)) + in + if not (Nat_big_num.equal sym_shndx shn_abs) || ( not (Nat_big_num.equal (get_elf64_symbol_type def.def_syment) stt_section)) then false + else ( + let abs_address = (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_value) + in + (* check it against our section *) + let section_end_addr = (Nat_big_num.add accum_current_addr irec.isec.elf64_section_size) + in + ( Nat_big_num.greater_equal abs_address accum_current_addr + && Nat_big_num.less abs_address section_end_addr) + (* FIXME: argument that this should be <=, i.e. can mark end addr *) + (* PROBLEM: this is all very well, but there's no reason why + * ABS symbols need to point at an address within some output + * section. They can just be arbitrary values. This is a bit of an + * abuse if we do it within the C language (to get the value, you + * have to do "(int) &sym", i.e. create a meaningless pointer + * intermediate) but arguably is okay in an impl-def way. + * + * WHAT to do? well, just always output the ABS symbols, for now. + * + * The example that provoked this is in glibc's + * locale/lc-address.c, which compiles down to create + * the following ABS symbol: + * + * 0000000000000001 g *ABS* 0000000000000000 _nl_current_LC_ADDRESS_used + * + * ... i.e. the _nl_current_LC_ADDRESS_used appears to be just a flag. + * + * Where can we handle this? We don't see ABS symbols since they + * aren't associated with sections. We simply need to copy over + * all the ABS symbols appearing in included input objects. + * That means there's no point doing anything with them here + * while we're fiddling with sections. Do it later in a whole- + * -image pass. + *) + && false (* ... at least until we see a better way *) + ) + | _ -> false + ) + ) + )) + in + let ranges_and_tags = (let x2 = + ([]) in List.fold_right + (fun(maybe_range, tag) x2 -> + if range_or_sym_is_in_this_sec maybe_range tag then + (maybe_range, tag) :: x2 else x2) (Pset.elements irec.img.by_range) + x2) + in + let included_defs = (let x2 = + ([]) in List.fold_right + (fun(maybe_range, def) x2 -> + if range_or_sym_is_in_this_sec maybe_range (SymbolDef (def)) then + def :: x2 else x2) + (elf_memory_image_defined_symbols_and_ranges irec.img) x2) + in + let included_global_defs = (let x2 = + ([]) in List.fold_right + (fun def x2 -> + if not + (Nat_big_num.equal + ( + (* filter out locals *) get_elf64_symbol_binding def.def_syment) + stb_local) then def :: x2 else x2) included_defs x2) + in + (* What symbol defs are being included? *) + (* For each global symbol defined in the section, output a line. *) + (*let _ = Missing_pervasives.outs (List.foldl (^) "" ( + List.map (fun def -> (make_line "" + (hex_string_of_natural (comp_addr + (natural_of_elf64_addr def.def_syment.elf64_st_value))) + (hex_string_of_natural (natural_of_elf64_xword def.def_syment.elf64_st_size)) + (" " ^ def.def_symname)) ^ "\n" + ) included_global_defs + )) + in*) + let (new_ranges_and_tags : (( element_range option) * ( any_abi_feature range_tag)) Pset.set) + = (Lem_set.setMapMaybe + (instance_Basic_classes_SetType_tup2_dict + (instance_Basic_classes_SetType_Maybe_maybe_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_var_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_Num_natural_dict + instance_Basic_classes_SetType_Num_natural_dict))) + instance_Basic_classes_SetType_var_dict) (instance_Basic_classes_SetType_tup2_dict + (instance_Basic_classes_SetType_Maybe_maybe_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_var_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_Num_natural_dict + instance_Basic_classes_SetType_Num_natural_dict))) + instance_Basic_classes_SetType_var_dict) (fun (maybe_range, tag) -> + (* How do we update existing metadata? In general, + * we get a new range. *) + let new_range = ((match maybe_range with + None -> None + | Some(el_name, (start, len)) -> + Some(secname1, +( (* FIXME: pass this through a section-to-element gensym. + We can just (for now) define output element names + to equal the section names, since we have no unnamed + output sections and no output common symbols. *)let new_start_off = (Nat_big_num.add start ( Nat_big_num.sub_nat comp_addr output_section_start_addr)) + in + (*let _ = errln ("Calculated element offset 0x" ^ (hex_string_of_natural new_start_off) ^ + " in element " ^ secname ^ " for tag at address 0x" ^ (hex_string_of_natural accum_current_addr) ^ + " , start offset 0x" ^ (hex_string_of_natural start) ^ ", output section start addr 0x" ^ + (hex_string_of_natural output_section_start_addr) ^ ", comp_addr 0x" ^ (hex_string_of_natural comp_addr)) + in*) + (new_start_off, + len))) + )) + in + (match tag with + (* If it's a section, we discard it. + * We will add a new section record at the end. (FIXME) *) + | FileFeature(ElfSection(idx1, isec1)) -> None + (* If it's a symbol def, we propagate it. + * We record its linkable idx, so we can + * match it later with the bindings we formed + * earlier. + * FIXME: this is a bit nasty. Perhaps we + * should replace syment with a minimal structure + * that avoids duplication. Same for isecs. *) + | SymbolDef(def) -> + (* if get_elf64_symbol_type def.def_syment = stt_section + then Nothing FIXME: also re-create the section symbol when we create the ElfSection + else *) (* This doesn't work -- some refs might be bound to this symbol. + Instead, strip the symbol when we generate the output symtab (FIXME). *) + (*let _ = errln ("Copying symbol named `" ^ def.def_symname ^ "'") + in*) + Some(new_range, SymbolDef({ + def_symname = (def.def_symname) + ; def_syment = (def.def_syment) + ; def_sym_scn = (def.def_sym_scn) + ; def_sym_idx = (def.def_sym_idx) + ; def_linkable_idx = (irec.idx) + })) + | AbiFeature(x) -> Some(new_range, AbiFeature(x)) + (* If it's a symbol ref with no reloc site, we discard it? *) + | SymbolRef(r) -> + (*let _ = if r.ref.ref_symname = "_start" then errln ("Saw ref to _start, " + ^ "in section " ^ irec.isec.elf64_section_name_as_string ^ " of linkable " ^ (show irec.idx)) + else () + in*) + let get_binding_for_ref = (fun symref -> (fun linkable_idx -> (fun fname1 -> + let name_matches1 = ((match Pmap.lookup symref.ref_symname bindings_by_name with Some x -> x | None -> [] )) + in + (match List.filter (fun (bi, ((r_idx, r, r_item), m_d)) -> Nat_big_num.equal r_idx linkable_idx && (r = symref)) name_matches1 with + [(b_idx, b)] -> (b_idx, b) + | [] -> failwith "no binding found" + | _ -> failwith ("ambiguous binding found for symbol `" ^ (symref.ref_symname ^ ("' in file " ^ fname1))) + ) + ))) + in + let (bi, b) = (get_binding_for_ref r.ref irec.idx irec.fname) + in + let ((ref_idx, ref1, ref_linkable), maybe_def) = b + in + (match r.maybe_reloc with + None -> None + (* If it's a reloc site, we need to somehow point it + * at the *definition* that it was bound to. YES. + * reloc_sites are + + type reloc_site = <| + ref_relent : elf64_relocation_a + ; ref_rel_scn : natural --the relocation section idx + ; ref_rel_idx : natural --the index of the relocation rec + ; ref_src_scn : natural --the section *from which* the reference logically comes + |> + + type elfNN_relocation_a = + <| elfNN_ra_offset : elf32_addr --Address at which to relocate + ; elfNN_ra_info : elf32_word --Symbol table index/type of relocation to apply + ; elfNN_ra_addend : elf32_sword --Addend used to compute value to be stored + |> + + * ... of which ref_src_scn, ref_rel_idx, + * ref_rel_scn and elfNN_ra_offset can be ignored. + * + * What *is* important is that we somehow point at + * the symbol definition (or perhaps *un*definition, + * if we're generating a shared library) that it + * refers to. + * + * For that, we update ra_info use the 1 + binding_idx, + * i.e. consider that there is a fresh symbol table + * and that it has a distinct entry for each binding. + * + * FIXME: we also need to account for + * reloc decisions -- MakePIC etc. + *) + | Some(rs) -> Some(new_range, SymbolRef( + { ref = ({ + (* This is not the place to be fixing up + * symbol references. We can't yet patch the element content, + * because we haven't yet decided on the address of everything. + * + * That said, we *do* need to represent the old ref in the new + * linked-image context. That's *all* we should be doing, right now. + * + *) + ref_symname = (ref1.ref_symname) + ; ref_syment = + ({ elf64_st_name = (Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) (* unused *) + ; elf64_st_info = (ref1.ref_syment.elf64_st_info) + ; elf64_st_other = (ref1.ref_syment.elf64_st_other) + ; elf64_st_shndx = (Uint32.of_string (Nat_big_num.to_string (Nat_big_num.of_int (* shn_abs *)0))) + ; elf64_st_value = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) + ; elf64_st_size = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) + }) + ; ref_sym_scn =(Nat_big_num.of_int 0) + ; ref_sym_idx =(Nat_big_num.of_int 0) + (* match maybe_def with Just _ -> 1+bi | Nothing -> 0 end *) + }) + ; maybe_reloc = (Some { + ref_relent = ({ + elf64_ra_offset = (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int 0))) (* ignored *) + ; elf64_ra_info = (Uint64.logor + (* HACK: use bi as the symbol index. *) + (Uint64.of_string (Nat_big_num.to_string (get_elf64_relocation_a_type rs.ref_relent))) + (Uint64.shift_left + (* ... actually, don't, now we have maybe_def_bound_to *) + (Uint64.of_string (Nat_big_num.to_string (Nat_big_num.of_int (* (1+bi) *)0)))( 32) + ) + ) + ; elf64_ra_addend = (rs.ref_relent.elf64_ra_addend) + }) + ; ref_rel_scn =(Nat_big_num.of_int 0) + ; ref_rel_idx =(Nat_big_num.of_int 0) + ; ref_src_scn =(Nat_big_num.of_int 0) + }) + ; maybe_def_bound_to = +( + (* Re-search the bindings list for a match, because we might have + * re-bound this symbol since we created the image. FIXME: since + * we do this, is there anything gained from populating this field + * earlier? Probably best not to. *)let (possible_bindings : (Nat_big_num.num * binding) list) + = ((match Pmap.lookup ref1.ref_symname bindings_by_name with + Some l -> if ref1.ref_symname = "__fini_array_end" then + (*let _ = errln ("Found " ^ (show (length l)) ^ " bindings for __fini_array_end, of which " ^ + (show (length (List.filter (fun (bi, (r, maybe_d)) -> maybe_d <> Nothing) l))) ^ + " are with definition") + in*) l + else l + | None -> [] + )) + in + (* what's the actual binding? *) + (match r.maybe_def_bound_to with + None -> failwith ("at this stage, all references must have a decision: `" ^ (ref1.ref_symname ^ "'")) + | Some(decision, _) -> + (* Search the list of bindings for a possibly-updated + * binding for this reference. *) + let matching_possibles = (List.filter (fun (bi, ((ref_idx, ref1, ref_item), maybe_d)) -> + (match maybe_d with + None -> false + | Some (def_idx, def, def_item) -> Nat_big_num.equal + (* match the *reference*, whose linkable we're processing now *) + irec.idx ref_idx + && (Nat_big_num.equal r.ref.ref_sym_scn ref1.ref_sym_scn + && Nat_big_num.equal r.ref.ref_sym_idx ref1.ref_sym_idx) + + (* + def.def_syment = sd.def_syment + && def.def_sym_scn = sd.def_sym_scn + && def.def_sym_idx = sd.def_sym_idx + && def_idx = sd.def_linkable_idx *) + ) + ) possible_bindings) + in + (*let _ = errln ("For a ref to `" ^ ref.ref_symname ^ + "', possibles list is: " ^ ( + List.foldl (fun x -> fun y -> x ^ ", " ^ y) "" (List.map (fun (bi, ((_, _, _), maybe_d)) -> + match maybe_d with + Just(def_idx, def, def_item) -> + "`" ^ def.def_symname ^ "' " ^ + "in linkable " ^ (show def_idx) ^ + ", section " ^ (show def.def_sym_scn) ^ + ", sym idx " ^ (show def.def_sym_idx) + | _ -> failwith "impossible: just filtered out no-def bindings" + end + ) matching_possibles) + )) + in*) + let new_bound_to = ((match matching_possibles with + [] -> Some(ApplyReloc, None) + | [(bi, ((rl, r, ri), maybe_d))] -> + Some(decision, + (match maybe_d with + Some (def_idx, def, def_item) -> Some { + def_symname = (def.def_symname) + ; def_syment = (def.def_syment) + ; def_sym_scn = (def.def_sym_scn) + ; def_sym_idx = (def.def_sym_idx) + ; def_linkable_idx = def_idx + } + | None -> None + )) + | _ -> failwith ("After linker script, ambiguous bindings for `" ^ (ref1.ref_symname ^ "'")) + )) + in + if not ((Lem.option_equal (Lem.pair_equal (=) (Lem.option_equal (=))) new_bound_to r.maybe_def_bound_to)) then + (*let _ = errln ("Changed binding for reference to `" ^ ref.ref_symname ^ + "' in linkable " ^ (show irec.idx)) + in*) + new_bound_to + else if (Lem.option_equal (Lem.pair_equal (=) (Lem.option_equal (=))) new_bound_to None) then failwith "really need a decision by now" + else new_bound_to + )) + + (* if irec.fname = "libc.a(__uClibc_main.os)" + && irec.isec.elf64_section_name_as_string = ".data.rel.local" + then + let _ = errln ("Saw the bugger: " ^ (match r.maybe_def_bound_to with + Just(decision, Just(sd)) -> show sd.def_syment + | _ -> "(not complete)" + end)) + in r.maybe_def_bound_to + else r.maybe_def_bound_to + *) + } + )) + ) (* match maybe_reloc *) + ) (* match tag *) + ) ((Pset.from_list (pairCompare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))) compare) ranges_and_tags))) (* end mapMaybe fn *) + in + let isec_sz = (irec.isec.elf64_section_size) in + let maybe_el_sz = (el.length1) in + let contents_sz = (length el.contents) in + let (actual_sz, padded_contents) = + ((match maybe_el_sz with + Some el_sz -> + let diff = (Nat_big_num.sub_nat el_sz contents_sz) in + if Nat_big_num.less diff(Nat_big_num.of_int 0) then + (* contents greater than what the el says, so chop the end off *) + (*let _ = Missing_pervasives.errln ("Warning: size mismatch for section " ^ irec.isec.elf64_section_name_as_string ^ + " from " ^ irec.fname) + in*) + (el_sz, take0 el_sz el.contents) + else (el_sz, List.rev_append (List.rev el.contents) (replicate0 diff None)) + | None -> + if not (Nat_big_num.equal (length el.contents) isec_sz) + then failwith "input section size not equal to its content pattern length" + else (isec_sz, el.contents) + )) + in + (*let _ = errln ("Saw first 20 bytes of section " ^ irec.isec.elf64_section_name_as_string ^ + " from " ^ irec.fname ^ " as " ^ (show (take 20 padded_contents))) + in*) + (actual_sz, padded_contents, new_ranges_and_tags) + | _ -> failwith "impossible: no such element" + ) (* match Map.lookup idstr img.elements *) + ) (* match maybe_secname *) + | IncludeCommonSymbol(retain_pol, fname1, linkable_idx, def, img2) -> + (*let _ = errln ("Including common symbol called `" ^ def.def_symname ^ "'") + in*) + (* We want to get the common symbol as a byte pattern *) + let sz = (Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_size) + in + let content = (Missing_pervasives.replicate0 sz (Some(Char.chr (Nat_big_num.to_int (Nat_big_num.of_int 0))))) + in + (*let _ = Missing_pervasives.outln (make_line "COMMON" (hex_string_of_natural comp_addr) + (hex_string_of_natural sz) fname) + in*) + (sz, content,(Pset.from_list (pairCompare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))) compare) [(Some(secname1, ( Nat_big_num.sub_nat comp_addr output_section_start_addr, sz)), SymbolDef({ + def_symname = (def.def_symname) + ; def_syment = (def.def_syment) + ; def_sym_scn = (def.def_sym_scn) + ; def_sym_idx = (def.def_sym_idx) + ; def_linkable_idx = linkable_idx + }))])) +(* | Hole(AddressExprFn f) -> + let next_addr = f addr (AllocatedSectionsMap outputs_by_name) + in + let n = next_addr - addr + in + let content = Missing_pervasives.replicate n Nothing + in + let _ = Missing_pervasives.outln (make_line "*fill*" (hex_string_of_natural comp_addr) + (hex_string_of_natural n) + "") + in + (next_addr - addr, content, {}) *) + | ProvideSymbol(pol, name1, (size2, info, other)) -> + (*let _ = errln ("Creating symbol definition named `" ^ name ^ "' in output section `" ^ secname ^ "'") + in*) + let symaddr = accum_current_addr (* FIXME: support others *) + in + (*let _ = Missing_pervasives.outln (make_line "" (hex_string_of_natural symaddr) "" ("PROVIDE (" ^ name ^ ", .)")) + in*) + (Nat_big_num.of_int (* sz *)0, (* comp_el_pat *) [],(Pset.from_list (pairCompare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))) compare) [( + Some(secname1, (( Nat_big_num.sub_nat symaddr output_section_start_addr),Nat_big_num.of_int 0)), + SymbolDef(symbol_def_for_provide_symbol name1 size2 info other control_script_linkable_idx) + )]) + ) + )) (* match comp_el_pat *) + in + (*let _ = errln ("Appending byte pattern to section " ^ secname ^ ", first 20 bytes: " ^ + (show (take 20 comp_el_pat))) + in*) + let new_content = (append_to_byte_pattern_at_offset ( Nat_big_num.sub_nat comp_addr output_section_start_addr) accum_pat comp_el_pat) + in + let new_addr = (Nat_big_num.add comp_addr sz) + in + let new_meta = (Pset.(union) accum_meta this_el_meta) + in + (new_content, new_addr, new_meta) + )) ([], output_section_start_addr,(Pset.from_list (pairCompare (maybeCompare (pairCompare compare (pairCompare Nat_big_num.compare Nat_big_num.compare))) compare) [])) (list_combine comp comp_addrs)) + in + let concat_sec_el = ({ + Memory_image.startpos = (Some(output_section_start_addr)) + ; Memory_image.length1 = (Some(size2)) + ; Memory_image.contents = concatenated_content + }) + in + (*let _ = Missing_pervasives.outln "" in*) + (* Make a new element in the image, also transferring metadata from input elements + * as appropriate. *) + let new_by_range_list = + ((Some(secname1, (Nat_big_num.of_int 0, size2)), FileFeature(ElfSection(Nat_big_num.of_int (* We don't yet konw where this'll come in the output file, so ... *) (* scn_idx *)0, + { elf64_section_name =(Nat_big_num.of_int 0) (* ignored *) + ; elf64_section_type = (output_section_type comp) + ; elf64_section_flags = (output_section_flags comp) + ; elf64_section_addr =(Nat_big_num.of_int 0) (* ignored -- covered by element *) + ; elf64_section_offset =(Nat_big_num.of_int 0) (* ignored -- will be replaced when file offsets are assigned *) + ; elf64_section_size =(Nat_big_num.of_int 0) (* ignored *) + ; elf64_section_link =(Nat_big_num.of_int 0) (* HMM *) + ; elf64_section_info =(Nat_big_num.of_int 0) (* HMM *) + ; elf64_section_align = (alignof_output_section comp) + ; elf64_section_entsize =(Nat_big_num.of_int 0) (* HMM *) + ; elf64_section_body = Byte_sequence.empty (* ignored *) + ; elf64_section_name_as_string = secname1 (* can't rely on this being ignored *) + } + ))) :: Pset.elements new_range_tag_pairs) + in + (*let _ = errln ("Metadata for new section " ^ secname ^ " consists of " ^ (show (length new_by_range_list)) ^ " tags.") + in*) + let new_by_range = (List.fold_left (fun m -> fun (maybe_range, tag) -> + let new_s = (Pset.add (maybe_range, tag) m) + in + (* let _ = errln ("Inserting an element into by_range; before: " ^ (show (Set.size m)) ^ "; after: " ^ (show (Set.size new_s))) + in *) + new_s + ) acc_img.by_range new_by_range_list) + in + let new_by_tag = (by_tag_from_by_range + (instance_Basic_classes_SetType_Maybe_maybe_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_var_dict + (instance_Basic_classes_SetType_tup2_dict + instance_Basic_classes_SetType_Num_natural_dict + instance_Basic_classes_SetType_Num_natural_dict))) instance_Basic_classes_SetType_var_dict new_by_range) + in + let _ = + (let section_tags_bare = (List.filter (fun (maybe_range, tag) -> + (match tag with + | FileFeature(ElfSection(idx1, isec1)) -> true + | _ -> false + )) (Pset.elements new_by_range)) + in + (* errln ("Total metadata now includes " ^ (show (length section_tags_bare)) ^ " sections; are by_range and " + ^ "by_tag consistent? " ^ (show (new_by_tag = by_tag_from_by_range new_by_range))) *) ()) + in + (* this expression is the return value of add_output_section *) + ( Nat_big_num.add + (* new_pos *) output_section_start_addr size2, + (* new_acc *) { + elements = (Pmap.add secname1 concat_sec_el acc_img.elements) + (* tag it as a section, and transfer any tags *) + ; by_range = (* let _ = errln ("Returning from add_output_section a by_range with " ^ + (show (Set.size new_by_range))) in *) new_by_range + ; by_tag = new_by_tag + }, + (* sec_sz *) size2, + (* replacement_output_sec *) (OutputSectionSpec (guard, Some(output_section_start_addr), secname1, comp)) + ) + )) (* end add_output_section *) + in + (match script1 with + [] -> (acc, (AllocatedSectionsMap outputs_by_name)) + | (element1, el_idx) :: more_elements_and_idx -> + let do_nothing = (acc, pos, (AllocatedSectionsMap outputs_by_name)) in + let (new_acc, new_pos, new_outputs_by_name) = + ((match element1 with + DefineSymbol(symdefpol, name1, (symsize, syminfo, symother)) -> + (* We've already added this to the output composition. *) + do_nothing + | AdvanceAddress(AddressExprFn advance_fn_ref) -> + let advance_fn = +((match Pmap.lookup advance_fn_ref alloc_map with + | Some m -> m + | None -> failwith "alloc_map invariant failure" + )) + in + let new_pos = (advance_fn pos (AllocatedSectionsMap outputs_by_name)) + in + (acc, new_pos, (AllocatedSectionsMap outputs_by_name)) + (* FIXME: the allocated sections map is the subset of the outputs_by_name map + * that has been allocated -- meaning *both* sized *and* placed. + * Since we're a multi-pass interpreter, we've sized everything already, but + * only a subset has been placed. So we need to weed out all elements from + * outputs_by_name that don't correspond to a section in the accumulated image. + * We should probably include the section's range_tag in the allocated_sections_map, + * which would force us to do this, but at the moment neither of these is done. *) + | MarkAndAlignDataSegment(maxpagesize1, commonpagesize1) -> + (* GNU linker manual says: + + "DATA_SEGMENT_ALIGN(MAXPAGESIZE, COMMONPAGESIZE) + is equivalent to either + (ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - 1))) + or + (ALIGN(MAXPAGESIZE) + (. & (MAXPAGESIZE - COMMONPAGESIZE))) + depending on whether the latter uses fewer COMMONPAGESIZE sized + pages for the data segment (area between the result of this + expression and `DATA_SEGMENT_END') than the former or not. If the + latter form is used, it means COMMONPAGESIZE bytes of runtime + memory will be saved at the expense of up to COMMONPAGESIZE wasted + bytes in the on-disk file." + + In other words, we're marking the beginning of the data segment + by aligning our position upwards by an amount that + + - guarantees we're on a new page... + + - ... but (option 1) at an address that's congruent, modulo the max page size + (e.g. for 64kB maxpage, 4kB commonpage, we AND with 0xffff) + + - ... (option 2) at an offset that's at the commonpagesize boundary + immediately preceding the lowest congruent address + (e.g. for 64kB maxpage, 4kB commonpage, we AND with 0xf000, + so if we're at pos 0x1234, we bump up to 0x11000). + + FIXME: + + The GNU linker seems to bump up to 0x12000 here, not 0x11000. + Specifically, + + DATA_SEGMENT_ALIGN (0x200000, 0x1000) + + bumps 0x4017dc up to 0x602000. + + This is indeed better, because it allows the next section + to be output without a big gap in the file. + + LOAD 0x0000000000000000 0x0000000000400000 0x0000000000400000 + 0x00000000000017dc 0x00000000000017dc R E 200000 + LOAD 0x0000000000002000 0x0000000000602000 0x0000000000602000 + 0x0000000000000120 0x0000000000000ce8 RW 200000 + + ... whereas if the second LOAD began at address 0x601000, + the file offset of its first section would have to be 0x11000. + + So what *should* the formula be? + It needs to calculate the next address which + + - is a commonpagesize boundary; + + - is minimally >= the current address, modulo the commonpagesize + + - is minimally >= the current address, modulo the maxpagesize. + + The AND operation gives us something that is minimally *below* + the commonpagesize boundary. I think we need to add COMMONPAGESIZE. + + The code does this (in ldexp.c around line 478 as of binutils 2.25): + + expld.result.value = align_n (expld.dot, maxpage); + /* omit relro phase */ + if (expld.dataseg.phase == exp_dataseg_adjust) + { + if (commonpage < maxpage) + expld.result.value += ((expld.dot + commonpage - 1) + & (maxpage - commonpage)); + } + else + { + expld.result.value += expld.dot & (maxpage - 1); + + Which amounts to: + + 1. first, align up to maxpage. So for our example, we're now 0x10000. + or for our real example, we're now 0x600000 + + THEN since the first phase (expld_dataseg_none) + hits the final "else" case, + we immediately restore the modulus of the address, + giving 0x60188c. + or 0x6019ac the second time around (FIXME: why two?) + + 2. next, on the relevant phase (pass) of the script interpreter, + i.e. OPTION 2 + if commonpage < maxpage, + bump up the *non-maxpage-aligned non-modulo-restored* address + by + (. + commonpage - 1) & (maxpage - commonpage) + + i.e. for our example earlier + (0x01234 + 0x1000 - 1) & (0xf000) + = + 0x02233 & 0xf000 + = + 0x02000 + + i.e. for our real example + (0x4019ac + 0x1000 - 1) & (0x1ff000) + = + 0x4019ac + 0x1000 - 1) & 0x1ff000 + = + 0x002000 + + 3. OPTION 1 is implemented by the trailing "else {" + -- it restores the modulus. + + So the problem with our original logic (below) was that + it did what the manual says, not what the code does. + Specifically, the code for option 2 does + + (. + commonpagesize - 1) & (maxpagesize - commonpagesize) + + and NOT simply + + . & (maxpagesize - commonpagesize). + + FIXME: report this bug. + + + Note that intervening commands can do arbitrary things to the location + counter, so we can't do any short-cut arithmetic based on section sizes; + we actually have to run the layout procedure til we hit the end of the + data segment, and then see how we do. + + We run this function *forward* with the first option on a subset + of the script ending with the end of the data segment. + We then see what comes back. + + *) + (* let num_pages_used *) + (*let _ = errln ("Option 1 congruence add-in from pos 0x" ^ (hex_string_of_natural pos) ^ ", maxpagesize 0x" ^ + (hex_string_of_natural maxpagesize) ^ " is 0x" ^ (hex_string_of_natural (natural_land pos (maxpagesize - 1)))) + in*) + let option1 = (Nat_big_num.add (align_up_to maxpagesize1 pos) (Nat_big_num.bitwise_and pos ( Nat_big_num.sub_nat maxpagesize1(Nat_big_num.of_int 1)))) + in + (*let _ = errln ("Mark/align data segment: option 1 is to bump pos to 0x" ^ (hex_string_of_natural option1)) + in*) + let option2 = (Nat_big_num.add (align_up_to maxpagesize1 pos) (Nat_big_num.bitwise_and ( Nat_big_num.sub_nat (Nat_big_num.add pos commonpagesize1)(Nat_big_num.of_int 1)) ( Nat_big_num.sub_nat maxpagesize1 commonpagesize1))) + in + (*let _ = errln ("Mark/align data segment: option 2 is to bump pos to 0x" ^ (hex_string_of_natural option2)) + in*) + let data_segment_endpos = (fun startpos1 -> + (* run forward from here until MarkDataSegmentEnd, + * accumulating the actually-made outputs by name and their sizes *) + let (endpos, _) = (List.fold_left (fun (curpos, seen_end) -> fun (new_script_item, new_script_item_idx) -> + (*let _ = errln ("Folding at pos 0x" ^ (hex_string_of_natural curpos)) + in*) + if seen_end + then (curpos, true) + else let (newpos, new_seen) = ((match new_script_item with + | MarkDataSegmentEnd -> + (*let _ = errln "data segment end" + in*) + (* break the loop early here *) + (curpos, true) + | OutputSection(outputguard, maybe_expr, name1, sub_elements) -> + (*let _ = errln ("output section " ^ name) + in*) + let maybe_found = (Pmap.lookup name1 outputs_by_name) + in + let (OutputSectionSpec (guard, addr, secname1, comp), seen_script_el_idx) = ((match maybe_found with + Some (f, seen_script_el_idx) -> (f, seen_script_el_idx) + | None -> failwith "internal error: output section not found" + )) + in + (* Sometimes a given output section name, say .eh_frame, can come from multiple + * script elements with disjoint guard conditions (only_if_ro and only_if_rw, say). + * Only one of them will actually be selected when the guard is being evaluated. + * So when we "replay" the sections' output here, we want to skip the ones whose + * guards were false. The way we implement this is to store the originating script + * element idx in the allocated_output_sections map. We can test that against our + * current script element_idx here *) + let replay_output = ( Nat_big_num.equal seen_script_el_idx el_idx) + in + if replay_output + then ( + let unaligned_start_addr = curpos + in + let start_addr = (align_up_to (alignof_output_section comp) unaligned_start_addr) + in + let (end_addr, comp_addrs) = (do_output_section_layout_starting_at_addr start_addr (AllocatedSectionsMap outputs_by_name) comp) + in + let size2 = (Nat_big_num.sub_nat end_addr start_addr) + in + (end_addr, (* seen_end *) false) + ) + else (curpos, (* seen_end *) false) + | AdvanceAddress(AddressExprFn advance_fn_ref) -> + (*let _ = errln "Advance address" + in*) + let advance_fn = +((match Pmap.lookup advance_fn_ref alloc_map with + | Some m -> m + | None -> failwith "alloc_map invariant failed" + )) + in + let new_pos = (advance_fn curpos (AllocatedSectionsMap outputs_by_name)) + in + (new_pos, false) + | _ -> (curpos, seen_end) + )) + in + if Nat_big_num.less newpos curpos then failwith "went backwards" else (newpos, new_seen) + ) (startpos1, false) more_elements_and_idx) + in endpos + ) + in + let endpos_option1 = (data_segment_endpos option1) + in + let endpos_option2 = (data_segment_endpos option2) + in + (*let _ = errln ("Mark/align data segment: option 1 gives an endpos of 0x" ^ (hex_string_of_natural endpos_option1)) + in*) + (*let _ = errln ("Mark/align data segment: option 2 gives an endpos of 0x" ^ (hex_string_of_natural endpos_option2)) + in*) + let npages = (fun startpos1 -> (fun endpos -> Nat_big_num.div + ( Nat_big_num.sub_nat(align_up_to commonpagesize1 endpos) + (round_down_to commonpagesize1 startpos1)) commonpagesize1 + )) + in + let npages_option1 = (npages option1 endpos_option1) + in + let npages_option2 = (npages option2 endpos_option1) + in + (*let _ = errln ("Mark/align data segment: option 1 uses " ^ (show npages_option1) ^ " COMMONPAGESIZE-sized pages") + in*) + (*let _ = errln ("Mark/align data segment: option 2 uses " ^ (show npages_option2) ^ " COMMONPAGESIZE-sized pages") + in*) + if Nat_big_num.less npages_option1 npages_option2 + then (*let _ = errln "Choosing option 1" in*) (acc, option1, (AllocatedSectionsMap outputs_by_name)) + else (*let _ = errln "Choosing option 2" in*) (acc, option2, (AllocatedSectionsMap outputs_by_name)) + | MarkDataSegmentEnd -> do_nothing + | MarkDataSegmentRelroEnd(*(fun_from_secs_to_something)*) -> do_nothing + | OutputSection(outputguard, maybe_expr, name1, sub_elements) -> + (* Get the composition we computed earlier, and actually put it in + * the image, assigning an address to it. *) + let maybe_found = (Pmap.lookup name1 outputs_by_name) + in + let (found, seen_script_el_idx) = ((match maybe_found with + Some (f, saved_idx) -> (f, saved_idx) + | None -> failwith "internal error: output section not found" + )) + in + let (OutputSectionSpec (guard, addr, secname1, comp)) = found + in + (* let next_free_section_idx = 1 + naturalFromNat (Map.size outputs_by_name) + in *) + let count_sections_in_image = (fun img2 -> ( + let (section_tags, section_ranges) = (elf_memory_image_section_ranges img2) + in + let section_tags_bare = (Lem_list.map (fun tag -> + (match tag with + | FileFeature(ElfSection(idx1, isec1)) -> true + | _ -> false + )) section_tags) + in + length section_tags_bare + )) + in + (* Do we actually want to add an output section? Skip empty sections. + * CARE: we actually want to heed the proper ld semantics for empty sections + * (e.g. ". = ." will force output). From the GNU ld manual: + + The linker will not normally create output sections with no contents. + This is for convenience when referring to input sections that may or + may not be present in any of the input files. For example: + .foo : { *(.foo) } + will only create a `.foo' section in the output file if there is a + `.foo' section in at least one input file, and if the input sections + are not all empty. Other link script directives that allocate space in + an output section will also create the output section. So too will + assignments to dot even if the assignment does not create space, except + for `. = 0', `. = . + 0', `. = sym', `. = . + sym' and `. = ALIGN (. != + 0, expr, 1)' when `sym' is an absolute symbol of value 0 defined in the + script. This allows you to force output of an empty section with `. = + .'. + + The linker will ignore address assignments ( *note Output Section + Address::) on discarded output sections, except when the linker script + defines symbols in the output section. In that case the linker will + obey the address assignments, possibly advancing dot even though the + section is discarded. + + * It follows that we might discard the output section, + * but *retain* the symbol definitions within it, + * and keep the dot-advancements that + * In other words, we care about two things: + * + * -- whether there are any non-empty input sections, *or* + * non-excluded assignments to dot, inside the composition: + * this controls whether the section is output + + * -- whether the script defines symbols in the section; if so + * then *even if the section is discarded* + * we must honour the address assignments, + * which means using the ending address of do_output_section_layout_starting_at_addr, + * *and* + * we must retain the symbol definitions (which now could + * end up going in some other section? HMM...) + *) + let comp_element_allocates_space = (fun comp_el -> (match comp_el with + IncludeInputSection(_, irec) -> Nat_big_num.greater + (*let _ = errln ("Saw an input section named `" ^ irec.isec.elf64_section_name_as_string ^ + "' of size " ^ (show irec.isec.elf64_section_size)) + in*) + irec.isec.elf64_section_size(Nat_big_num.of_int 0) + | IncludeCommonSymbol(retain_pol, fname1, idx1, def, img2) -> Nat_big_num.greater +(Ml_bindings.nat_big_num_of_uint64 def.def_syment.elf64_st_size)(Nat_big_num.of_int 0) + | ProvideSymbol(pol, name1, spec) -> true (* HACK: what else makes sense here? *) + | Hole(AddressExprFn(address_fn_ref)) -> + let address_fn = +((match Pmap.lookup address_fn_ref alloc_map with + | Some m -> m + | None -> failwith "alloc_map invariant failed" + )) + in + let assignment_is_excluded = (fun f -> + (* really makes you wish you were programming in Lisp *) + let always_gives_0 = + ( Nat_big_num.equal(f(Nat_big_num.of_int 0) (AllocatedSectionsMap outputs_by_name))(Nat_big_num.of_int 0) + && Nat_big_num.equal (f(Nat_big_num.of_int 42) (AllocatedSectionsMap outputs_by_name))(Nat_big_num.of_int 0)) (* FIXME: this is wrong *) + in + let always_gives_dot = + ( Nat_big_num.equal(f(Nat_big_num.of_int 0) (AllocatedSectionsMap outputs_by_name))(Nat_big_num.of_int 0) + && Nat_big_num.equal (f(Nat_big_num.of_int 42) (AllocatedSectionsMap outputs_by_name))(Nat_big_num.of_int 42)) (* FIXME: this is wrong *) + in + (* FIXME: what are the semantics of function equality in Lem? *) + always_gives_0 || (always_gives_dot (*&& (AddressExprFn(f)) <> assign_dot_to_itself*) (* FIXME DPM: almost certainly not what is meant... *))) + in + not (assignment_is_excluded address_fn) + )) + in + let section_contains_non_empty_inputs = +(List.exists comp_element_allocates_space comp) + in + (* See note in MarkDataSegmentEnd case about script element idx. Short version: + * multiple output section stanzas, for a given section name, may be in the script, + * but only one was activated by the section composition pass. Ignore the others. *) + let do_output = (( Nat_big_num.equal seen_script_el_idx el_idx) && section_contains_non_empty_inputs) + in + if not do_output then + (*let _ = errln ("At pos 0x" ^ (hex_string_of_natural pos) ^ ", skipping output section " ^ name ^ + " because " ^ (if not section_contains_non_empty_inputs + then "it contains no non-empty inputs" + else "it was excluded by its output guard")) + in*) + (acc, pos, (AllocatedSectionsMap outputs_by_name)) + else ( + (* let _ = errln ("Before adding output section, we have " ^ (show (count_sections_in_image acc)) + ^ " sections.") + in *) + let (new_pos, new_acc, sec_sz, replacement_output_sec) + = (add_output_section ((* next_free_section_idx, *) pos, acc) found) + in + (*let _ = errln ("At pos 0x" ^ (hex_string_of_natural pos) ^ ", adding output section " ^ name ^ + " composed of " ^ (show (length comp)) ^ " items, new pos is 0x" ^ (hex_string_of_natural new_pos)) + in*) + (* let _ = errln ("Received from add_output_section a by_range with " ^ (show (Set.size new_acc.by_range)) + ^ " metadata records of which " ^ (show (Set.size { + (r, t) + | forall ((r, t) IN new_acc.by_range) + | match t with FileFeature(ElfSection(x)) -> true | _ -> false end + } + )) ^ " are ELF sections; one more time: " ^ (show (Set.size { + (t, r) + | forall ((t, r) IN new_acc.by_tag) + | match t with FileFeature(ElfSection(x)) -> true | _ -> false end + } + )) ^ "; count_sections_in_image says " ^ (show ( + length (Multimap.lookupBy Memory_image_orderings.tagEquiv (FileFeature(ElfSection(0, null_elf64_interpreted_section))) new_acc.by_tag) + )) + ) + in *) + (* let _ = errln ("After adding output section, we have " ^ (show (count_sections_in_image new_acc)) + ^ " sections.") + in *) + (new_acc, new_pos, (AllocatedSectionsMap (Pmap.add name1 (replacement_output_sec, el_idx) (Pmap.remove name1 outputs_by_name)))) + ) + | DiscardInput(selector) -> do_nothing + | InputQuery(retainpol, sortpol, selector) -> do_nothing + )) + in + (* recurse *) + build_image alloc_map new_acc new_pos new_outputs_by_name bindings_by_name more_elements_and_idx control_script_linkable_idx linker_defs_by_name + )) + +(* +let rec consecutive_commons rev_acc l = + match l with + [] -> reverse rev_acc + | IncludeCommonSymbol(pol, fname, def, img) :: rest -> + consecutive_commons ((pol, fname, def, img) :: rev_acc) rest + | _ -> reverse rev_acc +end +*) + +(*val default_place_orphans : input_output_assignment -> list input_spec -> input_output_assignment*) +let default_place_orphans (discards, outputs) inputs:(input_spec)list*(output_section_spec*Nat_big_num.num)list= +( + (* Try to emulate the GNU linker. + * Its docs say: + + "It attempts to place orphan sections after + non-orphan sections of the same attribute, such as code vs data, + loadable vs non-loadable, etc. If there is not enough room to do this + then it places at the end of the file. + + + For ELF targets, the attribute of the section includes section type + as well as section flag." + + * It places the .tm_clone_table orphan + + [ 9] .tm_clone_table PROGBITS 0000000000000000 00000160 + 0000000000000000 0000000000000000 WA 0 0 8 + + as + + .data 0x0000000000602120 0x0 crtend.o + .data 0x0000000000602120 0x0 crtn.o + + .tm_clone_table + 0x0000000000602120 0x0 + .tm_clone_table + 0x0000000000602120 0x0 crtbeginT.o + .tm_clone_table + 0x0000000000602120 0x0 crtend.o + + .data1 + *(.data1) + 0x0000000000602120 _edata = . + + i.e. between .data and .data1. In the script: + + .got.plt : { *(.got.plt) *(.igot.plt) } + .data : + { + *(.data .data.* .gnu.linkonce.d.* ) + SORT(CONSTRUCTORS) + } + .data1 : { *(.data1) } + _edata = .; PROVIDE (edata = .); + . = .; + __bss_start = .; + + i.e. no clear reason for why between .data and .data1. In the code: + + (see elf32em.c line 1787 in binutils 2.25) + + ... the key bit of code is as follows. + + place = NULL; + if ((s->flags & (SEC_ALLOC | SEC_DEBUGGING)) == 0) + place = &hold[orphan_nonalloc]; + else if ((s->flags & SEC_ALLOC) == 0) + ; + else if ((s->flags & SEC_LOAD) != 0 + && ((iself && sh_type == SHT_NOTE) + || (!iself && CONST_STRNEQ (secname, ".note")))) + place = &hold[orphan_interp]; + else if ((s->flags & (SEC_LOAD | SEC_HAS_CONTENTS | SEC_THREAD_LOCAL)) == 0) + place = &hold[orphan_bss]; + else if ((s->flags & SEC_SMALL_DATA) != 0) + place = &hold[orphan_sdata]; + else if ((s->flags & SEC_THREAD_LOCAL) != 0) + place = &hold[orphan_tdata]; + else if ((s->flags & SEC_READONLY) == 0) + place = &hold[orphan_data]; + else if (((iself && (sh_type == SHT_RELA || sh_type == SHT_REL)) + || (!iself && CONST_STRNEQ (secname, ".rel"))) + && (s->flags & SEC_LOAD) != 0) + place = &hold[orphan_rel]; + else if ((s->flags & SEC_CODE) == 0) + place = &hold[orphan_rodata]; + else + place = &hold[orphan_text]; + + + .. we replicate it here. + *)let output_irecs = (List.fold_left (fun acc -> fun outp -> ((match outp with + (OutputSectionSpec(guard, maybe_addr, name1, comp), script_el_idx) -> + let all_irecs = (List.fold_left (fun inner_acc -> fun comp_el -> (match comp_el with + IncludeInputSection(_, irec) -> Pset.add irec inner_acc + | _ -> inner_acc + ))(Pset.from_list compare []) comp) + in + Pset.(union) all_irecs acc + | _ -> acc + )))(Pset.from_list compare []) outputs) + in + let (orphans : input_spec list) = (List.filter (fun inp -> (match inp with + InputSection(irec) -> let v = (not ( Pset.mem irec output_irecs)) + in (*let _ = if v then errln ("Saw an orphan input section: " ^ + irec.secname ^ " in " ^ irec.fname) else () + in*) v + | _ -> false + )) inputs) + in + let place_one_orphan = (fun acc -> fun input -> ( + let irec = ((match input with + InputSection(irec) -> irec + | _ -> failwith "impossible: orphan section is not a section" + )) + in + let (discards, outputs) = acc in + let find_output = (fun maybe_name -> fun maybe_type -> fun flags_must_have -> fun flags_must_not_have -> ( + Missing_pervasives.find_index0 (fun (OutputSectionSpec (guard, maybe_addr, name1, comp), script_el_idx) -> + let flags = (output_section_flags comp) in + (match maybe_name with Some n -> n = name1 | None -> true ) + && ((match maybe_type with Some t -> Nat_big_num.equal (output_section_type comp) t | None -> true ) + && (Pset.for_all (fun x -> flag_is_set x flags) flags_must_have + && Pset.for_all (fun x -> not (flag_is_set x flags)) flags_must_not_have)) + ) outputs + )) + in + let place_after_nonalloc = (find_output None None(Pset.from_list Nat_big_num.compare [])(Pset.from_list Nat_big_num.compare [ shf_alloc ])) in + let place_after_interp = (find_output (Some(".interp")) (Some(sht_progbits))(Pset.from_list Nat_big_num.compare [ shf_alloc ])(Pset.from_list Nat_big_num.compare [])) in + let place_after_bss = (find_output (Some(".bss")) (Some(sht_nobits))(Pset.from_list Nat_big_num.compare [ shf_alloc; shf_write])(Pset.from_list Nat_big_num.compare [])) in + let place_after_rodata = (find_output (Some(".rodata")) (Some(sht_progbits))(Pset.from_list Nat_big_num.compare [ shf_alloc ])(Pset.from_list Nat_big_num.compare [ shf_write ])) in + let place_after_rel = (find_output (Some(".rela.dyn")) (Some(sht_rela))(Pset.from_list Nat_big_num.compare [])(Pset.from_list Nat_big_num.compare [])) in + let place_after_data = (find_output (Some(".data")) (Some(sht_progbits))(Pset.from_list Nat_big_num.compare [ shf_alloc; shf_write ])(Pset.from_list Nat_big_num.compare [])) in + let place_after_text = (find_output (Some(".text")) (Some(sht_progbits))(Pset.from_list Nat_big_num.compare [ shf_alloc; shf_execinstr ])(Pset.from_list Nat_big_num.compare [])) in + let (place_after : Nat_big_num.num option) = ((match input with + InputSection(irec) -> + (* HACK: simulates GNU linker, but this logic ought to go elsewhere *) + if irec.isec.elf64_section_name_as_string = ".note.GNU-stack" then None + else + if not (flag_is_set shf_alloc irec.isec.elf64_section_flags) + && (* not flag_is_set shf_alloc irec.isec.elf64_section_flags *) (* no debugging, for now *) true + then place_after_nonalloc + else (* FIXME: reinstate alloc-debugging case *) + if Nat_big_num.equal irec.isec.elf64_section_type sht_note (* FIXME: replicate iself logic *) + || (irec.isec.elf64_section_name_as_string = ".note") + then place_after_interp + else if Nat_big_num.equal irec.isec.elf64_section_type sht_nobits + then place_after_bss + else (* FIXME: implement thread-local case *) + if not (flag_is_set shf_write irec.isec.elf64_section_flags) + && not (flag_is_set shf_execinstr irec.isec.elf64_section_flags) + then place_after_rodata + else if flag_is_set shf_write irec.isec.elf64_section_flags + && not (flag_is_set shf_execinstr irec.isec.elf64_section_flags) + then place_after_data + else place_after_text + )) + in + let (discards, outputs) = acc in + (match place_after with + Some idx1 -> (* The section exists and has the flags we expected, and is at output idx *) + (discards, mapi (fun i -> fun output -> + (* FIXME: also fix up flags, alignment etc. *) + let (OutputSectionSpec (guard, maybe_addr, name1, comp), script_el_idx) = output in + if Nat_big_num.equal (Nat_big_num.of_int i) idx1 then (OutputSectionSpec(guard, maybe_addr, name1, List.rev_append (List.rev comp) [IncludeInputSection(DefaultKeep, irec)]), script_el_idx) else output + ) outputs + ) + | None -> + (*let _ = errln ("Warning: discarding orphan section `" ^ irec.isec.elf64_section_name_as_string + ^ "' from file `" ^ irec.fname ^ "'") + in*) + ( List.rev_append (List.rev discards) [input], outputs) + ) + )) + in + List.fold_left place_one_orphan (discards, outputs) orphans) + +(*val interpret_linker_control_script : + address_expr_fn_map allocated_sections_map -> + linker_control_script + -> linkable_list + -> natural (* control_script_linkable_idx *) + -> abi any_abi_feature + -> list input_spec + -> (input_spec -> input_spec -> ordering) (* seen ordering *) + -> (input_output_assignment -> list input_spec -> input_output_assignment) (* place orphans *) + -> (Map.map string (list (natural * binding))) (* initial_bindings_by_name *) + -> (elf_memory_image * Map.map string (list (natural * binding)))*) +let interpret_linker_control_script alloc_map script1 linkables control_script_linkable_idx a inputs seen_ordering place_orphans initial_bindings_by_name:(any_abi_feature)annotated_memory_image*((string),((Nat_big_num.num*binding)list))Pmap.map= + (let labelled_script = (label_script script1) + in + (*let _ = List.mapi (fun i -> fun input -> + errln ("Input " ^ (show i) ^ " is " ^ + match input with + InputSection(inp) -> + "input section, name `" ^ inp.secname ^ + "', from file `" ^ inp.fname ^ "' (linkable idx " ^ (show inp.idx) ^ ")" + | Common(idx, symname, img, def) -> + "common symbol `" ^ symname ^ "'" + end + ) + ) inputs + in*) + let (discards_before_orphans, outputs_before_orphans) + = (assign_inputs_to_output_sections ([], [])(Pset.from_list (pairCompare Nat_big_num.compare Nat_big_num.compare) [])(Pset.from_list (tripleCompare Nat_big_num.compare Nat_big_num.compare Nat_big_num.compare) []) inputs None None seen_ordering labelled_script) + in + (* place orphans *) + let (discards, outputs) = (place_orphans (discards_before_orphans, outputs_before_orphans) inputs) + in + (* In assigning inputs to outputs, we may also have defined some symbols. These affect the + * bindings that are formed. So, we rewrite the bindings here. Note that we have to do so here, + * not in the caller, because these extra bindings can affect the reachability calculation + * during GC. *) + let (linker_defs_by_name, (bindings_by_name : ( (string, ( (Nat_big_num.num * binding)list))Pmap.map))) = ( + let (script_defs_by_name : (string, ( (symbol_definition * symbol_def_policy)list)) Pmap.map) + = (List.fold_left (fun acc -> (fun ((OutputSectionSpec (guard, maybe_addr, secname1, comp)), script_el_idx) -> + List.fold_left (fun inner_acc -> fun comp_el -> ( + (match comp_el with + ProvideSymbol(pol, name1, (size2, info, other)) -> + (*let _ = errln ("Linker script defining symbol `" ^ name ^ "'") + in*) + let def = (symbol_def_for_provide_symbol name1 size2 info other control_script_linkable_idx) + in + let v = ((match Pmap.lookup name1 inner_acc with + None -> [(def, pol)] + | Some l -> (def, pol) :: l + )) + in + Pmap.add name1 v inner_acc + | _ -> inner_acc + ) + )) (acc : (string, ( (symbol_definition * symbol_def_policy)list)) Pmap.map) comp + )) (Pmap.empty compare) outputs) + in + (* Now that we've made these definitions, what bindings are affected? + * We also use this opportunity to bind references to linker-generated symbols, + * such as _GLOBAL_OFFSET_TABLE_, since any definitions of these should now be merged + * into our inputs. *) + (* bit of a HACK: reconstruct the linkable img and idx from the input items *) + let idx_to_img = (List.fold_left (fun acc_m -> fun item -> + (match item with + Common(idx1, _, img2, symdef) -> Pmap.add idx1 img2 (Pmap.remove idx1 acc_m) + | InputSection(irec) -> Pmap.add irec.idx irec.img (Pmap.remove irec.idx acc_m) + ) + ) (Pmap.empty Nat_big_num.compare) inputs) + in + let (lowest_idx : Nat_big_num.num) = ((match Pset.min_elt_opt (Pmap.domain idx_to_img) + with Some x -> x + | None -> failwith "internal error: no linkable items" + )) + in + let first_linkable_item = ((match linkables with x :: more -> x | _ -> failwith "internal error: no linkables" )) + in + let (control_script_input_item : input_item) = ( + "(built-in control script)", + ControlScript, + (BuiltinControlScript, [Builtin]) + ) + in + let (control_script_linkable_item : linkable_item) = ( + ControlScriptDefs, control_script_input_item, + { item_fmt = "" + ; item_check_sections = false + ; item_copy_dt_needed = false + ; item_force_output = true + } + ) + in + let updated_bindings_and_new_defs = (Pmap.map (fun b_list_initial -> + Lem_list.map (fun (b_idx, b_initial) -> + let ((iref_idx, iref, iref_item), maybe_idef) = b_initial + in + (*let _ = errln ("Looking for linker script or linker-generated defs of symbol `" ^ iref.ref_symname ^ "'") + in*) + let possible_script_defs = ((match Pmap.lookup iref.ref_symname script_defs_by_name with + Some l -> l + | None -> [] + )) + in + let (possible_linker_generated_def : symbol_definition option) = + (if a.symbol_is_generated_by_linker iref.ref_symname + then (* can we find a definition by this name? *) + ((match Pmap.lookup lowest_idx idx_to_img with + None -> failwith "no lowest idx found" + | Some img2 -> + (match List.filter (fun def -> def.def_symname = iref.ref_symname) (defined_symbols + instance_Basic_classes_Ord_Abis_any_abi_feature_dict instance_Abi_classes_AbiFeatureTagEquiv_Abis_any_abi_feature_dict img2) with + [] -> None + | [def] -> Some(def) + | _ -> failwith ("first linkable has multiple defs of name `" ^ (iref.ref_symname ^ "'")) + ) + )) + else None) + in + (* If the binding has no def, we always use the def we have. + * If the binding has a def, we use our def only if the policy is AlwaysDefine. *) + (*let _ = errs ("Do we override binding " ^ (show b_idx) ^ ", symbol named `" ^ + iref.ref_symname ^ "'? ") + in*) + (* FIXME: check real semantics of defining symbols like '_GLOBAL_OFFSET_TABLE_' in linker script or input objects. + * This is really just a guess. *) + let new_b_and_maybe_new_def = ((match (maybe_idef, possible_script_defs, possible_linker_generated_def) with + | (_, [], None) -> (*let _ = errln "no" in *) + (((iref_idx, iref, iref_item), maybe_idef), None) + | (None, [], Some(def)) -> (*let _ = errln "yes (was undefined)" in*) + (((iref_idx, iref, iref_item), Some(lowest_idx, def, first_linkable_item)), Some(def)) + | (_, [(def, AlwaysDefine)], _) -> (*let _ = errln "yes (linker script provides unconditional def)" in*) + (((iref_idx, iref, iref_item), Some (control_script_linkable_idx, def, control_script_linkable_item)), Some(def)) + | (Some existing_def, ([(def, ProvideIfUsed)]), _) -> (*let _ = errln "no" in*) + (((iref_idx, iref, iref_item), Some existing_def), None) + | (None, [(def, ProvideIfUsed)], _) -> (*let _ = errln "yes (linker script provides if-used def)" in*) + (((iref_idx, iref, iref_item), Some (control_script_linkable_idx, def, control_script_linkable_item)), Some(def)) + | (_, pair1 :: pair2 :: more, _) -> (*let _ = errln "error" in*) + failwith "ambiguous symbol binding in linker control script" + )) + in + (b_idx, new_b_and_maybe_new_def) + ) b_list_initial + ) initial_bindings_by_name) + in + let (new_symbol_defs_map : (string, ( ( symbol_definition option)list)) Pmap.map) + = (Pmap.map (fun b_pair_list -> Lem_list.map (fun (b_idx, (new_b, maybe_new_def)) -> maybe_new_def) b_pair_list) updated_bindings_and_new_defs) + in + let (new_symbol_defs_by_name : (string, ( symbol_definition list)) Pmap.map) = (Pmap.map + (fun v -> Lem_list.mapMaybe id0 v) new_symbol_defs_map) + in + (* { List.mapMaybe id maybe_def_list | forall ((_, maybe_def_list) IN (Map.toSet new_symbol_defs_map)) | true } + in*) + (*let new_symbol_defs = List.concat (Set_extra.toList new_symbol_def_list_set) + in*) + let updated_bindings = (Pmap.map (fun b_pair_list -> Lem_list.map (fun (b_idx, (new_b, maybe_new_def)) -> (b_idx, new_b)) b_pair_list) updated_bindings_and_new_defs) + in + (new_symbol_defs_by_name, updated_bindings) + ) + in + (*let _ = errln ("For __fini_array_end, we have " ^ + (let all_bs = match Map.lookup "__fini_array_end" bindings_by_name with + Just l -> l + | Nothing -> [] + end + in + ((show (length all_bs)) ^ + " bindings, of which " ^ + (show (length (List.filter (fun (bi, ((ref_idx, ref, ref_item), maybe_def)) -> + match maybe_def with + Just _ -> true + | _ -> false + end + ) all_bs))) ^ " have defs"))) + in*) + let outputs_by_name = + (let insert_fun = (fun m -> (fun (OutputSectionSpec(guard, maybe_addr, name1, compos), script_idx) -> Pmap.add name1 ((OutputSectionSpec (guard, maybe_addr, name1, compos)), script_idx) m)) + in + List.fold_left insert_fun (Pmap.empty compare) outputs) + in + (* Print the link map's "discarded input sections" output. *) + (*let _ = Missing_pervasives.outln "\nDiscarded input sections\n" + in*) + let discard_line = (fun i -> ((match i with + InputSection(s) -> + let lpadded_secname = (" " ^ s.secname) + in + lpadded_secname ^ ((space_padding_and_maybe_newline(Nat_big_num.of_int 16) lpadded_secname) ^ ("0x0000000000000000" (* FIXME *) + ^ (" 0x" ^ ((hex_string_of_natural s.isec.elf64_section_size) ^ (" " + ^ (s.fname ^ "\n")))))) + | Common(idx1, fname1, img2, def) -> "" (* don't print discard lines for discarded commons *) + ))) + in + (*let _ = Missing_pervasives.outs (List.foldl (fun str -> (fun input -> (str ^ (discard_line input)))) "" (reverse discards)) + in*) + let outputs_by_name_after_gc = (compute_def_use_and_gc (AllocatedSectionsMap outputs_by_name)) + in + (*let _ = Missing_pervasives.outs "\nMemory Configuration\n\nName Origin Length Attributes\n*default* 0x0000000000000000 0xffffffffffffffff\n" + in + let _ = Missing_pervasives.outln "\nLinker script and memory map\n" + in*) + (* FIXME: print LOAD and START_GROUP trace *) + let (img2, outputs_by_name_with_position) + = (build_image alloc_map empty_elf_memory_image(Nat_big_num.of_int 0) outputs_by_name_after_gc bindings_by_name labelled_script control_script_linkable_idx linker_defs_by_name) + in + (*let _ = errln ("Final image has " ^ (show (Map.size img.elements)) ^ " elements and " + ^ (show (Set.size img.by_tag)) ^ " metadata tags, of which " ^ ( + let (section_tags, section_ranges) = elf_memory_image_section_ranges img + in + let section_tags_bare = List.map (fun tag -> + match tag with + | FileFeature(ElfSection(idx, isec)) -> (idx, isec) + | _ -> failwith "not section tag" + end) section_tags + in + show (length section_tags_bare) + ) ^ " are sections.") + in*) + (* The link map output for the section/address assignment basically mirrors our notion of + * output section composition. In the following: + + 0x0000000000400000 PROVIDE (__executable_start, 0x400000) + 0x0000000000400190 . = (0x400000 + SIZEOF_HEADERS) + +.interp + *(.interp) + +.note.ABI-tag 0x0000000000400190 0x20 + .note.ABI-tag 0x0000000000400190 0x20 crt1.o + +.note.gnu.build-id + 0x00000000004001b0 0x24 + *(.note.gnu.build-id) + .note.gnu.build-id + 0x00000000004001b0 0x24 crt1.o + +.hash + *(.hash) + +.gnu.hash + *(.gnu.hash) + +... we can see that + + - symbol provision, holes and output sections all get lines + + - each output section appears with its name left-aligned, and its address, + if any, appearing afterwards; if so, the section's total size also follows. + + - each input query is printed verbatim, e.g. "*(.note.gnu.build-id)" + + - underneath this, a line is printed for each input section that was included, + with its address and size. This can spill onto a second line in the usual way. + + - holes are shown as "*fill*" + + - provided symbols are shown as in the linker script source. + + PROBLEM: we don't have the script in source form, so we can't print the queries verbatim. + I should really annotate each query with its source form; when the script is parsed from source, + this can be inserted automatically. For the moment, what to do? I could annotate each script + element manually. For the moment, for diffing purposes, filter out lines with asterisks. + + *) + (img2, bindings_by_name)) |
