#!/bin/bash

# We merge the binary-derived allocsite information from objdumpallocs
# with the precise source-level allocsite information from dumpallocs.ml.
# And similarly for other metadata -- the idea is generic.
# What we're really doing is matching binary-level features (instructions)
# against source-level features (file/line/column spans)
# and outputting the union of the metadata,
# generally propagating source-level metadata (allocation types)
# to binary features (instructions).
#
# Usage: <this script> ALL_OBJ_META_FILE ALL_SOURCE_META_FILE
#   $1  binary-level records, whitespace-separated:
#         obj func addr sourcefile sourceline sourceline_end typ rest...
#   $2  source-level records, whitespace-separated:
#         sourcefile sourceline sourceline_end rest...
# Both inputs must already be sorted by source file, then source line, in the
# same byte-wise order that the comparison helpers below implement.
# Merged records go to stdout; all diagnostics go to stderr.

all_obj_meta_file=$1

# The four helpers below compare two strings byte-wise. LC_ALL=C is set
# locally so that the user's collation locale cannot influence the result
# (LANG alone would be overridden by LC_ALL / LC_COLLATE). Using the [[ ]]
# builtin avoids forking an external sort for every comparison, and copes
# with empty or whitespace-containing operands.

# Is $1 <= $2?  Returns 0 (true) or 1 (false).
lexicographic_compare_le() {
    local LC_ALL=C
    if [[ "$1" > "$2" ]]; then
        return 1 # false
    fi
    return 0 # true
}

# Is $1 < $2?  Returns 0 (true) or 1 (false).
lexicographic_compare_lt() {
    local LC_ALL=C
    if [[ "$1" < "$2" ]]; then
        return 0 # true
    fi
    return 1 # false
}

# Is $1 > $2?  Returns 0 (true) or 1 (false).
lexicographic_compare_gt() {
    if ! lexicographic_compare_le "$1" "$2"; then
        return 0 # true
    fi
    return 1 # false
}

# Is $1 >= $2?  Returns 0 (true) or 1 (false).
lexicographic_compare_ge() {
    if ! lexicographic_compare_lt "$1" "$2"; then
        return 0 # true
    fi
    return 1 # false
}

# Merge the sorted binary-level records in file $1 with the sorted
# source-level records in file $2 (read on fd 3).
# For every binary-level record whose [sourceline, sourceline_end) window
# contains the line of a source-level record for the same source file, print
#   obj TAB func TAB addr TAB sourcefile TAB sourceline TAB sourceline_end TAB srcmeta_rest TAB
# to stdout. Records that cannot be matched are reported on stderr.
# Processing stops as soon as the source-level input is exhausted; any
# binary-level records still unread at that point are not reported.
merge_obj_and_source_meta() {
    local obj_meta_file=$1
    local source_meta_file=$2
    # typ, rest and srcmeta_sourceline_end are read only so that the
    # remaining fields split correctly; they are not otherwise used.
    local obj func addr sourcefile sourceline sourceline_end typ rest
    local srcmeta_sourcefile srcmeta_sourceline srcmeta_sourceline_end srcmeta_rest
    # 1 while the current source-level record may still match later
    # binary-level records and so must not be replaced yet.
    local keep_old_source_line=0
    # 1 once the current source-level record has matched at least one
    # binary-level record (suppresses the "skipping source" warning).
    local have_matched_this_source_line=0

    # second pass -- we read input grouped by source file then line number.
    # The "|| [[ -n ... ]]" keeps a final line that lacks a trailing newline.
    while read -r obj func addr sourcefile sourceline sourceline_end typ rest \
        || [[ -n "$obj" ]]; do
        # We have our source-level metadata on fd 3
        while true; do
            # read a line of source-level metadata, unless the last line is still good
            if ((keep_old_source_line == 0)); then
                have_matched_this_source_line=0
                # for allocsite metadata, srcmeta_rest will be two fields:
                # the target function and then the type name/descr
                read -r srcmeta_sourcefile srcmeta_sourceline \
                    srcmeta_sourceline_end srcmeta_rest <&3 \
                    || [[ -n "$srcmeta_sourcefile" ]] \
                    || break 2
            fi

            # possibilities:
            # 1. this is the metadata that matches our toplevel line
            # 2. this precedes the metadata that matches our toplevel line
            # 3. our toplevel line will never be matched, and we need to advance past it
            # (There is no "this follows our toplevel line" case, because we sorted
            # both inputs.)
            # Detecting 1: the source line lies in our window. We can have >1 instr
            #   for a given source-level feature, so on a match we advance only obj.
            # Detecting 2: if it has source line < toplevel, we can safely skip it
            #   as it will never be needed.
            # Else we have case 3, so we need to advance toplevel.
            if [[ "$srcmeta_sourcefile" == "$sourcefile" ]] \
                && lexicographic_compare_ge "$srcmeta_sourceline" "$sourceline" \
                && lexicographic_compare_lt "$srcmeta_sourceline" "$sourceline_end"; then
                # matched -- output, and consume the objdump line
                printf '%s\t%s\t%s\t%s\t%s\t%s\t%s\t\n' \
                    "$obj" "$func" "$addr" "$sourcefile" \
                    "$sourceline" "$sourceline_end" "$srcmeta_rest"
                have_matched_this_source_line=1
                # Keep the source line, because there might be other
                # instructions that it matches.
                keep_old_source_line=1
                continue 2
            fi

            echo "NO MATCH: found $srcmeta_sourcefile != $sourcefile or " \
                "NOT lexicographic_compare_ge $srcmeta_sourceline $sourceline or" \
                "NOT lexicographic_compare_lt $srcmeta_sourceline $sourceline_end" 1>&2

            # The match test above failed, so "same file and source line < window
            # end" here implies the source line precedes the window start.
            if lexicographic_compare_lt "$srcmeta_sourcefile" "$sourcefile" \
                || { [[ "$srcmeta_sourcefile" == "$sourcefile" ]] \
                    && lexicographic_compare_lt "$srcmeta_sourceline" "$sourceline_end"; }; then
                # we will not use this source line [again], so skip it;
                # warn only if we have not used this source line
                if ((have_matched_this_source_line != 1)); then
                    echo "warning: skipping source meta line, comparing lt next obj entry (which has file ${sourcefile}, lines ${sourceline}-${sourceline_end}, address ${obj}<${func}> @${addr}): $srcmeta_sourcefile"$'\t'"$srcmeta_sourceline"$'\t'"$srcmeta_rest" 1>&2
                fi
                keep_old_source_line=0
                # we have not yet consumed the obj line, so don't grab a new obj line
                continue
            fi

            # This means we didn't match, and the source line is not LT the obj line.
            # Try advancing the outer loop and re-testing: we might have a match
            # for the next iteration of the outer loop. Each time we advance the
            # outer, we are giving up on matching that line.
            # (We can say "comparing lt" because the equality case was handled
            # in the first test.)
            # NOTE(review): the message ends with $srcmeta_rest rather than the
            # obj record's own typ/rest fields; kept as-is -- confirm intent.
            echo "warning: skipping objdump meta line, comparing lt next source entry (which has file ${srcmeta_sourcefile}, line ${srcmeta_sourceline}): $obj $func $addr $sourcefile $sourceline $sourceline_end $srcmeta_rest" 1>&2
            keep_old_source_line=1
            continue 2
        done
    done <"$obj_meta_file" 3<"$source_meta_file"
}

all_source_meta_file=$2

echo "all_source_meta_file: $all_source_meta_file" 1>&2
echo "all_obj_meta_file: $all_obj_meta_file" 1>&2

merge_obj_and_source_meta "$all_obj_meta_file" "$all_source_meta_file"