OVM Build Performance

#!/bin/bash # # Measure the time it takes to build a binary with different compilers on # different machines, and measure the binary size. # # Usage: # ./ovm-build.sh # Directories used: # # oilshell.org/blob/ # ovm-build/ # # ~/git/oilshell/ # oil/ # _deps/ # ovm-build # tarballs and extracted source # _tmp/ # ovm-build/ # raw/ # output CSV # stage1 # benchmark-data/ # ovm-build/ # raw/ # compiler-id/ # host-id/ set -o nounset set -o pipefail set -o errexit source benchmarks/common.sh # for log, etc. source build/common.sh # for $CLANG readonly BASE_DIR=_tmp/ovm-build readonly TAR_DIR=$PWD/_deps/ovm-build # Make it absolute # # Dependencies # readonly OIL_VERSION=$[head -n 1 oil-version.txt] # Leave out mksh for now, because it doesn't follow ./configure make. It just # has Build.sh. readonly -a TAR_SUBDIRS=( bash-4.4 dash-0.5.9.1 ) # mksh ) # NOTE: Same list in oilshell.org/blob/run.sh. proc tarballs { cat << """ bash-4.4.tar.gz dash-0.5.9.1.tar.gz mksh-R56c.tgz """ } proc download { mkdir -p $TAR_DIR tarballs | xargs -n 1 -I {} --verbose -- \ wget --directory $TAR_DIR 'https://www.oilshell.org/blob/ovm-build/{}' } # Done MANUALLY. proc extract-other { time for f in [$TAR_DIR/*gz] { tar -x --directory $TAR_DIR --file $f } } # Done automatically by 'measure' function. # # NOTE: We assume that _release/oil.tar exists. It should be made by # scripts/release.sh build-and-test or benchmark-build. proc extract-oil { # This is different than the others tarballs. rm -r -f -v $TAR_DIR/oil-* tar -x --directory $TAR_DIR --file _release/oil.tar } # # Measure Size of Binaries. # # Other tools: # - bloaty to look inside elf file # - nm? Just a flat list of symbols? Counting them would be nice. # - zipfile.py to look inside bytecode.zip proc sizes-tsv { # host_label matches the times.tsv file output by report.R echo $'host_label\tnum_bytes\tpath' local host=$[hostname] find @Argv -maxdepth 0 -printf "$host\t%s\t%p\n" } # NOTE: This should be the same on all x64 machines. But I want to run it on # x64 machines. proc measure-sizes { local prefix=$(1:-$BASE_DIR/raw/demo) # PROBLEM: Do I need provenance for gcc/clang here? I can just join it later # in R. sizes-tsv $TAR_DIR/oil-$OIL_VERSION/_build/oil/bytecode-opy.zip \ > $(prefix).bytecode-size.tsv sizes-tsv $BASE_DIR/bin/*/oil.* \ > $(prefix).bin-sizes.tsv sizes-tsv $BASE_DIR/bin/*/*sh \ > $(prefix).other-shell-sizes.tsv log "Wrote $(prefix).*.tsv" # Native portion, but it's not separated out by compiler. We can just # subtract. #$TAR_DIR/oil-$OIL_VERSION/_build/oil/ovm* \ } # # Unused Demos # proc bytecode-size { local zip=_build/oil/bytecode.zip # 242 files, 1.85 MB unzip -l $zip | tail -n 1 # 1.88 MB, so there's 30K of header overhead. ls -l $zip } # 6.8 seconds for debug build, instead of 8 seconds. proc clang-oil-dbg { make clean env CC=$CLANG make _build/oil/ovm-dbg } # # Measure Elapsed Time # # Add --target-size? Add that functionality to benchmarks/time.py? # # Should we add explicit targets? # - ovm-clang, ovm-clang-dbg # - ovm-gcc, ovm-gcc-dbg # # It would be possible, but it complicates the makefile. proc build-task { local raw_dir=$1 # output local job_id=$2 local host=$3 local host_hash=$4 local compiler_path=$5 local compiler_hash=$6 local src_dir=$7 local action=$8 local times_out="$PWD/$raw_dir/$host.$job_id.times.tsv" # Definitions that depends on $PWD. local -a TIME_PREFIX=( time-tsv \ --output $times_out \ --field "$host" --field "$host_hash" \ --field "$compiler_path" --field "$compiler_hash" \ --field "$src_dir" --field "$action" ) local bin_base_dir=$PWD/$BASE_DIR/bin local bin_dir="$bin_base_dir/$[basename $compiler_path]" mkdir -p $bin_dir pushd $src_dir >/dev/null # NOTE: We're not saving the output anywhere. We save the status, which # protects against basic errors. match $action { with configure $(TIME_PREFIX[@]) -- ./configure # Cleaning here relies on the ORDER of tasks.txt. configure happens # before build. The Clang build shouldn't reuse GCC objects! # It has to be done after configure, because the Makefile must exist! make clean with make $(TIME_PREFIX[@]) -- make CC=$compiler_path local target match $src_dir { with */bash* setglobal target = 'bash' with */dash* setglobal target = 'src/dash' } strip $target cp -v $target $bin_dir with * local target=$action # Assume it's a target like _bin/oil.ovm $(TIME_PREFIX[@]) -- make CC=$compiler_path $target cp -v $target $bin_dir } popd >/dev/null } proc oil-tasks { local provenance=$1 # NOTE: it MUST be a tarball and not the git repo, because we don't build # bytecode-*.zip! We care about the "packager's experience". local dir="$TAR_DIR/oil-$OIL_VERSION" # Add 1 field for each of 5 fields. cat $provenance | while read line { # NOTE: configure is independent of compiler. echo $line $dir configure echo $line $dir _bin/oil.ovm echo $line $dir _bin/oil.ovm-dbg } } proc other-shell-tasks { local provenance=$1 # NOTE: it MUST be a tarball and not the git repo, because we do the build # of bytecode.zip! We care about the "package experience". local tarball='_release/oil.0.5.alpha1.gz' # Add 1 field for each of 5 fields. cat $provenance | while read line { match $line { # Skip clang for now. with *clang* continue } for dir in [$(TAR_SUBDIRS[@])] { echo $line $TAR_DIR/$dir configure echo $line $TAR_DIR/$dir make } } } # 5 releases: 0.0.0 to 0.4.0. For now, just do the 0.5.alpha1 release, and # show the drop. proc oil-historical-tasks { echo } # action is 'configure', a target name, etc. readonly HEADER=$'status\telapsed_secs\thost_name\thost_hash\tcompiler_path\tcompiler_hash\tsrc_dir\taction' readonly NUM_COLUMNS=7 # 5 from provenence, then tarball/target proc measure { local provenance=$1 # from benchmarks/id.sh compiler-provenance local raw_dir=$(2:-$BASE_DIR/raw) extract-oil # Job ID is everything up to the first dot in the filename. local name=$[basename $provenance] local prefix=$(name%.compiler-provenance.txt) # strip suffix local times_out="$raw_dir/$prefix.times.tsv" # NOTE: Do we need two raw dirs? mkdir -p $BASE_DIR/{raw,stage1,bin} $raw_dir # TODO: the $times_out calculation is duplicated in build-task()0 # Write Header of the CSV file that is appended to. echo $HEADER > $times_out local t1=$BASE_DIR/oil-tasks.txt local t2=$BASE_DIR/other-shell-tasks.txt oil-tasks $provenance > $t1 other-shell-tasks $provenance > $t2 #grep dash $t2 | time cat $t1 $t2 | xargs -n $NUM_COLUMNS -- $0 build-task $raw_dir || die "*** Some tasks failed. ***" measure-sizes $raw_dir/$prefix cp -v $provenance $raw_dir } # # Data Preparation and Analysis # proc stage1 { local raw_dir=$(1:-$BASE_DIR/raw) local out=$BASE_DIR/stage1 mkdir -p $out local x local -a a b # Globs are in lexicographical order, which works for our dates. setglobal x = "$out/times.tsv" setglobal a = ''($raw_dir/flanders.*.times.tsv) setglobal b = ''($raw_dir/lisa.*.times.tsv) tsv-concat $(a[-1]) $(b[-1]) > $x setglobal x = "$out/bytecode-size.tsv" setglobal a = ''($raw_dir/flanders.*.bytecode-size.tsv) setglobal b = ''($raw_dir/lisa.*.bytecode-size.tsv) tsv-concat $(a[-1]) $(b[-1]) > $x setglobal x = "$out/bin-sizes.tsv" setglobal a = ''($raw_dir/flanders.*.bin-sizes.tsv) setglobal b = ''($raw_dir/lisa.*.bin-sizes.tsv) tsv-concat $(a[-1]) $(b[-1]) > $x # Construct a one-column TSV file local raw_data_tsv=$out/raw-data.tsv do { echo 'path' echo $(a[-1]) echo $(b[-1]) } > $raw_data_tsv head $out/* wc -l $out/* } proc print-report { local in_dir=$1 local base_url='../../web' cat << """ OVM Build Performance

oilshell.org

OVM Build Performance

Elapsed Time by Host and Compiler

We measure the build speed of bash and dash for comparison.

""" tsv2html --css-class-pattern 'special ^oil' $in_dir/times.tsv cat << """

Binary Size

The oil binary has two portions:

Architecture-independent bytecode.zip
Architecture- and compiler- dependent native code (_build/oil/ovm*)

""" # Highlight the "default" production build tsv2html --css-class-pattern 'special /gcc/oil.ovm$' $in_dir/sizes.tsv cat << """

Host and Compiler Details

""" tsv2html $in_dir/hosts.tsv tsv2html $in_dir/compilers.tsv cat << """ """ } @Argv