#!/bin/sh
#
# git-subtree.sh: split/join git repositories in subdirectories of this one
#
# Copyright (C) 2009 Avery Pennarun
#
# NOTE(review): the body below uses Oil-style syntax (proc / setvar / @ARGV)
# although the shebang names /bin/sh -- confirm the intended interpreter.
# NOTE(review): $default, $p, $headp, $revs and $unrevs hold several
# space-separated words and are expanded unquoted; confirm that the target
# interpreter splits them the way the git commands below expect.

# With no arguments at all, behave as if -h had been given.
if test $Argc -eq 0 {
  set -- -h
}

# Option specification consumed by 'git rev-parse --parseopt'.  The format is
# line oriented: usage lines, a line holding only '--', then one option per
# line.  Lines starting with a space are group headers.
setvar OPTS_SPEC = "\
git subtree add   --prefix=<prefix> <commit>
git subtree add   --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <ref>
git subtree push  --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash        merge subtree changes as a single commit
"
eval $(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)

setvar PATH = "$PATH:$(git --exec-path)"
source git-sh-setup

require_work_tree

# Option state; an empty string means "not set".
setvar quiet = ''
setvar branch = ''
setvar debug = ''
setvar command = ''
setvar onto = ''
setvar rejoin = ''
setvar ignore_joins = ''
setvar annotate = ''
setvar squash = ''
setvar message = ''
setvar prefix = ''

# Print the arguments to stderr, but only when -d was given.
proc debug {
  if test -n $debug {
    printf "%s\n" "$[join(ARGV)]" >&2
  }
}

# Print the arguments to stderr unless -q was given.
proc say {
  if test -z $quiet {
    printf "%s\n" "$[join(ARGV)]" >&2
  }
}

# Like say, but ends the output with a carriage return instead of a newline.
proc progress {
  if test -z $quiet {
    printf "%s\r" "$[join(ARGV)]" >&2
  }
}

# Run the arguments as a command and die if that command fails.
proc assert {
  if ! @ARGV {
    die "assertion failed: " @ARGV
  }
}

# Consume the options emitted by --parseopt, up to and including '--'.
while test $# -gt 0 {
  setvar opt = "$1"
  shift

  case (opt) {
    -q {
      setvar quiet = '1'
    }
    -d {
      setvar debug = '1'
    }
    --annotate {
      setvar annotate = "$1"
      shift
    }
    --no-annotate {
      setvar annotate = ''
    }
    -b {
      setvar branch = "$1"
      shift
    }
    -P {
      # a single trailing slash is dropped from the prefix
      setvar prefix = "${1%/}"
      shift
    }
    -m {
      setvar message = "$1"
      shift
    }
    --no-prefix {
      setvar prefix = ''
    }
    --onto {
      setvar onto = "$1"
      shift
    }
    --no-onto {
      setvar onto = ''
    }
    --rejoin {
      setvar rejoin = '1'
    }
    --no-rejoin {
      setvar rejoin = ''
    }
    --ignore-joins {
      setvar ignore_joins = '1'
    }
    --no-ignore-joins {
      setvar ignore_joins = ''
    }
    --squash {
      setvar squash = '1'
    }
    --no-squash {
      setvar squash = ''
    }
    -- {
      break
    }
    * {
      die "Unexpected option: $opt"
    }
  }
}

setvar command = "$1"
shift

# $default is passed to 'git rev-parse' wherever revisions are parsed.
case (command) {
  add|merge|pull {
    setvar default = ''
  }
  split|push {
    setvar default = "--default HEAD"
  }
  * {
    die "Unknown command '$command'"
  }
}

if test -z $prefix {
  die "You must provide the --prefix option."
}

# 'add' requires that the prefix does not exist yet; every other command
# requires that it does.
case (command) {
  add {
    test -e $prefix && die "prefix '$prefix' already exists."
  }
  * {
    test -e $prefix || die "'$prefix' does not exist; use 'git subtree add'"
  }
}

setvar dir = "$(dirname "$prefix/.")"

# Only 'split' and 'merge' parse the remaining arguments as revisions here.
if test $command != "pull" && test $command != "add" && test $command != "push" {
  setvar revs = $(git rev-parse $default --revs-only "$@") || exit $?
  setvar dirs = $(git rev-parse --no-revs --no-flags "$@") || exit $?
  if test -n $dirs {
    die "Error: Use --prefix instead of bare filenames."
  }
}

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$[join(ARGV)]}"
debug

# Create an empty cache directory (with a 'notree' subdirectory) under
# $GIT_DIR/subtree-cache, named after the current process id.
proc cache_setup {
  setvar cachedir = "$GIT_DIR/subtree-cache/$$"
  rm -rf $cachedir || die "Can't delete old cachedir: $cachedir"
  mkdir -p $cachedir || die "Can't create new cachedir: $cachedir"
  mkdir -p "$cachedir/notree" || die "Can't create new cachedir: $cachedir/notree"
  debug "Using cachedir: $cachedir" >&2
}

# For every argument that has a readable cache file, print the value stored
# in it.
proc cache_get {
  for oldrev in "$@" {
    if test -r "$cachedir/$oldrev" {
      read newrev <"$cachedir/$oldrev"
      echo $newrev
    }
  }
}

# Print every argument that has no readable cache file.
proc cache_miss {
  for oldrev in "$@" {
    if ! test -r "$cachedir/$oldrev" {
      echo $oldrev
    }
  }
}

# Emit a debug message for each argument that is neither cached nor marked
# in the 'notree' directory.
proc check_parents {
  setvar missed = $(cache_miss "$@")
  for miss in $missed {
    if ! test -r "$cachedir/notree/$miss" {
      debug " incorrect order: $miss"
    }
  }
}

# Mark revision $1 in the 'notree' directory of the cache.
proc set_notree {
  echo "1" > "$cachedir/notree/$1"
}

# Store the mapping $1 -> $2 in the cache.  Overwriting an existing entry is
# an error, except for the special keys latest_old and latest_new.
proc cache_set {
  setvar oldrev = "$1"
  setvar newrev = "$2"
  if test $oldrev != "latest_old" && test $oldrev != "latest_new" && test -e "$cachedir/$oldrev" {
    die "cache for $oldrev already exists!"
  }
  echo $newrev >"$cachedir/$oldrev"
}

# Return 0 if 'git rev-parse' accepts $1, 1 otherwise.
proc rev_exists {
  if git rev-parse $1 >/dev/null 2>&1 {
    return 0
  } else {
    return 1
  }
}

# Return 0 when 'git rev-list' finds no commit that is reachable from branch
# $2 but not from commit $1; return 1 otherwise.
proc rev_is_descendant_of_branch {
  setvar newrev = "$1"
  setvar branch = "$2"
  setvar branch_hash = $(git rev-parse "$branch")
  setvar match = $(git rev-list -1 "$branch_hash" "^$newrev")

  if test -z $match {
    return 0
  } else {
    return 1
  }
}

# if a commit doesn't have a parent, this might not work.  But we only want
# to remove the parent from the rev-list, and since it doesn't exist, it won't
# be there anyway, so do nothing in that case.
proc try_remove_previous {
  if rev_exists "$1^" {
    echo "^$1^"
  }
}

# Scan the log of HEAD for commits carrying a 'git-subtree-dir: $1' line and
# print "<commit> <split>" for the first one that also records a
# git-subtree-split.  Prints nothing when there is none.
proc find_latest_squash {
  debug "Looking for latest squash ($dir)..."
  setvar dir = "$1"
  setvar sq = ''
  setvar main = ''
  setvar sub = ''
  git log --grep="^git-subtree-dir: $dir/*\$" \
    --pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD | while read a b junk {
    debug "$a $b $junk"
    debug "{{$sq/$main/$sub}}"
    case (a) {
      START {
        setvar sq = "$b"
      }
      git-subtree-mainline: {
        setvar main = "$b"
      }
      git-subtree-split: {
        setvar sub = "$(git rev-parse "$b^0")" || die "could not rev-parse split hash $b from commit $sq"
      }
      END {
        if test -n $sub {
          if test -n $main {
            # a rejoin commit?
            # Pretend its sub was a squash.
            setvar sq = "$sub"
          }
          debug "Squash found: $sq $sub"
          echo $sq $sub
          break
        }
        setvar sq = ''
        setvar main = ''
        setvar sub = ''
      }
    }
  }
}

# Scan the log of revisions $2 for commits carrying a 'git-subtree-dir: $1'
# line, record what they say in the cache, and print the '^rev^' exclusions
# produced by try_remove_previous.
proc find_existing_splits {
  debug "Looking for prior splits..."
  setvar dir = "$1"
  setvar revs = "$2"
  setvar main = ''
  setvar sub = ''
  git log --grep="^git-subtree-dir: $dir/*\$" \
    --pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs | while read a b junk {
    case (a) {
      START {
        setvar sq = "$b"
      }
      git-subtree-mainline: {
        setvar main = "$b"
      }
      git-subtree-split: {
        setvar sub = "$(git rev-parse "$b^0")" || die "could not rev-parse split hash $b from commit $sq"
      }
      END {
        debug " Main is: '$main'"
        if test -z $main -a -n $sub {
          # squash commits refer to a subtree
          debug " Squash: $sq from $sub"
          cache_set $sq $sub
        }
        if test -n $main -a -n $sub {
          debug " Prior: $main -> $sub"
          cache_set $main $sub
          cache_set $sub $sub
          try_remove_previous $main
          try_remove_previous $sub
        }
        setvar main = ''
        setvar sub = ''
      }
    }
  }
}

# Create a commit for tree $2 with parent arguments $3, reusing the author,
# committer and message of commit $1 (message prefixed with $annotate).
# The new commit id comes from 'git commit-tree' on stdout.
proc copy_commit {
  # We're going to set some environment vars here, so
  # do it in a subshell to get rid of them safely later
  debug copy_commit "{$1}" "{$2}" "{$3}"
  git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' $1 | shell {
    read GIT_AUTHOR_NAME
    read GIT_AUTHOR_EMAIL
    read GIT_AUTHOR_DATE
    read GIT_COMMITTER_NAME
    read GIT_COMMITTER_EMAIL
    read GIT_COMMITTER_DATE
    export GIT_AUTHOR_NAME \
      GIT_AUTHOR_EMAIL \
      GIT_AUTHOR_DATE \
      GIT_COMMITTER_NAME \
      GIT_COMMITTER_EMAIL \
      GIT_COMMITTER_DATE
    shell {
      printf "%s" $annotate
      cat
    } | git commit-tree $2 $3 # reads the rest of stdin
  } || die "Can't copy commit $1"
}

# Print the commit message for 'add': $message if given, otherwise a default
# subject, followed by the git-subtree-* lines.  The blank line and the
# one-item-per-line layout matter: find_latest_squash and
# find_existing_splits read these lines back from the commit body.
proc add_msg {
  setvar dir = "$1"
  setvar latest_old = "$2"
  setvar latest_new = "$3"
  if test -n $message {
    setvar commit_message = "$message"
  } else {
    setvar commit_message = "Add '$dir/' from commit '$latest_new'"
  }
  cat <<< """
$commit_message

git-subtree-dir: $dir
git-subtree-mainline: $latest_old
git-subtree-split: $latest_new
"""
}

# Print the commit message for a squashed 'add': $message if given, otherwise
# a default naming commit $1 and directory $2.
proc add_squashed_msg {
  if test -n $message {
    echo $message
  } else {
    echo "Merge commit '$1' as '$2'"
  }
}

# Print the commit message for 'split --rejoin'; same layout as add_msg.
proc rejoin_msg {
  setvar dir = "$1"
  setvar latest_old = "$2"
  setvar latest_new = "$3"
  if test -n $message {
    setvar commit_message = "$message"
  } else {
    setvar commit_message = "Split '$dir/' into commit '$latest_new'"
  }
  cat <<< """
$commit_message

git-subtree-dir: $dir
git-subtree-mainline: $latest_old
git-subtree-split: $latest_new
"""
}

# Print the commit message for a squash commit of directory $1 going from
# subtree commit $2 (may be empty) to subtree commit $3.
proc squash_msg {
  setvar dir = "$1"
  setvar oldsub = "$2"
  setvar newsub = "$3"
  setvar newsub_short = $(git rev-parse --short "$newsub")

  if test -n $oldsub {
    setvar oldsub_short = $(git rev-parse --short "$oldsub")
    echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
    echo
    git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
    git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
  } else {
    echo "Squashed '$dir/' content from commit $newsub_short"
  }

  echo
  echo "git-subtree-dir: $dir"
  echo "git-subtree-split: $newsub"
}

# Print the top-level tree id of commit $1.
proc toptree_for_commit {
  setvar commit = "$1"
  git log -1 --pretty=format:'%T' $commit -- || exit $?
}

# Print the tree id that commit $1 has at path $2.  Prints nothing when the
# path is absent or is a submodule ("commit") entry.
proc subtree_for_commit {
  setvar commit = "$1"
  setvar dir = "$2"
  git ls-tree $commit -- $dir | while read mode type tree name {
    assert test $name = $dir
    assert test $type = "tree" -o $type = "commit"
    test $type = "commit" && continue # ignore submodules
    echo $tree
    break
  }
}

# Return 0 ("changed") unless exactly one parent follows tree $1 and that
# parent's top-level tree equals $1.
proc tree_changed {
  setvar tree = "$1"
  shift
  if test $Argc -ne 1 {
    return 0 # weird parents, consider it changed
  } else {
    setvar ptree = $(toptree_for_commit $1)
    if test $ptree != $tree {
      return 0 # changed
    } else {
      return 1 # not changed
    }
  }
}

# Create a squash commit whose tree is the top-level tree of subtree commit
# $3, with parent $1 when $1 is non-empty; $2 is the previous subtree commit
# used for the message.
proc new_squash_commit {
  setvar old = "$1"
  setvar oldsub = "$2"
  setvar newsub = "$3"
  setvar tree = $(toptree_for_commit $newsub) || exit $?
  if test -n $old {
    squash_msg $dir $oldsub $newsub | git commit-tree $tree -p $old || exit $?
  } else {
    squash_msg $dir "" $newsub | git commit-tree $tree || exit $?
  }
}

# Print the commit that represents revision $1 (subtree tree $2) in the split
# history: an already rewritten parent from $3 whose tree is identical, or a
# fresh copy made by copy_commit.
proc copy_or_skip {
  setvar rev = "$1"
  setvar tree = "$2"
  setvar newparents = "$3"
  assert test -n $tree

  setvar identical = ''
  setvar nonidentical = ''
  setvar p = ''
  setvar gotparents = ''
  for parent in $newparents {
    setvar ptree = $(toptree_for_commit $parent) || exit $?
    test -z $ptree && continue
    if test $ptree = $tree {
      # an identical parent could be used in place of this rev.
      setvar identical = "$parent"
    } else {
      setvar nonidentical = "$parent"
    }

    # sometimes both old parents map to the same newparent;
    # eliminate duplicates
    setvar is_new = '1'
    for gp in $gotparents {
      if test $gp = $parent {
        setvar is_new = ''
        break
      }
    }
    if test -n $is_new {
      setvar gotparents = "$gotparents $parent"
      setvar p = "$p -p $parent"
    }
  }

  setvar copycommit = ''
  if test -n $identical && test -n $nonidentical {
    setvar extras = $(git rev-list --count $identical..$nonidentical)
    if test $extras -ne 0 {
      # we need to preserve history along the other branch
      setvar copycommit = '1'
    }
  }
  if test -n $identical && test -z $copycommit {
    echo $identical
  } else {
    copy_commit $rev $tree $p || exit $?
  }
}

# Die if the working tree or the index differs from HEAD.
proc ensure_clean {
  if ! git diff-index HEAD --exit-code --quiet 2>&1 {
    die "Working tree has modifications. Cannot add."
  }
  if ! git diff-index --cached HEAD --exit-code --quiet 2>&1 {
    die "Index has modifications. Cannot add."
  }
}

# Die unless refs/heads/$1 passes 'git check-ref-format'.
proc ensure_valid_ref_format {
  git check-ref-format "refs/heads/$1" || die "'$1' does not look like a ref"
}

# 'add' entry point: accepts either <commit> or <repository> <ref>.
proc cmd_add {
  if test -e $dir {
    die "'$dir' already exists. Cannot add."
  }

  ensure_clean

  if test $Argc -eq 1 {
    git rev-parse -q --verify "$1^{commit}" >/dev/null || die "'$1' does not refer to a commit"

    cmd_add_commit @ARGV
  } elif test $Argc -eq 2 {
    # Technically we could accept a refspec here but we're
    # just going to turn around and add FETCH_HEAD under the
    # specified directory.  Allowing a refspec might be
    # misleading because we won't do anything with any other
    # branches fetched via the refspec.
    ensure_valid_ref_format $2

    cmd_add_repository @ARGV
  } else {
    say "error: parameters were '$[join(ARGV)]'"
    die "Provide either a commit or a repository and commit."
  }
}

# Fetch <repository> <ref> and add the resulting FETCH_HEAD.
proc cmd_add_repository {
  echo "git fetch" @ARGV
  setvar repository = "$1"
  setvar refspec = "$2"
  git fetch @ARGV || exit $?
  setvar revs = 'FETCH_HEAD'
  set -- $revs
  cmd_add_commit @ARGV
}

# Read the given commit's tree into $dir, then create a commit that has both
# HEAD and that commit (or a squash of it) as parents and reset to it.
proc cmd_add_commit {
  setvar revs = $(git rev-parse $default --revs-only "$@") || exit $?
  set -- $revs
  setvar rev = "$1"

  debug "Adding $dir as '$rev'..."
  git read-tree --prefix="$dir" $rev || exit $?
  git checkout -- $dir || exit $?
  setvar tree = $(git write-tree) || exit $?

  setvar headrev = $(git rev-parse HEAD) || exit $?
  if test -n $headrev && test $headrev != $rev {
    setvar headp = "-p $headrev"
  } else {
    setvar headp = ''
  }

  if test -n $squash {
    setvar rev = $(new_squash_commit "" "" "$rev") || exit $?
    setvar commit = $(add_squashed_msg "$rev" "$dir" | git commit-tree "$tree" $headp -p "$rev") || exit $?
  } else {
    setvar revp = $(peel_committish "$rev") && setvar commit = $(add_msg "$dir" $headrev "$rev" | git commit-tree "$tree" $headp -p "$revp") || exit $?
  }
  git reset $commit || exit $?

  say "Added dir '$dir'"
}

# 'split' entry point: rewrite the history of $revs so that it contains only
# $dir, print the id of the newest rewritten commit, and optionally rejoin it
# into HEAD and/or store it in a branch.
proc cmd_split {
  debug "Splitting $dir..."
  cache_setup || exit $?

  if test -n $onto {
    debug "Reading history for --onto=$onto..."
    git rev-list $onto | while read rev {
      # the 'onto' history is already just the subdir, so
      # any parent we find there can be used verbatim
      debug " cache: $rev"
      cache_set $rev $rev
    }
  }

  if test -n $ignore_joins {
    setvar unrevs = ''
  } else {
    setvar unrevs = "$(find_existing_splits "$dir" "$revs")"
  }

  # We can't restrict rev-list to only $dir here, because some of our
  # parents have the $dir contents the root, and those won't match.
  # (and rev-list --follow doesn't seem to solve this)
  # The command is kept as a string so that $revs and $unrevs are expanded
  # when it is eval'ed below.
  setvar grl = 'git rev-list --topo-order --reverse --parents $revs $unrevs'
  setvar revmax = $(eval "$grl" | wc -l)
  setvar revcount = '0'
  setvar createcount = '0'
  eval $grl | while read rev parents {
    setvar revcount = $(($revcount + 1))
    progress "$revcount/$revmax ($createcount)"
    debug "Processing commit: $rev"
    setvar exists = $(cache_get "$rev")
    if test -n $exists {
      debug " prior: $exists"
      continue
    }
    setvar createcount = $(($createcount + 1))
    debug " parents: $parents"
    setvar newparents = $(cache_get $parents)
    debug " newparents: $newparents"

    setvar tree = $(subtree_for_commit "$rev" "$dir")
    debug " tree is: $tree"

    check_parents $parents

    # ugly.  is there no better way to tell if this is a subtree
    # vs. a mainline commit?  Does it matter?
    if test -z $tree {
      set_notree $rev
      if test -n $newparents {
        cache_set $rev $rev
      }
      continue
    }

    setvar newrev = $(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
    debug " newrev is: $newrev"
    cache_set $rev $newrev
    cache_set latest_new $newrev
    cache_set latest_old $rev
  } || exit $?

  setvar latest_new = $(cache_get latest_new)
  if test -z $latest_new {
    die "No new revisions were found"
  }

  if test -n $rejoin {
    debug "Merging split branch into HEAD..."
    setvar latest_old = $(cache_get latest_old)
    git merge -s ours \
      --allow-unrelated-histories \
      -m $(rejoin_msg "$dir" "$latest_old" "$latest_new") \
      $latest_new >&2 || exit $?
  }
  if test -n $branch {
    if rev_exists "refs/heads/$branch" {
      if ! rev_is_descendant_of_branch $latest_new $branch {
        die "Branch '$branch' is not an ancestor of commit '$latest_new'."
      }
      setvar action = 'Updated'
    } else {
      setvar action = 'Created'
    }
    git update-ref -m 'subtree split' \
      "refs/heads/$branch" $latest_new || exit $?
    say "$action branch '$branch'"
  }
  echo $latest_new
  exit 0
}

# 'merge' entry point: merge exactly one revision into $prefix, first turning
# it into a squash commit when --squash was given.
proc cmd_merge {
  setvar revs = $(git rev-parse $default --revs-only "$@") || exit $?
  ensure_clean

  set -- $revs
  if test $Argc -ne 1 {
    die "You must provide exactly one revision. Got: '$revs'"
  }
  setvar rev = "$1"

  if test -n $squash {
    setvar first_split = "$(find_latest_squash "$dir")"
    if test -z $first_split {
      die "Can't squash-merge: '$dir' was never added."
    }
    set $first_split
    setvar old = "$1"
    setvar sub = "$2"
    if test $sub = $rev {
      say "Subtree is already at commit $rev."
      exit 0
    }
    setvar new = $(new_squash_commit "$old" "$sub" "$rev") || exit $?
    debug "New squash commit: $new"
    setvar rev = "$new"
  }

  # The 'git version' output is compared as a string against
  # "git version 1.7" to choose between '-s subtree' and '-Xsubtree=<prefix>'.
  setvar version = $(git version)
  if test $version '<' "git version 1.7" {
    if test -n $message {
      git merge -s subtree --message="$message" $rev
    } else {
      git merge -s subtree $rev
    }
  } else {
    if test -n $message {
      git merge -Xsubtree="$prefix" \
        --message="$message" $rev
    } else {
      git merge -Xsubtree="$prefix" $rev
    }
  }
}

# 'pull' entry point: fetch <repository> <ref> and merge FETCH_HEAD.
proc cmd_pull {
  if test $Argc -ne 2 {
    die "You must provide <repository> <ref>"
  }
  ensure_clean
  ensure_valid_ref_format $2
  git fetch @ARGV || exit $?
  setvar revs = 'FETCH_HEAD'
  set -- $revs
  cmd_merge @ARGV
}

# 'push' entry point: split $prefix and push the resulting commit to
# refs/heads/<ref> in <repository>.
proc cmd_push {
  if test $Argc -ne 2 {
    die "You must provide <repository> <ref>"
  }
  ensure_valid_ref_format $2
  if test -e $dir {
    setvar repository = "$1"
    setvar refspec = "$2"
    echo "git push using: " $repository $refspec
    setvar localrev = $(git subtree split --prefix="$prefix") || die
    git push $repository "$localrev":"refs/heads/$refspec"
  } else {
    die "'$dir' must already exist. Try 'git subtree add'."
  }
}

# Dispatch to the handler of the selected sub-command.
"cmd_$command" @ARGV