#!/bin/sh
#
# git-subtree.sh: split/join git repositories in subdirectories of this one
#
# Copyright (C) 2009 Avery Pennarun
#
# Usage: see OPTS_SPEC below.  The script parses its options with
# "git rev-parse --parseopt", then dispatches to the cmd_<command>
# function named by the first non-option argument.
#
# Helpers that are called but not defined in this file (die,
# require_work_tree, peel_committish) and the GIT_DIR variable are
# expected to be provided by the sourced git-sh-setup.

# With no arguments at all, behave as if -h had been given.
if test $# -eq 0
then
	set -- -h
fi

# Option specification consumed by "git rev-parse --parseopt".
# Everything before the "--" line is usage text; every line after it
# describes one option.  Lines that start with a space are group headers.
OPTS_SPEC="\
git subtree add   --prefix=<prefix> <commit>
git subtree add   --prefix=<prefix> <repository> <ref>
git subtree merge --prefix=<prefix> <commit>
git subtree pull  --prefix=<prefix> <repository> <ref>
git subtree push  --prefix=<prefix> <repository> <ref>
git subtree split --prefix=<prefix> <commit...>
--
h,help        show the help
q             quiet
d             show debug messages
P,prefix=     the name of the subdir to split out
m,message=    use the given message as the commit message for the merge commit
 options for 'split'
annotate=     add a prefix to commit message of new commits
b,branch=     create a new branch from the split subtree
ignore-joins  ignore prior --rejoin commits
onto=         try connecting new tree to an existing one
rejoin        merge the new branch back into HEAD
 options for 'add', 'merge', and 'pull'
squash        merge subtree changes as a single commit
"
# parseopt prints shell code that resets the positional parameters;
# if it fails we eval "exit <status>" instead.
eval "$(echo "$OPTS_SPEC" | git rev-parse --parseopt -- "$@" || echo exit $?)"

PATH=$PATH:$(git --exec-path)
. git-sh-setup

require_work_tree

# Option state, filled in by the parsing loop below.
quiet=
branch=
debug=
command=
onto=
rejoin=
ignore_joins=
annotate=
squash=
message=
prefix=

# Print all arguments as one line on stderr, only when -d was given.
debug () {
	if test -n "$debug"
	then
		printf "%s\n" "$*" >&2
	fi
}

# Print all arguments as one line on stderr unless -q was given.
say () {
	if test -z "$quiet"
	then
		printf "%s\n" "$*" >&2
	fi
}

# Like say, but end with a carriage return instead of a newline so the
# next progress message overwrites this one.
progress () {
	if test -z "$quiet"
	then
		printf "%s\r" "$*" >&2
	fi
}

# Run the arguments as a command; die if it returns non-zero.
assert () {
	if ! "$@"
	then
		die "assertion failed: " "$@"
	fi
}

# Consume the options up to and including the "--" separator.
while test $# -gt 0
do
	opt="$1"
	shift

	case "$opt" in
	-q)
		quiet=1
		;;
	-d)
		debug=1
		;;
	--annotate)
		annotate="$1"
		shift
		;;
	--no-annotate)
		annotate=
		;;
	-b)
		branch="$1"
		shift
		;;
	-P)
		# Drop one trailing slash from the prefix.
		prefix="${1%/}"
		shift
		;;
	-m)
		message="$1"
		shift
		;;
	--no-prefix)
		prefix=
		;;
	--onto)
		onto="$1"
		shift
		;;
	--no-onto)
		onto=
		;;
	--rejoin)
		rejoin=1
		;;
	--no-rejoin)
		rejoin=
		;;
	--ignore-joins)
		ignore_joins=1
		;;
	--no-ignore-joins)
		ignore_joins=
		;;
	--squash)
		squash=1
		;;
	--no-squash)
		squash=
		;;
	--)
		break
		;;
	*)
		die "Unexpected option: $opt"
		;;
	esac
done

command="$1"
shift

# $default holds extra "git rev-parse" arguments and is deliberately
# expanded unquoted so that it splits into two words (or into none).
case "$command" in
add|merge|pull)
	default=
	;;
split|push)
	default="--default HEAD"
	;;
*)
	die "Unknown command '$command'"
	;;
esac

if test -z "$prefix"
then
	die "You must provide the --prefix option."
fi

case "$command" in
add)
	test -e "$prefix" &&
		die "prefix '$prefix' already exists."
	;;
*)
	test -e "$prefix" ||
		die "'$prefix' does not exist; use 'git subtree add'"
	;;
esac

dir="$(dirname "$prefix/.")"

# add, pull and push interpret their own arguments; for the remaining
# commands resolve the arguments to revisions and reject bare filenames.
if test "$command" != "pull" &&
	test "$command" != "add" &&
	test "$command" != "push"
then
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	dirs=$(git rev-parse --no-revs --no-flags "$@") || exit $?
	if test -n "$dirs"
	then
		die "Error: Use --prefix instead of bare filenames."
	fi
fi

debug "command: {$command}"
debug "quiet: {$quiet}"
debug "revs: {$revs}"
debug "dir: {$dir}"
debug "opts: {$*}"
debug

# Create an empty per-process cache directory under $GIT_DIR.
# The cache is a directory of files named after an old revision, each
# containing the matching new revision; "notree" marks revisions that
# had no subtree.  Sets the global $cachedir.
cache_setup () {
	cachedir="$GIT_DIR/subtree-cache/$$"
	rm -rf "$cachedir" ||
		die "Can't delete old cachedir: $cachedir"
	mkdir -p "$cachedir" ||
		die "Can't create new cachedir: $cachedir"
	mkdir -p "$cachedir/notree" ||
		die "Can't create new cachedir: $cachedir/notree"
	debug "Using cachedir: $cachedir" >&2
}

# For each old revision given, print the cached new revision (if any).
cache_get () {
	for oldrev in "$@"
	do
		if test -r "$cachedir/$oldrev"
		then
			read newrev <"$cachedir/$oldrev"
			echo $newrev
		fi
	done
}

# Print each of the given revisions that has no cache entry.
cache_miss () {
	for oldrev in "$@"
	do
		if ! test -r "$cachedir/$oldrev"
		then
			echo $oldrev
		fi
	done
}

# Emit a debug message for every given parent that is neither cached
# nor marked as "notree".
check_parents () {
	missed=$(cache_miss "$@")
	for miss in $missed
	do
		if ! test -r "$cachedir/notree/$miss"
		then
			debug " incorrect order: $miss"
		fi
	done
}

# Record that revision $1 has no subtree.
set_notree () {
	echo "1" > "$cachedir/notree/$1"
}

# Record that old revision $1 maps to new revision $2.
# Only the special keys latest_old and latest_new may be overwritten.
cache_set () {
	oldrev="$1"
	newrev="$2"
	if test "$oldrev" != "latest_old" &&
		test "$oldrev" != "latest_new" &&
		test -e "$cachedir/$oldrev"
	then
		die "cache for $oldrev already exists!"
	fi
	echo "$newrev" >"$cachedir/$oldrev"
}

# Return 0 if "git rev-parse" accepts $1, 1 otherwise.
rev_exists () {
	if git rev-parse "$1" >/dev/null 2>&1
	then
		return 0
	else
		return 1
	fi
}

# Return 0 if every commit reachable from branch $2 is also reachable
# from revision $1, 1 otherwise.
rev_is_descendant_of_branch () {
	newrev="$1"
	branch="$2"
	branch_hash=$(git rev-parse "$branch")
	match=$(git rev-list -1 "$branch_hash" "^$newrev")

	if test -z "$match"
	then
		return 0
	else
		return 1
	fi
}

# if a commit doesn't have a parent, this might not work.  But we only want
# to remove the parent from the rev-list, and since it doesn't exist, it won't
# be there anyway, so do nothing in that case.
try_remove_previous () {
	if rev_exists "$1^"
	then
		echo "^$1^"
	fi
}

# Scan the log of HEAD for the most recent commit carrying a
# "git-subtree-dir:" trailer for directory $1 and print
# "<squash-commit> <subtree-commit>" for it.  Prints nothing if none
# is found.
find_latest_squash () {
	dir="$1"
	debug "Looking for latest squash ($dir)..."
	sq=
	main=
	sub=
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' HEAD |
	while read a b junk
	do
		debug "$a $b $junk"
		debug "{{$sq/$main/$sub}}"
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$(git rev-parse "$b^0")" ||
			die "could not rev-parse split hash $b from commit $sq"
			;;
		END)
			if test -n "$sub"
			then
				if test -n "$main"
				then
					# a rejoin commit?
					# Pretend its sub was a squash.
					sq="$sub"
				fi
				debug "Squash found: $sq $sub"
				echo "$sq" "$sub"
				break
			fi
			sq=
			main=
			sub=
			;;
		esac
	done
}

# Scan the log of revisions $2 for commits carrying a "git-subtree-dir:"
# trailer for directory $1, seed the cache from them, and print
# "^<rev>^" exclusions for rev-list.
# Note: the while loop is the tail of a pipeline, so variables set in it
# may not survive the loop; results are passed on through the cache
# files and stdout instead.
find_existing_splits () {
	debug "Looking for prior splits..."
	dir="$1"
	revs="$2"
	main=
	sub=
	# $revs may hold several revisions and is split on purpose.
	git log --grep="^git-subtree-dir: $dir/*\$" \
		--pretty=format:'START %H%n%s%n%n%b%nEND%n' $revs |
	while read a b junk
	do
		case "$a" in
		START)
			sq="$b"
			;;
		git-subtree-mainline:)
			main="$b"
			;;
		git-subtree-split:)
			sub="$(git rev-parse "$b^0")" ||
			die "could not rev-parse split hash $b from commit $sq"
			;;
		END)
			debug " Main is: '$main'"
			if test -z "$main" && test -n "$sub"
			then
				# squash commits refer to a subtree
				debug " Squash: $sq from $sub"
				cache_set "$sq" "$sub"
			fi
			if test -n "$main" && test -n "$sub"
			then
				debug " Prior: $main -> $sub"
				cache_set $main $sub
				cache_set $sub $sub
				try_remove_previous "$main"
				try_remove_previous "$sub"
			fi
			main=
			sub=
			;;
		esac
	done
}

# Create a new commit with the author, committer and message of
# commit $1, using tree $2 and the "-p <parent>" arguments in $3.
# The commit message is prefixed with $annotate.  Prints what
# "git commit-tree" prints.
copy_commit () {
	# We're going to set some environment vars here, so
	# do it in a subshell to get rid of them safely later
	debug copy_commit "{$1}" "{$2}" "{$3}"
	git log -1 --pretty=format:'%an%n%ae%n%aD%n%cn%n%ce%n%cD%n%B' "$1" |
	(
		read GIT_AUTHOR_NAME
		read GIT_AUTHOR_EMAIL
		read GIT_AUTHOR_DATE
		read GIT_COMMITTER_NAME
		read GIT_COMMITTER_EMAIL
		read GIT_COMMITTER_DATE
		export  GIT_AUTHOR_NAME \
			GIT_AUTHOR_EMAIL \
			GIT_AUTHOR_DATE \
			GIT_COMMITTER_NAME \
			GIT_COMMITTER_EMAIL \
			GIT_COMMITTER_DATE
		(
			printf "%s" "$annotate"
			cat
		) |
		# $3 is split on purpose: it is a list of "-p <parent>" pairs.
		git commit-tree "$2" $3  # reads the rest of stdin
	) || die "Can't copy commit $1"
}

# Print the commit message for "add": subject, blank line, then the
# git-subtree-dir / -mainline / -split trailers.
#   $1 - subtree directory, $2 - mainline commit, $3 - subtree commit
add_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if test -n "$message"
	then
		commit_message="$message"
	else
		commit_message="Add '$dir/' from commit '$latest_new'"
	fi
	cat <<EOF
$commit_message

git-subtree-dir: $dir
git-subtree-mainline: $latest_old
git-subtree-split: $latest_new
EOF
}

# Print the merge-commit message for "add --squash".
#   $1 - squash commit, $2 - subtree directory
add_squashed_msg () {
	if test -n "$message"
	then
		echo "$message"
	else
		echo "Merge commit '$1' as '$2'"
	fi
}

# Print the commit message for "split --rejoin"; same layout as add_msg.
#   $1 - subtree directory, $2 - mainline commit, $3 - subtree commit
rejoin_msg () {
	dir="$1"
	latest_old="$2"
	latest_new="$3"
	if test -n "$message"
	then
		commit_message="$message"
	else
		commit_message="Split '$dir/' into commit '$latest_new'"
	fi
	cat <<EOF
$commit_message

git-subtree-dir: $dir
git-subtree-mainline: $latest_old
git-subtree-split: $latest_new
EOF
}

# Print the commit message for a squash commit.
#   $1 - subtree directory
#   $2 - previous subtree commit (may be empty for the first squash)
#   $3 - new subtree commit
squash_msg () {
	dir="$1"
	oldsub="$2"
	newsub="$3"
	newsub_short=$(git rev-parse --short "$newsub")

	if test -n "$oldsub"
	then
		oldsub_short=$(git rev-parse --short "$oldsub")
		echo "Squashed '$dir/' changes from $oldsub_short..$newsub_short"
		echo
		git log --pretty=tformat:'%h %s' "$oldsub..$newsub"
		git log --pretty=tformat:'REVERT: %h %s' "$newsub..$oldsub"
	else
		echo "Squashed '$dir/' content from commit $newsub_short"
	fi

	echo
	echo "git-subtree-dir: $dir"
	echo "git-subtree-split: $newsub"
}

# Print the top-level tree hash of commit $1.
toptree_for_commit () {
	commit="$1"
	git log -1 --pretty=format:'%T' "$commit" -- || exit $?
}

# Print the tree hash of directory $2 in commit $1.
# Prints nothing if the path is absent or is a submodule.
subtree_for_commit () {
	commit="$1"
	dir="$2"
	git ls-tree "$commit" -- "$dir" |
	while read mode type tree name
	do
		assert test "$name" = "$dir"
		assert test "$type" = "tree" -o "$type" = "commit"
		test "$type" = "commit" && continue  # ignore submodules
		echo $tree
		break
	done
}

# Return 0 if tree $1 differs from the top-level tree of the single
# parent given after it, or if the number of parents is not exactly
# one; return 1 if the tree is unchanged.
tree_changed () {
	tree=$1
	shift
	if test $# -ne 1
	then
		return 0   # weird parents, consider it changed
	else
		ptree=$(toptree_for_commit $1)
		if test "$ptree" != "$tree"
		then
			return 0   # changed
		else
			return 1   # not changed
		fi
	fi
}

# Create a squash commit holding the top-level tree of subtree commit $3.
#   $1 - previous squash commit to use as parent (may be empty)
#   $2 - previous subtree commit (for the message)
#   $3 - new subtree commit
# Uses the global $dir.  Prints what "git commit-tree" prints.
new_squash_commit () {
	old="$1"
	oldsub="$2"
	newsub="$3"
	tree=$(toptree_for_commit $newsub) || exit $?
	if test -n "$old"
	then
		squash_msg "$dir" "$oldsub" "$newsub" |
		git commit-tree "$tree" -p "$old" || exit $?
	else
		squash_msg "$dir" "" "$newsub" |
		git commit-tree "$tree" || exit $?
	fi
}

# Decide whether revision $1 (subtree tree $2, already-rewritten
# parents $3) needs a new commit.  Prints either an existing parent
# whose tree is identical, or the result of copy_commit.
copy_or_skip () {
	rev="$1"
	tree="$2"
	newparents="$3"
	assert test -n "$tree"

	identical=
	nonidentical=
	p=
	gotparents=
	for parent in $newparents
	do
		ptree=$(toptree_for_commit $parent) || exit $?
		test -z "$ptree" && continue
		if test "$ptree" = "$tree"
		then
			# an identical parent could be used in place of this rev.
			identical="$parent"
		else
			nonidentical="$parent"
		fi

		# sometimes both old parents map to the same newparent;
		# eliminate duplicates
		is_new=1
		for gp in $gotparents
		do
			if test "$gp" = "$parent"
			then
				is_new=
				break
			fi
		done
		if test -n "$is_new"
		then
			gotparents="$gotparents $parent"
			p="$p -p $parent"
		fi
	done

	copycommit=
	if test -n "$identical" && test -n "$nonidentical"
	then
		extras=$(git rev-list --count $identical..$nonidentical)
		if test "$extras" -ne 0
		then
			# we need to preserve history along the other branch
			copycommit=1
		fi
	fi
	if test -n "$identical" && test -z "$copycommit"
	then
		echo $identical
	else
		copy_commit "$rev" "$tree" "$p" || exit $?
	fi
}

# Die unless both the working tree and the index match HEAD.
ensure_clean () {
	if ! git diff-index HEAD --exit-code --quiet 2>&1
	then
		die "Working tree has modifications. Cannot add."
	fi
	if ! git diff-index --cached HEAD --exit-code --quiet 2>&1
	then
		die "Index has modifications. Cannot add."
	fi
}

# Die unless "refs/heads/$1" passes "git check-ref-format".
ensure_valid_ref_format () {
	git check-ref-format "refs/heads/$1" ||
		die "'$1' does not look like a ref"
}

# "git subtree add": accepts either <commit> or <repository> <ref>.
cmd_add () {
	if test -e "$dir"
	then
		die "'$dir' already exists. Cannot add."
	fi

	ensure_clean

	if test $# -eq 1
	then
		git rev-parse -q --verify "$1^{commit}" >/dev/null ||
			die "'$1' does not refer to a commit"

		cmd_add_commit "$@"

	elif test $# -eq 2
	then
		# Technically we could accept a refspec here but we're
		# just going to turn around and add FETCH_HEAD under the
		# specified directory.  Allowing a refspec might be
		# misleading because we won't do anything with any other
		# branches fetched via the refspec.
		ensure_valid_ref_format "$2"

		cmd_add_repository "$@"
	else
		say "error: parameters were '$*'"
		die "Provide either a commit or a repository and commit."
	fi
}

# Fetch <repository> <ref> and add the resulting FETCH_HEAD.
cmd_add_repository () {
	echo "git fetch" "$@"
	repository=$1
	refspec=$2
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	set -- $revs
	cmd_add_commit "$@"
}

# Read the given commit's tree into $dir, commit the result on top of
# HEAD (as a squash commit when --squash was given) and reset to it.
cmd_add_commit () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	set -- $revs
	rev="$1"

	debug "Adding $dir as '$rev'..."
	git read-tree --prefix="$dir" $rev || exit $?
	git checkout -- "$dir" || exit $?
	tree=$(git write-tree) || exit $?

	headrev=$(git rev-parse HEAD) || exit $?
	if test -n "$headrev" && test "$headrev" != "$rev"
	then
		headp="-p $headrev"
	else
		headp=
	fi

	# $headp is split on purpose: it is either empty or "-p <rev>".
	if test -n "$squash"
	then
		rev=$(new_squash_commit "" "" "$rev") || exit $?
		commit=$(add_squashed_msg "$rev" "$dir" |
			git commit-tree "$tree" $headp -p "$rev") || exit $?
	else
		revp=$(peel_committish "$rev") &&
		commit=$(add_msg "$dir" "$headrev" "$rev" |
			git commit-tree "$tree" $headp -p "$revp") || exit $?
	fi
	git reset "$commit" || exit $?

	say "Added dir '$dir'"
}

# "git subtree split": rewrite the history in $revs so that it
# contains only $dir, print the newest rewritten commit, and optionally
# rejoin it into HEAD (--rejoin) and/or point a branch at it (-b).
cmd_split () {
	debug "Splitting $dir..."
	cache_setup || exit $?

	if test -n "$onto"
	then
		debug "Reading history for --onto=$onto..."
		git rev-list $onto |
		while read rev
		do
			# the 'onto' history is already just the subdir, so
			# any parent we find there can be used verbatim
			debug " cache: $rev"
			cache_set "$rev" "$rev"
		done
	fi

	if test -n "$ignore_joins"
	then
		unrevs=
	else
		unrevs="$(find_existing_splits "$dir" "$revs")"
	fi

	# We can't restrict rev-list to only $dir here, because some of our
	# parents have the $dir contents the root, and those won't match.
	# (and rev-list --follow doesn't seem to solve this)
	# The command is kept as a string and eval'ed so that $revs and
	# $unrevs are expanded and split at the time it runs.
	grl='git rev-list --topo-order --reverse --parents $revs $unrevs'
	revmax=$(eval "$grl" | wc -l)
	revcount=0
	createcount=0
	eval "$grl" |
	while read rev parents
	do
		revcount=$((revcount + 1))
		progress "$revcount/$revmax ($createcount)"
		debug "Processing commit: $rev"
		exists=$(cache_get "$rev")
		if test -n "$exists"
		then
			debug " prior: $exists"
			continue
		fi
		createcount=$((createcount + 1))
		debug " parents: $parents"
		# $parents is a space-separated list and is split on purpose.
		newparents=$(cache_get $parents)
		debug " newparents: $newparents"

		tree=$(subtree_for_commit "$rev" "$dir")
		debug " tree is: $tree"

		check_parents $parents

		# ugly.  is there no better way to tell if this is a subtree
		# vs. a mainline commit?  Does it matter?
		if test -z "$tree"
		then
			set_notree "$rev"
			if test -n "$newparents"
			then
				cache_set "$rev" "$rev"
			fi
			continue
		fi

		newrev=$(copy_or_skip "$rev" "$tree" "$newparents") || exit $?
		debug " newrev is: $newrev"
		cache_set "$rev" "$newrev"
		cache_set latest_new "$newrev"
		cache_set latest_old "$rev"
	done || exit $?

	# The loop above is the tail of a pipeline, so its result is read
	# back from the cache rather than from a shell variable.
	latest_new=$(cache_get latest_new)
	if test -z "$latest_new"
	then
		die "No new revisions were found"
	fi

	if test -n "$rejoin"
	then
		debug "Merging split branch into HEAD..."
		latest_old=$(cache_get latest_old)
		git merge -s ours \
			--allow-unrelated-histories \
			-m "$(rejoin_msg "$dir" "$latest_old" "$latest_new")" \
			"$latest_new" >&2 || exit $?
	fi
	if test -n "$branch"
	then
		if rev_exists "refs/heads/$branch"
		then
			if ! rev_is_descendant_of_branch "$latest_new" "$branch"
			then
				die "Branch '$branch' is not an ancestor of commit '$latest_new'."
			fi
			action='Updated'
		else
			action='Created'
		fi
		git update-ref -m 'subtree split' \
			"refs/heads/$branch" "$latest_new" || exit $?
		say "$action branch '$branch'"
	fi
	echo "$latest_new"
	exit 0
}

# "git subtree merge": merge exactly one revision into $prefix,
# first wrapping it in a squash commit when --squash was given.
cmd_merge () {
	revs=$(git rev-parse $default --revs-only "$@") || exit $?
	ensure_clean

	set -- $revs
	if test $# -ne 1
	then
		die "You must provide exactly one revision. Got: '$revs'"
	fi
	rev="$1"

	if test -n "$squash"
	then
		first_split="$(find_latest_squash "$dir")"
		if test -z "$first_split"
		then
			die "Can't squash-merge: '$dir' was never added."
		fi
		# $first_split is "<squash-commit> <subtree-commit>".
		set -- $first_split
		old=$1
		sub=$2
		if test "$sub" = "$rev"
		then
			say "Subtree is already at commit $rev."
			exit 0
		fi
		new=$(new_squash_commit "$old" "$sub" "$rev") || exit $?
		debug "New squash commit: $new"
		rev="$new"
	fi

	# Git older than 1.7 gets the "subtree" merge strategy; newer
	# versions get the -Xsubtree=<prefix> strategy option instead.
	version=$(git version)
	if test "$version" \< "git version 1.7"
	then
		if test -n "$message"
		then
			git merge -s subtree --message="$message" "$rev"
		else
			git merge -s subtree "$rev"
		fi
	else
		if test -n "$message"
		then
			git merge -Xsubtree="$prefix" \
				--message="$message" "$rev"
		else
			git merge -Xsubtree="$prefix" "$rev"
		fi
	fi
}

# "git subtree pull": fetch <repository> <ref> and merge FETCH_HEAD.
cmd_pull () {
	if test $# -ne 2
	then
		die "You must provide <repository> <ref>"
	fi
	ensure_clean
	ensure_valid_ref_format "$2"
	git fetch "$@" || exit $?
	revs=FETCH_HEAD
	set -- $revs
	cmd_merge "$@"
}

# "git subtree push": split $prefix and push the resulting commit to
# refs/heads/<ref> in <repository>.
cmd_push () {
	if test $# -ne 2
	then
		die "You must provide <repository> <ref>"
	fi
	ensure_valid_ref_format "$2"
	if test -e "$dir"
	then
		repository=$1
		refspec=$2
		echo "git push using: " "$repository" "$refspec"
		localrev=$(git subtree split --prefix="$prefix") || die
		git push "$repository" "$localrev":"refs/heads/$refspec"
	else
		die "'$dir' must already exist. Try 'git subtree add'."
	fi
}

# Dispatch to the handler for the requested command.
"cmd_$command" "$@"