Last active
September 17, 2021 12:39
-
-
Save mildsunrise/daded4cd4b0de657106dee9153c514b7 to your computer and use it in GitHub Desktop.
Git history flattener https://twitter.com/mild_sunrise/status/1300181598306996224 (needs GitPython)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
from collections import deque

from git import Repo, Commit
def list_commits(commits):
    """Format commits as one line each: abbreviated hash plus subject line.

    Args:
        commits: iterable of objects exposing ``hexsha`` and ``message``.

    Returns:
        A single string with one ``' <hash8> <subject>'`` line per commit,
        joined by newlines (empty string when ``commits`` is empty).
    """
    lines = []
    for commit in commits:
        # An empty message yields no lines at all, so indexing [0] would
        # raise IndexError; fall back to an empty subject instead.
        subject = next(iter(commit.message.splitlines()), '')
        lines.append(' {} {}'.format(commit.hexsha[:8], subject))
    return '\n'.join(lines)
# Open the bare clone in place (mkdir=False: the directory is not created
# for us) and grab its references for the steps below.
repo = Repo.init(
    'path to bare clone',
    bare=True,
    mkdir=False,
)
refs = repo.refs
# load whole graph
def visit_graph(roots, get_parents=lambda n: n.parents):
    """Return the set of all nodes reachable from ``roots`` via parent links.

    Args:
        roots: iterable of starting nodes (duplicates are allowed).
        get_parents: callable mapping a node to an iterable of its parents;
            defaults to reading the node's ``parents`` attribute.

    Returns:
        A set containing every root and every transitive parent.
    """
    # `seen` doubles as the result and as the "already queued" marker, so
    # membership checks are O(1) and each node is expanded exactly once.
    seen = set()
    queue = deque()
    for root in roots:
        if root not in seen:
            seen.add(root)
            queue.append(root)
    while queue:
        node = queue.popleft()
        for parent in get_parents(node):
            if parent not in seen:
                seen.add(parent)
                queue.append(parent)
    return seen
# Map every reference to the commit it points at, then build a mutable
# copy of the parent lists for every commit reachable from those refs.
refs = {ref: ref.commit for ref in refs}
graph = {
    commit: {"parents": list(commit.parents)}
    for commit in visit_graph(refs.values())
}
# processing step (squash merge commits that point to same tree as one parent)
def squash_into(c1, c2):
    """Drop ``c1`` from the graph, redirecting everything that used it to ``c2``.

    Mutates the module-level ``graph`` and ``refs`` in place: parent lists
    and ref targets equal to ``c1`` are replaced by ``c2``, then ``c1``'s
    own graph entry is deleted.
    """
    # Redirect parent pointers (slice assignment keeps the same list object).
    for entry in graph.values():
        entry['parents'][:] = [c2 if p == c1 else p for p in entry['parents']]

    # Redirect refs that pointed at the squashed commit.
    stale = [ref for ref, target in refs.items() if target == c1]
    for ref in stale:
        refs[ref] = c2

    # Finally forget c1 itself.
    del graph[c1]
total = 0
squashed = []
# Iterate over a snapshot of the keys: squash_into() deletes graph entries.
for commit in tuple(graph):
    if len(commit.parents) <= 1:
        continue  # not a merge commit
    total += 1
    # First (possibly already redirected) parent whose tree equals the merge's.
    same_tree_parent = next(
        (p for p in graph[commit]['parents'] if p.tree == commit.tree),
        None,
    )
    if same_tree_parent is None:
        continue
    squash_into(commit, same_tree_parent)
    squashed.append(commit)

print('{} of {} merge commits were squashed\n{}\n'.format(len(squashed), total, list_commits(squashed)))

# check for additional orphaned commits
reachable = visit_graph(refs.values(), lambda c: graph[c]['parents'])
orphans = set(graph) - reachable
print('{} additional commits were lost (due to changes being in both branches)\n{}\n'.format(len(orphans), list_commits(orphans)))
# rewrite step (emit rewritten commits)
def rewrite(n):
    """Create (once) and return the rewritten counterpart of commit ``n``.

    Every ancestor listed in the module-level ``graph`` is rewritten first,
    so the new commit's parents are themselves rewritten commits. Results
    are memoised under ``graph[commit]['rewritten']``.

    Args:
        n: a commit that is a key of ``graph``.

    Returns:
        The commit object stored in ``graph[n]['rewritten']``.
    """
    # Explicit stack instead of recursion: recursion depth would equal the
    # length of the ancestry chain and overflow Python's recursion limit on
    # long histories.
    stack = [n]
    while stack:
        node = stack[-1]
        entry = graph[node]
        if 'rewritten' in entry:
            stack.pop()
            continue
        pending = [p for p in entry['parents'] if 'rewritten' not in graph[p]]
        if pending:
            # Reversed so parents are processed in their listed order.
            stack.extend(reversed(pending))
            continue
        stack.pop()
        parents = [graph[p]['rewritten'] for p in entry['parents']]
        entry['rewritten'] = Commit.create_from_tree(
            repo=node.repo, tree=node.tree, message=node.message.encode(node.encoding),
            parent_commits=parents, author=node.author, committer=node.committer,
            author_date=node.authored_datetime, commit_date=node.committed_datetime)
    return graph[n]['rewritten']
# Emit a rewritten commit for every commit still present in the graph.
for commit in graph:
    rewrite(commit)

# point refs to rewritten commits
for ref, old_target in refs.items():
    # Only detached, non-remote references are moved.
    if not ref.is_detached or ref.is_remote():
        continue
    ref.set_commit(graph[old_target]['rewritten'])

print('done!')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment