Last active
April 22, 2022 02:28
-
-
Save t3rmin4t0r/6991ce21b41b2558c5362455c249204b to your computer and use it in GitHub Desktop.
Convert Hive Tez EXPLAIN plans into Graphviz digraphs (renderable as images) for debugging
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re
import sys

# NX records whether the optional networkx package could be imported.
# mark_cycles() consults it and skips cycle detection when it is False.
NX = True
try:
    import networkx as nx
except ImportError:
    # Only a failed import is treated as "networkx unavailable"; anything
    # else (KeyboardInterrupt, SystemExit, ...) is allowed to propagate.
    NX = False
    sys.stderr.write("Could not import nx\npip install networkx, please\n")
# Sample vertex-dependency listings, one vertex per line, in the form
#     <target> <- <source> (<EDGE_TYPE>), <source> (<EDGE_TYPE>), ...
# NOTE(review): the numeric suffixes presumably identify the query each plan
# was taken from -- confirm with the author.
plan39 = """
Map 1 <- Map 5 (BROADCAST_EDGE), Map 6 (BROADCAST_EDGE), Map 8 (BROADCAST_EDGE), Reducer 7 (BROADCAST_EDGE), Reducer 9 (BROADCAST_EDGE)
Map 10 <- Map 12 (BROADCAST_EDGE), Map 13 (BROADCAST_EDGE), Map 15 (BROADCAST_EDGE), Reducer 14 (BROADCAST_EDGE), Reducer 16 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
Reducer 11 <- Map 10 (SIMPLE_EDGE)
Reducer 14 <- Map 13 (CUSTOM_SIMPLE_EDGE)
Reducer 16 <- Map 15 (CUSTOM_SIMPLE_EDGE)
Reducer 2 <- Map 1 (SIMPLE_EDGE), Reducer 11 (BROADCAST_EDGE)
Reducer 3 <- Reducer 2 (CUSTOM_SIMPLE_EDGE)
Reducer 4 <- Reducer 2 (SIMPLE_EDGE)
Reducer 7 <- Map 6 (CUSTOM_SIMPLE_EDGE)
Reducer 9 <- Map 8 (CUSTOM_SIMPLE_EDGE)
"""

plan58 = """
Map 1 <- Map 2 (BROADCAST_EDGE), Reducer 3 (BROADCAST_EDGE)
Map 13 <- Map 14 (BROADCAST_EDGE), Reducer 15 (BROADCAST_EDGE)
Map 14 <- Map 16 (BROADCAST_EDGE), Reducer 17 (BROADCAST_EDGE)
Map 18 <- Map 13 (BROADCAST_EDGE), Map 20 (BROADCAST_EDGE), Reducer 21 (BROADCAST_EDGE)
Map 2 <- Map 4 (BROADCAST_EDGE), Reducer 5 (BROADCAST_EDGE)
Map 20 <- Reducer 8 (BROADCAST_EDGE)
Map 22 <- Map 23 (BROADCAST_EDGE), Reducer 24 (BROADCAST_EDGE)
Map 23 <- Map 25 (BROADCAST_EDGE), Reducer 26 (BROADCAST_EDGE)
Map 27 <- Map 22 (BROADCAST_EDGE), Map 29 (BROADCAST_EDGE), Reducer 30 (BROADCAST_EDGE)
Map 29 <- Reducer 9 (BROADCAST_EDGE)
Map 6 <- Map 1 (BROADCAST_EDGE), Map 11 (BROADCAST_EDGE), Reducer 12 (BROADCAST_EDGE)
Reducer 10 <- Reducer 7 (SIMPLE_EDGE)
Reducer 12 <- Map 11 (CUSTOM_SIMPLE_EDGE)
Reducer 15 <- Map 14 (CUSTOM_SIMPLE_EDGE)
Reducer 17 <- Map 16 (CUSTOM_SIMPLE_EDGE)
Reducer 19 <- Map 18 (SIMPLE_EDGE)
Reducer 21 <- Map 20 (CUSTOM_SIMPLE_EDGE)
Reducer 24 <- Map 23 (CUSTOM_SIMPLE_EDGE)
Reducer 26 <- Map 25 (CUSTOM_SIMPLE_EDGE)
Reducer 28 <- Map 27 (SIMPLE_EDGE)
Reducer 3 <- Map 2 (CUSTOM_SIMPLE_EDGE)
Reducer 30 <- Map 29 (CUSTOM_SIMPLE_EDGE)
Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
Reducer 7 <- Map 6 (SIMPLE_EDGE), Reducer 19 (BROADCAST_EDGE), Reducer 28 (BROADCAST_EDGE)
Reducer 8 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
Reducer 9 <- Reducer 7 (CUSTOM_SIMPLE_EDGE)
"""

# The listing that the rest of the script parses and renders.
plan = plan58
# One vertex line of a plan: group 1 is the target vertex name, group 2 the
# raw comma-separated source list that follows "<-".
# Raw strings keep the regex backslashes out of Python's own escape handling
# ("\(" in a plain literal is an invalid escape sequence).
VALID = re.compile(r"([A-Za-z0-9 ]*) <-(.*)")

# One source entry: group 1 is the source vertex name, group 2 the edge kind
# written in parentheses after it.
EDGE = re.compile(r"([A-Za-z0-9 ]*) \(([A-Z_]*)\)")

# Edge colour indexed by TezEdge.cycle; index 0 (edge not on any cycle) is the
# empty string.
COLOURS = [
    '',
    'red', 'green', 'blue', 'cyan',
    'purple', 'magenta', 'pink',
    'forestgreen', 'teal',
]
def mark_cycles(edges):
    """Run cycle marking on ``edges`` when networkx is available.

    Delegates to ``_mark_cycles`` and returns its result when the module-level
    ``NX`` flag is true. Otherwise nothing is touched and ``None`` is returned.
    """
    if not NX:
        return None
    return _mark_cycles(edges)
def _mark_cycles(edges):
    """Number every simple cycle in the graph and stamp it onto its edges.

    ``edges`` maps a target vertex to the list of edge objects arriving at it;
    each edge exposes ``src`` and ``target`` and receives a ``cycle`` value.
    Cycles are numbered from 1 in order of increasing length. An edge that
    lies on several cycles keeps the number of the last one processed.

    Returns the number of simple cycles found.
    """
    import networkx as nx

    graph = nx.DiGraph()
    for incoming in edges.values():
        for edge in incoming:
            graph.add_edge(edge.src, edge.target)

    cycles = sorted(nx.simple_cycles(graph), key=len)
    for number, cycle in enumerate(cycles, 1):
        # Pair each vertex with its successor, wrapping back to the start.
        successors = cycle[1:] + cycle[:1]
        for src, target in zip(cycle, successors):
            # edges is keyed by target, so look up the target's incoming
            # edges and pick the ones that originate at src.
            for edge in edges[target]:
                if edge.src == src:
                    edge.cycle = number
    return len(cycles)
class TezEdge(object):
    """A single edge of the plan graph, running from ``src`` to ``target``.

    Attributes are plain instance fields:
      target -- name of the vertex the edge arrives at
      src    -- name of the vertex the edge leaves from
      kind   -- edge kind string as it appears in the plan
      cycle  -- 0 initially; set to a cycle number by cycle marking
    """

    def __init__(self, target, src_kind):
        """Build an edge into ``target`` from a ``(src, kind)`` pair."""
        # Unpack here rather than in the signature: tuple parameters were
        # removed in Python 3 (PEP 3113). Callers still pass a 2-item sequence.
        src, kind = src_kind
        self.target = target
        self.src = src
        self.kind = kind
        self.cycle = 0

    def __repr__(self):
        return "%s -> %s (cycle=%s)" % (self.src, self.target, self.cycle)
def parse(l):
    """Parse one plan line into ``(target, [TezEdge, ...])``.

    Lines that do not match ``VALID`` yield ``None``. For matching lines,
    each comma-separated source entry is matched against ``EDGE`` and wrapped
    in a ``TezEdge`` pointing at the line's target.
    """
    matched = VALID.match(l)
    if not matched:
        return None
    target = matched.group(1)
    sources = []
    for entry in matched.group(2).split(","):
        sources.append(TezEdge(target, EDGE.match(entry.strip()).groups()))
    return (target, sources)
# Map each target vertex to the list of edges arriving at it; lines of the
# plan that parse() rejects (it returns None for them) are dropped.
edges = dict(filter(lambda a: a, [parse(l.strip()) for l in plan.split("\n")]))

# Human-readable edge labels keyed by the edge kind found in the plan.
label = {"BROADCAST_EDGE" : "broadcast", "CUSTOM_SIMPLE_EDGE" : "unsorted", "SIMPLE_EDGE" : "sorted", "CUSTOM_EDGE" : "bucketed"}

# Number of cycles found, or a falsy value when there are none or cycle
# detection was skipped.
n = mark_cycles(edges)

# Emit the graph in DOT syntax on stdout. print(...) with a single
# parenthesised argument behaves the same on Python 2 and Python 3.
print("digraph {")
if n:
    print('label = "%s";labelloc="t";' % ("%d Cycles" % n))

# COLOURS[0] is reserved for edges that are not on a cycle. The remaining
# entries are reused round-robin so that finding more cycles than there are
# colours cannot raise IndexError.
palette = len(COLOURS) - 1
for k in edges:
    for e in edges[k]:
        if e.cycle:
            colour = COLOURS[1 + (e.cycle - 1) % palette]
        else:
            colour = COLOURS[0]
        print('"%s" -> "%s" [label="%s", color="%s"];' % (e.src, e.target, label.get(e.kind), colour))
print("}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment