daler · November 1, 2016 18:55
diff --git a/binary-heatmap-inspect.py b/binary-heatmap-inspect.py
 """
 Run the script to see example output.

 Modify the features in a, b, and c to see how they affect the output

 pybedtools.contrib.plotting.BinaryHeatmap uses the "cluster" method,
 but unclustered and naive merge versions are shown as well.
 """

 import numpy as np
 import pybedtools


 def ascii_bed(x, lim):
    """
    Prints a single-line ASCII representation of a BED file, replacing the
    coordinates with the one-character name of each feature.
    """
    first = False
    y = x.sort()
    a = np.zeros((lim,), dtype=str)
    a[:] = '.'
    for i in y:
        a[i.start:i.stop] = i.name
    return ''.join(list(a))

 # Play around with the coordinates of a, b, and c.
 #
 # The code() function below expects the names to be "a", "b", or "c".
 a = pybedtools.BedTool(
    """
    chr1  6   12 a
    chr1  14  20 a
    chr1  22  30 a
    """, from_string=True)

 b = pybedtools.BedTool(
    """
    chr1  14  32 b
    """, from_string=True)

 c = pybedtools.BedTool(
    """
    chr1  10  13 c
    """, from_string=True)

 unclustered = pybedtools.BedTool().multi_intersect(
    i=[a.fn, b.fn, c.fn])

 clustered = pybedtools.BedTool().multi_intersect(
    i=[a.fn, b.fn, c.fn], cluster=True)



 def code(n):
    """
    Get a unique code for each combination of characters so the combinatorial
    binding can be represented by a single character.

    e.g.,

    0 = no feature
    1 = a
    3 = b
    4 = a+b
    5 = c
    6 = c+a
    8 = b+c
    9 = a+b+c
    """
    i = 0
    if 'a' in n:
        i += 1
    if 'b' in n:
        i += 3
    if 'c' in n:
        i += 5
    return i


 def cluster_rename(x):
    d = {'1': 'a', '2':'b', '3':'c'}
    letters = [d[i] for i in x[4].split(',')]
    x.name = str(code(letters))
    return x


 def fix(x):
    """
    After merging distinct, split out the letters into a code and set that code
    as the feature's name.
    """
    n = x.name.split(',')
    x.name = str(code(n))
    return x

 # An alternative strategy: concatenate all files, merge them together keeping
 # track of the sets of names, and convert to codes (see the code() function).
 f = a.cat(b, c, postmerge=False).sort().merge(c=4, o='distinct')

 print("a:\n{}".format(a))
 print("b:\n{}".format(b))
 print("c:\n{}".format(c))
 print("d (multiintersect; cluster=False):\n{}".format(unclustered))
 print("e (multiintersect; cluster=True):\n{}".format(clustered))
 print("f (cat-and-merge):\n{}".format(f))
 number_line = '01234567890123456789012345678901234567890'

 lim = max(max(i.stop for i in x) for x in [a,b,c])

 print(number_line)
 print('ASCII version of a, b, c:\n' + '\n'.join([ascii_bed(i, lim) for i in [a, b, c]]))

 print("\nclustered:")
 print(ascii_bed(clustered.each(cluster_rename).saveas(), lim))

 print("\nunclustered:")
 print(ascii_bed(unclustered.each(cluster_rename).saveas(), lim))

 print('\nmerged:')
 print(ascii_bed(f.each(fix).saveas(), lim))


 print('key:')
 print('\n'.join('{0:>5}: {1}'.format(i, code(i)) for i in ['a', 'b', 'c', 'ab', 'ac', 'bc', 'abc']))
	"""
	Run the script to see example output.

	Modify the features in a, b, and c to see how they affect the output

	pybedtools.contrib.plotting.BinaryHeatmap uses the "cluster" method,
	but unclustered and naive merge versions are shown as well.
	"""

	import numpy as np
	import pybedtools


	def ascii_bed(x, lim):
	"""
	Prints a single-line ASCII representation of a BED file, replacing the
	coordinates with the one-character name of each feature.
	"""
	first = False
	y = x.sort()
	a = np.zeros((lim,), dtype=str)
	a[:] = '.'
	for i in y:
	a[i.start:i.stop] = i.name
	return ''.join(list(a))

	# Play around with the coordinates of a, b, and c.
	#
	# The code() function below expects the names to be "a", "b", or "c".
	a = pybedtools.BedTool(
	"""
	chr1 6 12 a
	chr1 14 20 a
	chr1 22 30 a
	""", from_string=True)

	b = pybedtools.BedTool(
	"""
	chr1 14 32 b
	""", from_string=True)

	c = pybedtools.BedTool(
	"""
	chr1 10 13 c
	""", from_string=True)

	unclustered = pybedtools.BedTool().multi_intersect(
	i=[a.fn, b.fn, c.fn])

	clustered = pybedtools.BedTool().multi_intersect(
	i=[a.fn, b.fn, c.fn], cluster=True)



	def code(n):
	"""
	Get a unique code for each combination of characters so the combinatorial
	binding can be represented by a single character.

	e.g.,

	0 = no feature
	1 = a
	3 = b
	4 = a+b
	5 = c
	6 = c+a
	8 = b+c
	9 = a+b+c
	"""
	i = 0
	if 'a' in n:
	i += 1
	if 'b' in n:
	i += 3
	if 'c' in n:
	i += 5
	return i


	def cluster_rename(x):
	d = {'1': 'a', '2':'b', '3':'c'}
	letters = [d[i] for i in x[4].split(',')]
	x.name = str(code(letters))
	return x


	def fix(x):
	"""
	After merging distinct, split out the letters into a code and set that code
	as the feature's name.
	"""
	n = x.name.split(',')
	x.name = str(code(n))
	return x

	# An alternative strategy: concatenate all files, merge them together keeping
	# track of the sets of names, and convert to codes (see the code() function).
	f = a.cat(b, c, postmerge=False).sort().merge(c=4, o='distinct')

	print("a:\n{}".format(a))
	print("b:\n{}".format(b))
	print("c:\n{}".format(c))
	print("d (multiintersect; cluster=False):\n{}".format(unclustered))
	print("e (multiintersect; cluster=True):\n{}".format(clustered))
	print("f (cat-and-merge):\n{}".format(f))
	number_line = '01234567890123456789012345678901234567890'

	lim = max(max(i.stop for i in x) for x in [a,b,c])

	print(number_line)
	print('ASCII version of a, b, c:\n' + '\n'.join([ascii_bed(i, lim) for i in [a, b, c]]))

	print("\nclustered:")
	print(ascii_bed(clustered.each(cluster_rename).saveas(), lim))

	print("\nunclustered:")
	print(ascii_bed(unclustered.each(cluster_rename).saveas(), lim))

	print('\nmerged:')
	print(ascii_bed(f.each(fix).saveas(), lim))


	print('key:')
	print('\n'.join('{0:>5}: {1}'.format(i, code(i)) for i in ['a', 'b', 'c', 'ab', 'ac', 'bc', 'abc']))