daler · May 25, 2016 05:21
diff --git a/biostars_193145.py b/biostars_193145.py
 # Example for https://www.biostars.org/p/193145

 import pybedtools
 import pysam

 # Use an example BAM file included with pybedtools
 fn = pybedtools.example_filename('x.bam')
 bam = pysam.Samfile(fn, 'r')

 # Filter reads using pysam. Since all aspects of the SAM-format read
 # are accessible via pysam.AlignedRead, you can do complicated things
 # here if needed. For now, just filter on query length.
 #
 # Using pysam directly avoids the overhead of conversion to
 # pybedtools.Interval objects which are not needed in this use case.
 length_limit = 60
 with pysam.Samfile('z.bam', 'wb', template=bam) as fout:
    for read in bam:
        if read.qlen < length_limit:
            fout.write(read)


 # Then do downstream stuff with pybedtools/bedtools -- here, a histogram of
 # read counts in intervals
 z = pybedtools.BedTool('z.bam')
 bed = pybedtools.example_bedtool('Cp190_Kc_Bushey_2009.bed')
 result = bed.coverage(z, counts=True).to_dataframe(names=['chrom', 'start', 'end', 'count'])
 print(result['count'].value_counts())
 # 
 # 0     5200
 # 1       32
 # 2       11
 # 3        4
 # 5        3
 # 6        2
 # 4        2
 # 22       1
 # 14       1
 # 42       1
 # 45       1
 # 11       1
 # 7        1
 # 76       1
 # 40       1
 # 32       1
 # 20       1
 # 12       1
 # 8        1
 # 15       1
 # Name: count, dtype: int64
	# Example for https://www.biostars.org/p/193145

	import pybedtools
	import pysam

	# Use an example BAM file included with pybedtools
	fn = pybedtools.example_filename('x.bam')
	bam = pysam.Samfile(fn, 'r')

	# Filter reads using pysam. Since all aspects of the SAM-format read
	# are accessible via pysam.AlignedRead, you can do complicated things
	# here if needed. For now, just filter on query length.
	#
	# Using pysam directly avoids the overhead of conversion to
	# pybedtools.Interval objects which are not needed in this use case.
	length_limit = 60
	with pysam.Samfile('z.bam', 'wb', template=bam) as fout:
	for read in bam:
	if read.qlen < length_limit:
	fout.write(read)


	# Then do downstream stuff with pybedtools/bedtools -- here, a histogram of
	# read counts in intervals
	z = pybedtools.BedTool('z.bam')
	bed = pybedtools.example_bedtool('Cp190_Kc_Bushey_2009.bed')
	result = bed.coverage(z, counts=True).to_dataframe(names=['chrom', 'start', 'end', 'count'])
	print(result['count'].value_counts())
	#
	# 0 5200
	# 1 32
	# 2 11
	# 3 4
	# 5 3
	# 6 2
	# 4 2
	# 22 1
	# 14 1
	# 42 1
	# 45 1
	# 11 1
	# 7 1
	# 76 1
	# 40 1
	# 32 1
	# 20 1
	# 12 1
	# 8 1
	# 15 1
	# Name: count, dtype: int64