Created
May 10, 2019 22:53
-
-
Save fanjin-z/0830b61dc7550e9e86acc8056d4cc173 to your computer and use it in GitHub Desktop.
De Novo peptide sequencing algorithm (python)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
''' | |
MIT License | |
Copyright (c) 2019 Fanjin Zeng | |
This work is licensed under the terms of the MIT license, see <https://opensource.org/licenses/MIT>. | |
''' | |
class Peak(): | |
''' | |
Nodes that store peak info | |
''' | |
def __init__(self, mass, intensity): | |
self.mass = mass | |
self.intensity = intensity | |
self.valid = False | |
self.prev = [] | |
self.sequence = '' | |
self.score = - float('inf') | |
def build_graph(spectrum, mass2amino): | |
''' | |
Edges: all possible amino acid mass differences between peaks | |
Nodes: Mass of peaks | |
''' | |
mass2peak = {} | |
idx2peak = {} | |
for idx, (mass, intensity) in enumerate(sorted(spectrum.items())): | |
peak = Peak(mass, intensity) | |
mass2peak[mass] = peak | |
idx2peak[idx] = peak | |
for a in mass2amino.keys(): | |
try: | |
mass2peak[mass-a] | |
peak.prev.append((mass2amino[a], mass2peak[mass-a])) | |
except: | |
pass | |
return mass2peak, idx2peak | |
def mark_path2end(peak): | |
'''Recursive marks peaks that connect to the measure mass peak''' | |
if peak.valid: | |
return | |
peak.valid = True | |
for ch, p in peak.prev: | |
mark_path2end(p) | |
def mark_all(mass2peak): | |
'''Mark all peak as valid''' | |
for mass, peak in mass2peak.items(): | |
peak.valid = True | |
def find_sequence(measure_mass, mass2peak, offset, use_yion): | |
'''Find valid sequence by de novo algorithm''' | |
y_mass = measure_mass - offset + 1 | |
if use_yion and y_mass in mass2peak: | |
mass2peak[offset].score += mass2peak[y_mass].intensity | |
for mass,peak in sorted(mass2peak.items()): | |
if peak.valid: | |
score = - float('inf') | |
best_prev = None | |
best_ch = None | |
for ch, p in peak.prev: | |
if p.score > score: | |
score = p.score | |
best_prev = p | |
best_ch = ch | |
if best_prev is not None: | |
peak.score = score + peak.intensity | |
peak.sequence = best_prev.sequence + best_ch | |
y_mass = measure_mass - mass + 1 | |
if use_yion and y_mass in mass2peak: | |
peak.score += mass2peak[y_mass].intensity | |
def check_spectrum(spectrum, offset, end_peak): | |
'''Check if H and measure mass supposed peaks in spectrum ''' | |
if offset not in spectrum: | |
spectrum[offset] = 0 | |
if end_peak not in spectrum: | |
spectrum[end_peak] = 0 | |
def fill_spectrum(spectrum, offset, end_peak): | |
'''Fill gaps in spectrum peaks''' | |
for mass in range(offset, end_peak+1): | |
if mass not in spectrum: | |
spectrum[mass] = 0 | |
def init_score(mass2peak, offset, subseq): | |
'''Set initial score''' | |
if subseq in ['both', 'right']: | |
for mass, peak in mass2peak.items(): | |
peak.score = peak.intensity | |
else: | |
mass2peak[offset].score = mass2peak[offset].intensity | |
def de_novo(measure_mass, spectrum, mass2amino, use_yion=False, enable_missing_peak=False, subseq=None): | |
''' | |
Run De Novo algorithm | |
measure_mass: INT, unfragmented y-ion mass. | |
spectrum: DICT(INT: INT), mass: intensity | |
mass2amino: DICT(String: INT), amino acid character: mass | |
use_yion: Boolean, If consider y-ion intensity. | |
enable_missing_peak: Boolean, If consider missing peaks | |
subseq: One of [None, 'both', 'left', 'right'] | |
''' | |
offset = 1 | |
end_peak = measure_mass - 19 + offset | |
check_spectrum(spectrum, offset, end_peak) | |
if enable_missing_peak: fill_spectrum(spectrum, offset, end_peak) | |
mass2peak, idx2peak = build_graph(spectrum, mass2amino) | |
if subseq in ['both', 'left']: | |
mark_all(mass2peak) | |
else: | |
peak = mass2peak[end_peak] | |
mark_path2end(peak) | |
init_score(mass2peak, offset, subseq) | |
find_sequence(measure_mass, mass2peak, offset, use_yion) | |
return mass2peak |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment