aurthurm · June 27, 2025 16:11 · aurthurm · Jun 27, 2025
diff --git a/recategoriser.py b/recategoriser.py
 import pandas as pd

 # Load the three tab-separated reference tables from the working directory.
 breakpoints = pd.read_csv('Breakpoints.txt', sep='\t', low_memory=False)
 antibiotics = pd.read_csv('Antibiotics.txt', sep='\t', low_memory=False)
 organisms = pd.read_csv('Organisms.txt', sep='\t', low_memory=False)

 # limit breakpoints to 2024 CLSI for Humans only
 # (and only the MIC and DISK test methods); all four conditions must hold.
 breakpoints = breakpoints[
    breakpoints['TEST_METHOD'].isin(['MIC', 'DISK']) &
    (breakpoints['GUIDELINES'] == 'CLSI') &
    (breakpoints['YEAR'] == 2024) &
    (breakpoints['HOST'] == 'Human')
 ]

 # map to groups names based on the grouping codes provided
 def get_org_category(row, organisms_df):
    """Return the category label for a single breakpoint row.

    ``row`` supplies ``ORGANISM_CODE_TYPE`` and ``ORGANISM_CODE``;
    ``organisms_df`` is the organism table searched when the code type is
    the name of one of its columns.

    The result is one of:
    * a fixed label for the WHONET_ORG_CODE / ALL / ANAEROBE code types;
    * a hand-maintained group name for the *_GROUP code types, or
      'drop-me' when the code is not listed;
    * the first non-null value of the organism-table column named by the
      part of the code type before its first underscore;
    * "value-column-not-found" when that column does not exist;
    * "no-match" in every other case.
    """
    kind = row['ORGANISM_CODE_TYPE']
    org_code = row['ORGANISM_CODE']

    # Code types whose label does not depend on the code itself.
    fixed_labels = {
        'WHONET_ORG_CODE': "organism-direct",
        'ALL': "organisms-all",
        'ANAEROBE': "Anaerobe",
    }
    if kind in fixed_labels:
        return fixed_labels[kind]

    # Group code types resolved through hand-written code -> name tables.
    # Species code definitions not in any file: Thanks to deepseek AI for the research to find mapping names
    group_labels = {
        'GENUS_GROUP': {
            'NFR': 'Non-Fermenting Rods',
        },
        'SEROVAR_GROUP': {
            'HIN': 'Haemophilus',
            'ECO': 'Enterobacteriaceae',
        },
        'SPECIES_GROUP': {
            'ABX': 'Acinetobacter baumannii complex',
            'MTX': 'Mycobacterium tuberculosis complex',
            'MAX': 'Mycobacterium avium complex',
            'SGM': 'Slowly Growing Mycobacteria',
            'RGM': 'Rapidly Growing Mycobacteria',
            'BCX': 'Burkholderia cepacia complex',
            'SVI': 'Streptococcus (Viridans)',
            'BS-': 'Streptococcus (Beta Hem)',
            'COF': 'Coryneform (Diphtheroids)',
        },
    }
    if kind in group_labels:
        return group_labels[kind].get(org_code, 'drop-me')

    # Everything else must name a column of the organism table.
    if kind not in organisms_df.columns:
        return "no-match"

    # e.g. a code type 'GENUS_CODE' reads its display value from 'GENUS'.
    name_column = kind.split('_')[0].upper()
    if name_column not in organisms_df.columns:
        return "value-column-not-found"

    # First non-null name among the organisms carrying this code.
    names = organisms_df.loc[organisms_df[kind] == org_code, name_column].dropna()
    return "no-match" if names.empty else names.iloc[0]
    
 # Label every retained breakpoint row by calling get_org_category row-wise
 # (axis=1), passing the organism table as the extra argument.
 breakpoints['CATEGORY'] = breakpoints.apply(get_org_category, axis=1, args=(organisms,))

 # map to organisms for those with direct mappings to specific organisms
 def get_org_direct(row, organisms_df):
    """Return the organism name a breakpoint row points at directly.

    Only rows whose ``ORGANISM_CODE_TYPE`` is 'WHONET_ORG_CODE' are looked
    up: the ``ORGANISM`` value of the first ``organisms_df`` row with the
    same WHONET_ORG_CODE is returned.  Every other row, and any code that
    is not found, yields the placeholder "Grouping".
    """
    kind = row['ORGANISM_CODE_TYPE']
    org_code = row['ORGANISM_CODE']
    placeholder = "Grouping"

    if kind != 'WHONET_ORG_CODE':
        return placeholder

    hits = organisms_df[organisms_df[kind] == org_code]
    return placeholder if hits.empty else hits.iloc[0]['ORGANISM']
    
 # Organism name for rows that reference an organism directly ("Grouping" otherwise).
 breakpoints['ORGANISM'] = breakpoints.apply(get_org_direct, axis=1, args=(organisms,))
 # NOTE(review): bare expression, result discarded - looks like a notebook inspection leftover.
 breakpoints['CATEGORY'].unique()

 # keep only the breakpoints whose category is not one of the "unmapped" sentinels
 not_found = ['value-column-not-found','no-match','organisms-all','drop-me']
 good_bp = breakpoints[~breakpoints['CATEGORY'].isin(not_found)]

 # merge remapped breakpoints to antibiotics
 # (rows whose WHONET_ABX_CODE has no match in the antibiotics table are dropped)
 dataset = pd.merge(good_bp, antibiotics, on='WHONET_ABX_CODE', how='inner')  # 'inner' join
 # NOTE(review): bare expression, result discarded - no effect when run as a script.
 dataset.columns

 # select only required columns and rename
 def columner(df):
    """Return ``df`` restricted to the reporting columns, in a fixed order.

    'GUIDELINES_x' and 'POTENCY_x' are selected under their suffixed names,
    so ``df`` must already carry those columns.
    """
    wanted = [
        'GUIDELINES_x', 'YEAR', 'TEST_METHOD', 'HOST',
        'ORGANISM_CODE', 'ORGANISM_CODE_TYPE', 'BREAKPOINT_TYPE',
        'ANTIBIOTIC', 'POTENCY_x',
        'R', 'I', 'S',
        'CATEGORY', 'ORGANISM',
    ]
    return df[wanted]

 # Keep the reporting columns, then strip the merge suffixes from their names.
 dataset = columner(dataset).rename(columns={
    'GUIDELINES_x': 'GUIDELINES',
    'POTENCY_x': 'POTENCY',
 })

 # remove exact duplicate rows
 dataset = dataset.drop_duplicates()

 # one frame per test method
 mic_breakpoints = dataset[dataset['TEST_METHOD'] == 'MIC']
 disk_breakpoints = dataset[dataset['TEST_METHOD'] == 'DISK']

 # write the combined and the per-method tables
 dataset.to_csv("bp_recategorised_all.csv", index=False)
 mic_breakpoints.to_csv("bp_recategorised_mic.csv", index=False)
 disk_breakpoints.to_csv("bp_recategorised_disc.csv", index=False)

 # row count per (category, organism, antibiotic) combination, for inspection
 (
    dataset[['CATEGORY', 'ORGANISM', 'ANTIBIOTIC']]
    .value_counts()
    .sort_index()
    .reset_index(name='count')
    .to_csv('bp_all_counts.csv', index=False)
 )

 # Category of the first breakpoint row seen for each (code type, code) pair;
 # rows labelled 'organism-direct' are left out of the mapping.
 mapping_dict = {}
 for _, row in breakpoints.iterrows():
    key = (row['ORGANISM_CODE_TYPE'], row['ORGANISM_CODE'])
    category = row['CATEGORY']
    if category != 'organism-direct':
        # setdefault keeps the earliest category for a repeated key
        mapping_dict.setdefault(key, category)

 # For each organism determine its AST category function
 def find_category_for_organism(row, mapper):
    """Return the category of the first ``mapper`` entry that ``row`` matches.

    ``mapper`` maps ``(column, code)`` pairs to category labels.  Entries are
    tried in the mapping's iteration order; an entry matches when ``column``
    is present in ``row`` and ``row[column]`` equals ``code``.  Returns None
    when nothing matches.
    """
    matches = (
        label
        for (column, wanted), label in mapper.items()
        if column in row and row[column] == wanted
    )
    return next(matches, None)

 # Give each organism row the category of the first mapping_dict entry it matches (None if none).
 organisms['CATEGORY'] = organisms.apply(find_category_for_organism, axis=1, args=(mapping_dict,))

 # Phylum names treated as Gram-positive by mo_gramstain.
 GRAM_POSITIVE_PHYLA = {
    "Actinobacteria",
    "Actinomycetota",
    "Firmicutes",
    "Bacillota",
    "Tenericutes",
    "Mycoplasmatota",
    "Chloroflexi",
    "Chloroflexota",
 }

 def mo_gramstain(row):
    """Return "gram+" or "gram-" for a bacterial row, None for anything else.

    Bacteria default to "gram-"; a phylum listed in GRAM_POSITIVE_PHYLA makes
    the row "gram+" unless its class is "Negativicutes".  An ``ORGANISM_TYPE``
    of '+' or '-' overrides the taxonomy-based answer.
    """
    if row["KINGDOM"] != "Bacteria":
        return None

    by_taxonomy = (
        "gram+"
        if row["PHYLUM"] in GRAM_POSITIVE_PHYLA and row["CLASS"] != "Negativicutes"
        else "gram-"
    )

    # An explicit sign in ORGANISM_TYPE wins over the phylum/class rule.
    declared = row['ORGANISM_TYPE']
    if declared == '+':
        return "gram+"
    if declared == '-':
        return "gram-"
    return by_taxonomy

 # Gram stain label for every organism row, computed row-wise by mo_gramstain.
 organisms["GRAM_STAIN"] = organisms.apply(mo_gramstain, axis=1)

 def infer_morphology(row):
    """Return a morphology label for an organism row, or None if unknown.

    The stripped, lower-cased ``ORGANISM`` name is checked against a short
    list of species first; otherwise the stripped ``GENUS`` (compared
    case-sensitively) is looked up in the genus table.  Missing fields are
    treated as empty strings.
    """
    genus_key = str(row.get("GENUS", "")).strip()
    species_key = str(row.get("ORGANISM", "")).strip().lower()

    # Species that get an explicit label ahead of the genus lookup.
    species_shapes = (
        ("coccus.diplococci", {
            "neisseria meningitidis",
            "neisseria gonorrhoeae",
            "streptococcus pneumoniae",
        }),
        ("coccus.staphylococci", {"staphylococcus aureus"}),
        ("coccus.streptococci", {"streptococcus pyogenes"}),
        ("rod.bacillus", {"lactobacillus acidophilus"}),
        ("spiral.vibrio", {"vibrio cholerae"}),
        ("spiral.spirillum", {"helicobacter pylori", "treponema pallidum"}),
    )
    for shape, species in species_shapes:
        if species_key in species:
            return shape

    # Genus-level defaults; every genus appears under exactly one label.
    genus_shapes = (
        # Cocci
        ("coccus.staphylococci", {"Staphylococcus"}),
        ("coccus.streptococci", {"Streptococcus", "Enterococcus"}),
        ("coccus.diplococci", {"Neisseria"}),
        ("coccus.sarcina", {"Sarcina"}),
        ("coccus.tetrad", {"Micrococcus"}),
        ("coccus", {"Planococcus"}),
        # Rods
        ("rod.bacillus", {
            "Escherichia", "Klebsiella", "Salmonella", "Shigella",
            "Bacillus", "Lactobacillus", "Listeria", "Corynebacterium",
            "Pseudomonas", "Mycobacterium", "Clostridium", "Enterobacter",
        }),
        ("rod.coccobacilli", {"Yersinia", "Brucella", "Haemophilus", "Acinetobacter"}),
        # Spirals
        ("spiral.vibrio", {"Vibrio"}),
        ("spiral.spirillum", {
            "Campylobacter", "Helicobacter", "Spirillum",
            "Treponema", "Borrelia", "Leptospira",
        }),
    )
    for shape, genera in genus_shapes:
        if genus_key in genera:
            return shape

    return None  # unknown genus

 # Morphology label for every organism row (None where infer_morphology has no entry).
 organisms["SHAPE"] = organisms.apply(infer_morphology, axis=1)

 def is_glass_priority(row):
    """
    Return 'Yes' when the organism row is on the GLASS priority list, else 'No'.

    A row qualifies when its stripped, lower-cased ``ORGANISM`` name is one
    of the listed species, or when its stripped ``GENUS`` (case-sensitive)
    is one of the listed genera.  Missing fields count as empty strings.
    """
    # Species list (labelled "WHO GLASS list, version 2023" by the original author).
    listed_species = {
        "escherichia coli",
        "klebsiella pneumoniae",
        "acinetobacter baumannii",
        "pseudomonas aeruginosa",
        "salmonella spp",
        "shigella spp",
        "neisseria gonorrhoeae",
        "streptococcus pneumoniae",
        "staphylococcus aureus",
    }
    # Genus-level fallback: any member of these genera also counts.
    listed_genera = {
        "Escherichia",
        "Klebsiella",
        "Acinetobacter",
        "Pseudomonas",
        "Salmonella",
        "Shigella",
        "Neisseria",
        "Streptococcus",
        "Staphylococcus",
    }

    genus_key = str(row.get("GENUS", "")).strip()
    species_key = str(row.get("ORGANISM", "")).strip().lower()

    on_list = species_key in listed_species or genus_key in listed_genera
    return "Yes" if on_list else "No"

 # 'Yes'/'No' GLASS-priority flag for every organism row.
 organisms["GLASS"] = organisms.apply(is_glass_priority, axis=1)

 def infer_mro(row):
    """
    Return ``(is_mro, phenotype)`` for an organism row.

    ``is_mro`` is "Yes" or "No"; ``phenotype`` is a descriptive string, or
    None when ``is_mro`` is "No".  The stripped, lower-cased ``ORGANISM``
    name is matched against the species table first; failing that, the
    stripped ``GENUS`` (case-sensitive) is matched against the genus table.
    """
    # Species with a specific phenotype label.
    species_phenotypes = {
        "escherichia coli": "ESBL or CRE",
        "klebsiella pneumoniae": "ESBL or CRE",
        "enterobacter cloacae": "ESBL or CRE",
        "acinetobacter baumannii": "MDR/XDR",
        "pseudomonas aeruginosa": "MDR/XDR",
        "staphylococcus aureus": "MRSA",
        "enterococcus faecalis": "VRE",
        "enterococcus faecium": "VRE",
        "streptococcus pneumoniae": "DRSP",
        "neisseria gonorrhoeae": "FQ resistant",
    }
    # Genus-wide fallbacks; several carry a conditional wording.
    genus_phenotypes = {
        "Klebsiella": "ESBL or CRE",
        "Enterobacter": "ESBL or CRE",
        "Escherichia": "ESBL or CRE",
        "Acinetobacter": "MDR/XDR",
        "Pseudomonas": "MDR/XDR",
        "Staphylococcus": "MRSA (if mecA or cefoxitin resistant)",
        "Enterococcus": "VRE (if vancomycin resistant)",
        "Streptococcus": "DRSP (if penicillin or macrolide resistant)",
        "Neisseria": "FQ resistant (if ciprofloxacin resistant)",
    }

    genus_key = str(row.get("GENUS", "")).strip()
    species_key = str(row.get("ORGANISM", "")).strip().lower()

    # Species entry wins; the genus table is consulted only when it is absent.
    phenotype = species_phenotypes.get(species_key)
    if phenotype is None:
        phenotype = genus_phenotypes.get(genus_key)

    if phenotype is None:
        return "No", None
    return "Yes", phenotype

 # infer_mro returns a 2-tuple per row; wrapping it in pd.Series makes apply
 # yield two columns, which are stored as MRO and MRO_PHENOTYPE.
 organisms[["MRO", "MRO_PHENOTYPE"]] = organisms.apply(lambda row: pd.Series(infer_mro(row)), axis=1) 
 # save the enriched organism table
 organisms.to_csv("org_recategorised.csv", index=False)
	import pandas as pd

	# Load the three tab-separated reference tables from the working directory.
	breakpoints = pd.read_csv('Breakpoints.txt', sep='\t', low_memory=False)
	antibiotics = pd.read_csv('Antibiotics.txt', sep='\t', low_memory=False)
	organisms = pd.read_csv('Organisms.txt', sep='\t', low_memory=False)

	# limit breakpoints to 2024 CLSI for Humans only
	# (and only the MIC and DISK test methods); all four conditions must hold.
	breakpoints = breakpoints[
	breakpoints['TEST_METHOD'].isin(['MIC', 'DISK']) &
	(breakpoints['GUIDELINES'] == 'CLSI') &
	(breakpoints['YEAR'] == 2024) &
	(breakpoints['HOST'] == 'Human')
	]

	# map to groups names based on the grouping codes provided
	def get_org_category(row, organisms_df):
	    """Return the category label for a single breakpoint row.

	    ``row`` supplies ``ORGANISM_CODE_TYPE`` and ``ORGANISM_CODE``;
	    ``organisms_df`` is the organism table searched when the code type is
	    the name of one of its columns.

	    The result is one of:
	    * a fixed label for the WHONET_ORG_CODE / ALL / ANAEROBE code types;
	    * a hand-maintained group name for the *_GROUP code types, or
	      'drop-me' when the code is not listed;
	    * the first non-null value of the organism-table column named by the
	      part of the code type before its first underscore;
	    * "value-column-not-found" when that column does not exist;
	    * "no-match" in every other case.
	    """
	    kind = row['ORGANISM_CODE_TYPE']
	    org_code = row['ORGANISM_CODE']

	    # Code types whose label does not depend on the code itself.
	    fixed_labels = {
	        'WHONET_ORG_CODE': "organism-direct",
	        'ALL': "organisms-all",
	        'ANAEROBE': "Anaerobe",
	    }
	    if kind in fixed_labels:
	        return fixed_labels[kind]

	    # Group code types resolved through hand-written code -> name tables.
	    # Species code definitions not in any file: Thanks to deepseek AI for the research to find mapping names
	    group_labels = {
	        'GENUS_GROUP': {
	            'NFR': 'Non-Fermenting Rods',
	        },
	        'SEROVAR_GROUP': {
	            'HIN': 'Haemophilus',
	            'ECO': 'Enterobacteriaceae',
	        },
	        'SPECIES_GROUP': {
	            'ABX': 'Acinetobacter baumannii complex',
	            'MTX': 'Mycobacterium tuberculosis complex',
	            'MAX': 'Mycobacterium avium complex',
	            'SGM': 'Slowly Growing Mycobacteria',
	            'RGM': 'Rapidly Growing Mycobacteria',
	            'BCX': 'Burkholderia cepacia complex',
	            'SVI': 'Streptococcus (Viridans)',
	            'BS-': 'Streptococcus (Beta Hem)',
	            'COF': 'Coryneform (Diphtheroids)',
	        },
	    }
	    if kind in group_labels:
	        return group_labels[kind].get(org_code, 'drop-me')

	    # Everything else must name a column of the organism table.
	    if kind not in organisms_df.columns:
	        return "no-match"

	    # e.g. a code type 'GENUS_CODE' reads its display value from 'GENUS'.
	    name_column = kind.split('_')[0].upper()
	    if name_column not in organisms_df.columns:
	        return "value-column-not-found"

	    # First non-null name among the organisms carrying this code.
	    names = organisms_df.loc[organisms_df[kind] == org_code, name_column].dropna()
	    return "no-match" if names.empty else names.iloc[0]

	# Label every retained breakpoint row by calling get_org_category row-wise
	# (axis=1), passing the organism table as the extra argument.
	breakpoints['CATEGORY'] = breakpoints.apply(get_org_category, axis=1, args=(organisms,))

	# map to organisms for those with direct mappings to specific organisms
	def get_org_direct(row, organisms_df):
	    """Return the organism name a breakpoint row points at directly.

	    Only rows whose ``ORGANISM_CODE_TYPE`` is 'WHONET_ORG_CODE' are looked
	    up: the ``ORGANISM`` value of the first ``organisms_df`` row with the
	    same WHONET_ORG_CODE is returned.  Every other row, and any code that
	    is not found, yields the placeholder "Grouping".
	    """
	    kind = row['ORGANISM_CODE_TYPE']
	    org_code = row['ORGANISM_CODE']
	    placeholder = "Grouping"

	    if kind != 'WHONET_ORG_CODE':
	        return placeholder

	    hits = organisms_df[organisms_df[kind] == org_code]
	    return placeholder if hits.empty else hits.iloc[0]['ORGANISM']

	# Organism name for rows that reference an organism directly ("Grouping" otherwise).
	breakpoints['ORGANISM'] = breakpoints.apply(get_org_direct, axis=1, args=(organisms,))
	# NOTE(review): bare expression, result discarded - looks like a notebook inspection leftover.
	breakpoints['CATEGORY'].unique()

	# keep only the breakpoints whose category is not one of the "unmapped" sentinels
	not_found = ['value-column-not-found','no-match','organisms-all','drop-me']
	good_bp = breakpoints[~breakpoints['CATEGORY'].isin(not_found)]

	# merge remapped breakpoints to antibiotics
	# (rows whose WHONET_ABX_CODE has no match in the antibiotics table are dropped)
	dataset = pd.merge(good_bp, antibiotics, on='WHONET_ABX_CODE', how='inner') # 'inner' join
	# NOTE(review): bare expression, result discarded - no effect when run as a script.
	dataset.columns

	# select only required columns and rename
	def columner(df):
	    """Return ``df`` restricted to the reporting columns, in a fixed order.

	    'GUIDELINES_x' and 'POTENCY_x' are selected under their suffixed names,
	    so ``df`` must already carry those columns.
	    """
	    wanted = [
	        'GUIDELINES_x', 'YEAR', 'TEST_METHOD', 'HOST',
	        'ORGANISM_CODE', 'ORGANISM_CODE_TYPE', 'BREAKPOINT_TYPE',
	        'ANTIBIOTIC', 'POTENCY_x',
	        'R', 'I', 'S',
	        'CATEGORY', 'ORGANISM',
	    ]
	    return df[wanted]

	# Keep the reporting columns, then strip the merge suffixes from their names.
	dataset = columner(dataset).rename(columns={
	    'GUIDELINES_x': 'GUIDELINES',
	    'POTENCY_x': 'POTENCY',
	})

	# remove exact duplicate rows
	dataset = dataset.drop_duplicates()

	# one frame per test method
	mic_breakpoints = dataset[dataset['TEST_METHOD'] == 'MIC']
	disk_breakpoints = dataset[dataset['TEST_METHOD'] == 'DISK']

	# write the combined and the per-method tables
	dataset.to_csv("bp_recategorised_all.csv", index=False)
	mic_breakpoints.to_csv("bp_recategorised_mic.csv", index=False)
	disk_breakpoints.to_csv("bp_recategorised_disc.csv", index=False)

	# row count per (category, organism, antibiotic) combination, for inspection
	(
	    dataset[['CATEGORY', 'ORGANISM', 'ANTIBIOTIC']]
	    .value_counts()
	    .sort_index()
	    .reset_index(name='count')
	    .to_csv('bp_all_counts.csv', index=False)
	)

	# Category of the first breakpoint row seen for each (code type, code) pair;
	# rows labelled 'organism-direct' are left out of the mapping.
	mapping_dict = {}
	for _, row in breakpoints.iterrows():
	    key = (row['ORGANISM_CODE_TYPE'], row['ORGANISM_CODE'])
	    category = row['CATEGORY']
	    if category != 'organism-direct':
	        # setdefault keeps the earliest category for a repeated key
	        mapping_dict.setdefault(key, category)

	# For each organism determine its AST category function
	def find_category_for_organism(row, mapper):
	    """Return the category of the first ``mapper`` entry that ``row`` matches.

	    ``mapper`` maps ``(column, code)`` pairs to category labels.  Entries are
	    tried in the mapping's iteration order; an entry matches when ``column``
	    is present in ``row`` and ``row[column]`` equals ``code``.  Returns None
	    when nothing matches.
	    """
	    matches = (
	        label
	        for (column, wanted), label in mapper.items()
	        if column in row and row[column] == wanted
	    )
	    return next(matches, None)

	# Give each organism row the category of the first mapping_dict entry it matches (None if none).
	organisms['CATEGORY'] = organisms.apply(find_category_for_organism, axis=1, args=(mapping_dict,))

	# Phylum names treated as Gram-positive by mo_gramstain.
	GRAM_POSITIVE_PHYLA = {
	    "Actinobacteria",
	    "Actinomycetota",
	    "Firmicutes",
	    "Bacillota",
	    "Tenericutes",
	    "Mycoplasmatota",
	    "Chloroflexi",
	    "Chloroflexota",
	}

	def mo_gramstain(row):
	    """Return "gram+" or "gram-" for a bacterial row, None for anything else.

	    Bacteria default to "gram-"; a phylum listed in GRAM_POSITIVE_PHYLA makes
	    the row "gram+" unless its class is "Negativicutes".  An ``ORGANISM_TYPE``
	    of '+' or '-' overrides the taxonomy-based answer.
	    """
	    if row["KINGDOM"] != "Bacteria":
	        return None

	    by_taxonomy = (
	        "gram+"
	        if row["PHYLUM"] in GRAM_POSITIVE_PHYLA and row["CLASS"] != "Negativicutes"
	        else "gram-"
	    )

	    # An explicit sign in ORGANISM_TYPE wins over the phylum/class rule.
	    declared = row['ORGANISM_TYPE']
	    if declared == '+':
	        return "gram+"
	    if declared == '-':
	        return "gram-"
	    return by_taxonomy

	# Gram stain label for every organism row, computed row-wise by mo_gramstain.
	organisms["GRAM_STAIN"] = organisms.apply(mo_gramstain, axis=1)

	def infer_morphology(row):
	    """Return a morphology label for an organism row, or None if unknown.

	    The stripped, lower-cased ``ORGANISM`` name is checked against a short
	    list of species first; otherwise the stripped ``GENUS`` (compared
	    case-sensitively) is looked up in the genus table.  Missing fields are
	    treated as empty strings.
	    """
	    genus_key = str(row.get("GENUS", "")).strip()
	    species_key = str(row.get("ORGANISM", "")).strip().lower()

	    # Species that get an explicit label ahead of the genus lookup.
	    species_shapes = (
	        ("coccus.diplococci", {
	            "neisseria meningitidis",
	            "neisseria gonorrhoeae",
	            "streptococcus pneumoniae",
	        }),
	        ("coccus.staphylococci", {"staphylococcus aureus"}),
	        ("coccus.streptococci", {"streptococcus pyogenes"}),
	        ("rod.bacillus", {"lactobacillus acidophilus"}),
	        ("spiral.vibrio", {"vibrio cholerae"}),
	        ("spiral.spirillum", {"helicobacter pylori", "treponema pallidum"}),
	    )
	    for shape, species in species_shapes:
	        if species_key in species:
	            return shape

	    # Genus-level defaults; every genus appears under exactly one label.
	    genus_shapes = (
	        # Cocci
	        ("coccus.staphylococci", {"Staphylococcus"}),
	        ("coccus.streptococci", {"Streptococcus", "Enterococcus"}),
	        ("coccus.diplococci", {"Neisseria"}),
	        ("coccus.sarcina", {"Sarcina"}),
	        ("coccus.tetrad", {"Micrococcus"}),
	        ("coccus", {"Planococcus"}),
	        # Rods
	        ("rod.bacillus", {
	            "Escherichia", "Klebsiella", "Salmonella", "Shigella",
	            "Bacillus", "Lactobacillus", "Listeria", "Corynebacterium",
	            "Pseudomonas", "Mycobacterium", "Clostridium", "Enterobacter",
	        }),
	        ("rod.coccobacilli", {"Yersinia", "Brucella", "Haemophilus", "Acinetobacter"}),
	        # Spirals
	        ("spiral.vibrio", {"Vibrio"}),
	        ("spiral.spirillum", {
	            "Campylobacter", "Helicobacter", "Spirillum",
	            "Treponema", "Borrelia", "Leptospira",
	        }),
	    )
	    for shape, genera in genus_shapes:
	        if genus_key in genera:
	            return shape

	    return None  # unknown genus

	# Morphology label for every organism row (None where infer_morphology has no entry).
	organisms["SHAPE"] = organisms.apply(infer_morphology, axis=1)

	def is_glass_priority(row):
	    """
	    Return 'Yes' when the organism row is on the GLASS priority list, else 'No'.

	    A row qualifies when its stripped, lower-cased ``ORGANISM`` name is one
	    of the listed species, or when its stripped ``GENUS`` (case-sensitive)
	    is one of the listed genera.  Missing fields count as empty strings.
	    """
	    # Species list (labelled "WHO GLASS list, version 2023" by the original author).
	    listed_species = {
	        "escherichia coli",
	        "klebsiella pneumoniae",
	        "acinetobacter baumannii",
	        "pseudomonas aeruginosa",
	        "salmonella spp",
	        "shigella spp",
	        "neisseria gonorrhoeae",
	        "streptococcus pneumoniae",
	        "staphylococcus aureus",
	    }
	    # Genus-level fallback: any member of these genera also counts.
	    listed_genera = {
	        "Escherichia",
	        "Klebsiella",
	        "Acinetobacter",
	        "Pseudomonas",
	        "Salmonella",
	        "Shigella",
	        "Neisseria",
	        "Streptococcus",
	        "Staphylococcus",
	    }

	    genus_key = str(row.get("GENUS", "")).strip()
	    species_key = str(row.get("ORGANISM", "")).strip().lower()

	    on_list = species_key in listed_species or genus_key in listed_genera
	    return "Yes" if on_list else "No"

	# 'Yes'/'No' GLASS-priority flag for every organism row.
	organisms["GLASS"] = organisms.apply(is_glass_priority, axis=1)

	def infer_mro(row):
	    """
	    Return ``(is_mro, phenotype)`` for an organism row.

	    ``is_mro`` is "Yes" or "No"; ``phenotype`` is a descriptive string, or
	    None when ``is_mro`` is "No".  The stripped, lower-cased ``ORGANISM``
	    name is matched against the species table first; failing that, the
	    stripped ``GENUS`` (case-sensitive) is matched against the genus table.
	    """
	    # Species with a specific phenotype label.
	    species_phenotypes = {
	        "escherichia coli": "ESBL or CRE",
	        "klebsiella pneumoniae": "ESBL or CRE",
	        "enterobacter cloacae": "ESBL or CRE",
	        "acinetobacter baumannii": "MDR/XDR",
	        "pseudomonas aeruginosa": "MDR/XDR",
	        "staphylococcus aureus": "MRSA",
	        "enterococcus faecalis": "VRE",
	        "enterococcus faecium": "VRE",
	        "streptococcus pneumoniae": "DRSP",
	        "neisseria gonorrhoeae": "FQ resistant",
	    }
	    # Genus-wide fallbacks; several carry a conditional wording.
	    genus_phenotypes = {
	        "Klebsiella": "ESBL or CRE",
	        "Enterobacter": "ESBL or CRE",
	        "Escherichia": "ESBL or CRE",
	        "Acinetobacter": "MDR/XDR",
	        "Pseudomonas": "MDR/XDR",
	        "Staphylococcus": "MRSA (if mecA or cefoxitin resistant)",
	        "Enterococcus": "VRE (if vancomycin resistant)",
	        "Streptococcus": "DRSP (if penicillin or macrolide resistant)",
	        "Neisseria": "FQ resistant (if ciprofloxacin resistant)",
	    }

	    genus_key = str(row.get("GENUS", "")).strip()
	    species_key = str(row.get("ORGANISM", "")).strip().lower()

	    # Species entry wins; the genus table is consulted only when it is absent.
	    phenotype = species_phenotypes.get(species_key)
	    if phenotype is None:
	        phenotype = genus_phenotypes.get(genus_key)

	    if phenotype is None:
	        return "No", None
	    return "Yes", phenotype

	# infer_mro returns a 2-tuple per row; wrapping it in pd.Series makes apply
	# yield two columns, which are stored as MRO and MRO_PHENOTYPE.
	organisms[["MRO", "MRO_PHENOTYPE"]] = organisms.apply(lambda row: pd.Series(infer_mro(row)), axis=1)
	# save the enriched organism table
	organisms.to_csv("org_recategorised.csv", index=False)