Skip to content

Instantly share code, notes, and snippets.

@saravanabalagi
Created December 2, 2024 17:15
Show Gist options
  • Save saravanabalagi/bb36645f90cfb5ff790fe2ac5a2a3b7d to your computer and use it in GitHub Desktop.
Save saravanabalagi/bb36645f90cfb5ff790fe2ac5a2a3b7d to your computer and use it in GitHub Desktop.
Find duplicates in a BibTeX file based on publication title.
import bibtexparser
from collections import defaultdict
# Default BibTeX file to scan. Edit this path, or pass a different path as the
# first command-line argument when running the script.
bibtex_filename = 'root.bib'


def normalize_title(title: str) -> str:
    """Return a canonical form of *title* used to compare entries.

    Curly braces are removed, runs of whitespace (including line breaks)
    are collapsed to a single space, surrounding whitespace is dropped and
    the result is lower-cased.
    """
    without_braces = title.replace('{', '').replace('}', '')
    # split()/join collapses any whitespace run, so a title wrapped across
    # several lines compares equal to the same title written on one line.
    return ' '.join(without_braces.split()).lower()


def find_duplicate_titles(entries):
    """Group entry IDs by normalized title and keep only the duplicates.

    Args:
        entries: Iterable of dict-like BibTeX entries. The ``'title'`` and
            ``'ID'`` keys are read; a missing key is treated as ``''``.

    Returns:
        A dict mapping each normalized title that occurs more than once to
        the list of entry IDs sharing it, in order of first appearance.
        Entries whose normalized title is empty are ignored.
    """
    title_to_ids = defaultdict(list)
    for entry in entries:
        normalized_title = normalize_title(entry.get('title', ''))
        entry_id = entry.get('ID', '').strip()
        # Entries without a usable title cannot be compared; skip them so
        # they are not all reported as duplicates of each other.
        if normalized_title:
            title_to_ids[normalized_title].append(entry_id)
    return {
        title: ids
        for title, ids in title_to_ids.items()
        if len(ids) > 1
    }


def main(path=bibtex_filename):
    """Load the BibTeX file at *path* and print any duplicated titles."""
    with open(path, encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    duplicates = find_duplicate_titles(bib_database.entries)
    for title, ids in duplicates.items():
        print(f'Duplicate title: "{title}"')
        print(f'Entry IDs: {", ".join(ids)}\n')
    if not duplicates:
        print("No duplicate titles found.")


if __name__ == '__main__':
    import sys

    # An optional first argument overrides the default file name.
    main(sys.argv[1] if len(sys.argv) > 1 else bibtex_filename)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment