Created
December 2, 2024 17:15
-
-
Save saravanabalagi/bb36645f90cfb5ff790fe2ac5a2a3b7d to your computer and use it in GitHub Desktop.
Find duplicate entries in a BibTeX file based on publication title
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from collections import defaultdict

import bibtexparser
# Default BibTeX file to scan. Either edit this value or pass a different
# path as the first command-line argument.
bibtex_filename = 'root.bib'


def normalize_title(title):
    """Return a canonical form of *title* used to compare publications.

    Braces (BibTeX case protection such as ``{D}eep``) are removed, every
    run of whitespace is collapsed to a single space, leading/trailing
    whitespace is dropped, and the result is lower-cased.

    Args:
        title: Raw title string taken from a BibTeX entry.

    Returns:
        The normalized title; an empty string when nothing is left.
    """
    unbraced = title.replace('{', '').replace('}', '')
    # split() with no argument splits on any whitespace run (spaces, tabs,
    # newlines), so a title wrapped over several lines in the .bib file
    # compares equal to the same title written on one line.
    return ' '.join(unbraced.split()).lower()


def find_duplicate_titles(entries):
    """Group entry IDs by normalized title and keep only the duplicates.

    Args:
        entries: Iterable of mappings that may carry ``'title'`` and
            ``'ID'`` string values (the items of ``bib_database.entries``).

    Returns:
        A dict mapping each normalized title shared by more than one entry
        to the list of entry IDs that use it, in the order the entries
        were seen. Entries whose title is missing or normalizes to an
        empty string are ignored.
    """
    title_to_ids = defaultdict(list)
    for entry in entries:
        normalized_title = normalize_title(entry.get('title', ''))
        if normalized_title:
            entry_id = entry.get('ID', '').strip()
            title_to_ids[normalized_title].append(entry_id)
    return {
        title: ids for title, ids in title_to_ids.items() if len(ids) > 1
    }


def main(path=None):
    """Load a BibTeX file and print every title used by multiple entries.

    Args:
        path: Path of the BibTeX file. When omitted, the first
            command-line argument is used if present, otherwise
            ``bibtex_filename``.
    """
    if path is None:
        path = sys.argv[1] if len(sys.argv) > 1 else bibtex_filename

    # Load the BibTeX file
    with open(path, encoding='utf-8') as bibtex_file:
        bib_database = bibtexparser.load(bibtex_file)

    # Identify and print duplicate titles
    duplicates = find_duplicate_titles(bib_database.entries)
    for title, ids in duplicates.items():
        print(f'Duplicate title: "{title}"')
        print(f'Entry IDs: {", ".join(ids)}\n')
    if not duplicates:
        print("No duplicate titles found.")


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment