saravanabalagi · December 2, 2024 17:15
diff --git a/find_duplicates_in_bib.py b/find_duplicates_in_bib.py
 import bibtexparser
 from collections import defaultdict

 # Default BibTeX file to scan; pass another path as the first command-line
 # argument to override it.
 bibtex_filename = 'root.bib'


 def normalize_title(title):
     """Return *title* in a canonical form used to compare titles.

     Brace characters are removed, every run of whitespace (including the
     line breaks of a title wrapped over several lines) collapses to one
     space, leading/trailing whitespace is dropped, and the result is
     lower-cased.
     """
     unbraced = title.replace('{', '').replace('}', '')
     return ' '.join(unbraced.split()).lower()


 def find_duplicate_titles(entries):
     """Group entry IDs by normalized title and keep only the duplicates.

     Args:
         entries: Iterable of dict-like entries. The 'title' and 'ID' keys
             are read; either may be missing.

     Returns:
         A dict mapping each normalized title shared by more than one entry
         to the list of IDs of those entries, in the order they were seen.
     """
     title_to_ids = defaultdict(list)
     for entry in entries:
         normalized_title = normalize_title(entry.get('title', ''))
         # An entry whose title is empty after normalization is skipped;
         # otherwise all untitled entries would be reported as duplicates.
         if normalized_title:
             title_to_ids[normalized_title].append(entry.get('ID', '').strip())
     return {title: ids for title, ids in title_to_ids.items() if len(ids) > 1}


 def main(path=bibtex_filename):
     """Load the BibTeX file at *path* and print entries that share a title."""
     with open(path, encoding='utf-8') as bibtex_file:
         bib_database = bibtexparser.load(bibtex_file)

     duplicates = find_duplicate_titles(bib_database.entries)
     for title, ids in duplicates.items():
         print(f'Duplicate title: "{title}"')
         print(f'Entry IDs: {", ".join(ids)}\n')

     if not duplicates:
         print("No duplicate titles found.")


 if __name__ == "__main__":
     import sys

     # Zero or one positional argument: the optional path to the .bib file.
     main(*sys.argv[1:2])
	import bibtexparser
	from collections import defaultdict

	# Default BibTeX file to scan; pass another path as the first command-line
	# argument to override it.
	bibtex_filename = 'root.bib'


	def normalize_title(title):
	    """Return *title* in a canonical form used to compare titles.

	    Brace characters are removed, every run of whitespace (including the
	    line breaks of a title wrapped over several lines) collapses to one
	    space, leading/trailing whitespace is dropped, and the result is
	    lower-cased.
	    """
	    unbraced = title.replace('{', '').replace('}', '')
	    return ' '.join(unbraced.split()).lower()


	def find_duplicate_titles(entries):
	    """Group entry IDs by normalized title and keep only the duplicates.

	    Args:
	        entries: Iterable of dict-like entries. The 'title' and 'ID' keys
	            are read; either may be missing.

	    Returns:
	        A dict mapping each normalized title shared by more than one entry
	        to the list of IDs of those entries, in the order they were seen.
	    """
	    title_to_ids = defaultdict(list)
	    for entry in entries:
	        normalized_title = normalize_title(entry.get('title', ''))
	        # An entry whose title is empty after normalization is skipped;
	        # otherwise all untitled entries would be reported as duplicates.
	        if normalized_title:
	            title_to_ids[normalized_title].append(entry.get('ID', '').strip())
	    return {title: ids for title, ids in title_to_ids.items() if len(ids) > 1}


	def main(path=bibtex_filename):
	    """Load the BibTeX file at *path* and print entries that share a title."""
	    with open(path, encoding='utf-8') as bibtex_file:
	        bib_database = bibtexparser.load(bibtex_file)

	    duplicates = find_duplicate_titles(bib_database.entries)
	    for title, ids in duplicates.items():
	        print(f'Duplicate title: "{title}"')
	        print(f'Entry IDs: {", ".join(ids)}\n')

	    if not duplicates:
	        print("No duplicate titles found.")


	if __name__ == "__main__":
	    import sys

	    # Zero or one positional argument: the optional path to the .bib file.
	    main(*sys.argv[1:2])